瀏覽代碼

added example with FV Encoding

Dimitri Korsch 6 年之前
父節點
當前提交
9bc09007f2

+ 0 - 0
examples/fve_example/core/__init__.py


+ 105 - 0
examples/fve_example/core/classifier.py

@@ -0,0 +1,105 @@
+import logging
+
+import chainer
+import chainer.functions as F
+import chainer.links as L
+
+from chainer_addons.links.fisher_encoding import FVLayer
+from finetune.classifier import SeparateModelClassifier
+
+
+class FVEClassifier(SeparateModelClassifier):
+
+	def __init__(self, n_comps=2**4, fv_insize=256, alpha=0.99,
+		*args, **kwargs):
+		super(FVEClassifier, self).__init__(*args, **kwargs)
+
+		with self.init_scope():
+
+			if fv_insize < 1:
+				self.pre_fv = F.identity
+				fv_insize = self.model.meta.feature_size
+			else:
+				self.pre_fv = L.Convolution2D(
+					self.model.meta.feature_size,
+					fv_insize,
+					ksize=1)
+
+			self.pre_fv_bn = L.BatchNormalization(fv_insize)
+			self.fv_encoding = FVLayer(
+				fv_insize, n_comps,
+				alpha=alpha)
+
+		self.fv_insize = fv_insize
+		self.n_comps = n_comps
+
+	def __call__(self, *inputs):
+		parts, X, y = inputs
+
+		n, t, c, h, w = parts.shape
+		_parts = parts.reshape(n*t, c, h, w)
+		part_convs, _ = self.model(_parts, layer_name=self.model.meta.conv_map_layer)
+
+		part_local_feats = self.pre_fv_bn(self.pre_fv(part_convs))
+
+		n0, n_feats, conv_h, conv_w = part_local_feats.shape
+
+		part_local_feats = F.reshape(part_local_feats, (n, t, n_feats, conv_h, conv_w))
+
+		# N x T x C x H x W -> N x T x H x W x C
+		part_local_feats = F.transpose(part_local_feats, (0, 1, 3, 4, 2))
+		# N x T x H x W x C -> N x T*H*W x C
+		part_local_feats = F.reshape(part_local_feats, (n, t*conv_h*conv_w, n_feats))
+
+		logits = self.fv_encoding(part_local_feats)
+
+		logL, _ = self.fv_encoding.log_proba(part_local_feats, weighted=True)
+
+		# may be used later to maximize the log-likelihood
+		self.neg_logL = -F.mean(logL)
+
+		# avarage over all local features
+		avg_logL = F.logsumexp(logL) - self.xp.log(logL.size)
+
+		part_pred = self.model.clf_layer(logits)
+		part_loss = self.loss(part_pred, y)
+		part_accu = self.model.accuracy(part_pred, y)
+
+		self.report(
+			part_accu=part_accu,
+			part_loss=part_loss,
+			logL=avg_logL
+		)
+
+		glob_loss, glob_pred = self.predict_global(X, y)
+
+		pred = part_pred + glob_pred
+		accuracy = self.model.accuracy(pred, y)
+		loss = self.loss(pred, y)
+
+		self.report(
+			loss      =  loss.array,
+			accuracy  =  accuracy.array,
+
+		)
+		return loss
+
+	def predict_global(self, X, y):
+		glob_pred, _ = self.separate_model(X)
+		glob_loss = self.loss(glob_pred, y)
+		glob_accu = self.separate_model.accuracy(glob_pred, y)
+
+		self.report(
+			glob_loss =  glob_loss.data,
+			glob_accu =  glob_accu.data,
+		)
+		return glob_loss, glob_pred
+
+
+	@property
+	def feat_size(self):
+		return self.model.meta.feature_size
+
+	@property
+	def output_size(self):
+		return self.fv_insize * self.n_comps * 2

+ 44 - 0
examples/fve_example/core/dataset.py

@@ -0,0 +1,44 @@
+from abc import ABC
+
+from chainer_addons.dataset import AugmentationMixin
+from chainer_addons.dataset import PreprocessMixin
+
+from cvdatasets.dataset import AnnotationsReadMixin
+from cvdatasets.dataset import CroppedPartMixin
+from cvdatasets.dataset import IteratorMixin
+
+from finetune.dataset import _base_mixin
+
+class _parts_mixin(ABC):
+
+	def get_example(self, i):
+		im_obj = super(_parts_mixin, self).get_example(i)
+		crops = im_obj.visible_crops(None)
+		parts = crops + [im_obj.im_array]
+
+		return parts, im_obj.label + self.label_shift
+
+class PartsDataset(_base_mixin,
+	# augmentation and preprocessing
+	AugmentationMixin, PreprocessMixin,
+	_parts_mixin,
+	# random uniform region selection
+	CroppedPartMixin,
+	# reads image
+	AnnotationsReadMixin,
+	IteratorMixin):
+
+	def __init__(self, no_glob=False, *args, **kwargs):
+		super(PartsDataset, self).__init__(*args, **kwargs)
+		# mask = self.labels < 10
+		# self.uuids = self.uuids[mask]
+
+		self.no_glob = no_glob
+
+	def get_example(self, i):
+		X, y = super(PartsDataset, self).get_example(i)
+		X_parts, X_glob = X[:-1], X[-1]
+		if self.no_glob:
+			return X_parts, y
+		else:
+			return X_parts, X_glob, y

+ 52 - 0
examples/fve_example/main.py

@@ -0,0 +1,52 @@
+#!/usr/bin/env python
+# This file is an executable script, not an importable module.
+if __name__ != '__main__': raise Exception("Do not import me!")
+
+import socket
+# on hosts other than "sigma25" assume there is no display available,
+# so force the non-interactive Agg backend before pyplot is imported
+if socket.gethostname() != "sigma25":
+	import matplotlib
+	matplotlib.use('Agg')
+
+import chainer
+import logging
+
+from chainer.training.updaters import StandardUpdater
+
+from finetune.finetuner import DefaultFinetuner
+from finetune.training.trainer import Trainer
+from finetune.dataset import BaseDataset
+from finetune.classifier import Classifier
+
+
+from utils import parser
+from core import classifier, dataset
+
+def main(args):
+	if args.debug:
+		chainer.set_debug(args.debug)
+		logging.warning("DEBUG MODE ENABLED!")
+
+
+	tuner = DefaultFinetuner(
+		args,
+		classifier_cls=classifier.FVEClassifier,
+		classifier_kwargs=dict(
+			n_comps=args.n_components,
+			fv_insize=args.fv_insize,
+			alpha=args.alpha,
+		),
+
+		model_kwargs=dict(
+			pooling=args.pooling,
+		),
+
+		dataset_cls=dataset.PartsDataset,
+
+		updater_cls=StandardUpdater,
+		updater_kwargs={},
+	)
+
+
+	tuner.run(trainer_cls=Trainer, opts=args)
+
+
+main(parser.parse_args())

+ 75 - 0
examples/fve_example/scripts/config.sh

@@ -0,0 +1,75 @@
+# Shared training configuration; meant to be sourced by the run scripts
+# (e.g. train.sh) AFTER they set DATASET and their own defaults.
+# Sets PYTHON (the launcher command), RUN_SCRIPT and appends to OPTS.
+source /home/korsch/.anaconda3/etc/profile.d/conda.sh
+conda activate chainer4
+
+# select the python launcher based on the GDB / MPI / PROFILE flags
+if [[ $GDB == "1" ]]; then
+	PYTHON="gdb -ex run --args python"
+
+elif [[ $MPI == "1" ]]; then
+	N_MPI=${N_MPI:-2}
+	HOSTFILE=${HOSTFILE:-hosts.conf}
+	PYTHON="mpirun -n $N_MPI --hostfile ${HOSTFILE} -x PATH -x CUDA_PATH python"
+	OPTS="${OPTS} --mpi"
+
+elif [[ $PROFILE == "1" ]]; then
+	PYTHON="python -m cProfile -o profile"
+
+else
+	PYTHON="python"
+
+fi
+
+RUN_SCRIPT="../main.py"
+
+###### Dataset config ######
+
+BASE_DIR=/home/korsch/Data
+
+OPTIMIZER=${OPTIMIZER:-adam}
+MODEL_TYPE=${MODEL_TYPE:-resnet}
+PREPARE_TYPE=${PREPARE_TYPE:-model}
+
+MODEL_DIR=${BASE_DIR}/MODELS/${MODEL_TYPE}
+
+# DATASET and OPTOUTPUT_SUFFIX are presumably provided by the sourcing
+# script -- verify against the caller (train.sh sets DATASET)
+OUTPUT_DIR=${OUTPUT_DIR:-../.results/ft_${DATASET}/${OPTIMIZER}${OUTPUT_SUFFIX}}
+
+
+###### Training config ######
+
+BATCH_SIZE=${BATCH_SIZE:-24}
+GPU=${GPU:-"0"}
+
+EPOCHS=${EPOCHS:-100}
+
+DECAY=${DECAY:-5e-4}
+LR=${LR:-"-lr 1e-3 -lrd 1e-1 -lrt 1e-8 -lrs 20"}
+
+FINAL_POOLING=${FINAL_POOLING:-g_avg}
+
+N_JOBS=${N_JOBS:-1}
+
+# with parallel data loading workers, cap the OpenMP threads per worker
+if [[ $N_JOBS != "0" ]]; then
+	export OMP_NUM_THREADS=2
+fi
+
+###### OPTIONS ######
+
+OPTS="${OPTS} --epochs ${EPOCHS}"
+OPTS="${OPTS} --gpu ${GPU}"
+OPTS="${OPTS} --batch_size ${BATCH_SIZE}"
+
+# if [[ -f sacred/creds.sh ]]; then
+# 	source sacred/creds.sh
+# else
+# 	echo "No sacred credentials found! Disabling sacred."
+# 	OPTS="${OPTS} --no_sacred"
+# fi
+
+OPTS="${OPTS} --augment"
+OPTS="${OPTS} --model_type ${MODEL_TYPE}"
+OPTS="${OPTS} --prepare_type ${PREPARE_TYPE}"
+OPTS="${OPTS} --n_jobs ${N_JOBS}"
+OPTS="${OPTS} --optimizer ${OPTIMIZER}"
+OPTS="${OPTS} --output ${OUTPUT_DIR}"
+OPTS="${OPTS} --pooling ${FINAL_POOLING}"
+OPTS="${OPTS} --decay ${DECAY}"
+OPTS="${OPTS} ${LR}"

+ 34 - 0
examples/fve_example/scripts/train.sh

@@ -0,0 +1,34 @@
+#!/usr/bin/env bash
+
+# resnet inception inception_tf [vgg]
+MODEL_TYPE=${MODEL_TYPE:-inception}
+DATA=${DATA:-/home/korsch/Data/info.yml}
+
+GPU=${GPU:-0}
+N_JOBS=${N_JOBS:-3}
+
+OPTIMIZER=${OPTIMIZER:-rmsprop}
+LR=${LR:-"-lr 1e-4 -lrd 0.1 -lrt 1e-6 -lrs 20"}
+DECAY=${DECAY:-5e-4}
+EPOCHS=${EPOCHS:-60}
+BATCH_SIZE=${BATCH_SIZE:-10}
+
+export OMP_NUM_THREADS=2
+
+DATASET=${DATASET:-CUB200}
+# NAC GT GT2 L1_pred L1_full
+PARTS=${PARTS:-GT2}
+
+source config.sh
+
+
+OPTS="${OPTS} --label_smoothing 0.1"
+OPTS="${OPTS} --input_size 299"
+
+$PYTHON $RUN_SCRIPT \
+	${DATA} \
+	${DATASET} \
+	${DATASET}_${PARTS} \
+	${OPTS} \
+	$@
+

+ 0 - 0
examples/fve_example/utils/__init__.py


+ 54 - 0
examples/fve_example/utils/parser.py

@@ -0,0 +1,54 @@
+import os
+
+from cvargparse import GPUParser, Arg
+from chainer_addons.links import PoolingType
+
+from finetune.parser import default_factory
+
+
+def parse_args():
+
+	parser = GPUParser(default_factory([
+
+			PoolingType.as_arg("pooling",
+				help_text="type of pre-classification pooling"),
+
+			Arg("--n_components", type=int, default=1,
+				help="Number of mixtures"),
+
+			Arg("--alpha", type=float, default=0.99,
+				help="EM update factor"),
+
+			Arg("--fv_insize", type=float, default=256,
+				help="input size for the FVE Layer"),
+
+
+			# Arg("--normalize", action="store_true",
+			# 	help="normalize features after cbil- or alpha-poolings"),
+
+			# Arg("--subset", "-s", type=int, nargs="*", default=[-1], help="select specific classes"),
+			# Arg("--no_sacred", action="store_true", help="do save outputs to sacred"),
+
+			# Arg("--use_parts", action="store_true",
+			# 	help="use parts, if present"),
+			# Arg("--simple_parts", action="store_true",
+			# 	help="use simple parts classifier, that only concatenates the features"),
+			# Arg("--no_global", action="store_true",
+			# 	help="use parts only, no global feature"),
+
+
+			# Arg("--parts_in_bb", action="store_true", help="take only uniform regions where the centers are inside the bounding box"),
+
+			# Arg("--rnd_select", action="store_true", help="hide random uniform regions of the image"),
+			# Arg("--recurrent", action="store_true", help="observe all parts in recurrent manner instead of the whole image at once"),
+
+			# ## AlphaPooling options
+			# Arg("--init_alpha", type=int, default=1, help="initial parameter for alpha pooling"),
+			# Arg("--kappa", type=float, default=1., help="Learning rate factor for alpha pooling"),
+			# Arg("--switch_epochs", type=int, default=0, help="train alpha pooling layer and the rest of the network alternating")
+		])
+	)
+
+	parser.init_logger()
+
+	return parser.parse_args()