finalized the first stable (more or less) version

Dimitri Korsch 6 years ago
parent
commit
75b3b0f775

+ 18 - 5
finetune/classifier.py

@@ -2,11 +2,27 @@ import chainer
 import chainer.functions as F
 import chainer.links as L
 
-from chainer_addons.models.base import BaseClassifier
+from chainer_addons.models.classifier import Classifier as C
 import logging
 
-class SeparateModelClassifier(BaseClassifier):
+class Classifier(C):
+
+	def __init__(self, *args, **kwargs):
+		super(Classifier, self).__init__(*args, **kwargs)
+
+		assert hasattr(self, "model"), \
+			"This classifier has no \"model\" attribute!"
+
+	@property
+	def feat_size(self):
+		if hasattr(self.model.pool, "output_dim") and self.model.pool.output_dim is not None:
+			return self.model.pool.output_dim
+
+		return self.model.meta.feature_size
+
+class SeparateModelClassifier(Classifier):
 	"""Classifier, that holds two separate models"""
+
 	def __init__(self, *args, **kwargs):
 		super(SeparateModelClassifier, self).__init__(*args, **kwargs)
 
@@ -15,9 +31,6 @@ class SeparateModelClassifier(BaseClassifier):
 
 	def init_separate_model(self):
 
-		assert hasattr(self, "model"), \
-			"This classifiert has no \"model\" attribute!"
-
 		if hasattr(self, "separate_model"):
 			logging.warn("Global Model already initialized! Skipping further execution!")
 			return
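
A minimal sketch of the fallback behind the new feat_size property, using stand-in objects instead of a real chainer_addons model (pool and meta below are illustrative mocks, not the actual classes):

	from types import SimpleNamespace

	# illustrative stand-ins: one model whose pooling layer defines output_dim,
	# and one that falls back to the meta feature size
	model_with_pooling = SimpleNamespace(
		pool=SimpleNamespace(output_dim=8192),
		meta=SimpleNamespace(feature_size=2048))
	model_plain = SimpleNamespace(
		pool=SimpleNamespace(),
		meta=SimpleNamespace(feature_size=2048))

	def feat_size(model):
		# same lookup order as Classifier.feat_size above
		if getattr(model.pool, "output_dim", None) is not None:
			return model.pool.output_dim
		return model.meta.feature_size

	assert feat_size(model_with_pooling) == 8192
	assert feat_size(model_plain) == 2048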

+ 2 - 0
finetune/finetuner/__init__.py

@@ -0,0 +1,2 @@
+from .base import DefaultFinetuner
+from .mpi import MPIFinetuner

+ 47 - 52
finetune/finetuner/base.py

@@ -20,12 +20,6 @@ from chainer_addons.functions import smoothed_cross_entropy
 from cvdatasets.annotations import AnnotationType
 from cvdatasets.utils import new_iterator
 
-from finetune.core.classifier import FVEMixin
-from finetune.core.classifier import BasePartsClassifier
-from finetune.core.training import AlphaPoolingTrainer
-from finetune.core.training import Trainer
-from finetune.core.dataset import Dataset
-
 from functools import partial
 from os.path import join
 
@@ -36,15 +30,19 @@ def check_param_for_decay(param):
 	return param.name != "alpha"
 
 
+def _format_kwargs(kwargs):
+	return " ".join([f"{key}={value}" for key, value in kwargs.items()])
+
 class _ModelMixin(abc.ABC):
 	"""This mixin is responsible for optimizer creation, model creation,
 	model wrapping around a classifier and model weights loading.
 	"""
 
-	def __init__(self, classifier_cls, classifier_kwargs, *args, **kwargs):
+	def __init__(self, classifier_cls, classifier_kwargs={}, model_kwargs={}, *args, **kwargs):
 		super(_ModelMixin, self).__init__(*args, **kwargs)
-		self.classifier_kwargs = classifier_kwargs
 		self.classifier_cls = classifier_cls
+		self.classifier_kwargs = classifier_kwargs
+		self.model_kwargs = model_kwargs
 
 	def wrap_model(self, opts):
 
@@ -55,10 +53,9 @@ class _ModelMixin(abc.ABC):
 			loss_func=self._loss_func(opts),
 			**kwargs)
 
-		kwargs_info = " ".join([f"{key}={value}" for key, value in kwargs.items()])
 		logging.info(" ".join([
 			f"Wrapped the model around {clf_class.__name__}",
-			f"with kwargs: {kwargs_info}",
+			f"with kwargs: {_format_kwargs(kwargs)}",
 		]))
 
 	def _loss_func(self, opts):
@@ -115,12 +112,13 @@ class _ModelMixin(abc.ABC):
 		self.model = ModelType.new(
 			model_type=self.model_info.class_key,
 			input_size=opts.input_size,
-			pooling=opts.pooling,
-			pooling_params=dict(
-				init_alpha=opts.init_alpha,
-				output_dim=8192,
-				normalize=opts.normalize),
-			aux_logits=False
+			**self.model_kwargs,
+			# pooling=opts.pooling,
+			# pooling_params=dict(
+			# 	init_alpha=opts.init_alpha,
+			# 	output_dim=8192,
+			# 	normalize=opts.normalize),
+			# aux_logits=False
 		)
 
 	def load_model_weights(self, args):
@@ -144,14 +142,7 @@ class _ModelMixin(abc.ABC):
 				loader = partial(self.model.load_for_finetune, weights=self.weights)
 
 
-		if hasattr(self.model.pool, "output_dim") and self.model.pool.output_dim is not None:
-			feat_size = self.model.pool.output_dim
-
-		elif isinstance(self.clf, (BasePartsClassifier, FVEMixin)):
-			feat_size = self.clf.outsize
-
-		else:
-			feat_size = self.model.meta.feature_size
+		feat_size = self.clf.feat_size
 
 		if hasattr(self.clf, "loader"):
 			loader = self.clf.loader(loader)
@@ -180,10 +171,11 @@ class _DatasetMixin(abc.ABC):
 			subset=subset,
 			dataset_cls=self.dataset_cls,
 		)
-		if opts.use_parts:
-			kwargs.update(dict(
-				no_glob=opts.no_global,
-			))
+
+		# if opts.use_parts:
+		# 	kwargs.update(dict(
+		# 		no_glob=opts.no_global,
+		# 	))
 
 		if not opts.only_head:
 			kwargs.update(dict(
@@ -192,13 +184,12 @@ class _DatasetMixin(abc.ABC):
 				size=size,
 				center_crop_on_val=not opts.no_center_crop_on_val,
 
-				# return_part_crops=args.use_parts,
 			))
 
 		d = self.annot.new_dataset(**kwargs)
 		logging.info("Loaded {} images".format(len(d)))
 		logging.info("Data augmentation is {}abled".format("en" if augment else "dis"))
-		logging.info("Global feature is {}used".format("not " if opts.no_global else ""))
+		# logging.info("Global feature is {}used".format("not " if opts.no_global else ""))
 		return d
 
 	def init_annotations(self, opts):
@@ -218,7 +209,7 @@ class _DatasetMixin(abc.ABC):
 
 		self.dataset_cls.label_shift = opts.label_shift
 
-		size = 112 if opts.recurrent else self.model.meta.input_size
+		size = self.model.meta.input_size
 
 		self.prepare = partial(PrepareType[opts.prepare_type](self.model),
 			swap_channels=opts.swap_channels,
@@ -251,16 +242,25 @@ class _TrainerMixin(abc.ABC):
 	Furthermore, it implements the run method
 	"""
 
-	def init_updater(self, updater_cls=StandardUpdater, updater_kwargs={}):
-		"""Creates an updater from training iterator and the optimizer."""
+	def __init__(self, updater_cls, updater_kwargs={}, *args, **kwargs):
+		super(_TrainerMixin, self).__init__(*args, **kwargs)
+		self.updater_cls = updater_cls
+		self.updater_kwargs = updater_kwargs
 
-		self.updater = updater_cls(
+	def init_updater(self):
+		"""Creates an updater from training iterator and the optimizer."""
+		self.updater = self.updater_cls(
 			iterator=self.train_iter,
 			optimizer=self.opt,
 			device=self.device,
-			**updater_kwargs,
+			**self.updater_kwargs,
+		)
+		logging.info(" ".join([
+			f"Using single GPU: {self.device}.",
+			f"{self.updater_cls.__name__} is initialized",
+			f"with the following kwargs: {_format_kwargs(self.updater_kwargs)}",
+			])
 		)
-		logging.info(f"Using single GPU: {self.device}. {updater_cls.__name__} is initialized.")
 
 	def init_evaluator(self, default_name="val"):
 		"""Creates evaluation extension from validation iterator and the classifier."""
@@ -272,16 +272,14 @@ class _TrainerMixin(abc.ABC):
 
 		self.evaluator.default_name = default_name
 
-	def run(self, opts, ex, no_observe=False):
+	def run(self, trainer_cls, opts, *args, **kwargs):
 
-		trainer_cls = AlphaPoolingTrainer if opts.pooling=="alpha" else Trainer
 		trainer = trainer_cls(
-			ex=ex,
 			opts=opts,
 			updater=self.updater,
 			evaluator=self.evaluator,
 			weights=self.weights,
-			no_observe=no_observe
+			*args, **kwargs
 		)
 		def dump(suffix):
 			if opts.only_eval or opts.no_snapshot:
@@ -309,21 +307,11 @@ class DefaultFinetuner(_ModelMixin, _DatasetMixin, _TrainerMixin):
 
 	"""
 
-
 	def __init__(self, opts, *args, **kwargs):
-		super(BaseFinetuner, self).__init__()
+		super(DefaultFinetuner, self).__init__(*args, **kwargs)
 
 		self.gpu_config(opts, *args, **kwargs)
 
-	def gpu_config(self, opts):
-		if -1 in opts.gpu:
-			self.device = -1
-		else:
-			self.device = opts.gpu[0]
-		cuda.get_device_from_id(self.device).use()
-
-	def setup(self, opts, updater_cls, updater_kwargs):
-
 		self.init_annotations(opts)
 		self.init_model(opts)
 
@@ -334,6 +322,13 @@ class DefaultFinetuner(_ModelMixin, _DatasetMixin, _TrainerMixin):
 		self.load_model_weights(opts)
 
 		self.init_optimizer(opts)
-		self.init_updater(updater_cls=updater_cls, updater_kwargs=updater_kwargs)
+		self.init_updater()
 		self.init_evaluator()
 
+	def gpu_config(self, opts, *args, **kwargs):
+		if -1 in opts.gpu:
+			self.device = -1
+		else:
+			self.device = opts.gpu[0]
+		cuda.get_device_from_id(self.device).use()
+
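
A hedged sketch of how the refactored DefaultFinetuner might now be driven, based only on the signatures visible in this commit; the concrete classifier and updater classes, the empty kwargs, and the way opts is obtained are assumptions for illustration, not part of the commit:

	from chainer.training import StandardUpdater

	from finetune.classifier import Classifier
	from finetune.finetuner import DefaultFinetuner
	from finetune.training import Trainer

	# opts: parsed command-line options (e.g. built from finetune.parser);
	# the actual parsing code is not part of this commit
	tuner = DefaultFinetuner(
		opts,
		classifier_cls=Classifier,
		classifier_kwargs={},
		model_kwargs={},
		updater_cls=StandardUpdater,
		updater_kwargs={},
	)

	# the trainer class is now passed into run() instead of being chosen inside it
	tuner.run(Trainer, opts)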

+ 5 - 5
finetune/finetuner/mpi.py

@@ -1,6 +1,9 @@
-from .base import BaseFinetuner
+import chainermn
+from chainermn import scatter_dataset as scatter
 
-class MPIFinetuner(BaseFinetuner):
+from .base import DefaultFinetuner
+
+class MPIFinetuner(DefaultFinetuner):
 
 	@property
 	def mpi(self):
@@ -21,7 +24,6 @@ class MPIFinetuner(BaseFinetuner):
 
 	def scatter_datasets(self):
 		if self.mpi:
-			from chainermn import scatter_dataset as scatter
 			self.train_data = scatter(self.train_data, self.comm)
 			self.val_data = scatter(self.val_data, self.comm)
 
@@ -39,14 +41,12 @@ class MPIFinetuner(BaseFinetuner):
 		super(MPIFinetuner, self).init_optimizer(opts)
 
 		if self.mpi:
-			import chainermn
 			self.opt = chainermn.create_multi_node_optimizer(self.opt, self.comm)
 
 	def init_evaluator(self):
 		super(MPIFinetuner, self).init_evaluator()
 
 		if self.mpi:
-			import chainermn
 			self.evaluator = chainermn.create_multi_node_evaluator(
 				self.evaluator, self.comm)
 

+ 77 - 0
finetune/parser.py

@@ -0,0 +1,77 @@
+import os
+
+from chainer_addons.training import OptimizerType
+from chainer_addons.models import PrepareType
+
+from cvargparse import Arg, ArgFactory
+from cvdatasets.utils import read_info_file
+
+DEFAULT_INFO_FILE=os.environ.get("DATA", "/home/korsch/Data/info.yml")
+
+info_file = read_info_file(DEFAULT_INFO_FILE)
+
+def default_factory(extra_list=[]):
+	return ArgFactory(extra_list + [
+
+			Arg("data", default=DEFAULT_INFO_FILE),
+
+			Arg("dataset", choices=info_file.DATASETS.keys()),
+			Arg("parts", choices=info_file.PARTS.keys()),
+			Arg("--model_type", "-mt",
+				default="resnet", choices=info_file.MODELS.keys(),
+				help="type of the model"),
+
+			Arg("--input_size", type=int, nargs="+", default=0,
+				help="overrides default input size of the model, if greater than 0"),
+
+			PrepareType.as_arg("prepare_type",
+				help_text="type of image preprocessing"),
+
+			Arg("--load", type=str, help="ignore weights and load already fine-tuned model"),
+
+			Arg("--n_jobs", "-j", type=int, default=0,
+				help="number of loading processes. If 0, then images are loaded in the same process"),
+
+			Arg("--warm_up", type=int, help="warm up epochs"),
+
+			OptimizerType.as_arg("optimizer", "opt",
+				help_text="type of the optimizer"),
+
+			Arg("--cosine_schedule", action="store_true",
+				help="enable cosine annealing LR schedule"),
+
+			Arg("--l1_loss", action="store_true",
+				help="(only with \"--only_head\" option!) use L1 Hinge Loss instead of Softmax Cross-Entropy"),
+
+			Arg("--from_scratch", action="store_true",
+				help="Do not load any weights. Train the model from scratch"),
+
+			Arg("--label_shift", type=int, default=1,
+				help="label shift"),
+
+			Arg("--swap_channels", action="store_true",
+				help="preprocessing option: swap channels from RGB to BGR"),
+
+			Arg("--label_smoothing", type=float, default=0,
+				help="Factor for label smoothing"),
+
+			Arg("--no_center_crop_on_val", action="store_true",
+				help="do not center crop images in the validation step!"),
+
+			Arg("--only_head", action="store_true", help="fine-tune only last layer"),
+			Arg("--no_progress", action="store_true", help="don't show the progress bar"),
+			Arg("--augment", action="store_true", help="do data augmentation (random cropping and random horizontal flipping)"),
+			Arg("--force_load", action="store_true", help="force loading from caffe model"),
+			Arg("--only_eval", action="store_true", help="evaluate the model only. do not train!"),
+			Arg("--init_eval", action="store_true", help="evaluate the model before training"),
+			Arg("--no_snapshot", action="store_true", help="do not save trained model"),
+
+			Arg("--output", "-o", type=str, default=".out", help="output folder"),
+
+		])\
+		.seed()\
+		.batch_size()\
+		.epochs()\
+		.debug()\
+		.learning_rate(lr=1e-2, lrs=10, lrt=1e-5, lrd=1e-1)\
+		.weight_decay(default=5e-4)
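
A short sketch of how the extra_list hook of default_factory might be used by an individual training script; the --pooling argument below is purely illustrative and is not defined by this commit:

	from cvargparse import Arg

	from finetune.parser import default_factory

	# extend the shared argument set with a script-specific option
	factory = default_factory(extra_list=[
		Arg("--pooling", default="avg", help="pooling strategy (illustrative only)"),
	])

The resulting ArgFactory is then handed to whichever cvargparse parser the training scripts construct; that wiring is outside this commit.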

+ 1 - 0
finetune/training/__init__.py

@@ -0,0 +1 @@
+from .trainer import Trainer

+ 236 - 0
finetune/training/trainer.py

@@ -0,0 +1,236 @@
+import logging
+from os.path import join, basename
+from datetime import datetime
+
+import chainer
+from chainer.training import extensions, Trainer as T
+from chainer_addons.training import lr_shift
+from chainer_addons.training.optimizer import OptimizerType
+from chainer_addons.training.extensions import SacredReport
+from chainer_addons.training.extensions.learning_rate import CosineAnnealingLearningRate
+from chainer_addons.training.extensions import AlternateTrainable, SwitchTrainables, WarmUp
+
+from cvdatasets.utils import attr_dict
+
+def debug_hook(trainer):
+	pass
+	# print(trainer.updater.get_optimizer("main").target.model.fc6.W.data.mean(), file=open("debug.out", "a"))
+
+default_intervals = attr_dict(
+	print =		(1,  'epoch'),
+	log =		(1,  'epoch'),
+	eval =		(1,  'epoch'),
+	snapshot =	(10, 'epoch'),
+)
+
+def observe_alpha(trainer):
+	model = trainer.updater.get_optimizer("main").target.model
+	return float(model.pool.alpha.array)
+
+def _is_adam(opts):
+	return opts.optimizer == OptimizerType.ADAM.name.lower()
+
+class Trainer(T):
+	_default_base_model = "model"
+
+	def __init__(self, opts, updater, evaluator=None, weights=None, intervals=default_intervals, no_observe=False):
+
+		self._only_eval = opts.only_eval
+		if weights is None or weights == "auto":
+			self.base_model = self._default_base_model
+		else:
+			self.base_model, _, _ = basename(weights).rpartition(".")
+
+		optimizer = updater.get_optimizer("main")
+		# adam has some specific attributes, so we need to check this
+		is_adam = _is_adam(opts)
+		clf = optimizer.target
+		model = clf.model
+
+		outdir = self.output_directory(opts)
+		logging.info("Training outputs are saved under \"{}\"".format(outdir))
+
+		super(Trainer, self).__init__(
+			updater=updater,
+			stop_trigger=(opts.epochs, 'epoch'),
+			out=outdir
+		)
+
+		### Evaluator ###
+		if evaluator is not None:
+			self.extend(evaluator, trigger=intervals.eval)
+
+		### Warm up ###
+		lr_offset = 0
+		if opts.warm_up:
+			assert opts.warm_up > 0, "Warm-up argument must be positive!"
+			lr_offset = opts.warm_up
+
+			warm_up_lr = opts.learning_rate
+			logging.info("Warm-up of {} epochs enabled!".format(opts.warm_up))
+			self.extend(WarmUp(
+				opts.warm_up, model,
+				opts.learning_rate, warm_up_lr))
+
+
+		### LR shift ###
+		if opts.cosine_schedule:
+			lr_shift_ext = CosineAnnealingLearningRate(
+				attr="alpha" if is_adam else "lr",
+				lr=opts.learning_rate,
+				target=opts.lr_target,
+				epochs=opts.epochs,
+				offset=lr_offset
+			)
+			self.extend(lr_shift_ext)
+		else:
+			lr_shift_ext = lr_shift(optimizer,
+				init=opts.learning_rate,
+				rate=opts.lr_decrease_rate, target=opts.lr_target)
+			self.extend(lr_shift_ext, trigger=(opts.lr_shift, 'epoch'))
+
+		### Code below is only for "main" Trainers ###
+		if no_observe: return
+
+		self.extend(extensions.observe_lr(), trigger=intervals.log)
+		self.extend(extensions.LogReport(trigger=intervals.log))
+
+		### Snapshotting ###
+		self.setup_snapshots(opts, clf.model, intervals.snapshot)
+
+		### Reports and Plots ###
+		print_values, plot_values = self.reportables(opts, model, evaluator)
+		self.extend(extensions.PrintReport(print_values), trigger=intervals.print)
+		for name, values in plot_values.items():
+			ext = extensions.PlotReport(values, 'epoch', file_name='{}.png'.format(name))
+			self.extend(ext)
+
+		### Progress bar ###
+		if not opts.no_progress:
+			self.extend(extensions.ProgressBar(update_interval=1))
+
+	def setup_snapshots(self, opts, obj, trigger):
+
+		if opts.no_snapshot:
+			logging.warning("Models will not be snapshotted!")
+		else:
+			dump_fmt = "ft_model_epoch{0.updater.epoch:03d}.npz"
+			self.extend(extensions.snapshot_object(obj, dump_fmt), trigger=trigger)
+			logging.info("Snapshot format: \"{}\"".format(dump_fmt))
+
+
+	def reportables(self, opts, model, evaluator):
+		eval_name = lambda name: f"{evaluator.default_name}/{name}"
+
+
+		print_values = [
+			"elapsed_time",
+			"epoch",
+			# "lr",
+
+			"main/accuracy", eval_name("main/accuracy"),
+			"main/loss", eval_name("main/loss"),
+
+		]
+
+		plot_values = {
+			"accuracy": [
+				"main/accuracy",  eval_name("main/accuracy"),
+			],
+			"loss": [
+				"main/loss", eval_name("main/loss"),
+			],
+		}
+
+		# if opts.triplet_loss:
+		# 	print_values.extend(["main/t_loss", eval_name("main/t_loss")])
+		# 	plot_values.update({
+		# 		"t_loss": [
+		# 			"main/t_loss", eval_name("main/t_loss"),
+		# 		]
+		# 	})
+
+		# if opts.use_parts:
+		# 	print_values.extend(["main/logL", eval_name("main/logL")])
+		# 	plot_values.update({
+		# 		"logL": [
+		# 			"main/logL", eval_name("main/logL"),
+		# 		]
+		# 	})
+
+		# 	if not opts.no_global:
+		# 		print_values.extend([
+		# 			"main/glob_accu", eval_name("main/glob_accu"),
+		# 			# "main/glob_loss", eval_name("main/glob_loss"),
+
+		# 			"main/part_accu", eval_name("main/part_accu"),
+		# 			# "main/part_loss", eval_name("main/part_loss"),
+		# 		])
+
+		# 		plot_values["accuracy"].extend([
+		# 			"main/part_accu", eval_name("main/part_accu"),
+		# 			"main/glob_accu", eval_name("main/glob_accu"),
+		# 		])
+
+		# 		plot_values["loss"].extend([
+		# 			"main/part_loss", eval_name("main/part_loss"),
+		# 			"main/glob_loss", eval_name("main/glob_loss"),
+		# 		])
+
+
+		return print_values, plot_values
+
+
+	def output_directory(self, opts):
+
+		result = opts.output
+
+		if self.base_model != self._default_base_model:
+			result = join(result, self.base_model)
+
+		result = join(result, datetime.now().strftime("%Y-%m-%d-%H.%M.%S"))
+		return result
+
+	def run(self, init_eval=True):
+		if init_eval:
+			logging.info("Evaluating initial model ...")
+			evaluator = self.get_extension("val")
+			init_perf = evaluator(self)
+			logging.info("Initial accuracy: {val/main/accuracy:.3%} initial loss: {val/main/loss:.3f}".format(
+				**{key: float(value) for key, value in init_perf.items()}
+			))
+		if self._only_eval:
+			return
+		return super(Trainer, self).run()
+
+class SacredTrainer(Trainer):
+	def __init__(self, ex, *args, **kwargs):
+		super(SacredTrainer, self).__init__(*args, **kwargs)
+		self.extend(SacredReport(ex=ex, trigger=default_intervals.log))
+
+class AlphaPoolingTrainer(SacredTrainer):
+
+	def __init__(self, opts, updater, *args, **kwargs):
+		super(AlphaPoolingTrainer, self).__init__(opts=opts, updater=updater, *args, **kwargs)
+		model = updater.get_optimizer("main").target.model
+		### Alternating training of CNN and FC layers (only for alpha-pooling) ###
+		if opts.switch_epochs:
+			self.extend(SwitchTrainables(
+				opts.switch_epochs,
+				model=model,
+				pooling=model.pool))
+
+	def reportables(self, opts, model, evaluator):
+		print_values, plot_values = super(AlphaPoolingTrainer, self).reportables(opts, model, evaluator)
+		alpha_update_rule = model.pool.alpha.update_rule
+		if _is_adam(opts):
+			# in case of Adam optimizer
+			alpha_update_rule.hyperparam.alpha *= opts.kappa
+		else:
+			alpha_update_rule.hyperparam.lr *= opts.kappa
+
+		self.extend(extensions.observe_value("alpha", observe_alpha), trigger=intervals.print)
+		print_values.append("alpha")
+		plot_values["alpha"] = ["alpha"]
+
+		return print_values, plot_values
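
Since the AlphaPoolingTrainer/Trainer switch was removed from _TrainerMixin.run in finetune/finetuner/base.py above, the calling script now chooses the trainer class itself. A hedged sketch of that choice, continuing the DefaultFinetuner sketch above and assuming an opts.pooling option and a sacred experiment ex, as in the removed code:

	from finetune.training import Trainer
	from finetune.training.trainer import AlphaPoolingTrainer

	# previously hard-coded inside _TrainerMixin.run; now decided by the caller
	if opts.pooling == "alpha":
		# SacredTrainer subclasses expect the sacred experiment via `ex`
		tuner.run(AlphaPoolingTrainer, opts, ex=ex)
	else:
		tuner.run(Trainer, opts)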