|
@@ -0,0 +1,105 @@
|
|
|
+import logging
|
|
|
+
|
|
|
+import chainer
|
|
|
+import chainer.functions as F
|
|
|
+import chainer.links as L
|
|
|
+
|
|
|
+from chainer_addons.links.fisher_encoding import FVLayer
|
|
|
+from finetune.classifier import SeparateModelClassifier
|
|
|
+
|
|
|
+
|
|
|
class FVEClassifier(SeparateModelClassifier):
    """Part-based classifier with a Fisher-vector (FV) encoding head.

    Local convolutional features are extracted from part crops by
    ``self.model``, optionally projected down to ``fv_insize`` channels
    with a 1x1 convolution, batch-normalized, and aggregated by an
    ``FVLayer``. The resulting part-based prediction is fused (summed)
    with the prediction of a separate global model on the full image.
    """

    def __init__(self, n_comps=2**4, fv_insize=256, alpha=0.99,
                 *args, **kwargs):
        """Set up the FV encoding head on top of the base classifier.

        Args:
            n_comps: number of mixture components in the FV layer.
            fv_insize: channel count fed into the FV layer. Values < 1
                disable the 1x1 projection and use the backbone's
                feature size directly.
            alpha: mixing factor passed to the FV layer
                (presumably an EMA update rate — confirm in FVLayer).
            *args, **kwargs: forwarded to ``SeparateModelClassifier``.
        """
        super(FVEClassifier, self).__init__(*args, **kwargs)

        with self.init_scope():
            if fv_insize < 1:
                # No dimensionality reduction: feed the backbone's
                # conv features straight into the FV layer.
                self.pre_fv = F.identity
                fv_insize = self.model.meta.feature_size
            else:
                # 1x1 convolution projecting backbone features down
                # to fv_insize channels.
                self.pre_fv = L.Convolution2D(
                    self.model.meta.feature_size,
                    fv_insize,
                    ksize=1)

            self.pre_fv_bn = L.BatchNormalization(fv_insize)
            self.fv_encoding = FVLayer(
                fv_insize, n_comps,
                alpha=alpha)

        self.fv_insize = fv_insize
        self.n_comps = n_comps

    def __call__(self, *inputs):
        """Compute the fused part+global classification loss.

        Args:
            inputs: tuple ``(parts, X, y)`` where ``parts`` has shape
                ``(N, T, C, H, W)`` (T part crops per sample), ``X`` is
                the full-image batch and ``y`` holds the labels.

        Returns:
            Scalar loss of the summed part+global predictions.
        """
        parts, X, y = inputs

        n, t, c, h, w = parts.shape
        # Fold the part axis into the batch axis for the backbone pass.
        _parts = parts.reshape(n*t, c, h, w)
        part_convs, _ = self.model(_parts, layer_name=self.model.meta.conv_map_layer)

        part_local_feats = self.pre_fv_bn(self.pre_fv(part_convs))

        # First axis is n*t again; only the conv-map geometry is needed.
        _, n_feats, conv_h, conv_w = part_local_feats.shape

        part_local_feats = F.reshape(part_local_feats, (n, t, n_feats, conv_h, conv_w))

        # N x T x C x H x W -> N x T x H x W x C
        part_local_feats = F.transpose(part_local_feats, (0, 1, 3, 4, 2))
        # N x T x H x W x C -> N x T*H*W x C
        part_local_feats = F.reshape(part_local_feats, (n, t*conv_h*conv_w, n_feats))

        logits = self.fv_encoding(part_local_feats)

        logL, _ = self.fv_encoding.log_proba(part_local_feats, weighted=True)

        # may be used later to maximize the log-likelihood
        self.neg_logL = -F.mean(logL)

        # Log-mean-exp over all local features: the log of the average
        # likelihood (reported for monitoring only).
        avg_logL = F.logsumexp(logL) - self.xp.log(logL.size)

        part_pred = self.model.clf_layer(logits)
        part_loss = self.loss(part_pred, y)
        part_accu = self.model.accuracy(part_pred, y)

        self.report(
            part_accu=part_accu,
            part_loss=part_loss,
            logL=avg_logL,
        )

        # Global loss is reported inside predict_global but does not
        # contribute to the returned loss; only the prediction is fused.
        _glob_loss, glob_pred = self.predict_global(X, y)

        # Late fusion: sum part-based and global logits.
        pred = part_pred + glob_pred
        accuracy = self.model.accuracy(pred, y)
        loss = self.loss(pred, y)

        self.report(
            loss=loss.array,
            accuracy=accuracy.array,
        )
        return loss

    def predict_global(self, X, y):
        """Run the separate global model on ``X``.

        Reports the global loss/accuracy and returns
        ``(glob_loss, glob_pred)``.
        """
        glob_pred, _ = self.separate_model(X)
        glob_loss = self.loss(glob_pred, y)
        glob_accu = self.separate_model.accuracy(glob_pred, y)

        # Use .array consistently (preferred alias of Variable.data).
        self.report(
            glob_loss=glob_loss.array,
            glob_accu=glob_accu.array,
        )
        return glob_loss, glob_pred

    @property
    def feat_size(self):
        # Feature size of the backbone model.
        return self.model.meta.feature_size

    @property
    def output_size(self):
        # The FV encoding yields mean + variance statistics
        # per mixture component.
        return self.fv_insize * self.n_comps * 2
|