
Added InceptionV3 definition and weight loading logic

Dimitri Korsch, 4 years ago
commit 1c21facb94

+ 4 - 1
cvmodelz/models/__init__.py

@@ -29,6 +29,8 @@ supported = dict(
 		pretrained.ResNet50,
 		pretrained.ResNet101,
 		pretrained.ResNet152,
+
+		pretrained.InceptionV3,
 	),
 )
 
@@ -55,5 +57,6 @@ if __name__ == '__main__':
 
 	# model = L.VGG19Layers(pretrained_model=None)
 	model = pretrained.ResNet35()
-	utils.print_model_info(model, input_size=224)
+	# print(model.pool)
+	utils.print_model_info(model)
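
As a quick sanity check of the new registration, the `__main__` block above could be extended as follows. This is only a sketch: it assumes InceptionV3 constructs with its defaults (pretrained_model=None), and that print_model_info now reads the input size from the model's meta info, which would explain why the explicit input_size=224 argument was dropped.

	model = pretrained.InceptionV3()   # no weight file given, so nothing is loaded
	utils.print_model_info(model)      # input size presumably taken from model.meta (299)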
 

+ 9 - 4
cvmodelz/models/pretrained/__init__.py

@@ -1,18 +1,23 @@
 from cvmodelz.models.pretrained.base import PretrainedModelMixin
-from cvmodelz.models.pretrained.vgg import VGG16
-from cvmodelz.models.pretrained.vgg import VGG19
-from cvmodelz.models.pretrained.resnet import ResNet35
-from cvmodelz.models.pretrained.resnet import ResNet50
+from cvmodelz.models.pretrained.inception.inception_v3 import InceptionV3
 from cvmodelz.models.pretrained.resnet import ResNet101
 from cvmodelz.models.pretrained.resnet import ResNet152
+from cvmodelz.models.pretrained.resnet import ResNet35
+from cvmodelz.models.pretrained.resnet import ResNet50
+from cvmodelz.models.pretrained.vgg import VGG16
+from cvmodelz.models.pretrained.vgg import VGG19
 
 
 __all__ = [
 	"PretrainedModelMixin",
+
 	"VGG16",
 	"VGG19",
+
 	"ResNet35",
 	"ResNet50",
 	"ResNet101",
 	"ResNet152",
+
+	"InceptionV3",
 ]
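
With the re-export added above, both import paths resolve to the same class, so downstream code can use the short form:

	from cvmodelz.models.pretrained import InceptionV3
	# equivalent to the full module path:
	from cvmodelz.models.pretrained.inception.inception_v3 import InceptionV3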

+ 0 - 0
cvmodelz/models/pretrained/inception/__init__.py


+ 202 - 0
cvmodelz/models/pretrained/inception/blocks.py

@@ -0,0 +1,202 @@
+import chainer
+import chainer.functions as F
+import chainer.links as L
+
+from chainer_addons.links import Conv2D_BN
+from chainer_addons.links.pooling import PoolingType
+
+
+class InceptionHead(chainer.Chain):
+	def __init__(self):
+		super(InceptionHead, self).__init__()
+		with self.init_scope():
+			# input 3 x 299 x 299
+			self.conv1 = Conv2D_BN(3,   32, ksize=3, stride=2)
+			# input 32 x 149 x 149
+			self.conv2 = Conv2D_BN(32,  32, ksize=3)
+			# input 32 x 147 x 147
+			self.conv3 = Conv2D_BN(32,  64, ksize=3, pad=1)
+			# input 64 x 147 x 147
+			self.pool4 = PoolingType.new("max", ksize=3, stride=2)
+
+			# input 64 x 73 x 73
+			self.conv5 = Conv2D_BN(64,  80, ksize=1)
+			# input 80 x 73 x 73
+			self.conv6 = Conv2D_BN(80, 192, ksize=3)
+			# input 192 x 71 x 71
+			self.pool7 = PoolingType.new("max", ksize=3, stride=2)
+			# output 192 x 35 x 35
+
+	def __call__(self, x):
+		x = self.conv1(x)
+		x = self.conv2(x)
+		x = self.conv3(x)
+		x = self.pool4(x)
+		x = self.conv5(x)
+		x = self.conv6(x)
+		x = self.pool7(x)
+		return x
+
+class Inception1(chainer.Chain):
+	def __init__(self, insize, sizes, outputs, **pool_args):
+		super(Inception1, self).__init__()
+
+		out1x1, out5x5, out3x3, out_pool = outputs
+		s5x5, s3x3_1, s3x3_2 = sizes
+
+		with self.init_scope():
+			self.conv1x1   = Conv2D_BN(insize, out1x1, ksize=1)
+
+			self.conv5x5_1 = Conv2D_BN(insize, s5x5,   ksize=1)
+			self.conv5x5_2 = Conv2D_BN(s5x5,   out5x5, ksize=5, pad=2)
+
+			self.conv3x3_1 = Conv2D_BN(insize, s3x3_1, ksize=1)
+			self.conv3x3_2 = Conv2D_BN(s3x3_1, s3x3_2, ksize=3, pad=1)
+			self.conv3x3_3 = Conv2D_BN(s3x3_2, out3x3, ksize=3, pad=1)
+
+			self.pool_conv = Conv2D_BN(insize, out_pool, ksize=1)
+		self.pool = PoolingType.new(**pool_args)
+
+	def __call__(self, x):
+
+		y0 = self.conv1x1(x)
+		y1 = self.conv5x5_2(self.conv5x5_1(x))
+		y2 = self.conv3x3_3(self.conv3x3_2(self.conv3x3_1(x)))
+		y3 = self.pool_conv(self.pool(x))
+		return F.concat([y0, y1, y2, y3])
+
+class Inception2(chainer.Chain):
+	def __init__(self, insize, sizes, outputs, **pool_args):
+		super(Inception2, self).__init__()
+
+		out1, out2 = outputs
+		size1, size2 = sizes
+		with self.init_scope():
+			self.conv3x3   = Conv2D_BN(insize, out1 , ksize=3, stride=2)
+
+			self.conv3x3_1 = Conv2D_BN(insize, size1, ksize=1)
+			self.conv3x3_2 = Conv2D_BN(size1,  size2, ksize=3, pad=1)
+			self.conv3x3_3 = Conv2D_BN(size2,  out2,  ksize=3, stride=2)
+
+		self.pool = PoolingType.new(**pool_args)
+
+	def __call__(self, x):
+		y0 = self.conv3x3(x)
+		y1 = self.conv3x3_3(self.conv3x3_2(self.conv3x3_1(x)))
+		y2 = self.pool(x)
+		return F.concat([y0, y1, y2])
+
+class Inception3(chainer.Chain):
+	def __init__(self, insize, sizes, outputs, **pool_args):
+		super(Inception3, self).__init__()
+
+		out1x1, out7x7, out7x7x2, out_pool = outputs
+		s7x7_1, s7x7_2, s7x7x2_1, s7x7x2_2, s7x7x2_3, s7x7x2_4 = sizes
+
+		with self.init_scope():
+			self.conv1x1 = Conv2D_BN(insize,       out1x1, ksize=1)
+
+			self.conv7x7_1 = Conv2D_BN(insize,     s7x7_1, ksize=1)
+			self.conv7x7_2 = Conv2D_BN(s7x7_1,     s7x7_2, ksize=(1,7), pad=(0,3))
+			self.conv7x7_3 = Conv2D_BN(s7x7_2,     out7x7, ksize=(7,1), pad=(3,0))
+
+			self.conv7x7x2_1 = Conv2D_BN(insize,   s7x7x2_1, ksize=1)
+			self.conv7x7x2_2 = Conv2D_BN(s7x7x2_1, s7x7x2_2, ksize=(7,1), pad=(3,0))
+			self.conv7x7x2_3 = Conv2D_BN(s7x7x2_2, s7x7x2_3, ksize=(1,7), pad=(0,3))
+			self.conv7x7x2_4 = Conv2D_BN(s7x7x2_3, s7x7x2_4, ksize=(7,1), pad=(3,0))
+			self.conv7x7x2_5 = Conv2D_BN(s7x7x2_4, out7x7x2, ksize=(1,7), pad=(0,3))
+
+			self.pool_conv = Conv2D_BN(insize, out_pool, ksize=1)
+
+		self.pool = PoolingType.new(**pool_args)
+
+	def __call__(self, x):
+		y0 = self.conv1x1(x)
+		y1 = self.conv7x7_3(self.conv7x7_2(self.conv7x7_1(x)))
+		y2 = self.conv7x7x2_5(self.conv7x7x2_4(self.conv7x7x2_3(self.conv7x7x2_2(self.conv7x7x2_1(x)))))
+		y3 = self.pool_conv(self.pool(x))
+
+		return F.concat([y0, y1, y2, y3])
+
+class Inception4(chainer.Chain):
+	def __init__(self, insize, sizes, outputs, **pool_args):
+		super(Inception4, self).__init__()
+
+		out3x3, out7x7 = outputs
+		s3x3, s7x7_1, s7x7_2, s7x7_3 = sizes
+
+		with self.init_scope():
+			self.conv3x3_1 = Conv2D_BN(insize, s3x3, ksize=1)
+			self.conv3x3_2 = Conv2D_BN(s3x3, out3x3, ksize=3, stride=2)
+
+			self.conv7x7_1 = Conv2D_BN(insize, s7x7_1, ksize=1)
+			self.conv7x7_2 = Conv2D_BN(s7x7_1, s7x7_2, ksize=(1, 7), pad=(0, 3))
+			self.conv7x7_3 = Conv2D_BN(s7x7_2, s7x7_3, ksize=(7, 1), pad=(3, 0))
+			self.conv7x7_4 = Conv2D_BN(s7x7_3, out7x7, ksize=3, stride=2)
+
+		self.pool = PoolingType.new(**pool_args)
+
+	def __call__(self, x):
+
+		y0 = self.conv3x3_2(self.conv3x3_1(x))
+		y1 = self.conv7x7_4(self.conv7x7_3(self.conv7x7_2(self.conv7x7_1(x))))
+		y2 = self.pool(x)
+
+		return F.concat([y0, y1, y2])
+
+class Inception5(chainer.Chain):
+	def __init__(self, insize, sizes, outputs, **pool_args):
+		super(Inception5, self).__init__()
+
+		out1x1, out3x3, out3x3x2, out_pool = outputs
+		s3x3, s3x3x2_1, s3x3x2_2 = sizes
+
+		with self.init_scope():
+
+			self.conv1x1 = Conv2D_BN(insize, out1x1, ksize=1)
+
+			self.conv3x3_1 = Conv2D_BN(insize, s3x3, ksize=1)
+			self.conv3x3_2 = Conv2D_BN(s3x3, out3x3, ksize=(1, 3), pad=(0,1))
+			self.conv3x3_3 = Conv2D_BN(s3x3, out3x3, ksize=(3, 1), pad=(1,0))
+
+			self.conv3x3x2_1 = Conv2D_BN(insize  , s3x3x2_1, ksize=1)
+			self.conv3x3x2_2 = Conv2D_BN(s3x3x2_1, s3x3x2_2, ksize=3, pad=1)
+			self.conv3x3x2_3 = Conv2D_BN(s3x3x2_2, out3x3x2, ksize=(1, 3), pad=(0,1))
+			self.conv3x3x2_4 = Conv2D_BN(s3x3x2_2, out3x3x2, ksize=(3, 1), pad=(1,0))
+
+			self.pool_conv = Conv2D_BN(insize, out_pool, ksize=1)
+
+		self.pool = PoolingType.new(**pool_args)
+
+
+	def __call__(self, x):
+		y0 = self.conv1x1(x)
+
+
+		y1 = self.conv3x3_1(x)
+		y1 = F.concat([self.conv3x3_2(y1), self.conv3x3_3(y1)])
+
+		y2 = self.conv3x3x2_2(self.conv3x3x2_1(x))
+		y2 = F.concat([self.conv3x3x2_3(y2), self.conv3x3x2_4(y2)])
+
+		y3 = self.pool_conv(self.pool(x))
+
+		return F.concat([y0, y1, y2, y3])
+
+
+class AuxilaryClassifier(chainer.Chain):
+	def __init__(self, n_classes):
+		super(AuxilaryClassifier, self).__init__()
+		with self.init_scope():
+			self.conv1 = Conv2D_BN(768, 128, ksize=1, pad=1)
+			self.conv2 = Conv2D_BN(128, 768, ksize=7)
+
+			self.fc = L.Linear(n_classes)
+
+
+	def __call__(self, x):
+		x = F.average_pooling_2d(x, ksize=5, stride=3)
+		x = self.conv1(x)
+		x = self.conv2(x)
+		return self.fc(x)
+
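
A small shape check for the first Inception1 block, configured the way mixed00 is set up in inception_v3.py below (insize=192, outputs summing to 64+64+96+32=256). This is a sketch and assumes Conv2D_BN and PoolingType from chainer_addons behave as used in this diff:

	import numpy as np
	import chainer

	from chainer_addons.links.pooling import PoolingType
	from cvmodelz.models.pretrained.inception import blocks

	block = blocks.Inception1(
		insize=192, sizes=[48, 64, 96], outputs=[64, 64, 96, 32],
		pool_type=PoolingType.TF_AVG, ksize=3, stride=1, pad=1)

	x = np.zeros((1, 192, 35, 35), dtype=np.float32)
	with chainer.using_config("train", False):
		y = block(x)

	print(y.shape)  # expected: (1, 256, 35, 35), i.e. 64 + 64 + 96 + 32 channels concatenated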

+ 236 - 0
cvmodelz/models/pretrained/inception/inception_v3.py

@@ -0,0 +1,236 @@
+import chainer
+import chainer.functions as F
+import chainer.links as L
+import numpy as np
+
+from chainercv.transforms import resize
+from chainercv.transforms import scale
+from collections import OrderedDict
+from collections.abc import Iterable
+from os.path import isfile
+# TODO: replace this!
+from chainer_addons.links.pooling import PoolingType
+
+
+from cvmodelz.models.meta_info import ModelInfo
+from cvmodelz.models.pretrained.base import PretrainedModelMixin
+from cvmodelz.models.pretrained.inception import blocks
+from cvmodelz.models.pretrained.inception import link_mappings
+
+
+def _assign(name, param, data):
+	assert data.shape == param.shape, \
+		"\"{}\" does not match the shape: {} != {}!".format(
+			name, data.shape, param.shape)
+	if isinstance(param, chainer.variable.Parameter):
+		param.data[:] = data
+	else:
+		param[:] = data
+
+def _assign_batch_norm(name, link, beta, avg_mean, avg_var):
+	_assign(name, link.beta, beta)
+	_assign(name, link.avg_mean, avg_mean)
+	_assign(name, link.avg_var, avg_var)
+
+
+class InceptionV3(PretrainedModelMixin, chainer.Chain):
+
+	def __init__(self, pretrained_model=None, aux_logits=False, *args, **kwargs):
+		self.aux_logits = aux_logits
+		pooling = PoolingType.G_AVG.value()
+		super(InceptionV3, self).__init__(*args, pooling=pooling, **kwargs)
+
+		if pretrained_model is not None and isfile(pretrained_model):
+			self.load(pretrained_model, strict=True)
+
+		self.meta = ModelInfo(
+			name="InceptionV3",
+			input_size=299,
+			feature_size=2048,
+			n_conv_maps=2048,
+
+			conv_map_layer="mixed10",
+			feature_layer="pool",
+
+			classifier_layers=["fc"],
+		)
+
+	def __call__(self, x, layer_name='fc'):
+		aux_logit = None
+		for key, funcs in self.functions.items():
+			for func in funcs:
+				x = func(x)
+
+				if chainer.config.train and self.aux_logits and func == self.mixed07:
+					aux_logit = self.aux(x)
+
+			if key == layer_name:
+				return x if aux_logit is None else (x, aux_logit)
+
+	@property
+	def functions(self):
+
+		names = ["mixed{:02d}".format(i) for i in range(11)]
+		body = [(name, [getattr(self, name)]) for name in names]
+		links = [
+			("head", [self.head]),
+		] + body + [
+			("pool", [self.pool]),
+			("fc", [self.fc]),
+		]
+
+		return OrderedDict(links)
+
+	def extract(self, x):
+		x = self.head(x)
+		x = self.mixed00(x)
+		x = self.mixed01(x)
+		x = self.mixed02(x)
+		x = self.mixed03(x)
+		x = self.mixed04(x)
+		x = self.mixed05(x)
+		x = self.mixed06(x)
+		x = self.mixed07(x)
+		x = self.mixed08(x)
+		x = self.mixed09(x)
+		x = self.mixed10(x)
+
+		return self.pool(x)
+
+
+	def load(self, weights, *args, **kwargs):
+		if weights.endswith(".h5"):
+			self._load_from_keras(weights)
+		elif weights.endswith(".ckpt.npz"):
+			self._load_from_ckpt_weights(weights)
+		else:
+			return super(InceptionV3, self).load(weights, *args, **kwargs)
+
+	def init_extra_layers(self, n_classes):
+		# input 3 x 299 x 299
+		self.head = blocks.InceptionHead()
+		# output 192 x 35 x 35
+
+
+		pool_args = dict(pool_type=PoolingType.TF_AVG, ksize=3, stride=1, pad=1)
+		# input 192 x 35 x 35
+		self.mixed00 = blocks.Inception1(insize=192, sizes=[48, 64, 96], outputs=[64, 64, 96, 32], **pool_args)
+		# input 256 x 35 x 35
+		self.mixed01 = blocks.Inception1(insize=256, sizes=[48, 64, 96], outputs=[64, 64, 96, 64], **pool_args)
+		# input 288 x 35 x 35
+		self.mixed02 = blocks.Inception1(insize=288, sizes=[48, 64, 96], outputs=[64, 64, 96, 64], **pool_args)
+
+		pool_args = dict(pool_type=PoolingType.MAX, ksize=3, stride=2, pad=0)
+		# input 288 x 35 x 35
+		self.mixed03 = blocks.Inception2(288, sizes=[64, 96], outputs=[384, 96], **pool_args)
+
+		# input 768 x 17 x 17
+		pool_args = dict(pool_type=PoolingType.TF_AVG, ksize=3, stride=1, pad=1)
+		# input 768 x 17 x 17
+		self.mixed04 = blocks.Inception3(768, sizes=[128] * 6, outputs=[192] * 4, **pool_args)
+		# input 768 x 17 x 17
+		self.mixed05 = blocks.Inception3(768, sizes=[160] * 6, outputs=[192] * 4, **pool_args)
+		# input 768 x 17 x 17
+		self.mixed06 = blocks.Inception3(768, sizes=[160] * 6, outputs=[192] * 4, **pool_args)
+		# input 768 x 17 x 17
+		self.mixed07 = blocks.Inception3(768, sizes=[192] * 6, outputs=[192] * 4, **pool_args)
+
+		self.aux = blocks.AuxilaryClassifier(n_classes) if self.aux_logits else F.identity
+
+		pool_args = dict(pool_type=PoolingType.MAX, ksize=3, stride=2, pad=0)
+		# input 768 x 17 x 17
+		self.mixed08 = blocks.Inception4(768, sizes=[192] * 4, outputs=[320, 192], **pool_args)
+		# output 1280 x 8 x 8
+
+		# input 1280 x 8 x 8
+		pool_args = dict(pool_type=PoolingType.TF_AVG, ksize=3, stride=1, pad=1)
+		# input 1280 x 8 x 8
+		self.mixed09 = blocks.Inception5(1280, sizes=[384, 448, 384], outputs=[320, 384, 384, 192], **pool_args)
+		# input 2048 x 8 x 8
+		self.mixed10 = blocks.Inception5(2048, sizes=[384, 448, 384], outputs=[320, 384, 384, 192], **pool_args)
+
+		# input 2048 x 8 x 8
+		# global average pooling
+		# output 2048 x 1 x 1
+		self.fc = L.Linear(2048, n_classes)
+
+	def loss(self, pred, gt, loss_func=F.softmax_cross_entropy, alpha=0.4):
+		if isinstance(pred, tuple):
+			pred0, aux_pred = pred
+		else:
+			pred0, aux_pred = pred, None
+
+		loss = loss_func(pred0, gt)
+		if aux_pred is None:
+			return loss
+		else:
+			aux_loss = loss_func(aux_pred, gt)
+			return (1-alpha) * loss + alpha * aux_loss
+
+	def accuracy(self, pred, gt):
+		if isinstance(pred, tuple):
+			pred0, aux_pred = pred
+		else:
+			pred0, aux_pred = pred, None
+		return F.accuracy(pred0, gt)
+
+	def _load_from_ckpt_weights(self, weights):
+		content = np.load(weights)
+		for name, link in self.namedlinks(skipself=True):
+			if name not in link_mappings.chainer_to_tf_ckpt:
+				continue
+
+			ckpt_key = link_mappings.chainer_to_tf_ckpt[name]
+
+			if isinstance(link, L.Convolution2D):
+				W = content["{}/weights".format(ckpt_key)]
+				W = W.transpose(3,2,0,1)
+				_assign(name, link.W, W)
+
+			elif isinstance(link, L.BatchNormalization):
+				beta = content["{}/beta".format(ckpt_key)]
+				avg_mean = content["{}/moving_mean".format(ckpt_key)]
+				avg_var = content["{}/moving_variance".format(ckpt_key)]
+				_assign_batch_norm(name, link, beta, avg_mean, avg_var)
+
+			elif isinstance(link, L.Linear):
+				W = content["{}/weights".format(ckpt_key)]
+				W = W.transpose(3,2,0,1).squeeze()
+				b = content["{}/biases".format(ckpt_key)]
+				_assign(name, link.W, W)
+				_assign(name, link.b, b)
+
+			else:
+				raise ValueError("Unknown link type: {}!".format(type(link)))
+
+	def _load_from_keras(self, weights):
+		import h5py
+		with h5py.File(weights, "r") as f:
+			if "model_weights" in f:
+				f = f["model_weights"]
+			for name, link in self.namedlinks(skipself=True):
+				if name not in link_mappings.chainer_to_keras: continue
+				keras_key = link_mappings.chainer_to_keras[name]
+
+				if isinstance(link, L.Convolution2D):
+					W = np.asarray(f[keras_key][keras_key + "/kernel:0"])
+					W = W.transpose(3,2,0,1)
+
+					_assign(name, link.W, W)
+
+				elif isinstance(link, L.Linear):
+					W = np.asarray(f[keras_key][keras_key + "/kernel:0"])
+					b = np.asarray(f[keras_key][keras_key + "/bias:0"])
+
+					_assign(name, link.W, W.transpose(1,0))
+					_assign(name, link.b, b)
+
+				elif isinstance(link, L.BatchNormalization):
+					beta = np.asarray(f[keras_key][keras_key + "/beta:0"])
+					avg_mean = np.asarray(f[keras_key][keras_key + "/moving_mean:0"])
+					avg_var = np.asarray(f[keras_key][keras_key + "/moving_variance:0"])
+
+					_assign_batch_norm(name, link, beta, avg_mean, avg_var)
+				else:
+					raise ValueError("Unknown link type: {}!".format(type(link)))
+
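
A hedged end-to-end sketch of how the new class might be used. The weight file name is a placeholder, the class count comes from whatever default PretrainedModelMixin provides, and load() only runs when the given file actually exists:

	import numpy as np
	import chainer

	from cvmodelz.models.pretrained import InceptionV3

	# load() dispatches on the suffix: *.h5 -> _load_from_keras,
	# *.ckpt.npz -> _load_from_ckpt_weights, anything else -> the mixin's loader.
	model = InceptionV3(pretrained_model="inception_v3.ckpt.npz", aux_logits=True)

	x = np.zeros((1, 3, 299, 299), dtype=np.float32)

	with chainer.using_config("train", False):
		feats = model.extract(x)            # 2048-d globally pooled features
		logits = model(x, layer_name="fc")

	# In training mode with aux_logits=True, __call__ also returns the auxiliary
	# logits taken after mixed07, and loss() blends the two terms with alpha=0.4.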

+ 0 - 0
cvmodelz/models/pretrained/inception/link_mappings.py