ResNet Paper
Kaiming He 何恺明 领域:cv & DL
Residual Networks (ResNet) – Deep Learning
7.6. Residual Networks (ResNet)
背景
深度网络的退化问题
当神经网络的层数不断增多时,会产生梯度爆炸/消失(Vanishing/Exploding Gradient)问题;即使通过归一化初始化和 Batch Normalization 缓解了梯度问题,继续加深网络仍会出现"退化"(degradation)现象——训练误差和测试误差都随层数增加而上升。注意这并非过拟合(过拟合时训练误差会下降),而是更深的网络反而更难优化。
ResNet,2015年,微软研究院,何恺明引进了残差网络(Residual Network)概念
- Instead of having the stacked layers learn the desired underlying mapping H(x) directly, we let the network fit the residual mapping F(x) := H(x) − x, which gives H(x) = F(x) + x. The intuition is that it is easier to optimize the residual F(x) (e.g. push it toward zero for an identity mapping) than to fit H(x) from scratch.
- If a layer hurts performance, the skip connection lets the network effectively bypass it: the residual branch F(x) can be driven toward zero during training, leaving the identity shortcut, so adding layers should never make the network worse than its shallower counterpart.
网络架构
ResNet50的TensorFlow实现
class ResNet50(object):
    """TensorFlow (v1) graph builder for the ResNet-50 architecture.

    For 224x224 inputs the graph is:
    7x7/2 conv stem -> 3x3/2 max pool -> four residual stages
    (3/4/6/3 bottleneck units) -> global average pool -> fully
    connected classifier -> softmax.

    Attributes set on construction:
        logits:      pre-softmax scores, shape [batch, num_classes]
        predictions: softmax probabilities over classes
    """

    def __init__(self, inputs, num_classes=1000, is_training=True,
                 scope="resnet50"):
        self.inputs = inputs
        self.is_training = is_training
        self.num_classes = num_classes
        with tf.variable_scope(scope):
            # Stem: 7x7 stride-2 conv, BN + ReLU, then 3x3 stride-2 max pool.
            net = conv2d(inputs, 64, 7, 2, scope="conv1")  # -> [batch, 112, 112, 64]
            net = tf.nn.relu(batch_norm(net, is_training=self.is_training,
                                        scope="bn1"))
            net = max_pool(net, 3, 2, scope="maxpool1")  # -> [batch, 56, 56, 64]
            # Four residual stages; the first keeps stride 1 because the
            # stem already downsampled twice.
            net = self._block(net, 256, 3, init_stride=1,
                              is_training=self.is_training,
                              scope="block2")  # -> [batch, 56, 56, 256]
            net = self._block(net, 512, 4, is_training=self.is_training,
                              scope="block3")  # -> [batch, 28, 28, 512]
            net = self._block(net, 1024, 6, is_training=self.is_training,
                              scope="block4")  # -> [batch, 14, 14, 1024]
            net = self._block(net, 2048, 3, is_training=self.is_training,
                              scope="block5")  # -> [batch, 7, 7, 2048]
            # Head: global average pool, drop the 1x1 spatial dims, classify.
            net = avg_pool(net, 7, scope="avgpool5")  # -> [batch, 1, 1, 2048]
            net = tf.squeeze(net, [1, 2], name="SpatialSqueeze")  # -> [batch, 2048]
            self.logits = fc(net, self.num_classes, "fc6")  # -> [batch, num_classes]
            self.predictions = tf.nn.softmax(self.logits)

    def _block(self, x, n_out, n, init_stride=2, is_training=True,
               scope="block"):
        """Stack `n` bottleneck units; only the first may downsample."""
        with tf.variable_scope(scope):
            # Bottleneck width is a quarter of the stage's output depth.
            h_out = n_out // 4
            # NOTE(review): "bottlencek" is a typo in the original scope
            # names; preserved byte-for-byte so variable names (and any
            # existing checkpoints) stay compatible.
            out = self._bottleneck(x, h_out, n_out, stride=init_stride,
                                   is_training=is_training,
                                   scope="bottlencek1")
            for unit_idx in range(1, n):
                out = self._bottleneck(out, h_out, n_out,
                                       is_training=is_training,
                                       scope=("bottlencek%s" % (unit_idx + 1)))
            return out

    def _bottleneck(self, x, h_out, n_out, stride=None, is_training=True,
                    scope="bottleneck"):
        """One residual bottleneck unit: 1x1 -> 3x3 -> 1x1 conv path
        (each with BN), added to an identity or projection shortcut,
        followed by a final ReLU.
        """
        n_in = x.get_shape()[-1]
        if stride is None:
            # Downsample (stride 2) exactly when the channel count changes;
            # otherwise keep spatial resolution.
            stride = 1 if n_in == n_out else 2
        with tf.variable_scope(scope):
            h = conv2d(x, h_out, 1, stride=stride, scope="conv_1")
            h = batch_norm(h, is_training=is_training, scope="bn_1")
            h = tf.nn.relu(h)
            h = conv2d(h, h_out, 3, stride=1, scope="conv_2")
            h = batch_norm(h, is_training=is_training, scope="bn_2")
            h = tf.nn.relu(h)
            h = conv2d(h, n_out, 1, stride=1, scope="conv_3")
            h = batch_norm(h, is_training=is_training, scope="bn_3")
            # Projection shortcut (1x1 conv + BN) when dimensions differ;
            # plain identity otherwise.
            if n_in != n_out:
                shortcut = conv2d(x, n_out, 1, stride=stride, scope="conv_4")
                shortcut = batch_norm(shortcut, is_training=is_training,
                                      scope="bn_4")
            else:
                shortcut = x
            return tf.nn.relu(shortcut + h)
