# ResNet

## 残差块

```python
import tensorflow as tf


class Residual(tf.keras.Model):
    """The residual block of ResNet."""

    def __init__(self, num_channels, use_1x1conv=False, strides=1):
        super(Residual, self).__init__()
        self.conv1 = tf.keras.layers.Conv2D(
            num_channels, kernel_size=3, strides=strides, padding='same')
        # The second conv always uses stride 1: any spatial downsampling
        # happens only in conv1 (and in conv3 on the shortcut), otherwise
        # the residual branch and the shortcut would have mismatched shapes.
        self.conv2 = tf.keras.layers.Conv2D(
            num_channels, kernel_size=3, strides=1, padding='same')
        self.conv3 = None
        if use_1x1conv:
            # 1x1 conv on the shortcut to match channels / spatial size.
            self.conv3 = tf.keras.layers.Conv2D(
                num_channels, kernel_size=1, strides=strides)
        self.bn1 = tf.keras.layers.BatchNormalization()
        self.bn2 = tf.keras.layers.BatchNormalization()

    def call(self, X):
        Y = tf.keras.activations.relu(self.bn1(self.conv1(X)))
        Y = self.bn2(self.conv2(Y))
        if self.conv3 is not None:
            X = self.conv3(X)
        Y += X
        return tf.keras.activations.relu(Y)
```

```python
blk = Residual(3)
X = tf.random.uniform((4, 6, 6, 3))
Y = blk(X)
Y.shape
```

## ResNet模型

ResNet 的前两层跟之前介绍的 GoogLeNet 中的一样:
在输出通道数为 64、步幅为 2 的 $7\times 7$ 卷积层后,接步幅为 2 的 $3\times 3$ 的最大汇聚层。
不同之处在于 ResNet 每个卷积层后增加了批量归一化层。

```python
b1 = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(64, kernel_size=7, strides=2, padding='same'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Activation('relu'),
    tf.keras.layers.MaxPool2D(pool_size=3, strides=2, padding='same')])
```

GoogLeNet 在后面接了 4 个由Inception块组成的模块。
ResNet 则使用 4 个由残差块组成的模块,每个模块使用若干个同样输出通道数的残差块。
第一个模块的通道数同输入通道数一致。
由于之前已经使用了步幅为 2 的最大汇聚层,所以无须减小高和宽。
之后的每个模块在第一个残差块里将上一个模块的通道数翻倍,并将高和宽减半。

```python
class ResnetBlock(tf.keras.layers.Layer):
    """A ResNet stage: `num_residuals` residual blocks that share the same
    number of output channels."""

    def __init__(self, num_channels, num_residuals, first_block=False,
                 **kwargs):
        super(ResnetBlock, self).__init__(**kwargs)
        self.residual_layers = []
        for i in range(num_residuals):
            if i == 0 and not first_block:
                # The first block of every stage (except the very first one)
                # halves height/width and changes the channel count, so the
                # shortcut needs a 1x1 conv to match shapes.
                self.residual_layers.append(
                    Residual(num_channels, use_1x1conv=True, strides=2))
            else:
                self.residual_layers.append(Residual(num_channels))

    def call(self, X):
        # `residual_layers` is a plain Python list, so iterate it directly
        # (a list has no `.layers` attribute).
        for layer in self.residual_layers:
            X = layer(X)
        return X
```

```python
b2 = ResnetBlock(64, 2, first_block=True)
b3 = ResnetBlock(128, 2)
b4 = ResnetBlock(256, 2)
b5 = ResnetBlock(512, 2)
```

```python
def net():
    """Build the full ResNet-18 model for 10-class classification."""
    return tf.keras.Sequential([
        tf.keras.layers.Conv2D(64, kernel_size=7, strides=2, padding='same'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Activation('relu'),
        tf.keras.layers.MaxPool2D(pool_size=3, strides=2, padding='same'),
        ResnetBlock(64, 2, first_block=True),
        ResnetBlock(128, 2),
        ResnetBlock(256, 2),
        ResnetBlock(512, 2),
        tf.keras.layers.GlobalAvgPool2D(),
        tf.keras.layers.Dense(units=10)])
```