Importing modules
import os, glob
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow.keras.backend as K
import xml.etree.ElementTree as ET
import matplotlib.pyplot as plt
from matplotlib import patches
import imgaug as ia
from imgaug import augmenters as iaa
Creating the network model
class SpaceToDepth(layers.Layer):
    """Rearrange spatial blocks into channels, e.g. [b,32,32,64] => [b,16,16,256]."""
    def __init__(self, block_size, **kwargs):
        self.block_size = block_size
        super(SpaceToDepth, self).__init__(**kwargs)

    def call(self, inputs):
        x = inputs
        batch, height, width, depth = K.int_shape(x)
        batch = -1  # keep the batch dimension dynamic
        reduced_height = height // self.block_size
        reduced_width = width // self.block_size
        # split each spatial axis into (reduced, block_size)
        y = K.reshape(x, (batch, reduced_height, self.block_size,
                          reduced_width, self.block_size, depth))
        # move the two block_size axes next to the channel axis
        z = K.permute_dimensions(y, (0, 1, 3, 2, 4, 5))
        # fold the blocks into the channel dimension
        t = K.reshape(z, (batch, reduced_height, reduced_width,
                          depth * self.block_size ** 2))
        return t

    def compute_output_shape(self, input_shape):
        shape = (input_shape[0],
                 input_shape[1] // self.block_size,
                 input_shape[2] // self.block_size,
                 input_shape[3] * self.block_size ** 2)
        return tf.TensorShape(shape)
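The custom layer should match TensorFlow's built-in tf.nn.space_to_depth, which makes for a quick sanity check (a minimal sketch, run eagerly):
# compare the custom layer against the built-in op
x = tf.random.normal((2, 32, 32, 64))
custom = SpaceToDepth(block_size=2)(x)
builtin = tf.nn.space_to_depth(x, block_size=2)
print(custom.shape)                                     # (2, 16, 16, 256)
print(tf.reduce_max(tf.abs(custom - builtin)).numpy())  # should print 0.0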
# 3.1 network input; IMGSZ = 512 and GRIDSZ = 16 are assumed defined in the earlier sections
input_image = layers.Input((IMGSZ, IMGSZ, 3), dtype='float32')
# Layer 1
x = layers.Conv2D(32, (3,3), strides=(1,1), padding='same', name='conv_1', use_bias=False)(input_image)
x = layers.BatchNormalization(name='norm_1')(x)
x = layers.LeakyReLU(alpha=0.1)(x)
x = layers.MaxPooling2D(pool_size=(2,2))(x)
# Layer 2
x = layers.Conv2D(64, (3,3), strides=(1,1), padding='same', name='conv_2', use_bias=False)(x)
x = layers.BatchNormalization(name='norm_2')(x)
x = layers.LeakyReLU(alpha=0.1)(x)
x = layers.MaxPooling2D(pool_size=(2,2))(x)
# Layer 3
x = layers.Conv2D(128, (3,3), strides=(1,1), padding='same', name='conv_3', use_bias=False)(x)
x = layers.BatchNormalization(name='norm_3')(x)
x = layers.LeakyReLU(alpha=0.1)(x)
# Layer 4
x = layers.Conv2D(64, (1,1), strides=(1,1), padding='same', name='conv_4', use_bias=False)(x)
x = layers.BatchNormalization(name='norm_4')(x)
x = layers.LeakyReLU(alpha=0.1)(x)
# Layer 5
x = layers.Conv2D(128, (3,3), strides=(1,1), padding='same', name='conv_5', use_bias=False)(x)
x = layers.BatchNormalization(name='norm_5')(x)
x = layers.LeakyReLU(alpha=0.1)(x)
x = layers.MaxPooling2D(pool_size=(2, 2))(x)
# Layer 6
x = layers.Conv2D(256, (3,3), strides=(1,1), padding='same', name='conv_6', use_bias=False)(x)
x = layers.BatchNormalization(name='norm_6')(x)
x = layers.LeakyReLU(alpha=0.1)(x)
# Layer 7
x = layers.Conv2D(128, (1,1), strides=(1,1), padding='same', name='conv_7', use_bias=False)(x)
x = layers.BatchNormalization(name='norm_7')(x)
x = layers.LeakyReLU(alpha=0.1)(x)
# Layer 8
x = layers.Conv2D(256, (3,3), strides=(1,1), padding='same', name='conv_8', use_bias=False)(x)
x = layers.BatchNormalization(name='norm_8')(x)
x = layers.LeakyReLU(alpha=0.1)(x)
x = layers.MaxPooling2D(pool_size=(2, 2))(x)
# Layer 9
x = layers.Conv2D(512, (3,3), strides=(1,1), padding='same', name='conv_9', use_bias=False)(x)
x = layers.BatchNormalization(name='norm_9')(x)
x = layers.LeakyReLU(alpha=0.1)(x)
# Layer 10
x = layers.Conv2D(256, (1,1), strides=(1,1), padding='same', name='conv_10', use_bias=False)(x)
x = layers.BatchNormalization(name='norm_10')(x)
x = layers.LeakyReLU(alpha=0.1)(x)
# Layer 11
x = layers.Conv2D(512, (3,3), strides=(1,1), padding='same', name='conv_11', use_bias=False)(x)
x = layers.BatchNormalization(name='norm_11')(x)
x = layers.LeakyReLU(alpha=0.1)(x)
# Layer 12
x = layers.Conv2D(256, (1,1), strides=(1,1), padding='same', name='conv_12', use_bias=False)(x)
x = layers.BatchNormalization(name='norm_12')(x)
x = layers.LeakyReLU(alpha=0.1)(x)
# Layer 13
x = layers.Conv2D(512, (3,3), strides=(1,1), padding='same', name='conv_13', use_bias=False)(x)
x = layers.BatchNormalization(name='norm_13')(x)
x = layers.LeakyReLU(alpha=0.1)(x)
# for skip connection
skip_x = x # [b,32,32,512]
x = layers.MaxPooling2D(pool_size=(2, 2))(x)
# Layer 14
x = layers.Conv2D(1024, (3,3), strides=(1,1), padding='same', name='conv_14', use_bias=False)(x)
x = layers.BatchNormalization(name='norm_14')(x)
x = layers.LeakyReLU(alpha=0.1)(x)
# Layer 15
x = layers.Conv2D(512, (1,1), strides=(1,1), padding='same', name='conv_15', use_bias=False)(x)
x = layers.BatchNormalization(name='norm_15')(x)
x = layers.LeakyReLU(alpha=0.1)(x)
# Layer 16
x = layers.Conv2D(1024, (3,3), strides=(1,1), padding='same', name='conv_16', use_bias=False)(x)
x = layers.BatchNormalization(name='norm_16')(x)
x = layers.LeakyReLU(alpha=0.1)(x)
# Layer 17
x = layers.Conv2D(512, (1,1), strides=(1,1), padding='same', name='conv_17', use_bias=False)(x)
x = layers.BatchNormalization(name='norm_17')(x)
x = layers.LeakyReLU(alpha=0.1)(x)
# Layer 18
x = layers.Conv2D(1024, (3,3), strides=(1,1), padding='same', name='conv_18', use_bias=False)(x)
x = layers.BatchNormalization(name='norm_18')(x)
x = layers.LeakyReLU(alpha=0.1)(x)
# Layer 19
x = layers.Conv2D(1024, (3,3), strides=(1,1), padding='same', name='conv_19', use_bias=False)(x)
x = layers.BatchNormalization(name='norm_19')(x)
x = layers.LeakyReLU(alpha=0.1)(x)
# Layer 20
x = layers.Conv2D(1024, (3,3), strides=(1,1), padding='same', name='conv_20', use_bias=False)(x)
x = layers.BatchNormalization(name='norm_20')(x)
x = layers.LeakyReLU(alpha=0.1)(x)
# Layer 21
skip_x = layers.Conv2D(64, (1,1), strides=(1,1), padding='same', name='conv_21', use_bias=False)(skip_x)
skip_x = layers.BatchNormalization(name='norm_21')(skip_x)
skip_x = layers.LeakyReLU(alpha=0.1)(skip_x)
skip_x = SpaceToDepth(block_size=2)(skip_x)
# concat: [b,16,16,256] + [b,16,16,1024] => [b,16,16,1280]
x = tf.concat([skip_x, x], axis=-1)
# Layer 22
x = layers.Conv2D(1024, (3,3), strides=(1,1), padding='same', name='conv_22', use_bias=False)(x)
x = layers.BatchNormalization(name='norm_22')(x)
x = layers.LeakyReLU(alpha=0.1)(x)
x = layers.Dropout(0.5)(x)  # add dropout
# 5 anchors x (x, y, w, h, conf, 2 classes) = 35 channels: [b,16,16,35] => [b,16,16,5,7]
x = layers.Conv2D(5*7, (1,1), strides=(1,1), padding='same', name='conv_23')(x)
output = layers.Reshape((GRIDSZ, GRIDSZ, 5, 7))(x)
model = keras.models.Model(input_image, output)
# sanity check with a random batch of four images
x = tf.random.normal((4, 512, 512, 3))
out = model(x)
print('out:', out.shape)  # out: (4, 16, 16, 5, 7)
model.summary()
Model: "model"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_1 (InputLayer) [(None, 512, 512, 3 0 []
)]
conv_1 (Conv2D) (None, 512, 512, 32 864 ['input_1[0][0]']
)
norm_1 (BatchNormalization) (None, 512, 512, 32 128 ['conv_1[0][0]']
)
leaky_re_lu (LeakyReLU) (None, 512, 512, 32 0 ['norm_1[0][0]']
)
max_pooling2d (MaxPooling2D) (None, 256, 256, 32 0 ['leaky_re_lu[0][0]']
)
conv_2 (Conv2D) (None, 256, 256, 64 18432 ['max_pooling2d[0][0]']
)
norm_2 (BatchNormalization) (None, 256, 256, 64 256 ['conv_2[0][0]']
)
leaky_re_lu_1 (LeakyReLU) (None, 256, 256, 64 0 ['norm_2[0][0]']
)
max_pooling2d_1 (MaxPooling2D) (None, 128, 128, 64 0 ['leaky_re_lu_1[0][0]']
)
conv_3 (Conv2D) (None, 128, 128, 12 73728 ['max_pooling2d_1[0][0]']
8)
norm_3 (BatchNormalization) (None, 128, 128, 12 512 ['conv_3[0][0]']
8)
leaky_re_lu_2 (LeakyReLU) (None, 128, 128, 12 0 ['norm_3[0][0]']
8)
conv_4 (Conv2D) (None, 128, 128, 64 8192 ['leaky_re_lu_2[0][0]']
)
norm_4 (BatchNormalization) (None, 128, 128, 64 256 ['conv_4[0][0]']
)
leaky_re_lu_3 (LeakyReLU) (None, 128, 128, 64 0 ['norm_4[0][0]']
)
conv_5 (Conv2D) (None, 128, 128, 12 73728 ['leaky_re_lu_3[0][0]']
8)
norm_5 (BatchNormalization) (None, 128, 128, 12 512 ['conv_5[0][0]']
8)
leaky_re_lu_4 (LeakyReLU) (None, 128, 128, 12 0 ['norm_5[0][0]']
8)
max_pooling2d_2 (MaxPooling2D) (None, 64, 64, 128) 0 ['leaky_re_lu_4[0][0]']
conv_6 (Conv2D) (None, 64, 64, 256) 294912 ['max_pooling2d_2[0][0]']
norm_6 (BatchNormalization) (None, 64, 64, 256) 1024 ['conv_6[0][0]']
leaky_re_lu_5 (LeakyReLU) (None, 64, 64, 256) 0 ['norm_6[0][0]']
conv_7 (Conv2D) (None, 64, 64, 128) 32768 ['leaky_re_lu_5[0][0]']
norm_7 (BatchNormalization) (None, 64, 64, 128) 512 ['conv_7[0][0]']
leaky_re_lu_6 (LeakyReLU) (None, 64, 64, 128) 0 ['norm_7[0][0]']
conv_8 (Conv2D) (None, 64, 64, 256) 294912 ['leaky_re_lu_6[0][0]']
norm_8 (BatchNormalization) (None, 64, 64, 256) 1024 ['conv_8[0][0]']
leaky_re_lu_7 (LeakyReLU) (None, 64, 64, 256) 0 ['norm_8[0][0]']
max_pooling2d_3 (MaxPooling2D) (None, 32, 32, 256) 0 ['leaky_re_lu_7[0][0]']
conv_9 (Conv2D) (None, 32, 32, 512) 1179648 ['max_pooling2d_3[0][0]']
norm_9 (BatchNormalization) (None, 32, 32, 512) 2048 ['conv_9[0][0]']
leaky_re_lu_8 (LeakyReLU) (None, 32, 32, 512) 0 ['norm_9[0][0]']
conv_10 (Conv2D) (None, 32, 32, 256) 131072 ['leaky_re_lu_8[0][0]']
norm_10 (BatchNormalization) (None, 32, 32, 256) 1024 ['conv_10[0][0]']
leaky_re_lu_9 (LeakyReLU) (None, 32, 32, 256) 0 ['norm_10[0][0]']
conv_11 (Conv2D) (None, 32, 32, 512) 1179648 ['leaky_re_lu_9[0][0]']
norm_11 (BatchNormalization) (None, 32, 32, 512) 2048 ['conv_11[0][0]']
leaky_re_lu_10 (LeakyReLU) (None, 32, 32, 512) 0 ['norm_11[0][0]']
conv_12 (Conv2D) (None, 32, 32, 256) 131072 ['leaky_re_lu_10[0][0]']
norm_12 (BatchNormalization) (None, 32, 32, 256) 1024 ['conv_12[0][0]']
leaky_re_lu_11 (LeakyReLU) (None, 32, 32, 256) 0 ['norm_12[0][0]']
conv_13 (Conv2D) (None, 32, 32, 512) 1179648 ['leaky_re_lu_11[0][0]']
norm_13 (BatchNormalization) (None, 32, 32, 512) 2048 ['conv_13[0][0]']
leaky_re_lu_12 (LeakyReLU) (None, 32, 32, 512) 0 ['norm_13[0][0]']
max_pooling2d_4 (MaxPooling2D) (None, 16, 16, 512) 0 ['leaky_re_lu_12[0][0]']
conv_14 (Conv2D) (None, 16, 16, 1024 4718592 ['max_pooling2d_4[0][0]']
)
norm_14 (BatchNormalization) (None, 16, 16, 1024 4096 ['conv_14[0][0]']
)
leaky_re_lu_13 (LeakyReLU) (None, 16, 16, 1024 0 ['norm_14[0][0]']
)
conv_15 (Conv2D) (None, 16, 16, 512) 524288 ['leaky_re_lu_13[0][0]']
norm_15 (BatchNormalization) (None, 16, 16, 512) 2048 ['conv_15[0][0]']
leaky_re_lu_14 (LeakyReLU) (None, 16, 16, 512) 0 ['norm_15[0][0]']
conv_16 (Conv2D) (None, 16, 16, 1024 4718592 ['leaky_re_lu_14[0][0]']
)
norm_16 (BatchNormalization) (None, 16, 16, 1024 4096 ['conv_16[0][0]']
)
leaky_re_lu_15 (LeakyReLU) (None, 16, 16, 1024 0 ['norm_16[0][0]']
)
conv_17 (Conv2D) (None, 16, 16, 512) 524288 ['leaky_re_lu_15[0][0]']
norm_17 (BatchNormalization) (None, 16, 16, 512) 2048 ['conv_17[0][0]']
leaky_re_lu_16 (LeakyReLU) (None, 16, 16, 512) 0 ['norm_17[0][0]']
conv_18 (Conv2D) (None, 16, 16, 1024 4718592 ['leaky_re_lu_16[0][0]']
)
norm_18 (BatchNormalization) (None, 16, 16, 1024 4096 ['conv_18[0][0]']
)
leaky_re_lu_17 (LeakyReLU) (None, 16, 16, 1024 0 ['norm_18[0][0]']
)
conv_19 (Conv2D) (None, 16, 16, 1024 9437184 ['leaky_re_lu_17[0][0]']
)
norm_19 (BatchNormalization) (None, 16, 16, 1024 4096 ['conv_19[0][0]']
)
conv_21 (Conv2D) (None, 32, 32, 64) 32768 ['leaky_re_lu_12[0][0]']
leaky_re_lu_18 (LeakyReLU) (None, 16, 16, 1024 0 ['norm_19[0][0]']
)
norm_21 (BatchNormalization) (None, 32, 32, 64) 256 ['conv_21[0][0]']
conv_20 (Conv2D) (None, 16, 16, 1024 9437184 ['leaky_re_lu_18[0][0]']
)
leaky_re_lu_20 (LeakyReLU) (None, 32, 32, 64) 0 ['norm_21[0][0]']
norm_20 (BatchNormalization) (None, 16, 16, 1024 4096 ['conv_20[0][0]']
)
space_to_depth (SpaceToDepth) (None, 16, 16, 256) 0 ['leaky_re_lu_20[0][0]']
leaky_re_lu_19 (LeakyReLU) (None, 16, 16, 1024 0 ['norm_20[0][0]']
)
tf.concat (TFOpLambda) (None, 16, 16, 1280 0 ['space_to_depth[0][0]',
) 'leaky_re_lu_19[0][0]']
conv_22 (Conv2D) (None, 16, 16, 1024 11796480 ['tf.concat[0][0]']
)
norm_22 (BatchNormalization) (None, 16, 16, 1024 4096 ['conv_22[0][0]']
)
leaky_re_lu_21 (LeakyReLU) (None, 16, 16, 1024 0 ['norm_22[0][0]']
)
dropout (Dropout) (None, 16, 16, 1024 0 ['leaky_re_lu_21[0][0]']
)
conv_23 (Conv2D) (None, 16, 16, 35) 35875 ['dropout[0][0]']
reshape (Reshape) (None, 16, 16, 5, 7 0 ['conv_23[0][0]']
)
==================================================================================================
Total params: 50,583,811
Trainable params: 50,563,139
Non-trainable params: 20,672
__________________________________________________________________________________________________
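As a spot check, the table's parameter counts follow directly from the layer configurations: conv_22 is a 3x3 convolution without bias over 1280 input channels producing 1024 output channels, i.e. 3*3*1280*1024 = 11,796,480 parameters, and conv_23 is a 1x1 convolution with bias, i.e. 1024*35 + 35 = 35,875.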
Initializing the network
class WeightReader:
    """Sequentially reads pretrained Darknet weights from a binary file."""
    def __init__(self, weight_file):
        self.offset = 4  # skip the 4-value header at the start of the file
        self.all_weights = np.fromfile(weight_file, dtype='float32')

    def read_bytes(self, size):
        self.offset = self.offset + size
        return self.all_weights[self.offset - size:self.offset]

    def reset(self):
        self.offset = 4
weight_reader = WeightReader('yolo.weights')
weight_reader.reset()
nb_conv = 23
for i in range(1, nb_conv + 1):
    conv_layer = model.get_layer('conv_' + str(i))
    conv_layer.trainable = True
    if i < nb_conv:
        norm_layer = model.get_layer('norm_' + str(i))
        norm_layer.trainable = True
        size = np.prod(norm_layer.get_weights()[0].shape)
        # Darknet stores BN params in the order: beta, gamma, mean, variance
        beta = weight_reader.read_bytes(size)
        gamma = weight_reader.read_bytes(size)
        mean = weight_reader.read_bytes(size)
        var = weight_reader.read_bytes(size)
        # Keras expects [gamma, beta, moving_mean, moving_variance]
        norm_layer.set_weights([gamma, beta, mean, var])
    if len(conv_layer.get_weights()) > 1:  # conv with bias (only conv_23)
        bias = weight_reader.read_bytes(np.prod(conv_layer.get_weights()[1].shape))
        kernel = weight_reader.read_bytes(np.prod(conv_layer.get_weights()[0].shape))
        # Darknet kernel layout is [out, in, h, w]; Keras wants [h, w, in, out]
        kernel = kernel.reshape(list(reversed(conv_layer.get_weights()[0].shape)))
        kernel = kernel.transpose([2, 3, 1, 0])
        conv_layer.set_weights([kernel, bias])
    else:
        kernel = weight_reader.read_bytes(np.prod(conv_layer.get_weights()[0].shape))
        kernel = kernel.reshape(list(reversed(conv_layer.get_weights()[0].shape)))
        kernel = kernel.transpose([2, 3, 1, 0])
        conv_layer.set_weights([kernel])

# Re-initialize the detection head (conv_23): its class count differs from the
# pretrained model, so random weights scaled down by the grid size are used.
layer = model.layers[-2]
layer.trainable = True
weights = layer.get_weights()
new_kernel = np.random.normal(size=weights[0].shape) / (GRIDSZ * GRIDSZ)
new_bias = np.random.normal(size=weights[1].shape) / (GRIDSZ * GRIDSZ)
layer.set_weights([new_kernel, new_bias])
# model.load_weights('weights\\ckpt.h5')
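Since conv_23 is re-initialized anyway, a quick way to spot a misaligned read is to print how far the reader got through the file (the two counts need not match exactly, because the pretrained head has a different shape than ours):
# floats consumed by the reader vs. the file length
print('read %d of %d floats' % (weight_reader.offset, weight_reader.all_weights.size))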
Visualizing the network output
# train_gen is the training data generator built in the earlier data-pipeline sections
img, detector_mask, matching_gt_boxes, matching_classes_oh, gt_boxes_grid = next(train_gen)
# take the first sample of the batch
img, detector_mask, matching_gt_boxes, matching_classes_oh, gt_boxes_grid = \
    img[0], detector_mask[0], matching_gt_boxes[0], matching_classes_oh[0], gt_boxes_grid[0]
# [b,512,512,3] => [b,16,16,5,7]; each anchor predicts x-y-w-h-conf-l1-l2,
# so [..., 4] extracts the confidence channel: [16,16,5]
y_pred = model(tf.expand_dims(img, axis=0))[0][..., 4]
# [16,16,5] => [16,16]: sum the confidence over the 5 anchors
y_pred = tf.reduce_sum(y_pred, axis=2)
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2)
ax1.imshow(img)                                            # input image
ax2.matshow(tf.reduce_sum(detector_mask, axis=2)[..., 0])  # ground-truth object mask
ax3.matshow(y_pred)                                        # predicted confidence (untrained)
plt.show()
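Note that conv_23 outputs raw logits, so the map above is not a probability. For a probability-like confidence map one would typically squash the conf channel with a sigmoid first (a minimal sketch):
# sigmoid over the raw conf logits, then take the best anchor per cell
y_conf = tf.sigmoid(model(tf.expand_dims(img, axis=0))[0][..., 4])
plt.matshow(tf.reduce_max(y_conf, axis=2).numpy())
plt.show()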