YOLOv2 Training and Testing (Part 2)

This post builds the YOLOv2 network with a custom SpaceToDepth layer, walks step by step through initializing it from pretrained Darknet weights (reading, reshaping, and assigning them), and shows how to visualize the network's output against the ground truth.


Import modules

import os, glob
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow.keras.backend as K
import xml.etree.ElementTree as ET    # VOC-style annotation parsing (Part 1)
import matplotlib.pyplot as plt
from matplotlib import patches        # for drawing bounding boxes
import imgaug as ia
from imgaug import augmenters as iaa  # data augmentation (Part 1)

Build the network model

The network uses a custom SpaceToDepth layer to implement YOLOv2's passthrough (skip) connection, which folds spatial blocks of a high-resolution feature map into channels so it can be concatenated with the lower-resolution main branch.

class SpaceToDepth(layers.Layer):
    """Rearrange spatial blocks into channels: [b,H,W,C] -> [b,H/bs,W/bs,C*bs*bs]."""

    def __init__(self, block_size, **kwargs):
        self.block_size = block_size
        super(SpaceToDepth, self).__init__(**kwargs)

    def call(self, inputs):
        x = inputs
        batch, height, width, depth = K.int_shape(x)
        batch = -1  # keep the batch dimension dynamic
        reduced_height = height // self.block_size
        reduced_width = width // self.block_size
        # split each spatial axis into (reduced, block) pairs
        y = K.reshape(x, (batch, reduced_height, self.block_size,
                          reduced_width, self.block_size, depth))
        # bring the two block axes next to the channel axis
        z = K.permute_dimensions(y, (0, 1, 3, 2, 4, 5))
        # fold block x block x depth into a single channel axis
        t = K.reshape(z, (batch, reduced_height, reduced_width,
                          depth * self.block_size ** 2))
        return t

    def compute_output_shape(self, input_shape):
        shape = (input_shape[0], input_shape[1] // self.block_size,
                 input_shape[2] // self.block_size,
                 input_shape[3] * self.block_size ** 2)
        return tf.TensorShape(shape)
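
This custom layer should behave exactly like TensorFlow's built-in op. A minimal equivalence check (my addition, not in the original):

# Sanity check (assumption: channel ordering matches tf.nn.space_to_depth).
x = tf.random.normal((1, 4, 4, 8))
custom = SpaceToDepth(block_size=2)(x)
builtin = tf.nn.space_to_depth(x, block_size=2)
print(tf.reduce_max(tf.abs(custom - builtin)).numpy())  # expect 0.0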

# Input / output geometry used throughout: IMGSZ = 512, GRIDSZ = 16
# (512 / 2^5, from the five 2x2 max-pools). Both come from Part 1.
IMGSZ = 512
GRIDSZ = 16

input_image = layers.Input((IMGSZ, IMGSZ, 3), dtype='float32')

# Layer 1
x = layers.Conv2D(32, (3,3), strides=(1,1), padding='same', name='conv_1', use_bias=False)(input_image)
x = layers.BatchNormalization(name='norm_1')(x)
x = layers.LeakyReLU(alpha=0.1)(x)
x = layers.MaxPooling2D(pool_size=(2,2))(x)

# Layer 2
x = layers.Conv2D(64, (3,3), strides=(1,1), padding='same', name='conv_2', use_bias=False)(x)
x = layers.BatchNormalization(name='norm_2')(x)
x = layers.LeakyReLU(alpha=0.1)(x)
x = layers.MaxPooling2D(pool_size=(2,2))(x)


# Layer 3
x = layers.Conv2D(128, (3,3), strides=(1,1), padding='same', name='conv_3', use_bias=False)(x)
x = layers.BatchNormalization(name='norm_3')(x)
x = layers.LeakyReLU(alpha=0.1)(x)

# Layer 4
x = layers.Conv2D(64, (1,1), strides=(1,1), padding='same', name='conv_4', use_bias=False)(x)
x = layers.BatchNormalization(name='norm_4')(x)
x = layers.LeakyReLU(alpha=0.1)(x)

# Layer 5
x = layers.Conv2D(128, (3,3), strides=(1,1), padding='same', name='conv_5', use_bias=False)(x)
x = layers.BatchNormalization(name='norm_5')(x)
x = layers.LeakyReLU(alpha=0.1)(x)
x = layers.MaxPooling2D(pool_size=(2, 2))(x)

# Layer 6
x = layers.Conv2D(256, (3,3), strides=(1,1), padding='same', name='conv_6', use_bias=False)(x)
x = layers.BatchNormalization(name='norm_6')(x)
x = layers.LeakyReLU(alpha=0.1)(x)

# Layer 7
x = layers.Conv2D(128, (1,1), strides=(1,1), padding='same', name='conv_7', use_bias=False)(x)
x = layers.BatchNormalization(name='norm_7')(x)
x = layers.LeakyReLU(alpha=0.1)(x)

# Layer 8
x = layers.Conv2D(256, (3,3), strides=(1,1), padding='same', name='conv_8', use_bias=False)(x)
x = layers.BatchNormalization(name='norm_8')(x)
x = layers.LeakyReLU(alpha=0.1)(x)
x = layers.MaxPooling2D(pool_size=(2, 2))(x)

# Layer 9
x = layers.Conv2D(512, (3,3), strides=(1,1), padding='same', name='conv_9', use_bias=False)(x)
x = layers.BatchNormalization(name='norm_9')(x)
x = layers.LeakyReLU(alpha=0.1)(x)

# Layer 10
x = layers.Conv2D(256, (1,1), strides=(1,1), padding='same', name='conv_10', use_bias=False)(x)
x = layers.BatchNormalization(name='norm_10')(x)
x = layers.LeakyReLU(alpha=0.1)(x)

# Layer 11
x = layers.Conv2D(512, (3,3), strides=(1,1), padding='same', name='conv_11', use_bias=False)(x)
x = layers.BatchNormalization(name='norm_11')(x)
x = layers.LeakyReLU(alpha=0.1)(x)

# Layer 12
x = layers.Conv2D(256, (1,1), strides=(1,1), padding='same', name='conv_12', use_bias=False)(x)
x = layers.BatchNormalization(name='norm_12')(x)
x = layers.LeakyReLU(alpha=0.1)(x)

# Layer 13
x = layers.Conv2D(512, (3,3), strides=(1,1), padding='same', name='conv_13', use_bias=False)(x)
x = layers.BatchNormalization(name='norm_13')(x)
x = layers.LeakyReLU(alpha=0.1)(x)

# for skip connection
skip_x = x  # [b,32,32,512]
x = layers.MaxPooling2D(pool_size=(2, 2))(x)

# Layer 14
x = layers.Conv2D(1024, (3,3), strides=(1,1), padding='same', name='conv_14', use_bias=False)(x)
x = layers.BatchNormalization(name='norm_14')(x)
x = layers.LeakyReLU(alpha=0.1)(x)

# Layer 15
x = layers.Conv2D(512, (1,1), strides=(1,1), padding='same', name='conv_15', use_bias=False)(x)
x = layers.BatchNormalization(name='norm_15')(x)
x = layers.LeakyReLU(alpha=0.1)(x)

# Layer 16
x = layers.Conv2D(1024, (3,3), strides=(1,1), padding='same', name='conv_16', use_bias=False)(x)
x = layers.BatchNormalization(name='norm_16')(x)
x = layers.LeakyReLU(alpha=0.1)(x)

# Layer 17
x = layers.Conv2D(512, (1,1), strides=(1,1), padding='same', name='conv_17', use_bias=False)(x)
x = layers.BatchNormalization(name='norm_17')(x)
x = layers.LeakyReLU(alpha=0.1)(x)

# Layer 18
x = layers.Conv2D(1024, (3,3), strides=(1,1), padding='same', name='conv_18', use_bias=False)(x)
x = layers.BatchNormalization(name='norm_18')(x)
x = layers.LeakyReLU(alpha=0.1)(x)

# Layer 19
x = layers.Conv2D(1024, (3,3), strides=(1,1), padding='same', name='conv_19', use_bias=False)(x)
x = layers.BatchNormalization(name='norm_19')(x)
x = layers.LeakyReLU(alpha=0.1)(x)

# Layer 20
x = layers.Conv2D(1024, (3,3), strides=(1,1), padding='same', name='conv_20', use_bias=False)(x)
x = layers.BatchNormalization(name='norm_20')(x)
x = layers.LeakyReLU(alpha=0.1)(x)

# Layer 21
skip_x = layers.Conv2D(64, (1,1), strides=(1,1), padding='same', name='conv_21', use_bias=False)(skip_x)
skip_x = layers.BatchNormalization(name='norm_21')(skip_x)
skip_x = layers.LeakyReLU(alpha=0.1)(skip_x)
skip_x = SpaceToDepth(block_size=2)(skip_x)
 
# concat the passthrough features with the main branch:
# [b,16,16,256] + [b,16,16,1024] => [b,16,16,1280]
x = tf.concat([skip_x, x], axis=-1)

# Layer 22
x = layers.Conv2D(1024, (3,3), strides=(1,1), padding='same', name='conv_22', use_bias=False)(x)
x = layers.BatchNormalization(name='norm_22')(x)
x = layers.LeakyReLU(alpha=0.1)(x)
x = layers.Dropout(0.5)(x) # add dropout
# 5 anchors x (x, y, w, h, conf, 2 class scores) = 35 channels: [b,16,16,35]
x = layers.Conv2D(5*7, (1,1), strides=(1,1), padding='same', name='conv_23')(x)

# [b,16,16,35] => [b,16,16,5,7]
output = layers.Reshape((GRIDSZ, GRIDSZ, 5, 7))(x)
model = keras.models.Model(input_image, output)

# sanity-check the forward pass with a random batch
test_x = tf.random.normal((4, IMGSZ, IMGSZ, 3))
out = model(test_x)
print('out:', out.shape)
Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to
==================================================================================================
 input_1 (InputLayer)           [(None, 512, 512, 3  0           []
                                )]

 conv_1 (Conv2D)                (None, 512, 512, 32  864         ['input_1[0][0]']
                                )

 norm_1 (BatchNormalization)    (None, 512, 512, 32  128         ['conv_1[0][0]']
                                )

 leaky_re_lu (LeakyReLU)        (None, 512, 512, 32  0           ['norm_1[0][0]']
                                )

 max_pooling2d (MaxPooling2D)   (None, 256, 256, 32  0           ['leaky_re_lu[0][0]']
                                )

 conv_2 (Conv2D)                (None, 256, 256, 64  18432       ['max_pooling2d[0][0]']
                                )

 norm_2 (BatchNormalization)    (None, 256, 256, 64  256         ['conv_2[0][0]']
                                )

 leaky_re_lu_1 (LeakyReLU)      (None, 256, 256, 64  0           ['norm_2[0][0]']
                                )

 max_pooling2d_1 (MaxPooling2D)  (None, 128, 128, 64  0          ['leaky_re_lu_1[0][0]']
                                )

 conv_3 (Conv2D)                (None, 128, 128, 12  73728       ['max_pooling2d_1[0][0]']
                                8)

 norm_3 (BatchNormalization)    (None, 128, 128, 12  512         ['conv_3[0][0]']
                                8)

 leaky_re_lu_2 (LeakyReLU)      (None, 128, 128, 12  0           ['norm_3[0][0]']
                                8)

 conv_4 (Conv2D)                (None, 128, 128, 64  8192        ['leaky_re_lu_2[0][0]']
                                )

 norm_4 (BatchNormalization)    (None, 128, 128, 64  256         ['conv_4[0][0]']
                                )

 leaky_re_lu_3 (LeakyReLU)      (None, 128, 128, 64  0           ['norm_4[0][0]']
                                )

 conv_5 (Conv2D)                (None, 128, 128, 12  73728       ['leaky_re_lu_3[0][0]']
                                8)

 norm_5 (BatchNormalization)    (None, 128, 128, 12  512         ['conv_5[0][0]']
                                8)

 leaky_re_lu_4 (LeakyReLU)      (None, 128, 128, 12  0           ['norm_5[0][0]']
                                8)

 max_pooling2d_2 (MaxPooling2D)  (None, 64, 64, 128)  0          ['leaky_re_lu_4[0][0]']

 conv_6 (Conv2D)                (None, 64, 64, 256)  294912      ['max_pooling2d_2[0][0]']

 norm_6 (BatchNormalization)    (None, 64, 64, 256)  1024        ['conv_6[0][0]']

 leaky_re_lu_5 (LeakyReLU)      (None, 64, 64, 256)  0           ['norm_6[0][0]']

 conv_7 (Conv2D)                (None, 64, 64, 128)  32768       ['leaky_re_lu_5[0][0]']

 norm_7 (BatchNormalization)    (None, 64, 64, 128)  512         ['conv_7[0][0]']

 leaky_re_lu_6 (LeakyReLU)      (None, 64, 64, 128)  0           ['norm_7[0][0]']

 conv_8 (Conv2D)                (None, 64, 64, 256)  294912      ['leaky_re_lu_6[0][0]']

 norm_8 (BatchNormalization)    (None, 64, 64, 256)  1024        ['conv_8[0][0]']

 leaky_re_lu_7 (LeakyReLU)      (None, 64, 64, 256)  0           ['norm_8[0][0]']

 max_pooling2d_3 (MaxPooling2D)  (None, 32, 32, 256)  0          ['leaky_re_lu_7[0][0]']

 conv_9 (Conv2D)                (None, 32, 32, 512)  1179648     ['max_pooling2d_3[0][0]']

 norm_9 (BatchNormalization)    (None, 32, 32, 512)  2048        ['conv_9[0][0]']

 leaky_re_lu_8 (LeakyReLU)      (None, 32, 32, 512)  0           ['norm_9[0][0]']

 conv_10 (Conv2D)               (None, 32, 32, 256)  131072      ['leaky_re_lu_8[0][0]']

 norm_10 (BatchNormalization)   (None, 32, 32, 256)  1024        ['conv_10[0][0]']

 leaky_re_lu_9 (LeakyReLU)      (None, 32, 32, 256)  0           ['norm_10[0][0]']

 conv_11 (Conv2D)               (None, 32, 32, 512)  1179648     ['leaky_re_lu_9[0][0]']

 norm_11 (BatchNormalization)   (None, 32, 32, 512)  2048        ['conv_11[0][0]']

 leaky_re_lu_10 (LeakyReLU)     (None, 32, 32, 512)  0           ['norm_11[0][0]']

 conv_12 (Conv2D)               (None, 32, 32, 256)  131072      ['leaky_re_lu_10[0][0]']

 norm_12 (BatchNormalization)   (None, 32, 32, 256)  1024        ['conv_12[0][0]']

 leaky_re_lu_11 (LeakyReLU)     (None, 32, 32, 256)  0           ['norm_12[0][0]']

 conv_13 (Conv2D)               (None, 32, 32, 512)  1179648     ['leaky_re_lu_11[0][0]']

 norm_13 (BatchNormalization)   (None, 32, 32, 512)  2048        ['conv_13[0][0]']

 leaky_re_lu_12 (LeakyReLU)     (None, 32, 32, 512)  0           ['norm_13[0][0]']

 max_pooling2d_4 (MaxPooling2D)  (None, 16, 16, 512)  0          ['leaky_re_lu_12[0][0]']

 conv_14 (Conv2D)               (None, 16, 16, 1024  4718592     ['max_pooling2d_4[0][0]']
                                )

 norm_14 (BatchNormalization)   (None, 16, 16, 1024  4096        ['conv_14[0][0]']
                                )

 leaky_re_lu_13 (LeakyReLU)     (None, 16, 16, 1024  0           ['norm_14[0][0]']
                                )

 conv_15 (Conv2D)               (None, 16, 16, 512)  524288      ['leaky_re_lu_13[0][0]']

 norm_15 (BatchNormalization)   (None, 16, 16, 512)  2048        ['conv_15[0][0]']

 leaky_re_lu_14 (LeakyReLU)     (None, 16, 16, 512)  0           ['norm_15[0][0]']

 conv_16 (Conv2D)               (None, 16, 16, 1024  4718592     ['leaky_re_lu_14[0][0]']
                                )

 norm_16 (BatchNormalization)   (None, 16, 16, 1024  4096        ['conv_16[0][0]']
                                )

 leaky_re_lu_15 (LeakyReLU)     (None, 16, 16, 1024  0           ['norm_16[0][0]']
                                )

 conv_17 (Conv2D)               (None, 16, 16, 512)  524288      ['leaky_re_lu_15[0][0]']

 norm_17 (BatchNormalization)   (None, 16, 16, 512)  2048        ['conv_17[0][0]']

 leaky_re_lu_16 (LeakyReLU)     (None, 16, 16, 512)  0           ['norm_17[0][0]']

 conv_18 (Conv2D)               (None, 16, 16, 1024  4718592     ['leaky_re_lu_16[0][0]']
                                )

 norm_18 (BatchNormalization)   (None, 16, 16, 1024  4096        ['conv_18[0][0]']
                                )

 leaky_re_lu_17 (LeakyReLU)     (None, 16, 16, 1024  0           ['norm_18[0][0]']
                                )

 conv_19 (Conv2D)               (None, 16, 16, 1024  9437184     ['leaky_re_lu_17[0][0]']
                                )

 norm_19 (BatchNormalization)   (None, 16, 16, 1024  4096        ['conv_19[0][0]']
                                )

 conv_21 (Conv2D)               (None, 32, 32, 64)   32768       ['leaky_re_lu_12[0][0]']

 leaky_re_lu_18 (LeakyReLU)     (None, 16, 16, 1024  0           ['norm_19[0][0]']
                                )

 norm_21 (BatchNormalization)   (None, 32, 32, 64)   256         ['conv_21[0][0]']

 conv_20 (Conv2D)               (None, 16, 16, 1024  9437184     ['leaky_re_lu_18[0][0]']
                                )

 leaky_re_lu_20 (LeakyReLU)     (None, 32, 32, 64)   0           ['norm_21[0][0]']

 norm_20 (BatchNormalization)   (None, 16, 16, 1024  4096        ['conv_20[0][0]']
                                )

 space_to_depth (SpaceToDepth)  (None, 16, 16, 256)  0           ['leaky_re_lu_20[0][0]']

 leaky_re_lu_19 (LeakyReLU)     (None, 16, 16, 1024  0           ['norm_20[0][0]']
                                )

 tf.concat (TFOpLambda)         (None, 16, 16, 1280  0           ['space_to_depth[0][0]',
                                )                                 'leaky_re_lu_19[0][0]']

 conv_22 (Conv2D)               (None, 16, 16, 1024  11796480    ['tf.concat[0][0]']
                                )

 norm_22 (BatchNormalization)   (None, 16, 16, 1024  4096        ['conv_22[0][0]']
                                )

 leaky_re_lu_21 (LeakyReLU)     (None, 16, 16, 1024  0           ['norm_22[0][0]']
                                )

 dropout (Dropout)              (None, 16, 16, 1024  0           ['leaky_re_lu_21[0][0]']
                                )

 conv_23 (Conv2D)               (None, 16, 16, 35)   35875       ['dropout[0][0]']

 reshape (Reshape)              (None, 16, 16, 5, 7  0           ['conv_23[0][0]']
                                )

==================================================================================================
Total params: 50,583,811
Trainable params: 50,563,139
Non-trainable params: 20,672
__________________________________________________________________________________________________
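
Each output cell thus carries 5 anchor boxes with 7 values each: x, y, w, h, confidence, and two class scores. The sketch below is my addition, not part of the original code: it decodes the raw output into grid-unit boxes following the YOLOv2 parameterization, assuming an anchors table of shape [5,2] (widths and heights in grid units) as computed in Part 1.

def decode(y_pred, anchors, grid_size=GRIDSZ):
    """Decode raw output [b,G,G,5,7] into grid-unit boxes (YOLOv2 parameterization)."""
    # per-cell offsets so that box centers become absolute grid coordinates
    cx, cy = tf.meshgrid(tf.range(grid_size, dtype=tf.float32),
                         tf.range(grid_size, dtype=tf.float32))
    offsets = tf.reshape(tf.stack([cx, cy], axis=-1),
                         [1, grid_size, grid_size, 1, 2])
    xy = tf.sigmoid(y_pred[..., 0:2]) + offsets      # centers in grid units
    wh = tf.exp(y_pred[..., 2:4]) * anchors          # sizes scaled by the anchors
    conf = tf.sigmoid(y_pred[..., 4:5])              # objectness
    prob = tf.nn.softmax(y_pred[..., 5:], axis=-1)   # class probabilities
    return xy, wh, conf, prob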

Initialize the network

The pretrained Darknet weights (yolo.weights) are read sequentially and copied into the model layer by layer; the final detection layer is then randomly re-initialized.

class WeightReader:
    """Sequential reader over a Darknet .weights file (a flat float32 array)."""

    def __init__(self, weight_file):
        self.offset = 4  # skip the 4-int32 header: major, minor, revision, seen
        self.all_weights = np.fromfile(weight_file, dtype='float32')

    def read_bytes(self, size):
        # return the next `size` float32 values and advance the offset
        self.offset = self.offset + size
        return self.all_weights[self.offset - size:self.offset]

    def reset(self):
        self.offset = 4
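
For reference (my addition): the first 16 bytes of the file are that header, which you can inspect directly. Note that offset = 4 assumes the older Darknet format where `seen` is a single int32; newer exports store it as int64.

# Peek at the Darknet header (assumes the old format used by yolo.weights here).
header = np.fromfile('yolo.weights', dtype=np.int32, count=4)
print('major, minor, revision, seen:', header)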
        
weight_reader = WeightReader('yolo.weights')
weight_reader.reset()
nb_conv = 23
for i in range(1, nb_conv + 1):
    conv_layer = model.get_layer('conv_' + str(i))
    conv_layer.trainable = True
    if i < nb_conv:
        # every conv except the last is followed by batch norm;
        # Darknet stores beta, gamma, mean, var (in that order)
        norm_layer = model.get_layer('norm_' + str(i))
        norm_layer.trainable = True
        size = np.prod(norm_layer.get_weights()[0].shape)
        beta = weight_reader.read_bytes(size)
        gamma = weight_reader.read_bytes(size)
        mean = weight_reader.read_bytes(size)
        var = weight_reader.read_bytes(size)
        norm_layer.set_weights([gamma, beta, mean, var])
    if len(conv_layer.get_weights()) > 1:
        # conv with bias (only conv_23 here)
        bias = weight_reader.read_bytes(np.prod(conv_layer.get_weights()[1].shape))
        kernel = weight_reader.read_bytes(np.prod(conv_layer.get_weights()[0].shape))
        # Darknet stores kernels as [out, in, h, w]; kernels here are square,
        # so reverse-reshape + transpose recovers Keras's [h, w, in, out]
        kernel = kernel.reshape(list(reversed(conv_layer.get_weights()[0].shape)))
        kernel = kernel.transpose([2, 3, 1, 0])
        conv_layer.set_weights([kernel, bias])
    else:
        kernel = weight_reader.read_bytes(np.prod(conv_layer.get_weights()[0].shape))
        kernel = kernel.reshape(list(reversed(conv_layer.get_weights()[0].shape)))
        kernel = kernel.transpose([2, 3, 1, 0])
        conv_layer.set_weights([kernel])

# randomly re-initialize the last (detection) conv layer, since our head
# predicts 2 classes rather than the classes the pretrained weights used
layer = model.layers[-2]  # conv_23, just before the Reshape
layer.trainable = True
weights = layer.get_weights()
new_kernel = np.random.normal(size=weights[0].shape) / (GRIDSZ * GRIDSZ)
new_bias = np.random.normal(size=weights[1].shape) / (GRIDSZ * GRIDSZ)
layer.set_weights([new_kernel, new_bias])
# model.load_weights('weights\\ckpt.h5')
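
A quick consistency check (my addition): report how much of the weight file was consumed. With generic pretrained YOLOv2 weights the tail may not line up exactly, since conv_23 here has only 35 filters and is re-initialized above anyway.

# How many floats of the file did the loader actually read?
print('consumed %d of %d floats from yolo.weights'
      % (weight_reader.offset, weight_reader.all_weights.shape[0]))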

Visualize the network output

Here train_gen is the data generator built in Part 1; each batch yields the image, the detector mask, the matched ground-truth boxes, the one-hot class labels, and the ground-truth boxes in grid units.

# take one sample from the generator
img, detector_mask, matching_gt_boxes, matching_classes_oh, gt_boxes_grid = next(train_gen)
img, detector_mask, matching_gt_boxes, matching_classes_oh, gt_boxes_grid = \
    img[0], detector_mask[0], matching_gt_boxes[0], matching_classes_oh[0], gt_boxes_grid[0]

# [b,512,512,3] => [b,16,16,5,7]; index 4 of the last axis
# (x-y-w-h-conf-l1-l2) is the confidence channel: [16,16,5]
y_pred = model(tf.expand_dims(img, axis=0))[0][..., 4]
# sum the confidence over the 5 anchors: [16,16,5] => [16,16]
y_pred = tf.reduce_sum(y_pred, axis=2)

fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2)
ax1.imshow(img)                                            # input image
ax2.matshow(tf.reduce_sum(detector_mask, axis=2)[..., 0])  # ground-truth object mask
ax3.matshow(y_pred)                                        # predicted confidence map
plt.show()
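
The patches import above is only used for box drawing. As a sketch (my addition, to be placed before the plt.show() call above), you can overlay the ground-truth boxes on ax1; this assumes each row of gt_boxes_grid is [x, y, w, h, label] in grid units with zero-padded rows, the layout produced by Part 1's generator.

# Draw ground-truth boxes over the image (grid units -> pixels: 512/16 = 32).
for box in gt_boxes_grid:
    x, y, w, h = box[0], box[1], box[2], box[3]
    if w == 0:  # skip zero-padding rows
        continue
    rect = patches.Rectangle(((x - w / 2) * 32, (y - h / 2) * 32),
                             w * 32, h * 32, fill=False, color='red')
    ax1.add_patch(rect)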

[Figure: input image, ground-truth detector mask, and predicted confidence map produced by the code above]
