Object Detection: YOLO Source Code Analysis

III. The Configuration File Explained (config.py)

import os


# dataset paths and model checkpoint paths
#
# path and dataset parameter
#

DATA_PATH = 'data'        # root directory for all data

PASCAL_PATH = os.path.join(DATA_PATH, 'pascal_voc')  # directory containing the VOC 2012 dataset

CACHE_PATH = os.path.join(PASCAL_PATH, 'cache')      # folder for the generated ground-truth label cache files

OUTPUT_DIR = os.path.join(PASCAL_PATH, 'output')     # folder for saved models and log files

WEIGHTS_DIR = os.path.join(PASCAL_PATH, 'weights')   # directory containing checkpoint (weights) files

WEIGHTS_FILE = None
# WEIGHTS_FILE = os.path.join(DATA_PATH, 'weights', 'YOLO_small.ckpt')

# VOC 2012 dataset class names
CLASSES = ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus',
           'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse',
           'motorbike', 'person', 'pottedplant', 'sheep', 'sofa',
           'train', 'tvmonitor']

# whether to augment with horizontally flipped copies (doubles the dataset)
FLIPPED = True


# network model parameters
#
# model parameter
#

# input image size
IMAGE_SIZE = 448

# grid size: the image is divided into 7x7 cells
CELL_SIZE = 7

# number of bounding boxes per cell, B = 2
BOXES_PER_CELL = 2

# slope coefficient of the leaky ReLU activation
ALPHA = 0.1

# whether to print detection info to the console
DISP_CONSOLE = False

# loss function weights
OBJECT_SCALE = 1.0    # confidence weight for cells containing an object
NOOBJECT_SCALE = 1.0  # confidence weight for cells without an object
CLASS_SCALE = 2.0     # class prediction weight
COORD_SCALE = 5.0     # bounding box coordinate weight

# training parameters
#
# solver parameter
#

GPU = ''
# learning rate
LEARNING_RATE = 0.0001
# number of steps between learning rate decays
DECAY_STEPS = 30000
# learning rate decay rate
DECAY_RATE = 0.1

# apply the decay in discrete steps rather than continuously
STAIRCASE = True

# batch size
BATCH_SIZE = 45
# maximum number of training iterations
MAX_ITER = 15000

# interval (in steps) between summary/log writes
SUMMARY_ITER = 10
# interval (in steps) between model checkpoints
SAVE_ITER = 1000

# test-time parameters
#
# test parameter
#
# confidence threshold for a cell to be considered as containing an object
THRESHOLD = 0.2
# IoU threshold for non-maximum suppression
IOU_THRESHOLD = 0.5
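These solver settings are meant to drive an exponentially decaying learning rate. A minimal sketch of how they would typically be wired into TensorFlow's tf.train.exponential_decay (the optimizer choice and variable names below are illustrative, not taken from the repository's solver code):

import tensorflow as tf
import yolo.config as cfg

global_step = tf.train.create_global_step()
# start at LEARNING_RATE and multiply by DECAY_RATE every DECAY_STEPS steps;
# staircase=True applies the decay in discrete jumps instead of continuously
learning_rate = tf.train.exponential_decay(
    cfg.LEARNING_RATE, global_step, cfg.DECAY_STEPS,
    cfg.DECAY_RATE, staircase=cfg.STAIRCASE, name='learning_rate')
optimizer = tf.train.GradientDescentOptimizer(learning_rate)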

IV. The yolo Folder Explained

The YOLO network is built by the code in yolo_net.py inside the yolo folder. This file defines the YOLONet class, which contains methods for network parameter initialization (__init__()), building the network (build_network()), and the loss function (loss_layer()), among others.

import numpy as np
import tensorflow as tf
import yolo.config as cfg

slim = tf.contrib.slim


class YOLONet(object):

1. Network Parameter Initialization

All of the network's initialization parameters are set in the __init__() method.

    # network parameter initialization
    def __init__(self, is_training=True):
        '''
        The constructor reads the network parameters from the cfg file and defines the
        input and output sizes. offset is a fixed per-cell offset tensor. boundary1 and
        boundary2 mark where each kind of information (class scores, confidences, boxes)
        ends in the flattened output vector:
        boundary1 is the size of the class-prediction part over all cells, i.e.
        self.cell_size * self.cell_size * self.num_class;
        boundary2 additionally includes the per-cell box confidences, i.e.
        boundary1 + self.cell_size * self.cell_size * self.boxes_per_cell.
        '''
        # VOC 2012 class names
        self.classes = cfg.CLASSES
        # number of classes C = 20
        self.num_class = len(self.classes)

        # network input image size: 448 x 448
        self.image_size = cfg.IMAGE_SIZE
        # grid size S = 7: the image is divided into S x S cells
        self.cell_size = cfg.CELL_SIZE
        # number of bounding boxes per cell B = 2
        self.boxes_per_cell = cfg.BOXES_PER_CELL

        # network output size S*S*(B*5 + C) = 1470
        self.output_size = (self.cell_size * self.cell_size) * \
            (self.num_class + self.boxes_per_cell * 5)
            
        # pixels per cell: image_size / cell_size = 64
        self.scale = 1.0 * self.image_size / self.cell_size

        # split points that separate the output into class scores, confidences and boxes:
        # 7*7*20 + 7*7*2 + 7*7*2*4 = 1470
        # 7*7*20
        self.boundary1 = self.cell_size * self.cell_size * self.num_class
        # 7*7*20 + 7*7*2
        self.boundary2 = self.boundary1 + \
            self.cell_size * self.cell_size * self.boxes_per_cell
        
        # loss function weights
        self.object_scale = cfg.OBJECT_SCALE      # 1.0
        self.noobject_scale = cfg.NOOBJECT_SCALE  # 1.0
        self.class_scale = cfg.CLASS_SCALE        # 2.0
        self.coord_scale = cfg.COORD_SCALE        # 5.0

        # learning rate 0.0001
        self.learning_rate = cfg.LEARNING_RATE
        # batch size 45
        self.batch_size = cfg.BATCH_SIZE
        # leaky ReLU coefficient 0.1
        self.alpha = cfg.ALPHA

        # per-cell offsets (the x/column index of each grid cell), shape [7, 7, 2]
        self.offset = np.transpose(np.reshape(np.array(
            [np.arange(self.cell_size)] * self.cell_size * self.boxes_per_cell),
            (self.boxes_per_cell, self.cell_size, self.cell_size)), (1, 2, 0))

        # input image placeholder [None, image_size, image_size, 3]
        self.images = tf.placeholder(
            tf.float32, [None, self.image_size, self.image_size, 3],
            name='images')
        # build the network and get its raw (pre-activation) output, shape [None, 1470]
        self.logits = self.build_network(
            self.images, num_outputs=self.output_size, alpha=self.alpha,
            is_training=is_training)

        if is_training:
            # label placeholder [None, S, S, 5 + C], i.e. [None, 7, 7, 25]
            self.labels = tf.placeholder(
                tf.float32,
                [None, self.cell_size, self.cell_size, 5 + self.num_class])
            # build the loss function
            self.loss_layer(self.logits, self.labels)
            # total loss, including the weight regularization terms
            self.total_loss = tf.losses.get_total_loss()
            # record the loss as a scalar summary named total_loss
            tf.summary.scalar('total_loss', self.total_loss)
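To see what self.offset actually contains, here is a small standalone numpy check (the reshape/transpose is copied from __init__ with cell_size = 7 and boxes_per_cell = 2). Every entry offset[i, j, :] equals j, i.e. the tensor simply stores the x (column) index of each grid cell, which the loss and decoding code can add to the per-cell x predictions to recover grid-relative coordinates:

import numpy as np

cell_size, boxes_per_cell = 7, 2
offset = np.transpose(np.reshape(np.array(
    [np.arange(cell_size)] * cell_size * boxes_per_cell),
    (boxes_per_cell, cell_size, cell_size)), (1, 2, 0))

print(offset.shape)     # (7, 7, 2)
print(offset[0, :, 0])  # [0 1 2 3 4 5 6] -- the column index of each cell
print(offset[:, 3, 1])  # [3 3 3 3 3 3 3] -- constant down any one column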

2. Building the Network

The network is built by the build_network() function. It consists of convolutional, pooling, and fully connected layers; the input has shape [None, 448, 448, 3] and the output has shape [None, 1470].

    def build_network(self,
                      images,
                      num_outputs,
                      alpha,
                      keep_prob=0.5,
                      is_training=True,
                      scope='yolo'):
        '''
        Build the YOLO network.
        args:
            images: input image placeholder [None, image_size, image_size, 3], here [None, 448, 448, 3]
            num_outputs: scalar, number of output nodes, 1470
            alpha: leaky ReLU coefficient, 0.1
            keep_prob: dropout keep probability
            is_training: whether the network is being trained
            scope: variable scope name
        return:
            the last layer of the network, before any activation, shape [None, 1470]
        '''
        # define the variable scope
        with tf.variable_scope(scope):
            # shared layer arguments: leaky ReLU activation and L2 weight regularization
            with slim.arg_scope(
                [slim.conv2d, slim.fully_connected],
                activation_fn=leaky_relu(alpha),
                weights_regularizer=slim.l2_regularizer(0.0005),
                weights_initializer=tf.truncated_normal_initializer(0.0, 0.01)
            ):
                # pad_1: pad to 454x454x3
                net = tf.pad(
                    images, np.array([[0, 0], [3, 3], [3, 3], [0, 0]]),
                    name='pad_1')
                # conv_2: 7x7x64, s=2, VALID padding, ceil((n-f+1)/s) -> 224x224x64
                net = slim.conv2d(
                    net, 64, 7, 2, padding='VALID', scope='conv_2')
                # pool_3: 2x2 max pool -> 112x112x64
                net = slim.max_pool2d(net, 2, padding='SAME', scope='pool_3')

                # conv_4: 3x3x192, s=1, SAME padding, ceil(n/s) -> 112x112x192
                net = slim.conv2d(net, 192, 3, scope='conv_4')
                # pool_5: 2x2 max pool -> 56x56x192
                net = slim.max_pool2d(net, 2, padding='SAME', scope='pool_5')
                
                # conv_6: 1x1x128 -> 56x56x128
                net = slim.conv2d(net, 128, 1, scope='conv_6')
                # conv_7: 3x3x256 -> 56x56x256
                net = slim.conv2d(net, 256, 3, scope='conv_7')
                # conv_8: 1x1x256 -> 56x56x256
                net = slim.conv2d(net, 256, 1, scope='conv_8')
                # conv_9: 3x3x512 -> 56x56x512
                net = slim.conv2d(net, 512, 3, scope='conv_9')
                # pool_10: 2x2 max pool -> 28x28x512
                net = slim.max_pool2d(net, 2, padding='SAME', scope='pool_10')

                # conv_11: 1x1x256 -> 28x28x256
                net = slim.conv2d(net, 256, 1, scope='conv_11')
                # conv_12: 3x3x512 -> 28x28x512
                net = slim.conv2d(net, 512, 3, scope='conv_12')

                # conv_13: 1x1x256 -> 28x28x256
                net = slim.conv2d(net, 256, 1, scope='conv_13')
                # conv_14: 3x3x512 -> 28x28x512
                net = slim.conv2d(net, 512, 3, scope='conv_14')
                
                
                # conv_15 - conv_18: two more 1x1x256 / 3x3x512 pairs -> 28x28x512
                net = slim.conv2d(net, 256, 1, scope='conv_15')
                net = slim.conv2d(net, 512, 3, scope='conv_16')

                net = slim.conv2d(net, 256, 1, scope='conv_17')
                net = slim.conv2d(net, 512, 3, scope='conv_18')

                # conv_19: 1x1x512 -> 28x28x512
                net = slim.conv2d(net, 512, 1, scope='conv_19')
                # conv_20: 3x3x1024 -> 28x28x1024
                net = slim.conv2d(net, 1024, 3, scope='conv_20')
                # pool_21: 2x2 max pool -> 14x14x1024
                net = slim.max_pool2d(net, 2, padding='SAME', scope='pool_21')
                
                # conv_22: 1x1x512 -> 14x14x512
                net = slim.conv2d(net, 512, 1, scope='conv_22')
                # conv_23: 3x3x1024 -> 14x14x1024
                net = slim.conv2d(net, 1024, 3, scope='conv_23')

                # conv_24 - conv_26: 1x1x512, 3x3x1024, 3x3x1024 -> 14x14x1024
                net = slim.conv2d(net, 512, 1, scope='conv_24')
                net = slim.conv2d(net, 1024, 3, scope='conv_25')
                net = slim.conv2d(net, 1024, 3, scope='conv_26')

                # pad_27: pad to 16x16x1024
                net = tf.pad(
                    net, np.array([[0, 0], [1, 1], [1, 1], [0, 0]]),
                    name='pad_27')
                    
                # conv_28: 3x3x1024, s=2, VALID padding, ceil((n-f+1)/s) -> 7x7x1024
                net = slim.conv2d(
                    net, 1024, 3, 2, padding='VALID', scope='conv_28')

                # conv_29, conv_30: 3x3x1024 -> 7x7x1024
                net = slim.conv2d(net, 1024, 3, scope='conv_29')
                net = slim.conv2d(net, 1024, 3, scope='conv_30')

                # trans_31: transpose to [None, 1024, 7, 7]
                net = tf.transpose(net, [0, 3, 1, 2], name='trans_31')
                # flat_32: flatten to 50176 = 1024*7*7
                net = slim.flatten(net, scope='flat_32')
                # fc_33: fully connected, 512 units
                net = slim.fully_connected(net, 512, scope='fc_33')
                # fc_34: fully connected, 4096 units
                net = slim.fully_connected(net, 4096, scope='fc_34')
                # dropout_35: dropout on the 4096-d features
                net = slim.dropout(
                    net, keep_prob=keep_prob, is_training=is_training,
                    scope='dropout_35')

                # fc_36: fully connected output layer, num_outputs = 1470, no activation
                net = slim.fully_connected(
                    net, num_outputs, activation_fn=None, scope='fc_36')
        return net
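Note that build_network() passes activation_fn=leaky_relu(alpha) to slim.arg_scope. The leaky_relu helper is defined elsewhere in yolo_net.py and is not shown in this excerpt; it has to return a callable that slim can use as an activation function. A minimal stand-in consistent with that usage would be:

def leaky_relu(alpha):
    # returns an activation function computing f(x) = max(alpha * x, x)
    def op(inputs):
        return tf.maximum(alpha * inputs, inputs, name='leaky_relu')
    return op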

3. The Loss Function
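The loss_layer() method itself is not reproduced in this excerpt. For orientation, it implements a weighted sum-of-squares loss of the standard YOLO v1 form, with the four config weights (COORD_SCALE, OBJECT_SCALE, NOOBJECT_SCALE, CLASS_SCALE) playing the role of the λ factors:

\[
\begin{aligned}
L ={}& \lambda_{coord} \sum_{i=1}^{S^2} \sum_{j=1}^{B} \mathbb{1}_{ij}^{obj}
      \left[ (x_i - \hat{x}_i)^2 + (y_i - \hat{y}_i)^2
           + (\sqrt{w_i} - \sqrt{\hat{w}_i})^2 + (\sqrt{h_i} - \sqrt{\hat{h}_i})^2 \right] \\
     &+ \lambda_{obj} \sum_{i=1}^{S^2} \sum_{j=1}^{B} \mathbb{1}_{ij}^{obj} (C_i - \hat{C}_i)^2
      + \lambda_{noobj} \sum_{i=1}^{S^2} \sum_{j=1}^{B} \mathbb{1}_{ij}^{noobj} (C_i - \hat{C}_i)^2 \\
     &+ \lambda_{class} \sum_{i=1}^{S^2} \mathbb{1}_{i}^{obj} \sum_{c \in \text{classes}} (p_i(c) - \hat{p}_i(c))^2
\end{aligned}
\]

where, with this configuration, λ_coord = 5.0, λ_obj = 1.0, λ_noobj = 1.0 and λ_class = 2.0.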

Original article: https://www.cnblogs.com/gezhuangzhuang/p/10498275.html