将 Caltech 数据集做成 VOC 格式

一、先下载caltech数据集

二、格式转换代码将 ".seq" 转换为 ".jpg" 文件（ https://github.com/mitmul/caltech-pedestrian-dataset-converter.git）

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os
import glob
import cv2 as cv


def save_img(dname, fn, i, frame):
    """Write one decoded frame into the output directory as a PNG file.

    The file name is ``<basename of dname>_<stem of fn>_<i>.png`` and the
    file is placed under the module-level ``out_dir``.
    """
    set_name = os.path.basename(dname)
    # Everything before the first dot of the sequence file name.
    seq_stem = os.path.basename(fn).split('.')[0]
    target = '{}/{}_{}_{}.png'.format(out_dir, set_name, seq_stem, i)
    cv.imwrite(target, frame)

# Decode every ``data/set*/*.seq`` video and dump its frames as images
# under ``out_dir``.
out_dir = 'data/images'
if not os.path.exists(out_dir):
    os.makedirs(out_dir)
for dname in sorted(glob.glob('data/set*')):
    for fn in sorted(glob.glob('{}/*.seq'.format(dname))):
        cap = cv.VideoCapture(fn)
        try:
            i = 0
            while True:
                ret, frame = cap.read()
                # ``ret`` turns false once no further frame can be read.
                if not ret:
                    break
                save_img(dname, fn, i, frame)
                i += 1
        finally:
            # Release the decoder explicitly instead of relying on garbage
            # collection, so handles do not pile up across many sequences.
            cap.release()
        print(fn)

三、将 ".vbb" 文件转化为二进制文件（需要依赖 code3.2.1）

% Walk every folder under maindir whose name starts with 'set' and call
% vbb_to_txtt once for each sub-folder found inside it.
maindir = '/home/user/Downloads/caltech_data_set/data-USA/';
subdir  = dir( maindir );
for i = 1 : length( subdir )
    % Skip the '.' / '..' pseudo-entries and anything that is not a folder.
    if( isequal( subdir( i ).name, '.' )||...
        isequal( subdir( i ).name, '..')||...
        ~subdir( i ).isdir)
        continue;
    end

    % strncmp never indexes past the end of the name, so a folder whose
    % name is shorter than three characters is skipped instead of raising
    % an index-out-of-range error.
    if( ~strncmp( subdir( i ).name, 'set', 3 ) )
        continue;
    end

    ssdir = dir( fullfile( maindir, subdir( i ).name ) );
    for j = 1 : length( ssdir )
        if( ~ssdir( j ).isdir ||...
            isequal( ssdir( j ).name, '.' )||...
            isequal( ssdir( j ).name, '..'))
            continue;
        end

        % vName1 is the relative annotation name ('setXX/VYYY'); fnm is the
        % same two parts concatenated without a separator ('setXXVYYY').
        vName1 = fullfile( subdir( i ).name, ssdir( j ).name );
        fnm    = [ subdir( i ).name, ssdir( j ).name ];

        % Print through an explicit '%s' format so that characters such as
        % '%' or '\' inside a name are not interpreted as format directives.
        fprintf( '%s\n', vName1 );
        fprintf( '%s\n', fnm );
        vbb_to_txtt( vName1, fnm );
        fprintf( '%s\n', vName1 );
    end
end

function vbb_to_txtt(vName, fnm)
    % VBB_TO_TXTT  Dump the object boxes of one annotation to a text file.
    %
    %   vName - annotation name relative to the annotations folder,
    %           e.g. 'set01/V000'.
    %   fnm   - suffix of the output file name; the file written is
    %           [outPrefix '-' fnm].
    %
    %   One line is written per object, in the form
    %       <frame index> <pos(1)> <pos(2)> <pos(3)> <pos(4)>
    %   Objects whose pos(1) equals 0 are skipped.
    %
    %   NOTE(review): vbb and dbInfo are not defined in this file; they are
    %   expected to be on the MATLAB path. pos is presumably [x y w h] -
    %   confirm against the annotation toolbox.
    A = vbb( 'vbbLoad', [dbInfo '/annotations/' vName] );

    % Named outPrefix rather than "path" to avoid shadowing the builtin.
    outPrefix = '/home/user/Downloads/caltech_data_set/data/annotations';
    outFile   = [outPrefix '-' fnm];

    c = fopen( outFile, 'w' );
    if c == -1
        error( 'vbb_to_txtt:openFailed', ...
               'Cannot open %s for writing.', outFile );
    end
    % Close the file when the function exits, including on error.
    closeFile = onCleanup( @() fclose( c ) );

    for i = 1:A.nFrame
        % objLists holds one cell per frame; the cell content is the array
        % of objects annotated in that frame.
        iframe_data = A.objLists{1, i};
        for j = 1:length( iframe_data )
            iframe_dataj = iframe_data(j);
            if iframe_dataj.pos(1) ~= 0
                fprintf( c, '%d %f %f %f %f\n', i, ...
                         iframe_dataj.pos(1), iframe_dataj.pos(2), ...
                         iframe_dataj.pos(3), iframe_dataj.pos(4) );
            end
        end
    end
end

四、将生成二进制文件中的标注框写成VOC格式的XML文件

#!/usr/bin/env python
# coding:utf-8

#from xml.etree.ElementTree import Element, SubElement, tostring
from lxml.etree import Element, SubElement, tostring
import pprint
from xml.dom.minidom import parseString
import os

def mkdir(path):
    """Create directory *path*, including missing parents, if it is absent.

    Surrounding whitespace and any trailing backslashes are removed from
    *path* before it is used.

    Returns:
        True if the directory was created, False if the path already existed.
    """
    path = path.strip()
    # A literal backslash has to be escaped inside a normal string literal.
    path = path.rstrip("\\")

    if os.path.exists(path):
        # NOTE: "failed!" only means that the path was already present.
        print(path + 'failed!')
        return False

    os.makedirs(path)
    print(path + 'ok')
    return True

def generate_xml(file_info, obj,
                 file_root='/home/user/Downloads/caltech_data_set/data_test/'):
    """Write one VOC-style annotation XML file describing a single frame.

    Args:
        file_info: two-item sequence ``[folder, filename]``. ``folder`` is
            stored in the ``<folder>`` element and is also the output
            sub-directory below *file_root*; ``filename`` is stored in
            ``<filename>`` and the part before its first dot names the
            XML file.
        obj: iterable of dicts holding string values under the keys
            ``xmin``, ``ymin``, ``xmax`` and ``ymax``. Each dict becomes one
            ``<object>`` element labelled ``person``.
        file_root: path prefix under which ``<folder>/<stem>.xml`` is
            written. Defaults to the location the script has always used.

    The ``<size>`` element is fixed to 640 x 480 x 3.
    """
    node_root = Element('annotation')
    SubElement(node_root, 'folder').text = file_info[0]
    SubElement(node_root, 'filename').text = file_info[1]

    node_size = SubElement(node_root, 'size')
    for tag, value in (('width', '640'), ('height', '480'), ('depth', '3')):
        SubElement(node_size, tag).text = value

    for obj_i in obj:
        print(obj_i)
        node_object = SubElement(node_root, 'object')
        SubElement(node_object, 'name').text = 'person'

        node_bndbox = SubElement(node_object, 'bndbox')
        for tag in ('xmin', 'ymin', 'xmax', 'ymax'):
            SubElement(node_bndbox, tag).text = obj_i[tag]

    # pretty_print puts each element on its own indented line.
    xml = tostring(node_root, pretty_print=True)

    file_name = file_root + file_info[0]
    mkdir(file_name)
    xml_path = file_name + "/" + file_info[1].split('.')[0] + ".xml"

    # Open in binary mode because tostring() returns an encoded byte string,
    # and truncate instead of appending so that running the script again
    # does not leave two root elements in the same file.
    with open(xml_path, 'wb') as fw:
        fw.write(xml)
    print("xml _ ok")

def _parse_annotation_line(line):
    """Parse one ``"<frame> <x> <y> <w> <h>"`` record.

    Returns:
        ``(frame, box)`` where ``box`` maps ``xmin``/``ymin``/``xmax``/``ymax``
        to integer-valued strings, or None when the line is not a valid
        record (blank line, missing fields, non-numeric text).
    """
    fields = line.split()
    try:
        frame = int(fields[0])
        x, y, w, h = (float(value) for value in fields[1:5])
        box = {
            "xmin": str(int(x)),
            "ymin": str(int(y)),
            "xmax": str(int(x + w)),
            "ymax": str(int(y + h)),
        }
    except (IndexError, ValueError, OverflowError):
        return None
    return frame, box


def printPath(level, path):
    """Walk *path* recursively and turn each annotation text file into XML.

    Non-hidden sub-directories are printed and visited first. Afterwards
    every regular file directly inside *path* is read as a list of
    ``"<frame> <x> <y> <w> <h>"`` records; consecutive records sharing a
    frame number are grouped and handed to ``generate_xml`` as one frame.

    Args:
        level: depth of *path* in the walk. It only controls how many
            dashes are printed in front of sub-directory names.
        path: directory to scan.
    """
    level = int(level)

    dir_names = []
    file_names = []
    for entry in os.listdir(path):
        full_path = path + '/' + entry
        if os.path.isdir(full_path):
            # Hidden folders are left out of the walk.
            if not entry.startswith('.'):
                dir_names.append(entry)
        if os.path.isfile(full_path):
            file_names.append(entry)

    for dir_name in dir_names:
        print('{} {}'.format('-' * level, dir_name))
        printPath(level + 1, path + '/' + dir_name)

    print(file_names)
    for fl in file_names:
        # The slices assume names shaped like "annotations-setXXVYYY":
        # characters 12-16 are the set name, 17-20 the video name.
        set_name, video_name = fl[12:17], fl[17:21]
        print('{} {}'.format(set_name, video_name))
        folder = set_name + '/' + video_name
        print([folder])
        print(path)

        with open(path + "/" + fl, 'r') as annotation_file:
            line_content = annotation_file.readlines()
        print(line_content)

        # A file that is empty or whose first line is not a record is not
        # an annotation file; ignore it completely.
        first = _parse_annotation_line(line_content[0]) if line_content else None
        if first is None:
            continue

        current_frame, box = first
        boxes = [box]
        for line in line_content[1:]:
            parsed = _parse_annotation_line(line)
            if parsed is None:
                # Tolerate blank or damaged lines instead of aborting.
                continue
            frame, box = parsed
            if frame == current_frame:
                boxes.append(box)
            elif frame > 0:
                # A new frame starts: write out the finished one first.
                generate_xml([folder, str(current_frame) + '.jpg'], boxes)
                current_frame = frame
                boxes = [box]

        # Write the final frame too; without this the last annotated frame
        # of every file would never reach generate_xml.
        generate_xml([folder, str(current_frame) + '.jpg'], boxes)

def read_annotations_generate_fileinfo_obj(file_path):
    """Placeholder: accepts *file_path*, does nothing and returns None."""
    return None

if __name__ == "__main__":
    # Manual check of generate_xml, kept for reference:
    #   generate_xml(['set00/V000', '1.jpg'],
    #                [{"xmin": "1", "ymin": "1", "xmax": "5", "ymax": "5"},
    #                 {"xmin": "2", "ymin": "2", "xmax": "6", "ymax": "6"}])

    # Convert every annotation file found below the data directory.
    printPath(level=1, path="/home/user/Downloads/caltech_data_set/data_old")

五、使用脚本，利用xml文件，生成train.txt trainval.txt test.txt 等文件

import os
import random

def folder_struct(level, path):
    """Walk *path* recursively and call ``generate_txt`` for every file.

    Non-hidden sub-directories are visited first. Then, for each regular
    file directly inside *path*, a folder name is cut out of the file name
    (characters 12-16 and 17-20, joined by ``/``) and passed to
    ``generate_txt``.

    Args:
        level: depth of *path* in the walk; it is only reported in the
            printed directory list.
        path: directory to scan.
    """
    level = int(level)

    sub_dirs = []
    file_names = []
    for entry in os.listdir(path):
        full_path = os.path.join(path, entry)
        if os.path.isdir(full_path):
            # Hidden folders are left out of the walk.
            if not entry.startswith('.'):
                sub_dirs.append(entry)
        if os.path.isfile(full_path):
            file_names.append(entry)

    for sub_dir in sub_dirs:
        folder_struct(level + 1, path + '/' + sub_dir)

    # Same listing as before: the level (as text) followed by the folders.
    print([str(level)] + sub_dirs)

    for fl in file_names:
        # The slices assume names shaped like "annotations-setXXVYYY".
        file_info = fl[12:17] + '/' + fl[17:21]
        print(file_info)
        generate_txt(file_info)


def generate_txt(xml_folder, folder_root='/home/user/Desktop/VOC/',
                 trainval_percent=0.66, train_percent=0.5):
    """Append the entries of one annotation folder to the split lists.

    The names found in ``<folder_root>Annotations/<xml_folder>`` are split
    at random: ``trainval_percent`` of them go to ``trainval.txt`` and the
    rest to ``test.txt``; ``train_percent`` of the trainval part goes to
    ``train.txt`` and the remainder to ``val.txt``. All four files live in
    ``<folder_root>ImageSets/Main`` and are opened in append mode, so
    repeated calls accumulate entries.

    Each written line is ``<xml_folder>/<name without its last 4 chars>``.

    Args:
        xml_folder: annotation sub-folder, e.g. ``'set00/V000'``.
        folder_root: dataset root, ending with a path separator.
        trainval_percent: fraction of entries assigned to trainval.
        train_percent: fraction of the trainval entries assigned to train.

    A missing annotation folder or an unwritable output directory is
    reported on stdout and the call returns without raising.
    """
    xmlfilepath = folder_root + 'Annotations/' + xml_folder
    txtsavepath = folder_root + 'ImageSets/Main'

    try:
        total_xml = os.listdir(xmlfilepath)
    except OSError as err:
        # Folder names passed in may be derived from unrelated file names,
        # so a missing folder is expected; report it and move on.
        print('skip {}: {}'.format(xmlfilepath, err))
        return

    num = len(total_xml)
    indices = list(range(num))
    tv = int(num * trainval_percent)
    tr = int(tv * train_percent)
    trainval_list = random.sample(indices, tv)
    # Sets give constant-time membership tests in the loop below.
    train = set(random.sample(trainval_list, tr))
    trainval = set(trainval_list)

    # Use the folder name itself as the prefix; slicing a fixed number of
    # characters off the full path only works for 10-character names.
    folder_name = xml_folder + '/'
    print(folder_name)

    buckets = {'trainval': [], 'train': [], 'val': [], 'test': []}
    for i, xml_name in enumerate(total_xml):
        # Drop the 4-character extension (".xml").
        name = folder_name + xml_name[:-4] + '\n'
        if i in trainval:
            buckets['trainval'].append(name)
            buckets['train' if i in train else 'val'].append(name)
        else:
            buckets['test'].append(name)

    try:
        for split in ('trainval', 'test', 'train', 'val'):
            with open(txtsavepath + '/' + split + '.txt', 'a') as fh:
                fh.writelines(buckets[split])
    except (IOError, OSError) as err:
        print('cannot write split lists in {}: {}'.format(txtsavepath, err))
# Walks all folder names. Instead of listing the folders themselves, the
# folder names are derived from the names of the annotation files that were
# generated earlier.
folder_struct(level=1, path="/home/user/Downloads/caltech_data_set/data_old")

在具体训练的时候，需要用 set00–set05 作为训练集，用 set06–set10 作为测试集

在跑的过程中，可能会有标注越界的问题，在voc_eval.py 和pascal_voc.py中进行更改就好了。推荐博客