CCPD2019数据集 CCPD: Chinese City Parking Dataset
步骤:
prepare_data
python reformat_CCPD.py
python dataset_provider.py
# Base 200 正常车牌
# FN 20 距离摄像头相当的远或者相当近
# DB 20 光线暗或者比较亮
# Rotate 10 水平倾斜20-50°,垂直倾斜-10-10°
# Tilt 10 水平倾斜15-45°,垂直倾斜15-45°
# Weather 10 在雨天,雪天,或者雾天
# Blur 5 由于相机抖动造成的模糊
# Challenge 10 其他的比较有挑战性的车牌
# NP 5 没有车牌的新车
# 0236-16_34-222&408_398&520-377&473_222&520_243&455_398&408-0_0_28_14_33_32_29-69-74.jpg
每个名称可以分为七个字段。这些字段解释如下。
面积:牌照面积与整个图片区域的面积比。
倾斜度:水平倾斜程度和垂直倾斜度。
边界框坐标:左上和右下顶点的坐标。
四个顶点位置:整个图像中LP的四个顶点的精确(x,y)坐标。这些坐标按 右下角 -> 左下角 -> 左上角 -> 右上角 的顺序排列。
面积 0236
倾斜度 16_34
边界框坐标 222&408_398&520
四个顶点位置 377&473_222&520_243&455_398&408
车牌号(每个字符的索引) 0_0_28_14_33_32_29
亮度 69
模糊程度 74
车牌号 号码由一个汉字,一个字母和五个字母或数字组成
provinces = ["皖", "沪", "津", "渝", "冀", "晋", "蒙", "辽", "吉", "黑", "苏", "浙", "京", "闽", "赣", "鲁", "豫", "鄂", "湘", "粤", "桂", "琼", "川", "贵", "云", "藏", "陕", "甘", "青", "宁", "新", "警", "学", "O"]
alphabets = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'J', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
'X', 'Y', 'Z', 'O']
ads = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'J', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
'Y', 'Z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'O']
格式变换
import os
import cv2
import random
import os.path
def annotation_from_name(file_name):
    """Decode the plate geometry that CCPD encodes in an image file name.

    A CCPD name consists of '-'-separated fields, for example
    ``0236-16_34-222&408_398&520-377&473_222&520_243&455_398&408-0_0_28_14_33_32_29-69-74.jpg``.
    Field 2 is the bounding box (``x&y_x&y``: top-left, then bottom-right) and
    field 3 holds the four plate vertices (``x&y`` repeated four times).

    Args:
        file_name: Image file name. A full path is accepted; only its last
            component is parsed, so '-' in directory names is harmless.

    Returns:
        A 12-tuple of ints: ``(x1, y1, x2, y2)`` of the bounding box followed
        by the ``(x, y)`` pairs of the four vertices in file-name order.

    Raises:
        IndexError: If a required field or coordinate is missing.
        ValueError: If a coordinate is not an integer.
    """
    # Drop directory and extension generically instead of slicing off a fixed
    # four characters.
    stem = os.path.splitext(os.path.basename(file_name))[0]
    fields = stem.split('-')

    box_points = fields[2].split('_')
    vertex_points = fields[3].split('_')
    # Explicit indexing keeps the "missing point -> IndexError" behaviour.
    points = [box_points[0], box_points[1],
              vertex_points[0], vertex_points[1], vertex_points[2], vertex_points[3]]

    coords = []
    for point in points:
        xy = point.split('&')
        coords.append(int(xy[0]))
        coords.append(int(xy[1]))
    return tuple(coords)
def generate_data_list(ccpd_path=r'D:\data\CCPD2019',
                       train_list_file_path=r'D:\data\data_folder\data_list_CCPD_train.txt',
                       test_list_file_path=r'D:\data\data_folder\data_list_CCPD_test.txt',
                       train_proportion=1):
    """Write train/test annotation lists for the CCPD sub-datasets.

    Each ``.jpg`` found in the sub-dataset folders becomes one text line:
    ``<image path>,1,1,x1,y1,x2,y2,<x,y of the four plate vertices>``
    where the coordinates are decoded from the file name by
    ``annotation_from_name``.

    Args:
        ccpd_path: Root directory containing the ``ccpd_*`` folders.
        train_list_file_path: Output file for the training list.
        test_list_file_path: Output file for the test list.
        train_proportion: Fraction (0..1) of every folder's shuffled images
            that goes to the training list; the rest goes to the test list.

    Raises:
        ValueError: If ``train_proportion`` is outside [0, 1].
        OSError: If a sub-dataset folder is missing or an output file cannot
            be written.
    """
    if not 0 <= train_proportion <= 1:
        raise ValueError('train_proportion must be between 0 and 1')

    # Sub-datasets to scan: base (normal plates), blur, challenge, db
    # (dark/bright light), fn (far/near), rotate, tilt, weather.
    # 'ccpd_np' is left out, as in the original list (noted as cars without plates).
    subsets = ('ccpd_base', 'ccpd_blur', 'ccpd_challenge', 'ccpd_db',
               'ccpd_fn', 'ccpd_rotate', 'ccpd_tilt', 'ccpd_weather')
    image_roots = [os.path.join(ccpd_path, subset) for subset in subsets]

    # Make sure the directory of BOTH output files exists (previously only the
    # train list's directory was created).
    for list_path in (train_list_file_path, test_list_file_path):
        out_dir = os.path.dirname(list_path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

    train_counter = 0
    test_counter = 0
    # 'with' guarantees both lists are flushed and closed even if a folder is
    # missing or a file name cannot be parsed half-way through.
    with open(train_list_file_path, 'w') as fout_train, \
            open(test_list_file_path, 'w') as fout_test:
        for root in image_roots:
            print(root)
            file_name_list = [name for name in os.listdir(root) if name.endswith('.jpg')]
            # Shuffle so the train/test split is random within each folder.
            random.shuffle(file_name_list)
            split_index = int(len(file_name_list) * train_proportion)

            for file_name in file_name_list[:split_index]:
                fout_train.write(_format_list_line(root, file_name) + '\n')
                train_counter += 1
                print(train_counter)

            for file_name in file_name_list[split_index:]:
                print("test", file_name)
                fout_test.write(_format_list_line(root, file_name) + '\n')
                test_counter += 1
                print(test_counter)


def _format_list_line(root, file_name):
    """Return one list line: image path, the constant ',1,1,' columns, 12 coordinates."""
    coords = annotation_from_name(file_name)
    return ','.join([os.path.join(root, file_name), '1', '1'] + [str(value) for value in coords])
if __name__ == '__main__':
    # Each line written to the list files has the form:
    # [image path],[pos/neg flag],[num of bboxes],[x1],[y1],[x2],[y2],
    # followed by the (x, y) coordinates of the four plate vertices.
    generate_data_list(ccpd_path=r'D:\data\CCPD2019')
模型的路径
import sys
sys.path.append("D:/Detection/Pytorch_Retina_License_Plate-master")
ccpd_2_coco
# -*- coding: utf-8 -*-#
# python ccpd_2_coco.py --data "./data"
import datetime
import json
import cv2
from random import randint
import numpy as np
from pathlib import Path
from PIL import Image
import os
import argparse
# Command-line interface: --data (required) is the directory holding the images.
parser = argparse.ArgumentParser()
parser.add_argument("--data",
default=None,
type=str,
required=True,
help="The input data dir. Should contain all the images")
# NOTE: arguments are parsed at module level, i.e. as soon as this code is
# executed or imported; without --data argparse reports an error and exits.
args = parser.parse_args()
# Directory that main() scans for image files.
IMAGE_DIR = Path(args.data)
# Top-level "info" section of the generated COCO file.
INFO = {
    "description": "CCPD Dataset in COCO Format",
    "url": "",
    # Current UTC time formatted like '2019-04-30 02:17:49.040415'.
    # datetime.utcnow() is deprecated since Python 3.12, so take an aware UTC
    # "now" and drop the tzinfo to keep exactly the same text format.
    "date_created": datetime.datetime.now(datetime.timezone.utc)
    .replace(tzinfo=None)
    .isoformat(' '),
}

# "licenses" section of the generated COCO file.
LICENSES = [
    {
        "id": 1,
        "name": "ALL RIGHTS ",
        "url": "",
    },
]

# "categories" section; main() only ever assigns category id 1.
CATEGORIES = [
    {
        'id': 1,
        'name': 'license plate',
        'supercategory': 'shape',
    },
    {
        'id': 2,
        'name': 'background',
        'supercategory': 'shape',
    },
]
def random_color(class_id):
    """Return a colour tuple for a class id.

    Twelve colours are predefined (described in the original note as enough
    for basically all "kjdz" label types); ids outside that table get a
    random colour.
    Colour reference: https://tool.oschina.net/commons?type=3

    Args:
        class_id: Integer class index.

    Returns:
        A 3-tuple of ints in 0..255.
    """
    palette = [
        (255, 0, 0),      # red
        (255, 255, 0),    # yellow
        (0, 255, 0),      # green
        (0, 0, 255),      # blue
        (160, 32, 240),   # purple
        (165, 42, 42),    # brown
        (238, 201, 0),    # gold
        (255, 110, 180),  # HotPink1
        (139, 0, 0),      # DarkRed
        (0, 139, 139),    # DarkCyan
        (139, 0, 139),    # DarkMagenta
        (0, 0, 139),      # dark blue
    ]
    # Bound by the table length: the previous 'class_id < 11' test made the
    # 12th colour (index 11) unreachable, and negative ids wrapped around.
    if 0 <= class_id < len(palette):
        return palette[class_id]
    # More classes than predefined colours: fall back to a random colour.
    return (randint(0, 255), randint(0, 255), randint(0, 255))
# Extract the bounding box and the plate polygon from a CCPD image path.
# Input:  image path
# Output: bounding box, four vertex locations
def get_info(im_file):
    """Decode bounding box and plate vertices from a CCPD file name.

    Args:
        im_file: Path (``str`` or ``Path``) of a CCPD image whose name follows
            the '-'-separated CCPD convention, e.g.
            ``0236-16_34-222&408_398&520-377&473_222&520_243&455_398&408-...jpg``.

    Returns:
        ``([left, top, width, height], vertices)`` where ``vertices`` is a list
        of ``[x, y]`` float pairs, one per plate corner, in file-name order.

    Raises:
        IndexError: If the name has fewer than four '-'-separated fields.
        ValueError: If the bounding-box field is not two ``x&y`` points.
    """
    # Path.stem removes directory and extension with the platform's own
    # separator rules; splitting on a literal '/' failed for Windows paths.
    fields = Path(im_file).stem.split('-')

    # Field 2: "x&y_x&y" -> top-left and bottom-right corners.
    (left, top), (right, bottom) = [
        [float(value) for value in point.split('&')] for point in fields[2].split('_')
    ]
    # Field 3: four "x&y" vertices of the plate.
    segmentation = [
        [float(value) for value in point.split('&')] for point in fields[3].split('_')
    ]
    return [left, top, right - left, bottom - top], segmentation
# Area of an arbitrary polygon whose vertices are listed in clockwise or
# counter-clockwise order.
def compute_polygon_area(points):
    """Return the polygon area using the shoelace formula.

    Args:
        points: Sequence of ``(x, y)`` vertices in clockwise or
            counter-clockwise order.

    Returns:
        The non-negative area as a float; ``0.0`` for fewer than three points.
    """
    point_num = len(points)
    if point_num < 3:
        return 0.0
    # For i == 0 the index i - 1 is -1, i.e. the last vertex, which closes the
    # polygon; the modulo closes it at the other end.
    twice_area = sum(
        points[i][1] * (points[i - 1][0] - points[(i + 1) % point_num][0])
        for i in range(point_num)
    )
    return abs(twice_area / 2.0)
def main(image_dir=None, output_json=None):
    """Write a COCO-format annotation file for the CCPD images in a directory.

    Every image yields one "images" entry and one "annotations" entry whose
    bounding box and plate polygon are decoded from the file name by
    ``get_info``.

    Args:
        image_dir: Directory to scan. Defaults to the module-level
            ``IMAGE_DIR`` (taken from ``--data``).
        output_json: Destination file. Defaults to the original hard-coded
            location under ``D:`` (see below).
    """
    image_dir = IMAGE_DIR if image_dir is None else Path(image_dir)
    if output_json is None:
        # Raw string: the former f-string had no placeholders and relied on
        # invalid escape sequences, which newer Python versions warn about.
        output_json = Path(r'D:\data\CCPD2019\ccpd_annotations.json')
    else:
        output_json = Path(output_json)

    # Content of the COCO label file (e.g. training2017.json).
    coco_output = {
        "info": INFO,
        "licenses": LICENSES,
        "categories": CATEGORIES,
        "images": [],
        "annotations": [],
    }

    # Ids start at 1 and grow by one per image / per annotation.
    image_id = 1
    annotation_id = 1

    # Only pick up image files, so stray files in the folder (for example a
    # previously written JSON) do not crash the run.
    image_suffixes = {'.jpg', '.jpeg', '.png', '.bmp'}
    im_files = [f for f in image_dir.iterdir()
                if f.is_file() and f.suffix.lower() in image_suffixes]
    # Fixed ordering so ids are reproducible between runs.
    im_files.sort(key=lambda f: f.stem, reverse=True)

    for im_file in im_files:
        # Only the size is needed; close the image immediately so handles do
        # not pile up over a large dataset.
        with Image.open(im_file) as image:
            width, height = image.size

        coco_output['images'].append({
            "id": image_id,
            "width": width,
            "height": height,
            "file_name": im_file.name,
            "license": None,
            "flickr_url": None,
            "coco_url": None,
            "date_captured": "2019/05/20",
        })

        # Bounding box plus the four plate corners, decoded from the name.
        bounding_box, vertices = get_info(im_file)
        coco_output['annotations'].append({
            "id": annotation_id,
            "image_id": image_id,
            "category_id": 1,  # the only labelled class is "license plate"
            # COCO polygons are flat [x1, y1, x2, y2, ...] lists, one per polygon.
            "segmentation": [[coord for vertex in vertices for coord in vertex]],
            # Polygon area (tighter than the bounding-box area).
            "area": compute_polygon_area(vertices),
            "bbox": bounding_box,
            "iscrowd": 0,
        })
        # Annotation ids must be unique; previously this stayed at 1 forever.
        annotation_id += 1

        print(image_id)
        image_id += 1

    # Save as JSON.
    print("[INFO] Storing annotations json file...")
    output_json.parent.mkdir(parents=True, exist_ok=True)
    with output_json.open('w', encoding='utf-8') as f:
        json.dump(coco_output, f)
    print("[INFO] Annotations JSON file saved in:", str(output_json))
if __name__ == "__main__":
    # Run the CCPD -> COCO conversion when executed as a script.
    main()
COCO2YOLO
"""
COCO 格式的数据集转化为 YOLO 格式的数据集
--json_path 输入的json文件路径
--save_path 保存的文件夹名字,默认为当前目录下的labels。
"""
import os
import json
from tqdm import tqdm
import argparse
# Command-line options; both have defaults, so the script also runs without arguments.
parser = argparse.ArgumentParser()
parser.add_argument('--json_path', default='./instances_val2017.json',type=str, help="input: coco format(json)")
parser.add_argument('--save_path', default='./labels', type=str, help="specify where to save the output dir of labels")
# NOTE: parsed at module level (not inside the __main__ guard below).
arg = parser.parse_args()
def convert(size, box):
    """Convert a COCO pixel box to a normalised YOLO box.

    Args:
        size: ``(image_width, image_height)`` in pixels.
        box: COCO box ``(x_left, y_top, width, height)`` in pixels.

    Returns:
        ``(x_center, y_center, width, height)``, each divided by the image
        width or height respectively.

    Raises:
        ZeroDivisionError: If the image width or height is 0.
    """
    # Reciprocals are computed once and multiplied in, as in the original, so
    # the produced floats are bit-identical.
    dw = 1. / (size[0])
    dh = 1. / (size[1])
    box_w = box[2]
    box_h = box[3]
    # COCO stores the top-left corner; YOLO wants the box centre.
    x_center = box[0] + box_w / 2.0
    y_center = box[1] + box_h / 2.0
    return (x_center * dw, y_center * dh, box_w * dw, box_h * dh)
if __name__ == '__main__':
    json_file = arg.json_path          # COCO "object instances" annotation file
    ana_txt_save_path = arg.save_path  # output directory for the YOLO labels

    # Close the input file deterministically; JSON text is UTF-8.
    with open(json_file, 'r', encoding='utf-8') as f_json:
        data = json.load(f_json)

    os.makedirs(ana_txt_save_path, exist_ok=True)

    # COCO category ids are not necessarily contiguous: map them to 0..n-1 in
    # file order, the same order in which classes.txt is written.
    id_map = {}
    with open(os.path.join(ana_txt_save_path, 'classes.txt'), 'w') as f:
        for i, category in enumerate(data['categories']):
            f.write(f"{category['name']}\n")
            id_map[category['id']] = i

    # Group annotations by image once, instead of rescanning the complete
    # annotation list for every image (O(images * annotations)).
    # File order is preserved inside each group.
    anns_by_image = {}
    for ann in data['annotations']:
        anns_by_image.setdefault(ann['image_id'], []).append(ann)

    for img in tqdm(data['images']):
        filename = img["file_name"]
        img_width = img["width"]
        img_height = img["height"]
        img_id = img["id"]
        head, tail = os.path.splitext(filename)
        ana_txt_name = head + ".txt"  # label file shares the image's base name
        # An (empty) label file is still written for images without annotations.
        with open(os.path.join(ana_txt_save_path, ana_txt_name), 'w') as f_txt:
            for ann in anns_by_image.get(img_id, []):
                box = convert((img_width, img_height), ann["bbox"])
                f_txt.write("%s %s %s %s %s\n" % (id_map[ann["category_id"]], box[0], box[1], box[2], box[3]))
1.修改数据集的格式
/data1/mydata/CCPD2019/ccpd_weather
python predeal_data.py --data /data1/mydata/CCPD2019/ccpd_weather
YOLO 数据目录结构
https://github.com/Weifeng-Chen/DL_tools/blob/main/coco2yolo.py
python coco2yolo.py --json_path /data1/mydata/CCPD2019/TestTrain/label/train/ccpd_annotations.json --save_path /data1/mydata/CCPD2019/TestTrain/labels
parser.add_argument('--json_path', default='./instances_val2017.json',type=str, help="input: coco format(json)")
parser.add_argument('--save_path', default='./labels', type=str, help="specify where to save the output dir of labels")
yolo的数据格式为 (x_center, y_center, w, h); 而coco里面的bbox格式为(x_left, y_top, w, h) 。
2.修改配置文件
vim mydataset.yaml
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: /data1/mydata/CCPD2019/label_ccpd/ccpd_annotations.json, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
train: /data1/mydata/CCPD2019/TestTrain/images/train # train images (relative to 'path') 128 images
val: /data1/mydata/CCPD2019/TestTrain/images/train # val images (relative to 'path') 128 images
test: # test images (optional)
3.修改train中的下载部分
提前下载到相应的位置中
01.Downloading https://ultralytics.com/assets/Arial.ttf to /root/.config/Ultralytics/Arial.ttf...
02.Downloading pt to weight dir
python train.py --img 640 --batch 16 --epochs 3 --data mydataset.yaml --weights yolov5s.pt
/opt/License-Plate-Detector-yolo# python train.py --img 640 --batch 16 --epochs 3 --data mydataset.yaml --weights ./weights/yolov5s.pt
# 需要满足特定的目录结构
No labels found in /data1/mydata/CCPD2019/TestTrain/labels/train.cache. Can not train without labels.
4.检测
cd /opt/yolov5-master
python detect.py --weights /opt/yolov5-master/runs/train/exp/weights/best.pt --data /opt/yolov5-master/data/mydataset.yaml --source /data1/mCamera012000.jpg
/opt/yolov5-master/runs/train/exp/weights/last.pt
参考
https://github.com/gm19900510/Pytorch_Retina_License_Plate/blob/master/prepare_data/reformat_CCPD.py
License Plate Detection with Yolov5 https://github.com/zeusees/License-Plate-Detector
https://github.com/weidafeng/CCPD2COCO
https://github.com/Weifeng-Chen/DL_tools/blob/main/coco2yolo.py