(3)data augmentation——pixel-wise

先说一下:做data augmentation的目的是减少噪声对模型的影响,让模型真正学习到目标的特征。由于yolov3的这一模块特别典型,故以它为例进行说明,主要包括以下几个部分:

 在这之前先进行图像融合(mixup),即随机选取两张图像进行融合:

 我们只看图像操作部分:先确定融合后图像的尺寸,取两张图像中较大的w和h;然后以比例λ对两张图像加权求和,即 mix = λ·img1 + (1-λ)·img2:

 除了v3采用的这种mixup,还有两种是别的论文中的:

cutmix和Mosaic

论文名称:CutMix: Regularization Strategy to Train Strong Classifiers with Localizable Features

论文地址:

开源地址:

简单来说cutmix相当于cutout+mixup的结合,可以应用于各种任务中:

而Mosaic增强是YOLOv4论文提出的,属于cutmix的扩展:cutmix是两张图混合,而Mosaic(马赛克)增强是4张图混合。好处非常明显:一张图相当于4张图,等价于增大了batch,因此可以显著减小训练所需的batch size。

 

 然后是调整图像的hsv值,关于图像的颜色空间参照博客

接下来随机比例扩展图像,多余的部分用固定颜色填充:

 

 然后对图像进行随机裁剪(可能会丢掉一些box,留下那些满足iou界限的box):

 然后就可以把图像resize到固定的尺寸(416×416),有两种方式可以选择(直接缩放,或保持宽高比的letterbox缩放):

 最后还能随机进行翻转操作,比如变成这样:

 以上就是yolov3所有的data augmentation过程,代码在下面。

  1 import numpy as np
  2 import cv2
  3 import random
  4 
  5 def mix_up(img1, img2):
  6     '''
  7     return:
  8         mix_img: HWC format mix up image
  9         mix_bbox: [N, 5] shape mix up bbox, i.e. `x_min, y_min, x_max, y_mix, mixup_weight`.
 10     '''
 11     height = max(img1.shape[0], img2.shape[0])
 12     width = max(img1.shape[1], img2.shape[1])
 13 
 14     mix_img = np.zeros(shape=(height, width, 3), dtype='float32')
 15 
 16     # rand_num = np.random.random()
 17     rand_num = np.random.beta(1.5, 1.5)
 18     rand_num = max(0, min(1, rand_num))
 19     rand_num = 1
 20     mix_img[:img1.shape[0], :img1.shape[1], :] = img1.astype('float32') * rand_num
 21     mix_img[:img2.shape[0], :img2.shape[1], :] += img2.astype('float32') * (1. - rand_num)
 22 
 23     mix_img = mix_img.astype('uint8')
 24 
 25     return mix_img
 26 
 27 def random_color_distort(img, brightness_delta=32, hue_vari=18, sat_vari=0.5, val_vari=0.5):
 28     '''
 29     randomly distort image color. Adjust brightness, hue, saturation, value.
 30     param:
 31         img: a BGR uint8 format OpenCV image. HWC format.
 32     '''
 33 
 34     def random_hue(img_hsv, hue_vari, p=0.5):
 35         if np.random.uniform(0, 1) > p:
 36             hue_delta = np.random.randint(-hue_vari, hue_vari)
 37             img_hsv[:, :, 0] = (img_hsv[:, :, 0] + hue_delta) % 180
 38         return img_hsv
 39 
 40     def random_saturation(img_hsv, sat_vari, p=0.5):
 41         if np.random.uniform(0, 1) > p:
 42             sat_mult = 1 + np.random.uniform(-sat_vari, sat_vari)
 43             img_hsv[:, :, 1] *= sat_mult
 44         return img_hsv
 45 
 46     def random_value(img_hsv, val_vari, p=0.5):
 47         if np.random.uniform(0, 1) > p:
 48             val_mult = 1 + np.random.uniform(-val_vari, val_vari)
 49             img_hsv[:, :, 2] *= val_mult
 50         return img_hsv
 51 
 52     def random_brightness(img, brightness_delta, p=0.5):
 53         if np.random.uniform(0, 1) > p:
 54             img = img.astype(np.float32)
 55             brightness_delta = int(np.random.uniform(-brightness_delta, brightness_delta))
 56             img = img + brightness_delta
 57         return np.clip(img, 0, 255)
 58 
 59     # brightness
 60     img = random_brightness(img, brightness_delta)
 61     img = img.astype(np.uint8)
 62 
 63     # color jitter
 64     img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV).astype(np.float32)
 65 
 66     if np.random.randint(0, 2):
 67         img_hsv = random_value(img_hsv, val_vari)
 68         img_hsv = random_saturation(img_hsv, sat_vari)
 69         img_hsv = random_hue(img_hsv, hue_vari)
 70     else:
 71         img_hsv = random_saturation(img_hsv, sat_vari)
 72         img_hsv = random_hue(img_hsv, hue_vari)
 73         img_hsv = random_value(img_hsv, val_vari)
 74 
 75     img_hsv = np.clip(img_hsv, 0, 255)
 76     img = cv2.cvtColor(img_hsv.astype(np.uint8), cv2.COLOR_HSV2BGR)
 77 
 78     return img
 79 
 80 def random_expand(img, bbox, max_ratio=4, fill=255, keep_ratio=True):
 81     '''
 82     Random expand original image with borders, this is identical to placing
 83     the original image on a larger canvas.
 84     param:
 85     max_ratio :
 86         Maximum ratio of the output image on both direction(vertical and horizontal)
 87     fill :
 88         The value(s) for padded borders.
 89     keep_ratio : bool
 90         If `True`, will keep output image the same aspect ratio as input.
 91     '''
 92     h, w, c = img.shape
 93     ratio_x = random.uniform(1, max_ratio)
 94     if keep_ratio:
 95         ratio_y = ratio_x
 96     else:
 97         ratio_y = random.uniform(1, max_ratio)
 98 
 99     oh, ow = int(h * ratio_y), int(w * ratio_x)
100     off_y = random.randint(0, oh - h)
101     off_x = random.randint(0, ow - w)
102 
103     dst = np.full(shape=(oh, ow, c), fill_value=fill, dtype=img.dtype)
104 
105     dst[off_y:off_y + h, off_x:off_x + w, :] = img
106 
107     # correct bbox
108     bbox[:, :2] += (off_x, off_y)
109     bbox[:, 2:4] += (off_x, off_y)
110 
111     return dst, bbox
112 
def plot_one_box(img, coord, label=None, color=None, line_thickness=None):
    '''
    Draw one bounding box (and optionally its label) on the image, in place.

    coord: [x_min, y_min, x_max, y_max] format coordinates.
    img: img to plot on.
    label: str. The label name.
    color: sequence of 3 channel values. A random color is used when omitted.
    line_thickness: int. rectangle line thickness. When omitted it is derived
        from the image size.
    '''
    # 0.002 * longest side rounds to 0 for images under 250 px, which would
    # give a zero line thickness and a zero font scale; keep it at least 1.
    tl = line_thickness or max(int(round(0.002 * max(img.shape[0:2]))), 1)  # line thickness
    color = color or [random.randint(0, 255) for _ in range(3)]
    c1, c2 = (int(coord[0]), int(coord[1])), (int(coord[2]), int(coord[3]))
    cv2.rectangle(img, c1, c2, color, thickness=tl)
    if label:
        tf = max(tl - 1, 1)  # font thickness
        t_size = cv2.getTextSize(label, 0, fontScale=float(tl) / 3, thickness=tf)[0]
        # Filled background sized to the text, sitting just above the top-left corner.
        c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
        cv2.rectangle(img, c1, c2, color, -1)  # filled
        cv2.putText(img, label, (c1[0], c1[1] - 2), 0, float(tl) / 3, [0, 0, 0], thickness=tf, lineType=cv2.LINE_AA)
131 
def bbox_crop(bbox, crop_box=None, allow_outside_center=True):
    """Clip bounding boxes to a crop window and re-express them in its frame.

    Boxes are clipped to the window, shifted so the window's top-left corner
    becomes the origin, and dropped when nothing of them is left. The input
    array is never modified; a copy is returned.

    Parameters
    ----------
    bbox : numpy.ndarray
        Array with shape (N, 4+). The first four columns are
        :math:`(x_{min}, y_{min}, x_{max}, y_{max})`; any further columns
        are carried through unchanged.
    crop_box : tuple
        Tuple of length 4, :math:`(x_{min}, y_{min}, width, height)`.
        A falsy offset counts as 0 and a falsy width/height as unbounded.
    allow_outside_center : bool
        If `False`, remove bounding boxes which have centers outside cropping area.

    Returns
    -------
    numpy.ndarray
        Cropped bounding boxes with shape (M, 4+) where M <= N.

    Raises
    ------
    ValueError
        If `crop_box` is given but does not have exactly 4 entries.
    """
    boxes = bbox.copy()
    if crop_box is None:
        return boxes
    if len(crop_box) != 4:
        raise ValueError(
            "Invalid crop_box parameter, requires length 4, given {}".format(str(crop_box)))
    if all(item is None for item in crop_box):
        return boxes

    x0, y0, width, height = crop_box
    x0 = x0 if x0 else 0
    y0 = y0 if y0 else 0
    x1 = x0 + (width if width else np.inf)
    y1 = y0 + (height if height else np.inf)
    window = np.array((x0, y0, x1, y1))

    if allow_outside_center:
        keep = np.ones(boxes.shape[0], dtype=bool)
    else:
        # Centers are taken from the boxes before they are clipped.
        mid = (boxes[:, :2] + boxes[:, 2:4]) / 2
        keep = np.logical_and(window[:2] <= mid, mid < window[2:]).all(axis=1)

    # Clip to the window, then move the origin to the window's corner.
    boxes[:, :2] = np.maximum(boxes[:, :2], window[:2])
    boxes[:, 2:4] = np.minimum(boxes[:, 2:4], window[2:4])
    boxes[:, :2] -= window[:2]
    boxes[:, 2:4] -= window[:2]

    # A box that collapsed to zero width or height has left the window.
    keep = np.logical_and(keep, (boxes[:, :2] < boxes[:, 2:4]).all(axis=1))
    return boxes[keep]
185 
def bbox_iou(bbox_a, bbox_b, offset=0):
    """Compute the pairwise Intersection-Over-Union (IOU) of two box sets.

    Parameters
    ----------
    bbox_a : numpy.ndarray
        An ndarray with shape :math:`(N, 4)`.
    bbox_b : numpy.ndarray
        An ndarray with shape :math:`(M, 4)`.
    offset : float or int, default is 0
        Added to every side length, i.e. a width is computed as
        (right - left + ``offset``). Use 0 for normalized boxes in ``[0, 1]``.

    Returns
    -------
    numpy.ndarray
        An ndarray with shape :math:`(N, M)`; entry (i, j) is the IOU of
        ``bbox_a[i]`` and ``bbox_b[j]``.

    Raises
    ------
    IndexError
        If either input has fewer than 4 columns.
    """
    if min(bbox_a.shape[1], bbox_b.shape[1]) < 4:
        raise IndexError("Bounding boxes axis 1 must have at least length 4")

    a_min, a_max = bbox_a[:, :2], bbox_a[:, 2:4]
    b_min, b_max = bbox_b[:, :2], bbox_b[:, 2:4]

    # Broadcast (N, 1, 2) against (M, 2) to get every pairwise intersection.
    inter_min = np.maximum(a_min[:, None, :], b_min)
    inter_max = np.minimum(a_max[:, None, :], b_max)

    # Pairs that do not overlap on both axes contribute zero area.
    overlaps = (inter_min < inter_max).all(axis=2)
    inter = np.prod(inter_max - inter_min + offset, axis=2) * overlaps

    area_a = np.prod(a_max - a_min + offset, axis=1)
    area_b = np.prod(b_max - b_min + offset, axis=1)
    union = area_a[:, None] + area_b - inter
    return inter / union
214 
def random_crop_with_constraints(bbox, size, min_scale=0.3, max_scale=1,
                                 max_aspect_ratio=2, constraints=None,
                                 max_trial=50):
    """Crop an image randomly with bounding box constraints.
    This data augmentation is used in training of
    Single Shot Multibox Detector [#]_. More details can be found in
    data augmentation section of the original paper.
    .. [#] Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy,
       Scott Reed, Cheng-Yang Fu, Alexander C. Berg.
       SSD: Single Shot MultiBox Detector. ECCV 2016.

    Only the boxes and the crop window are computed here; slicing the image
    with the returned window is left to the caller.

    Parameters
    ----------
    bbox : numpy.ndarray
        Numpy.ndarray with shape (N, 4+) where N is the number of bounding boxes.
        The second axis represents attributes of the bounding box.
        Specifically, these are :math:`(x_{min}, y_{min}, x_{max}, y_{max})`,
        we allow additional attributes other than coordinates, which stay intact
        during bounding box transformations.
    size : tuple
        Tuple of length 2 of image shape as (width, height).
    min_scale : float
        The minimum ratio between a cropped region and the original image.
        The default value is :obj:`0.3`.
    max_scale : float
        The maximum ratio between a cropped region and the original image.
        The default value is :obj:`1`.
    max_aspect_ratio : float
        The maximum aspect ratio of cropped region.
        The default value is :obj:`2`.
    constraints : iterable of tuples
        An iterable of constraints.
        Each constraint should be :obj:`(min_iou, max_iou)` format.
        Setting :obj:`min_iou` or :obj:`max_iou` to :obj:`None` means no
        constraint on that side.
        If this argument defaults to :obj:`None`, :obj:`((0.1, None), (0.3, None),
        (0.5, None), (0.7, None), (0.9, None), (None, 1))` will be used.
    max_trial : int
        Maximum number of trials for each constraint before exit no matter what.

    Returns
    -------
    numpy.ndarray
        Cropped bounding boxes with shape :obj:`(M, 4+)` where M <= N.
    tuple
        Tuple of length 4 as (x_offset, y_offset, new_width, new_height).
    """
    # default params in paper
    if constraints is None:
        constraints = (
            (0.1, None),
            (0.3, None),
            (0.5, None),
            (0.7, None),
            (0.9, None),
            (None, 1),
        )

    w, h = size

    # The untouched full image is always one of the candidates.
    candidates = [(0, 0, w, h)]
    for min_iou, max_iou in constraints:
        min_iou = -np.inf if min_iou is None else min_iou
        max_iou = np.inf if max_iou is None else max_iou

        for _ in range(max_trial):
            scale = random.uniform(min_scale, max_scale)
            # Bounding the ratio by scale**2 and its inverse keeps both crop
            # sides within the image.
            aspect_ratio = random.uniform(
                max(1 / max_aspect_ratio, scale * scale),
                min(max_aspect_ratio, 1 / (scale * scale)))
            crop_h = int(h * scale / np.sqrt(aspect_ratio))
            crop_w = int(w * scale * np.sqrt(aspect_ratio))

            # Valid offsets are 0 .. (side - crop) inclusive. Without the +1
            # a crop spanning a full side raised ValueError (empty range) and
            # no crop could ever touch the bottom or right edge.
            crop_t = random.randrange(h - crop_h + 1)
            crop_l = random.randrange(w - crop_w + 1)
            crop_bb = np.array((crop_l, crop_t, crop_l + crop_w, crop_t + crop_h))

            # Nothing to constrain against: the first sampled window wins.
            if len(bbox) == 0:
                return bbox, (crop_l, crop_t, crop_w, crop_h)

            iou = bbox_iou(bbox, crop_bb[np.newaxis])
            if min_iou <= iou.min() and iou.max() <= max_iou:
                candidates.append((crop_l, crop_t, crop_w, crop_h))
                break

    # random select one; skip windows that would leave no box at all
    while candidates:
        crop = candidates.pop(np.random.randint(0, len(candidates)))
        new_bbox = bbox_crop(bbox, crop, allow_outside_center=False)
        if new_bbox.size < 1:
            continue
        return new_bbox, crop
    return bbox, (0, 0, w, h)
310 
def letterbox_resize(img, new_width, new_height, interp=0):
    '''
    Letterbox resize: scale the image to fit inside the target size while
    keeping its aspect ratio, and center it on a canvas filled with 128.

    param:
        img: image to resize; it is pasted onto a 3-channel uint8 canvas.
        new_width, new_height: size of the output canvas.
        interp: interpolation flag handed to cv2.resize.
    return:
        (padded image, resize ratio, horizontal padding dw, vertical padding dh)
    '''
    src_h, src_w = img.shape[:2]

    # The tighter of the two ratios makes the whole image fit.
    ratio = min(new_width / src_w, new_height / src_h)
    fit_w = int(ratio * src_w)
    fit_h = int(ratio * src_h)

    scaled = cv2.resize(img, (fit_w, fit_h), interpolation=interp)

    # Split the leftover space evenly between the two sides.
    pad_x = int((new_width - fit_w) / 2)
    pad_y = int((new_height - fit_h) / 2)

    canvas = np.full((new_height, new_width, 3), 128, np.uint8)
    canvas[pad_y: fit_h + pad_y, pad_x: fit_w + pad_x, :] = scaled

    return canvas, ratio, pad_x, pad_y
331 
def resize_with_bbox(img, bbox, new_width, new_height, interp=0, letterbox=False):
    '''
    Resize the image to (new_width, new_height) and map the boxes onto it.

    param:
        img: image to resize.
        bbox: array whose first four columns are x_min, y_min, x_max, y_max.
            It is rescaled in place and also returned.
        interp: interpolation flag handed to cv2.resize.
        letterbox: if True keep the aspect ratio and pad (see letterbox_resize),
            otherwise stretch the image to the target size.
    return:
        (resized image, corrected bbox)
    '''
    x_cols, y_cols = [0, 2], [1, 3]

    if not letterbox:
        src_h, src_w = img.shape[:2]
        stretched = cv2.resize(img, (new_width, new_height), interpolation=interp)

        # Each axis is scaled independently by its own factor.
        bbox[:, x_cols] = bbox[:, x_cols] / src_w * new_width
        bbox[:, y_cols] = bbox[:, y_cols] / src_h * new_height
        return stretched, bbox

    padded, ratio, pad_x, pad_y = letterbox_resize(img, new_width, new_height, interp)

    # One common scale factor, then the shift introduced by the padding.
    bbox[:, x_cols] = bbox[:, x_cols] * ratio + pad_x
    bbox[:, y_cols] = bbox[:, y_cols] * ratio + pad_y
    return padded, bbox
357 
def random_flip(img, bbox, px=0, py=0):
    '''
    Flip the image at random, horizontally and/or vertically, and mirror the
    boxes to match. The two flips are decided independently.

    param:
    img:
        image to flip
    bbox:
        array whose first four columns are x_min, y_min, x_max, y_max;
        it is updated in place and also returned
    px:
        the probability of horizontal flip
    py:
        the probability of vertical flip
    '''
    img_h, img_w = img.shape[:2]

    if np.random.uniform(0, 1) < px:
        img = cv2.flip(img, 1)
        # Mirroring swaps the roles of the left and right edges.
        mirrored_min, mirrored_max = img_w - bbox[:, 2], img_w - bbox[:, 0]
        bbox[:, 0] = mirrored_min
        bbox[:, 2] = mirrored_max

    if np.random.uniform(0, 1) < py:
        img = cv2.flip(img, 0)
        # Same for the top and bottom edges.
        mirrored_min, mirrored_max = img_h - bbox[:, 3], img_h - bbox[:, 1]
        bbox[:, 1] = mirrored_min
        bbox[:, 3] = mirrored_max

    return img, bbox
382 
383 
if __name__ == "__main__":
    # Demo: resize an image together with its boxes, flip it horizontally with
    # probability 0.5, draw the boxes, then show and save the result.
    img_path = "data/000001.jpg"
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports an unreadable file by returning None; fail here
        # with a clear message instead of an AttributeError further down.
        raise FileNotFoundError("Cannot read image: {}".format(img_path))

    boxes = np.array([[48, 240, 195, 371], [8, 12, 352, 198]])

    # The other augmentations can be tried at this point in the same way:
    # mix_img = mix_up(img1, img2)
    # img = random_color_distort(img)
    # img, boxes = random_expand(img, boxes)
    # h, w, _ = img.shape
    # boxes, crop = random_crop_with_constraints(boxes, (w, h))
    # x0, y0, w, h = crop
    # img = img[y0: y0 + h, x0: x0 + w]

    # Pick a random interpolation flag for cv2.resize.
    interp = np.random.randint(0, 5)
    img, boxes = resize_with_bbox(img, boxes, 416, 416, interp=interp, letterbox=False)
    img, boxes = random_flip(img, boxes, px=0.5)
    for box in boxes:
        plot_one_box(img, box)

    cv2.imshow("1", img)
    cv2.imwrite("crop.jpg", img)
    cv2.waitKey(0)
原文地址:https://www.cnblogs.com/super-zheng/p/13236645.html