图像ocr识别(一)

最近研究了一下OCR识别。本文介绍用OpenCV找出字符区域的方法,虽然还不完善,但先记录下来;后续打算往CNN+RNN+CTC方向走,这里就当作练手了。

效果1:

效果2:

效果3:

效果4(识别效果不太好,只识别出了大框,字符分割有问题):

import cv2
import imutils
import numpy as np
from imageio import imread
import math
import matplotlib.pyplot as plt


def point_distance(p1, p2):
    """Return the Euclidean distance between two 2-D points.

    Args:
        p1: Indexable whose items 0 and 1 are the x and y coordinates.
        p2: Second point, same layout as ``p1``.

    Returns:
        The straight-line distance as a float.
    """
    # math.pow works on floats, so convert up front to keep the same arithmetic.
    delta_x = float(p2[0] - p1[0])
    delta_y = float(p2[1] - p1[1])
    return math.sqrt(delta_x * delta_x + delta_y * delta_y)


def calc_height_width(box):
    """Return the two side lengths of a four-corner box.

    Note that, despite the function name, the tuple is ordered
    ``(width, height)``: the first item is the distance between corners 0
    and 1, the second the distance between corners 0 and 3.

    Args:
        box: Sequence of corner points, each indexable as ``(x, y)``.

    Returns:
        ``(width, height)`` as floats.
    """
    anchor = box[0]
    return (
        point_distance(box[1], anchor),
        point_distance(anchor, box[3]),
    )


# Base name (without extension) of the input picture inside ``imgs/``.
fileName = 'test1'

# A forward slash is accepted on Windows and POSIX alike; the previous
# ``'imgs\'`` literal escaped its own closing quote and did not parse.
img = imread('imgs/' + fileName + '.jpg')
img = imutils.resize(img, width=1920, height=2080)

# Morphological close followed by an erosion before edge detection.
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
closed = cv2.morphologyEx(img, cv2.MORPH_CLOSE, kernel)
eroded = cv2.erode(closed, kernel)

cannyImg = cv2.Canny(eroded, 200, 200)
# The very large kernel smears neighbouring edges together so that a whole
# line of text tends to end up as one connected blob after thresholding.
blurred = cv2.GaussianBlur(cannyImg, (105, 105), 0)

_, skin = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

# OpenCV 3.x returns (image, contours, hierarchy) while 2.x/4.x return
# (contours, hierarchy); the contour list is second-to-last in both layouts.
found = cv2.findContours(skin, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
contours = sorted(found[-2], key=cv2.contourArea, reverse=True)

# Keep only rotated bounding boxes whose short side is 10-20 % of the long
# side and whose sides are both at least 20 px.
boxes = []
for c in contours:
    rect = cv2.minAreaRect(c)
    # np.int0 was removed in NumPy 2.0; astype(np.intp) is the same conversion.
    box = cv2.boxPoints(rect).astype(np.intp)
    (w, h) = calc_height_width(box)
    if w < 20 or h < 20:
        continue
    rate1 = h / w * 100
    rate2 = w / h * 100
    if (10 <= rate1 <= 20) or (10 <= rate2 <= 20):
        print((w, h), '--------', rate1, '%', rate2, '%')
        boxes.append(box)

img = img.copy()
i = 0


def parse_chars(positions, min_thresh, min_range, max_range):
    """Split a column profile into character segments.

    ``positions`` is scanned left to right. A run opens at the first value
    strictly above ``min_thresh``. It is closed and recorded when either

    * a value strictly below ``min_thresh`` is met and the run is at least
      ``min_range`` wide, or
    * the run is still above the threshold but already wider than
      ``max_range`` (forced split).

    A dip below the threshold that comes before ``min_range`` is reached
    does not close the run, so narrow fragments are merged with what
    follows. Values equal to ``min_thresh`` are ignored. A run that is
    still open when the input ends is not recorded.

    Args:
        positions: Sequence of per-column counts.
        min_thresh: Count separating "ink" columns from background.
        min_range: Minimum width of a segment closed by a gap.
        max_range: Width beyond which an uninterrupted run is cut.

    Returns:
        List of ``{'begin': int, 'end': int}`` dicts in scan order.
    """
    char_infos = []
    # None (not 0) marks "no run open", so a run may start at column 0.
    begin = None
    for idx, count in enumerate(positions):
        if count > min_thresh:
            if begin is None:
                begin = idx
            elif idx - begin > max_range:
                char_infos.append({'begin': begin, 'end': idx})
                begin = None
        elif count < min_thresh and begin is not None:
            if idx - begin >= min_range:
                char_infos.append({'begin': begin, 'end': idx})
                begin = None

    return char_infos


def process_more(windowName, imgSrc):
    """Segment one rectified text strip into character columns and show it.

    The strip is closed, eroded, run through Canny, dilated and binarised.
    The white pixels of every column are then counted and that profile is
    passed to ``parse_chars``. Each returned segment is drawn as a green
    rectangle on a copy of the input, which is resized to 450 px width and
    displayed.

    Args:
        windowName: Title of the ``cv2.imshow`` window.
        imgSrc: Image of a single text strip; the caller's array is not
            modified.
    """
    ori_imgSrc = imgSrc.copy()

    kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (3, 3))
    closed = cv2.morphologyEx(imgSrc, cv2.MORPH_CLOSE, kernel)
    imgSrc = cv2.erode(closed, kernel)

    imgSrc = cv2.Canny(imgSrc, 300, 300)
    kernel = np.ones((5, 5), np.uint8)
    imgSrc = cv2.dilate(imgSrc, kernel, iterations=1)
    _, imgSrc = cv2.threshold(imgSrc, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # shape[0] is the number of rows, shape[1] the number of columns.
    rows = ori_imgSrc.shape[0]

    # Number of white pixels in each column (vertical projection profile).
    tags = np.count_nonzero(imgSrc == 255, axis=0)

    char_positions = parse_chars(positions=tags, min_thresh=8, min_range=25, max_range=100)

    print(len(char_positions))
    for p in char_positions:
        leftTop = (p['begin'], 0)
        rightBottom = (p['end'], rows - 2)
        cv2.rectangle(ori_imgSrc, leftTop, rightBottom, (0, 255, 0), 2)

    ori_imgSrc = imutils.resize(ori_imgSrc, width=450)
    cv2.imshow(windowName, ori_imgSrc)


# Rectify every candidate box into an axis-aligned strip and segment it.
for i, box in enumerate(boxes):
    (w, h) = calc_height_width(box)
    if w > h:
        # Source corners in the order top-left, bottom-left, top-right,
        # taken by the author as box vertices 3, 2, 4 (1-based).
        # NOTE(review): relies on the vertex order of cv2.boxPoints - confirm.
        matSrc = np.float32([
            [box[2][0], box[2][1]],
            [box[1][0], box[1][1]],
            [box[3][0], box[3][1]],
        ])
        matDst = np.float32([
            [0, 0],
            [0, h],
            [w, 0],
        ])
        matAffine = cv2.getAffineTransform(matSrc, matDst)
        dst = cv2.warpAffine(img, matAffine, (int(w), int(h)))
    else:
        # The box is taller than wide, so the roles of the sides swap: the
        # author uses box vertices 4, 3, 1 (1-based; noted as top-right,
        # top-left, bottom-right) for the same three destination corners.
        # NOTE(review): relies on the vertex order of cv2.boxPoints - confirm.
        matSrc = np.float32([
            [box[3][0], box[3][1]],
            [box[2][0], box[2][1]],
            [box[0][0], box[0][1]],
        ])
        matDst = np.float32([
            [0, 0],
            [0, w],
            [h, 0],
        ])
        matAffine = cv2.getAffineTransform(matSrc, matDst)
        dst = cv2.warpAffine(img, matAffine, (int(h), int(w)))

    process_more("ffff222asdfas" + str(i), dst.copy())

img = imutils.resize(img, width=600, height=600)
cv2.imshow("Frame6", img)
cv2.waitKey(100000)
cv2.destroyAllWindows()

  

原文地址:https://www.cnblogs.com/aarond/p/OCR-1.html