import numpy as np
import cv2
import paddlehub as hub
# Module-level OCR handle, created once when this file is imported/run.
# char_recognition() calls ocr.recognize_text() on it.
ocr = hub.Module(name="chinese_ocr_db_crnn_mobile")
def char_recognition(pattern, *args):
    """Run OCR on every image in ``args`` and collect the recognized text.

    Only ``pattern == 3`` is implemented. For any other value no
    recognition is attempted and an empty list is returned.

    Args:
        pattern: Mode selector; must equal 3 for OCR to run.
        *args: Images, each passed on its own to ``ocr.recognize_text``
            (presumably OpenCV-style numpy arrays -- confirm against the
            PaddleHub module documentation).

    Returns:
        A list with the ``'text'`` field of every entry found under each
        result's ``'data'`` key, in the order they were returned.

    Side effects:
        Prints each recognized text, and requests visualization output in
        the ``ocr_result`` directory (``visualization=True``).
    """
    result_list = []
    if pattern != 3:
        return result_list

    for img in args:
        # One recognize_text call per image, exactly as the caller passed them.
        results = ocr.recognize_text(
            images=[img],
            use_gpu=False,
            output_dir='ocr_result',
            visualization=True,
            box_thresh=0.5,
            text_thresh=0.5)
        for result in results:
            for information in result['data']:
                print('text', ':', information['text'])
                result_list.append(information['text'])
    return result_list
def preprocessing(img):
    """Turn a BGR image into a blurred, binarized single-channel image.

    Steps: grayscale conversion, 5x5 Gaussian blur, then a fixed binary
    threshold at 60 (pixels above 60 become 255, the rest 0).

    The image is converted straight from BGR to gray. Converting to RGB
    first and then applying ``COLOR_BGR2GRAY`` would swap the red and blue
    luminance weights.

    Args:
        img: Image in OpenCV's BGR channel order (as returned by
            ``cv2.imread``).

    Returns:
        The thresholded image produced by ``cv2.threshold``.
    """
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Blur before thresholding so isolated noisy pixels do not survive.
    img_blurred = cv2.GaussianBlur(img_gray, (5, 5), 0)
    _, binary = cv2.threshold(img_blurred, 60, 255, cv2.THRESH_BINARY)
    return binary
def find_ppt(img, canvas=None):
    """Locate the largest contour in ``img``, taken to be the slide (PPT).

    Args:
        img: Single-channel image to run Canny edge detection on
            (the output of ``preprocessing`` in this script).
        canvas: Optional image to draw the selected contour on, in red
            with thickness 1. Nothing is drawn when omitted.

    Returns:
        ``(contours, max_index)``: all contours found, and the index of
        the one with the largest area (the first one wins on ties).

    Raises:
        ValueError: If edge detection yields no contours at all.
    """
    edges = cv2.Canny(img, 100, 200)
    # findContours returns (image, contours, hierarchy) on OpenCV 3.x but
    # (contours, hierarchy) on 2.x/4.x; the contours are always the
    # second-to-last element.
    contours = cv2.findContours(
        edges, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]
    if not contours:
        raise ValueError('no contours found in image')

    # Of all the contours, the one enclosing the largest area is the slide.
    max_index = max(range(len(contours)),
                    key=lambda i: cv2.contourArea(contours[i]))

    if canvas is not None:
        cv2.drawContours(canvas, [contours[max_index]], -1, (0, 0, 255), 1)
    return contours, max_index
def _select_corner(hull, x_sign, y_sign, x_slack, y_slack):
    """Greedily pick one extreme hull point in a single pass.

    A sign of +1 means "prefer smaller values" on that axis, -1 means
    "prefer larger values". A candidate replaces the current best when, on
    both axes, it is no worse than the best by more than the slack.
    A negative slack means it must beat the best by at least that much.

    Returns the chosen ``(x, y)`` as plain ints, or ``None`` for an empty hull.
    """
    # Best values are kept in "signed" space so one comparison serves both
    # the minimizing and the maximizing direction. Starting from +inf
    # guarantees the first hull point is always accepted, whatever the
    # image size.
    best_x = best_y = float('inf')
    corner = None
    for point in hull:
        x, y = int(point[0][0]), int(point[0][1])
        key_x, key_y = x_sign * x, y_sign * y
        if key_x <= best_x + x_slack and key_y <= best_y + y_slack:
            best_x, best_y = key_x, key_y
            corner = (x, y)
    return corner


def find_ppt_zuobiao(img_bgr, contours, max_index):
    """Estimate the four corner coordinates of the slide contour.

    The convex hull of a photographed slide is not a clean rectangle, so it
    has many more than four points. Four heuristic passes over the hull pick
    the points that best match each corner. Remember that OpenCV's origin
    is the top-left of the image, so "top" means small y.

    Args:
        img_bgr: Image to annotate in place with the contour (red) and the
            four chosen corners (filled blue circles).
        contours: Contours as returned by ``find_ppt``.
        max_index: Index into ``contours`` of the slide contour.

    Returns:
        ``[top_left, top_right, bottom_left, bottom_right]``, each an
        ``(x, y)`` tuple of ints.
    """
    hull = cv2.convexHull(contours[max_index])

    # (x_sign, y_sign, x_slack, y_slack) for each corner; the slack values
    # are the hand-tuned pixel tolerances of this heuristic.
    corner_rules = (
        (+1, +1, 18, -5),   # top-left:     x <= best + 18, y <= best - 5
        (-1, +1, 5, -5),    # top-right:    x >= best - 5,  y <= best - 5
        (+1, -1, -5, 10),   # bottom-left:  x <= best - 5,  y >= best - 10
        (-1, -1, 30, 10),   # bottom-right: x >= best - 30, y >= best - 10
    )
    points = [_select_corner(hull, *rule) for rule in corner_rules]

    cv2.drawContours(img_bgr, [contours[max_index]], -1, (0, 0, 255), 1)
    for corner in points:
        cv2.circle(img_bgr, corner, 3, (255, 0, 0), -1)
    return points
def TouYingBianHuan(img_bgr, points):
    """Warp the slide to a front-on view, run OCR on it and display it.

    ("TouYingBianHuan" = perspective transform.)

    Args:
        img_bgr: Source image; the warped output has the same width and
            height before it is resized to 1000x700.
        points: Four source corners ordered top-left, top-right,
            bottom-left, bottom-right (the order ``find_ppt_zuobiao``
            returns), mapped onto the matching corners of the full image.

    Returns:
        The list of texts returned by ``char_recognition`` for the
        rectified image.

    Side effects:
        Shows the rectified image in a window named ``'res'`` and blocks
        until a key is pressed, then closes all OpenCV windows.
    """
    # Take the size from the image itself instead of relying on
    # module-level globals that only exist when run as a script.
    h, w = img_bgr.shape[:2]
    dst = np.array(
        [[0, 0], [w - 1, 0], [0, h - 1], [w - 1, h - 1]], np.float32)
    src = np.array(points, np.float32)
    P = cv2.getPerspectiveTransform(src, dst)  # projection matrix
    warped = cv2.warpPerspective(img_bgr, P, (w, h), borderValue=125)
    res = cv2.resize(warped, (1000, 700), interpolation=cv2.INTER_CUBIC)
    texts = char_recognition(3, res)
    cv2.imshow('res', res)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    return texts
if __name__ == '__main__':
    # Keep in mind: OpenCV's origin is the top-left corner, so the y axis
    # is flipped compared with ordinary mathematical coordinates.
    img_bgr = cv2.imread('2.jpg')  # imread stores pixels in BGR order
    if img_bgr is None:
        # imread signals an unreadable or missing file by returning None
        # instead of raising.
        raise SystemExit("could not read image '2.jpg'")
    h, w = img_bgr.shape[:2]  # image height and width
    img = preprocessing(img_bgr)  # binarize: black and white only
    contours, max_index = find_ppt(img)  # find the slide region
    # The slide is a rectangle; get its four corner coordinates.
    points = find_ppt_zuobiao(img_bgr, contours, max_index)
    # Perspective transform: straighten the skewed slide.
    TouYingBianHuan(img_bgr, points)