案例@文本侦测与识别.zip资源-CSDN文库

共20个文件

py：6个

pyc：6个

bat：3个

图像识别

图像处理

python

人工智能

需积分：5 ｜ 90 浏览量 ｜ 2022-08-18 16:28:37 上传 ｜ 评论 ｜ 收藏 ｜ 86.83MB ｜ ZIP ｜ 举报

资源详情

资源评论

资源推荐

收起资源包目录

案例@文本侦测与识别.zip （20个子文件）

文本侦测与识别

data

2.jpg 252KB

1.jpg 37KB

test.mp4 915KB

run_core.bat 44B

wordRecognizer

models

frozen_east_text_detection.pb 92.18MB

core

__pycache__

ocr.cpython-37.pyc 4KB

ocr.py 7KB

main.py 253B

__pycache__

main.cpython-37.pyc 346B

uis

ocrapp.py 353B

ocrframe.py 2KB

__pycache__

ocrvideo.cpython-37.pyc 2KB

ocrframe.cpython-37.pyc 2KB

ocrapp.cpython-37.pyc 702B

ocrui.cpython-37.pyc 2KB

ocrui.py 2KB

tools.bat 28B

ocrvideo.py 2KB

ocr.ui 2KB

run_app.bat 30B

from imutils.object_detection import non_max_suppression
from PIL import Image, ImageDraw, ImageFont
import numpy as np
import cv2
import pytesseract


class OCRDetector:
    """Detect text regions with the EAST model and read them with Tesseract.

    Attributes:
        width: Width the input image is resized to; must be a multiple of 32.
        height: Height the input image is resized to; must be a multiple of 32.
        min_confidence: Probability threshold below which detections are dropped.
        modelpath: Path to the frozen EAST scene-text detector model file.
        padding: Fraction of each ROI's size added around it before OCR.
    """

    def __init__(self, width = 320, height = 320, min_confidence = 0.5,
                 modelpath = "wordRecognizer/models/frozen_east_text_detection.pb",
                 padding = 0.00):
        super(OCRDetector, self).__init__()
        # Resized image width; must be a multiple of 32.
        self.width = width
        # Resized image height; must be a multiple of 32.
        self.height = height
        # Probability threshold used to decide whether a region is text.
        self.min_confidence = min_confidence
        # Path of the EAST scene-text detector model file.
        self.modelpath = modelpath
        # Amount of padding added to each ROI border to widen the ROI.
        self.padding = padding
        # Lazily loaded network and the path it was loaded from, so that
        # repeated detect() calls do not re-read the model file.
        self._net = None
        self._net_path = None

    def _get_net(self):
        """Return the EAST network, loading it on first use or when modelpath changes."""
        # getattr keeps this working for instances built without __init__.
        net = getattr(self, "_net", None)
        if net is None or getattr(self, "_net_path", None) != self.modelpath:
            net = cv2.dnn.readNet(self.modelpath)
            self._net = net
            self._net_path = self.modelpath
        return net

    def decode_predictions(self, scores, geometry):
        """Turn the detector's raw outputs into boxes and probabilities.

        Args:
            scores: Score volume; indexed as ``scores[0, 0, y]`` per row.
            geometry: Geometry volume; channels 0-3 are box distances and
                channel 4 is the rotation angle, indexed ``geometry[0, c, y]``.

        Returns:
            A tuple ``(rects, confidences)`` where ``rects`` is a list of
            ``(startX, startY, endX, endY)`` integer tuples and
            ``confidences`` the matching scores.
        """
        # Grid size comes from the scores volume.
        (numRows, numCols) = scores.shape[2:4]
        rects = []
        confidences = []

        for y in range(0, numRows):
            # Per-row probabilities and the geometry used to derive the boxes.
            scoresData = scores[0, 0, y]
            xData0 = geometry[0, 0, y]
            xData1 = geometry[0, 1, y]
            xData2 = geometry[0, 2, y]
            xData3 = geometry[0, 3, y]
            anglesData = geometry[0, 4, y]

            for x in range(0, numCols):
                # Filter out weak detections.
                if scoresData[x] < self.min_confidence:
                    continue

                # The feature map is 4x smaller than the input image, so
                # scale by 4 to get back to input-image coordinates.
                (offsetX, offsetY) = (x * 4.0, y * 4.0)

                # Rotation angle of the prediction and its sine/cosine.
                angle = anglesData[x]
                cos = np.cos(angle)
                sin = np.sin(angle)

                # Height and width of the box.
                h = xData0[x] + xData2[x]
                w = xData1[x] + xData3[x]

                # Corner coordinates of the box.
                endX = int(offsetX + (cos * xData1[x]) + (sin * xData2[x]))
                endY = int(offsetY - (sin * xData1[x]) + (cos * xData2[x]))
                startX = int(endX - w)
                startY = int(endY - h)

                rects.append((startX, startY, endX, endY))
                confidences.append(scoresData[x])

        return (rects, confidences)

    def detect(self, image):
        """Detect text boxes in ``image``.

        The image is resized to ``(self.width, self.height)`` ignoring aspect
        ratio, so the returned boxes are in the resized image's coordinates.

        Args:
            image: Image array accepted by ``cv2.resize``.

        Returns:
            The boxes kept by non-maximum suppression.
        """
        (newW, newH) = (self.width, self.height)
        # Resize, ignoring the aspect ratio.
        image = cv2.resize(image, (newW, newH))
        (H, W) = image.shape[:2]

        # Two output layers: the first is the sigmoid activation giving the
        # probability that a region contains text; the second is the
        # "geometry" feature map used to derive the bounding boxes.
        layerNames = [
            "feature_fusion/Conv_7/Sigmoid",
            "feature_fusion/concat_3"]

        # Reuse the cached network instead of reloading it on every call.
        net = self._get_net()

        # Build a blob from the image and run a forward pass to obtain the
        # two output layer sets.
        blob = cv2.dnn.blobFromImage(image, 1.0, (W, H),
                                     (123.68, 116.78, 103.94),
                                     swapRB=True, crop=False)
        net.setInput(blob)
        (scores, geometry) = net.forward(layerNames)

        (rects, confidences) = self.decode_predictions(scores, geometry)

        # Suppress weak, overlapping boxes.
        boxes = non_max_suppression(np.array(rects), probs=confidences)
        return boxes

    def recognition(self, image):
        """Detect text regions and OCR each one.

        Args:
            image: Image array; it is not modified.

        Returns:
            A list of ``((startX, startY, endX, endY), text)`` tuples in
            original-image coordinates, sorted from top to bottom.
        """
        orig = image.copy()
        (origH, origW) = image.shape[:2]

        # Ratio between the original size and the size used for detection.
        rW = origW / float(self.width)
        rH = origH / float(self.height)

        boxes = self.detect(image)
        results = []

        # Tesseract options; English by default (use "-l chi_sim" for
        # Chinese).
        config = ("-l eng --oem 1 --psm 7")

        for (startX, startY, endX, endY) in boxes:
            # Scale the box back to the original image size.
            startX = int(startX * rW)
            startY = int(startY * rH)
            endX = int(endX * rW)
            endY = int(endY * rH)

            # Padding around the box can improve OCR; compute the x and y
            # deltas.
            dx = int((endX - startX) * self.padding)
            dy = int((endY - startY) * self.padding)

            # Pad each side and clamp to the image bounds.
            startX = max(0, startX - dx)
            startY = max(0, startY - dy)
            endX = min(origW, endX + (dx * 2))
            endY = min(origH, endY + (dy * 2))

            roi = orig[startY:endY, startX:endX]
            # Clamping can leave a box with no area; there is nothing to
            # recognise in it, so skip it instead of handing it to Tesseract.
            if roi.size == 0:
                continue

            text = pytesseract.image_to_string(roi, config = config)
            results.append(((startX, startY, endX, endY), text))

        # Order the results from top to bottom.
        results = sorted(results, key = lambda r:r[0][1])
        return results

    def detect_mark(self, image):
        """Draw the detected boxes and recognised text on a copy of ``image``.

        Each recognised text is also printed to the terminal.

        Args:
            image: Image array; it is not modified.

        Returns:
            The annotated copy of the image.
        """
        orig = image.copy()
        results = self.recognition(image)

        for ((startX, startY, endX, endY), text) in results:
            # Print the OCR text to the terminal.
            print("OCR TEXT")
            print("=======")
            print(text)

            # Strip non-ASCII characters, which cv2.putText does not
            # support.
            text = "".join(c for c in text if ord(c) < 128).strip()

            # Draw the box and the text just above it.
            cv2.rectangle(orig, (startX, startY), (endX, endY), (0, 0, 255), 2)
            cv2.putText(orig, text, (startX, startY - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 225), 2)

        return orig


if __name__ == '__main__':
    img = cv2.imread("data/1.jpg")
    # cv2.imread returns None instead of raising when the file cannot be read.
    if img is None:
        raise FileNotFoundError("data/1.jpg")
    ocrDetector = OCRDetector()
    orig = ocrDetector.detect_mark(img)
    # Show the text detection result.
    cv2.imshow("Text Detection", orig)
    cv2.waitKey(0)