我正在尝试使用 OpenCV 和 Python-tesseract 进行 OCR,将下面的图像转换为文本:
import cv2
import pytesseract
import argparse
import numpy as np
# Strings that are treated as OCR noise: a detection whose text equals one of
# these exactly is dropped from the results.
NOISE_TOKENS = frozenset({
    '-', '}', ',', '—', 'nnn', '#', ':', '=', '——', '*', '!', '°', '——=',
    ';', '+', '©',
})


def filter_detections(texts, boxes, ignored=NOISE_TOKENS):
    """Pair each recognised text with its bounding box and drop noise.

    Args:
        texts: Recognised strings, one per detection.
        boxes: Hashable ``(left, top, width, height)`` tuples, parallel to
            ``texts``.
        ignored: Strings to discard; a detection is removed when its text is
            exactly one of them.

    Returns:
        A list of ``(text, box)`` pairs in first-seen order with exact
        duplicates, blank texts and ignored texts removed.
    """
    # dict.fromkeys de-duplicates like set() but keeps a deterministic order.
    unique_pairs = dict.fromkeys(zip(texts, boxes))
    return [
        (text, box)
        for text, box in unique_pairs
        if text.strip() and text not in ignored
    ]


if __name__ == "__main__":
    # Argument parsing
    parser = argparse.ArgumentParser(description="Process images for OCR")
    parser.add_argument("input_file", help="Input image file path")
    args = parser.parse_args()

    # Read the input image. cv2.imread returns None instead of raising when
    # the file is missing or cannot be decoded, so check it explicitly.
    orig_image = cv2.imread(args.input_file)
    if orig_image is None:
        parser.error(f"could not read image: {args.input_file}")

    # The image is deliberately kept in OpenCV's native BGR order: both
    # COLOR_BGR2GRAY below and cv2.imshow at the end expect BGR input.

    # Normalization: stretch intensities to the full 0..255 range. Passing
    # None lets OpenCV allocate the destination itself.
    image = cv2.normalize(orig_image, None, 0, 255, cv2.NORM_MINMAX)

    # Remove noise
    image = cv2.fastNlMeansDenoisingColored(image, None, 10, 10, 7, 15)

    # Binarise. With THRESH_OTSU the threshold is computed from the image and
    # the value passed here is ignored, hence 0.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    image = cv2.threshold(image, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

    # NOTE(review): this inversion assumes the source has light text on a dark
    # background; Tesseract 4+ works best with dark text on a light
    # background, so confirm it is wanted for the images being processed.
    image = cv2.bitwise_not(image)

    # Tesseract config variables must be introduced with `-c`; without it the
    # blacklist is not applied.
    # NOTE(review): `--psm 6` assumes a single uniform block of text; a sparse
    # text mode (`--psm 11` or `--psm 12`) may suit schematics better.
    result = pytesseract.image_to_data(
        image,
        config=r'--psm 6 --oem 3 -l eng -c tessedit_char_blacklist=,;:',
        output_type=pytesseract.Output.DICT,
    )
    bounding_boxes = list(
        zip(result['left'], result['top'], result['width'], result['height'])
    )
    ocr_results = filter_detections(result['text'], bounding_boxes)
    print(len(ocr_results))

    for text, (x, y, w, h) in ocr_results:
        cv2.rectangle(orig_image, (x, y), (x + w, y + h), (255, 0, 255), 2)
        # Draw the text on the image
        cv2.putText(orig_image, text, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX,
                    0.5, (0, 255, 0), 2)

    cv2.namedWindow("image", cv2.WINDOW_NORMAL)
    cv2.imshow("image", orig_image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
OCR 处理后的结果附在下方。我发现结果还有很大的改进空间。
附图是从谷歌获取的示例图像,仅代表问题。我使用的原始图像的分辨率为 300 dpi,质量很好。主要问题是如何优化 pytesseract ocr 来检测嵌入式微控制器板原理图中的文本数据。
你可以尝试一下 PaddleOCR,我用 PCB 图像测试过:
from paddleocr import PaddleOCR,draw_ocr
# PaddleOCR provides models for Chinese, English, French, German, Korean and
# Japanese. Pick one by setting `lang` to `ch`, `en`, `fr`, `german`,
# `korean` or `japan` respectively.
# The engine only needs to be constructed once; doing so downloads the model
# and loads it into memory.
ocr = PaddleOCR(use_angle_cls=False, det=False, lang='en', use_gpu=False)

img_path = 'PCB.png'
result = ocr.ocr(img_path, cls=False)

# Print every entry of every element of the returned result.
# NOTE(review): some PaddleOCR versions reportedly return None for an image
# with no detected text, which would make the inner loop fail — confirm.
for res in result:
    for line in res:
        print(line)
# Draw the recognition result onto the image and save it to disk.
from PIL import Image

# Only the first element of the OCR output is visualised.
result = result[0]
image = Image.open(img_path).convert('RGB')

# Each entry is indexed as entry[0] -> box, entry[1][0] -> text,
# entry[1][1] -> score; collect the three parallel lists in one pass.
boxes, txts, scores = [], [], []
for entry in result:
    boxes.append(entry[0])
    txts.append(entry[1][0])
    scores.append(entry[1][1])

im_show = draw_ocr(image, boxes, txts, scores, font_path='./arial.ttf')
im_show = Image.fromarray(im_show)
im_show.save('result.jpg')