如何改善 pytesseract 在 PCB 原理图图像上的 OCR 结果?

问题描述 投票:0回答:1

我正在尝试使用 OpenCV 和 Python-tesseract 应用 OCR 将以下图像转换为文本:image before ocr.

import cv2
import pytesseract
import argparse
import numpy as np

if __name__ == "__main__":
    # Script entry point: OCR an image with Tesseract and display the
    # recognised words with their bounding boxes drawn on the original image.

    # Argument parsing
    parser = argparse.ArgumentParser(description="Process images for OCR")
    parser.add_argument("input_file", help="Input image file path")
    args = parser.parse_args()

    # cv2.imread does not raise on a missing/unreadable file; it returns None.
    # Fail early with a clear message instead of crashing in a later cv2 call.
    orig_image = cv2.imread(args.input_file)
    if orig_image is None:
        parser.error(f"Could not read image: {args.input_file}")

    # Stay in OpenCV's native BGR channel order throughout: the BGR2GRAY
    # conversion below and cv2.imshow both interpret the data as BGR, so an
    # intermediate BGR->RGB conversion would swap the red and blue channels.

    # Normalization (stretch intensities to the full 0..255 range).
    # cv2.normalize returns a new array, so orig_image stays untouched here.
    image = cv2.normalize(orig_image, None, 0, 255, cv2.NORM_MINMAX)

    # Remove noise
    image = cv2.fastNlMeansDenoisingColored(image, None, 10, 10, 7, 15)

    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # With THRESH_OTSU the threshold is computed from the image histogram and
    # the supplied threshold value is ignored, hence 0.
    image = cv2.threshold(image, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

    # NOTE(review): this inverts the binarised image. Tesseract's guidance is
    # dark text on a light background; if the schematic is already dark-on-
    # light, this inversion is likely counter-productive -- confirm on real input.
    image = cv2.bitwise_not(image)

    # Tesseract variables must be passed with "-c NAME=VALUE"; without "-c"
    # the blacklist token is not applied as a variable.
    # NOTE(review): "--psm 6" assumes one uniform block of text; sparse-text
    # modes (--psm 11 / 12) may suit schematics better -- try on real input.
    tesseract_config = r'--psm 6 --oem 3 -l eng -c tessedit_char_blacklist=,;:'
    result = pytesseract.image_to_data(
        image,
        config=tesseract_config,
        output_type=pytesseract.Output.DICT,
    )

    text_results = result['text']
    bounding_boxes = zip(result['left'], result['top'], result['width'], result['height'])

    # De-duplicate (text, box) pairs while preserving Tesseract's output order
    # (a plain set() would make the order arbitrary between runs).
    unique_results = list(dict.fromkeys(zip(text_results, bounding_boxes)))

    # Tokens that are treated as OCR noise and dropped from the results.
    noise_tokens = {'-', '}', ',', '—', 'nnn', '#', ':', '=', '——', '*', '!', '°', '——=', ';', '+', '©'}
    ocr_results = [
        (text, box)
        for text, box in unique_results
        if text.strip() and text not in noise_tokens
    ]

    print(len(ocr_results))

    for text, (x, y, w, h) in ocr_results:
        cv2.rectangle(orig_image, (x, y), (x + w, y + h), (255, 0, 255), 2)
        # Draw the text on the image
        cv2.putText(orig_image, text, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    cv2.namedWindow("image", cv2.WINDOW_NORMAL)
    cv2.imshow("image", orig_image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

OCR 之后的结果如下图所示。我发现结果还有很大的改进空间。

附图是从谷歌获取的示例图像,仅用于说明问题。我实际使用的原始图像分辨率为 300 dpi,质量很好。主要问题是:如何优化 pytesseract OCR,以检测嵌入式微控制器板原理图中的文本数据?(图:image after ocr)

python opencv optimization ocr python-tesseract
1个回答
0
投票

你可以尝试一下paddleocr,我测试了PCB图像

from paddleocr import PaddleOCR,draw_ocr
# Paddleocr supports Chinese, English, French, German, Korean and Japanese.
# You can set the parameter `lang` as `ch`, `en`, `fr`, `german`, `korean`, `japan`
# to switch the language model in order.
# NOTE(review): `det` is normally an argument of `ocr.ocr(...)` rather than of
# the constructor; it is kept here unchanged -- confirm against the installed
# PaddleOCR version whether it has any effect.
ocr = PaddleOCR(use_angle_cls=False, det=False, lang='en', use_gpu=False)  # need to run only once to download and load the model into memory
img_path = 'PCB.png'
result = ocr.ocr(img_path, cls=False)
# `result` holds one entry per input image. An entry may be None/empty when no
# text was detected, so skip those instead of iterating over None.
for res in result:
    if not res:
        continue
    for line in res:
        print(line)


# draw result
from PIL import Image
# Use the first (only) image's detections; fall back to an empty list when
# nothing was recognised so the comprehensions below do not fail on None.
result = (result[0] if result else None) or []
if result:
    image = Image.open(img_path).convert('RGB')
    # Each line is indexed as [box, (text, score)] by the code below.
    boxes = [line[0] for line in result]
    txts = [line[1][0] for line in result]
    scores = [line[1][1] for line in result]
    im_show = draw_ocr(image, boxes, txts, scores, font_path='./arial.ttf')
    im_show = Image.fromarray(im_show)
    im_show.save('result.jpg')
else:
    print('No text detected in', img_path)

© www.soinside.com 2019 - 2024. All rights reserved.