使用 Tesseract 进行数字 OCR

问题描述 投票:0回答:1

我正在尝试OCR一些数字:

我编写了这段代码来测试不同的 psm 参数(6,7,8,13),我没有看到太大的区别。

import os
import pytesseract
import matplotlib.pyplot as plt

import cv2
import numpy as np

# Tell pytesseract which Tesseract executable to launch.
# NOTE(review): the value looks like a placeholder -- replace it with the real path.
pytesseract.pytesseract.tesseract_cmd = r"path/to/tesseract"
def apply_tesseract(image_path, psm):
    """Run Tesseract digit OCR on a single image file.

    Args:
        image_path: Path of the image file to load with OpenCV.
        psm: Tesseract page segmentation mode, forwarded as ``--psm``.

    Returns:
        A ``(image, text)`` tuple: the array exactly as returned by
        ``cv2.imread`` (BGR channel order) and the text recognized by
        Tesseract.

    Raises:
        OSError: If OpenCV cannot read ``image_path`` as an image.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports a missing or undecodable file by returning None
        # rather than raising, which would otherwise surface later as an
        # obscure error inside pytesseract.
        raise OSError(f"Could not read image file: {image_path}")

    # OpenCV loads pixels as BGR while pytesseract assumes RGB, so convert a
    # copy for OCR and keep returning the untouched array to the caller.
    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    text = pytesseract.image_to_string(rgb_image, config=f"--psm {psm} digits")
    return image, text

def display_images_with_text(images, texts):
    """Show the images in a grid, each titled with its OCR text.

    The grid has at most three rows; the number of columns grows as needed.
    Images are placed row by row. Nothing is shown when ``images`` is empty.

    Args:
        images: Sequence of image arrays accepted by ``Axes.imshow``.
        texts: Sequence of title strings, paired with ``images`` by position.
    """
    num_images = len(images)
    if num_images == 0:
        # An empty grid cannot be built (zero rows); there is nothing to show.
        return

    num_rows = min(3, num_images)
    num_cols = (num_images + num_rows - 1) // num_rows

    # squeeze=False makes `axes` a 2-D array for every grid shape. With the
    # default squeezing, a 1x1 grid yields a bare Axes and an Nx1 grid a 1-D
    # array, so fixed 2-D (or 1-D) indexing fails for one to three images.
    _, axes = plt.subplots(
        num_rows,
        num_cols,
        figsize=(12, 8),
        squeeze=False,
        subplot_kw={'xticks': [], 'yticks': []},
    )

    # Switch every cell off first so grid cells without an image stay blank.
    for ax in axes.flat:
        ax.axis("off")

    # axes.flat walks the grid row by row, matching the original placement.
    for ax, image, text in zip(axes.flat, images, texts):
        ax.imshow(image)
        ax.set_title(text)

    plt.show()

def main(folder_path, psm_modes=(6,)):
    """OCR every PNG in a folder and display the results per PSM mode.

    Args:
        folder_path: Directory that is scanned (non-recursively) for files
            whose name ends in ``.png``, case-insensitively.
        psm_modes: Iterable of Tesseract page segmentation modes to try. One
            figure is shown per mode. Defaults to ``(6,)``.
    """
    # os.listdir returns names in arbitrary order; sort them so the grid
    # layout is reproducible between runs and between PSM modes.
    png_names = sorted(
        name for name in os.listdir(folder_path) if name.lower().endswith(".png")
    )
    if not png_names:
        print(f"No .png images found in {folder_path}")
        return

    for psm in psm_modes:
        images = []
        texts = []
        for filename in png_names:
            image_path = os.path.join(folder_path, filename)
            image, text = apply_tesseract(image_path, psm)
            images.append(image)
            texts.append(text)
        display_images_with_text(images, texts)

# Script entry point: run the OCR comparison on the PNG files in ./digitImages.
if __name__ == "__main__":
    folder_path = r"./digitImages"
    main(folder_path)

这是使用 `--psm 6` 时的输出:

如您所见,情况不太好。

我该如何改进?数字图像已经是黑白的并且非常小,我尝试了一些处理,但最终得到了相同的黑白图像。

# Read the original image (image_path must be defined by the surrounding code)
original_image = cv2.imread(image_path)

# Upscale the image to twice its original size in both dimensions
new_width = original_image.shape[1] * 2  # Double the width
new_height = original_image.shape[0] * 2  # Double the height
resized_image = cv2.resize(original_image, (new_width, new_height))


# Convert the resized image to grayscale
gray = cv2.cvtColor(resized_image, cv2.COLOR_BGR2GRAY)

# Sharpen the grayscale image with a 3x3 kernel (centre weight 9, neighbours -1);
# note that no blur step is applied anywhere in this snippet
sharpen_kernel = np.array([[-1, -1, -1], [-1, 9, -1], [-1, -1, -1]])
sharpen = cv2.filter2D(gray, -1, sharpen_kernel)

# Apply Otsu's thresholding to the sharpened image; cv2.threshold returns a
# (threshold value, image) pair, so [1] keeps only the binarized image
thresh = cv2.threshold(sharpen, 0, 255, cv2.THRESH_OTSU)[1]
python ocr tesseract image-preprocessing
1个回答
0
投票

我也在自己的项目中试用过这个库,并做过概念验证(POC),但它的识别结果并不准确,有时会输出一些随机数据,导致后续的处理逻辑出错。

如果您正在实施的项目能够带来收入,那么建议使用谷歌的图像处理 API 或其他类似的服务来识别图像,以获得准确的结果。不过,这些服务是需要付费的。

© www.soinside.com 2019 - 2024. All rights reserved.