如何从多张图片中只找出包含加粗文字的行?

问题描述 投票:0回答:1
from PIL import Image
import pytesseract
from pdf2image import convert_from_path
import os
import pandas as pd
import cv2
import numpy as np

# Script: rasterise a PDF page, find text-line-sized regions with OpenCV, save
# each region as its own image, and save an overview with the regions outlined.

# Work inside the folder that holds the input PDF; every output image below is
# written there too. os.chdir() returns None, so its result is not kept.
os.chdir("C:/Users/abhishek_kumar1/Desktop/New folder")

# Rasterise the PDF at 190 DPI.
# NOTE(review): per the pdf2image docs single_file=True limits the conversion
# to the first page -- confirm if multi-page input is expected.
pages = convert_from_path(
    "d.pdf",
    190,
    single_file=True,
    poppler_path='C:/Users/abhishek_kumar1/Downloads/poppler-0.68.0_x86/poppler-0.68.0/bin',
)
if not pages:
    raise RuntimeError("convert_from_path returned no pages for d.pdf")

# Number the output files from the loop itself so that several pages would get
# distinct names instead of all overwriting page_1.jpg.
for image_counter, page in enumerate(pages, start=1):
    filename = "page_" + str(image_counter) + ".jpg"
    page.save(filename, 'JPEG')

# Only the most recently saved page is analysed below.
img = cv2.imread(filename)
if img is None:
    # cv2.imread signals failure by returning None rather than raising.
    raise FileNotFoundError("cv2.imread could not read " + filename)

gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
cv2.imwrite('grey.png', gray)

# Otsu picks the threshold automatically (the 0 passed as threshold is
# ignored); BINARY_INV makes the darker pixels white so they act as foreground.
otsu_level, thresh1 = cv2.threshold(
    gray, 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY_INV
)
cv2.imwrite('Thresh1.png', thresh1)

# A wide, short kernel dilated repeatedly grows the foreground mostly
# horizontally, which is meant to fuse neighbouring glyphs into one blob.
rect_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (15, 3))
dilation = cv2.dilate(thresh1, rect_kernel, iterations=6)

# OpenCV 3.x returns (image, contours, hierarchy) and 4.x returns
# (contours, hierarchy); taking the last two values works with both.
contours, hierarchy = cv2.findContours(
    dilation, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE
)[-2:]

# Rectangles are drawn on a copy so the crops can be taken from the clean image.
im2 = img.copy()

# The contour list is walked in reverse, as in the original script.
for ROI_number, cnt in enumerate(reversed(contours)):
    x, y, w, h = cv2.boundingRect(cnt)
    # Crop from the untouched image: cropping from im2 would pick up rectangle
    # outlines already drawn for overlapping regions in earlier iterations.
    ROI = img[y:y + h, x:x + w]
    cv2.imwrite('ROI_{}.jpg'.format(ROI_number), ROI)
    cv2.rectangle(im2, (x, y), (x + w, y + h), (36, 255, 12), 1)

cv2.imwrite('contours1.png', im2)

上面的代码会把页面切分成许多小图片,我该如何只找出这张图片(即包含加粗文字的那一张)?有没有办法从图片中识别字体样式,比如粗体、斜体等?从所有图片中手动挑出加粗的行非常麻烦。如果有人有这方面的建议,请帮帮我。

python image-processing fonts python-imaging-library opencv-contour
1个回答
0
投票

请参考下面的 Python 代码及其运行结果。

import cv2
import numpy as np
# Script: keep only the thick (bold) strokes of 'C.png' by morphological
# reconstruction, then write and display the result.
# NOTE(review): the approach assumes dark text on a light background -- confirm
# for other inputs.

img = cv2.imread('C.png')
if img is None:
    # cv2.imread signals failure by returning None rather than raising.
    raise FileNotFoundError("cv2.imread could not read 'C.png'")

gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Pixels brighter than 160 become 255 (white); everything else becomes 0.
thresh = cv2.threshold(gray, 160, 255, cv2.THRESH_BINARY)[1]

kernel = np.ones((5, 5), np.uint8)
kernel2 = np.ones((3, 3), np.uint8)

# marker: white areas grown by 5x5, which is intended to wipe out thin dark
# strokes while leaving a core of the thicker ones.
marker = cv2.dilate(thresh, kernel, iterations=1)
# mask: white areas shrunk by 5x5; used as the lower bound for marker in the
# loop below.
mask = cv2.erode(thresh, kernel, iterations=1)

# Erode marker step by step, clamping it from below with mask each time, until
# an iteration changes nothing.
while True:
    tmp = marker.copy()
    marker = cv2.erode(marker, kernel2)
    marker = cv2.max(mask, marker)
    difference = cv2.subtract(tmp, marker)
    if cv2.countNonZero(difference) == 0:
        break

# OR-ing with the white parts of marker whitens those pixels in the output;
# where marker is black the original pixels pass through unchanged.
marker_color = cv2.cvtColor(marker, cv2.COLOR_GRAY2BGR)
out = cv2.bitwise_or(img, marker_color)
cv2.imwrite('out.png', out)

cv2.imshow('result', out)
# imshow only queues the image; the window is drawn and serviced inside
# waitKey, so block here until a key is pressed and then clean up.
cv2.waitKey(0)
cv2.destroyAllWindows()

enter image description here

© www.soinside.com 2019 - 2024. All rights reserved.