我使用下面的算法将句子分割成单词,将单词分割成字符。 正如您在下面的输出中看到的,“STAND”一词中的字母“S”和“T”连接在一起,我不明白我做错了什么,如果你们能帮助我,我会很高兴。
2. 我已经在 EMNIST 字母数据集上训练了一个模型,该模型一次只能预测一个字母。为了继续下一步,我需要把每个字符框提取到一个字符图像数组中。最终目标是得到一个包含所有字符图像的数组,之后我计划用模型逐个预测每个字符。
此外,我需要把每个字符图像缩放到 28x28 像素,因为模型是在该尺寸的图像上训练的。我在这一步遇到了麻烦,希望你们能帮助我。
import cv2
# Preprocessing
def preProcessing(myImage):
    """Binarize the input image and run line/word segmentation over it.

    Parameters
    ----------
    myImage : numpy.ndarray
        BGR image as returned by ``cv2.imread``.

    Returns
    -------
    numpy.ndarray
        A copy of the input with the detected boxes drawn onto it.
    """
    grayImg = cv2.cvtColor(myImage, cv2.COLOR_BGR2GRAY)
    # Otsu picks the threshold automatically; BINARY_INV makes ink white.
    ret, thresh1 = cv2.threshold(grayImg, 0, 255,
                                 cv2.THRESH_OTSU | cv2.THRESH_BINARY_INV)
    print(f'The threshold value applied to the image is: {ret} ')
    # A large 18x18 kernel dilates the letters of a text line into one blob.
    horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (18, 18))
    dilation = cv2.dilate(thresh1, horizontal_kernel, iterations=1)
    horizontal_contours, hierarchy = cv2.findContours(
        dilation, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    im2 = myImage.copy()
    for cnt in horizontal_contours:
        x, y, w, h = cv2.boundingRect(cnt)
        # cv2.rectangle draws in place and returns the same array (im2).
        rect = cv2.rectangle(im2, (x, y), (x + w, y + h), (255, 255, 255), 0)
        # NOTE(review): seg_word is handed the WHOLE annotated image, not the
        # cropped line region im2[y:y + h, x:x + w], so every iteration
        # re-segments the full picture; this is likely why adjacent letters
        # (e.g. "S" and "T" in "STAND") end up merged into one box — confirm.
        im2 = seg_word(rect)
    return im2
# Word segmentation
def seg_word(wordImage):
    """Outline word-sized blobs in *wordImage* and feed the annotated
    image to the character-segmentation stage.
    """
    # Gray-scale conversion followed by inverted Otsu binarization, so
    # the text becomes white foreground on a black background.
    gray = cv2.cvtColor(wordImage, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 0, 255,
                              cv2.THRESH_OTSU | cv2.THRESH_BINARY_INV)
    # An 8x10 rectangular element fuses the letters of one word together.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (8, 10))
    fused = cv2.dilate(binary, kernel, iterations=1)
    # External contours of the fused blobs are the word candidates.
    contours, _ = cv2.findContours(fused, cv2.RETR_EXTERNAL,
                                   cv2.CHAIN_APPROX_NONE)
    word_img = wordImage.copy()
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        # Draw the green word box, then segment the characters inside.
        boxed = cv2.rectangle(word_img, (x, y), (x + w, y + h),
                              (0, 255, 0), 0)
        word_img = character_seg(boxed)
    return word_img
# Character segmentation
def character_seg(img):
    """Draw a blue bounding box around every character-sized blob in *img*
    and return the annotated image.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    binary = cv2.threshold(gray, 0, 255,
                           cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 5))
    # Erode once to drop small specks, then dilate three times so broken
    # strokes of a single character merge back into one blob.
    cleaned = cv2.erode(binary, kernel, iterations=1)
    grown = cv2.dilate(cleaned, kernel, iterations=3)
    contours, _ = cv2.findContours(grown, cv2.RETR_EXTERNAL,
                                   cv2.CHAIN_APPROX_SIMPLE)
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        cv2.rectangle(img, (x, y), (x + w, y + h), (255, 0, 0 ), 2)
    return img
# Load the test image.
# The path literal was broken across two lines in the original paste, which
# is a syntax error; implicit string concatenation keeps it readable.
image_path = (r"C:\Users\student\Desktop\FinalProject\Flask\uploads"
              r"\1_lWmB8FGf1uWT6r1TichK-Q-ezgif.com-webp-to-png-converter.png")
myImage = cv2.imread(image_path)
# Show the original image, then the segmented result.
cv2.imshow('Text Image', myImage)
cv2.waitKey(0)
processed_img = preProcessing(myImage)
cv2.imshow('Text Image', processed_img)
cv2.waitKey(0)
我只针对可以改进的预处理部分作答。文本的颜色很容易选取,可以用 cv2.inRange 按颜色范围设置阈值:
# Select the text by its colour instead of grayscale thresholding.
im = cv2.imread("text.png")           # load the image (BGR)
lower = (60, 60, 0)                   # lower BGR bound of the text colour
upper = (100, 100, 40)                # upper BGR bound
mask = cv2.inRange(im, lower, upper)  # pixels inside the range become 255
# Morphological closing fills the small holes left inside the mask.
kernel = np.ones((3, 3), dtype=np.uint8)
maskClosed = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)
plt.imshow(maskClosed)                # display the resulting mask
结果:
您可以使用它代替当前的预处理部分。这些字母应该很容易彼此分开。