在 Ubuntu 22.04 (Linux) 上,我使用 OpenCV 4.9.0 的 C++ API,尝试自动检测下面第一张扫描图像中的多张商店收据,把每张收据复制为子图像并旋转摆正,然后交给 OCR 程序做进一步处理:
绿色区域在原始图像中并不存在,只是用来遮盖收据上可能存在的个人身份信息 (PII)。
我尝试使用 OpenCV 文档和 StackOverflow 上的各种示例中的代码,但我很难让它正常工作,直到我尝试在 GIMP 中执行图像预处理步骤。在这里,经过一些实验,我发现使用 Sobel 算法的 “Filter->Edge-Detect->Edge...” 选项对于生成图像最成功,我可以将其提供给我编写的 C++ 代码(代码如下)。
这是 GIMP 对话框及我使用的设置(Algorithm=Sobel,Amount=10,Border behavior=“clamp”):
生成的图像:
这是应用阈值 35 后的二值图像:
最后,OpenCV 代码在收据周围生成这些旋转矩形(太小或太大的矩形将被丢弃):
所以这是我的问题: 如何让 OpenCV 对原始图像执行与 GIMP 相同的操作?
这是我的代码,改编自 OpenCV 示例以及 这个 SO 线程:
#include <opencv2/imgproc.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/highgui.hpp>
#include <iostream>
using namespace cv;
using namespace std;
int main(int argc, char* argv[]) {
int min_area = 10000;
int max_height = 800;
double thr = 220;
Mat img, orig_img;
if (argc < 2) {
cout << "Usage: " << argv[0] << " [FILE PATH] [THRESHOLD (1-255, optional)]" << endl;
return 0;
} else {
orig_img = imread(argv[1]);
if ( orig_img.empty() ) {
cout << "WARNING: the input image was empty!" << endl;
return EXIT_FAILURE;
}
img = orig_img;
}
if (argc > 2) {
long val = strtol(argv[2], nullptr, 10);
if (val > 0 && val < 256) {
thr = val;
}
}
//-------------------------------------------
// Resize if height is larger than 800 pixels:
Mat tmp;
double h = img.size().height;
double w = img.size().width;
double df = 1.0;
cout << "\nOriginal image height:\t" << static_cast<int>(h) << endl;
cout << "Original image width:\t" << static_cast<int>(w) << "\n" << endl;
if (h > max_height) {
df = (double)max_height / h;
resize(img, tmp, Size(), df, df, INTER_NEAREST_EXACT);
img = tmp;
}
Mat img2, img3;
cvtColor(img, img2, COLOR_BGR2GRAY);
blur( img2, img3, Size(3,3) );
threshold(img3, img2, thr, 255, THRESH_BINARY);
Mat element = getStructuringElement(MORPH_CROSS, Size(3, 3), Point(1, 1));
erode(img2, img2, element); // without it find contours fails on some rects
// Show images:
imshow("img", img);
imshow("img2", img2);
waitKey();
// preprocessing done, search rectangles
vector<vector<Point> > contours;
// vector<Vec4i> hierarchy;
findContours(img2, contours, /* hierarchy, */ RETR_EXTERNAL, CHAIN_APPROX_SIMPLE);
vector<RotatedRect> rects;
for (int i = 0; i < contours.size(); i++) {
// if (hierarchy[i][2] > 0) continue;
// capture inner contour
RotatedRect rr = minAreaRect(contours[i]);
if (rr.size.area() < min_area) continue; // too small
rr.size.width += 8;
rr.size.height += 8; // expand to outlier rect if needed
rects.push_back(rr);
cout << "***************\nRectangle dimensions:"
<< "\nRect " << i << ":"
<< "\n\twidth:\t" << rr.size.width
<< "\n\theight:\t" << rr.size.height
<< "\n\tarea:\t" << rr.size.width * rr.size.height
<< "\n***************\n"
<< endl;
}
Mat debugImg;
img.copyTo(debugImg);
for (RotatedRect rr : rects) {
Point2f points[4];
rr.points(points);
for (int i = 0; i < 4; i++) {
int ii = (i + 1) % 4;
line(debugImg, points[i], points[ii], CV_RGB(255, 0, 0), 2);
}
}
imshow("debug", debugImg);
waitKey();
}
感谢您的帮助!
与此同时,我通过在 OpenCV 中使用 Scharr 算法以及调整参数,获得了更好的结果。我的目标是使这个过程尽可能自动化,所以我希望用户输入最少的信息。
这是我迄今为止所掌握的内容(改编自此处找到的 OpenCV Sobel 示例):
#include "opencv2/imgproc.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/highgui.hpp"
#include <iostream>
using namespace cv;
using namespace std;
int main( int argc, char** argv )
{
cv::CommandLineParser parser(argc, argv,
"{@input |lena.jpg|input image}"
"{ksize k|1|ksize (hit 'K' to increase its value at run time)}"
"{scale s|1|scale (hit 'S' to increase its value at run time)}"
"{delta d|0|delta (hit 'D' to increase its value at run time)}"
"{help h|false|show help message}");
cout << "The sample uses Sobel or Scharr OpenCV functions for edge detection\n\n";
parser.printMessage();
cout << "\nPress 'ESC' to exit program.\nPress 'R' to reset values ( ksize will be -1 equal to Scharr function )";
// First we declare the variables we are going to use
Mat image,src, src_gray;
Mat grad;
const String window_name = "Sobel Demo - Simple Edge Detector";
int ksize = parser.get<int>("ksize");
int scale = parser.get<int>("scale");
int delta = parser.get<int>("delta");
int ddepth = CV_16S;
String imageName = parser.get<String>("@input");
// As usual we load our source image (src)
image = imread( samples::findFile( imageName ), IMREAD_COLOR ); // Load an image
// Check if image is loaded fine
if( image.empty() )
{
printf("Error opening image: %s\n", imageName.c_str());
return EXIT_FAILURE;
}
//-------------------------------------------
// Resize if height is larger than 800 pixels:
int max_height = 800;
Mat tmp;
double h = image.size().height;
double w = image.size().width;
double df = 1.0;
cout << "\nOriginal image height:\t" << static_cast<int>(h) << endl;
cout << "Original image width:\t" << static_cast<int>(w) << "\n" << endl;
if (h > max_height) {
df = (double)max_height / h;
resize(image, tmp, Size(), df, df, INTER_NEAREST_EXACT);
image = tmp;
}
double thr = 125; // Start with a threshold value somewhere between 0 and 255
for (;;)
{
// Remove noise by blurring with a Gaussian filter ( kernel size = 3 )
GaussianBlur(image, src, Size(3, 3), 0, 0, BORDER_DEFAULT);
// Convert the image to grayscale
cvtColor(src, src_gray, COLOR_BGR2GRAY);
Mat grad_x, grad_y;
Mat abs_grad_x, abs_grad_y;
Sobel(src_gray, grad_x, ddepth, 1, 0, ksize, scale, delta, BORDER_DEFAULT);
Sobel(src_gray, grad_y, ddepth, 0, 1, ksize, scale, delta, BORDER_DEFAULT);
// converting back to CV_8U
convertScaleAbs(grad_x, abs_grad_x);
convertScaleAbs(grad_y, abs_grad_y);
addWeighted(abs_grad_x, 0.5, abs_grad_y, 0.5, 0, grad);
imshow(window_name, grad);
char key = (char)waitKey(0);
int old_ksize, old_scale, old_delta, old_threshold;
if(key == 27)
{
break;
}
if (key == 'k' || key == 'K')
{
old_ksize = ksize;
ksize = ksize < 30 ? ksize+2 : -1;
cout << "Changed 'ksize' from " << old_ksize << " to " << ksize << endl;
}
if (key == 's' || key == 'S')
{
old_scale = scale;
scale++;
cout << "Changed 'scale' from " << old_scale << " to " << scale << endl;
}
if (key == 'd' || key == 'D')
{
old_delta = delta;
delta++;
cout << "Changed 'delta' from " << old_delta << " to " << delta << endl;
}
if (key == 'r' || key == 'R')
{
scale = 1;
ksize = -1;
delta = 0;
cout << "Reset to Scharr algorithm." << endl;
}
if (key == '+') {
old_threshold = (int)thr;
if (thr < 250) {
thr += 5;
cout << "Increasing threshold from " << old_threshold << " to " << (int)thr << endl;
key = 'f';
}
}
if (key == '-') {
old_threshold = (int)thr;
if (thr > 5) {
thr -= 5;
cout << "Decreasing threshold from " << old_threshold << " to " << (int)thr << endl;
key = 'f';
}
}
if (key == 'f' || key == 'F') {
Mat img2, img3;
int min_area = 10000;
// not necessary here:
// cvtColor(img, img2, COLOR_BGR2GRAY);
blur( grad, img2, Size(3,3) );
threshold(img2, img3, thr, 255, THRESH_BINARY);
Mat element = getStructuringElement(MORPH_CROSS, Size(3, 3), Point(1, 1));
erode(img3, img3, element); // without it find contours fails on some rects
// preprocessing done, search rectanges
vector<vector<Point> > contours;
// vector<Vec4i> hierarchy;
findContours(img3, contours, /* hierarchy, */ RETR_EXTERNAL, CHAIN_APPROX_SIMPLE);
vector<RotatedRect> rects;
for (int i = 0; i < contours.size(); i++) {
// if (hierarchy[i][2] > 0) continue;
// capture inner contour
RotatedRect rr = minAreaRect(contours[i]);
if (rr.size.area() < min_area) continue; // too small?
if (rr.size.width > img3.size().width - 5) continue; // rectangle encloses entire image?
rr.size.width += 8;
rr.size.height += 8; // expand to outlier rect if needed
rects.push_back(rr);
cout << "***************\nRectangle dimensions found:"
<< "\nRect " << i << ":"
<< "\n\twidth:\t" << rr.size.width
<< "\n\theight:\t" << rr.size.height
<< "\n\tarea:\t" << rr.size.width * rr.size.height
<< "\n***************\n"
<< endl;
}
Mat debugImg;
image.copyTo(debugImg);
for (RotatedRect rr : rects) {
Point2f points[4];
rr.points(points);
for (int i = 0; i < 4; i++) {
int ii = (i + 1) % 4;
line(debugImg, points[i], points[ii], CV_RGB(255, 0, 0), 2);
}
}
imshow("debug", debugImg);
waitKey();
destroyWindow("debug");
}
}
cout << "\n************************\nParameters:"
<< "\n\tksize = " << ksize
<< "\n\tscale = " << scale
<< "\n\tdelta = " << delta
<< "\n\tthreshold = " << thr
<< "\n************************"
<< endl;
return EXIT_SUCCESS;
}
阈值从 125 开始,可以根据需要调整:每按一次 "+" 或 "-",阈值就增加或减少 5,同时程序会自动用新阈值检测矩形,把它们叠加到原始图像上并在新窗口中显示。修改其他参数(ksize、scale、delta,做法与原始 Sobel 示例代码相同)也会按预期生效,但之后必须按 "f" 或 "F" 键才能显示矩形叠加层。
当我使用 Scharr(将 ksize 设为 -1),并取 scale=5、delta=1、把阈值降低到 120 时,似乎能得到最好的结果。但我仍然想知道如何让整个流程自动化……理想情况下,用户只需提供最少的输入:说明扫描了多少张收据,并确保收据之间互不重叠;其余所有参数都应尽可能根据图像自动确定。
谢谢。