为 OCR 选择一个区域会返回扭曲的图像，而 Tesseract 无法解码它

Question

朗蟒蛇导入系统导入 pytesseract 从 PyQt5.QtGui 导入 QPainter、QPen、QImage、QPixmap、QCursor 从 PyQt5.QtCore 导入 Qt、QPoint、QRect、QSize 从 PyQt5.QtWidgets 导入 QApplication、QMainWindow、QLabel、QAction、QMenu、QSystemTrayIcon、QStyle、QRubberBand 从 PIL 导入 ImageGrab、Image、ImageFilter、ImageOps

类主窗口（QMainWindow）：定义init（自我）：超级（）。init（）

    # Create the UI elements
    self.label = QLabel(self)
    self.setCentralWidget(self.label)

    # Add the menu items
    self.menu = QMenu(self)
    self.ocr_action = QAction("Perform OCR", self)
    self.ocr_action.triggered.connect(self.perform_ocr)
    self.menu.addAction(self.ocr_action)

    # Set the window to be transparent
    self.setWindowOpacity(0.5)

    # Create the system tray icon
    self.tray_icon = QSystemTrayIcon(self)
    self.tray_icon.setIcon(self.style().standardIcon(QStyle.SP_ComputerIcon))
    self.tray_icon.setContextMenu(self.menu)
    self.tray_icon.show()

    # Create a rubber band for selecting the area
    self.rubber_band = QRubberBand(QRubberBand.Rectangle, self.label)
    self.rubber_band.setMouseTracking(True)
    self.rubber_band.hide()

    # Reset the window position and size to full screen
    self.reset_position()

def reset_position(self):
    screen_size = QApplication.desktop().screenGeometry()
    self.setGeometry(screen_size)
    self.move(0, 0)

def mousePressEvent(self, event):
    if event.button() == Qt.LeftButton:
        self.start_pos = event.pos()
        self.rubber_band.setGeometry(QRect(self.start_pos, QSize()))
        self.rubber_band.show()

def mouseMoveEvent(self, event):
    if  self.rubber_band.isVisible():
        start_pos = self.rubber_band.pos()
        end_pos = event.pos()
        self.rubber_band.setGeometry(QRect(start_pos, end_pos).normalized())
        event.accept()



def mouseReleaseEvent(self, event):
    if event.button() == Qt.LeftButton:
        # Hide the previous rubber band
        if self.rubber_band.isVisible():
            rubber_band_rect = self.rubber_band.geometry()
            self.rubber_band.hide()

        # Get the selected rectangle
        x1 = min(self.start_pos.x(), event.pos().x())
        y1 = min(self.start_pos.y(), event.pos().y())
        x2 = max(self.start_pos.x(), event.pos().x())
        y2 = max(self.start_pos.y(), event.pos().y())

        # Check if rubber_band intersects with the selected area
        selected_rect = QRect(QPoint(x1, y1), QPoint(x2, y2))
        if rubber_band_rect.intersects(selected_rect):
            rubber_band_rect = rubber_band_rect.intersected(selected_rect)

        # Grab the selected area as a pixmap
        screen = QApplication.primaryScreen()
        pixmap = screen.grabWindow(
        QApplication.desktop().winId(),
        rubber_band_rect.x(), rubber_band_rect.y(), rubber_band_rect.width(), rubber_band_rect.height()
        )

        # Set the pixmap in the label
        self.label.setPixmap(pixmap)

        # Show the OCR menu item
        self.ocr_action.setVisible(True)

        # Show the rubber band
        self.rubber_band.setGeometry(QRect(self.start_pos, QSize()))
        self.rubber_band.show()

        # Perform OCR on the selected area
        self.perform_ocr()



def perform_ocr(self):
    # Get the selected area as an image
    pixmap = self.label.pixmap()
    if pixmap is None:
        return

    # Convert the pixmap to a QImage
    qimage = pixmap.toImage()

    # Make the image transparent
    alpha_channel = QImage(qimage.size(), QImage.Format_ARGB32_Premultiplied)
    alpha_channel.fill(Qt.transparent)
    painter = QPainter(alpha_channel)
    painter.drawPixmap(0, 0, pixmap)
    painter.end()

    # Set the alpha channel on the image
    qimage.setAlphaChannel(alpha_channel)

    # Convert the QImage to a PIL image
    buffer = qimage.constBits()
    buffer.setsize(qimage.byteCount())
    pil_image = Image.frombuffer(
    'RGBA', (qimage.width(), qimage.height()), buffer, 'raw', 'RGBA', 0, 1)

    # Save the selected area as a PNG file
    pil_image = 'selected_area.png'
    qimage.save(pil_image)

    # Perform OCR on the selected area of the image
    text = pytesseract.image_to_string(pil_image, lang='eng', config='--psm 6')

    # Copy the recognized text to the clipboard
    clipboard = QApplication.clipboard()
    clipboard.setText(text)

    # Hide the window and reset the label
    self.hide()
    self.label.setPixmap(QPixmap())

    # Hide the OCR menu item
    self.ocr_action.setVisible(False)

    # Hide the rubber band and reset start_pos
    self.rubber_band.hide()
    self.start_pos = None



def hideEvent(self, event):
    super().hideEvent(event)
    self.reset_position()

if name == 'main': app = QApplication(sys.argv)

window = MainWindow()
window.setGeometry

编辑：更新代码

为 OCR 选择一个区域会返回扭曲的图像，而 Tesseract 无法解码它

问题描述投票：0回答：0

最新问题

为 OCR 选择一个区域会返回扭曲的图像，而 Tesseract 无法解码它

问题描述 投票：0回答：0

最新问题

问题描述投票：0回答：0