以下代码生成的图像几乎全黑:
import fitz
import pandas as pd
import cv2 #opencv for preprocessing of image
import numpy as np
# Render the first page of a PDF with PyMuPDF and save it as a PNG via OpenCV.
# NOTE(review): the backslashes in this path are not escaped and the string is
# not a raw string; "\m" and "\I" are not recognised escapes, so they are kept
# literally, but forward slashes or an r'' literal would be safer.
filename = 'sample_pdf\muscle model\Invoice of muscle model from HealthLink.pdf'
doc = fitz.Document(filename) #same as fitz.open
page1 = doc[0]
# Request an RGB rendering of the page with annotations suppressed.
pix = page1.get_pixmap(colorspace = 'RGB', annots = False)
# NOTE(review): np.int8 is a *signed* 8-bit type, so every sample byte above
# 127 (bright pixels, e.g. white = 255) is reinterpreted as a negative number.
# This is very likely why the saved image is nearly black; np.uint8 is the
# dtype that matches 0-255 pixel samples. The name `bytes` also shadows the
# Python builtin of the same name.
bytes=np.frombuffer(pix.samples,dtype=np.int8)
# View the flat buffer as (rows, columns, channels); pix.n is the number of
# components per pixel reported by the pixmap.
img_rgb = bytes.reshape(pix.height,pix.width,pix.n)
img_bgr = img_rgb[:,:,::-1]#reverse the channel axis: RGB -> BGR for cv2.imwrite
cv2.imwrite('test.png',img_bgr)
我知道可能还有其他方法可以将 PDF 中的页面转换为图像,但我仍然想知道上面的代码为什么会失败。
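问题出在 `dtype=np.int8`:`pix.samples` 中的每个字节都是 0–255 的无符号像素值,按有符号的 int8 解释后,所有大于 127 的值(例如白色的 255)都会变成负数,无法作为正常的亮度值写入图像,因此结果几乎全黑。将 dtype 改为 `np.uint8` 即可。尝试: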
import fitz
import cv2
import numpy as np
# Render the first page of the PDF and save it as a PNG through OpenCV.
filename = 'sample_pdf/muscle model/Invoice of muscle model from HealthLink.pdf'
doc = fitz.open(filename)
page = doc[0]
# Ask PyMuPDF for an RGB rendering of the page.
pix = page.get_pixmap(colorspace='rgb')
# Interpret the raw samples as *unsigned* 8-bit values (0-255). Reading them
# as signed int8 turns every value above 127 negative, which is what produces
# a nearly black image. pix.n is the channel count reported by the pixmap, so
# the shape is not tied to a hard-coded 3.
img_rgb = np.frombuffer(pix.samples, dtype=np.uint8).reshape(pix.height, pix.width, pix.n)
# The pixmap was rendered as RGB, while cv2.imwrite expects BGR channel order.
# NOTE: COLOR_RGB2BGR needs a 3-channel image, i.e. a pixmap without alpha.
img_bgr = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2BGR)
# Save the image
cv2.imwrite('test.png', img_bgr)