使用 Python 查找、替换 PDF 中的图像并调整其大小

问题描述 投票:0回答:1

我有一段 Python 代码,用来查找并替换 PDF 中的图像,但我不知道如何调整新图像的大小。目前代码会先找到旧图像,然后按旧图像的尺寸和位置放置新图像。我希望能把新图像设置为任意的高度和宽度,同时仍然放在旧图像原来的位置。我的目标是批量替换多个 PDF 中的徽标。感谢您的建议。

from pikepdf import Pdf, PdfImage, Name
from PIL import Image
import zlib
import os

# Folder that is scanned for the PDF files to modify.
input_folder = r"C:\input folder"
# Folder that receives the modified copies of those PDF files.
output_folder = r"C:\output folder"
# Image file that replaces the existing image in every PDF.
replacement_image_path = r"C:\new image to replace"

def _first_image(pdf):
    """Return the first image XObject in page order, or None if no page has one."""
    for page in pdf.pages:
        for image_key in page.images.keys():
            return page.images[image_key]
    return None


def _overwrite_image(raw_image, replacement_image):
    """Overwrite the PDF image stream *raw_image* with *replacement_image*.

    The replacement is converted to 8-bit RGB and scaled to the pixel
    dimensions of the image it replaces, then stored Flate-compressed.
    """
    pdf_image = PdfImage(raw_image)
    stream = pdf_image.obj
    # Read the size from the image dictionary instead of decoding the whole
    # original image; decoding can fail for formats Pillow cannot represent.
    width, height = pdf_image.width, pdf_image.height

    # tobytes() must yield exactly 3 bytes per pixel to match /DeviceRGB;
    # RGBA, palette or greyscale sources would otherwise corrupt the stream.
    rgb_image = replacement_image.convert("RGB").resize((width, height))

    stream.write(zlib.compress(rgb_image.tobytes()), filter=Name("/FlateDecode"))
    stream.ColorSpace = Name("/DeviceRGB")
    stream.BitsPerComponent = 8
    stream.Width, stream.Height = width, height

    # These entries describe the *old* samples (value ranges, colour-key or
    # soft masks, stencil flag) and would distort or clip the new image.
    for stale_key in ("/Decode", "/Mask", "/SMask", "/ImageMask"):
        if stale_key in stream:
            del stream[stale_key]


def replace_images_in_pdf(input_pdf_path, output_pdf_path, image_path):
    """Replace the first image of a PDF and save the result.

    Pages are scanned in order and only the first image found is replaced;
    all other images are left untouched. The replacement is scaled to the
    pixel dimensions of the image it replaces. The on-page position and
    displayed size are not changed by this function, because only the image
    data is rewritten. The document is saved even when it contains no image.

    Args:
        input_pdf_path: Path of the PDF to read.
        output_pdf_path: Path the modified PDF is written to (may equal
            *input_pdf_path*).
        image_path: Path of the replacement image, in any format Pillow
            can open.
    """
    # Context managers guarantee both files are closed even if processing
    # raises part-way through.
    with Pdf.open(input_pdf_path, allow_overwriting_input=True) as pdf, \
            Image.open(image_path) as replacement_image:
        target = _first_image(pdf)
        if target is not None:
            _overwrite_image(target, replacement_image)
        pdf.save(output_pdf_path)

def process_folder(input_folder, output_folder, replacement_image_path):
    """Run the image replacement on every PDF file in *input_folder*.

    Each file whose name ends in ``.pdf`` (case-insensitive) is processed and
    written to *output_folder* under the same file name. Sub-directories are
    not searched, and other files are ignored.

    Args:
        input_folder: Directory containing the PDF files to process.
        output_folder: Directory for the results; created if missing.
        replacement_image_path: Path of the replacement image.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_folder, exist_ok=True)

    # Sorted so that the processing order is deterministic across platforms.
    for input_file in sorted(os.listdir(input_folder)):
        if not input_file.lower().endswith('.pdf'):
            continue
        input_pdf_path = os.path.join(input_folder, input_file)
        # A directory may also be named "*.pdf"; only real files are PDFs.
        if not os.path.isfile(input_pdf_path):
            continue
        output_pdf_path = os.path.join(output_folder, input_file)
        replace_images_in_pdf(input_pdf_path, output_pdf_path, replacement_image_path)

# Run the batch replacement only when executed as a script, so that importing
# this module does not start processing files as a side effect.
if __name__ == "__main__":
    process_folder(input_folder, output_folder, replacement_image_path)
python image pdf replace find
1个回答
0
投票

采用 KJ 的想法:使用 PyMuPDF 的涂黑(redact)注释移除原图像,然后把它重新插入到任意大小的矩形区域中,从而调整图像的显示大小:

# Assumption: we already know which image we want to deal with.
# For example:

import fitz # PyMuPDF
doc=fitz.open("input.pdf")
page=doc[0]

page.get_images()
# Output shown for this call in the original interactive session:
# [(3, 0, 1280, 720, 8, 'DeviceRGB', '', 'Img3', 'DCTDecode')]
# we have an image at xref 3.

# remove it and re-insert it in a different rectangle, 90° rotated.
img_bbox = page.get_image_rects(3)[0]  # old display location

img = doc.extract_image(3)  # extract image
page.add_redact_annot(img_bbox)  # mark image area for removal
# Output shown for this call in the original interactive session:
# 'Redact' annotation on page 0 of landscape.pdf

# remove image
page.apply_redactions(images=fitz.PDF_REDACT_IMAGE_REMOVE)

# re-insert in new rectangle 90° rotated anti-clockwise
# (the target rectangle is what determines the displayed size and position;
# to keep the old position, build it from img_bbox's top-left corner instead)
page.insert_image((100,100,400,500), stream=img["image"], rotate=90)

# save at max compression to new file
doc.ez_save("output.pdf")
doc.close()
© www.soinside.com 2019 - 2024. All rights reserved.