我在 React TypeScript 应用程序中使用 pdf-lib。如何获取 PDFDocument 中的所有文本字段，以及每个表单字段所在的页码？
/**
 * Collects the entries of the document's AcroForm `Fields` array.
 *
 * The catalog is probed with `getMaybe('AcroForm')` and the resolved form
 * with `getMaybe('Fields')`; if either probe is falsy the function returns
 * a fresh empty array instead of resolving anything further.
 *
 * @param pdfDoc - Document object exposing `catalog` (with `getMaybe`/`get`)
 *   and `index` (with `lookup`).
 * @returns The `array` property of the resolved `Fields` object, or `[]`
 *   when the document has no AcroForm or the form has no Fields entry.
 */
const getAcroFieldRefs = (pdfDoc) => {
  const hasAcroForm = Boolean(pdfDoc.catalog.getMaybe('AcroForm'));
  if (!hasAcroForm) {
    return [];
  }

  // Resolve the AcroForm entry through the document index.
  const formDict = pdfDoc.index.lookup(pdfDoc.catalog.get('AcroForm'));

  const hasFields = Boolean(formDict.getMaybe('Fields'));
  if (!hasFields) {
    return [];
  }

  // Resolve the Fields entry the same way and hand back its backing array.
  const fieldList = pdfDoc.index.lookup(formDict.get('Fields'));
  return fieldList.array;
};
// Demo script: load ./form.pdf, list its AcroForm field refs, and report
// which of the first two pages each ref appears on.
const pdfDoc = PDFDocumentFactory.load(fs.readFileSync('./form.pdf'));
const fieldRefs = getAcroFieldRefs(pdfDoc);
console.log('REFS:', fieldRefs.map(String));
// Resolve the `Annots` entry of page 1 and page 2 through the document
// index. A falsy lookup result is replaced by an empty array so the
// membership checks below can run unconditionally.
// NOTE(review): pages[0] and pages[1] are dereferenced without a length
// check, so this presumably expects form.pdf to have at least two pages;
// pages beyond the second are never inspected — confirm that is intended.
const pages = pdfDoc.getPages();
const page1AnnotsRaw = pdfDoc.index.lookupMaybe(pages[0].getMaybe('Annots'));
const page1Annots = page1AnnotsRaw ? page1AnnotsRaw.array : [];
const page2AnnotsRaw = pdfDoc.index.lookupMaybe(pages[1].getMaybe('Annots'));
const page2Annots = page2AnnotsRaw ? page2AnnotsRaw.array : [];
console.log('PAGE_1_ANNOTS:', page1Annots.map(String));
console.log('PAGE_2_ANNOTS:', page2Annots.map(String));
// For every field ref, log the page(s) whose annotation list contains it.
// The two checks are independent, so a ref present in both lists is
// reported twice.
// NOTE(review): assuming `.array` is a plain JS array, `includes` compares
// by identity; this presumably relies on the library returning the same
// ref object in Fields and Annots — TODO confirm.
fieldRefs.forEach((fieldRef) => {
if (page1Annots.includes(fieldRef)) {
console.log(`${fieldRef.toString()} belongs to page 1`);
}
if (page2Annots.includes(fieldRef)) {
console.log(`${fieldRef.toString()} belongs to page 2`);
}
});
/**
 * Finds the page on which a form field is displayed.
 *
 * A page matches when its `Annots` array contains the field's own reference
 * or the reference of any of the field's widget annotations. The widget
 * check matters because a field's widgets can be separate objects from the
 * field itself; in that case `Annots` lists the widget refs and a search for
 * `field.ref` alone never matches.
 *
 * @param pdfDoc - Document whose pages are searched.
 * @param field - Form field to locate.
 * @returns The 1-based page number (not a 0-based index, despite the
 *   function name), or -1 when no page references the field. Every page is
 *   scanned, so if several pages match, the last one wins.
 */
function getPageIndexFromField(pdfDoc: PDFDocument, field: PDFField): number {
  // Build the candidate refs once, outside the page loop: the field's own
  // ref plus the ref of each widget that is registered as an indirect object.
  const candidateRefs = new Set<unknown>([field.ref]);
  for (const widget of field.acroField.getWidgets()) {
    const widgetRef = pdfDoc.context.getObjectRef(widget.dict);
    if (widgetRef !== undefined) {
      candidateRefs.add(widgetRef);
    }
  }

  let fieldPageIndex = -1;
  pdfDoc.getPages().forEach((aPage, index) => {
    const pageAnnotsRaw = aPage.node.lookupMaybe(
      PDFName.of("Annots"),
      PDFArray
    );
    // Pages without an Annots entry simply have nothing to match.
    const pageAnnots = pageAnnotsRaw ? pageAnnotsRaw.asArray() : [];
    if (pageAnnots.some((annot) => candidateRefs.has(annot))) {
      fieldPageIndex = index + 1;
    }
  });
  return fieldPageIndex;
}