我一直在参照 fflate 的 README 编写代码,最终写出了下面这段把 PDF 拆分成多个部分、并把这些部分保存到一个 zip 文件中的代码。核心函数如下:
/**
 * Splits `pdf` into consecutive page ranges and packs one PDF per range into a ZIP.
 *
 * Each entry of `splitAfterMarks` is used as an exclusive upper page index: the
 * range for a mark runs from the page after the previous range up to `mark - 1`.
 * The returned promise resolves with a Blob built from every chunk the ZIP stream
 * emitted once it reports its final chunk, and rejects if the stream reports an error.
 *
 * NOTE(review): the per-range work runs inside `forEach(async ...)`, whose returned
 * promises are not awaited, so `zip.end()` below executes before any callback has
 * progressed past its first `await`. That ordering is consistent with the
 * "stream finishing" error reported for this snippet.
 */
export function splitPdfAndExport(
pdf: PDFDocument,
splitAfterMarks: Array<number>,
): Promise<Blob> {
return new Promise((res, rej) => {
// Chunks emitted by the ZIP stream, in emission order.
const streams: Array<any> = []
const zip = new fflate.Zip()
zip.ondata = (err, dat, final) => {
console.log('ZIP', err, dat, final, err instanceof Error)
if (!err) {
// Collect the chunk; once the stream signals its final chunk, resolve with the whole archive.
streams.push(dat)
if (final) {
res(new Blob(streams, { type: 'application/octet-stream' }))
}
} else {
return rej(err)
}
}
// Index of the next page that has not yet been assigned to a range (shared by all callbacks).
let previousPage = 0
// forEach ignores the promise returned by each async callback, so nothing here is awaited.
splitAfterMarks.forEach(async pageI => {
const sub = await PDFDocument.create()
const pages: Array<number> = []
// Take every not-yet-assigned page index below the current mark.
while (previousPage < pageI) {
pages.push(previousPage++)
}
// `start`/`end` are undefined when the range is empty (mark not above previousPage).
let start = pages[0]
let end = pages[pages.length - 1]
const name =
start === end ? `page.${start}.pdf` : `page.${start}-${end}.pdf`
const copiedPages = await sub.copyPages(pdf, pages)
for (const page of copiedPages) {
sub.addPage(page)
}
const data = await sub.save() // serialized sub-document; original note said ArrayBuffer (pdf-lib documents a Uint8Array - TODO confirm)
const zipFile = new fflate.ZipDeflate(name, {
level: 7,
})
// The file stream is registered with the archive before any data is pushed to it.
zip.add(zipFile)
zipFile.push(data, true)
})
// Runs synchronously right after forEach returns, i.e. before the callbacks above reach zip.add().
zip.end()
})
}
但是,它似乎立刻就输出了 `ZIP Error: stream finishing`,而我下载到的 `Blob` 基本上是一个空的/损坏的 zip 文件。下面是其余的代码(使用了 `pdf-lib`,它拆分 PDF 看起来没有问题——我在本地手动检查过,拆分结果都是有效的 PDF):
/**
 * Export handler: loads the PDF held in `buffer`, splits it at
 * `splitAfterMarksNumbers`, and offers the resulting archive as
 * `split.pdf.zip`. Does nothing when `buffer` is falsy.
 */
const handleExport = async () => {
  if (!buffer) {
    return
  }
  const sourcePdf = await PDFDocument.load(buffer)
  const archive = await splitPdfAndExport(sourcePdf, splitAfterMarksNumbers)
  downloadBlob(archive, 'split.pdf.zip')
}
// Hidden anchor element created once at module load and reused by downloadBlob
// to trigger downloads.
let a: HTMLAnchorElement
try {
a = document.createElement('a')
a.style.display = 'none'
document.body.appendChild(a)
} catch (e) {
// Deliberately ignored: any failure above is swallowed. If createElement itself
// failed, `a` stays unassigned. Presumably this guards environments without a
// DOM - TODO confirm.
}
/**
 * Starts a browser download of `blob` under the file name `fileName`.
 *
 * Points the module-level hidden anchor `a` at a temporary object URL for the
 * blob, clicks it programmatically, and then revokes the URL.
 *
 * @param blob - Content to download.
 * @param fileName - Name suggested to the browser for the saved file.
 */
export function downloadBlob(blob: Blob, fileName: string) {
  const objectUrl = URL.createObjectURL(blob)
  // Same two property writes as before, in the same order: href first, then download.
  Object.assign(a, { href: objectUrl, download: fileName })
  a.click()
  window.URL.revokeObjectURL(objectUrl)
}
有人知道为什么这段代码不起作用吗?我是否需要添加某种处理程序/钩子,以便在回调中处理这些流?我在文档中没有看到相关说明,而且源码中的错误代码也没有提供任何线索。
原因在于:我在 `Promise` 里使用了 `forEach(async () => ...)`,而外层函数并不是异步的,也没有等待这些回调完成,所以在任何文件被添加进去之前,流就已经结束了。下面是现在可以正常运行的版本:
/**
 * Converts split marks into the list of page-index ranges to export.
 *
 * Each mark is an exclusive upper page index; the total page count is appended
 * as a final mark so the trailing pages form the last range. Marks are clamped
 * to `pageCount`, and marks that would produce an empty range (duplicates,
 * out-of-order values, or a mark equal to the page count) are skipped.
 */
function pageRangesFromMarks(
  splitAfterMarks: ReadonlyArray<number>,
  pageCount: number,
): number[][] {
  const ranges: number[][] = []
  let nextPage = 0
  for (const mark of splitAfterMarks.concat([pageCount])) {
    const end = Math.min(mark, pageCount)
    const pages: number[] = []
    while (nextPage < end) {
      pages.push(nextPage++)
    }
    if (pages.length > 0) {
      ranges.push(pages)
    }
  }
  return ranges
}

/**
 * Splits `pdf` into consecutive page ranges and packs one PDF per range into a
 * ZIP archive.
 *
 * Entries are named `page.<start>.pdf` for single-page ranges and
 * `page.<start>-<end>.pdf` otherwise, using zero-based page indices.
 *
 * @param pdf - Source document whose pages are copied into the parts.
 * @param splitAfterMarks - Exclusive upper page indices at which to split; the
 *   pages after the last mark form a final part.
 * @returns A promise resolving to the finished archive as a Blob.
 * @throws Rejects with the error reported by the ZIP stream, or with whatever
 *   error occurred while building or adding one of the parts.
 */
export async function splitPdfAndExport(
  pdf: PDFDocument,
  splitAfterMarks: Array<number>,
): Promise<Blob> {
  const ranges = pageRangesFromMarks(splitAfterMarks, pdf.getPages().length)
  const zip = new fflate.Zip()
  const chunks: BlobPart[] = []

  // Only the callback-style stream API is wrapped in a Promise; the async work
  // stays outside the executor so its failures reject the returned promise.
  const archive = new Promise<Blob>((resolve, reject) => {
    zip.ondata = (err, chunk, final) => {
      if (err) {
        reject(err)
        return
      }
      chunks.push(chunk)
      if (final) {
        resolve(new Blob(chunks, { type: 'application/octet-stream' }))
      }
    }
  })
  // A stream error can arrive while we are still awaiting pdf-lib below; mark
  // the rejection as observed so it is not reported as unhandled before
  // `archive` is returned to the caller.
  archive.catch(() => undefined)

  try {
    for (const pages of ranges) {
      const start = pages[0]
      const end = pages[pages.length - 1]
      const name =
        start === end ? `page.${start}.pdf` : `page.${start}-${end}.pdf`

      const sub = await PDFDocument.create()
      const copiedPages = await sub.copyPages(pdf, pages)
      for (const page of copiedPages) {
        sub.addPage(page)
      }
      const data = await sub.save()

      const zipFile = new fflate.AsyncZipDeflate(name, {
        level: 9,
      })
      // Always add streams to ZIP archives before pushing to those streams
      zip.add(zipFile)
      zipFile.push(data, true)
    }
    // Every part has been added and pushed, so the archive can be finalized.
    zip.end()
  } catch (error) {
    // Stop any compression still running for parts that were already added.
    zip.terminate()
    throw error
  }

  return archive
}