使用 google apps 脚本将 docx 文件合并为单个 pdf 时出错

Question

我使用了 Tanaike 提供的 Google Apps Script 代码，将多个 PDF 文件合并为一个。我的要求是：

获取文件id

检查它是pdf还是docx

如果是docx，转换成pdf然后合并

这是修改后的片段：

/**
 * Attempts to merge the Drive files linked in a sheet cell into one PDF
 * ("sample2.pdf") placed in the destination folder.
 *
 * Steps, as written:
 *   1. Read the cell at row 2, column 2 of `sheet`, split it on commas and
 *      pull a file id out of each URL.
 *   2. For each id, obtain bytes: PDF files are read directly; DOCX files go
 *      through a conversion branch.
 *   3. Fetch pdf-lib from a CDN, evaluate it, and copy every page of every
 *      input into a new document.
 *   4. Save the merged document as a new Drive file and move it to the folder.
 *
 * NOTE(review): the DOCX branch does not produce PDF data (see the note inside
 * it); this is the snippet that raises the "No PDF header found" parse error.
 *
 * `sheet` is not defined inside this function; it has to be provided by the
 * surrounding script.
 *
 * @returns {Promise<void>} Resolves once the merged file has been moved.
 */
async function newMain() {
  // Retrieve PDF data.
 
  var destinationFolder = DriveApp.getFolderById("your-folder-id")
  var urls = sheet.getRange(2,2).getValue().toString().split(",");  //split the urls joined by commas 
  // Each entry of `ids` is a one-element array holding the captured id, or an
  // empty array when the URL does not match the /file/d/<id>/edit pattern.
  const ids = urls.map((url) => {
  const matches = url.match(/\/file\/d\/([^\/]+)\/edit/); //get file id from urls
  return matches ? [matches[1]] : [];
});

  // The callback destructures the one-element array; for an empty array `id`
  // is undefined.
  const data = ids.map(([id]) => {
  const file = DriveApp.getFileById(id);
  const mimeType = file.getMimeType();

  // Check if the file is a DOCX
if (mimeType === 'application/vnd.openxmlformats-officedocument.wordprocessingml.document') {
  
  
   // Upload the DOCX bytes as a new Drive file with conversion requested.
   var blob = file.getBlob();
   var tempfile = Drive.Files.insert({}, blob, {convert:true});
   // Redeclares the callback parameter: from here on `id` is the temporary
   // file's id, not the original DOCX id.
   var id = tempfile["id"];
  
   // NOTE(review): only the document's plain text is extracted here and then
   // written into a file that is merely labelled "application/pdf". The bytes
   // read back below are that text, not a PDF, so pdf-lib cannot find a PDF
   // header when loading them.
   var doc = DocumentApp.openById(id);
   var body = doc.getBody();
   var text = body.getText();
   var pdfFile = destinationFolder.createFile(`temp.pdf`, text, "application/pdf"); 

    // Read back the bytes of the file created above
   const pdfData = pdfFile.getBlob().getBytes();
   // Delete the temporary converted document
   Drive.Files.remove(id);
    // Trash the temporary "temp.pdf" file
   DriveApp.getFileById(pdfFile.getId()).setTrashed(true);
   return new Uint8Array(pdfData);

  } else if (mimeType === 'application/pdf') {
    // File is already a PDF, fetch its content
    return new Uint8Array(file.getBlob().getBytes());
  } 
  // Any other MIME type falls through, so the callback returns undefined for it.
});



  // Load pdf-lib
  // The library source is downloaded and evaluated at run time. Before eval,
  // every `setTimeout(..., <n>)` call in the source is rewritten to
  // `Utilities.sleep(<n>);return t();` (presumably because setTimeout is not
  // available in this runtime - TODO confirm).
  const cdnjs = "https://cdn.jsdelivr.net/npm/pdf-lib/dist/pdf-lib.min.js";
  eval(UrlFetchApp.fetch(cdnjs).getContentText().replace(/setTimeout\(.*?,.*?(\d*?)\)/g, "Utilities.sleep($1);return t();"));

  // Merge PDFs.
  // Each input is parsed, then its pages are copied into pdfDoc one at a time.
  const pdfDoc = await PDFLib.PDFDocument.create();
  for (let i = 0; i < data.length; i++) {
    const pdfData = await PDFLib.PDFDocument.load(data[i]);
    for (let j = 0; j < pdfData.getPageCount(); j++) {
      const [page] = await pdfDoc.copyPages(pdfData, [j]);
      pdfDoc.addPage(page);
    }
  }
  const bytes = await pdfDoc.save();

  // Create a PDF file.
  // The saved bytes are reinterpreted as Int8 values and spread into a plain
  // array before being handed to Utilities.newBlob.
  var file = DriveApp.createFile(Utilities.newBlob([...new Int8Array(bytes)], MimeType.PDF, "sample2.pdf"));
  file.moveTo(destinationFolder);
 
 }

当我运行它时，我收到以下错误：

Error: Failed to parse PDF document (line:368 col:0 offset=18311): No PDF header found（无法解析 PDF 文档：找不到 PDF 文件头）
e
e
e.parseHeader
eval
eval
eval
eval
我
e.parseDocument

由于我还是初学者，无法自行解决或改进这个问题，非常感谢任何这方面的指导。

Answer 1

修改要点：

如果 `ids` 中的值是 DOCX 文件的有效文件 ID，那么我认为您脚本中的 `pdfData` 并不是 PDF 数据。您的脚本只是取出文档的文本，并把这些文本保存成一个文件，其内容并不包含 PDF 数据。我认为这可能就是您当前问题的原因。

既然如此，下面的修改如何？

修改后的脚本：

/**
 * Merges the Drive files linked in a sheet cell into a single PDF named
 * "sample2.pdf", created inside the destination folder.
 *
 * The cell at row 2, column 2 of `sheet` is expected to hold Drive file links
 * of the form `.../file/d/<id>/edit`, joined by commas. PDF files are used as
 * they are; DOCX files are converted to a temporary Google Doc, exported as
 * PDF, and the temporary document is always deleted afterwards.
 *
 * `sheet` is not defined inside this function; it has to be provided by the
 * surrounding script. `Drive.Files.insert` / `Drive.Files.remove` require the
 * advanced Drive service to be enabled for the project.
 *
 * @returns {Promise<void>} Resolves once the merged PDF has been created.
 * @throws {Error} If the cell contains no links, a link has no recognizable
 *     file id, or a linked file is neither a PDF nor a DOCX.
 */
async function newMain() {
  const destinationFolder = DriveApp.getFolderById("your-folder-id");

  // Row 2, column 2 holds the file links joined by commas.
  const cellText = sheet.getRange(2, 2).getValue().toString();
  const ids = extractDriveFileIds(cellText.split(","));
  if (ids.length === 0) {
    throw new Error("No Drive file URLs found in the sheet cell.");
  }

  // Retrieve PDF data (converting DOCX files on the way).
  const data = ids.map((id) => readFileAsPdfBytes(id));

  // Load pdf-lib.
  // SECURITY NOTE: this downloads third-party code and evaluates it at run
  // time, and the URL is not pinned to a version. Consider pinning a version
  // or bundling the library with the project.
  // Before eval, every `setTimeout(..., <n>)` call in the library source is
  // rewritten to `Utilities.sleep(<n>);return t();`.
  const cdnjs = "https://cdn.jsdelivr.net/npm/pdf-lib/dist/pdf-lib.min.js";
  eval(UrlFetchApp.fetch(cdnjs).getContentText().replace(/setTimeout\(.*?,.*?(\d*?)\)/g, "Utilities.sleep($1);return t();"));

  // Merge PDFs. All pages of one source are copied in a single call instead
  // of one call per page.
  const pdfDoc = await PDFLib.PDFDocument.create();
  for (const pdfBytes of data) {
    const sourceDoc = await PDFLib.PDFDocument.load(pdfBytes);
    const pages = await pdfDoc.copyPages(sourceDoc, sourceDoc.getPageIndices());
    for (const page of pages) {
      pdfDoc.addPage(page);
    }
  }
  const bytes = await pdfDoc.save();

  // Create the PDF file directly in the destination folder. The saved bytes
  // are reinterpreted as Int8 values and spread into a plain array for newBlob.
  const mergedBlob = Utilities.newBlob([...new Int8Array(bytes)], MimeType.PDF, "sample2.pdf");
  destinationFolder.createFile(mergedBlob);
}

/**
 * Extracts the Drive file id from each link, skipping blank entries.
 *
 * @param {string[]} urls Links of the form `.../file/d/<id>/edit`.
 * @returns {string[]} The file ids, in the same order as the links.
 * @throws {Error} If a non-blank entry does not contain a file id.
 */
function extractDriveFileIds(urls) {
  const ids = [];
  for (const rawUrl of urls) {
    const url = rawUrl.trim();
    if (url === "") {
      // A trailing comma or an empty cell produces blank entries; ignore them.
      continue;
    }
    const matches = url.match(/\/file\/d\/([^\/]+)\/edit/);
    if (matches === null) {
      throw new Error(`Cannot extract a Drive file id from "${url}".`);
    }
    ids.push(matches[1]);
  }
  return ids;
}

/**
 * Returns the PDF bytes for one Drive file, converting DOCX files to PDF.
 *
 * @param {string} id Drive file id of a PDF or DOCX file.
 * @returns {Uint8Array} The file's content as PDF bytes.
 * @throws {Error} If the file is neither a PDF nor a DOCX.
 */
function readFileAsPdfBytes(id) {
  const file = DriveApp.getFileById(id);
  const mimeType = file.getMimeType();

  if (mimeType === 'application/pdf') {
    return new Uint8Array(file.getBlob().getBytes());
  }

  if (mimeType === 'application/vnd.openxmlformats-officedocument.wordprocessingml.document') {
    // Upload the DOCX with conversion requested, then export the converted
    // document explicitly as PDF.
    const tempfile = Drive.Files.insert({}, file.getBlob(), { convert: true });
    try {
      const pdfBlob = DriveApp.getFileById(tempfile.id).getAs(MimeType.PDF);
      return new Uint8Array(pdfBlob.getBytes());
    } finally {
      // Always delete the temporary document, even when the export fails.
      Drive.Files.remove(tempfile.id);
    }
  }

  throw new Error(`Unsupported file type "${mimeType}" for file "${id}"; expected a PDF or DOCX file.`);
}

在此修改后的脚本中，如果 `ids` 的值是 DOCX 文件的有效文件 ID，则先将 DOCX 文件转换为 Google 文档，再将该 Google 文档转换为 PDF 数据，最后把各个 PDF 数据合并。

注：

此修改后的脚本假设您的 `ids` 值是 DOCX 文件的有效文件 ID。请注意这一点。

使用 google apps 脚本将 docx 文件合并为单个 pdf 时出错

问题描述投票：0回答：1

1个回答

修改要点：

修改后的脚本：

注：

最新问题

使用 google apps 脚本将 docx 文件合并为单个 pdf 时出错

问题描述 投票：0回答：1

1个回答

修改要点：

修改后的脚本：

注：

最新问题

问题描述投票：0回答：1