JavaScript 无法完成大数据集的全量查询

问题描述 投票:0回答:1

我正在尝试运行一个脚本来解析我的发件箱,并在我的外发电子邮件的“收件人”和“抄送”行中抓取唯一的电子邮件地址。我想我可能遇到了内存限制。但我没有收到任何错误。当我对搜索参数中包含数百封电子邮件的邮箱运行此脚本时,它运行良好...但是,如果我对每天发送 80 多封邮件的邮箱运行此脚本,它永远不会完成运行,并且状态已完成,数据被截断。上次运行仅提取了 20 天的数据,而不是 30 天的数据(Google 表中大约有 200 行)

目标是输入一系列日期(理想情况下是 90 天/过去三个月),然后撤回我们发送到的所有唯一身份。将它们返回到包含“名字”、“姓氏”、“电子邮件”、“日期”列的电子表格中。

我希望了解如何优化此功能或允许其运行更大的邮箱。

function exportSentEmails() {
  var sheet = SpreadsheetApp.getActiveSpreadsheet().getActiveSheet();
  sheet.clear(); // Clear any existing content in the sheet
  
  // Set up the headers in the first row
  sheet.appendRow(["First Name", "Last Name", "Email Address", "Date"]);

  // Define the date range (replace these with your desired dates)
  var startDate = new Date('2024-01-01'); // Change to your desired start date
  var endDate = new Date('2024-04-01');   // Change to your desired end date

  // Format dates to use in the Gmail search query
  var startFormatted = formatDateForQuery(startDate);
  var endFormatted = formatDateForQuery(endDate);

  // Get the sent emails from Gmail within the specified date range
  var threads = GmailApp.search('in:sent after:' + startFormatted + ' before:' + endFormatted);
  
  // Loop through each thread and extract email addresses and names
  var emailSet = new Set(); // To track unique email addresses
  threads.forEach(thread => {
    var messages = thread.getMessages();
    messages.forEach(message => {
      var recipients = message.getTo().split(',');
      var ccRecipients = message.getCc().split(','); // Extract CC recipients
      recipients = recipients.concat(ccRecipients); // Combine To and CC recipients
      recipients.forEach(recipient => {
        try {
          var email = recipient.match(/\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}\b/i);
          if (email) {
            email = email[0].trim().toLowerCase();
            if (!emailSet.has(email)) {
              emailSet.add(email);
              var nameParts = getNameParts(recipient, email);
              var date = formatDate(message.getDate());
              sheet.appendRow([nameParts.firstName, nameParts.lastName, email, date]);
            }
          }
        } catch (e) {
          Logger.log('Error processing recipient: ' + recipient + ' - ' + e.message);
        }
      });
    });
  });

  Logger.log('Export completed.');
}

// Helper function to format date for Gmail query
function formatDateForQuery(date) {
  return Utilities.formatDate(date, Session.getScriptTimeZone(), 'yyyy/MM/dd');
}

// Helper function to format date for spreadsheet
function formatDate(date) {
  return Utilities.formatDate(date, Session.getScriptTimeZone(), 'MM/dd/yyyy');
}

// Helper function to extract name parts from a recipient string
function getNameParts(recipient, email) {
  var displayName = recipient.replace(email, '').trim().replace(/["<>]/g, '');
  var nameParts = { firstName: "", lastName: "" };

  if (displayName) {
    var nameArray = displayName.split(' ');
    nameParts.firstName = nameArray[0];
    nameParts.lastName = nameArray.slice(1).join(' ');
  } else {
    var nameFromEmail = email.split('@')[0].replace(/[\.\_\-]/g, ' ');
    var nameArray = nameFromEmail.split(' ');
    nameParts.firstName = nameArray[0];
    nameParts.lastName = nameArray.slice(1).join(' ');
  }

  // Capitalize the first letter of each name part
  nameParts.firstName = toTitleCase(nameParts.firstName);
  nameParts.lastName = toTitleCase(nameParts.lastName);

  return nameParts;
}

// Helper function to convert a string to title case
function toTitleCase(str) {
  return str.split(' ').map(word => word.charAt(0).toUpperCase() + word.slice(1).toLowerCase()).join(' ');
}
javascript google-sheets google-apps-script
1个回答
0
投票

为了针对大数据集优化代码,请考虑批量处理(通过对内容进行分页和设置页面大小)。另外,不要将行附加到嵌套循环内的电子表格,而是将数据收集到数组中并将其附加到一个操作中。这显着减少了电子表格的迭代次数,这可能会很慢。

function exportSentEmails() {
  var sheet = SpreadsheetApp.getActiveSpreadsheet().getActiveSheet();
  sheet.clear(); 
  
  sheet.appendRow(["First Name", "Last Name", "Email Address", "Date"]);


  var startDate = new Date('2024-01-01'); // Change this to your desired start date accordingly
  var endDate = new Date('2024-04-01');   // Change this to your desired start date accordingly


  var startFormatted = formatDateForQuery(startDate);
  var endFormatted = formatDateForQuery(endDate);


  var query = 'in:sent after:' + startFormatted + ' before:' + endFormatted;
  var threads, pageToken, emailSet = new Set(), rows = [];

  do {
    threads = GmailApp.search(query, 0, 100, {pageToken: pageToken});
    threads.forEach(thread => {
      var messages = thread.getMessages();
      messages.forEach(message => {
        var recipients = message.getTo().split(',');
        var ccRecipients = message.getCc().split(','); // Extract CC recipients
        recipients = recipients.concat(ccRecipients); // Combine To and CC recipients
        recipients.forEach(recipient => {
          try {
            var email = recipient.match(/\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}\b/i);
            if (email) {
              email = email[0].trim().toLowerCase();
              if (!emailSet.has(email)) {
                emailSet.add(email);
                var nameParts = getNameParts(recipient, email);
                var date = formatDate(message.getDate());
                rows.push([nameParts.firstName, nameParts.lastName, email, date]);
              }
            }
          } catch (e) {
            Logger.log('Error processing recipient: ' + recipient + ' - ' + e.message);
          }
        });
      });
    });
    pageToken = GmailApp.getThreadPageToken(query, 0, 100);
  } while (pageToken);


  if (rows.length > 0) {
    sheet.getRange(sheet.getLastRow() + 1, 1, rows.length, rows[0].length).setValues(rows);
  }

  Logger.log('Export completed.');
}


function formatDateForQuery(date) {
  return Utilities.formatDate(date, Session.getScriptTimeZone(), 'yyyy/MM/dd');
}


function formatDate(date) {
  return Utilities.formatDate(date, Session.getScriptTimeZone(), 'MM/dd/yyyy');
}


function getNameParts(recipient, email) {
  var displayName = recipient.replace(email, '').trim().replace(/["<>]/g, '');
  var nameParts = { firstName: "", lastName: "" };

  if (displayName) {
    var nameArray = displayName.split(' ');
    nameParts.firstName = nameArray[0];
    nameParts.lastName = nameArray.slice(1).join(' ');
  } else {
    var nameFromEmail = email.split('@')[0].replace(/[\.\_\-]/g, ' ');
    var nameArray = nameFromEmail.split(' ');
    nameParts.firstName = nameArray[0];
    nameParts.lastName = nameArray.slice(1).join(' ');
  }


  nameParts.firstName = toTitleCase(nameParts.firstName);
  nameParts.lastName = toTitleCase(nameParts.lastName);

  return nameParts;
}

function toTitleCase(str) {
  return str.split(' ').map(word => word.charAt(0).toUpperCase() + word.slice(1).toLowerCase()).join(' ');
}

© www.soinside.com 2019 - 2024. All rights reserved.