我有一个简单的 Node.js 脚本,用来捕获一些网页的屏幕截图。看起来我在使用 async/await 的某个地方出了问题,但我不知道具体在哪里。我目前使用的是 Puppeteer v1.11.0。
const puppeteer = require('puppeteer');
//a list of sites to screenshot; each key is also used as the base name of the output PDF
const papers =
{
nytimes: "https://www.nytimes.com/",
wapo: "https://www.washingtonpost.com/"
};
//launch puppeteer, do everything in .then() handler
//NOTE: this handler is not async, and the promise returned by stepThru() at the bottom is neither awaited nor returned
puppeteer.launch({devtools:false}).then(function(browser){
//create a load_page function that returns a promise which resolves when screenshot is taken
//paper: a key of `papers`; the output is written to paper + '.pdf'
async function load_page(paper){
const url = papers[paper];
//NOTE: the executor is async, so an error from any await below rejects the executor's own (ignored) promise
//and never calls reject(); the promise returned here would then stay pending
return new Promise(async function(resolve, reject){
const page = await browser.newPage();
await page.setViewport({width:1024, height: 768});
//screenshot on first console message
//(the output is actually a PDF via page.pdf(); once() means the listener runs at most one time)
page.once("console", async console_msg => {
await page.pdf({path: paper + '.pdf',
printBackground:true,
width:'1024px',
height:'768px',
margin: {top:"0px", right:"0px", bottom:"0px", left:"0px"}
});
//close page
//NOTE(review): this can run while the page.goto() below is still waiting for networkidle0 -- confirm
await page.close();
//resolve promise
resolve();
});
//go to page
//the listener above is registered before navigation starts
await page.goto(url, {"waitUntil":["load", "networkidle0"]});
})
}
//step through the list of papers, calling the above load_page()
async function stepThru(){
for(var p in papers){
if(papers.hasOwnProperty(p)){
//wait to load page and screenshot before loading next page
await load_page(p);
}
}
//close browser after loop has finished (and all promises resolved)
await browser.close();
}
//kick it off
//the returned promise is dropped here, so nothing waits for stepThru() to finish or observes its rejection
stepThru();
//getting this error message:
//UnhandledPromiseRejectionWarning: Error: Navigation failed because browser has disconnected!
});
`Navigation failed because browser has disconnected` 错误通常意味着:启动 Puppeteer 的 Node 脚本没有等待 Puppeteer 的操作完成就结束了。因此,正如您所说,这是某处缺少等待(await)造成的问题。
关于你的脚本,我做了一些更改以使其正常工作:
1. 首先,您没有等待 `stepThru` 函数的(异步)结束,因此请将
`stepThru();`
改为
`await stepThru();`
并将
`puppeteer.launch({devtools:false}).then(function(browser){`
改为
`puppeteer.launch({devtools:false}).then(async function(browser){`
(我添加了 `async`)。
2. 我更改了管理 `goto` 和 `page.once` 这两个 Promise 的方式。PDF 的 Promise 现在是:
//turns the one-shot "console" event into a promise that resolves once page.pdf() has finished
//`page` and `paper` come from the enclosing load_page(); reject is never called in this excerpt
new Promise(async function(resolve, reject){
//screenshot on first console message
page.once("console", async () => {
await page.pdf({
path: paper + '.pdf',
printBackground:true,
width:'1024px',
height:'768px',
margin: {
top:"0px",
right:"0px",
bottom:"0px",
left:"0px"
}
});
resolve();
});
})
它只有一个职责,就是创建 PDF。
3. 然后,我用 `Promise.all` 统一管理 `page.goto` 和 PDF 这两个 Promise:
//start the navigation and the PDF wait together; execution resumes once both have fulfilled
//(Promise.all rejects as soon as either of them rejects)
await Promise.all([
page.goto(url, {"waitUntil":["load", "networkidle2"]}),
new Promise(async function(resolve, reject){
// ... pdf creation as above
})
]);
4. 我把 `page.close` 移到了 `Promise.all` 之后:
await Promise.all([
// page.goto
// PDF creation
]);
//close the tab only after the Promise.all above has fulfilled
await page.close();
//settle the enclosing promise (presumably the one load_page() returns -- see the full script)
resolve();
现在它可以工作了,这里是完整的工作脚本:
const puppeteer = require('puppeteer');
//a list of sites to screenshot; each key is also used as the base name of the output PDF
const papers =
{
  nytimes: "https://www.nytimes.com/",
  wapo: "https://www.washingtonpost.com/"
};
//launch puppeteer, do everything in .then() handler
puppeteer.launch({devtools:false}).then(async function(browser){
  /**
   * Open papers[paper] in a new tab and write `<paper>.pdf` when the page
   * emits its first console message.
   *
   * @param {string} paper - key into `papers`
   * @returns {Promise<void>} fulfils after the PDF is written, navigation has
   *   finished and the tab is closed; rejects if opening the tab, navigating
   *   or creating the PDF fails.
   */
  async function load_page(paper){
    const url = papers[paper];
    //load_page is already async, so no `new Promise(async ...)` wrapper is needed:
    //any failure below now rejects the returned promise instead of leaving it pending
    const page = await browser.newPage();
    try {
      await page.setViewport({width:1024, height: 768});
      //adapt the one-shot "console" event into a promise; this is the only place
      //that needs `new Promise`, and page.pdf() failures are forwarded to reject.
      //NOTE: a page that never logs to the console never settles this promise
      const pdf_written = new Promise(function(resolve, reject){
        //create the PDF on first console message
        page.once("console", function(){
          page.pdf({
            path: paper + '.pdf',
            printBackground:true,
            width:'1024px',
            height:'768px',
            margin: {top:"0px", right:"0px", bottom:"0px", left:"0px"}
          }).then(resolve, reject);
        });
      });
      //wait for both the navigation and the PDF before touching the tab again
      await Promise.all([
        page.goto(url, {"waitUntil":["load", "networkidle2"]}),
        pdf_written
      ]);
    } finally {
      //close the tab on success and on failure alike
      await page.close();
    }
  }
  /**
   * Step through the list of papers one at a time, then close the browser.
   *
   * @returns {Promise<void>} rejects with the first load_page() failure.
   */
  async function stepThru(){
    try {
      //Object.keys() yields own enumerable keys only, same as for...in + hasOwnProperty
      for(const p of Object.keys(papers)){
        //wait to load page and write the PDF before loading next page
        await load_page(p);
      }
    } finally {
      //always shut the browser down so a failure does not leave Chromium running
      await browser.close();
    }
  }
  await stepThru();
}).catch(function(err){
  //report failures explicitly instead of as an UnhandledPromiseRejectionWarning
  console.error(err);
  process.exitCode = 1;
});
请注意:
- 我将 `networkidle0` 更改为 `networkidle2`,因为 nytimes.com 网站需要很长时间才能达到 0 个网络请求的状态(由于广告等)。显然,您也可以等待 `networkidle0`,这取决于您,但已超出您的问题范围(在这种情况下请增加 `page.goto` 的超时时间)。
- `www.washingtonpost.com` 网站会出现 `TOO_MANY_REDIRECTS` 错误,因此我改用了 `washingtonpost.com`,但我认为您应该对此做进一步调查。为了测试脚本,我多次用 `nytimes` 网站和其他网站运行了它。再次强调:这超出了您的问题范围。
如果您需要更多帮助,请告诉我😉
当系统盘已满时,我也遇到同样的错误。
如果您在 AWS lambda 中使用 Puppeteer,则回滚您的运行时配置,此问题将得到解决。