我是 JavaScript 和 Puppeteer 库的新手。我知道什么是异步编程,并且我一直在 JavaScript 中使用它,但是当我尝试将它与 Puppeteer 一起使用时,我遇到了错误。
这是我的
index.js
文件:
// Entry script: launches a visible browser, overrides permissions for the
// Facebook origin, opens a page, and routes each page step through
// utils.webAction together with a logger and a human-readable step name.
// NOTE(review): puppeteer, myLog, utils and constants are not defined in this
// snippet; they are presumably required at the top of index.js - confirm.
(async () => {
const browser = await puppeteer.launch({
headless: false,
});
// Disable Facebook notifications
// NOTE(review): overridePermissions is given 'geolocation' and 'notifications'
// for this origin; presumably this keeps the permission prompt from showing - confirm.
const context = await browser.defaultBrowserContext();
await context.overridePermissions('https://www.facebook.com', ['geolocation', 'notifications']);
const page = await browser.newPage();
const logger = await myLog.logger;
console.log('About to use function.');
// The callback hands back whatever page.goto() returns. Whether the `await`
// below really waits for navigation depends on webAction waiting on that value.
await utils.webAction(
logger, 'Enter Facebook',
(() => (page.goto(constants.linkHomepage, {waitUntil: ['domcontentloaded', 'networkidle2']})))
);
console.log('Content loaded');
// User Login
await utils.webAction(
logger, 'Wait for loginEmail element',
(() => (page.waitForSelector(constants.cssLoginEmail, {visible: true}))),
);
// Using a fake (incorrect) xpath to trigger the "catch" block
// Here the step function is passed by reference and its arguments follow it;
// webAction forwards them through its rest parameter.
await utils.webAction(
logger, 'Enter user email using incorrect xpath', utils.webType,
page, "Fake user", "fake xpath/css" // Arguments of `webType`
);
// Nothing in this function closes the browser or catches a rejection of the IIFE.
})();
还有
utils.js
文件:
// Helpers shared by the entry script: a logging wrapper for page steps and a
// typing helper.
module.exports = {
/**
 * Calls `func(...args)` and writes one log entry describing the outcome.
 *
 * @param {object} logger - Object exposing `log({level, message})`.
 * @param {string} msg - Step description used as the log message.
 * @param {Function} func - Step to run.
 * @param {...*} args - Arguments forwarded to `func`.
 * @returns {Promise<string>} Resolves with a fixed string once the
 *   synchronous part of the executor has finished.
 */
webAction: function (logger, msg, func, ...args)
{
return new Promise((resolve) => {
let actionMessage, actionLevel;
try
{
// The value returned by func is discarded and never awaited, so this
// try/catch only sees errors thrown synchronously by func. A promise that
// func returns keeps running (and may reject) after the 'info' entry is logged.
func(...args);
actionMessage = msg;
actionLevel = 'info';
}
catch (error)
{
actionMessage = "Error while executing " + msg + " function.\n---\n" + error.message + "\n---";
actionLevel = 'error';
}
finally
{
logger.log({
level: actionLevel,
message: actionMessage
})
}
console.log('Inside resolve Promise');
// Resolution happens right after the synchronous call above, independent of
// any asynchronous work func started.
resolve('Fullfilment value of Promise');
console.log('Last line if resolve body');
})
},
// Type into input field
// The result of page.type() is neither awaited nor returned, so callers get
// `undefined` and cannot wait for the typing to finish or observe its failure.
// NOTE(review): getRandomIntInRange and constants are not defined in this
// snippet; presumably declared elsewhere in utils.js - confirm.
webType: function (page, text, xpath)
{
page.type(
xpath, text,
{delay: getRandomIntInRange(constants.minSpeed, constants.maxSpeed)}
);
},
};
我创建了
webAction
函数,因为我希望代码看起来更干净,并跟踪程序崩溃的位置(我愿意接受其他方法来实现这一点)。
这是我遇到的错误:
About to use function.
info: Enter Facebook {"timestamp":"2024-01-18 16:43:59"}
Inside resolve Promise
Last line if resolve body
Content loaded
info: Wait for loginEmail element {"timestamp":"2024-01-18 16:43:59"}
Inside resolve Promise
Last line if resolve body
info: Enter user email using incorrect xpath {"timestamp":"2024-01-18 16:43:59"}
Inside resolve Promise
Last line if resolve body
/home/user/Projects/Javascript/Facebook/node_modules/puppeteer-core/lib/cjs/puppeteer/common/CallbackRegistry.js:85
this._reject(callback, new Errors_js_1.TargetCloseError('Target closed'));
TargetCloseError: Protocol error (Runtime.callFunctionOn): Target closed
at CallbackRegistry.clear (/home/user/Projects/Javascript/Facebook/node_modules/puppeteer-core/lib/cjs/puppeteer/common/CallbackRegistry.js:85:36)
at CdpCDPSession._onClosed (/home/user/Projects/Javascript/Facebook/node_modules/puppeteer-core/lib/cjs/puppeteer/cdp/CDPSession.js:113:25)
at Connection.onMessage (/home/user/Projects/Javascript/Facebook/node_modules/puppeteer-core/lib/cjs/puppeteer/cdp/Connection.js:132:25)
at WebSocket.<anonymous> (/home/user/Projects/Javascript/Facebook/node_modules/puppeteer-core/lib/cjs/puppeteer/node/NodeWebSocketTransport.js:52:32)
at callListener (/home/user/Projects/Javascript/Facebook/node_modules/ws/lib/event-target.js:290:14)
at WebSocket.onMessage (/home/user/Projects/Javascript/Facebook/node_modules/ws/lib/event-target.js:209:9)
at WebSocket.emit (node:events:514:28)
at Receiver.receiverOnMessage (/home/user/Projects/Javascript/Facebook/node_modules/ws/lib/websocket.js:1192:20)
at Receiver.emit (node:events:514:28)
at Receiver.dataMessage (/home/user/Projects/Javascript/Facebook/node_modules/ws/lib/receiver.js:560:14) {
cause: ProtocolError
at <instance_members_initializer> (/home/user/Projects/Javascript/Facebook/node_modules/puppeteer-core/lib/cjs/puppeteer/common/CallbackRegistry.js:96:14)
at new Callback (/home/user/Projects/Javascript/Facebook/node_modules/puppeteer-core/lib/cjs/puppeteer/common/CallbackRegistry.js:100:16)
at CallbackRegistry.create (/home/user/Projects/Javascript/Facebook/node_modules/puppeteer-core/lib/cjs/puppeteer/common/CallbackRegistry.js:32:26)
at Connection._rawSend (/home/user/Projects/Javascript/Facebook/node_modules/puppeteer-core/lib/cjs/puppeteer/cdp/Connection.js:91:26)
at CdpCDPSession.send (/home/user/Projects/Javascript/Facebook/node_modules/puppeteer-core/lib/cjs/puppeteer/cdp/CDPSession.js:78:33)
at next (/home/user/Projects/Javascript/Facebook/node_modules/puppeteer-extra-plugin-stealth/evasions/sourceurl/index.js:34:41)
at CdpCDPSession.send (/home/user/Projects/Javascript/Facebook/node_modules/puppeteer-extra-plugin-stealth/evasions/sourceurl/index.js:75:16)
at #evaluate (/home/user/Projects/Javascript/Facebook/node_modules/puppeteer-core/lib/cjs/puppeteer/cdp/ExecutionContext.js:211:50)
at process.processTicksAndRejections (node:internal/process/task_queues:95:5)
at async ExecutionContext.evaluateHandle (/home/user/Projects/Javascript/Facebook/node_modules/puppeteer-core/lib/cjs/puppeteer/cdp/ExecutionContext.js:178:16)
}
Node.js v18.17.0
我希望能够继续使用
webAction
函数,因此如果您知道如何修复这个错误,非常欢迎指教。
输出显示它已正确进入 Facebook,但事实并非如此:浏览器一打开就立即关闭了!显然它没有按顺序正确执行后面的语句(
waitForSelector
和 utils.webType
)。有人可以解释一下发生了什么,以及如何让这段代码运行而不出现任何错误吗?
在您的 webAction 函数中,您调用了这些函数,但并没有等待它们返回的 Promise 完成。
您需要修改 utils.js 文件,以等待 func 调用的结果:
module.exports = {
webAction: async function (logger, msg, func, ...args)
{
return new Promise(async (resolve) => {
let actionMessage, actionLevel;
try
{
await func(...args);
actionMessage = msg;
actionLevel = 'info';
}
catch (error)
{
actionMessage = "Error while executing " + msg + " function.\n---\n" + error.message + "\n---";
actionLevel = 'error';
}
finally
{
logger.log({
level: actionLevel,
message: actionMessage
})
}
console.log('Inside resolve Promise');
resolve('Fullfilment value of Promise');
console.log('Last line if resolve body');
})
},
webType: async function (page, text, xpath)
{
await page.type(
xpath, text,
{delay: getRandomIntInRange(constants.minSpeed, constants.maxSpeed)}
);
},
};
Promise 的规则是:如果您关心执行顺序或结果(99% 的情况下都是如此),就需要
await
这个 Promise。要 await 函数内部产生的
Promise,必须把 Promise 链返回给调用者,以便调用者可以在那里等待。目前,您的 webAction
函数没有 await func()
,webType
也没有 await page.type()
。
实际上,您的脚本在不等待任何结果的情况下触发了数十个并发的 Promise,因此执行顺序是任意的,几乎必然会出错——浏览器会在其他操作仍在进行时被关闭。
虽然这修复了您的代码,但设计仍有改进空间。记录 Puppeteer 操作的一种方法是使用代理(Proxy),并将每个调用转发给
page
:
const puppeteer = require("puppeteer"); // ^21.6.0
/**
 * Builds a logging front-end for `page`.
 *
 * `makeLogger(page)` returns a function taking a message; calling it yields a
 * Proxy on which any method of `page` can be invoked. Each call is forwarded
 * to `page`, then logged with the message: `console.log` on success,
 * `console.error` on failure.
 *
 * Failures are logged and swallowed: the wrapped call then resolves to
 * `undefined` rather than rejecting, so a script can continue past a failing step.
 *
 * @param {object} page - Target whose methods are forwarded to (e.g. a Puppeteer page).
 * @returns {function(string): object} Factory producing a logging proxy per message.
 */
const makeLogger = page => {
  return msg => {
    const handler = {
      get(_target, propKey) {
        const member = page[propKey];
        // Pass non-callable members straight through so plain properties stay
        // readable and the proxy is not mistaken for a thenable (`then` lookup).
        if (typeof member !== "function") {
          return member;
        }
        // The async wrapper also covers methods that return a plain value or
        // throw synchronously; chaining `.then` on their result would fail.
        return async (...args) => {
          try {
            // apply() keeps `this` bound to the real page.
            const result = await member.apply(page, args);
            // happy path logging here
            console.log(`${String(propKey)}: ${msg}`);
            return result;
          } catch (err) {
            // sad path logging here
            console.error(`${msg} (${err?.message ?? String(err)})`);
          }
        };
      }
    };
    return new Proxy({}, handler);
  };
};
// sample usage:
let browser;
(async () => {
browser = await puppeteer.launch({headless: "new"});
const [page] = await browser.pages();
const p = makeLogger(page);
await p("example.com").goto("https://www.example.com");
const text = await p("get text content").$eval("h1", el => el.textContent);
console.log(" got text:", text);
await p("test throwing").evaluate("notExists.foobar");
console.log(" done");
})()
.catch(err => console.error(err))
.finally(() => browser?.close());
输出:
goto: example.com
$eval: get text content
got text: Example Domain
test throwing (notExists is not defined)
done
您应该能够在这里接入自定义的记录器,并按自己的喜好进行调整。就像这里对
page
的处理一样,请使用闭包,这样就不必在每次调用时重复传递相同的参数。