我一直在写一个刮除Trailblazer网站的代码。该网站是一个[[dynamic网站,因此,当我单击Show More按钮时,它将加载用户拥有的其他徽章。因此,有没有办法让我单击该按钮,然后等待添加新内容,然后再将标记加载到cheerio中进行抓取。
当前,我只是让浏览器等待3秒钟,然后重新单击,但这会使整个过程花费很多时间,而且如果浏览器在这段时间内无法获取,也可能会失败。那么还有其他选择吗?请在下面找到我的代码->
const url = "https://trailblazer.me/id/akganesa";
// function to load the page
async function getPage(url) {
const browser = await puppeteer.launch({headless: true});
const page = await browser.newPage();
await page.goto(url, {waitUntil: 'networkidle0'});
// initialize i
// the problem area ----
var i = 1;
// if i then loop
while(i){
const [button] = await page.$x("//button[contains(., 'Show More')]");
if (button) {
await button.click();
await page.waitFor(3000);
}
// if show more is not there then set i to 0
else{
i = 0;
}
}
const html = await page.content(); // serialized HTML of page DOM.
await browser.close();
return html;
}
const go = async () => {
const browser = await puppeteer.launch({
headless: false,
args: [
"--no-sandbox",
"--disable-setuid-sandbox",
"--window-size=1600,1200"
],
defaultViewport: null
});
const context = await browser.createIncognitoBrowserContext();
const page = await context.newPage();
try {
await page.goto("https://trailblazer.me/id/akganesa", {
waitUntil: "networkidle2"
});
const [first_button] = await page.$x("//button[contains(., 'Show More')]");
await first_button.click();
while (
(await (await page.$x("//button[contains(., 'Show More')]")).length) > 0
) {
const [button] = await page.$x("//button[contains(., 'Show More')]");
await button.click();
await page.waitForResponse(response => response.status() === 200);
}
browser.close();
return;
} catch (err) {
console.log(err);
browser.close();
return;
}
};
go();