我正在尝试使用 Puppeteer 从网站 (bestsecret.at) 抓取一些初创公司数据,但当我尝试导航到下一页时,会出现 Cloudflare 的等待页面并中断抓取。有没有办法用 Puppeteer 绕过它?
const puppeteer = require('puppeteer-extra');
const StealthPlugin = require('puppeteer-extra-plugin-stealth');
const randomUseragent = require('random-useragent');
// Register the stealth plugin on the shared puppeteer-extra instance before
// any browser is launched.
puppeteer.use(StealthPlugin());

// One randomly chosen user-agent string, picked once at module load.
const userAgent = randomUseragent.getRandom();
/**
 * Opens https://www.bestsecret.at in a browser launched through `puppeteer`,
 * fills in the login form, submits it, and prints the HTML of the page that
 * loads afterwards.
 *
 * Relies on `puppeteer` and `userAgent` from the enclosing module scope.
 * The browser is always closed, even when a step fails; any failure is logged
 * and reported through a non-zero process exit code.
 */
(async () => {
  const launchArgs = ['--no-sandbox', '--disable-setuid-sandbox'];
  const browser = await puppeteer.launch({ args: launchArgs });

  try {
    const page = await browser.newPage();
    await page.setUserAgent(userAgent);
    await page.goto('https://www.bestsecret.at', { waitUntil: 'networkidle2' });

    // Match the viewport to the document's scroll size (read in one round-trip).
    const { width, height } = await page.evaluate(() => ({
      width: document.body.scrollWidth,
      height: document.body.scrollHeight,
    }));
    await page.setViewport({ width, height });

    await page.type('#login-username', 'USARENAME');
    await page.type('#j_password', 'PASSWORD');

    // page.waitForTimeout() is deprecated and absent from recent Puppeteer
    // releases, so pause with a plain timer instead.
    await new Promise((resolve) => setTimeout(resolve, 1000));

    // Start listening for the navigation before clicking so it cannot be missed.
    await Promise.all([
      page.waitForNavigation({ waitUntil: 'networkidle2' }),
      page.click('#login-button'),
    ]);

    const bodyHTML = await page.content();
    console.log(bodyHTML);
  } finally {
    // Always release the browser process, including on errors above.
    await browser.close();
  }
})().catch((err) => {
  // Avoid an unhandled rejection: report the failure and signal it to the shell.
  console.error(err);
  process.exitCode = 1;
});
您可以尝试以下方法: