我想抓取给定查询的谷歌搜索结果,但在下面这段代码中,我定义的 CSS 选择器(css_identifier_*)始终匹配不到任何元素:
const axios = require("axios");
const cheerio = require("cheerio");
/**
 * Express-style route handler: fetches a Google search results page for
 * `req.params.query`, extracts the title, link and snippet of each result
 * with Cheerio, and replies with them as JSON.
 *
 * @param {object} req - Request object; `req.params.query` is the raw search text.
 * @param {object} res - Response object exposing chainable `status()` and `json()`.
 * @returns {Promise<void>} Resolves once a response has been sent. On success
 *   replies 200 with `{ message, output }`, where `output` is an array of
 *   `{ title, link, text }` objects (empty when no selector matched). On any
 *   failure replies 500 with a generic error payload.
 */
async function scrape(req, res) {
  try {
    const query = req.params.query;
    // Number of search results to request (e.g., 10 in this case).
    const numResults = 10;

    // Let URLSearchParams escape the query instead of concatenating strings.
    // `num` asks for a result count; the previously used `start` parameter is
    // a result offset, so `start=10` skipped the first page of results.
    const searchUrl = new URL("https://www.google.com.mx/search");
    searchUrl.searchParams.set("q", query);
    searchUrl.searchParams.set("num", String(numResults));

    // NOTE(review): without a browser-like User-Agent, Google presumably
    // serves a simplified page whose class names differ from the selectors
    // below. These class names are obfuscated and change over time, so verify
    // them against the HTML this request actually returns.
    const response = await axios.get(searchUrl.href, {
      headers: {
        "User-Agent":
          "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 " +
          "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
      },
    });
    const html = response.data;

    const css_identifier_result = ".tF2Cxc";
    const css_identifier_title = "h3";
    const css_identifier_link = ".yuRUbf a";
    const css_identifier_text = ".IsZvec";

    const $ = cheerio.load(html);
    const output = [];
    $(css_identifier_result).each((index, element) => {
      const entry = $(element);
      output.push({
        title: entry.find(css_identifier_title).text().trim(),
        link: entry.find(css_identifier_link).attr("href"),
        text: entry.find(css_identifier_text).text().trim(),
      });
    });

    // Send the parsed results; the raw HTML used to be returned here, which
    // discarded everything collected above.
    res.status(200).json({
      message: "Scraping successful",
      output,
    });
  } catch (error) {
    // Handle any errors that occurred during the request or while parsing.
    console.error('Error while scraping website:', error.message);
    res.status(500).json({
      message: "Error while scraping website. Contact support.",
      error: "Internal Server Error",
    });
  }
}
// Public API of this module: the `scrape` request handler.
module.exports = { scrape: scrape };
这是我的后端函数,但 results.each 回调中的 console.log 从未输出,看起来选择器没有匹配到任何结果。我已经尝试了几种方法,仍然不知道该如何解决。
提供的代码使用 Cheerio 库和 axios。
这些选择器似乎与 axios 实际获取到的 HTML 不匹配。可以尝试:
const axios = require("axios"); // 1.4.0
const cheerio = require("cheerio"); // 1.0.0-rc.12
const url = "<Your URL>";

/**
 * Parses a results page and collects one `{ href, title }` entry for every
 * element matching `.egMi0`.
 *
 * @param {string} html - Markup of the fetched page.
 * @returns {Array<{href: (string|undefined), title: string}>} Extracted entries.
 */
function extractLinks(html) {
  const $ = cheerio.load(html);
  const entries = [];
  for (const node of $(".egMi0")) {
    const wrapped = $(node);
    entries.push({
      href: wrapped.find("a").attr("href"),
      title: wrapped.find("h3").text().trim(),
    });
  }
  return entries;
}

// Fetch the page, print the extracted entries, and log any failure.
axios
  .get(url)
  .then((response) => {
    const data = extractLinks(response.data);
    console.log(data);
  })
  .catch((err) => {
    console.error(err);
  });