使用 axios 和 cheerio(Node.js)抓取谷歌搜索结果

问题描述 投票:0回答:1

我想抓取给定查询的谷歌搜索结果,但我无法让下面代码中的 CSS 选择器(css_identifier_* 变量)正常工作:

const axios = require("axios");
const cheerio = require("cheerio");

/**
 * Request handler: runs a Google search for `req.params.query` and responds
 * with the results parsed out of the returned HTML.
 *
 * Responses sent through `res`:
 *   200 { message, output } - `output` is an array of { title, link, text }
 *   400 { message, error }  - `req.params.query` is missing or blank
 *   500 { message, error }  - the request or the parsing threw
 *
 * @param {object} req - request object; only `req.params.query` is read
 * @param {object} res - response object; `res.status(code).json(body)` is called once
 * @returns {Promise<void>}
 */
async function scrape (req, res) {
    try {
        const query = req.params.query;

        // Without this guard a missing query would be sent to Google as the
        // literal text "undefined".
        if (typeof query !== "string" || query.trim() === "") {
            res.status(400).json({
                message: "Missing search query.",
                error: "Bad Request",
            });
            return;
        }

        const encodedQuery = encodeURIComponent(query);

        // Sent as the `start` query parameter.
        // NOTE(review): the previous comment described this value as the number
        // of results wanted, but `start` is understood to be a result offset
        // rather than a count - confirm which one was intended.
        const startOffset = 10;

        const response = await axios.get(`https://www.google.com.mx/search?q=${encodedQuery}&start=${startOffset}`);
        const html = response.data;

        // NOTE(review): these selectors reportedly do not match the markup that
        // Google returns to axios; when nothing matches, `output` is empty.
        // Verify them against the HTML actually received.
        const css_identifier_result = ".tF2Cxc";
        const css_identifier_title = "h3";
        const css_identifier_link = ".yuRUbf a";
        const css_identifier_text = ".IsZvec";

        const $ = cheerio.load(html);
        const output = [];

        $(css_identifier_result).each((index, element) => {
            const entry = $(element);
            output.push({
                title: entry.find(css_identifier_title).text(),
                link: entry.find(css_identifier_link).attr('href'),
                text: entry.find(css_identifier_text).text(),
            });
        });

        // Respond with the parsed results, not the raw page.
        res.status(200).json({
            message: "Scraping successful",
            output,
        });
    } catch (error) {
        // Any failure in the request or the parsing ends up here.
        console.error('Error while scraping website:', error.message);
        res.status(500).json({
            message: "Error while scraping website. Contact support.",
            error: "Internal Server Error",
        });
    }
}

// Public surface of this module: the `scrape` request handler.
module.exports = { scrape };

这是我的后端函数,但 results.each 回调中的 console.log 从未输出任何内容。我已经尝试了几种方法,仍然不知道该如何解决。

提供的代码使用 Cheerio 库和 axios。

node.js web-scraping axios cheerio
1个回答
0
投票

这些选择器似乎与 axios 实际获取到的 HTML 不匹配。可以尝试:

const axios = require("axios"); // 1.4.0
const cheerio = require("cheerio"); // 1.0.0-rc.12

const url = "<Your URL>";

// Fetch the page, collect the link and heading text found inside every
// `.egMi0` element, and print the list; any failure is printed to stderr.
(async () => {
  try {
    const response = await axios.get(url);
    const $ = cheerio.load(response.data);
    const data = [];
    for (const node of $(".egMi0")) {
      const entry = $(node);
      data.push({
        href: entry.find("a").attr("href"),
        title: entry.find("h3").text().trim(),
      });
    }
    console.log(data);
  } catch (err) {
    console.error(err);
  }
})();
© www.soinside.com 2019 - 2024. All rights reserved.