我在 udemy.com 上看了两门关于 Node 和 cheerio 的视频课程,想自学如何实现这一点,但仍然没有什么进展。运行下面的代码时,会得到一个图像 URL 数组:
const request = require("request-promise");
const cheerio = require("cheerio");
// Address of the page that scrapeJobHeader() downloads.
const url = "https://www.example.com";
// Module-level accumulator: scrapeJobHeader() pushes one { img_url } object
// into it per matched link and returns this same array.
const scrapeResults = [];
/**
 * Downloads the page at `url` and records, for every link matched by
 * `td.productListing-data > a`, the `src` of its direct `<img>` child.
 *
 * One `{ img_url }` object is appended to the module-level `scrapeResults`
 * array per matched link; `img_url` is `undefined` when no `src` is found
 * for that link.
 *
 * @returns {Promise<Array<{img_url: (string|undefined)}>|undefined>} The shared
 *   `scrapeResults` array, or `undefined` when the request or the parsing
 *   throws (the error is logged with `console.error`, not rethrown).
 */
async function scrapeJobHeader() {
  try {
    const htmlResult = await request.get(url);
    // cheerio.load() is synchronous and returns the query function directly,
    // so it must not be awaited.
    const $ = cheerio.load(htmlResult);
    $("td.productListing-data > a ").each((index, element) => {
      const img_url = $(element).children("img").attr("src");
      scrapeResults.push({ img_url });
    });
    return scrapeResults;
  } catch (err) {
    // Best-effort: report the failure and let the caller receive `undefined`.
    console.error(err);
  }
}
/**
 * Runs the scrape and prints whatever scrapeJobHeader() resolves to.
 *
 * @returns {Promise<void>}
 */
async function scrapeWebsite() {
  console.log(await scrapeJobHeader());
}
// Script entry point: start the scrape (the returned promise is not awaited).
scrapeWebsite();
但是执行下面这行代码时,出现错误“TypeError: Cannot read property 'replace' of undefined”:
// attr("src") yields undefined when no <img> src is found for a link, so
// chaining .replace() onto it throws the TypeError.
// NOTE(review): "images\\/more_color.png" is a string containing a backslash,
// so it would not match 'images/more_color.png' either — confirm intent.
const img_url = resultTitle.attr("src").replace("images\\/more_color.png","");
返回的图像中包含“images/more_color.png”,但我只想返回实际的产品图像,而不是这个 png 文件。
结果看起来像这样:
{ img_url: 'images/more_color.png' },
{ img_url: undefined },
{ img_url: undefined },
{
img_url: 'images/20191206/thumb/AK1501-@RH-CRY-LOVE@22X06-825_3L@467400@200@[email protected]'
},
{ img_url: 'images/more_color.png' },
{ img_url: undefined },
{ img_url: undefined },
{
img_url: 'images/20191206/thumb/AK1501-@GD-CRY-LOVE@22X06-825_3L@467399@200@[email protected]'
},
{ img_url: 'images/more_color.png' },
{ img_url: undefined },
{ img_url: undefined },
{
img_url: 'images/20191206/thumb/AK1500-@GD-CRY-QUEEN@3X06-825_3L@467397@200@[email protected]'
},
{ img_url: 'images/more_color.png' },
{ img_url: undefined },
{ img_url: undefined },
下一步是去掉值为 undefined 的图像 URL,但我还没有做到那一步。