我正在抓取一个类似 Craigslist 的网站。
我的主要目标是构建一个由如下对象组成的数组(以汽车广告为例):
{ title: 'VOLKSWAGEN',
cv: '75 cv',
model: 'POLO 1. 0 ADVANCE',
brand: 'Volkswagen',
place: 'Malpaso Buzanada ( Arona )',
location: 'Tenerife',
transmision: 'manual',
fuel: 'gasolina',
km: '90.000 kms',
year: '2016',
doors: '5 puertas',
description:
'Test description',
vendor: 'Profesional',
reference: 'r343187239',
price: '7.799',
type: 'OFERTA',
image:
[ 'https://img.website.com/fg/3431/87/343187239_1.jpg',
'https://img.website.com/fg/3431/87/343187239_2.jpg',
'https://img.website.com/fg/3431/87/343187239_3.jpg',
'https://img.website.com/fg/3431/87/343187239_4.jpg' ] }
最终结果将是由这种对象组成的数组,我从该网站获取的每一条广告对应其中一个对象。
我的问题是:我能够构建出这个对象数组,但无法在它创建完成之后再对其进行处理(例如把图片保存到磁盘)。
我认为是我对 Promise 的用法有误:
// Question's first attempt: download one listing page, build one object per
// `.aditem` element, probe which numbered image URLs answer with status 200,
// and push the ads of type 'OFERTA' (brand other than 'Scooters') into `adArray`.
const fs = require('fs')
const axios = require('axios');
// NOTE(review): the options object is handed to `require`, not to cheerio;
// it looks like it has no effect here — confirm.
const $ = require('cheerio', { decodeEntities: true });
const url = 'https://www.website.com/anuncios/700000000.htm';
// Filled asynchronously: each ad is pushed only after its image probes settle.
const adArray = [];
// The page is requested as raw bytes and decoded below with the 'binary' encoding.
const axiosData = axios.get(url,{responseType: 'arraybuffer',responseEncoding: 'binary'})
.then(result => {
// `myP` holds whatever `.each()` returns.
// NOTE(review): `.each()` is not expected to collect the promise returned at the
// end of each callback — the 'hey2' / 'hey' lines printed before any 'hey3' in
// the output quoted in this document are consistent with that.
const myP = $('.aditem', result.data.toString('binary')).each((i, element) => {
// Removes the horsepower tag from the model text, trying the tag as-is and
// with its first space removed (both upper-cased). Writes the result to
// createAdObject.model and returns it; returns undefined when `model` is falsy.
function checkCVinTitle (cv, model) {
// String pattern: only the first space is removed.
const cvNoSpace = cv.replace(' ', '');
if(model){
if(model.includes(cv.toUpperCase())) {
createAdObject.model = model.replace(cv.toUpperCase(),'');
}
else if(model.includes(cvNoSpace.toUpperCase())){
createAdObject.model = model.replace(cvNoSpace.toUpperCase(),'');
}
else {
// console.log('no model')
}
return createAdObject.model;
}
}
// Upper-cases the first non-space character at the start of the string and
// after whitespace or one of " ' ( [ { ; with `lower` set, the whole string
// is lower-cased first.
const capitalize = (str, lower = false) =>
(lower ? str.toLowerCase() : str).replace(/(?:^|\s|["'([{])+\S/g, match => match.toUpperCase());
// The ad object being assembled for this `.aditem` element.
const createAdObject = {};
// cheerio lookups for each field, scoped to this ad element
const titleAndModel = $('.aditem-detail-title', element).text();
const brandAndPlace = $('.display-desktop.list-location-link', element).text();
const cv = $('.cc.tag-mobile', element).text();
const location = $('.list-location-region', element).text();
const transmision = $('.cmanual.tag-mobile', element).text();
const fuel = $('.gas.tag-mobile', element).text();
const km = $('.kms.tag-mobile', element).text();
const year = $('.ano.tag-mobile', element).text();
const doors = $('.ejes.tag-mobile', element).text();
const description = $('.tx', element).text();
const price = $('.aditem-price', element).text();
const vendor = $('.pillDiv.pillSellerTypePro', element).text();
const reference = $('.x5', element).text();
const type = $('.x3', element).text();
// The title text is split on ' - ': first part is the title, second the model.
createAdObject.title = titleAndModel.split(' - ')[0];
createAdObject.cv = cv;
createAdObject.model = titleAndModel.split(' - ')[1];
// Return value unused: the helper updates createAdObject.model itself.
checkCVinTitle(cv, createAdObject.model);
// The link text is split on ' de segunda mano en ': brand before it, place after it.
createAdObject.brand = brandAndPlace.split(' de segunda mano en ')[0];
createAdObject.place = brandAndPlace.split(' de segunda mano en ')[1];
createAdObject.location = capitalize(location);
createAdObject.transmision = transmision;
createAdObject.fuel = fuel;
createAdObject.km = km;
// Second space-separated token of the year tag text.
createAdObject.year = year.split(' ')[1]
createAdObject.doors = doors;
// String pattern: only the first '\n' is replaced.
createAdObject.description = description.replace('\n', ' ');
createAdObject.vendor = vendor;
// All whitespace is stripped from the reference.
createAdObject.reference = reference.replace(/\s/g,'');
createAdObject.price = price.replace('€', '');
createAdObject.type = type;
createAdObject.image = [];
// probe the candidate image URLs for this ad
let promises = [];
const averageImage = 5;
// i runs from 1 to 4: `averageImage` itself is excluded by the `<`.
for(let i=1; i < averageImage; i++){
// Path is built from the reference without its first character:
// its first four characters / the next two / the whole id, then `_<i>.jpg`.
const image = `https://img.website.com/fg/${createAdObject.reference.slice(1,5)}/${createAdObject.reference.slice(5,7)}/${createAdObject.reference.substr(1)}_${i}.jpg`;
promises.push(axios.get(image)
.then(response => {
// response.status == 200 ? createAdObject.image.push(image) : console.log('something wrong with images')
// URLs are pushed from inside the response callback, i.e. in arrival order.
response.status == 200 ? createAdObject.image.push(image) : ''
})
// Rejections are dropped: `console.log()` prints an empty line and `error` is unused.
.catch(error => console.log())
)
}
// Settles once every probe of this ad has settled; this promise is what the
// `.each()` callback returns.
return Promise.all(promises)
.then(() => {
// Keep the ad only when its type is 'OFERTA' and its brand is not 'Scooters'.
type == 'OFERTA' ? createAdObject.brand != 'Scooters' ? adArray.push(createAdObject) : '' : '';
console.log('hey3')
})
.catch(error => console.log(error))
})
// `myP` is the `.each()` result rather than the per-ad promises (see the NOTE
// on `myP`), so this chain can log 'hey2' before the ads are finished.
return Promise.all([myP])
.then(() => console.log('hey2'))
})
// NOTE(review): a top-level `return` is only accepted when the file is run as
// a CommonJS module — confirm how this script is executed.
return Promise.all([axiosData])
.then(() => console.log('hey'))
.catch(error => console.log(error))
我也尝试过不把 `.each()` 的结果赋值给变量的写法:
$('.aditem', result.data.toString('binary')).each((i, element) => {
运行脚本时,输出如下:
hey2
hey
hey3
hey3
hey3
hey3
hey3
hey3
hey3
我也调整过各处的 return 语句,但似乎没有任何帮助。
为什么它不会等待这个 Promise.all?
return Promise.all([axiosData])
是不是因为 cheerio 的 each 无法等待 Promise?
根据建议更新后的代码:
/*
 * Scrapes one listing page, builds one plain object per `.aditem` element,
 * keeps the ads of type 'OFERTA' whose brand is not 'Scooters', checks which
 * numbered image URLs exist for each kept ad, and stores the result in
 * `adArray`.
 *
 * `axiosData` settles only after every kept ad (image checks included) is
 * complete, so anything chained on it sees the finished `adArray`.
 */
const fs = require('fs');
const axios = require('axios');
// `require` accepts only the module id; parser options belong to `cheerio.load`.
const cheerio = require('cheerio');

const url = 'https://www.website.com/ads/700000000.htm';
// Host the numbered ad images are served from.
const imageHost = 'https://img.website.com';
// Images are checked at `_1.jpg` .. `_4.jpg`.
const maxImages = 4;
const adArray = [];

/**
 * Upper-cases the first letter of every word.
 * @param {string} str - Text to transform.
 * @param {boolean} [lower=false] - Lower-case the whole string first.
 * @returns {string} The capitalised text.
 */
const capitalize = (str, lower = false) =>
  (lower ? str.toLowerCase() : str).replace(/(?:^|\s|["'([{])+\S/g, match => match.toUpperCase());

/**
 * Removes the horsepower tag from a model name.
 * @param {string} cv - Horsepower tag as shown in the ad, e.g. '75 cv'.
 * @param {string|undefined} model - Model part of the ad title.
 * @returns {string|undefined} The model without the tag; `model` unchanged
 *   when it is empty or does not contain the tag.
 */
function removeCvFromModel(cv, model) {
  if (!model) return model;
  const spaced = cv.toUpperCase();
  const compact = cv.replace(' ', '').toUpperCase();
  if (model.includes(spaced)) return model.replace(spaced, '');
  if (model.includes(compact)) return model.replace(compact, '');
  return model;
}

/**
 * Builds the ad object for one `.aditem` element.
 * @param {Function} $ - Cheerio instance loaded with the listing page.
 * @param {object} element - The `.aditem` node.
 * @returns {object} The ad, with an empty `image` array.
 */
function parseAd($, element) {
  const text = selector => $(selector, element).text();
  const [title, rawModel] = text('.aditem-detail-title').split(' - ');
  const [brand, place] = text('.display-desktop.list-location-link').split(' de segunda mano en ');
  const cv = text('.cc.tag-mobile');
  return {
    title,
    cv,
    model: removeCvFromModel(cv, rawModel),
    brand,
    place,
    location: capitalize(text('.list-location-region')),
    transmision: text('.cmanual.tag-mobile'),
    fuel: text('.gas.tag-mobile'),
    km: text('.kms.tag-mobile'),
    year: text('.ano.tag-mobile').split(' ')[1],
    doors: text('.ejes.tag-mobile'),
    // Regex with the g flag: a string pattern would replace only the first newline.
    description: text('.tx').replace(/\n/g, ' '),
    vendor: text('.pillDiv.pillSellerTypePro'),
    reference: text('.x5').replace(/\s/g, ''),
    price: text('.aditem-price').replace('€', ''),
    type: text('.x3'),
    image: [],
  };
}

/**
 * Builds the URL of one numbered image of an ad.
 * @param {string} reference - Ad reference, e.g. 'r343187239'.
 * @param {number} index - 1-based image number.
 * @returns {string} The image URL.
 */
function imageUrl(reference, index) {
  // The leading letter of the reference is not part of the image path.
  const id = reference.slice(1);
  return `${imageHost}/fg/${id.slice(0, 4)}/${id.slice(4, 6)}/${id}_${index}.jpg`;
}

/**
 * Checks which of the candidate image URLs of an ad exist.
 * @param {string} reference - Ad reference.
 * @returns {Promise<string[]>} Existing image URLs, in image-number order.
 */
async function findImages(reference) {
  const candidates = Array.from({ length: maxImages }, (_, i) => imageUrl(reference, i + 1));
  const checked = await Promise.all(candidates.map(async candidate => {
    try {
      const response = await axios.get(candidate);
      return response.status === 200 ? candidate : null;
    } catch (error) {
      // An error that carries a response means the server answered with an
      // error status, i.e. this image number does not exist. Anything else
      // (network failure, timeout) is reported instead of being dropped.
      if (!error.response) console.log(`image check failed for ${candidate}: ${error.message}`);
      return null;
    }
  }));
  return checked.filter(Boolean);
}

/**
 * Downloads the listing page and fills `adArray`.
 * @returns {Promise<object[]>} `adArray`, once every kept ad is complete.
 */
async function scrapeAds() {
  const result = await axios.get(url, { responseType: 'arraybuffer', responseEncoding: 'binary' });
  const $ = cheerio.load(result.data.toString('binary'));
  // cheerio's .map() callback receives (index, element); .get() unwraps the
  // cheerio collection into a plain array.
  const wanted = $('.aditem')
    .map((index, element) => parseAd($, element))
    .get()
    .filter(ad => ad.type === 'OFERTA' && ad.brand !== 'Scooters');
  // Images are only checked for ads that are kept; page order is preserved.
  const ads = await Promise.all(
    wanted.map(async ad => ({ ...ad, image: await findImages(ad.reference) }))
  );
  adArray.push(...ads);
  return adArray;
}

const axiosData = scrapeAds();
axiosData
  .then(() => console.log(adArray))
  .catch(error => console.log(error));
您应该使用 `$(selector).map()`,而不是 `$(selector).each()`。
`each()` 返回 undefined,而 `Promise.all` 不会等待任何非 Promise 的值(更确切地说,是非 thenable 的值),而是把它们当作已完成的值直接放行,如下所示:
// None of these entries is a promise; Promise.all still fulfils and the
// callback below runs.
const plainValues = [4, "hello", undefined];
const reportDone = () => console.log("done");
Promise.all(plainValues).then(reportDone);
大致可以写成这样:
// Sketch of the whole flow with async/await. It assumes `axios`, cheerio's `$`
// and `url` are in scope, plus two helpers factored out of the question's code
// (neither is defined in this snippet):
//   adObject(element)  -> the ad object for one `.aditem`, with `type`,
//                         `brand` and an empty `image` array
//   imageStr(i, ad)    -> URL of the ad's i-th image
(async () => {
  const result = await axios.get(url, { responseType: 'arraybuffer', responseEncoding: 'binary' });
  // .map() keeps what each callback returns (one promise per ad) and .get()
  // unwraps the cheerio collection into a plain array Promise.all can wait on.
  const pendingAds = $('.aditem', result.data.toString('binary'))
    .map(async (index, element) => {
      const createAdObject = adObject(element);
      // `type` and `brand` are per-ad fields, so the filter has to run here.
      if (createAdObject.type !== 'OFERTA' || createAdObject.brand === 'Scooters') return null;
      const averageImage = 5;
      // Image numbers run from 1 to averageImage - 1, as in the question's loop.
      const probes = Array.from({ length: averageImage - 1 }, async (_, i) => {
        const image = imageStr(i + 1, createAdObject);
        try {
          const response = await axios.get(image);
          return response.status === 200 ? image : null;
        } catch (error) {
          // An error carrying a response means the server answered with an
          // error status: this image does not exist. Other failures are logged.
          if (!error.response) console.log(`image check failed for ${image}: ${error.message}`);
          return null;
        }
      });
      const images = await Promise.all(probes);
      createAdObject.image.push(...images.filter(Boolean));
      return createAdObject;
    })
    .get();
  // Filtered-out ads resolved to null; drop them.
  const adArray = (await Promise.all(pendingAds)).filter(Boolean);
  // Every ad and its image list is complete at this point, e.g. the images
  // can now be saved to disk.
  console.log(adArray);
})().catch(error => console.log(error));