使用 cheerio 和 axios 时的 Promise —— 嵌套在循环内部

问题描述 投票:0回答:2

我正在抓取一个类似 Craigslist 的网站。

我的主要目标是构建一个这样的对象(以汽车广告为例):

{ title: 'VOLKSWAGEN',
    cv: '75 cv',
    model: 'POLO 1. 0 ADVANCE',
    brand: 'Volkswagen',
    place: 'Malpaso Buzanada ( Arona )',
    location: 'Tenerife',
    transmision: 'manual',
    fuel: 'gasolina',
    km: '90.000 kms',
    year: '2016',
    doors: '5 puertas',
    description:
     'Test description',
    vendor: 'Profesional',
    reference: 'r343187239',
    price: '7.799',
    type: 'OFERTA',
    image:
     [ 'https://img.website.com/fg/3431/87/343187239_1.jpg',
       'https://img.website.com/fg/3431/87/343187239_2.jpg',
       'https://img.website.com/fg/3431/87/343187239_3.jpg',
       'https://img.website.com/fg/3431/87/343187239_4.jpg' ] }

最终结果将是由这类对象组成的数组,我从该网站抓取到的每一条广告对应其中一个对象。

我的问题是:我能够得到这个对象数组,但在它创建完成之后却无法继续处理它(例如把图片保存到磁盘)。

我认为是我对 Promise 的用法有问题:

const fs = require('fs')
const axios = require('axios');
const $ = require('cheerio', { decodeEntities: true });

const url = 'https://www.website.com/anuncios/700000000.htm';

const adArray = [];

// Downloads the listing page as a raw buffer, decodes it as a 'binary' string,
// and walks every `.aditem` element to build one ad object per listing.
// Each ad then probes a handful of candidate image URLs and, once those
// requests settle, is pushed into the shared `adArray` if it passes the filter.
// NOTE(review): the output quoted below this script (hey2, hey, then hey3 ...)
// shows the outer chain settling before the per-ad image checks finish.
const axiosData = axios.get(url,{responseType: 'arraybuffer',responseEncoding: 'binary'})
    .then(result => {
        // NOTE(review): the promise returned at the end of this callback does not
        // appear to end up in `myP`, so nothing downstream can wait on the
        // per-ad work — confirm against the cheerio `.each` documentation.
        const myP = $('.aditem', result.data.toString('binary')).each((i, element) => {
            // Strips the horsepower tag `cv` (upper-cased, either as-is or with its
            // first space removed) from `model` and stores the result on
            // `createAdObject.model`. Returns that model, or undefined when
            // `model` is falsy. Only the first occurrence is replaced.
            function checkCVinTitle (cv, model) {
                const cvNoSpace = cv.replace(' ', '');
                if(model){
                    if(model.includes(cv.toUpperCase())) {
                        createAdObject.model = model.replace(cv.toUpperCase(),''); 
                    } 
                    else if(model.includes(cvNoSpace.toUpperCase())){
                        createAdObject.model = model.replace(cvNoSpace.toUpperCase(),''); 
                    }
                    else {
                        // console.log('no model')
                    }
                    return createAdObject.model;
                }
            }
            // Upper-cases every non-space character that follows the start of the
            // string, whitespace, or an opening quote/bracket. When `lower` is
            // true the whole string is lower-cased first.
            const capitalize = (str, lower = false) =>
            (lower ? str.toLowerCase() : str).replace(/(?:^|\s|["'([{])+\S/g, match => match.toUpperCase());

                // Accumulates the fields of the current ad.
                const createAdObject = {};
                // cheerio lookups for each field, scoped to the current ad element
                const titleAndModel = $('.aditem-detail-title', element).text();
                const brandAndPlace = $('.display-desktop.list-location-link', element).text();
                const cv = $('.cc.tag-mobile', element).text();
                const location = $('.list-location-region', element).text();
                const transmision = $('.cmanual.tag-mobile', element).text();
                const fuel = $('.gas.tag-mobile', element).text();
                const km = $('.kms.tag-mobile', element).text();
                const year = $('.ano.tag-mobile', element).text();
                const doors = $('.ejes.tag-mobile', element).text();
                const description = $('.tx', element).text();
                const price = $('.aditem-price', element).text();
                const vendor = $('.pillDiv.pillSellerTypePro', element).text();
                const reference = $('.x5', element).text();
                const type = $('.x3', element).text();

                    // The title text is split on ' - ': first part is the title,
                    // second part is the model.
                    createAdObject.title = titleAndModel.split(' - ')[0];
                    createAdObject.cv = cv;
                    createAdObject.model = titleAndModel.split(' - ')[1];
                    // Overwrites `model` when it contains the cv tag.
                    checkCVinTitle(cv, createAdObject.model);
                    // The link text is split on ' de segunda mano en ': brand, then place.
                    createAdObject.brand = brandAndPlace.split(' de segunda mano en ')[0];
                    createAdObject.place = brandAndPlace.split(' de segunda mano en ')[1];
                    createAdObject.location = capitalize(location);
                    createAdObject.transmision = transmision;
                    createAdObject.fuel = fuel;
                    createAdObject.km = km;
                    // Keeps only the second space-separated token of the year tag text.
                    createAdObject.year = year.split(' ')[1]
                    createAdObject.doors = doors;
                    // A string pattern replaces only the first newline.
                    createAdObject.description = description.replace('\n', ' ');
                    createAdObject.vendor = vendor;
                    // Removes all whitespace from the reference.
                    createAdObject.reference = reference.replace(/\s/g,'');
                    // A string pattern replaces only the first '€'.
                    createAdObject.price = price.replace('€', '');
                    createAdObject.type = type;
                    createAdObject.image = [];
                    // Probe candidate image URLs numbered 1 .. averageImage - 1,
                    // all derived from pieces of the ad reference.
                        let promises = [];
                        const averageImage = 5;
                        for(let i=1; i < averageImage; i++){
                            const image = `https://img.website.com/fg/${createAdObject.reference.slice(1,5)}/${createAdObject.reference.slice(5,7)}/${createAdObject.reference.substr(1)}_${i}.jpg`;
                            promises.push(axios.get(image)
                                .then(response => {
                                    // response.status == 200 ? createAdObject.image.push(image) : console.log('something wrong with images')
                                    // URLs are pushed as responses arrive, so their order
                                    // in `image` follows completion order, not `i`.
                                    response.status == 200 ? createAdObject.image.push(image) : ''
                                    })
                                // A failed request prints an empty line and the error is
                                // dropped, so one missing image does not reject the batch.
                                .catch(error => console.log())
                            )
                        }
                        // Resolves once every image probe for this ad has settled.
                        return Promise.all(promises)
                        .then(() => {
                            // Keep the ad only when it is an 'OFERTA' and its brand is not 'Scooters'.
                            type == 'OFERTA' ? createAdObject.brand != 'Scooters' ? adArray.push(createAdObject)  : '' : '';
                            console.log('hey3')
                        })
                        .catch(error => console.log(error))
        })
        // NOTE(review): `myP` is wrapped as a single array entry; the quoted output
        // ('hey2' before any 'hey3') suggests it is not a promise for the per-ad
        // work, so this settles without waiting for the image probes.
        return Promise.all([myP])
        .then(() => console.log('hey2'))
    })
    // NOTE(review): a top-level `return` is only valid where the file body is
    // wrapped in a function (e.g. a CommonJS module) — confirm how this is run.
    return Promise.all([axiosData])
    .then(() => console.log('hey'))
    .catch(error => console.log(error))

我也尝试过不把它赋值给变量:

$('.aditem', result.data.toString('binary')).each((i, element) => {

当我运行脚本时,输出如下:

hey2
hey



hey3
hey3
hey3
hey3
hey3
hey3
hey3

我也尝试修改了各处的 return,但似乎没有任何帮助。

为什么它不等待这个 Promise.all?

return Promise.all([axiosData])

是不是 cheerio 的 each 没办法等待 Promise?

建议更新:

const fs = require('fs')
const axios = require('axios');
const $ = require('cheerio', { decodeEntities: true });

const url = 'https://www.website.com/ads/700000000.htm';

const adArray = [];

// Second attempt: same scraping flow as before, but using cheerio's `.map`
// instead of `.each`. Downloads the listing page as a raw buffer, decodes it
// as a 'binary' string, builds one ad object per `.aditem`, probes candidate
// image URLs, and pushes qualifying ads into the shared `adArray`.
const axiosData = axios.get(url,{responseType: 'arraybuffer',responseEncoding: 'binary'})
    .then(result => {
        // NOTE(review): cheerio documents the `.map` callback as (index, element);
        // with a single parameter, `element` would receive the index — confirm.
        const theMap = $('.aditem', result.data.toString('binary')).map(element => {
            // Strips the horsepower tag `cv` (upper-cased, either as-is or with its
            // first space removed) from `model` and stores the result on
            // `createAdObject.model`. Returns that model, or undefined when
            // `model` is falsy. Only the first occurrence is replaced.
            function checkCVinTitle (cv, model) {
                const cvNoSpace = cv.replace(' ', '');
                if(model){
                    if(model.includes(cv.toUpperCase())) {
                        createAdObject.model = model.replace(cv.toUpperCase(),''); 
                    } 
                    else if(model.includes(cvNoSpace.toUpperCase())){
                        createAdObject.model = model.replace(cvNoSpace.toUpperCase(),''); 
                    }
                    else {
                        // console.log('no model')
                    }
                    return createAdObject.model;
                }
            }
            // Upper-cases every non-space character that follows the start of the
            // string, whitespace, or an opening quote/bracket. When `lower` is
            // true the whole string is lower-cased first.
            const capitalize = (str, lower = false) =>
            (lower ? str.toLowerCase() : str).replace(/(?:^|\s|["'([{])+\S/g, match => match.toUpperCase());

                // Accumulates the fields of the current ad.
                const createAdObject = {};
                // cheerio lookups for each field, scoped to the current ad element
                const titleAndModel = $('.aditem-detail-title', element).text();
                const brandAndPlace = $('.display-desktop.list-location-link', element).text();
                const cv = $('.cc.tag-mobile', element).text();
                const location = $('.list-location-region', element).text();
                const transmision = $('.cmanual.tag-mobile', element).text();
                const fuel = $('.gas.tag-mobile', element).text();
                const km = $('.kms.tag-mobile', element).text();
                const year = $('.ano.tag-mobile', element).text();
                const doors = $('.ejes.tag-mobile', element).text();
                const description = $('.tx', element).text();
                const price = $('.aditem-price', element).text();
                const vendor = $('.pillDiv.pillSellerTypePro', element).text();
                const reference = $('.x5', element).text();
                const type = $('.x3', element).text();
                // const image = $('img', element)[0].attribs.src
                // const numberImages = $('.mini-overlay-superior', element).text();

                    // The title text is split on ' - ': first part is the title,
                    // second part is the model.
                    createAdObject.title = titleAndModel.split(' - ')[0];
                    createAdObject.cv = cv;
                    createAdObject.model = titleAndModel.split(' - ')[1];
                    // Overwrites `model` when it contains the cv tag.
                    checkCVinTitle(cv, createAdObject.model);
                    // The link text is split on ' de segunda mano en ': brand, then place.
                    createAdObject.brand = brandAndPlace.split(' de segunda mano en ')[0];
                    createAdObject.place = brandAndPlace.split(' de segunda mano en ')[1];
                    createAdObject.location = capitalize(location);
                    createAdObject.transmision = transmision;
                    createAdObject.fuel = fuel;
                    createAdObject.km = km;
                    // Keeps only the second space-separated token of the year tag text.
                    createAdObject.year = year.split(' ')[1]
                    createAdObject.doors = doors;
                    // A string pattern replaces only the first newline.
                    createAdObject.description = description.replace('\n', ' ');
                    createAdObject.vendor = vendor;
                    // Removes all whitespace from the reference.
                    createAdObject.reference = reference.replace(/\s/g,'');
                    // A string pattern replaces only the first '€'.
                    createAdObject.price = price.replace('€', '');
                    createAdObject.type = type;
                    createAdObject.image = [];
                    // Probe candidate image URLs numbered 1 .. averageImage - 1,
                    // all derived from pieces of the ad reference.
                        let promises = []
                        const averageImage = 5;
                        for(let i=1; i < averageImage; i++){
                            const image = `https://img.milanuncios.com/fg/${createAdObject.reference.slice(1,5)}/${createAdObject.reference.slice(5,7)}/${createAdObject.reference.substr(1)}_${i}.jpg`;
                                promises.push(axios.get(image)
                                    .then(response => {
                                        // URLs are pushed as responses arrive, so their order
                                        // in `image` follows completion order, not `i`.
                                        response.status == 200 ? createAdObject.image.push(image) : ''
                                    })
                                    // A failed request prints an empty line and the error is
                                    // dropped, so one missing image does not reject the batch.
                                    .catch(error => console.log())
                                )
                        }
                        // Resolves once every image probe for this ad has settled.
                        return Promise.all(promises)
                        .then(() => {
                            // Keep the ad only when it is an 'OFERTA' and its brand is not 'Scooters'.
                            type == 'OFERTA' ? createAdObject.brand != 'Scooters' ? adArray.push(createAdObject)  : '' : '';
                        })
        })
        // NOTE(review): `theMap` is passed as ONE array entry rather than being
        // expanded into its individual promises, and this Promise.all is not
        // returned, so the outer chain cannot wait on it.
        // `console.log(adArray)` is evaluated immediately (its undefined result
        // is what `.then` receives), so it prints before any ad has been pushed.
        Promise.all([theMap])
        .then(console.log(adArray))
    })
    // Waits for the page request chain above; the per-ad image probes are not
    // part of that chain (see the notes inside it).
    Promise.all([axiosData])
    .then(() => console.log(adArray))
    .catch(error => console.log(error))
node.js es6-promise
2个回答
0
投票

您应该使用$(selector).map()而不是$(selector).each()

each() 返回 undefined,而 Promise.all 不会等待任何非 Promise 的值(更确切地说,非 thenable 的值),如下所示。

// None of these entries is a promise (a number, a string, undefined), so
// Promise.all has nothing to wait for and the callback runs right away.
const plainValues = [4, "hello", undefined];
Promise.all(plainValues).then(() => console.log("done"));

0
投票

类似这样的东西:

// Fetches the listing page, builds one ad object per `.aditem` element, checks
// which candidate image URLs exist, and logs the resulting array of ads.
//
// Assumes `axios`, `$` (cheerio), `url`, `adObject(element)` and
// `imageStr(i, ad)` are defined in the enclosing scope — they are not part of
// this snippet. `adObject` is presumed to return an object carrying `type`,
// `brand` and an `image` array, like `createAdObject` in the question.
(async () => {
    const result = await axios.get(url, {responseType: 'arraybuffer', responseEncoding: 'binary'});

    // Unwrap the Cheerio selection into a plain array first: awaiting a Cheerio
    // wrapper does not wait for promises stored inside it, whereas Promise.all
    // over a real array of promises does.
    const elements = $('.aditem', result.data.toString('binary')).toArray();

    const ads = await Promise.all(elements.map(async (element) => {
        const createAdObject = adObject(element);
        // The filter is per ad, so it has to run here rather than once up front.
        if (createAdObject.type !== 'OFERTA' || createAdObject.brand === 'Scooters') {
            return null;
        }

        const averageImage = 5;
        // Candidate images are numbered 1 .. averageImage - 1, matching the loop
        // in the question.
        const checks = Array.from({length: averageImage - 1}, async (_, i) => {
            const image = imageStr(i + 1, createAdObject);
            try {
                const response = await axios.get(image);
                return response.status === 200 ? image : null;
            } catch (error) {
                // An ad may have fewer images than we probe for; a failed
                // request just means this slot is empty, not that the ad is bad.
                return null;
            }
        });

        // Promise.all keeps input order, so the URLs stay sorted by index.
        const images = await Promise.all(checks);
        createAdObject.image.push(...images.filter(Boolean));
        return createAdObject;
    }));

    // Drop the ads that were filtered out above.
    return ads.filter(Boolean);
})()
    .then(adArray => console.log(adArray))
    .catch(error => console.log(error));
© www.soinside.com 2019 - 2024. All rights reserved.