用 Python 抓取具有多种尺寸和颜色组合的商品变体

问题描述 投票:0回答:1

我正在尝试抓取各种尺寸和颜色的组合。

场景如下:

所选颜色: -螳螂绿 -线轴黄

所选尺寸: -6磅 -8磅 -10磅 -15磅 -20磅 -30磅

我需要使用下面的代码抓取标题、价格和特价。

import scrapy
import re
from scrapy_splash import SplashRequest

class FishingRodsSpider(scrapy.Spider):
    """Render one product page through Splash and collect its variant URLs.

    The spider reads the product title and the two price fields from the
    rendered page, then gathers the ``data-url`` attribute of every colour
    (style) picker and every size picker.  One item holding all of that is
    yielded per page.
    """

    name = "ana_rods_detailed"
    allowed_domains = ["anacondastores.com"]
    start_urls = ["https://www.anacondastores.com/fishing/fishing-line/braid-line/shimano-kairiki-8-braid-line-150-metre-spool/BP90140299"]

    def start_requests(self):
        """Request every start URL via Splash, waiting 2 s for rendering."""
        for url in self.start_urls:
            yield SplashRequest(url, self.parse, args={'wait': 2})

    @staticmethod
    def _first_text(response, query):
        """Return the stripped text of the first match, or "" if none.

        ``.get()`` returns ``None`` when nothing matches, so calling
        ``.strip()`` on it directly would raise ``AttributeError``.
        """
        return response.css(query).get(default="").strip()

    def parse(self, response):
        """Extract title, prices and variant URLs from a product page.

        Args:
            response: the rendered product page.

        Yields:
            A dict with ``title``, ``price``, ``club_price``,
            ``style_urls`` and ``size_urls``.  Text fields are empty
            strings when the corresponding element is absent.
        """
        title = self._first_text(response, '.pdp-title::text')
        price = self._first_text(response, '.product-info .price-was .amount::text')
        club_price = self._first_text(response, '.product-info .price-now .amount::text')

        # The size pickers do not depend on the style being iterated, so
        # they are collected once instead of once per style picker.
        size_urls = []
        for size_picker in response.css('.js-variant-size'):
            size_data_url = size_picker.attrib.get('data-url')
            if size_data_url is None:
                self.log("Size data-url attribute is missing for a size picker.")
                continue
            size_urls.append(response.urljoin(size_data_url))

        style_urls = []
        for i, style_picker in enumerate(response.css('.js-variant-style-picker')):
            style_data_url = style_picker.attrib.get('data-url')
            if style_data_url is not None:
                style_urls.append(response.urljoin(style_data_url))
            elif i != 0:
                # A missing data-url on the first picker is tolerated
                # silently, as in the original logic; any later picker
                # without one is reported.
                self.log("Style data-url attribute is missing for a style picker.")

        for size_variant_url in size_urls:
            self.log("Size Variant: " + size_variant_url)

        yield {
            "title": title,
            "price": price,
            "club_price": club_price,
            "style_urls": style_urls,
            "size_urls": size_urls,
        }

预期结果: -螳螂绿 6 磅 -螳螂绿 8 磅 -螳螂绿 10 磅 -螳螂绿 20 磅 -螳螂绿 30 磅 -线轴黄 6 磅 -线轴黄 8 磅 -线轴黄 10 磅 -线轴黄 20 磅 -线轴黄 30 磅

python selenium-webdriver scrapy
1个回答
0
投票

该网站的数据可以直接在加载的页面中找到,因此无需使用 `scrapy-splash`。您需要检查网络活动,以查找单击颜色或尺寸选项时生成的 URL。请参阅下面的示例代码:

import scrapy


class AnacondaSpider(scrapy.Spider):
    """Crawl a product page, its colour variants and its size variants.

    ``parse`` fans out to every colour page and to one request per size
    code; ``parse_size`` turns each size response into an item.
    """

    name = "anaconda"
    allowed_domains = ["www.anacondastores.com"]
    start_urls = [
        "https://www.anacondastores.com/fishing/fishing-line/braid-line/shimano-kairiki-8-braid-line-150-metre-spool/BP90140299-mantis-green"
    ]

    def parse(self, response):
        """Schedule the colour pages, then one request per size code."""
        # Colour pages are followed with the default callback, i.e. they
        # come back through this same method.
        colour_links = response.css(".js-variant-style-picker::attr(data-url)")
        for colour_href in colour_links.getall():
            yield response.follow(colour_href)

        # Each size code is resolved relative to the current page URL.
        size_query = ".size-variant a::attr(data-variant-size-code)"
        for size_code in response.css(size_query).getall():
            yield scrapy.Request(
                response.urljoin(size_code + "?version=7"),
                callback=self.parse_size,
            )

    def parse_size(self, response):
        """Build one item from the attributes of the product wrapper div."""
        # Item field -> selector reading the matching data attribute.
        field_queries = {
            "title": "#productContentWrapper > div::attr(data-product-name)",
            "price": "#productContentWrapper > div::attr(data-product-metric1)",
            "sale_price": "#productContentWrapper > div::attr(data-product-price)",
            "size": "#productContentWrapper > div::attr(data-product-dimension13)",
        }
        yield {
            field: response.css(query).get()
            for field, query in field_queries.items()
        }
最新问题
© www.soinside.com 2019 - 2024. All rights reserved.