在递归中使用scrapy回调时,xlsxwriter无法创建文件

问题描述 投票:1回答:1

在 scrapy 的递归回调中使用 xlsxwriter 时,无法生成文件。有人能帮忙看看吗?

import scrapy
import xlsxwriter

class QuotesSpider(scrapy.Spider):
    """Scrape hotel listings page by page and store them in an Excel workbook.

    The workbook is created in ``start_requests``, filled one row per hotel
    in ``parse`` (which follows the pager link to the next page), and
    written to disk in ``closed``. XlsxWriter only produces the file when
    ``Workbook.close()`` is called, so omitting that call leaves no file.
    """

    name = "quotes"

    # Placeholder written when a text field is missing from the page,
    # because write_string() cannot take None.
    missing_text = "-"

    def start_requests(self):
        """Create the workbook and worksheet, then request the first page."""
        start_urls = [
            'https://www.hotelgg.com/venue/mittitlt/',
        ]

        # Create the workbook.
        filename = 'hotel-list.xlsx'
        self.wb = xlsxwriter.Workbook(filename)
        self.ws = self.wb.add_worksheet("nanshan")
        # Next free worksheet row. Kept on the spider (not as a local in
        # parse) so rows from later pages are appended instead of
        # overwriting the rows written for the first page.
        self.row = 0

        # Send the first request.
        yield scrapy.Request(url=start_urls[0], callback=self.parse)

    def parse(self, response):
        """Write one worksheet row per hotel and follow the next-page link.

        Args:
            response: the Scrapy response for one listing page.

        Yields:
            A ``scrapy.Request`` for the next page, when a pager link exists.
        """
        ws = self.ws
        for quote in response.css('ul.hotel_list div.info'):
            item = {
                'name': quote.css('h3.title a::text').extract_first(),
                'region': quote.css('span.region::text').extract_first(),
                'street': quote.css('span.street::text').extract_first(),
                'space': quote.css('span.meetingroom_space_range::text').extract(),
            }

            # Write the row to Excel. extract_first() returns None when the
            # selector matches nothing, so substitute a placeholder.
            for col, key in enumerate(('name', 'region', 'street')):
                ws.write_string(self.row, col, item[key] or self.missing_text)

            space = item['space']
            if len(space) > 1:
                ws.write_string(self.row, 3, space[1])
            elif space:
                # NOTE(review): the original always read index 1; falling back
                # to the only text node avoids an IndexError - confirm this is
                # the wanted value for single-node markup.
                ws.write_string(self.row, 3, space[0])
            else:
                ws.write_string(self.row, 3, '0')
            self.row += 1

        # Find the next page to scrape.
        next_page = response.css('div.pager a:last-child::attr(href)').extract_first()
        self.log(next_page)
        if next_page is not None:
            next_page = response.urljoin(next_page)

            # Send the request for the next page.
            yield scrapy.Request(next_page, callback=self.parse)

    def closed(self, reason):
        """Write the workbook to disk when the spider finishes.

        Scrapy calls this hook when the spider closes. Without closing the
        workbook, XlsxWriter never creates the .xlsx file.
        """
        wb = getattr(self, 'wb', None)
        if wb is not None:
            wb.close()
python python-3.x scrapy xlsxwriter
1个回答
0
投票

可以尝试类似下面的做法:

def process_item(self, item, spider):
    """Write a scraped item to the worksheet that matches its ``yield_type``.

    Empty fields (``None`` or ``""``) are replaced in place with ``"-"``
    before writing. Items of type ``'product'`` go to ``self.products`` and
    items of type ``'profile'`` go to ``self.profiles``; each sheet keeps
    its own row counter.

    Args:
        item: mapping of field names to scraped values; must contain
            ``'yield_type'``.
        spider: the spider that produced the item (unused).

    Returns:
        The (possibly modified) item, so that later pipeline stages
        receive it.
    """
    for key, value in item.items():
        # Compare with ==: `is ""` tests object identity, which is not
        # guaranteed for equal strings.
        if value is None or value == "":
            item[key] = "-"
    if item['yield_type'] == 'product':
        self.prod_row += 1
        self.products.write_string("A%s" % self.prod_row, item["breadcrumb"])
        self.products.write_string("B%s" % self.prod_row, item["last_category"])
        self.products.write_string("C%s" % self.prod_row, item["product_href"])
    elif item['yield_type'] == 'profile':
        self.prof_row += 1
        self.profiles.write_string("A%s" % self.prof_row, item["profile_category"])
        self.profiles.write_string("B%s" % self.prof_row, item["company_name"])
        self.profiles.write_string("C%s" % self.prof_row, item["company_href"])
    return item

将数据保存在工作簿的不同表中。

xlsxwriter 在写入空值时会报错,因此请确保写入一个占位内容,而不是 None 或空字符串:

    # Compare with ==: `is ""` tests object identity, not string equality.
    if value is None or value == "":
        item[key] = "-"
© www.soinside.com 2019 - 2024. All rights reserved.