Every time a timeout occurs, I get a message like this:
User timeout caused connection failure. Getting https://some.website/ took longer than 15.0 seconds..
I don't want these messages in my log file.
How can I silence them?
You can add a method (named handle_error here) to your spider class and register it as the errback of your requests. Inside it you can write your own error-handling code, so the failure is delivered to your method instead of being logged by Scrapy:
from scrapy.spiders import CrawlSpider

class SomeSpider(CrawlSpider):
    name = 'SAME NAME'
    allowed_domains = ['ALLOWED DOMAINS HERE']
    start_urls = ['START_URL']
    download_timeout = 15

    def parse(self, response):
        urls = response.css('div.title a::attr(href)').extract()
        for url in urls:
            # errback routes failures (including timeouts) to handle_error
            yield response.follow(url, callback=self.parse_data_page,
                                  errback=self.handle_error)

    def parse_data_page(self, response):
        # Parsing here
        pass

    def handle_error(self, failure):
        self.log("Request failed: %s" % failure.request)