import scrapy
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule
from scraper_api import ScraperAPIClient
client = ScraperAPIClient('hiding the key')
class MoviesSpider(CrawlSpider):
name = "movies"
def start_requests(self):
urls = ["https://www.imdb.com/search/title/?genres=drama&groups=top_250&sort=user_rating,desc"]
for link in urls:
yield scrapy.Request(client.scrapyGet(url=link,render=True),callback=self.parse_item)
rules = (Rule(LinkExtractor(restrict_xpaths='//h3[@class="lister-item-header"]/a'), callback="parse_item", follow=True),)
def parse_item(self, response):
yield {'link':response.url}
[scrapy.core.engine] 调试:抓取 (200)
2023-02-19 17:48:00 [scrapy.core.scraper] 调试:从 <200 https://api.scraperapi.com/?url=https%3A%2F%2Fwww.imdb.com%2Fsearch% 2Ftitle%2F%3Fgenres%3Ddrama%26groups%3Dtop_250%26sort%3Duser_rating%2Cdesc&api_key=8ccf268c7e3c965da0777f5594598b9d&render=true&scraper_sdk=python%5C\>
{'link': 'https://api.scraperapi.com/?url=https%3A%2F%2Fwww.imdb.com%2Fsearch%2Ftitle%2F%3Fgenres%3Ddrama%26groups%3Dtop_250%26sort%3Duser_rating% 2Cdesc&api_key=8ccf268c7e3c965da0777f5594598b9d&render=true&scraper_sdk=python'}
我期待得到['链接':链接的名称]