我正在使用 CrawlerRunner 运行蜘蛛,我需要设置日志记录级别。
这是启动抓取工具的代码:
import os
import sys
import logging
from scrapy.crawler import CrawlerRunner
from scrapy.utils.log import configure_logging
from scrapy.utils.project import get_project_settings
from scrapy.utils.reactor import install_reactor
from scrapy_app.models import Scraper
# The asyncio reactor must be installed BEFORE twisted.internet.reactor is
# imported below — importing the reactor first would install the default one
# and make install_reactor() fail.
# reactor must be installed before importing
install_reactor("twisted.internet.asyncioreactor.AsyncioSelectorReactor")
from twisted.internet import ( # pylint: disable=wrong-import-position, wrong-import-order
reactor,
)
# Dotted path to the Scrapy settings module; only set if the environment
# does not already define SCRAPY_SETTINGS_MODULE, so deployments can override it.
SETTINGS_FILE_PATH = "scrapy_app.scrapy_app.settings"
os.environ.setdefault("SCRAPY_SETTINGS_MODULE", SETTINGS_FILE_PATH)
def setup_logging(settings=None):
    """Configure Scrapy's logging.

    :param settings: optional Scrapy ``Settings`` object. When provided, its
        ``LOG_LEVEL`` / ``LOG_*`` values are honoured. Calling
        ``configure_logging()`` with no settings is exactly why LOG_LEVEL
        appeared stuck at DEBUG: without a settings object Scrapy falls back
        to its built-in defaults and never reads settings.py.
    """
    # configure_logging(None) behaves identically to configure_logging(),
    # so existing zero-argument callers are unaffected.
    configure_logging(settings)
def _crawl_spider(runner: CrawlerRunner, spider: str) -> None:
    """Schedule a spider on *runner* without starting the reactor.

    :param runner: the CrawlerRunner to enqueue the crawl on
    :param spider: spider name
    """
    deferred = runner.crawl(spider)
    # Stop the reactor when the crawl completes — on success OR failure.
    deferred.addBoth(lambda _result: reactor.stop())  # type: ignore
def run_spider(spider: str) -> None:
    """Run a scraper by its name and block until it finishes.

    :param spider: name of the spider to run
    """
    # Fetch the project settings ONCE and hand the same object to both
    # configure_logging() and CrawlerRunner. configure_logging() called with
    # no arguments ignores settings.py entirely — that is why LOG_LEVEL
    # appeared to be stuck at DEBUG.
    settings = get_project_settings()
    configure_logging(settings)
    runner = CrawlerRunner(settings)
    _crawl_spider(runner, spider)
    # Blocks until _crawl_spider's callback calls reactor.stop().
    reactor.run()  # type: ignore
# NOTE(review): duplicate definition — this re-binds setup_logging and
# shadows the version defined earlier in the file; keep only one of them.
def setup_logging():
    configure_logging()  # the logs are there, but LOG_LEVEL is always DEBUG
def setup_logging():
    """Configure the root logger via the standard library.

    NOTE(review): if Scrapy's configure_logging() has already installed
    handlers on the root logger, basicConfig() is a no-op — which would
    explain the "no logging at all" observation. The original line also
    began with a stray backtick (markdown residue), a SyntaxError.
    """
    logging.basicConfig(
        level=logging.INFO,
    )
同时使用logging.basicConfig和configure_logging - 与仅使用configure_logging的结果相同。
在settings.py中设置LOG_LEVEL似乎没有改变任何东西。
您需要在创建 CrawlerRunner 之前先在设置中修改它，然后把修改后的设置对象作为参数传给 CrawlerRunner。
# Adjust the settings BEFORE constructing the runner, then pass the very
# same Settings object in — CrawlerRunner reads LOG_LEVEL from it.
settings = get_project_settings()
settings.set("LOG_LEVEL", logging.INFO)  # equivalent to item assignment; .set() is the documented API
runner = CrawlerRunner(settings)