AttributeError:“NoneType”对象没有属性“find_all”,仅适用于 Flask

问题描述 投票:0回答:1

main.py

from flask import Flask, render_template, request, redirect
from scrap import Scrap, Scrap_web3, Scrap_remoteok

app = Flask("Job Scrapper")

db = {}


@app.route("/")
def index():
    return render_template("index.html")


@app.route("/search")
def search():
    keyword = request.args.get("keyword")
    if keyword is None:
        return redirect("/")

    if keyword in db:
        jobs = db[keyword]
    else:
        all_jobs = []

        berlin_script = Scrap(keyword)
        berlin_script.find_job()
        all_jobs.extend(berlin_script.results)

        web3_script = Scrap_web3(keyword)
        web3_script.find_job()
        all_jobs.extend(web3_script.results)

        remoteok_script = Scrap_remoteok(keyword)
        remoteok_script.find_job()
        all_jobs.extend(remoteok_script.results)

        db[keyword] = all_jobs
        jobs = all_jobs

    return render_template("search.html", keyword=keyword, jobs=jobs)


if __name__ == "__main__":
    app.run(debug=True)

scrap.py

import requests
from bs4 import BeautifulSoup


class Scrap:
    def __init__(self, keyword):
        self.keyword = keyword
        self.url_list = [
            f"https://berlinstartupjobs.com/skill-areas/{self.keyword}",
            f"https://web3.career/{self.keyword}-jobs",
            f"https://weworkremotely.com/remote-jobs/search?utf8=%E2%9C%93&term={self.keyword}",
        ]
        self.results = []
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
        }

    def find_job(self):
        response = requests.get(self.url_list[0], headers=self.headers)

        soup = BeautifulSoup(response.content, "html.parser")
        jobs = soup.find("ul", class_="jobs-list-items").find_all("li")
        for job in jobs:
            title = job.find("h4", class_="bjs-jlid__h").text
            company = job.find("a", class_="bjs-jlid__b").text
            link = job.find("h4", class_="bjs-jlid__h").find("a")["href"]
            job_db = {
                "Title": title,
                "Company": company,
                "URL": link,
            }
        self.results.append(job_db)


class Scrap_web3(Scrap):
    def __init__(self, keyword):
        super().__init__(keyword)

    def find_job(self):
        response = requests.get(self.url_list[1], headers=self.headers)

        soup = BeautifulSoup(response.content, "html.parser")
        jobs = soup.find("tbody", class_="tbody").find_all("tr")
        for index, job in enumerate(jobs):
            if index == 4:
                continue
            title = job.find(
                "h2", class_="fs-6 fs-md-5 fw-bold my-primary").text
            company = job.find(
                "td", class_="job-location-mobile").find("h3").text
            link = f'https: // web3.career{job.find("td", class_="job-location-mobile").find("a")["href"]}'
            job_db = {
                "Title": title,
                "Company": company,
                "URL": link,
            }
        self.results.append(job_db)


class Scrap_remoteok(Scrap):
    def __init__(self, keyword):
        super().__init__(keyword)

    def find_job(self):
        response = requests.get(self.url_list[2], headers=self.headers)

        soup = BeautifulSoup(response.content, "html.parser")
        jobs = soup.find_all("li", class_="feature")
        for job in jobs:
            title = job.find("span", class_="title").text
            company = job.find("span", class_="company").text
            link = f"https://weworkremotely.com{job.find(lambda tag: tag.name == 'a' and 'remote-jobs' in tag.get('href', ''))['href']}"
            job_db = {
                "Title": title,
                "Company": company,
                "URL": link,
            }
        self.results.append(job_db)

`当我尝试使用 Flask 启动时,我不知道为什么这段代码会出现这个问题。 我已经检查过 bs4 可以访问这些页面,并且当我在 scrap.py 上打印每一页时 一切都很好。

当我单独运行这段代码时,完全没有问题,但是当我使用 Flask 将其作为 Web 应用程序运行时,同样的错误不断发生。我简直无法理解为什么。

如果bs4或者代码本身有问题,那么运行.py文件应该也会导致错误,但看起来根本没有问题。 `

我尝试启动每个 scrap.py 的代码(没问题) 检查 HTML(没问题)

python flask web-scraping
1个回答
0
投票

AttributeError: 'NoneType' object has no attribute 'find_all'
错误意味着在调用
obj.find_all(...)
时,objNone。代码中的这些 obj 值是
soup.find(...)
调用的结果,如果文档中不存在此类 DOM 元素(HTML 标签),该调用将返回 None。

修复方法:

  • 而不是从互联网下载

    response.content
    ,而是将 HTML 文档作为硬编码常量字符串传递。那么无论您在哪里运行程序,行为都将是确定性的。

  • 修改您的 Python 代码,以便

    soup.find(...)
    能够找到匹配的 DOM 元素。

  • 向您的 Python 代码中添加类似

    if soup.find(...) is None:
    的条件,并使其在未找到指定的 DOM 元素时执行一些对您有用的操作。

© www.soinside.com 2019 - 2024. All rights reserved.