我正在使用下面的 Python 代码抓取这个网站(https://www.eeoc.gov/newsroom/search)。我希望它能抓取每条新闻的标题、描述、日期,以及标题所链接的网址。
但这段代码生成的是一个空的 CSV 文件。知道我哪里做错了吗?
import csv
import requests
from bs4 import BeautifulSoup
def scrape_eec_news():
    """Fetch the EEOC newsroom search page and extract its news entries.

    Returns:
        A list of dicts with the keys "title", "description", "date" and
        "url". When the response status is not 200 a message is printed and
        an empty list is returned.
    """
    url = "https://www.eeoc.gov/newsroom/search"
    response = requests.get(url)
    if response.status_code != 200:
        print("Failed to fetch the webpage")
        return []

    soup = BeautifulSoup(response.content, "html.parser")
    results = []
    for entry in soup.find_all("div", class_="views-row"):
        title_elem = entry.find("h3", class_="field-content")
        description_elem = entry.find("div", class_="field-content")
        date_elem = entry.find("span", class_="date-display-single")
        link_elem = entry.find("a")
        # NOTE(review): this guard is all-or-nothing. If any one of the four
        # lookups above returns None for a row, the whole row is dropped
        # without a message; if that happens for every row, the caller ends
        # up writing a header-only CSV. Verify each selector against the
        # page's actual markup.
        if title_elem and description_elem and date_elem and link_elem:
            results.append(
                {
                    "title": title_elem.text.strip(),
                    "description": description_elem.text.strip(),
                    "date": date_elem.text.strip(),
                    # Stored directly so the request URL above is not shadowed.
                    "url": link_elem["href"],
                }
            )
    return results
def export_to_csv(data, filename):
    """Write scraped news entries to *filename* as a UTF-8 encoded CSV file.

    Args:
        data: Iterable of dicts keyed by "title", "description", "date" and
            "url".
        filename: Path of the CSV file to create; it is opened in "w" mode,
            so an existing file is overwritten.

    The header row is always written, so an empty *data* produces a file that
    contains only the header line.
    """
    fieldnames = ["title", "description", "date", "url"]
    # newline="" hands line-ending control to the csv module.
    with open(filename, "w", newline="", encoding="utf-8") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(data)
# Script entry point: scrape the newsroom page, then dump the rows to CSV.
if __name__ == "__main__":
    news_entries = scrape_eec_news()
    export_to_csv(news_entries, "eec_news.csv")
    # Printed unconditionally, even when news_entries is empty.
    print("Data exported to eec_news.csv")
运行代码后,生成的 CSV 文件是空的。
下面是稍作修改的代码,它会将抓取结果保存到 CSV 文件:
import csv
import requests
from bs4 import BeautifulSoup
def scrape_eec_news():
    """Scrape the EEOC newsroom search page into a list of news entries.

    Returns:
        A list of dicts with the keys "title", "description", "date" and
        "url", one per ``div.views-row`` that contains an ``<h2>`` title.
        A missing description, date or link is returned as an empty string
        instead of aborting the whole scrape.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.Timeout: if the server does not respond within the timeout.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import urljoin

    url = "https://www.eeoc.gov/newsroom/search"
    # Without a timeout, requests can wait indefinitely on a stalled server.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, "html.parser")

    def text_of(elem):
        """Return the stripped text of *elem*, or "" when it is absent."""
        return elem.text.strip() if elem is not None else ""

    results = []
    for entry in soup.find_all("div", class_="views-row"):
        title_elem = entry.h2
        if title_elem is None:
            # A row without a heading has no title to report; skip it rather
            # than fail with AttributeError on None.
            continue

        # Prefer the anchor inside the title (the URL the title links to);
        # fall back to the first anchor in the row.
        link_elem = title_elem.find("a")
        if link_elem is None:
            link_elem = entry.a
        href = link_elem.get("href", "") if link_elem is not None else ""

        results.append(
            {
                "title": text_of(title_elem),
                "description": text_of(entry.p),
                "date": text_of(entry.find("div", class_="field--type-datetime")),
                # hrefs may be site-relative; resolve them against the page
                # that was actually fetched so the CSV holds usable links.
                "url": urljoin(response.url, href) if href else "",
            }
        )
    return results
def export_to_csv(data, filename):
    """Save news entries to a UTF-8 CSV file with a fixed column order.

    Args:
        data: Iterable of dicts keyed by "title", "description", "date" and
            "url".
        filename: Destination path; the file is opened in "w" mode, so any
            existing content is replaced.

    The header line is written even when *data* is empty.
    """
    fieldnames = ["title", "description", "date", "url"]
    # newline="" lets the csv module emit its own line terminators.
    with open(filename, "w", newline="", encoding="utf-8") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(data)
# Script entry point: run the scraper and write its rows to eec_news.csv.
if __name__ == "__main__":
    news_entries = scrape_eec_news()
    export_to_csv(news_entries, "eec_news.csv")
    print("Data exported to eec_news.csv")
运行后会保存 eec_news.csv(LibreOffice 的屏幕截图):