我正在访问这个 RSS feed。如您所见,其中的条目带有 description 标签,但当我解析该 feed 时,却取不到任何 description 标签。
这是我收到的错误消息
AttributeError: 'NoneType' object has no attribute 'text'
Traceback:
File "C:\Users\User\Desktop\news-recommendation\env\lib\site-packages\streamlit\runtime\scriptrunner\script_runner.py", line 535, in _run_script
exec(code, module.__dict__)
File "C:\Users\User\Desktop\news-recommendation\app.py", line 66, in <module>
data=parseRSS('https://rss.app/feeds/6BJraU9Ff0IeqC3c.xml')
File "C:\Users\User\Desktop\news-recommendation\parseRSS.py", line 23, in parseRSS
description_content = BeautifulSoup(item.description.text, "html.parser")
这是我正在使用的代码
# Body of the question's parseRSS function (the def line is not part of this
# snippet). It downloads the feed at `url` and collects one dict per <item>.
# NOTE(review): the snippet's indentation is not preserved; presumably the
# statements after the `for` line form the loop body -- confirm against the
# original file.
resp=requests.get(url)
soup = BeautifulSoup(resp.content, features="xml")
# The string returned by prettify() is not used.
soup.prettify()
items = soup.findAll('item')
news_items = []
for item in items:
news_item={}
news_item['title']=item.title.text
news_item['link']=item.link.text
news_item['pubDate']=item.pubDate.text
# The dict is appended before 'description' is set; the key is added to the
# same dict object afterwards.
news_items.append(news_item)
# This is the line named in the traceback: item.description is None for an
# <item> without a <description> child, so `.text` raises
# AttributeError: 'NoneType' object has no attribute 'text'.
description_content = BeautifulSoup(item.description.text, "html.parser")
# Remove the img tag
img_tag = description_content.find('img')
if img_tag:
img_tag.decompose()
# Assuming you want to keep the rest of the content as HTML
news_item['description'] = str(description_content)
有些 <item> 没有 <description> 标签,所以你需要处理这种情况:
import requests
from bs4 import BeautifulSoup
def _child_text(item, name, default=""):
    """Return the text of the first *name* tag inside *item*, or *default*.

    BeautifulSoup returns None when a tag is missing, so ``.text`` must not
    be accessed without checking first.
    """
    tag = item.find(name)
    return default if tag is None else tag.text


def _description_html(item):
    """Return the item's description as HTML with its first <img> removed.

    Returns an empty string when the item has no <description> tag.
    """
    description = item.find("description")
    if description is None:
        return ""
    # The description text is parsed again as HTML so the image can be
    # dropped while the rest of the markup is kept.
    content = BeautifulSoup(description.text, "html.parser")
    img_tag = content.find("img")
    if img_tag is not None:
        img_tag.decompose()
    return str(content)


def get_data(url, timeout=10):
    """Download the RSS feed at *url* and return its items as dicts.

    Args:
        url: Address of the RSS/XML feed.
        timeout: Seconds to wait for the server before requests gives up;
            passed straight to ``requests.get``. Without a timeout the call
            can block indefinitely.

    Returns:
        A list with one dict per <item>, each holding the keys "title",
        "link", "pubDate" and "description". A tag that is missing from an
        item yields an empty string instead of raising AttributeError.
    """
    resp = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(resp.content, features="xml")
    return [
        {
            "title": _child_text(item, "title"),
            "link": _child_text(item, "link"),
            "pubDate": _child_text(item, "pubDate"),
            "description": _description_html(item),
        }
        for item in soup.find_all("item")
    ]
# Example run: fetch the feed from the question and print the parsed items.
feed_url = "https://rss.app/feeds/6BJraU9Ff0IeqC3c.xml"
parsed_items = get_data(feed_url)
print(parsed_items)