我有以下代码,它将毫无问题地为您工作,它在示例 Google Colab(https://colab.research.google.com/drive/1Rbx1iERTZI6Tahm4dBtt0P9jcAVDbxSa?usp=sharing) 中运行良好并给出结果(下图)。但是 jupyter notebook 中的相同代码显示所有行的分数均为 0。差别不大,我使用 nltk.data.path[4]。不过没关系。你认为问题是什么?
# NOTE: '!pip install' is IPython/Jupyter shell magic, not plain Python —
# this script is intended to run inside a notebook environment.
!pip install feedparser
import feedparser
import pickle
import time
import requests
import nltk
import csv
# Download the NLTK resources used below: stopwords, the 'punkt' tokenizer
# data, and the VADER sentiment lexicon.
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('vader_lexicon')
# Fetch the news RSS feed and collect its entries into `posts`,
# deduplicated by title. Each post records title, first link, tag terms,
# and the published date formatted as YYYY-MM-DD.
posts = []
rss_url = 'https://www.pravda.com.ua/ukr/rss/view_news/'
response = feedparser.parse(rss_url)

# BUG FIX: the original rebuilt [x['title'] for x in posts] on every
# iteration (O(n^2)); a set gives O(1) membership tests. The inverted
# `if seen: pass / else: append` is replaced by a guard clause.
seen_titles = set()
for entry in response['entries']:
    if entry['title'] in seen_titles:
        continue
    seen_titles.add(entry['title'])
    posts.append({
        "title": entry['title'],
        "link": entry['links'][0]['href'],
        "tags": [t['term'] for t in entry['tags']],
        "date": time.strftime('%Y-%m-%d', entry['published_parsed']),
    })
# Print every collected post as "<index> <title>".
for index, post in enumerate(posts):
    print(index, post['title'])
# Download the Ukrainian tone dictionary (TSV: word <TAB> score) and load
# it into `d` as {word: float_score} for extending the VADER lexicon.
url = 'https://raw.githubusercontent.com/lang-uk/tone-dict-uk/master/tone-dict-uk.tsv'
r = requests.get(url)
r.raise_for_status()  # fail loudly instead of silently writing an error page

dict_path = nltk.data.path[0] + '/tone-dict-uk.tsv'
with open(dict_path, 'wb') as f:
    f.write(r.content)

d = {}
# BUG FIX (this is why Jupyter showed 0.0 for every line): the original
# opened the file with the platform's default encoding. On Colab that is
# UTF-8, but a local Jupyter — especially on Windows — often defaults to
# cp1251/cp1252, silently mangling the Cyrillic keys so none of them ever
# match VADER's tokens and every compound score comes out 0.
# `newline=''` is the documented way to open files for the csv module.
with open(dict_path, 'r', encoding='utf-8', newline='') as csv_file:
    for row in csv.reader(csv_file, delimiter='\t'):
        d[row[0]] = float(row[1])
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# Build a VADER analyzer and extend its lexicon with the Ukrainian tone
# dictionary loaded above, then print "<index> <title> <compound score>"
# for every collected post.
SIA = SentimentIntensityAnalyzer()
SIA.lexicon.update(d)

for idx, post in enumerate(posts):
    title = post['title']
    print(idx, title, SIA.polarity_scores(title)["compound"])