import re

import matplotlib.pyplot as plt
import nltk
import numpy as np
import pandas as pd
import seaborn as sns
from nltk.corpus import stopwords
from textblob import TextBlob
from nltk.stem import WordNetLemmatizer

# Load the training tweets.
# NOTE(review): hard-coded absolute Windows path — consider making this
# configurable (CLI arg or environment variable).
df = pd.read_csv(r'C:\Users\LENOVO\Desktop\slides\3rd semester\CP\train.csv')

lemma = WordNetLemmatizer()
# BUG FIX: NLTK stopword corpus fileids are lowercase — 'English' fails to
# resolve on case-sensitive systems; 'english' is the documented id.
stop = stopwords.words('english')
def clean_tweets(text):
    """Normalize one raw tweet for sentiment analysis.

    Steps: lowercase, tokenize, drop stopwords, lemmatize each remaining
    token (reduce it to its dictionary form, its "lemma"), then replace
    every non-alphabetic character with a space.

    Parameters
    ----------
    text : str
        Raw tweet text.

    Returns
    -------
    str
        Cleaned, space-separated lowercase words.
    """
    text = text.lower()
    tokens = nltk.word_tokenize(text)
    # Lemmatize and filter in one pass; `stop` and `lemma` are module-level.
    text = ' '.join(lemma.lemmatize(tok) for tok in tokens if tok not in stop)
    # Original had a redundant ``''.join(words)`` here — joining a string
    # with '' returns the same string, so it was dropped.
    # Strip anything that is not a lowercase letter.
    text = re.sub('[^a-z]', ' ', text)
    return text
# Build the 'cleaned_tweets' column by cleaning each raw tweet in 'text'.
df['cleaned_tweets'] = df['text'].map(clean_tweets)
def tweet_sentiment(tweet):
    """Classify a tweet as 'Positive', 'Negative', or 'Neutral'.

    Uses TextBlob's polarity score: > 0 is positive, < 0 is negative,
    exactly 0 is neutral.

    Parameters
    ----------
    tweet : str
        Cleaned tweet text.

    Returns
    -------
    str
        One of 'Positive', 'Negative', 'Neutral'.
    """
    # BUG FIX: ``textblob`` is a module and is not callable — calling
    # ``textblob(tweet)`` raises TypeError. The class is ``TextBlob``.
    from textblob import TextBlob

    score = TextBlob(tweet).sentiment.polarity
    if score > 0:
        return 'Positive'
    elif score < 0:
        # BUG FIX: label typo corrected ('Nigative' -> 'Negative').
        return 'Negative'
    else:
        return 'Neutral'
# Label every cleaned tweet, then plot the distribution of sentiment labels.
df['tweet_sentiment'] = df['cleaned_tweets'].apply(tweet_sentiment)

plt.clf()
# BUG FIX: the column key must be the string 'tweet_sentiment' —
# ``df[tweet_sentiment]`` indexed the frame with the function object itself,
# which raises a KeyError (this is why no plot was produced).
df['tweet_sentiment'].value_counts().plot(kind='barh')
plt.title('sentiment of tweets')
plt.xlabel('frequency of tweet sentiment')
plt.show()
# After successfully cleaning the data I tried to run sentiment analysis
# (tweet score); I am stuck at the sentiment-analysis step and cannot get a
# result — any help is appreciated.