我想在转换之后把所有字符串对应的点绘制到散点图中,但这些点没有显示出来。我是否遗漏了什么?
import matplotlib.pyplot as plt
from numpy import random
from sklearn.cluster import KMeans
from sklearn.decomposition import TruncatedSVD
from sklearn.feature_extraction.text import TfidfVectorizer
# Build a small demo corpus: ten sentences that differ only in their two
# numeric tokens (a multiple of 10000 and a random integer below 1000000).
corpus = [
    "This is example %s %s" % (i, random.randint(1000000))
    for i in range(0, 100000, 10000)
]
print(corpus)

# Turn every sentence into a TF-IDF row; the result is a sparse matrix with
# one row per sentence and one column per vocabulary term.
tfidf_vectorizer = TfidfVectorizer()
tfidf = tfidf_vectorizer.fit_transform(corpus)

# Plotting raw columns 0 and 1 only shows the weights of two individual
# vocabulary terms. Almost every sentence has weight 0 for both, so the
# points pile up on the origin and look like they are missing. Project the
# whole matrix down to two dimensions instead so each sentence gets its own
# coordinates. TruncatedSVD accepts the sparse matrix directly.
points = TruncatedSVD(n_components=2).fit_transform(tfidf)

plt.scatter(points[:, 0], points[:, 1])

# Label each point with the index of the sentence it represents.
for i, (x, y) in enumerate(points):
    plt.annotate(str(i), (x, y))
plt.show()
如果可能,请为每个点标注其标签编号:
# Cluster the TF-IDF rows into six groups. n_init is pinned explicitly
# because the library default differs between scikit-learn releases.
kmeans = KMeans(n_clusters=6, n_init=10).fit(tfidf)

# labels[i] is the cluster index assigned to row i of `tfidf`.
labels = kmeans.labels_