topics_df = pd.DataFrame([dict((y, x) for x, y in tuples) for tuples in topics]) 报错 TypeError: 'int' object is not iterable('int' 对象不可迭代)
这是 Python 版本的问题,还是我传递给该函数的参数有问题?下面是我的代码。
from stop_words import get_stop_words
import pandas as pd
import numpy as np
from nltk import bigrams
from lib.lda import lda, visualizeLDA
from nltk.tokenize import RegexpTokenizer
from gensim import corpora, models
import gensim
import pyLDAvis.gensim
# Topic-modelling script: reads complaint text from a CSV file, builds one
# bigram document per complaint ID, fits a 5-topic gensim LDA model and
# hands the result to pyLDAvis for display.

# Provide the path of the complaints CSV file here.
mypath = " "
allcomplaints = pd.read_csv(mypath)

# Combine the complaint rows of each ID into one document. The fragments
# are joined with no separator, i.e. concatenated directly.
# NOTE(review): the column names are lower-case here; confirm they match
# the header of the CSV file being loaded.
myremarks = (
    allcomplaints.groupby(['complaint_ID'])['complaint_txt']
    .agg(lambda x: ''.join(x))
    .values
)

# English stop words plus domain-specific ones. The u"" literals give text
# strings on both Python 2 and Python 3, replacing the Python 2-only
# str.decode('utf-8') call.
en_stop = get_stop_words('en')
my_stopwords = [u"xx", u"xxxx"]
en_stop = en_stop + my_stopwords
# Set copy used only for fast membership tests inside the loop below.
stop_lookup = set(en_stop)

# The tokenizer was used but never created in the original script
# (NameError). r'\w+' splits on anything that is not a word character.
tokenizer = RegexpTokenizer(r'\w+')

texts = []
for doc in myremarks:
    raw = doc.lower()
    # Drop stop words and single-character tokens before pairing, so that
    # bigrams are formed from the remaining neighbouring words.
    words = [
        word for word in tokenizer.tokenize(raw)
        if word not in stop_lookup and len(word) > 1
    ]
    mergedtokens = [first + " " + second for first, second in bigrams(words)]
    # Second filter: removes a merged bigram only if the whole bigram
    # string is itself listed as a stop word.
    stopped_tokens = [token for token in mergedtokens if token not in stop_lookup]
    texts.append(stopped_tokens)

dictionary = corpora.Dictionary(texts)
print(dictionary)

# Convert tokenized documents into a document-term matrix.
corpus = [dictionary.doc2bow(text) for text in texts]

# Generate the LDA model.
ldamodel = gensim.models.ldamodel.LdaModel(
    corpus, num_topics=5, id2word=dictionary, passes=1
)
print(ldamodel.print_topics(num_topics=5))

# Visualize the LDA model.
# NOTE(review): the reported "TypeError: 'int' object is not iterable" is
# raised inside the visualisation library, not in this script; presumably
# the installed pyLDAvis and gensim versions disagree on the topic format.
# Confirm by checking/upgrading the pyLDAvis version.
vis = pyLDAvis.gensim.prepare(ldamodel, corpus, dictionary)
pyLDAvis.display(vis)
#以下是我用于运行LDA的数据示例:
Complaint_ID| Complaint_txt
------------| --------------
4545 | cust has billing issue
4545 | for $480
6878 | connct issue for a day ne
6878 | ed immediate resoltn
我也遇到了同样的问题,你找到解决办法了吗?