TypeError: '<' not supported between instances of 'str' and 'int'

问题描述 投票:0回答:1

我有一个保存在 data 文件夹中的 .txt 文档列表,想通过在本地主机上运行的 Flask 应用将其中一个文档与另一个文档进行比较。当传给 most_similar() 的文档标签不存在时,会引发 “TypeError: '<' not supported between instances of 'str' and 'int'”。有人知道为什么会引发这个错误吗?

Traceback (most recent call last):
File "C:\Users\ibrahimm\AppData\Local\Continuum\anaconda3\lib\site-packages\flask\app.py", line 
2463, in __call__
return self.wsgi_app(environ, start_response)

File "C:\Users\ibrahimm\AppData\Local\Continuum\anaconda3\lib\site-packages\flask\app.py", line 
2449, in wsgi_app
response = self.handle_exception(e)

File "C:\Users\ibrahimm\AppData\Local\Continuum\anaconda3\lib\site-packages\flask\app.py", line 
1866, in handle_exception
reraise(exc_type, exc_value, tb)

File "C:\Users\ibrahimm\AppData\Local\Continuum\anaconda3\lib\site-packages\flask\_compat.py", line 
39, in reraise
raise value

File "C:\Users\ibrahimm\AppData\Local\Continuum\anaconda3\lib\site-packages\flask\app.py", line 
2446, in wsgi_app
response = self.full_dispatch_request()

File "C:\Users\ibrahimm\AppData\Local\Continuum\anaconda3\lib\site-packages\flask\app.py", line 
1951, in full_dispatch_request
rv = self.handle_user_exception(e)

File "C:\Users\ibrahimm\AppData\Local\Continuum\anaconda3\lib\site-packages\flask\app.py", line 
1820, 
in handle_user_exception
reraise(exc_type, exc_value, tb)

File "C:\Users\ibrahimm\AppData\Local\Continuum\anaconda3\lib\site-packages\flask\_compat.py", line 
39, in reraise
raise value

File "C:\Users\ibrahimm\AppData\Local\Continuum\anaconda3\lib\site-packages\flask\app.py", line 
1949, 
in full_dispatch_request
rv = self.dispatch_request()

File "C:\Users\ibrahimm\AppData\Local\Continuum\anaconda3\lib\site-packages\flask\app.py", line 
1935, 
in dispatch_request
return self.view_functions[rule.endpoint](**req.view_args)

File "C:\Users\ibrahimm\Desktop\doc2vec-compare-doc-demo\app.py", line 56, in api_compare_2
vec1 = d2v_model.docvecs.most_similar(data['doc1'])

File "C:\Users\ibrahimm\AppData\Local\Continuum\anaconda3\lib\site-packages\gensim\models\keyedvectors.py", line 1715, in most_similar
elif doc in self.doctags or doc < self.count:
TypeError: '<' not supported between instances of 'str' and 'int'

app.py
@app.route('/api/compare_2', methods=['POST'])
def api_compare_2():
    """Handle POST /api/compare_2: compare the two documents named in the JSON body.

    Reads the 'doc1' and 'doc2' entries of the request JSON, passes each one
    to d2v_model.docvecs.most_similar(), converts both results with
    gensim.matutils.full2sparse() and returns their gensim.matutils.cossim()
    value as JSON under the key 'sim'.

    Returns the plain string 'ERROR' when either key is missing from the body.
    """
    data = request.get_json()
    if not 'doc1' in data or not 'doc2' in data:
        return 'ERROR'

    # NOTE(review): the request value is used as a doc tag here. A string that
    # is not a known tag reaches gensim's `doc < self.count` comparison and
    # raises TypeError ('<' not supported between 'str' and 'int').
    # NOTE(review): most_similar() presumably returns ranked neighbours rather
    # than a document vector -- confirm this is the intended input for
    # full2sparse()/cossim() below.
    vec1 = d2v_model.docvecs.most_similar(data['doc1'])
    vec2 = d2v_model.docvecs.most_similar(data['doc2'])

    vec1 = gensim.matutils.full2sparse(vec1)
    vec2 = gensim.matutils.full2sparse(vec2)

    # Debug output of the request payload and both converted results.
    print (data)
    print (vec2)
    print (vec1)

    return jsonify(sim=gensim.matutils.cossim(vec1, vec2))


@app.route('/api/compare_all', methods=['POST'])
def api_compare_all():
    """Handle POST /api/compare_all: list the top 5 matches for one document.

    Reads the 'doc' entry of the request JSON, passes it to
    d2v_model.docvecs.most_similar(), then feeds that result (wrapped in a
    list) into a second most_similar() call with topn=5 and returns the
    outcome as JSON under the key 'list'.

    Returns the plain string 'ERROR' when 'doc' is missing from the body.
    """
    data = request.get_json()
    if not 'doc' in data:
        return 'ERROR'

    # NOTE(review): the request value is used as a doc tag here. A string that
    # is not a known tag reaches gensim's `doc < self.count` comparison and
    # raises TypeError ('<' not supported between 'str' and 'int').
    vec = d2v_model.docvecs.most_similar(data['doc'])
    # NOTE(review): `vec` is the result of a most_similar() call, not
    # necessarily a vector -- confirm it is a valid query for the call below.
    res = d2v_model.docvecs.most_similar([vec], topn=5)

    return jsonify(list=res)

model.py
def load_model():
    """Load the saved Doc2Vec model from the file "doc2vec.model2".

    Returns:
        The loaded model, or None when loading fails for any reason; in that
        case the notice 'Model not found!' is printed.
    """
    try:
        return gensim.models.doc2vec.Doc2Vec.load("doc2vec.model2")
    except Exception:
        # Best-effort load: any loading error maps to None, but
        # KeyboardInterrupt / SystemExit are no longer swallowed as they were
        # with the previous bare `except:`.
        print ('Model not found!')
        return None

def train_model():
    """Train a Doc2Vec model on the .txt corpus files and save it.

    Every ``.txt`` file found in the ``data`` folder becomes one training
    document tagged with its file name. The texts are lower-cased, tokenized
    and stripped of English stop words, then used to build the vocabulary and
    train the model, which is saved as "doc2vec.model2".
    """
    #path to the input corpus files
    data="data"

    #tagging the text files
    class DocIterator(object):
        """Re-iterable stream of TaggedDocument objects, one per corpus file."""

        def __init__(self, doc_list, labels_list):
            self.labels_list = labels_list
            self.doc_list = doc_list

        def __iter__(self):
            for idx, doc in enumerate(self.doc_list):
                # nlp_clean() hands over token lists; a raw string is still
                # split on whitespace as before.
                words = doc.split() if isinstance(doc, str) else list(doc)
                yield TaggedDocument(words=words, tags=[self.labels_list[idx]])

    docLabels = [f for f in listdir(data) if f.endswith('.txt')]
    print(docLabels)
    data = []
    for doc in docLabels:
        # Context manager so each corpus file is closed right after reading.
        with open(r'C:\Users\ibrahimm\Desktop\doc2vec-compare-doc-demo\data\\' + doc,
                  encoding='cp437') as corpus_file:
            data.append(corpus_file.read())

    tokenizer = RegexpTokenizer(r'\w+')
    stopword_set = set(stopwords.words('english'))

    #This function does all cleaning of data using two objects above
    def nlp_clean(data):
        """Return one list of unique, lower-cased, non-stop-word tokens per text."""
        new_data = []
        for d in data:
            new_str = d.lower()
            dlist = tokenizer.tokenize(new_str)
            dlist = list(set(dlist).difference(stopword_set))
            new_data.append(dlist)
        # Return only after every document has been cleaned (the return used
        # to sit inside the loop and stopped after the first document).
        return new_data

    data = nlp_clean(data)
    it = DocIterator(data, docLabels)

    #train doc2vec model
    model = gensim.models.Doc2Vec(size=300, window=15, min_count=4, workers=10,alpha=0.025, min_alpha=0.025, iter=20) # use fixed learning rate
    model.build_vocab(it)
    model.train(it, epochs=model.iter, total_examples=model.corpus_count)

    model.save("doc2vec.model2")

有人知道为什么会抛出 “TypeError: '<' not supported between instances of 'str' and 'int'” 这个错误吗?

api flask doc2vec
1个回答
0
投票

如果您尝试查找一个不在模型中的字符串文档标签(doc-tag),很遗憾,您得到的会是这个令人困惑的错误,而不是一条更清晰的错误信息。(请参阅 gensim 的未解决问题:https://github.com/RaRe-Technologies/gensim/issues/1737#issuecomment-346995119)

© www.soinside.com 2019 - 2024. All rights reserved.