如何对这个词典列表进行排序

问题描述 投票:0回答:0
def searchSimilarDocumentsByPhrases(corpus, Ids, contractIds,count,phrases=None):
  tfidf = TfidfVectorizer(vocabulary = phrases, ngram_range=(1, 6))
  tfs = tfidf.fit_transform(corpus)
  feature_names = tfidf.get_feature_names_out()
  rows, cols = tfs.nonzero()

  phrase_counts = defaultdict(list)
  for row, col in zip(rows, cols):
    phraseCount = corpus[row].count(feature_names[col])
    phrase_counts[feature_names[col]].append({Ids[row]:{contractIds[row]: phraseCount}})

  counter=count
  phraselist=[]
  for phrase, counts in phrase_counts.items():
    counts.sort(key=lambda x: list(x.items()), reverse=True) 
    counts=counts[:counter]
    phraselist.append(phrase)
    phraselist.append(counts)
  return phraselist

我提供的输入是

{ "phrases":
 [
  "test and evaluation"
 ],
 "count":"3"
}

我得到输出为

[
    "test and evaluation",
    [
        {
            "64": {
                "LMLB_C-41": 2
            }
        },
        {
            "24": {
                "LMLB_C-2": 1
            }
        },
        {
            "1180": {
                "LMLB_C-157": 4
            }
        }
    ],
   ]

虽然我希望输出像下面的值降序排列

[
    "test and evaluation",
    [
        {
            "1180": {
                "LMLB_C-157": 4
            }
        },
        {
            "64": {
                "LMLB_C-41": 2
            }
        },
        {
            "24": {
                "LMLB_C-2": 1
            }
        }
   ],
]
python python-3.x tf-idf
© www.soinside.com 2019 - 2024. All rights reserved.