我创建了这个程序来自动检查文章中的单词并提供更正,但在运行它时,我在 .join 调用中不断收到上述错误。这是代码
import nltk
from nltk.corpus import wordnet
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.stem import WordNetLemmatizer
def autocorrect_essay(essay):
# Tokenize the essay into sentences and words
sentences = sent_tokenize(essay)
corrected_sentences = []
for sentence in sentences:
words = word_tokenize(sentence)
corrected_words = []
for word in words:
# Check if the word is misspelled
if not wordnet.synsets(word):
corrected_word = correct_word(word)
corrected_words.append(corrected_word)
else:
corrected_words.append(word)
# Join the corrected words to form a sentence
corrected_sentence = ' '.join(corrected_words)
corrected_sentences.append(corrected_sentence)
# Join the corrected sentences to form the final essay
corrected_essay = ' '.join(corrected_sentences)
return corrected_essay
def correct_word(word):
# Lemmatize the word to get its base form
lemmatizer = WordNetLemmatizer()
lemma = lemmatizer.lemmatize(word)
# Find similar words based on Levenshtein distance
similar_words = []
for synset in wordnet.synsets(lemma):
for lemma_name in synset.lemma_names():
similar_words.append(lemma_name)
# Find the most similar word based on Levenshtein distance
min_distance = float('inf')
best_word = None
for similar_word in similar_words:
distance = nltk.edit_distance(word, similar_word)
if distance < min_distance:
min_distance = distance
best_word = similar_word
return best_word
# Example usage
essay = "I havv a bigg problm with speling. Plese help me correct my essay."
corrected_essay = autocorrect_essay(essay)
print(corrected_essay)
我尝试过的一些事情是删除返回空的块的部分,但没有任何改变。
你的函数
correct_word
可以返回None。当这种情况发生时,你最终会在你的列表中添加“None”,这会破坏事情的进展。
如果根本找不到任何合适的匹配项,您可以做的一件事就是返回原始单词。
这样的东西有效(我将
best_word = None
更改为best_word = word
)
def correct_word(word):
# Lemmatize the word to get its base form
lemmatizer = WordNetLemmatizer()
lemma = lemmatizer.lemmatize(word)
# Find similar words based on Levenshtein distance
similar_words = set()
for synset in wordnet.synsets(lemma):
for lemma_name in synset.lemma_names():
similar_words.add(lemma_name)
# Find the most similar word based on Levenshtein distance
min_distance = float('inf')
best_word = word # changed this line to make best_word start off as word instead of None
for similar_word in similar_words:
distance = nltk.edit_distance(word, similar_word)
if distance < min_distance:
min_distance = distance
best_word = similar_word
return best_word