此函数应该检查 `colors` 的各个子列表中的元素是否出现在 `food` 的子列表所包含的字符串中,并以 `categs_cols` 中的颜色首字母为键创建一个字典。
def group_by_category(categs_names, categs_subs, text_subs):
    """Group texts under category names by substring match (question version).

    Every substring of every category is tested against every text in
    ``text_subs``. A hit appends the text to that category's list; a miss
    appends the string ``'None'``.

    NOTE: this is the version whose behaviour is being asked about. Because
    the ``else`` branch runs for each individual miss, a category receives one
    ``'None'`` per non-matching (substring, text) pair instead of a single
    placeholder when nothing matched at all.

    Args:
        categs_names: Category keys, aligned by position with ``categs_subs``.
        categs_subs: One list of substrings per category.
        text_subs: List of lists of texts to search.

    Returns:
        A dict with one key per entry of ``categs_names``; each value is the
        list of hits and ``'None'`` placeholders in the order they were tested.
    """
    temp_dict = {name: [] for name in categs_names}
    for categ_index, category_list in enumerate(categs_subs):
        for substring in category_list:
            for sublist in text_subs:
                for text in sublist:
                    if substring in text:
                        temp_dict[categs_names[categ_index]].append(text)
                    else:
                        # Fires on every single miss, not once per category.
                        temp_dict[categs_names[categ_index]].append('None')
    return temp_dict
# Sample input for the question: the category keys, the texts to search, and
# the colour words for each key (same order as categs_cols).
categs_cols = ['y', 'r', 'g']
food = [
    ['banana is yellow', 'apple is red', 'pear is green'],
    ['lettuce is green', 'pasta is yellowish', 'sugar is brown'],
]
colors = [['yellow', 'yellowish'], ['red'], ['green']]
grouped_cols = group_by_category(categs_cols, colors, food)
print(grouped_cols)
预期结果如下
{'y': ['banana is yellow', 'pasta is yellowish'], 'r': ['None', 'apple is red'],
'g': ['pear is green', 'lettuce is green']}
但我却得到了这个
{'y': ['banana is yellow', 'None', 'None', 'None', 'pasta is yellowish', 'None', 'None', 'None', 'None', 'None', 'pasta is yellowish', 'None'], 'r': ['None', 'apple is red', 'None', 'None', 'None', 'None'], 'g': ['None', 'None', 'pear is green', 'lettuce is green', 'None', 'None']}
我尝试在下面这段代码之后使用 `break`:
if substring in text:
    temp_dict[categs_names[categ_index]].append(text)
但是没有效果。
每当某个食物项与当前关键字不匹配时,else 分支都会添加一次 'None'。您真正需要的是:在处理完当前 category_list 之后(即最外层循环每次迭代的末尾)检查匹配列表是否为空,只有为空时才添加 'None'。代码应该是这样的:
def group_by_category(categs_names, categs_subs, text_subs):
    """Group texts under category names by substring match.

    A text belongs to a category when it contains at least one of that
    category's substrings. Each text is tested once per category, so a text
    that matches several substrings of the same category (for example
    'pasta is yellowish' against both 'yellow' and 'yellowish') is listed
    only once. A category with no matching text gets the single placeholder
    string ``"None"``.

    Args:
        categs_names: Category keys, aligned by position with ``categs_subs``.
        categs_subs: One list of substrings per category.
        text_subs: List of lists of texts to search.

    Returns:
        A dict mapping each category name that has a substring list to its
        matching texts, in the order the texts appear in ``text_subs``, or to
        ``["None"]`` when nothing matched.
    """
    # Flatten once; the nesting of text_subs plays no role in the matching.
    all_texts = [text for sublist in text_subs for text in sublist]
    temp_dict = dict()
    for categ_index, category_list in enumerate(categs_subs):
        matches = [
            text
            for text in all_texts
            if any(substring in text for substring in category_list)
        ]
        if not matches:  # Only add the placeholder if the search came up empty
            matches.append("None")
        temp_dict[categs_names[categ_index]] = matches
    return temp_dict


categs_cols = ['y', 'r', 'g', 'e']
food = [
    ['banana is yellow', 'apple is red', 'pear is green'],
    ['lettuce is green', 'pasta is yellowish', 'sugar is brown'],
]
colors = [['yellow', 'yellowish'], ['red'], ['green'], ['empty']]
grouped_cols = group_by_category(categs_cols, colors, food)
print(grouped_cols)
# >>> {'y': ['banana is yellow', 'pasta is yellowish'], 'r': ['apple is red'], 'g': ['pear is green', 'lettuce is green'], 'e': ['None']}
如果您借助 `zip()` 和一个用于扁平化的列表推导式对输入稍作重塑,就可以这样写:
def group_by_category(categs_names, categs_subs, text_subs):
    """Group texts under category names by substring match.

    Category names and their word lists are paired by position with ``zip``,
    so pairing stops at the shorter of the two sequences. A text is added to
    a category when it contains at least one of that category's words; each
    text is added at most once per category.

    Categories without any matching text do not appear in the result.

    Args:
        categs_names: Category keys.
        categs_subs: One list of words per category, in the same order.
        text_subs: List of lists of texts to search.

    Returns:
        A dict mapping each category name that had at least one match to the
        matching texts, in the order they appear in ``text_subs``.
    """
    # The flattened text list is the same for every category: build it once.
    flat_texts = [cell for row in text_subs for cell in row]
    retval = {}
    for categs_id, categs_words in zip(categs_names, categs_subs):
        for text_sub in flat_texts:
            if any(word in text_sub for word in categs_words):
                retval.setdefault(categs_id, []).append(text_sub)
    return retval
# Sample input: category keys, the colour words for each key (same order),
# and the nested lists of texts to search.
categs_cols = ['y', 'r', 'g']
colors = [
    ['yellow', 'yellowish'],
    ['red'],
    ['green'],
]
food = [
    ['banana is yellow', 'apple is red', 'pear is green'],
    ['lettuce is green', 'pasta is yellowish', 'sugar is brown'],
]
print(group_by_category(categs_cols, colors, food))
得到的结果是:
{
'y': ['banana is yellow', 'pasta is yellowish'],
'r': ['apple is red'],
'g': ['pear is green', 'lettuce is green']
}