我有一段 Python 代码,用于在"值为列表的字典"中搜索一组字符串。目前它可以正常工作;但代码会遍历字典中的每一项,再在每个列表里逐个查找与关键字匹配的元素。如果字典很大,这种做法的效率会很低。有没有更好、更高效的方法来实现?
我的Python代码:
from collections import defaultdict
import numpy as np
def matchingStringsByKeys(dictionary, searchString):
    """Return the values of ``dictionary`` whose key contains ``searchString``.

    The match is a substring test (``searchString in key``), not an equality
    test, so every key is inspected on every call.

    Args:
        dictionary: Mapping whose keys support the ``in`` operator with
            ``searchString`` (e.g. ``str`` keys).
        searchString: Text to look for inside each key.

    Returns:
        A list with the value of every matching key, in the mapping's
        iteration order; empty when no key matches.
    """
    return [val for key, val in dictionary.items() if searchString in key]
def getsublist(firstArr, secondArr):
    """Return the sorted 1-based positions in ``firstArr`` hit by ``secondArr``.

    An element of ``firstArr`` is hit by a keyword when the keyword occurs in
    it as a substring.

    Args:
        firstArr: Sequence of hashable strings to search. When the same
            element occurs more than once only its last position is kept.
        secondArr: Iterable of keyword strings.

    Returns:
        Ascending list of 1-based positions, one entry per
        (keyword, element) hit, so a position is repeated when several
        keywords match the same element. Empty when nothing matches.
    """
    # Map each element to its 1-based position; a repeated element keeps
    # the position of its last occurrence.
    mp = {}
    print('len(firstArr): %s' % str(len(firstArr)))
    for position, element in enumerate(firstArr, start=1):
        mp[element] = position

    # Collect the position of every element that contains a keyword.
    tempArr = []
    for keyword in secondArr:
        returned_keys = [
            position for element, position in mp.items() if keyword in element
        ]
        if returned_keys:
            print('Match found for: %s' % keyword)
            tempArr.extend(returned_keys)

    if not tempArr:
        print('Did not find a match')
        return []

    # The previous hand-written ordered insertion called bisect_left without
    # importing it, raising NameError as soon as a position had to go into
    # the middle of the list. Its result was simply the ascending order of
    # tempArr (duplicates kept), which sorted() produces directly.
    return sorted(tempArr)
# Sample keywords to search for (a shorter alternative is kept commented out).
#keyword_list = ['men', 'boy']
keyword_list = ['men', 'boy', 'dog']
# Sample data: category -> list of entries. The commented-out variant holds
# bare words; the active one prefixes each entry with a number.
#db_data={'male':['man', 'men'], 'child':['boy','lad', 'girl'], 'animal':['cat','dog']}
db_data={'male':['1:man', '2:men'], 'child':['3:boy','4:lad', '5:girl'], 'animal':['6:cat','7:dog, puppy']}
# category -> entries of that category that contain at least one keyword
output = defaultdict(list)
for key, value in db_data.items():
    print('\nchecking in: %s\n ' % (value))
    a = np.array(value)
    b = a.ravel()
    result = getsublist(b, keyword_list)
    print('Result: %s' % (result))
    for x in result:
        # getsublist reports 1-based positions, hence the -1 when indexing b.
        output[key].append(b[x-1])
print(output)
我建议先把 `db_data` 反转(建立由单词到类别的反向索引),这样搜索就很容易了:
# Invert db_data: map every listed entry to the list of categories (keys of
# db_data) whose value list contains it.
inv_db = {}
for k, v in db_data.items():
    for vv in v:
        inv_db.setdefault(vv, []).append(k)

# inv_db is now:
# NOTE(review): the keys shown below are bare words ('man', 'men', ...), i.e.
# they correspond to a db_data without the '1:'-style prefixes -- confirm
# which variant of db_data is intended. The literal is only an illustration;
# as a statement it is evaluated and discarded.
{
    "man": ["male"],
    "men": ["male"],
    "boy": ["child"],
    "lad": ["child"],
    "girl": ["child"],
    "cat": ["animal"],
    "dog": ["animal"],
}
然后:
keyword_list = ["men", "boy"]

# Group the keywords by category: one exact-match dictionary lookup per
# keyword instead of a scan over every list in db_data.
out = {}
for k in keyword_list:
    # Keywords that are not a key of inv_db contribute nothing.
    for v in inv_db.get(k, []):
        out.setdefault(v, []).append(k)

print(out)
打印:
{'male': ['men'], 'child': ['boy']}