[{'pf': 'ABC', 'src': 'SCI'}, {'pf': 'ABC', 'src': 'MC'}, {'pf': 'ASR', 'src': 'CC'}, {'pf': 'ASR', 'src': 'CC'}, {'pf': 'ASR', 'src': 'ACS'}, {'pf': 'ABC', 'src': 'CC'}, {'pf': 'NS', 'src': 'ATT'}]
是否可以把上面的字典列表转换成下面这种输出?
{ platform:all, customers:[{ pf:ABC, src:[SCI,MC,CC] },{ pf:ASR, src:[CC,ACS] },{ pf:NS, src:[ATT] }] }  # ASR 中重复的 CC 已被移除
我尝试过使用 defaultdict(list),但由于我需要的是这种特定格式,没有成功。能帮帮我吗?
可以用 itertools.groupby 完成分组,并通过 set 删除重复项。例如:
from pprint import pprint
from itertools import groupby
# Sample records: each one pairs a platform ('pf') with a source ('src').
l = [{'pf': 'ABC', 'src': 'SCI'}, {'pf': 'ABC', 'src': 'MC'}, {'pf': 'ASR', 'src': 'CC'}, {'pf': 'ASR', 'src': 'CC'}, {'pf': 'ASR', 'src': 'ACS'}, {'pf': 'ABC', 'src': 'CC'}, {'pf': 'NS', 'src': 'ATT'}]

# Result skeleton: one entry per distinct 'pf' is appended to 'customers'.
out = {'customers': [], 'platform': 'all'}


def pf_key(record):
    """Return the 'pf' value of a record; used for both sorting and grouping."""
    return record['pf']


# groupby() only merges *adjacent* items with equal keys, so the records are
# sorted by the very same key function before grouping.
for v, g in groupby(sorted(l, key=pf_key), pf_key):
    # The set drops duplicate 'src' values within the group. Note that set
    # iteration order is arbitrary, so the order inside each 'src' list may
    # vary between runs; list(dict.fromkeys(...)) would keep first-seen order.
    out['customers'].append({'pf': v, 'src': list({val['src'] for val in g})})

pprint(out)
打印:
{'customers': [{'pf': 'ABC', 'src': ['MC', 'SCI', 'CC']}, {'pf': 'ASR', 'src': ['ACS', 'CC']}, {'pf': 'NS', 'src': ['ATT']}], 'platform': 'all'}
from collections import defaultdict
from pprint import pprint
# Sample records: each one pairs a platform ('pf') with a source ('src').
l = [{'pf': 'ABC', 'src': 'SCI'}, {'pf': 'ABC', 'src': 'MC'}, {'pf': 'ASR', 'src': 'CC'}, {'pf': 'ASR', 'src': 'CC'}, {'pf': 'ASR', 'src': 'ACS'}, {'pf': 'ABC', 'src': 'CC'}, {'pf': 'NS', 'src': 'ATT'}]

# A set per 'pf' keeps only the unique 'src' values for that platform.
pf_dict = defaultdict(set)
for data_dict in l:
    # On first access to a missing 'pf' key, defaultdict stores a new empty
    # set for it; add() then inserts the 'src' value (a no-op if the value is
    # already present in the set).
    pf_dict[data_dict['pf']].add(data_dict['src'])

# Convert every set of 'src' values to a list. The order inside each list
# follows set iteration order and is therefore arbitrary.
customers = [{'pf': key, 'src': list(value)} for key, value in pf_dict.items()]

result = {
    'platform': 'all',
    'customers': customers,
}
pprint(result)
既然您提到了 defaultdict,下面是使用它来避免重复项的方法:
from collections import defaultdict
from pprint import pprint
# Sample records: each one pairs a platform ('pf') with a source ('src').
ds = [
    {'pf': 'ABC', 'src': 'SCI'},
    {'pf': 'ABC', 'src': 'MC'},
    {'pf': 'ASR', 'src': 'CC'},
    {'pf': 'ASR', 'src': 'CC'},
    {'pf': 'ASR', 'src': 'ACS'},
    {'pf': 'ABC', 'src': 'CC'},
    {'pf': 'NS', 'src': 'ATT'},
]

# Map each 'pf' to the set of its 'src' values; the set discards duplicates.
pfs = defaultdict(set)
for e in ds:
    pfs[e['pf']].add(e['src'])
pprint(pfs)

# Build the final structure. Each 'src' value is left as a set here; wrap it
# in list(...) if a list (e.g. for JSON serialisation) is required.
out = {
    'customers': [{'pf': k, 'src': i} for k, i in pfs.items()],
    'platform': 'all',
}
pprint(out)
产生
defaultdict(<class 'set'>, {'ABC': {'MC', 'SCI', 'CC'}, 'ASR': {'ACS', 'CC'}, 'NS': {'ATT'}})
{'customers': [{'pf': 'ABC', 'src': {'MC', 'SCI', 'CC'}}, {'pf': 'ASR', 'src': {'ACS', 'CC'}}, {'pf': 'NS', 'src': {'ATT'}}], 'platform': 'all'}