我正在读取JSON文件,并尝试从每个log
中提取以下信息并将其存储在Excel工作表中。
["name","TOM DOE"]
["value","132"]
我的Python代码已经能够从JSON中提取其他一些必需的信息。但是,由于JSON包含多层嵌套的列表和字典,我无法获取上述两项信息并将其保存。请问有人能帮我提供相应的Python代码吗?
这是我的python代码:
import json
import pandas
class ConvertToExcel:
    """Flatten the ``log`` entries of a JSON report into an Excel sheet."""

    def Process(self):
        """Read the JSON report and write its log entries to ``output.xlsx``.

        The input is expected to be a list of objects that each carry a
        ``log`` list.  One row is produced per log entry with the columns
        Log, Innerlog, code, message, cid, text, refs, hrefs, hrefsFacts,
        hrefspropertiesvalue and level.

        Raises:
            OSError: if the hard-coded input file cannot be opened.
            KeyError: if a log entry lacks one of the mandatory keys
                (``code``, ``message``, ``text``, ``refs``, ``level``).
        """
        with open('C:/Users/Desktop/SampleTestFiles/new17.json') as json_file:
            dataarray = json.load(json_file)

        data1 = []
        logcount = 0
        for data in dataarray:
            logcount = logcount + 1
            for i in range(len(data['log'])):
                code = data['log'][i]['code']
                message = data['log'][i]['message']
                # 'cid' is optional; fall back to 0 when it is absent.
                try:
                    cid = data['log'][i]['message']['cid']
                except (KeyError, TypeError):
                    cid = 0
                text = data['log'][i]['message']['text']
                refs = data['log'][i]['refs']

                # Defaults keep both names bound when 'refs' is empty.
                hrefs = ''
                hrefsFacts = ''
                for k in range(len(data['log'][i]['refs'])):
                    try:
                        hrefs = data['log'][i]['refs'][k]['href']
                    except (KeyError, TypeError):
                        hrefs = ''
                    try:
                        hrefsFacts = data['log'][i]['refs'][k]['href']
                    except (KeyError, TypeError):
                        hrefsFacts = ''
                    # Unfinished attempt at reading the "name"/"value" pairs.
                    # NOTE: in the sample data 'href' is a plain string and
                    # 'properties' is its sibling inside each ref, so the
                    # ['href']['properties'] lookups below cannot work.
                    # print(type(data['log'][i]['refs'][k]['href']['properties']))
                    # res1 = 'name' in chain(*data['log'][i]['refs'][k]['href']['properties'])
                    # for elem in len(data['log'][i]['refs'][k]['href']['properties']):
                    # for item in elem:
                    # if(item == 'val'):
                    # hrefspropertiesvalue = item
                    # else:
                    # pass

                # Placeholder until the property extraction above is solved.
                hrefspropertiesvalue = 'a'
                level = data['log'][i]['level']
                # NOTE(review): the original indentation was lost; one row per
                # log entry is assumed from the 'Log'/'Innerlog' columns - confirm.
                data1.append((logcount, i, code, message, cid, text, refs,
                              hrefs, hrefsFacts, hrefspropertiesvalue, level))

        pandas.DataFrame(
            data1,
            columns=['Log', 'Innerlog', 'code', 'message', 'cid', 'text',
                     'refs', 'hrefs', 'hrefsFacts', 'hrefspropertiesvalue',
                     'level'],
        ).to_excel("output.xlsx")
# Script driver: build the converter and export the log entries to output.xlsx.
A = ConvertToExcel()
A.Process()
和JSON:
[{ "log": [
{
"code": "nikv.F1.all.1",
"message": {
"cid": "61785360",
"filing_url": "C:\\Users\\farizaleta\\Desktop\\test-428-2016Q4F1.abcd",
"severity": "error",
"text": "[nikv.F1.all.1] The values of 6,075,786 for the elements nikv:OtherChargestested is duplicated in the filing 2 times.\n\nElement : nikv:OtherChargestested\nPeriod : 2016-01-01 to 2016-12-31\n\n\nRule Id:nikv.F1.all.1 - test-428-2016Q4F1.abcd 4122"
},
"refs": [
{
"href": "test-428-2016Q4F1.abcd#f-743",
"sourceLine": 4122,
"properties": [
[
"label",
"Other charges, tested"
],
[
"namespace",
"http://nikv.com/form/2002-01-01/nikv"
],
[
"name",
"TOM DOE"
],
[
"QName",
"nikv:OtherChargestested"
],
[
"contextRef",
"c-01",
[
[
"entity",
"C002089",
[
[
"scheme",
"http://tested.com/entity/identification/scheme"
]
]
],
[
"startDate",
"2016-01-01"
],
[
"endDate",
"2016-12-31"
],
[
"dimensions",
"(1)",
[
[
"nikv:OfficerAxis",
"<nikv:OfficerDomain>0-1</nikv:OfficerDomain>\n\t\t\t\t\n"
]
]
]
]
],
[
"unitRef",
"u-02",
[
[
"measure",
"iso4217"
]
]
],
[
"decimals",
"INF"
],
[
"precision",
"None"
],
[
"xsi:nil",
"false"
],
[
"value",
"132"
]
],
"objectId": "91269"
}
],
"level": "error"
}]
}]
您可以像这样遍历属性:
for type, val, *_ in data['log'][i]['refs'][k]['properties']:
*_
会忽略properties
子列表中的任何其他元素。
然后您可以测试type
是name
还是value
并设置适当的变量。
def Process(self):
    """Export every log entry of the JSON report to ``output.xlsx``.

    The input is expected to be a list of objects that each carry a ``log``
    list.  One row is written per log entry with the columns Log, Innerlog,
    code, message, cid, text, refs, hrefs, hrefsFacts, hrefspropertiesvalue,
    level, name and value.  ``name`` and ``value`` come from the matching
    ``["name", ...]`` / ``["value", ...]`` pairs in each ref's ``properties``
    list; when an entry has several refs, the last ref providing a field wins.

    Raises:
        OSError: if the hard-coded input file cannot be opened.
        KeyError: if a log entry lacks one of the mandatory keys
            (``code``, ``message``, ``text``, ``refs``, ``level``).
    """
    with open('C:/Users/Desktop/SampleTestFiles/new17.json') as json_file:
        dataarray = json.load(json_file)

    data1 = []
    for logcount, data in enumerate(dataarray, start=1):
        # enumerate keeps the zero-based position for the 'Innerlog' column.
        for i, x in enumerate(data['log']):
            code = x['code']
            message = x['message']
            cid = message.get('cid', 0)  # 'cid' is optional
            text = message['text']
            refs = x['refs']

            # Defaults keep every column bound when 'refs' is empty.
            hrefs = ''
            hrefsFacts = ''
            name = ''
            value = ''
            for ref in refs:
                hrefs = ref.get('href', '')
                hrefsFacts = ref.get('href', '')
                # Each property is a list [type, value, *nested details];
                # only the first two items matter here.
                for prop_type, val, *_ in ref.get('properties', []):
                    if prop_type == 'name':
                        name = val
                    elif prop_type == 'value':
                        value = val

            hrefspropertiesvalue = 'a'  # placeholder column kept from the original
            level = x['level']
            data1.append((logcount, i, code, message, cid, text, refs, hrefs,
                          hrefsFacts, hrefspropertiesvalue, level, name, value))

    pandas.DataFrame(
        data1,
        columns=['Log', 'Innerlog', 'code', 'message', 'cid', 'text', 'refs',
                 'hrefs', 'hrefsFacts', 'hrefspropertiesvalue', 'level',
                 'name', 'value'],
    ).to_excel("output.xlsx")
我还简化了所有循环,以使用
for <variable> in <list>
代替for <indexvariable> in range(len(<list>))
。