使用 Python 将文本文件转换为 JSON 文件

问题描述 投票:0回答:1

假设我有一个如下所示的txt文件(缩进为4个空格):

key1=value1
key2
    key2_1=value2_1
    key2_2
        key2_2_1=value2_2_1
    key2_3=value2_3_1,value2_3_2,value2_3_3
key3=value3_1,value3_2,value3_3

我想将其转换为任何有效的 json,就像这样:

{
'key1':'value1',
'key2': {
    'key2_1':'value2_1',
    'key2_2':{
        'key2_2_1':'value2_2_1'
        },
    'key2_3':['value2_3_1','value2_3_2','value2_3_3']
    },
'key3':['value3_1','value3_2','value3_3']
}

我已经尝试过这个(我从另一篇文章中得到的):

# helper method to convert equals sign to indentation for easier parsing
def convertIndentation(inputString):
    """Expand every ``key=value`` line into a key line plus an indented value line.

    Args:
        inputString: Sequence of text lines (for example the result of
            ``readlines()``); despite the name it is not a single string.
            The sequence is only read, the caller's list is left unchanged.

    Returns:
        A single string made by concatenating all lines, in which each
        ``key=value`` line has become ``key`` followed by a new line holding
        ``value`` one indentation unit (four spaces) deeper than the key.
        Lines without ``=`` are passed through untouched.
    """
    indentVal = "    "
    # Work on a copy: lines are inserted while scanning, and the caller's
    # list must not be modified as a side effect.
    lines = list(inputString)
    position = 0
    while position < len(lines):
        key, sep, value = lines[position].partition("=")
        if sep:
            # Reuse the key's own leading whitespace so the value is always
            # strictly deeper than its key, whatever the indentation style
            # and regardless of space runs inside the key or the value.
            leading = key[:len(key) - len(key.lstrip())]
            lines[position] = key + "\n"
            # The inserted line is scanned on the next pass, so a value that
            # itself contains "=" is expanded one level deeper again.
            lines.insert(position + 1, leading + indentVal + value)
        position += 1
    return "".join(lines)

# helper class for node usage
class Node:
    """One line of the indented text together with the lines nested below it."""

    def __init__(self, indented_line):
        """Create a node from a raw line, keeping its indentation depth."""
        # Nodes nested directly under this one, in input order.
        self.children = []
        # Depth measured as the number of leading whitespace characters.
        self.level = len(indented_line) - len(indented_line.lstrip())
        # Line content without surrounding whitespace.
        self.text = indented_line.strip()

    def add_children(self, nodes):
        """Attach the entries of ``nodes`` that belong beneath this node.

        ``nodes`` is a flat list of Node objects in input order and is
        consumed in place from the front. The level of its first entry defines
        the level of this node's direct children. Processing stops, leaving
        the remaining entries in the list, at the first node whose level is
        not deeper than this node's own level.

        An empty list is accepted and leaves the node without children.
        """
        if not nodes:
            # Nothing to attach (e.g. an empty input); nodes[0] would fail.
            return
        childlevel = nodes[0].level

        while nodes:
            node = nodes[0]
            if node.level == childlevel:  # add node as a child
                self.children.append(nodes.pop(0))
            elif node.level > childlevel:  # descendants of the last child
                self.children[-1].add_children(nodes)
            elif node.level <= self.level:  # sibling or ancestor: not ours
                return
            else:
                # Indented less than the children but more than this node:
                # such a line is discarded without being attached anywhere.
                nodes.pop(0)

    def as_dict(self):
        """Return the subtree as nested plain Python data.

        A leaf yields its text, a node with one child yields
        ``{text: child}``, and a node with several children yields
        ``{text: [child, child, ...]}``.
        """
        if len(self.children) > 1:
            return {self.text: [node.as_dict() for node in self.children]}
        elif len(self.children) == 1:
            return {self.text: self.children[0].as_dict()}
        else:
            return self.text

# Read all lines first; the file handle is only needed for this step.
with open(filename, 'r') as fh:
    fileContent = fh.readlines()

# Rewrite "key=value" lines as a key line plus an indented value line.
fileParse = convertIndentation(fileContent)

# Build the tree under an artificial root from the non-blank lines.
root = Node('root')
root.add_children(
    [Node(line) for line in fileParse.splitlines() if line.strip()]
)

# Drop the artificial root key and serialize what is left.
d = root.as_dict()['root']
jsonOutput = json.dumps(d, indent=4, sort_keys=False)
print(jsonOutput)

产生以下结果:

[
    {
        "key1": "value1"
    },
    {
        "key2": [
            {
                "key2_1": "value2_1"
            },
            {
                "key2_2": {
                    "key2_2_1": "value2_2_1"
                }
            },
            {
                "key2_3": "value2_3_1,value2_3_2,value2_3_3"
            },
        ]
    },
    {
        "key3": "value3_1,value3_2,value3_3"
    }
]

但这仍然不是有效的 JSON 文件。

当我尝试使用“json”模块打开输出文件时,我收到此可预测的消息:“JSONDecodeError:期望属性名称用双引号括起来:第 10 行第 5 列(字符 165)”。

with open(r'C:\Users\nigel\OneDrive\Documents\LAB\lean\sample_01.02_R00.json', 'r', encoding='utf-8') as read_file:
    data = json.load(read_file)  # this call raises the JSONDecodeError shown in the traceback

输出:

JSONDecodeError                           Traceback (most recent call last)
Input In [2], in <cell line: 1>()
      1 with open(r'C:\Users\nigel\OneDrive\Documents\LAB\lean\sample_01.02_R00.json', 'r', encoding='utf-8') as read_file:
----> 2     data = json.load(read_file)

File ~\Anaconda3\lib\json\__init__.py:293, in load(fp, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)
    274 def load(fp, *, cls=None, object_hook=None, parse_float=None,
    275         parse_int=None, parse_constant=None, object_pairs_hook=None, **kw):
    276     """Deserialize ``fp`` (a ``.read()``-supporting file-like object containing
    277     a JSON document) to a Python object.
    278 
   (...)
    291     kwarg; otherwise ``JSONDecoder`` is used.
    292     """
--> 293     return loads(fp.read(),
    294         cls=cls, object_hook=object_hook,
    295         parse_float=parse_float, parse_int=parse_int,
    296         parse_constant=parse_constant, object_pairs_hook=object_pairs_hook, **kw)

File ~\Anaconda3\lib\json\__init__.py:346, in loads(s, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)
    341     s = s.decode(detect_encoding(s), 'surrogatepass')
    343 if (cls is None and object_hook is None and
    344         parse_int is None and parse_float is None and
    345         parse_constant is None and object_pairs_hook is None and not kw):
--> 346     return _default_decoder.decode(s)
    347 if cls is None:
    348     cls = JSONDecoder

File ~\Anaconda3\lib\json\decoder.py:337, in JSONDecoder.decode(self, s, _w)
    332 def decode(self, s, _w=WHITESPACE.match):
    333     """Return the Python representation of ``s`` (a ``str`` instance
    334     containing a JSON document).
    335 
    336     """
--> 337     obj, end = self.raw_decode(s, idx=_w(s, 0).end())
    338     end = _w(s, end).end()
    339     if end != len(s):

File ~\Anaconda3\lib\json\decoder.py:353, in JSONDecoder.raw_decode(self, s, idx)
    344 """Decode a JSON document from ``s`` (a ``str`` beginning with
    345 a JSON document) and return a 2-tuple of the Python
    346 representation and the index in ``s`` where the document ended.
   (...)
    350 
    351 """
    352 try:
--> 353     obj, end = self.scan_once(s, idx)
    354 except StopIteration as err:
    355     raise JSONDecodeError("Expecting value", s, err.value) from None

JSONDecodeError: Expecting property name enclosed in double quotes: line 10 column 5 (char 165)

原因是:JSON 解析器在该位置期望的是一个键(用双引号括起来的字符串),但实际遇到的却是一个 JSON 对象(嵌套字典)。仅此而已!

非常感谢任何意见和建议。祝好,

奈杰尔

python arrays json dictionary txt
1个回答
2
投票

对于登陆此页面的用户来说:我无法重现 OP 发布的错误。

json.dumps()
输出“bad json”的可能性非常小。

将字符串拆分为列表

我假设根据您的评论,您的意思是您想要获取字符串,例如这一行

key2_3=value2_3_1,value2_3_2,value2_3_3
并将这些值分解为
"key2_3": ["value2_3_1", "value2_3_2", "value2_3_3"]

为此,您必须对提供给您的代码进行以下调整:

def as_dict(self):
    """Return this node's subtree as nested plain data.

    A leaf always yields its text split on commas (a list); one child yields
    ``{text: child}``; several children yield ``{text: [children...]}``.
    """
    kids = self.children
    if not kids:
        return self.text.split(",")  # was self.text
    if len(kids) == 1:
        return {self.text: kids[0].as_dict()}
    return {self.text: [kid.as_dict() for kid in kids]}

字典的字典而不是列表

为了使输出成为“字典的字典”,并且只有叶子节点的值才可能是列表,即

{k1: {k2: [1, 2, 3]}}
等,我们必须进行 2 处更改。

  1. 更新as_dict方法以使用
    {}
    而不是
    []
  2. 包含压缩按键的功能。

这样修改之后,我一度很难得到正确的数据结构……输出基本上是这样的:

{k1: {k1: {k2: {k2: value}}}}
。如果在代码中不调用
d = compress(root.as_dict()['root'])
(而是直接使用
d = root.as_dict()['root']
),这一点就会很明显。因此,需要把代码从:

def as_dict(self):
    """Return this node's subtree as nested plain data.

    A leaf yields its text, split into a list only when it contains a comma;
    one child yields ``{text: child}``; several children yield
    ``{text: [children...]}``.
    """
    kids = self.children
    if not kids:
        label = self.text
        if "," in label:
            return label.split(",")
        return label
    if len(kids) == 1:
        return {self.text: kids[0].as_dict()}
    return {self.text: [kid.as_dict() for kid in kids]}

至:

def as_dict(self):
    """Return this node's subtree as nested plain data.

    A leaf yields its text, split into a list only when it contains a comma;
    one child yields ``{text: child}``; several children yield a dict keyed
    by each child's text, ``{text: {child_text: child, ...}}``.
    """
    kids = self.children
    if not kids:
        label = self.text
        if "," in label:
            return label.split(",")
        return label
    if len(kids) == 1:
        return {self.text: kids[0].as_dict()}
    return {self.text: {kid.text: kid.as_dict() for kid in kids}}

然后我添加了压缩功能:

# for merging like sub keys and values
def compress(dictionary):
    """Collapse ``{k: {k: x}}`` wrappers into ``{k: x}``, in place.

    Walks nested dicts recursively. Whenever a value is a dict that contains
    its own key, the value is replaced by that inner entry (any other entries
    of the wrapper are discarded). Non-dict arguments are returned unchanged.

    Returns:
        The same object that was passed in.
    """
    if not isinstance(dictionary, dict):
        return dictionary
    for key in dictionary:
        inner = dictionary[key]
        if not isinstance(inner, dict):
            continue
        if key in inner:
            # Only existing keys are reassigned, so iteration stays valid.
            dictionary[key] = inner.pop(key)
        compress(dictionary[key])
    return dictionary

完整代码

如果将以下内容放入文件中并从命令行运行它,它应该 100% 工作。否则,可能是 Anaconda 或 Python 版本的问题(尽管这看起来不太可能)。

from io import StringIO
import json

# for merging like sub keys and values
def compress(dictionary):
    """Collapse ``{k: {k: x}}`` wrappers into ``{k: x}``, in place.

    Walks nested dicts recursively. Whenever a value is a dict that contains
    its own key, the value is replaced by that inner entry (any other entries
    of the wrapper are discarded). Non-dict arguments are returned unchanged.

    Returns:
        The same object that was passed in.
    """
    if not isinstance(dictionary, dict):
        return dictionary
    for key in dictionary:
        inner = dictionary[key]
        if not isinstance(inner, dict):
            continue
        if key in inner:
            # Only existing keys are reassigned, so iteration stays valid.
            dictionary[key] = inner.pop(key)
        compress(dictionary[key])
    return dictionary

# helper method to convert equals sign to indentation for easier parsing
def convertIndentation(inputString):
    """Expand every ``key=value`` line into a key line plus an indented value line.

    Args:
        inputString: Sequence of text lines (for example the result of
            ``readlines()``); despite the name it is not a single string.
            The sequence is only read, the caller's list is left unchanged.

    Returns:
        A single string made by concatenating all lines, in which each
        ``key=value`` line has become ``key`` followed by a new line holding
        ``value`` one indentation unit (four spaces) deeper than the key.
        Lines without ``=`` are passed through untouched.
    """
    indentVal = "    "
    # Work on a copy: lines are inserted while scanning, and the caller's
    # list must not be modified as a side effect.
    lines = list(inputString)
    position = 0
    while position < len(lines):
        key, sep, value = lines[position].partition("=")
        if sep:
            # Reuse the key's own leading whitespace so the value is always
            # strictly deeper than its key, whatever the indentation style
            # and regardless of space runs inside the key or the value.
            leading = key[:len(key) - len(key.lstrip())]
            lines[position] = key + "\n"
            # The inserted line is scanned on the next pass, so a value that
            # itself contains "=" is expanded one level deeper again.
            lines.insert(position + 1, leading + indentVal + value)
        position += 1
    return "".join(lines)



# helper class for node usage
class Node:
    """One line of the indented text together with the lines nested below it."""

    def __init__(self, indented_line):
        """Create a node from a raw line, keeping its indentation depth."""
        # Nodes nested directly under this one, in input order.
        self.children = []
        # Depth measured as the number of leading whitespace characters.
        self.level = len(indented_line) - len(indented_line.lstrip())
        # Line content without surrounding whitespace.
        self.text = indented_line.strip()

    def add_children(self, nodes):
        """Attach the entries of ``nodes`` that belong beneath this node.

        ``nodes`` is a flat list of Node objects in input order and is
        consumed in place from the front. The level of its first entry defines
        the level of this node's direct children. Processing stops, leaving
        the remaining entries in the list, at the first node whose level is
        not deeper than this node's own level.

        An empty list is accepted and leaves the node without children.
        """
        if not nodes:
            # Nothing to attach (e.g. an empty input); nodes[0] would fail.
            return
        childlevel = nodes[0].level
        while nodes:
            node = nodes[0]
            if node.level == childlevel:  # add node as a child
                self.children.append(nodes.pop(0))
            elif node.level > childlevel:  # descendants of the last child
                self.children[-1].add_children(nodes)
            elif node.level <= self.level:  # sibling or ancestor: not ours
                return
            else:
                # Indented less than the children but more than this node:
                # such a line is discarded without being attached anywhere.
                nodes.pop(0)

    def as_dict(self):
        """Return the subtree as nested plain Python data.

        A leaf yields its text, split into a list only when it contains a
        comma. A node with one child yields ``{text: child}``. A node with
        several children yields ``{text: {child_text: child, ...}}``; since
        each non-leaf child is itself wrapped in its own text, this produces
        ``{k: {k: ...}}`` duplicates for the caller to collapse afterwards.
        """
        if len(self.children) > 1:
            return {self.text: {node.text: node.as_dict() for node in self.children}}
        elif len(self.children) == 1:
            return {self.text: self.children[0].as_dict()}
        else:
            return self.text.split(",") if "," in self.text else self.text

if __name__ == "__main__":

    # Sample input, deliberately indented so the top level is not at column 0.
    s = """
        key1=value1
        key2
            key2_1=value2_1
            key2_2
                key2_2_1
                    key2_2_1_1=value2_2_1_1
            key2_3=value2_3_1,value2_3_2,value2_3_3
        key3=value3_1,value3_2,value3_3
    """

    # Treat the sample as a file and expand "key=value" into indented lines.
    raw_lines = StringIO(s).readlines()
    expanded = convertIndentation(raw_lines)

    # Build the tree under an artificial root, skipping blank lines.
    root = Node('root')
    root.add_children(
        [Node(line) for line in expanded.splitlines() if line.strip()]
    )

    # Drop the artificial root and collapse the duplicated keys.
    d = compress(root.as_dict()['root'])
    jsonOutput = json.dumps(d, indent=4, sort_keys=False)

    # Round-trip through json.load to show the output is valid JSON.
    loaded = json.load(StringIO(jsonOutput))

    print(s)
    print(jsonOutput)
    print(loaded)
© www.soinside.com 2019 - 2024. All rights reserved.