使用 groupby 从两个 DataFrame 创建嵌套字典

问题描述 投票:0回答:1

当 Back 等于 1 时,我想实现以下目标。

逻辑如下:8582 是主节点,级别为 1,有 4 个子节点(8584、8593、8585、8586),每个子节点又各有自己的子节点。我的想法是按 JSON 结构或树形结构进行分组,以便能够直观地查看层级结构和金额总和。我写了以下代码,但得到的结果并不完全是我想要的:

import pandas as pd
from collections import defaultdict
import json

# Node table: one entry per node. 'Nod' is the node id, 'Levels' its depth
# (1 for the single root 8582), 'Parents' the id of the parent node (None for
# the root), 'Names' a label, and 'ID' the key shared with the amounts table
# below. Note that ID 37 is used by two different nodes (8584 and 8588).
data = {
    'Nod': [8582, 8586, 8585, 8593, 8584, 8590, 8583, 8597, 8587, 8674, 8589, 8588],
    'Levels': [1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3],
    'Parents': [None, 8582, 8582, 8582, 8582, 8586, 8593, 8584, 8585, 8586, 8586, 8585],
    'Names': ['Xhelp', 'Hd', 'Ejjd', 'Mmmm', 'Awe', 'Urj', 'Bdh', 'Ddj', 'Lsk', 'Bws', 'Jsk', 'Pqq'],
    'ID': [90, 89, 92, 85, 37, 28, 19, 34, 11, 83, 433, 37]
}

df1 = pd.DataFrame(data)

# Amounts table: one entry per (ID, Back) pair. ID 10 has no node in the table
# above, and IDs 433 and 19 each have an extra entry with Back == 2.
data2 = {
    'ID': [10, 90, 89, 92, 85, 37, 28, 19, 34, 11, 83, 433, 433, 19],
    'Amounts': [1288, 998, 7338, 9337, 784, 3884, 399, 8559, 5146, 9348, 111, 8445, 40, 90],
    'Back': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2]
}
df2 = pd.DataFrame(data2)


def nested_dict():
    """Return a ``defaultdict`` whose missing keys are themselves nested dicts.

    Any depth of keys can be indexed or assigned without creating the
    intermediate levels first.
    """
    auto_tree = defaultdict()
    auto_tree.default_factory = nested_dict
    return auto_tree


# Attempt (from the question) to build a nested tree of nodes from df1 and then
# add the amounts from df2. The asker states it does not produce the wanted
# result; the review notes below point at the likely reasons.
tree = nested_dict()

# Pass 1: create one dict per df1 row and hang it under its parent.
for _, row in df1.iterrows():
    node = {
        "Levels": row["Levels"],
        "Names": row["Names"],
        "ID": row["ID"],
        "Amounts": 0,
    }

    parent_id = row["Parents"]
    # NOTE(review): the merged frame printed further down this page shows the
    # Parents column as NaN / 8582.0, i.e. the None was converted to NaN. If
    # so, this identity test is never true and the root also takes the else
    # branch -- confirm (pd.isna would be the usual check).
    if parent_id is None:
        tree[row["Nod"]] = node
    else:
        # The parent is looked up at the TOP level of `tree` only. A parent
        # that was itself stored under some node's "children" is not found
        # there, so the defaultdict creates a new, empty top-level entry for
        # it instead of nesting below the existing node.
        parent_node = tree[parent_id]
        parent_node["children"][row["Nod"]] = node

# Pass 2: add each df2 amount to the matching tree entry.
for _, row in df2.iterrows():
    node_id = row["ID"]
    amount = row["Amounts"]
    # NOTE(review): `tree` is keyed by Nod / parent ids in pass 1, while this
    # looks up the ID column, so the membership test presumably never matches
    # for this data. The Back column is also not consulted here, although the
    # question asks for Back == 1 only -- confirm intent.
    if node_id in tree:
        tree[node_id]["Amounts"] += amount

json_tree = json.dumps(tree, indent = 2)

print(json_tree)

谢谢

python pandas tree pivot-table
1个回答
0
投票

你可以尝试:

# First merge df2 into df1, keeping only the df2 rows where Back == 1.
# merge() defaults to an inner join, so nodes without a matching amount are
# dropped and a node is repeated once per matching df2 row.
df1 = df1.merge(df2[df2.Back == 1], on="ID")

# Build the tree. `index` maps every node id to its dict so that a child can
# be attached to its parent directly; `dct` holds only the root node(s).
dct, index = {}, {}
for _, row in df1.iterrows():
    nod, parent = row["Nod"], row["Parents"]

    # setdefault() reuses the placeholder a child may already have created for
    # this node, so rows no longer have to arrive parent-first (assigning a
    # fresh dict here would silently orphan children seen earlier).
    node = index.setdefault(nod, {})
    # Accumulate rather than overwrite, so several Back == 1 rows for the same
    # node add up instead of the last one winning.
    node["Amounts"] = node.get("Amounts", 0) + row["Amounts"]

    if pd.isna(parent):
        # Root node: its parent was None, which shows up as NaN in the frame.
        dct[nod] = node
    else:
        # Create an empty placeholder for a parent that has not been seen yet.
        index.setdefault(parent, {})[nod] = node


def sum_dct(dct):
    """Roll child totals up into ``dct["Amounts"]`` and return the new total.

    Every value stored under a key other than ``"Amounts"`` is treated as a
    child node (a dict of the same shape) and is processed recursively first.
    The node is updated in place: its own ``"Amounts"`` (0 when missing) is
    increased by the sum of its children's totals.
    """
    children_total = sum(
        sum_dct(child) for key, child in dct.items() if key != "Amounts"
    )
    own_amount = dct["Amounts"] if "Amounts" in dct else 0
    dct["Amounts"] = own_amount + children_total
    return dct["Amounts"]

# Roll the amounts up from the leaves, one root subtree at a time; sum_dct
# updates each subtree in place, so its return value is not needed here.
for root_id in dct:
    sum_dct(dct[root_id])

print(dct)

打印:

{
    8582: {
        "Amounts": 58233,
        8586: {
            "Amounts": 16293,
            8590: {"Amounts": 399},
            8674: {"Amounts": 111},
            8589: {"Amounts": 8445},
        },
        8585: {"Amounts": 22569, 8588: {"Amounts": 3884}, 8587: {"Amounts": 9348}},
        8593: {"Amounts": 9343, 8583: {"Amounts": 8559}},
        8584: {"Amounts": 9030, 8597: {"Amounts": 5146}},
    }
}

合并后的数据框如下所示:

     Nod  Levels  Parents  Names   ID  Amounts  Back
0   8582       1      NaN  Xhelp   90      998     1
1   8586       2   8582.0     Hd   89     7338     1
2   8585       2   8582.0   Ejjd   92     9337     1
3   8593       2   8582.0   Mmmm   85      784     1
4   8584       2   8582.0    Awe   37     3884     1
5   8588       3   8585.0    Pqq   37     3884     1
6   8590       3   8586.0    Urj   28      399     1
7   8583       3   8593.0    Bdh   19     8559     1
8   8597       3   8584.0    Ddj   34     5146     1
9   8587       3   8585.0    Lsk   11     9348     1
10  8674       3   8586.0    Bws   83      111     1
11  8589       3   8586.0    Jsk  433     8445     1
© www.soinside.com 2019 - 2024. All rights reserved.