AWS Glue ETL 作业——我希望我的函数把空值转换为 null,并且当键名本身是数据类型名称(例如 string、struct、array、boolean)时,删除该键

问题描述 投票:0回答:0
def handle_data_type_recursive(rec):
    """Return a cleaned copy of ``rec`` with blank values replaced by ``None``.

    The input is walked recursively and never mutated:

    * scalar values equal to ``""`` or ``None`` are stored as ``None``;
      every other scalar is copied unchanged;
    * nested dicts are cleaned recursively, and a nested dict whose cleaned
      form is empty is dropped (its key is omitted from the result);
    * lists are rebuilt element by element: dict elements are cleaned
      recursively (empty results are skipped), blank scalars become
      ``None``, other elements are kept as-is; a list that ends up empty
      is dropped.

    Args:
        rec: The mapping to clean. Any non-dict argument yields ``{}``.

    Returns:
        A new dict containing the cleaned data.
    """
    new_dict = {}

    # Non-dict input has nothing to walk; mirror the empty-result contract.
    if not isinstance(rec, dict):
        return new_dict

    for key, value in rec.items():
        if isinstance(value, dict):
            result = handle_data_type_recursive(value)
            # Drop nested dicts that have no content left after cleaning.
            if result:
                new_dict[key] = result
        elif isinstance(value, list):
            arr = []
            for item in value:
                if isinstance(item, dict):
                    result = handle_data_type_recursive(item)
                    if result:
                        arr.append(result)
                elif item in ("", None):
                    # Keep the slot so positions of later items are preserved.
                    arr.append(None)
                else:
                    arr.append(item)
            if arr:
                new_dict[key] = arr
        elif value in ("", None):
            new_dict[key] = None
        else:
            new_dict[key] = value

    return new_dict

# `json` is needed by json.loads() below; imported here because the snippet
# has no import section of its own.
import json

# Sample record used to exercise handle_data_type_recursive(): it mixes blank
# strings, nested objects, lists of objects and an empty object.
json_data = '''
{
    "ptype#id": "USER#748f610a488a9327b9609b151a0fb3b2e4f23e8925fe89353d5fe8f5ce89d8ea",
    "stype#sk": "USER#748f610a488a9327b9609b151a0fb3b2e4f23e8925fe89353d5fe8f5ce89d8ea",
    "accountCreatedDate": "",
    "accountStatus": "",
    "anonymUserID": "4be17719-df5e-43e3-b2fe-c10c0a858b4c",
    "app": "OCM",
    "appOrigin": "OCM",
    "emailAddress": "[email protected]",
    "migration_time": 1679390468329,
    "ofs": {
        "accountStatus": "active",
        "attributes": {
            "accountType": "saving",
            "appSettings": {
                "string": "",
                "struct": ""
            },
            "ccPolicy": {
                "array": "",
                "string": ""
            },
            "country": "US",
            "goals": {
                "string": "",
                "struct": ""
            },
            "personalPref": {
                "string": "",
                "struct": ""
            },
            "phoneLocale": "en-us",
            "policy": {
                "df": [
                    {
                        "v1": 1,
                        "v2": "",
                        "v3": "",
                        "v4": "",
                        "v5": ""
                    }
                ],
                "string": ""
            },
            "subscriptionOC": {
                "autoRenewStatus": "",
                "pendingRenewalInfo": {
                    "expiryTimeMillis": "1672444800000",
                    "productId": "com.omronhealthcare.omronconnect.premium.monthly"
                },
                "subscriptionEndDate": "1672444800000",
                "subscriptionPackage": "com.omronhealthcare.omronconnect.premium.monthly",
                "subscriptionStartDate": 1650000000000,
                "subscriptionStatus": "subscribed",
                "subscriptionTrackPackage": [
                    {
                        "isManual": "1",
                        "packageName": "com.omronhealthcare.omronconnect.premium.monthly",
                        "subscribedDate": 1650000000000,
                        "subscriptionStartDate": 1650000000000
                    }
                ],
                "userLoginDate": "hai"
            },
            "weightRange": ""
        },
        "cloudOpt": {},
        "emailAddress": "[email protected]",
        "optIn": {
            "boolean": false
        },
        "walgreens": "hai mere pass"
    },
    "weightInsightRandomID": "hjgeg",
    "weightInsightWeeklyID": ""
}
'''

# Parse the sample, clean it, and show the resulting dict.
b = json.loads(json_data)
clean_dict = handle_data_type_recursive(b)
print(clean_dict)

我尝试在同一个函数中再加入一段处理逻辑,但没有起到任何作用。

amazon-web-services apache-spark pyspark etl aws-glue
© www.soinside.com 2019 - 2024. All rights reserved.