在 Snowflake SQL 中提取嵌套键

问题描述 投票:0回答:1

我需要在 Snowflake SQL 中提取嵌套的键。

我使用了 LATERAL FLATTEN,并且已经能够逐层提取出很多内容,但现在我没有思路了。需要说明的是,我知道如何在 Python 中完成这件事,但这项任务必须使用纯 Snowflake SQL,或者 Snowflake SQL 与 Snowflake 存储过程相结合的方式来完成,因为团队中其他只掌握 SQL 的成员也应该能够轻松看懂它。

-- a: single-row CTE whose only column, also named "a", holds the sample document
-- written as an object constant. Objects nest as metadata -> ffprobe -> data/format/video;
-- below that the values are arrays of {'key': ..., 'value': ...} pairs.
WITH a AS
(
  SELECT {'asset_type': 'pU5HWtJlh4wTei', 'generated_time': 'feD6LmEhe1v91S8ThK7pKeS8lg9', 'hadron_id': 'wvnMQNOHmfyp9jkyKPBUvpMKI6', 'header': {'message_uuid': 'MPjRCvmDsbdhqEpA2O1wwh2cLxXp2I7bIcLb', 'time': 1491081167506}, 'md5': 'h38KEiEW9L21p1ICVLbHPeyiwT3Xs9K9', 'media_id': '72jv7XJ4', 'metadata': {'ffprobe': {'data': {'disposition': [{'key': 'phL8fWq', 'value': 'B'}, {'key': 'Iac', 'value': 'L'}, {'key': 'ZTTA1qc5', 'value': 'x'}, {'key': 'xNeqFDv', 'value': 'L'}, {'key': 'IlqS8w', 'value': '3'}, {'key': 'yXZBEnu', 'value': 'M'}, {'key': 'igwGWL', 'value': 'D'}, {'key': 'e5DJlCFqsz1dpAen', 'value': 'T'}, {'key': 'LV66Fp6Gw0Vgx1z', 'value': '4'}, {'key': 'ZmG7yT8lMyLxt', 'value': 'V'}, {'key': 'kUOcM1Kb892v', 'value': 'Y'}, {'key': 'WGeouRSfn2yVRffx', 'value': '2'}], 'info': [{'key': 'uQn6wzAZNM', 'value': 'qjyg'}, {'key': 'VboD2qXFsVSTRsG7', 'value': 'vCL0'}, {'key': 'RmsMAopEn', 'value': '6Nr18gH7QH'}, {'key': '6uY2ddNS1Kzw', 'value': 'NRo'}, {'key': 'YcNUiisISKpMn8', 'value': 'adIn'}, {'key': 'U55JXo0tb', 'value': 'qQQCzqx'}, {'key': '6jiqHekUs', 'value': 'u'}, {'key': 'j0rYLKX1ZI', 'value': 'eHsBiPbC'}, {'key': 'zdGn5NBUNRW', 'value': 'DwJkbk'}, {'key': 'CSInNzwZ', 'value': '9mGqDTdai'}, {'key': 'jhP0Grby', 'value': 'J'}, {'key': 'w6bIVymfJ', 'value': 'z'}], 'tags': [{'key': 'Xt8GwQlO', 'value': 'zQ8'}, {'key': 'VdQTTNnv1xzQ', 'value': 'pMk5jeEaG088qWZ'}, {'key': 'AWleaD9g', 'value': 'csrX4BA72bL'}]}, 'format': {'info': [{'key': 'NZgipc0Z', 'value': 'unyZuTBkK5JMti5usSpGozN5DNnGOaQAhv3qWQCUOVjs2tPsLUn7'}, {'key': 'Gqb6qx9Alm', 'value': 'c'}, {'key': 'CVV3MdaB8av', 'value': 'L'}, {'key': 'BSc0Rz9dl80', 'value': 'NetBQucXaJ9kHti64hXvinD'}, {'key': 'l55ewTmKE5JOt8X3', 'value': '2pPwLdwzhynjc40'}, {'key': 'lrNsIhxldj', 'value': 'YjgRiR6X'}, {'key': '8gs0qiRI', 'value': 'xbW2AzpYb'}, {'key': 'BYq8', 'value': 'qZDXHOcb'}, {'key': '5AQWG8oJ', 'value': 'kKpb59o'}, {'key': '4sYuU7HABCW', 'value': 'RG7'}], 'tags': [{'key': 'ovL4OtgMw20', 'value': 'dZhH'}, {'key': 
'VNxmhchnq7wd6', 'value': '7o4'}, {'key': 'MHcGB3oft8RIgUejA', 'value': 'Ix5O'}, {'key': 'fyiXMsZ', 'value': 'uJW7dyVmxLGXs'}]}, 'video': {'disposition': [{'key': 'FWF5PSN', 'value': '5'}, {'key': 'zaF', 'value': 'X'}, {'key': '7MZAIgMi', 'value': 'a'}, {'key': 'lhiEyZS', 'value': 'H'}, {'key': 'rQd0r2', 'value': '5'}, {'key': 'VTgu0Af', 'value': 'M'}, {'key': 'GNUvRO', 'value': 'J'}, {'key': 'Ii4OiBw9x26XhK2F', 'value': '6'}, {'key': 'reWkrzdBoNCRr3U', 'value': 'T'}, {'key': 'mKaC9tEfFrh6o', 'value': 'd'}, {'key': 'OVZfGC0S3gXG', 'value': 'P'}, {'key': 'yOtXvBSGtEiNV3tw', 'value': 'b'}], 'info': [{'key': 'AlJEBfs5u0', 'value': 'yxWP'}, {'key': 'oDwI2jJgAsJ16du', 'value': 'SOOjtSTDWaIECxgUmJlMZrXit2nGbM25r152X3bsR'}, {'key': 'UFDLDza', 'value': 'rnZ8'}, {'key': 'WTEj621kM2', 'value': 'NXUnf'}, {'key': 'RpHt0Yv4QGX5LEa', 'value': 'ymSo'}, {'key': 'iTIvy8k5Jdx15yzi', 'value': 'iXB2'}, {'key': 'dDnstWKPG', 'value': 'vCSe5eGwCk'}, {'key': 'YVh3x', 'value': 'n3C'}, {'key': 'mnRasf', 'value': 'f36'}, {'key': 'h5kaYHNUt9c', 'value': 'fYX'}, {'key': 'KcJ9MwBU9xGp', 'value': 'RbB'}, {'key': 'iDtBvXK7qmXh', 'value': 'f'}, {'key': 'K1SiMBP1ST5xGmisnL6', 'value': '5yMnuun'}, {'key': 'Fk7y8tIHJCYcRo2NCyD8', 'value': 'P0eK'}, {'key': 'y3B9zsO', 'value': 'foU8mkX'}, {'key': 'uWnVD', 'value': 'dQ'}, {'key': 'coJul9M8gtetnce', 'value': '2pGM'}, {'key': 'ApGf', 'value': '8'}, {'key': 'UMoSm6', 'value': 'vrnQ'}, {'key': 'N2AUDTrPEqJ3goK', 'value': 'c'}, {'key': '1FROhO5iOL6p', 'value': 'BYOX'}, {'key': 'zQmfGwW7IbgFj3', 'value': 'DSUU'}, {'key': 'iL9fYrKYg', 'value': 'aNTqm1u'}, {'key': 'dvDqSoEpS', 'value': 'Z'}, {'key': '4IdFdxKHLS', 'value': 'dQY9uAZA'}, {'key': 'QURiBUctTGz', 'value': 'oVMZPT'}, {'key': 'lI7L8HxR', 'value': 'Dq0uNQ3LY'}, {'key': 'Lwi3SpWH', 'value': 'nhefdqn'}, {'key': 'zwBooRMeaxthf86Mizw', 'value': 'h'}, {'key': 'D8I9PjTHS', 'value': 'lEI'}], 'tags': [{'key': 'ZAFWnREV', 'value': 'r9y'}, {'key': 'u0TnyvWw7wSb', 'value': 'PitGFtXQtCaJ'}, {'key': 'KAhwRJW', 
'value': 'FrKqpOw'}, {'key': 'QDUZLbtb', 'value': 'aIwtez0yWuG'}]}}}, 'path': 'NxUuTYUPuLURcPcZd5uqyQH1L5JhU2lfucLj', 'resolution': 'AY'} as a 
),
-- Level 1: one row per top-level key of the document stored in a.a.
top_level AS
(
  SELECT
    f.key   AS k,
    f.value AS val
  FROM a AS src,
       LATERAL FLATTEN(input => src.a) AS f
),
-- Levels 2-4 all follow the same pattern:
--   * rows whose value renders as text starting with '{' are expanded one
--     level further, and the child key is appended to the dotted path;
--   * every other row (plus values rendering as '{}') is carried over as is.
second_level AS
(
  SELECT
    parent.k || '.' || child.key AS k,
    child.value                  AS val
  FROM top_level AS parent,
       LATERAL FLATTEN(input => parent.val) AS child
  WHERE STARTSWITH(parent.val, '{')
  UNION ALL
  SELECT
    leaf.k,
    leaf.val
  FROM top_level AS leaf
  WHERE NOT STARTSWITH(leaf.val, '{')
     OR STARTSWITH(leaf.val, '{}')
),
third_level AS
(
  SELECT
    parent.k || '.' || child.key AS k,
    child.value                  AS val
  FROM second_level AS parent,
       LATERAL FLATTEN(input => parent.val) AS child
  WHERE STARTSWITH(parent.val, '{')
  UNION ALL
  SELECT
    leaf.k,
    leaf.val
  FROM second_level AS leaf
  WHERE NOT STARTSWITH(leaf.val, '{')
     OR STARTSWITH(leaf.val, '{}')
),
fourth_level AS
(
  SELECT
    parent.k || '.' || child.key AS k,
    child.value                  AS val
  FROM third_level AS parent,
       LATERAL FLATTEN(input => parent.val) AS child
  WHERE STARTSWITH(parent.val, '{')
  UNION ALL
  SELECT
    leaf.k,
    leaf.val
  FROM third_level AS leaf
  WHERE NOT STARTSWITH(leaf.val, '{')
     OR STARTSWITH(leaf.val, '{}')
)
-- Dotted path and remaining value after four levels of object flattening.
SELECT
  k,
  val
FROM fourth_level;

这是我的 JSON(具有相同结构的合成数据)。

/*
present nested VAL structure still left unparsed after several levels:
*/
 
K; VAL
metadata.ffprobe.data.tags; [   {     "key": "Xt8GwQlO",     "value": "zQ8"   },   {     "key": "VdQTTNnv1xzQ",     "value": "pMk5jeEaG088qWZ"   },   {     "key": "AWleaD9g",     "value": "csrX4BA72bL"   } ]
header.message_uuid;    "MPjRCvmDsbdhqEpA2O1wwh2cLxXp2I7bIcLb"
 
 
/*
desired result:
*/
 
metadata.ffprobe.data.tags.Xt8GwQlO
metadata.ffprobe.data.tags.VdQTTNnv1xzQ
metadata.ffprobe.data.tags.AWleaD9g
header.message_uuid

我在简短的示例中包含了 header.message_uuid,只是为了表明对于 VAL 列中没有嵌套结构的简单 K 值,结果应该是相同的。

我该如何进行?请指教。

sql json parsing snowflake-cloud-data-platform
1个回答
0
投票
-- a: single-row CTE whose only column, also named "a", holds the sample document
-- written as an object constant. Objects nest as metadata -> ffprobe -> data/format/video;
-- below that the values are arrays of {'key': ..., 'value': ...} pairs.
WITH a AS (
  SELECT {'asset_type': 'pU5HWtJlh4wTei', 'generated_time': 'feD6LmEhe1v91S8ThK7pKeS8lg9', 'hadron_id': 'wvnMQNOHmfyp9jkyKPBUvpMKI6', 'header': {'message_uuid': 'MPjRCvmDsbdhqEpA2O1wwh2cLxXp2I7bIcLb', 'time': 1491081167506}, 'md5': 'h38KEiEW9L21p1ICVLbHPeyiwT3Xs9K9', 'media_id': '72jv7XJ4', 'metadata': {'ffprobe': {'data': {'disposition': [{'key': 'phL8fWq', 'value': 'B'}, {'key': 'Iac', 'value': 'L'}, {'key': 'ZTTA1qc5', 'value': 'x'}, {'key': 'xNeqFDv', 'value': 'L'}, {'key': 'IlqS8w', 'value': '3'}, {'key': 'yXZBEnu', 'value': 'M'}, {'key': 'igwGWL', 'value': 'D'}, {'key': 'e5DJlCFqsz1dpAen', 'value': 'T'}, {'key': 'LV66Fp6Gw0Vgx1z', 'value': '4'}, {'key': 'ZmG7yT8lMyLxt', 'value': 'V'}, {'key': 'kUOcM1Kb892v', 'value': 'Y'}, {'key': 'WGeouRSfn2yVRffx', 'value': '2'}], 'info': [{'key': 'uQn6wzAZNM', 'value': 'qjyg'}, {'key': 'VboD2qXFsVSTRsG7', 'value': 'vCL0'}, {'key': 'RmsMAopEn', 'value': '6Nr18gH7QH'}, {'key': '6uY2ddNS1Kzw', 'value': 'NRo'}, {'key': 'YcNUiisISKpMn8', 'value': 'adIn'}, {'key': 'U55JXo0tb', 'value': 'qQQCzqx'}, {'key': '6jiqHekUs', 'value': 'u'}, {'key': 'j0rYLKX1ZI', 'value': 'eHsBiPbC'}, {'key': 'zdGn5NBUNRW', 'value': 'DwJkbk'}, {'key': 'CSInNzwZ', 'value': '9mGqDTdai'}, {'key': 'jhP0Grby', 'value': 'J'}, {'key': 'w6bIVymfJ', 'value': 'z'}], 'tags': [{'key': 'Xt8GwQlO', 'value': 'zQ8'}, {'key': 'VdQTTNnv1xzQ', 'value': 'pMk5jeEaG088qWZ'}, {'key': 'AWleaD9g', 'value': 'csrX4BA72bL'}]}, 'format': {'info': [{'key': 'NZgipc0Z', 'value': 'unyZuTBkK5JMti5usSpGozN5DNnGOaQAhv3qWQCUOVjs2tPsLUn7'}, {'key': 'Gqb6qx9Alm', 'value': 'c'}, {'key': 'CVV3MdaB8av', 'value': 'L'}, {'key': 'BSc0Rz9dl80', 'value': 'NetBQucXaJ9kHti64hXvinD'}, {'key': 'l55ewTmKE5JOt8X3', 'value': '2pPwLdwzhynjc40'}, {'key': 'lrNsIhxldj', 'value': 'YjgRiR6X'}, {'key': '8gs0qiRI', 'value': 'xbW2AzpYb'}, {'key': 'BYq8', 'value': 'qZDXHOcb'}, {'key': '5AQWG8oJ', 'value': 'kKpb59o'}, {'key': '4sYuU7HABCW', 'value': 'RG7'}], 'tags': [{'key': 'ovL4OtgMw20', 'value': 'dZhH'}, {'key': 
'VNxmhchnq7wd6', 'value': '7o4'}, {'key': 'MHcGB3oft8RIgUejA', 'value': 'Ix5O'}, {'key': 'fyiXMsZ', 'value': 'uJW7dyVmxLGXs'}]}, 'video': {'disposition': [{'key': 'FWF5PSN', 'value': '5'}, {'key': 'zaF', 'value': 'X'}, {'key': '7MZAIgMi', 'value': 'a'}, {'key': 'lhiEyZS', 'value': 'H'}, {'key': 'rQd0r2', 'value': '5'}, {'key': 'VTgu0Af', 'value': 'M'}, {'key': 'GNUvRO', 'value': 'J'}, {'key': 'Ii4OiBw9x26XhK2F', 'value': '6'}, {'key': 'reWkrzdBoNCRr3U', 'value': 'T'}, {'key': 'mKaC9tEfFrh6o', 'value': 'd'}, {'key': 'OVZfGC0S3gXG', 'value': 'P'}, {'key': 'yOtXvBSGtEiNV3tw', 'value': 'b'}], 'info': [{'key': 'AlJEBfs5u0', 'value': 'yxWP'}, {'key': 'oDwI2jJgAsJ16du', 'value': 'SOOjtSTDWaIECxgUmJlMZrXit2nGbM25r152X3bsR'}, {'key': 'UFDLDza', 'value': 'rnZ8'}, {'key': 'WTEj621kM2', 'value': 'NXUnf'}, {'key': 'RpHt0Yv4QGX5LEa', 'value': 'ymSo'}, {'key': 'iTIvy8k5Jdx15yzi', 'value': 'iXB2'}, {'key': 'dDnstWKPG', 'value': 'vCSe5eGwCk'}, {'key': 'YVh3x', 'value': 'n3C'}, {'key': 'mnRasf', 'value': 'f36'}, {'key': 'h5kaYHNUt9c', 'value': 'fYX'}, {'key': 'KcJ9MwBU9xGp', 'value': 'RbB'}, {'key': 'iDtBvXK7qmXh', 'value': 'f'}, {'key': 'K1SiMBP1ST5xGmisnL6', 'value': '5yMnuun'}, {'key': 'Fk7y8tIHJCYcRo2NCyD8', 'value': 'P0eK'}, {'key': 'y3B9zsO', 'value': 'foU8mkX'}, {'key': 'uWnVD', 'value': 'dQ'}, {'key': 'coJul9M8gtetnce', 'value': '2pGM'}, {'key': 'ApGf', 'value': '8'}, {'key': 'UMoSm6', 'value': 'vrnQ'}, {'key': 'N2AUDTrPEqJ3goK', 'value': 'c'}, {'key': '1FROhO5iOL6p', 'value': 'BYOX'}, {'key': 'zQmfGwW7IbgFj3', 'value': 'DSUU'}, {'key': 'iL9fYrKYg', 'value': 'aNTqm1u'}, {'key': 'dvDqSoEpS', 'value': 'Z'}, {'key': '4IdFdxKHLS', 'value': 'dQY9uAZA'}, {'key': 'QURiBUctTGz', 'value': 'oVMZPT'}, {'key': 'lI7L8HxR', 'value': 'Dq0uNQ3LY'}, {'key': 'Lwi3SpWH', 'value': 'nhefdqn'}, {'key': 'zwBooRMeaxthf86Mizw', 'value': 'h'}, {'key': 'D8I9PjTHS', 'value': 'lEI'}], 'tags': [{'key': 'ZAFWnREV', 'value': 'r9y'}, {'key': 'u0TnyvWw7wSb', 'value': 'PitGFtXQtCaJ'}, {'key': 'KAhwRJW', 
'value': 'FrKqpOw'}, {'key': 'QDUZLbtb', 'value': 'aIwtez0yWuG'}]}}}, 'path': 'NxUuTYUPuLURcPcZd5uqyQH1L5JhU2lfucLj', 'resolution': 'AY'} as a 
),
-- Level 1: one row per top-level key of the document stored in a.a.
top_level AS
(
  SELECT
    f.key   AS k,
    f.value AS val
  FROM a AS src,
       LATERAL FLATTEN(input => src.a) AS f
),
-- Levels 2-4 all follow the same pattern:
--   * rows whose value renders as text starting with '{' are expanded one
--     level further, and the child key is appended to the dotted path;
--   * every other row (plus values rendering as '{}') is carried over as is.
second_level AS
(
  SELECT
    parent.k || '.' || child.key AS k,
    child.value                  AS val
  FROM top_level AS parent,
       LATERAL FLATTEN(input => parent.val) AS child
  WHERE STARTSWITH(parent.val, '{')
  UNION ALL
  SELECT
    leaf.k,
    leaf.val
  FROM top_level AS leaf
  WHERE NOT STARTSWITH(leaf.val, '{')
     OR STARTSWITH(leaf.val, '{}')
),
third_level AS
(
  SELECT
    parent.k || '.' || child.key AS k,
    child.value                  AS val
  FROM second_level AS parent,
       LATERAL FLATTEN(input => parent.val) AS child
  WHERE STARTSWITH(parent.val, '{')
  UNION ALL
  SELECT
    leaf.k,
    leaf.val
  FROM second_level AS leaf
  WHERE NOT STARTSWITH(leaf.val, '{')
     OR STARTSWITH(leaf.val, '{}')
),
fourth_level AS
(
  SELECT
    parent.k || '.' || child.key AS k,
    child.value                  AS val
  FROM third_level AS parent,
       LATERAL FLATTEN(input => parent.val) AS child
  WHERE STARTSWITH(parent.val, '{')
  UNION ALL
  SELECT
    leaf.k,
    leaf.val
  FROM third_level AS leaf
  WHERE NOT STARTSWITH(leaf.val, '{')
     OR STARTSWITH(leaf.val, '{}')
),
-- fifth_level: resolve the arrays of {"key": ..., "value": ...} pairs that are
-- still unexpanded in fourth_level, producing one dotted path per array element.
fifth_level AS (
    -- Expandable rows: append each element's "key" field to the parent path.
    -- Elements without a "key" field are skipped so no NULL path is emitted.
    SELECT de.k || '.' || f1.value:key::VARCHAR AS parsed_key
    FROM fourth_level AS de,
         LATERAL FLATTEN(input => de.val) AS f1
    WHERE f1.value:key IS NOT NULL
    UNION
    -- Leaf rows: keep the path unchanged. The test inspects the FIRST ELEMENT
    -- of val (val[0]:key); looking up a "key" field on val itself (val:key[0])
    -- is expected to yield NULL for arrays and scalars alike, so it would also
    -- keep the array paths that the branch above has already expanded.
    SELECT de.k AS parsed_key
    FROM fourth_level AS de
    WHERE de.val[0]:key IS NULL
)
-- UNION already removes duplicates, so no extra DISTINCT is needed here.
SELECT t.parsed_key
FROM fifth_level AS t;
metadata.ffprobe.data.disposition.phL8fWq
metadata.ffprobe.data.disposition.Iac
metadata.ffprobe.data.disposition.ZTTA1qc5
metadata.ffprobe.data.disposition.xNeqFDv
metadata.ffprobe.data.disposition.IlqS8w
...
© www.soinside.com 2019 - 2024. All rights reserved.