雪花查询运行时间太长

问题描述 投票:0回答:1
SELECT 
    category_tree_master__."category_id" AS category_id,
    --searchchildrenmaster.children,
    --ARRAY_CONTAINS(category_product_nb_stand_nb_product."category_id" ::varchar:: VARIANT ,TO_ARRAY(SPLIT(CAST(searchchildrenmaster."category_id" AS VARCHAR) || COALESCE (', ' || searchchildrenmaster.children, ''), ','))) AS check_,
    --category_product_nb_stand_nb_product.stands AS nb_stands,
    --category_product_nb_stand_nb_product.products AS nb_products
    COUNT(DISTINCT category_product_nb_stand_nb_product.stands) OVER (PARTITION BY category_tree_master__."category_id"),
    COUNT(DISTINCT category_product_nb_stand_nb_product.products) OVER (PARTITION BY category_tree_master__."category_id")
    --IFF (count (DISTINCT category_product_nb_stand_nb_product.stands)  IS NOT NULL , count (DISTINCT category_product_nb_stand_nb_product.stands) , 0) AS nb_stands,
    --IFF (count (DISTINCT category_product_nb_stand_nb_product.products)  IS NOT NULL , count (DISTINCT category_product_nb_stand_nb_product.products) , 0) AS nb_products
FROM 
    (SELECT 
         src."category_id",
         CAT_CHILDREN_LIST AS children
     FROM 
         (SELECT 
              EXPO_MASTER.category_tree_master."category_tree_master_id", 
              EXPO_MASTER.category_tree_master."category_id"
          FROM 
              EXPO_MASTER.category_tree_master
          INNER JOIN 
              (SELECT 
                   EXPO_MASTER.category_lang."category_id",  
                   EXPO_MASTER.category_lang."lang"
               FROM 
                   EXPO_MASTER.category_lang
               WHERE 
                   EXPO_MASTER.category_lang."lang" = 'fr') AS tmp
            ON EXPO_MASTER.category_tree_master."category_id" = tmp."category_id") src
            LEFT JOIN PERFORMANCE.SEARCH_CHILDREN_MASTER  children_cat
                ON CHILDREN_CAT."category_id"  = src."category_id" ) searchchildrenmaster
        INNER JOIN 
        (
        SELECT 
          EXPO_MASTER.category_tree_master."category_tree_master_id", 
          EXPO_MASTER.category_tree_master."category_id", 
          EXPO_MASTER.category_tree_master."parent_id",
          EXPO_MASTER.category_tree_master."gauche", 
          EXPO_MASTER.category_tree_master."droite", 
          EXPO_MASTER.category_tree_master."niveau", 
          EXPO_MASTER.category_tree_master_ref_plateforme_selection."selection_leadgen",
          EXPO_MASTER.category_tree_master_ref_plateforme_selection."selection_stand"
        FROM 
            EXPO_MASTER.category_tree_master
        JOIN 
            EXPO_MASTER.category_tree_master_ref_plateforme_selection 
            ON 
            (
                EXPO_MASTER.category_tree_master_ref_plateforme_selection."category_tree_master_id" = EXPO_MASTER.category_tree_master."category_tree_master_id" AND 
                EXPO_MASTER.category_tree_master_ref_plateforme_selection."ref_plateforme_id" = 1
            )
        )category_tree_master__ ON category_tree_master__."category_id" = 
        CASE WHEN searchchildrenmaster."category_id" IS NOT NULL THEN searchchildrenmaster."category_id" ELSE 1 END
         LEFT JOIN LATERAL
         (
            SELECT
              EXPO_MASTER.category_product."category_id",
              EXPO_MASTER.product."stand_id" AS stands,
              EXPO_MASTER.product."product_id" AS products
            FROM 
                EXPO_MASTER.category_product
            JOIN 
                EXPO_MASTER.product ON (EXPO_MASTER.product."product_id" = EXPO_MASTER.category_product."product_id")
                WHERE 
                ARRAY_CONTAINS(EXPO_MASTER.category_product."category_id" ::varchar:: VARIANT ,TO_ARRAY(SPLIT(CAST(searchchildrenmaster."category_id" AS VARCHAR) || COALESCE (', ' || searchchildrenmaster.children, ''), ',')))
                    )category_product_nb_stand_nb_product
                --GROUP BY category_tree_master__."category_id"

我使用此查询来计算

EXPO_MASTER.category_tree_master."category_id"
中每个类别的摊位数量和产品数量,其中
EXPO_MASTER.category_product
位于值列表中。

这就是查询,我尝试选择不聚合的列,它工作正常,但是当我尝试使用分区依据或分组依据进行计数时,查询需要永远运行(大约6-8分钟)。

有人可以告诉我问题是什么以及如何解决吗?

sql join optimization snowflake-cloud-data-platform lateral-join
1个回答
0
投票

好的,让我们首先重写 SQL,使其对我来说更具可读性,我会将双引号中的内容重命名为正常,并指出未使用的列,这些 IFF 可以交换为 ZeroifNull,并让我们使用一些可读的大小别名(只是为了保持一切流畅):

SELECT 
    ctm_.category_id
    --,scm.children,
    --,ARRAY_CONTAINS(cp_s_p.category_id ::varchar::variant ,  scm.array_of_ids  ) AS check_
    --,cp_s_p.stands AS nb_stands
    --,cp_s_p.products AS nb_products
    ,COUNT(DISTINCT cp_s_p.stands) OVER (PARTITION BY ctm_.category_id)
    ,COUNT(DISTINCT cp_s_p.products) OVER (PARTITION BY ctm_.category_id)
    --,zeroifnull(count (DISTINCT cp_s_p.stands)) AS nb_stands
    --,zeroifnull(count (DISTINCT cp_s_p.products)) AS nb_products
FROM (
    SELECT 
        src.category_id,
        NVL(src.category_id, 1) as category_id_or_one,
        cc.cat_children_list AS children,
        SPLIT( scm."category_id"::VARCHAR || COALESCE (', ' || scm.children, ''), ',') as array_of_ids
     FROM (
        SELECT 
            --ctm_a."category_tree_master_id" AS category_tree_master_id, /* not used in scm select */
            ctm_a."category_id" AS category_id
        FROM expo_master.category_tree_master AS ctm_a
        JOIN (
            SELECT 
                cl."category_id" AS category_id  
                --cl."lang" AS lang /* this was not used */
            FROM expo_master.category_lang AS cl
            WHERE cl."lang" = 'fr'
        ) AS tmp
            ON ctm_a.category_id = tmp.category_id
    ) AS src
    LEFT JOIN performance.search_children_master AS cc
        ON cc."category_id"  = src.category_id
) as scm /*searchchildrenmaster*/
JOIN (
    SELECT 
        -- ctm_b."category_tree_master_id" /* not used */
        ctm_b."category_id" as category_id /*,*/ 
        --ctm_b."parent_id", /* not used */
        --ctm_b."gauche", /* not used */
        --ctm_b."droite", /* not used */
        --ctm_b."niveau", /* not used */
        --ctmrps."selection_leadgen", /* not used */
        --ctmrps."selection_stand"  /* not used */
    FROM expo_master.category_tree_master as ctm_b 
    JOIN expo_master.category_tree_master_ref_plateforme_selection as ctmrps
        ON ctmrps."category_tree_master_id" = ctm_b."category_tree_master_id" 
            AND ctmrps."ref_plateforme_id" = 1
) as ctm_ /*category_tree_master__ */
    ON ctm_.category_id = scm.category_id_or_one
LEFT JOIN LATERAL (
    SELECT
        cp."category_id" as category_id,
        p."stand_id" AS stands,
        p."product_id" AS products
    FROM expo_master.category_product AS cp
    JOIN EXPO_MASTER.product AS p 
        ON p."product_id" = cp."product_id"
    WHERE ARRAY_CONTAINS(cp."category_id" ::varchar::variant , scm.array_of_ids)
) as cp_s_p /*category_product_nb_stand_nb_product*/
--GROUP BY ctm_.category_id

我非常倾向于从 ARRAY_CONTAINS 更改为展平该“数组”并对其进行正常的等连接,并且可能在子选择上添加几个 DISTINCT 以避免已知的重复爆炸:

SELECT 
    ctm_.category_id
    --,scm.children,
    --,cp_s_p.stands AS nb_stands
    --,cp_s_p.products AS nb_products
    ,COUNT(DISTINCT cp_s_p.stands) OVER (PARTITION BY ctm_.category_id)
    ,COUNT(DISTINCT cp_s_p.products) OVER (PARTITION BY ctm_.category_id)
    --,zeroifnull(count (DISTINCT cp_s_p.stands)) AS nb_stands
    --,zeroifnull(count (DISTINCT cp_s_p.products)) AS nb_products
FROM (
    SELECT 
        src.category_id,
        NVL(src.category_id, 1) as category_id_or_one,
        --cc.cat_children_list AS children,
        try_to_number(trim(s.value)) as split_ids
     FROM (
        SELECT 
            ctm_a."category_id" AS category_id
        FROM expo_master.category_tree_master AS ctm_a
        JOIN (
            SELECT 
                cl."category_id" AS category_id
            FROM expo_master.category_lang AS cl
            WHERE cl."lang" = 'fr'
        ) AS tmp
            ON ctm_a.category_id = tmp.category_id
    ) AS src
    LEFT JOIN performance.search_children_master AS cc
        ON cc."category_id"  = src.category_id
    ,LATERAL SPLIT_TO_TABLE( scm."category_id"::VARCHAR || COALESCE (',' || cc.cat_children_list, ''), ',') as s
) as scm /*searchchildrenmaster*/
JOIN (
    SELECT 
        ctm_b."category_id" as category_id
    FROM expo_master.category_tree_master as ctm_b 
    JOIN expo_master.category_tree_master_ref_plateforme_selection as ctmrps
        ON ctmrps."category_tree_master_id" = ctm_b."category_tree_master_id" 
            AND ctmrps."ref_plateforme_id" = 1
) as ctm_ /*category_tree_master__ */
    ON ctm_.category_id = scm.category_id_or_one
LEFT JOIN (
    SELECT DISTINCT
        cp."category_id" as category_id,
        p."stand_id" AS stands,
        p."product_id" AS products
    FROM expo_master.category_product AS cp
    JOIN EXPO_MASTER.product AS p 
        ON p."product_id" = cp."product_id"
) as cp_s_p /*category_product_nb_stand_nb_product*/
    ON cp_s_p.category_id = scm.split_ids
--GROUP BY ctm_.category_id
© www.soinside.com 2019 - 2024. All rights reserved.