我有以下 Oracle SQL 表,其中
product_id
101 和 103 分别在 attr1
和 attr3
列中具有不同的值。
data
| PRODUCT_ID | ATTR1 | ATTR2 | ATTR3 |
|------------|-------|-------|-------|
| 101 | a | x | z |
| 101 | a | x | zzz |
| 101 | aa | x | z |
| 102 | b | y | z |
| 102 | b | y | z |
| 103 | c | z | z |
| 103 | c | z | zz |
我想获得以下输出,其中列出了不同的值及其在列中的计数。
output
| PRODUCT_ID | DESCR | VALUE_COUNT |
|------------|------------------|--------------|
| 101 | Issue with attr1 | a(2), aa(1) |
| 101 | Issue with attr3 | z(2), zzz(1) |
| 103 | Issue with attr3 | z(1), zz(1) |
我写了一个查询来只获取一列的结果,但是要为实际数据编写它需要相当大的努力,我需要检查 20 多列不同的值。有什么建议可以提高效率吗?
query
-- Reports, for a single attribute (attr1), every product_id that carries more
-- than one distinct value, listing each value with its per-product row count.
WITH data AS (
    SELECT 101 product_id, 'a' attr1, 'x' attr2, 'z' attr3 FROM dual UNION ALL
    SELECT 101 product_id, 'a' attr1, 'x' attr2, 'zzz' attr3 FROM dual UNION ALL
    SELECT 101 product_id, 'aa' attr1, 'x' attr2, 'z' attr3 FROM dual UNION ALL
    SELECT 102 product_id, 'b' attr1, 'y' attr2, 'z' attr3 FROM dual UNION ALL
    SELECT 102 product_id, 'b' attr1, 'y' attr2, 'z' attr3 FROM dual UNION ALL
    SELECT 103 product_id, 'c' attr1, 'z' attr2, 'z' attr3 FROM dual UNION ALL
    SELECT 103 product_id, 'c' attr1, 'z' attr2, 'zz' attr3 FROM dual
),
-- Products whose attr1 is not uniform across their rows.
d1 AS (
    SELECT
        product_id,
        'Issue with attr1' AS descr
    FROM data
    GROUP BY product_id
    HAVING COUNT(DISTINCT attr1) > 1
),
-- One row per (product, attr1 value). The count is taken inside the product:
-- counting per value alone would add up rows of other products that happen to
-- share the same value.
d2 AS (
    SELECT
        d1.product_id,
        d1.descr,
        data.attr1,
        COUNT(data.attr1) AS cnt
    FROM d1
    INNER JOIN data
        ON d1.product_id = data.product_id
    GROUP BY
        d1.product_id,
        d1.descr,
        data.attr1
)
SELECT
    product_id,
    descr,
    -- Sort by the value itself; product_id is constant inside each group and
    -- would leave the order of the listed values undefined.
    LISTAGG(attr1 || '(' || cnt || ')', ', ') WITHIN GROUP (ORDER BY attr1) AS value_count
FROM d2
GROUP BY
    product_id,
    descr
ORDER BY
    product_id,
    descr
;
您可以将所有属性反透视到单独的行中,计算每个属性和值的行数,并将其与每个
product_id
的行数进行比较。然后用 listagg
. 聚合错误
这只需要向
inpivot ... for ...
添加更多列。
/* Flags every (product_id, attribute) pair whose values are not uniform and
   lists each value together with its row count. To check more columns, extend
   only the IN (...) list of the UNPIVOT clause. */
with prep as (
    select
        sample.*
        /* Total number of rows of the product: the baseline a uniform value must reach */
        , count(1) over(partition by product_id) as rowcnt
    from sample
)
, unp as (
    select
        product_id,
        /* A value shared by every row of the product has count = rowcnt;
           any smaller count means the attribute is inconsistent */
        case
            when count(1) != rowcnt
            then 'Issue with ' || col
        end as issue,
        /* Value rendered with its row count, e.g. a(2) */
        val || '(' || count(1) || ')' as cnt
    from prep
    /* One row per (source row, attribute). The listed columns must share a
       data type. UNPIVOT drops NULL values by default (EXCLUDE NULLS), so an
       attribute that is NULL on only some rows of a product is flagged too. */
    unpivot(
        val for col in (attr1, attr2, attr3)
    )
    group by
        product_id,
        col,
        val,
        rowcnt
)
/* Aggregate all */
select
    product_id,
    issue,
    /* WITHIN GROUP is mandatory before Oracle 19c and, where optional, is the
       only way to get a deterministic order of the listed values */
    listagg(cnt, ', ') within group (order by cnt) as value_count
from unp
where issue is not null
group by
    product_id,
    issue
order by
    product_id,
    issue
PRODUCT_ID | 问题 | VALUE_COUNT |
---|---|---|
101 | ATTR1 问题 | a(2), aa(1) |
101 | ATTR3 问题 | z(2), zzz(1) |
103 | ATTR3 问题 | z(1), zz(1) |
首先尝试 GROUPing,然后围绕它使用 LISTAGG。像这样的东西:
-- Lists, per product and attribute, every value with its row count, but only
-- for attributes that hold more than one distinct value within the product.
WITH data AS (
    SELECT 101 product_id, 'a' attr1, 'x' attr2, 'z' attr3 FROM dual UNION ALL
    SELECT 101 product_id, 'a' attr1, 'x' attr2, 'zzz' attr3 FROM dual UNION ALL
    SELECT 101 product_id, 'aa' attr1, 'x' attr2, 'z' attr3 FROM dual UNION ALL
    SELECT 102 product_id, 'b' attr1, 'y' attr2, 'z' attr3 FROM dual UNION ALL
    SELECT 102 product_id, 'b' attr1, 'y' attr2, 'z' attr3 FROM dual UNION ALL
    SELECT 103 product_id, 'c' attr1, 'z' attr2, 'z' attr3 FROM dual UNION ALL
    SELECT 103 product_id, 'c' attr1, 'z' attr2, 'zz' attr3 FROM dual
),
-- Every source row tagged with the number of distinct values its product has
-- in each attribute (computed once instead of once per attribute branch).
tagged AS (
    SELECT
        product_id,
        attr1,
        attr2,
        attr3,
        COUNT(DISTINCT attr1) OVER (PARTITION BY product_id) AS attr1_variants,
        COUNT(DISTINCT attr2) OVER (PARTITION BY product_id) AS attr2_variants,
        COUNT(DISTINCT attr3) OVER (PARTITION BY product_id) AS attr3_variants
    FROM data
),
-- One row per (product, attribute, value) for the inconsistent attributes.
-- Each attribute needs its own branch; attr2 is included so that an
-- inconsistency there is not silently missed.
per_value AS (
    SELECT
        product_id,
        'Issue with attr1' AS descr,
        attr1 || '(' || COUNT(*) || ')' AS value_count
    FROM tagged
    WHERE attr1_variants > 1
    GROUP BY product_id, attr1
    UNION ALL
    SELECT
        product_id,
        'Issue with attr2' AS descr,
        attr2 || '(' || COUNT(*) || ')' AS value_count
    FROM tagged
    WHERE attr2_variants > 1
    GROUP BY product_id, attr2
    UNION ALL
    SELECT
        product_id,
        'Issue with attr3' AS descr,
        attr3 || '(' || COUNT(*) || ')' AS value_count
    FROM tagged
    WHERE attr3_variants > 1
    GROUP BY product_id, attr3
)
SELECT
    product_id,
    descr,
    -- ', ' (comma + space) matches the requested output format
    LISTAGG(value_count, ', ') WITHIN GROUP (ORDER BY value_count) AS value_count
FROM per_value
GROUP BY
    product_id,
    descr
ORDER BY
    product_id,
    descr
;
如果你没有太多的属性,你可以这样做:
-- Lists, per product and attribute, every value with its row count for the
-- attributes that hold more than one distinct value within the product.
WITH data AS (
    SELECT 101 product_id, 'a' attr1, 'x' attr2, 'z' attr3 FROM dual UNION ALL
    SELECT 101 product_id, 'a' attr1, 'x' attr2, 'zz' attr3 FROM dual UNION ALL
    SELECT 101 product_id, 'aa' attr1, 'x' attr2, 'z' attr3 FROM dual UNION ALL
    SELECT 102 product_id, 'b' attr1, 'y' attr2, 'z' attr3 FROM dual UNION ALL
    SELECT 102 product_id, 'b' attr1, 'y' attr2, 'z' attr3 FROM dual UNION ALL
    SELECT 103 product_id, 'c' attr1, 'z' attr2, 'z' attr3 FROM dual UNION ALL
    SELECT 103 product_id, 'c' attr1, 'z' attr2, 'zz' attr3 FROM dual
),
-- Number of distinct values per product for each attribute.
count_ AS (
    SELECT
        product_id,
        COUNT(DISTINCT attr1) AS attr1,
        COUNT(DISTINCT attr2) AS attr2,
        COUNT(DISTINCT attr3) AS attr3
    FROM data
    GROUP BY product_id
),
-- One row per (product, attribute, value) for the inconsistent attributes.
-- Counts are grouped by product AND value. A window partitioned by the value
-- alone mixes rows of different products: with this sample data it reports
-- z(3), zz(2) for both 101 and 103, whereas the per-product counts are
-- z(2), zz(1) for 101 and z(1), zz(1) for 103.
d2 AS (
    SELECT
        count_.product_id,
        'Issue with attr1' AS descr,
        data.attr1 AS val,
        COUNT(data.attr1) AS cnt
    FROM count_
    INNER JOIN data
        ON count_.product_id = data.product_id
    WHERE count_.attr1 > 1
    GROUP BY count_.product_id, data.attr1
    UNION ALL
    SELECT
        count_.product_id,
        'Issue with attr2' AS descr,
        data.attr2 AS val,
        COUNT(data.attr2) AS cnt
    FROM count_
    INNER JOIN data
        ON count_.product_id = data.product_id
    WHERE count_.attr2 > 1
    GROUP BY count_.product_id, data.attr2
    UNION ALL
    SELECT
        count_.product_id,
        'Issue with attr3' AS descr,
        data.attr3 AS val,
        COUNT(data.attr3) AS cnt
    FROM count_
    INNER JOIN data
        ON count_.product_id = data.product_id
    WHERE count_.attr3 > 1
    GROUP BY count_.product_id, data.attr3
)
SELECT
    product_id,
    descr,
    -- Sort by the value; product_id is constant inside each group and would
    -- leave the order of the listed values undefined.
    LISTAGG(val || '(' || cnt || ')', ', ') WITHIN GROUP (ORDER BY val) AS value_count
FROM d2
GROUP BY
    product_id,
    descr
ORDER BY
    product_id,
    descr
;
PRODUCT_ID | 描述 | VALUE_COUNT |
---|---|---|
101 | attr1 问题 | a(2), aa(1) |
101 | attr3 问题 | z(3), zz(2) |
103 | attr3 问题 | z(3), zz(2) |