我有一张有结构的大桌子
CREATE TABLE t (
id SERIAL primary key ,
a_list int[] not null,
b_list int[] not null,
c_list int[] not null,
d_list int[] not null,
type int not null
)
我想查询
a_list
、b_list
、c_list
、d_list
中 type
的所有唯一值,如下所示
select
some_array_unique_agg_function(a_list),
some_array_unique_agg_function(b_list),
some_array_unique_agg_function(c_list),
some_array_unique_agg_function(d_list),
count(1)
where type = 30
例如对于这个数据
+----+---------+--------+--------+---------+------+
| id | a_list | b_list | c_list | d_list | type |
+----+---------+--------+--------+---------+------+
| 1 | {1,3,4} | {2,4} | {1,1} | {2,4,5} | 30 |
| 1 | {1,2,4} | {2,4} | {4,1} | {2,4,5} | 30 |
| 1 | {1,3,5} | {2} | {} | {2,4,5} | 30 |
+----+---------+--------+--------+---------+------+
我想要下一个结果
+-------------+--------+--------+-----------+-------+
| a_list | b_list | c_list | d_list | count |
+-------------+--------+--------+-----------+-------+
| {1,2,3,4,5} | {2,4} | {1,4} | {2,4,5} | 3 |
+-------------+--------+--------+-----------+-------+
有
some_array_unique_agg_function
适合我的目的吗?
试试这个
with cte as (select
unnest( a_list::text[] )::integer as a_list,
unnest( b_list::text[] )::integer as b_list,
unnest( c_list::text[] )::integer as c_list,
unnest( d_list::text[] )::integer as d_list,
(select count(type) from t) as type
from t
where type = 30
)
select array_agg(distinct a_list),array_agg(distinct b_list)
,array_agg(distinct c_list),array_agg(distinct d_list),type from cte group by type ;
结果:
"{1,2,3,4,5}";"{2,4,NULL}";"{1,4,NULL}";"{2,4,5}";3
尝试对这篇文章中的旧答案进行此处理: 数组的所有排列
这列出了 INT 数组的唯一有序排列,该数组按某些内容(在我的例子中是row_id和instrument_id)分组,对于长度为<= 10 :
的数组来说,时间相当长您可能需要安装 intarray 扩展...
WITH RECURSIVE data AS (
SELECT a1.instrument_id, ARRAY_AGG(a1.obs_pos ORDER BY a1.obs_pos) AS arr
FROM tmp_xxxx a1
GROUP BY 1
)
, keys AS (
SELECT instrument_id, GENERATE_SUBSCRIPTS(d.arr, 1) AS rn
FROM data d
)
, cte AS (
SELECT DISTINCT x.instrument_id
, public.sort(x.initial_arr) AS initial_arr
, public.sort(x.new_arr) AS new_arr
, public.sort(x.used_rn) AS used_rn
FROM (
SELECT d.instrument_id
, d.arr initial_arr
, ARRAY [d.arr[k.rn]] new_arr
, ARRAY [k.rn] used_rn
FROM data d
JOIN keys k
ON d.instrument_id = k.instrument_id
) x
UNION ALL
SELECT DISTINCT c.instrument_id
, public.sort(initial_arr) AS initial_arr
, public.sort(c.new_arr || c.initial_arr[k.rn]) AS new_arr
, public.sort(used_rn || k.rn) AS used_rn
FROM cte c
JOIN keys k
ON c.instrument_id = k.instrument_id AND NOT (k.rn = ANY (c.used_rn))
)
INSERT
INTO tmp_xxxx( row_id
, instrument_id
, obs_pos_array
)
SELECT DISTINCT _row_id AS row_id
, cte.instrument_id
, public.sort(new_arr) AS obs_pos_array
FROM cte
WHERE ARRAY_LENGTH(new_arr, 1) >= 2 -- change it to your needs
ON CONFLICT ON CONSTRAINT pk_xxxx DO NOTHING;