我的查询在 Athena 中工作,但在 Spark SQL 中不起作用。
查询-
-- Athena (Presto/Trino) reference query, typos fixed:
--   * co1_notnull / col2_notnull -> col1_not_null / col2_not_null (match the CTE aliases)
--   * outer SELECT listed t.not_null twice and misspelled t.colum_name
--   * stray trailing "])" removed; UNNEST given an explicit column alias list
WITH cte AS (
    SELECT
        year(date)  AS year,
        COUNT(*)    AS total,
        COUNT(col1) AS col1_not_null,  -- COUNT(col) counts non-null values only
        COUNT(col2) AS col2_not_null
    FROM table
    GROUP BY year(date)
)
SELECT
    cte.year,
    t.total,
    t.column_name,
    t.not_null
FROM cte
CROSS JOIN UNNEST(ARRAY[
    CAST(ROW('col1', cte.total, cte.col1_not_null) AS ROW(column_name varchar, total bigint, not_null bigint)),
    CAST(ROW('col2', cte.total, cte.col2_not_null) AS ROW(column_name varchar, total bigint, not_null bigint))
]) AS t (column_name, total, not_null)
请帮助我创建 Spark SQL 查询以在 Spark 中运行。
预期结果:
年份 | 总计 | 列名称 (column_name) | not_null |
---|---|---|---|
2002年1月1日 | 5 | col1 | 4 |
2002年1月1日 | 4 | col2 | 2 |
检查以下转换为 Spark SQL 的查询:
-- Spark SQL equivalent of the Athena UNNEST query.
-- Fixes versus the attempted conversion:
--   * removed the illegal trailing comma after col2_not_null (syntax error)
--   * removed the separate unnest_cte + CROSS JOIN: joining the exploded rows back
--     to cte with no join key produced a cartesian product (every struct row paired
--     with every year row). LATERAL VIEW explodes per source row, which is what
--     Athena's CROSS JOIN UNNEST actually does.
--   * INLINE output columns are now explicitly aliased (column_name, not_null)
--   * duplicate not_null column dropped from the final SELECT
WITH cte AS (
    SELECT
        YEAR(date)  AS year,
        COUNT(*)    AS total,
        COUNT(col1) AS col1_not_null,  -- COUNT(col) counts non-null values only
        COUNT(col2) AS col2_not_null
    FROM table
    GROUP BY YEAR(date)
)
SELECT
    year,
    total,
    column_name,
    not_null
FROM cte
-- INLINE explodes an array of structs into one row per struct, per input row
LATERAL VIEW INLINE(array(
    struct('col1', col1_not_null),
    struct('col2', col2_not_null)
)) t AS column_name, not_null