我们有 3 个表(每个表包含大约 300k-500k 条记录),我们想通过连接这些表来创建一个视图:
-- BigQuery Standard SQL.
-- Step 1 (CTE): pair every tax row with every payment transaction row of the
-- same order, keeping only the combinations allowed by the WHERE clause.
-- Step 2: number the surviving rows from newest to oldest CREATED_ON.
WITH joined AS (
    SELECT
        o.USER_ID AS ACCOUNT_DN,
        t.TRANSACTION_STATUS,
        t.CREATED_ON,
        tx.TAX_CALCULATED,
        tx.TRANSACTION_STATUS AS TAX_TXN_STATUS
    FROM abc.xyz.TAX_TRANSACTIONS AS tx
    INNER JOIN `abc.xyz.ORDER` AS o
        ON o.ORDER_NUMBER = tx.ORDER_NUMBER
    INNER JOIN abc.xyz.TRANSACTION AS t
        ON o.ORDER_NUMBER = t.ORDER_NUMBER
    WHERE t.TRANSACTION_TYPE != 'auth'
      AND (
            -- approved purchases matched with their sales invoices
            (t.TRANSACTION_TYPE = "purchase"
             AND t.TRANSACTION_STATUS = "approved"
             AND tx.TAXATION_TYPE = "SalesInvoice")
            -- refunds matched with their return invoices
         OR (t.TRANSACTION_TYPE = "refund"
             AND tx.TAXATION_TYPE = "ReturnInvoice")
            -- any tax row that ended in error, whatever the payment type
         OR tx.TRANSACTION_STATUS = "Error"
      )
)
SELECT
    ROW_NUMBER() OVER (ORDER BY CREATED_ON DESC) AS RN,
    joined.*
FROM joined
ORDER BY CREATED_ON DESC
但是查询需要2个多小时才能完成。下面是执行图:
我们希望提高查询的性能。经过几个小时的研究,我们发现:
If you create a search index on a table that is smaller than 10GB, then the index is not populated
非常感谢
更新: 如果我们只连接前 2 个表(从查询中删除第三个表),只需要 2-3 秒即可完成:
-- Two-table check: tax rows joined to their orders only (no payment table).
SELECT
    o.USER_ID AS ACCOUNT_DN,
    tx.TAX_CALCULATED,
    tx.TRANSACTION_STATUS AS TAX_TXN_STATUS
FROM abc.xyz.TAX_TRANSACTIONS AS tx
INNER JOIN `abc.xyz.ORDER` AS o
    ON o.ORDER_NUMBER = tx.ORDER_NUMBER
如果我们只连接最后 2 个表(从查询中删除第一个表),只需要 2-3 秒即可完成:
-- Two-table check: orders joined to their non-'auth' payment transactions
-- (no tax table).
SELECT
    o.USER_ID AS ACCOUNT_DN,
    t.TRANSACTION_STATUS,
    t.CREATED_ON
FROM `abc.xyz.ORDER` AS o
INNER JOIN abc.xyz.TRANSACTION AS t
    ON o.ORDER_NUMBER = t.ORDER_NUMBER
WHERE t.TRANSACTION_TYPE != 'auth'
但是如果我们连接 3 张表(即使没有
ROW_NUMBER() OVER
),也需要几个小时才能完成:
-- Three-table join without the ROW_NUMBER() wrapper.
-- Both joins use ORDER_NUMBER only, so each order contributes
-- (its tax rows) x (its transaction rows) candidate rows before filtering.
SELECT
    o.USER_ID AS ACCOUNT_DN,
    t.TRANSACTION_STATUS,
    t.CREATED_ON,
    tx.TAX_CALCULATED,
    tx.TRANSACTION_STATUS AS TAX_TXN_STATUS
FROM abc.xyz.TAX_TRANSACTIONS AS tx
INNER JOIN `abc.xyz.ORDER` AS o
    ON o.ORDER_NUMBER = tx.ORDER_NUMBER
INNER JOIN abc.xyz.TRANSACTION AS t
    ON o.ORDER_NUMBER = t.ORDER_NUMBER
WHERE t.TRANSACTION_TYPE != 'auth'
  AND (
        (t.TRANSACTION_TYPE = "purchase"
         AND t.TRANSACTION_STATUS = "approved"
         AND tx.TAXATION_TYPE = "SalesInvoice")
     OR (t.TRANSACTION_TYPE = "refund"
         AND tx.TAXATION_TYPE = "ReturnInvoice")
     OR tx.TRANSACTION_STATUS = "Error"
  )
ORDER BY CREATED_ON DESC
从执行图中,我可以看到它卡在连接(JOIN)步骤
更新2:请看上面的图片。连接后产生了 8.9G(约 89 亿)条记录。
更新 3:1 小时后我看到此错误
查询产生某种连接爆炸,超出了按需计划的限制。
您可以考虑将项目升级到固定费率版本,如错误消息所示。
您还可以考虑将分析拆分为更小的组件,也许可以使用块和分区进行返工以减少行数。
我不认为问题出在你的查询上。我使用虚拟数据创建了一个场景,其中仅包含您选择来解释我的观点的字段。
所有表都有 100 行,并且
order_number
字段填充 1 到 10 之间的随机值。
编写脚本来创建和填充表 [order]、[transaction] 和 [tax_transactions]
-- Demo table (T-SQL): one row per order record.
-- [orderID] is the auto-generated surrogate key; [order_number] is the
-- nullable, non-unique value the other demo tables are joined on.
CREATE TABLE [order] (
    [orderID]      INT IDENTITY(1, 1) NOT NULL PRIMARY KEY,
    [order_number] INT NULL,
    [user_id]      INT NULL
);
GO
INSERT INTO [order] (order_number,user_id)
VALUES
(3,995),
(6,789),
(6,964),
(8,756),
(2,684),
(1,897),
(1,828),
(7,860),
(5,700),
(9,914);
INSERT INTO [order] (order_number,user_id)
VALUES
(8,854),
(7,692),
(5,744),
(9,696),
(2,893),
(3,947),
(10,943),
(5,861),
(2,868),
(9,989);
INSERT INTO [order] (order_number,user_id)
VALUES
(8,864),
(8,878),
(2,935),
(2,941),
(1,949),
(1,984),
(6,926),
(4,800),
(3,734),
(8,978);
INSERT INTO [order] (order_number,user_id)
VALUES
(2,732),
(9,773),
(9,676),
(3,698),
(7,944),
(1,776),
(1,877),
(6,767),
(1,752),
(6,931);
INSERT INTO [order] (order_number,user_id)
VALUES
(8,909),
(7,672),
(1,927),
(3,947),
(8,804),
(6,719),
(8,990),
(6,725),
(4,842),
(9,741);
INSERT INTO [order] (order_number,user_id)
VALUES
(4,710),
(6,690),
(7,965),
(3,785),
(9,867),
(7,994),
(4,935),
(9,959),
(2,701),
(5,732);
INSERT INTO [order] (order_number,user_id)
VALUES
(6,924),
(3,933),
(5,861),
(3,975),
(7,821),
(4,984),
(9,805),
(9,693),
(2,937),
(9,696);
INSERT INTO [order] (order_number,user_id)
VALUES
(5,964),
(4,938),
(1,833),
(1,680),
(7,986),
(7,768),
(3,788),
(2,891),
(2,830),
(8,682);
INSERT INTO [order] (order_number,user_id)
VALUES
(6,685),
(4,901),
(4,799),
(8,790),
(2,835),
(6,714),
(7,676),
(5,876),
(3,759),
(3,848);
INSERT INTO [order] (order_number,user_id)
VALUES
(3,835),
(6,927),
(5,902),
(4,806),
(1,787),
(1,805),
(1,743),
(1,801),
(5,891),
(5,667);
-- Demo table (T-SQL): payment transactions, many rows per order_number.
-- [transactionID] is the auto-generated surrogate key.
CREATE TABLE [transaction] (
    [transactionID]      INTEGER      NOT NULL IDENTITY(1, 1),
    [order_number]       INTEGER      NULL,
    -- Short code values ('auth', 'purchase', 'refund', ...). Bounded length
    -- instead of VARCHAR(MAX) so the columns can be used as index keys.
    [transaction_type]   VARCHAR(50)  NULL,
    [transaction_status] VARCHAR(50)  NULL,
    -- Real timestamp type instead of VARCHAR(255): ORDER BY created_on then
    -- sorts chronologically and malformed values are rejected on insert.
    -- 'YYYY-MM-DD hh:mm:ss' string literals convert implicitly.
    [created_on]         DATETIME2(0) NULL,
    PRIMARY KEY ([transactionID])
);
GO
INSERT INTO [transaction] (order_number,transaction_type,transaction_status,created_on)
VALUES
(7,'auth','Error','2025-01-16 17:36:39'),
(6,'purchase','null','2024-01-03 01:13:47'),
(8,'auth','Error','2023-07-28 11:49:40'),
(9,'other','Error','2024-01-10 17:49:22'),
(8,'purchase','null','2024-11-04 02:33:41'),
(7,'auth','null','2024-10-18 10:34:23'),
(6,'other','approved','2024-10-20 12:19:42'),
(9,'purchase','Error','2023-07-13 18:39:02'),
(9,'purchase','Error','2024-06-24 12:09:42'),
(9,'other','null','2023-11-16 03:43:24');
INSERT INTO [transaction] (order_number,transaction_type,transaction_status,created_on)
VALUES
(1,'other','null','2024-02-23 22:50:33'),
(3,'purchase','Error','2024-09-24 20:53:25'),
(4,'purchase','null','2023-03-25 01:29:01'),
(4,'other','null','2024-02-25 13:57:43'),
(3,'other','Error','2024-08-18 02:20:56'),
(2,'null','Error','2024-02-21 15:19:37'),
(4,'other','Error','2024-02-14 10:31:01'),
(9,'purchase','null','2024-04-13 14:52:26'),
(10,'purchase','Error','2023-10-07 18:00:57'),
(10,'refund','Error','2024-08-20 08:05:33');
INSERT INTO [transaction] (order_number,transaction_type,transaction_status,created_on)
VALUES
(7,'refund','approved','2023-03-13 07:29:17'),
(2,'purchase','null','2024-03-29 18:37:15'),
(6,'refund','Error','2024-02-25 20:52:25'),
(8,'refund','Error','2024-07-28 02:05:34'),
(2,'auth','Error','2024-01-19 13:10:58'),
(10,'auth','Error','2023-06-24 10:21:12'),
(8,'purchase','Error','2024-06-25 21:39:47'),
(8,'auth','null','2024-02-17 05:46:19'),
(3,'purchase','null','2023-08-01 10:23:36'),
(8,'purchase','approved','2024-06-05 10:49:20');
INSERT INTO [transaction] (order_number,transaction_type,transaction_status,created_on)
VALUES
(7,'other','Error','2023-05-30 05:16:54'),
(3,'null','null','2023-09-09 17:55:54'),
(10,'null','Error','2024-02-28 23:06:36'),
(8,'purchase','Error','2024-08-03 13:01:26'),
(8,'purchase','Error','2024-06-22 18:26:34'),
(7,'purchase','approved','2023-05-04 17:38:37'),
(9,'purchase','Error','2023-03-02 06:53:35'),
(3,'auth','Error','2024-09-24 19:08:29'),
(6,'refund','Error','2024-05-24 17:24:20'),
(9,'refund','Error','2023-12-16 13:30:59');
INSERT INTO [transaction] (order_number,transaction_type,transaction_status,created_on)
VALUES
(9,'purchase','Error','2024-05-30 04:07:44'),
(9,'purchase','null','2024-02-11 07:19:17'),
(6,'refund','approved','2023-05-06 21:41:09'),
(3,'other','approved','2024-07-29 11:30:42'),
(1,'other','Error','2024-05-13 07:47:56'),
(2,'other','Error','2023-08-03 04:35:46'),
(6,'other','approved','2024-01-19 03:05:12'),
(10,'refund','Error','2023-12-11 03:57:15'),
(1,'other','Error','2023-04-15 09:28:51'),
(3,'refund','null','2023-04-30 19:42:25');
INSERT INTO [transaction] (order_number,transaction_type,transaction_status,created_on)
VALUES
(5,'other','approved','2024-03-19 12:06:34'),
(6,'other','Error','2024-04-06 06:18:15'),
(6,'refund','approved','2024-11-15 02:26:58'),
(6,'other','Error','2023-10-05 02:27:24'),
(8,'null','null','2024-04-01 16:09:14'),
(5,'other','Error','2024-06-11 05:08:41'),
(3,'other','approved','2023-03-04 12:11:15'),
(3,'refund','Error','2023-11-28 11:39:42'),
(3,'purchase','Error','2023-05-21 18:07:25'),
(2,'other','approved','2024-09-25 18:06:17');
INSERT INTO [transaction] (order_number,transaction_type,transaction_status,created_on)
VALUES
(10,'auth','approved','2024-07-19 03:12:07'),
(4,'other','null','2023-06-15 11:22:50'),
(2,'other','approved','2024-03-27 11:31:51'),
(9,'auth','Error','2023-07-25 19:05:47'),
(10,'other','null','2023-08-10 00:50:20'),
(7,'refund','Error','2023-08-17 19:04:40'),
(7,'purchase','null','2024-02-04 01:05:01'),
(4,'other','Error','2024-09-14 22:37:55'),
(8,'other','Error','2023-05-04 02:33:57'),
(8,'refund','approved','2023-10-24 20:56:07');
INSERT INTO [transaction] (order_number,transaction_type,transaction_status,created_on)
VALUES
(3,'other','approved','2023-07-18 01:31:54'),
(4,'other','null','2023-03-20 08:24:06'),
(2,'null','null','2023-06-26 19:53:33'),
(9,'auth','null','2023-08-19 14:05:18'),
(6,'purchase','Error','2024-02-06 05:10:49'),
(6,'purchase','null','2025-01-13 08:35:23'),
(6,'refund','Error','2023-12-04 13:07:48'),
(10,'refund','Error','2024-11-08 07:59:38'),
(1,'purchase','Error','2023-07-26 22:24:51'),
(10,'null','Error','2023-10-05 10:34:43');
INSERT INTO [transaction] (order_number,transaction_type,transaction_status,created_on)
VALUES
(7,'other','null','2024-01-10 12:47:41'),
(7,'refund','approved','2023-04-17 22:51:58'),
(2,'purchase','Error','2024-03-21 04:33:14'),
(6,'refund','Error','2023-12-27 07:56:43'),
(8,'purchase','approved','2024-09-18 05:43:04'),
(4,'purchase','null','2023-08-27 12:08:18'),
(3,'refund','null','2024-10-09 15:27:05'),
(5,'other','Error','2024-09-15 01:06:50'),
(2,'purchase','approved','2024-11-20 03:38:16'),
(6,'other','null','2024-01-03 00:40:01');
INSERT INTO [transaction] (order_number,transaction_type,transaction_status,created_on)
VALUES
(5,'auth','Error','2023-04-16 16:44:47'),
(8,'refund','null','2024-10-19 09:02:59'),
(4,'purchase','Error','2024-09-18 03:06:04'),
(4,'purchase','Error','2024-03-17 20:46:13'),
(2,'refund','null','2024-08-14 13:55:14'),
(7,'refund','Error','2023-03-19 03:49:58'),
(7,'purchase','approved','2023-10-24 05:37:52'),
(6,'purchase','Error','2024-04-14 17:50:06'),
(6,'auth','null','2023-07-16 02:26:03'),
(7,'null','Error','2024-05-09 04:17:23');
-- Demo table (T-SQL): tax records, many rows per order_number.
-- [tax_transactionsID] is the auto-generated surrogate key.
CREATE TABLE [tax_transactions] (
    [tax_transactionsID] INTEGER      NOT NULL IDENTITY(1, 1),
    [order_number]       INTEGER      NULL,
    -- Holds the flag values '0' / '1' as text in the demo data.
    [tax_calculated]     VARCHAR(255) NULL,
    -- Short code values ('approved', 'Error', 'SalesInvoice', ...). Bounded
    -- length instead of VARCHAR(MAX) so the columns can be used as index keys.
    [transaction_status] VARCHAR(50)  NULL,
    [taxation_type]      VARCHAR(50)  NULL,
    PRIMARY KEY ([tax_transactionsID])
);
GO
INSERT INTO [tax_transactions] (order_number,tax_calculated,transaction_status,taxation_type)
VALUES
(2,'1','approved','SalesInvoice'),
(8,'0','Error','null'),
(10,'1','Error','SalesInvoice'),
(9,'0','approved','ReturnInvoice'),
(5,'0','null','null'),
(2,'1','Error','null'),
(7,'1','Error','null'),
(4,'0','null','null'),
(1,'0','Error','ReturnInvoice'),
(9,'1','Error','null');
INSERT INTO [tax_transactions] (order_number,tax_calculated,transaction_status,taxation_type)
VALUES
(4,'1','Error','SalesInvoice'),
(4,'0','null','ReturnInvoice'),
(4,'0','Error','null'),
(5,'0','null','null'),
(2,'0','Error','ReturnInvoice'),
(8,'0','null','ReturnInvoice'),
(2,'0','null','null'),
(7,'1','Error','null'),
(6,'1','Error','ReturnInvoice'),
(7,'1','Error','null');
INSERT INTO [tax_transactions] (order_number,tax_calculated,transaction_status,taxation_type)
VALUES
(5,'1','Error','null'),
(9,'0','Error','ReturnInvoice'),
(3,'0','null','ReturnInvoice'),
(2,'0','Error','null'),
(8,'1','Error','ReturnInvoice'),
(10,'1','approved','SalesInvoice'),
(5,'0','Error','ReturnInvoice'),
(6,'0','approved','SalesInvoice'),
(5,'1','null','null'),
(3,'0','approved','ReturnInvoice');
INSERT INTO [tax_transactions] (order_number,tax_calculated,transaction_status,taxation_type)
VALUES
(2,'1','Error','SalesInvoice'),
(5,'1','Error','ReturnInvoice'),
(1,'1','approved','ReturnInvoice'),
(2,'1','approved','null'),
(2,'0','Error','SalesInvoice'),
(2,'0','null','SalesInvoice'),
(8,'1','approved','ReturnInvoice'),
(8,'1','null','SalesInvoice'),
(9,'1','approved','ReturnInvoice'),
(8,'1','approved','null');
INSERT INTO [tax_transactions] (order_number,tax_calculated,transaction_status,taxation_type)
VALUES
(9,'0','approved','null'),
(7,'1','Error','ReturnInvoice'),
(3,'0','approved','ReturnInvoice'),
(9,'0','approved','ReturnInvoice'),
(5,'1','approved','ReturnInvoice'),
(4,'0','Error','ReturnInvoice'),
(8,'1','Error','SalesInvoice'),
(9,'1','approved','ReturnInvoice'),
(1,'0','null','ReturnInvoice'),
(7,'0','Error','ReturnInvoice');
INSERT INTO [tax_transactions] (order_number,tax_calculated,transaction_status,taxation_type)
VALUES
(9,'1','approved','ReturnInvoice'),
(3,'1','approved','ReturnInvoice'),
(8,'1','approved','SalesInvoice'),
(7,'1','approved','null'),
(9,'1','approved','ReturnInvoice'),
(6,'1','null','ReturnInvoice'),
(4,'1','Error','SalesInvoice'),
(4,'0','Error','null'),
(5,'1','Error','SalesInvoice'),
(9,'0','Error','ReturnInvoice');
INSERT INTO [tax_transactions] (order_number,tax_calculated,transaction_status,taxation_type)
VALUES
(7,'0','approved','ReturnInvoice'),
(5,'0','null','SalesInvoice'),
(10,'1','null','null'),
(10,'1','approved','ReturnInvoice'),
(2,'0','Error','null'),
(3,'0','approved','SalesInvoice'),
(9,'1','null','null'),
(3,'0','Error','null'),
(4,'0','Error','null'),
(2,'0','Error','SalesInvoice');
INSERT INTO [tax_transactions] (order_number,tax_calculated,transaction_status,taxation_type)
VALUES
(3,'0','Error','ReturnInvoice'),
(8,'0','approved','ReturnInvoice'),
(7,'1','approved','ReturnInvoice'),
(7,'1','Error','ReturnInvoice'),
(8,'1','Error','null'),
(7,'0','Error','ReturnInvoice'),
(10,'0','Error','null'),
(4,'0','approved','ReturnInvoice'),
(9,'1','Error','ReturnInvoice'),
(9,'0','Error','SalesInvoice');
INSERT INTO [tax_transactions] (order_number,tax_calculated,transaction_status,taxation_type)
VALUES
(2,'0','Error','ReturnInvoice'),
(1,'0','Error','null'),
(1,'0','Error','ReturnInvoice'),
(3,'0','approved','null'),
(6,'0','approved','ReturnInvoice'),
(2,'1','Error','ReturnInvoice'),
(7,'1','null','ReturnInvoice'),
(5,'1','Error','ReturnInvoice'),
(6,'0','null','ReturnInvoice'),
(9,'0','approved','ReturnInvoice');
INSERT INTO [tax_transactions] (order_number,tax_calculated,transaction_status,taxation_type)
VALUES
(8,'0','null','SalesInvoice'),
(10,'1','approved','ReturnInvoice'),
(8,'0','Error','null'),
(8,'0','null','ReturnInvoice'),
(3,'0','Error','ReturnInvoice'),
(5,'0','Error','ReturnInvoice'),
(3,'0','Error','ReturnInvoice'),
(6,'0','Error','ReturnInvoice'),
(8,'0','Error','ReturnInvoice'),
(6,'1','null','ReturnInvoice');
1. 总行数
如果您只是连接这些表,那么与您的数据集相比,您应该会获得大量行。但是,如果您过滤
order_number = 8
,您将大大减少行数。
-- Size of each demo table.
SELECT COUNT(*) FROM [order];            -- 100 rows
SELECT COUNT(*) FROM [transaction];      -- 100 rows
SELECT COUNT(*) FROM [tax_transactions]; -- 100 rows

-- Unfiltered three-way join on order_number: the row count is the sum, over
-- every order_number, of (order rows x transaction rows x tax rows).
SELECT COUNT(*)                          -- 10551 rows
FROM [order] AS o
INNER JOIN [transaction] AS t
    ON o.order_number = t.order_number
INNER JOIN [tax_transactions] AS tx
    ON o.order_number = tx.order_number;

-- Rows per table for a single order_number (matching rows / table size).
SELECT COUNT(*) FROM [order]            WHERE order_number = 8; -- 10/100
SELECT COUNT(*) FROM [transaction]      WHERE order_number = 8; -- 13/100
SELECT COUNT(*) FROM [tax_transactions] WHERE order_number = 8; -- 14/100
2. 连接
这意味着无论怎样连接,结果行数都是各表中匹配行数的乘积;正如您所看到的,带过滤条件与不带过滤条件时,每个连接的结果行数相差很大。
/*
 * Pairwise joins, no filter on order_number.
 */
SELECT COUNT(*) -- 1033 rows
FROM [order] AS o
INNER JOIN [transaction] AS t
    ON o.order_number = t.order_number;

SELECT COUNT(*) -- 1018 rows
FROM [order] AS o
INNER JOIN [tax_transactions] AS tx
    ON o.order_number = tx.order_number;

SELECT COUNT(*) -- 1029 rows
FROM [transaction] AS t
INNER JOIN [tax_transactions] AS tx
    ON t.order_number = tx.order_number;

/*
 * Same pairwise joins restricted to order_number = 8:
 * each count is the product of the matching rows on both sides.
 */
SELECT COUNT(*) -- 130 rows (10 from order * 13 from transaction)
FROM [order] AS o
INNER JOIN [transaction] AS t
    ON o.order_number = t.order_number
WHERE o.order_number = 8;

SELECT COUNT(*) -- 140 rows (10 from order * 14 from tax_transactions)
FROM [order] AS o
INNER JOIN [tax_transactions] AS tx
    ON o.order_number = tx.order_number
WHERE o.order_number = 8;

SELECT COUNT(*) -- 182 rows (13 from transaction * 14 from tax_transactions)
FROM [transaction] AS t
INNER JOIN [tax_transactions] AS tx
    ON t.order_number = tx.order_number
WHERE tx.order_number = 8;
3.解决方案/建议
因此,如果您连接所有三个表,您的结果集中应该会出现大量行。在这种情况下,我们每个表有 100 行,在您的情况下,每个表有 300k 到 500k 行。
我尝试了同样的场景,每个表中有 500 行,连接三个表花费了 9 秒。
我建议您创建一个带有
order_number
参数的存储过程(stored procedure)
,这样您就可以只过滤您需要的内容。
-- Demo procedure: counts the rows of the three-table join for one order.
--   @orderNumber  order_number value to restrict the join to
CREATE PROCEDURE sp_GetDataFromOrderNumber
    @orderNumber INT
AS
BEGIN
    SELECT COUNT(*)
    FROM [order] AS o
    INNER JOIN [transaction] AS t
        ON o.order_number = t.order_number
    INNER JOIN [tax_transactions] AS tx
        ON o.order_number = tx.order_number
    WHERE o.order_number = @orderNumber;
END
GO
EXEC sp_GetDataFromOrderNumber @orderNumber = 8;
对于您的代码,如果列表中只有一个值,则不需要使用
IN
,只需使用 =
。此外,您还应该分析数据,考虑使用其他参数作为过滤条件。
-- Returns the joined order / payment / tax rows for ONE order, newest
-- transaction first.
--   @orderNumber  ORDER_NUMBER to restrict the join to. Without this filter
--                 the procedure would produce the full three-table join.
--
-- NOTE: SQL Server does not accept a database-name prefix on
-- CREATE PROCEDURE, so the object is created as [xyz].[...]; run this batch
-- with [abc] as the current database.
CREATE PROCEDURE [xyz].[sp_GetDataFromOrderNumber]
    @orderNumber INT
AS
BEGIN
    SELECT
        o.USER_ID AS ACCOUNT_DN,
        t.TRANSACTION_STATUS,
        t.CREATED_ON,
        tx.TAX_CALCULATED,
        tx.TRANSACTION_STATUS AS TAX_TXN_STATUS
    FROM [abc].[xyz].[TAX_TRANSACTIONS] AS tx
    INNER JOIN [abc].[xyz].[ORDER] AS o
        ON o.ORDER_NUMBER = tx.ORDER_NUMBER
    INNER JOIN [abc].[xyz].[TRANSACTION] AS t
        ON o.ORDER_NUMBER = t.ORDER_NUMBER
       AND t.TRANSACTION_TYPE <> 'auth'
    -- The order filter must sit outside the OR group; otherwise AND would
    -- bind to the first branch only.
    WHERE o.ORDER_NUMBER = @orderNumber
      AND (
            (t.TRANSACTION_TYPE = 'purchase'
             AND tx.TAXATION_TYPE = 'SalesInvoice'
             AND t.TRANSACTION_STATUS = 'approved')
         OR (t.TRANSACTION_TYPE = 'refund'
             AND tx.TAXATION_TYPE = 'ReturnInvoice')
         OR tx.TRANSACTION_STATUS = 'Error'
      )
    ORDER BY t.CREATED_ON DESC;
END
GO
您可以这样调用该过程:
EXEC [abc].[xyz].[sp_GetDataFromOrderNumber] @orderNumber = 8
希望这有帮助。