GaussDB(DWS)性能调优:大表关联(结果集较小)引起的性能瓶颈问题案例
【摘要】 SQL性能瓶颈位于大表关联处,且关联后结果集数据量减少很多。本文针对这一类场景利用Bloom Filter进行提前过滤以提高语句执行性能。
1、【问题描述】
CNG-IT去HANA性能优化过程中,存在部分SQL语句大表关联慢的情况,并且关联后结果集数据行数降低幅度大。
2、【原始SQL】
SELECT PERIOD_MONTH,
PERIOD_ID,
AATP_BIG_MODEL_EXTERNAL,
LAG(SUM(CURR_AATP_QTY)) OVER(PARTITION BY AATP_BIG_MODEL_EXTERNAL ORDER BY PERIOD_ID) AS CURR_AATP_QTY,
SUM(AATP_QTY) AS AATP_QTY,
SUM(SHIPMENT_QTY) AS SHIPMENT_QTY,
SUM(INV_QTY) AS INV_QTY,
SUM(WIP_QTY) AS WIP_QTY,
SUM(SUM(AATP_QTY)) OVER(PARTITION BY AATP_BIG_MODEL_EXTERNAL,PERIOD_MONTH ORDER BY PERIOD_ID) AS AATP_CUM_QTY,
SUM(SUM(SHIPMENT_QTY)) OVER(PARTITION BY AATP_BIG_MODEL_EXTERNAL,PERIOD_MONTH ORDER BY PERIOD_ID) AS SHIPMENT_CUM_QTY,
SUM(SUM(INV_QTY)) OVER(PARTITION BY AATP_BIG_MODEL_EXTERNAL,PERIOD_MONTH ORDER BY PERIOD_ID) AS INV_CUM_QTY,
LAG(SUM(AATP_QTY)) OVER(PARTITION BY AATP_BIG_MODEL_EXTERNAL,PERIOD_MONTH ORDER BY PERIOD_ID) AS AATP_LAG_QTY,
LAG(SUM(SHIPMENT_QTY)) OVER(PARTITION BY AATP_BIG_MODEL_EXTERNAL,PERIOD_MONTH ORDER BY PERIOD_ID) AS SHIPMENT_LAG_QTY,
LAG(SUM(AATP_QTY)) OVER(PARTITION BY AATP_BIG_MODEL_EXTERNAL ORDER BY PERIOD_ID) AS AATP_HIS_LAG_QTY,
LAG(SUM(SHIPMENT_QTY)) OVER(PARTITION BY AATP_BIG_MODEL_EXTERNAL ORDER BY PERIOD_ID) AS SHIPMENT_HIS_LAG_QTY
FROM (
SELECT PERIOD_MONTH,PERIOD_ID,
PR.BIG_MODEL_EXTERNAL AS AATP_BIG_MODEL_EXTERNAL,
PR.PROD_BIG_MODEL AS AATP_BIG_MODEL,
PR.PROD_EN_NAME AS AATP_PROD_EN_NAME,
PR.PROD_SPEC AS AATP_PROD_SPEC,
PR.PROD_COORDINATOR AS AATP_PROD_COORDINATOR,
PR.PS_SKU_NUMBER AS AATP_PS_SKU_NUMBER,
PR.SKU_NAME AS AATP_SKU_NAME,
PR.PROD_COLOUR AS AATP_PROD_COLOUR,
SUM(CURR_AATP_QTY) AS CURR_AATP_QTY,
SUM(AATP_QTY) AS AATP_QTY,
SUM(SHIPMENT_QTY) AS SHIPMENT_QTY,
SUM(INV_QTY) AS INV_QTY,
SUM(WIP_QTY) AS WIP_QTY,
SUM(AATP_QTY_HIS) AS AATP_QTY_HIS,
SUM(SHIPMENT_QTY_HIS) AS SHIPMENT_QTY_HIS
FROM (
SELECT CAST(TO_CHAR(ADDDATE(ADDDATE(R.BUCKET_DATE,-WEEKDAY(R.BUCKET_DATE)),3),'YYYYMM') AS DECIMAL(28,0)) AS PERIOD_MONTH,
CASE WHEN 'WK' = 'DY' AND T.RN = 1
THEN CAST(TO_CHAR(ADDDATE(R.BUCKET_DATE,6-WEEKDAY(R.BUCKET_DATE)),'YYYYMMDD') AS DECIMAL(28,0))
WHEN 'WK' = 'DY' AND T.RN > 1
THEN CAST(TO_CHAR(R.BUCKET_DATE,'YYYYMMDD') AS DECIMAL(28,0))
ELSE CAST(TO_CHAR(ADDDATE(R.BUCKET_DATE,-WEEKDAY(R.BUCKET_DATE)),'YYYYMMDD') AS DECIMAL(28,0)) END AS PERIOD_ID,
R.PS_SKU_NUMBER,
0 AS CURR_AATP_QTY,
SUM(REGION_AATP_QTY) AS AATP_QTY,
SUM(SUM(CASE WHEN R.BUCKET_DATE <= ADDDATE(CURRENT_DATE,-WEEKDAY(CURRENT_DATE)) THEN REGION_AATP_QTY ELSE 0 END))
OVER(PARTITION BY PS_SKU_NUMBER) AS AATP_QTY_HIS,
0 AS SHIPMENT_QTY,
0 AS SHIPMENT_QTY_HIS,
0 AS INV_QTY,
0 AS WIP_QTY
FROM DMISC.DM_PLAN_FCST_AATP_GAP_F R
INNER JOIN DMISC.DM_DIM_PLAN_LOOKUP_D D
ON R.IPMT_CODE = D.LOOKUP_CODE
INNER JOIN (
SELECT CASE WHEN TO_CHAR(ADDDATE(D.PERIOD_ST_DATE,-9),'YYYYMMDD') < TO_CHAR(ADDDATE(CURRENT_DATE,-WEEKDAY(CURRENT_DATE)),'YYYYMMDD')
THEN CAST(TO_CHAR(ADDDATE(D.PERIOD_ST_DATE,-9),'YYYYMMDD') AS DECIMAL(28,0))
WHEN TO_CHAR(ADDDATE(D.PERIOD_ST_DATE,-9)) = TO_CHAR(ADDDATE(CURRENT_DATE,-WEEKDAY(CURRENT_DATE)),'YYYYMMDD')
THEN CAST(TO_CHAR(ADDDATE(D.PERIOD_ST_DATE,-2),'YYYYMMDD') AS DECIMAL(28,0))
ELSE 20231117
END AS PLAN_ID,
MIN(TO_NUMBER(TO_CHAR(ADDDATE(D.PERIOD_ST_DATE,-3),'YYYYMMDD'))) AS BUCKET_DATE_START,
MAX(TO_NUMBER(TO_CHAR(ADDDATE(D.PERIOD_ST_DATE,3),'YYYYMMDD'))) AS BUCKET_DATE_END,
ROW_NUMBER() OVER(ORDER BY CAST(TO_CHAR(ADDDATE(D.PERIOD_ST_DATE,-9),'YYYYMMDD') AS DECIMAL(28,0))) AS RN
FROM DMISC.DM_DIM_CBG_PERIOD_D D
WHERE D.PERIOD_TYPE = 'D'
AND D.PERIOD_ST_DATE >= ADDDATE(TO_DATE(20190429,'YYYYMMDD'),-7)
AND D.PERIOD_ST_DATE <= ADDDATE(TO_DATE(20190528,'YYYYMMDD'),6)
AND D.DAY_OF_WEEK = 4
GROUP BY D.PERIOD_ST_DATE,
CASE WHEN TO_CHAR(ADDDATE(D.PERIOD_ST_DATE,-9),'YYYYMMDD') < TO_CHAR(ADDDATE(CURRENT_DATE,-WEEKDAY(CURRENT_DATE)),'YYYYMMDD')
THEN TO_NUMBER(TO_CHAR(ADDDATE(D.PERIOD_ST_DATE,-9),'YYYYMMDD'))
WHEN TO_CHAR(ADDDATE(D.PERIOD_ST_DATE,-9)) = TO_CHAR(ADDDATE(CURRENT_DATE,-WEEKDAY(CURRENT_DATE)),'YYYYMMDD')
THEN TO_NUMBER(TO_CHAR(ADDDATE(D.PERIOD_ST_DATE,-2),'YYYYMMDD'))
ELSE 20231117
END
) T
ON R.DATA_VERSION = T.PLAN_ID
WHERE PLAN_DATE_TYPE = 'DY'
AND R.REGION_CN_NAME = '大中华终端业务部'
AND R.PS_SKU_NUMBER IS NOT NULL
--AND R.PS_SKU_NUMBER <> ''
AND R.BUCKET_DATE >= TO_DATE(T.BUCKET_DATE_START,'YYYYMMDD')
AND R.BUCKET_DATE <= TO_DATE(T.BUCKET_DATE_END,'YYYYMMDD')
GROUP BY PERIOD_MONTH,
PERIOD_ID,
R.PS_SKU_NUMBER
UNION ALL
SELECT CAST(TO_CHAR(ADDDATE(ADDDATE(R.BUCKET_DATE,-WEEKDAY(R.BUCKET_DATE)),3),'YYYYMM') AS DECIMAL(28,0)) AS PERIOD_MONTH,
CASE WHEN 'WK' = 'DY' AND T.RN = 1
THEN CAST(TO_CHAR(ADDDATE(R.BUCKET_DATE,6-WEEKDAY(R.BUCKET_DATE)),'YYYYMMDD') AS DECIMAL(28,0))
WHEN 'WK' = 'DY' AND T.RN > 1
THEN CAST(TO_CHAR(R.BUCKET_DATE,'YYYYMMDD') AS DECIMAL(28,0))
ELSE CAST(TO_CHAR(ADDDATE(R.BUCKET_DATE,-WEEKDAY(R.BUCKET_DATE)),'YYYYMMDD') AS DECIMAL(28,0)) END AS PERIOD_ID,
R.PS_SKU_NUMBER,
SUM(REGION_AATP_QTY) AS CURR_AATP_QTY,
0 AS AATP_QTY,
0 AS AATP_QTY_HIS,
0 AS SHIPMENT_QTY,
0 AS SHIPMENT_QTY_HIS,
0 AS INV_QTY,
0 AS WIP_QTY
FROM DMISC.DM_PLAN_FCST_AATP_GAP_F R
INNER JOIN DMISC.DM_DIM_PLAN_LOOKUP_D D
ON R.IPMT_CODE = D.LOOKUP_CODE
INNER JOIN (
SELECT CASE WHEN TO_CHAR(ADDDATE(D.PERIOD_ST_DATE,-9),'YYYYMMDD') < TO_CHAR(ADDDATE(CURRENT_DATE,-WEEKDAY(CURRENT_DATE)),'YYYYMMDD')
THEN TO_NUMBER(TO_CHAR(ADDDATE(D.PERIOD_ST_DATE,-9),'YYYYMMDD'))
WHEN TO_CHAR(ADDDATE(D.PERIOD_ST_DATE,-9)) = TO_CHAR(ADDDATE(CURRENT_DATE,-WEEKDAY(CURRENT_DATE)),'YYYYMMDD')
THEN TO_NUMBER(TO_CHAR(ADDDATE(D.PERIOD_ST_DATE,-9),'YYYYMMDD'))
ELSE 20231117
END AS PLAN_ID,
MIN(TO_NUMBER(TO_CHAR(ADDDATE(D.PERIOD_ST_DATE,-10),'YYYYMMDD'))) AS BUCKET_DATE_START,
MAX(TO_NUMBER(TO_CHAR(ADDDATE(D.PERIOD_ST_DATE,-4),'YYYYMMDD'))) AS BUCKET_DATE_END,
ROW_NUMBER() OVER(ORDER BY TO_NUMBER(TO_CHAR(ADDDATE(D.PERIOD_ST_DATE,-9),'YYYYMMDD'))) AS RN
FROM DMISC.DM_DIM_CBG_PERIOD_D D
WHERE D.PERIOD_TYPE = 'D'
AND D.PERIOD_ST_DATE >= ADDDATE(TO_DATE('20190429'),0)
AND D.PERIOD_ST_DATE <= ADDDATE(TO_DATE('20190528'),6)
AND D.DAY_OF_WEEK = 4
GROUP BY D.PERIOD_ST_DATE,
CASE WHEN TO_CHAR(ADDDATE(D.PERIOD_ST_DATE,-9),'YYYYMMDD') < TO_CHAR(ADDDATE(CURRENT_DATE,-WEEKDAY(CURRENT_DATE)),'YYYYMMDD')
THEN TO_NUMBER(TO_CHAR(ADDDATE(D.PERIOD_ST_DATE,-9),'YYYYMMDD'))
WHEN TO_CHAR(ADDDATE(D.PERIOD_ST_DATE,-9)) = TO_CHAR(ADDDATE(CURRENT_DATE,-WEEKDAY(CURRENT_DATE)),'YYYYMMDD')
THEN TO_NUMBER(TO_CHAR(ADDDATE(D.PERIOD_ST_DATE,-9),'YYYYMMDD'))
ELSE 20231117
END
) T
ON R.DATA_VERSION = T.PLAN_ID
WHERE PLAN_DATE_TYPE = 'DY'
AND R.REGION_CN_NAME = '大中华终端业务部'
AND R.PS_SKU_NUMBER IS NOT NULL
--AND R.PS_SKU_NUMBER <> ''
AND R.BUCKET_DATE >= TO_DATE(T.BUCKET_DATE_START)
AND R.BUCKET_DATE <= TO_DATE(T.BUCKET_DATE_END)
GROUP BY PERIOD_MONTH,
PERIOD_ID,
R.PS_SKU_NUMBER
UNION ALL
SELECT PERIOD_MONTH,
PERIOD_ID,
PS_SKU_NUMBER,
0 AS CURR_AATP_QTY,
0 AS AATP_QTY,
0 AS AATP_QTY_HIS,
SUM(IFNULL(SHIPMENT_QTY,0)) AS SHIPMENT_QTY, --发货数量
SUM(SUM(IFNULL(SHIPMENT_QTY,0))) OVER(PARTITION BY PS_SKU_NUMBER) AS SHIPMENT_QTY_HIS,
0 AS INV_QTY,
0 AS WIP_QTY FROM (
SELECT CAST(TO_CHAR(ADDDATE(ADDDATE(R.ACTUAL_SHIPMENT_DATE,-WEEKDAY(R.ACTUAL_SHIPMENT_DATE)),3),'YYYYMM') AS DECIMAL(28,0)) AS PERIOD_MONTH,
CASE WHEN 'WK' = 'DY'
AND R.ACTUAL_SHIPMENT_DATE >= ADDDATE(TO_DATE(20190429),-7)
AND R.ACTUAL_SHIPMENT_DATE < TO_DATE(20190429)
THEN CAST(TO_CHAR(ADDDATE(R.ACTUAL_SHIPMENT_DATE,6-WEEKDAY(R.ACTUAL_SHIPMENT_DATE)),'YYYYMMDD') AS DECIMAL(28,0))
WHEN 'WK' = 'DY'
AND R.ACTUAL_SHIPMENT_DATE >= TO_DATE(20190429)
THEN CAST(TO_CHAR(R.ACTUAL_SHIPMENT_DATE,'YYYYMMDD') AS DECIMAL(28,0))
ELSE CAST(TO_CHAR(ADDDATE(R.ACTUAL_SHIPMENT_DATE,-WEEKDAY(R.ACTUAL_SHIPMENT_DATE)),'YYYYMMDD') AS DECIMAL(28,0))
END AS PERIOD_ID,
PS.PS_SKU_NUMBER,
SHIPMENT_QTY
FROM DMISC.DM_OM_SHIPMENT_DTL_F R
INNER JOIN DMISC.DM_DIM_CBG_PS_SKU_REL_R PS
ON R.ITEM_CODE = PS.ITEM_NUMBER
WHERE R.ACTUAL_SHIPMENT_DATE >= ADDDATE(TO_DATE(20190429,'YYYYMMDD'),-7)
AND R.ACTUAL_SHIPMENT_DATE <= TO_DATE(20190602,'YYYYMMDD')
AND R.SC_SHIP_FLAG=1
AND R.ERP_REGION_CN_NAME ='大中华终端业务部'
AND R.SCOPE_FLAG=1
AND IFNULL(SHIPMENT_QTY,0) > 0 )
GROUP BY PERIOD_MONTH,
PERIOD_ID,
PS_SKU_NUMBER
UNION ALL
SELECT CAST(TO_CHAR(ADDDATE(ADDDATE(R.TRANSACTION_DATE,-WEEKDAY(R.TRANSACTION_DATE)),3),'YYYYMM') AS DECIMAL(28,0)) AS PERIOD_MONTH,
CASE WHEN 'WK' = 'DY'
THEN CAST(TO_CHAR(R.TRANSACTION_DATE,'YYYYMMDD') AS DECIMAL(28,0))
ELSE CAST(TO_CHAR(ADDDATE(R.TRANSACTION_DATE,-WEEKDAY(R.TRANSACTION_DATE)),'YYYYMMDD') AS DECIMAL(28,0))
END AS PERIOD_ID,
PS.PS_SKU_NUMBER,
0 AS CURR_AATP_QTY,
0 AS AATP_QTY,
0 AS AATP_QTY_HIS,
0 AS SHIPMENT_QTY,
0 AS SHIPMENT_QTY_HIS,
0 AS INV_QTY,
SUM(TRANSACTION_QTY) AS WIP_QTY
FROM DMISC.DM_MFG_WIP_EXECUTION_DTL_F R
INNER JOIN DMISC.DM_DIM_CBG_PS_SKU_REL_R PS
ON R.ITEM_CODE = PS.ITEM_NUMBER
INNER JOIN (
SELECT DISTINCT R.PS_SKU_NUMBER
FROM DMISC.DM_PLAN_FCST_AATP_GAP_F R
INNER JOIN DMISC.DM_DIM_PLAN_LOOKUP_D D
ON R.IPMT_CODE = D.LOOKUP_CODE
INNER JOIN (
SELECT CASE WHEN TO_CHAR(ADDDATE(D.PERIOD_ST_DATE,-9),'YYYYMMDD') < TO_CHAR(ADDDATE(CURRENT_DATE,-WEEKDAY(CURRENT_DATE)),'YYYYMMDD')
THEN CAST(TO_CHAR(ADDDATE(D.PERIOD_ST_DATE,-9),'YYYYMMDD') AS DECIMAL(28,0))
WHEN TO_CHAR(ADDDATE(D.PERIOD_ST_DATE,-9)) = TO_CHAR(ADDDATE(CURRENT_DATE,-WEEKDAY(CURRENT_DATE)),'YYYYMMDD')
THEN CAST(TO_CHAR(ADDDATE(D.PERIOD_ST_DATE,-2),'YYYYMMDD') AS DECIMAL(28,0))
ELSE 20231117
END AS PLAN_ID,
MIN(TO_NUMBER(TO_CHAR(ADDDATE(D.PERIOD_ST_DATE,-3),'YYYYMMDD'))) AS BUCKET_DATE_START,
MAX(TO_NUMBER(TO_CHAR(ADDDATE(D.PERIOD_ST_DATE,3),'YYYYMMDD'))) AS BUCKET_DATE_END,
ROW_NUMBER() OVER(ORDER BY CAST(TO_CHAR(ADDDATE(D.PERIOD_ST_DATE,-9),'YYYYMMDD') AS DECIMAL(28,0))) AS RN
FROM DMISC.DM_DIM_CBG_PERIOD_D D
WHERE D.PERIOD_TYPE = 'D'
AND D.PERIOD_ST_DATE >= ADDDATE(TO_DATE(20190429,'YYYYMMDD'),-7)
AND D.PERIOD_ST_DATE <= ADDDATE(TO_DATE(20190528,'YYYYMMDD'),6)
AND D.DAY_OF_WEEK = 4
GROUP BY D.PERIOD_ST_DATE,
CASE WHEN TO_CHAR(ADDDATE(D.PERIOD_ST_DATE,-9),'YYYYMMDD') < TO_CHAR(ADDDATE(CURRENT_DATE,-WEEKDAY(CURRENT_DATE)),'YYYYMMDD')
THEN TO_NUMBER(TO_CHAR(ADDDATE(D.PERIOD_ST_DATE,-9),'YYYYMMDD'))
WHEN TO_CHAR(ADDDATE(D.PERIOD_ST_DATE,-9)) = TO_CHAR(ADDDATE(CURRENT_DATE,-WEEKDAY(CURRENT_DATE)),'YYYYMMDD')
THEN TO_NUMBER(TO_CHAR(ADDDATE(D.PERIOD_ST_DATE,-2),'YYYYMMDD'))
ELSE 20231117
END
) T
ON R.DATA_VERSION = T.PLAN_ID
WHERE PLAN_DATE_TYPE = 'DY'
AND R.REGION_CN_NAME = '大中华终端业务部'
AND R.PS_SKU_NUMBER IS NOT NULL
--AND R.PS_SKU_NUMBER <> ''
AND R.BUCKET_DATE >= TO_DATE(T.BUCKET_DATE_START)
AND R.BUCKET_DATE <= TO_DATE(T.BUCKET_DATE_END)
) DD
ON DD.PS_SKU_NUMBER = PS.PS_SKU_NUMBER
WHERE R.TRANSACTION_DATE >= TO_DATE(20190429)
AND R.TRANSACTION_DATE <= TO_DATE(20190602)
GROUP BY PERIOD_MONTH,
PERIOD_ID,
PS.PS_SKU_NUMBER
) R
INNER JOIN DMISC.DM_DIM_CBG_PRODUCT_PS_SKU_REL_R PR
ON R.PS_SKU_NUMBER = PR.PS_SKU_NUMBER
WHERE 1=1
AND ('ALL' = 'ALL' OR PR.COMPANY_BRAND IN ('ALL'))
AND ('ALL' = 'ALL' OR INSTR('ALL'||',',PR.EXTERNAL_NAME||',')>0)
AND ('ALL' = 'ALL' OR INSTR('ALL'||',',PR.PROD_COLOUR||',')>0)
AND ('ALL' = 'ALL' OR INSTR('ALL'||',',R.PS_SKU_NUMBER||',')>0)
AND ('ALL' = 'ALL' OR INSTR('ALL'||',',PR.PROD_COORDINATOR||',')>0)
AND 1=1
AND (1=1)
AND (1=1)
AND 1=1 AND NVL(BOM_TYPE,'@@') <> 'ACC'
GROUP BY PERIOD_MONTH,PERIOD_ID,
PR.BIG_MODEL_EXTERNAL,
PR.PROD_BIG_MODEL,
PR.PROD_EN_NAME,
PR.PROD_SPEC,
PR.PROD_COORDINATOR,
PR.PS_SKU_NUMBER,
PR.SKU_NAME,
PR.PROD_COLOUR
)
WHERE AATP_BIG_MODEL_EXTERNAL IS NOT NULL
GROUP BY PERIOD_MONTH,PERIOD_ID,
AATP_BIG_MODEL_EXTERNAL
3、【性能分析】
上图是原始SQL语句的performance执行计划(具体计划放在附件一),从中可以看出,该计划存在两个问题:
-
- SQL语句未向量化执行
-
- r表与d表关联慢
问题1可以通过落临时表来降低不可向量化的范围(create temp table)https://support.huaweicloud.cn/sqlreference-dws/dws_06_0177.html
本篇博文重点介绍如何解决问题2,从计划中可以看出,r表作为大表与d表关联后结果集数据量不大,此时可以借助Bloom Filter,利用d表关联条件对r表进行提前过滤,从而减少关联时r表的数据量,降低关联时耗费的时间。
- r表与d表关联慢
Bloom Filter使用方法(对大表操作)
-
- 关联条件中若存在NUMERIC数据类型,改为int/bigint
-
- 关联条件中数据类型若为text,则需要先重新创建该表,创建时在该关联条件后加上COLLATE “C”
-
- 数据类型没问题后,再将该关联条件设置为PCK,能够加速过滤
开启Bloom Filter的标志
- 数据类型没问题后,再将该关联条件设置为PCK,能够加速过滤
优化后,执行计划如下所示(完整performance放在附件二中):
【版权声明】本文为华为云社区用户原创内容,转载时必须标注文章的来源(华为云社区)、文章链接、文章作者等基本信息, 否则作者和本社区有权追究责任。如果您发现本社区中有涉嫌抄袭的内容,欢迎发送邮件进行举报,并提供相关证据,一经查实,本社区将立刻删除涉嫌侵权内容,举报邮箱:
cloudbbs@huaweicloud.com
- 点赞
- 收藏
- 关注作者
评论(0)