SQL的LEFT JOIN优化
原SQL:以a表为base表,LEFT JOIN三张表(b、c、d)抽数。
-- Baseline query: base table a (alias ccu) is LEFT JOINed to b (ctr), c (om) and d (of).
-- The date-range filter on the base table is applied only in the final WHERE clause.
SELECT ccu.*, ctr.*, om.*, of.*
FROM ods.a ccu
LEFT JOIN ods.b ctr
    ON ctr.coupon_code = ccu.coupon_code
   AND ctr.is_deleted = 0
LEFT JOIN ods.c om
    ON om.order_id = ctr.bill_code
   AND om.deleted = 0
LEFT JOIN ods.d of
    ON of.order_id = om.order_id
   AND of.deleted = 0
   AND of.foe_type = 1
   AND of.status = 20
WHERE ccu.created_date BETWEEN $start_time AND $end_time;
致命缺点:如果四个表数据量都比较大,LEFT JOIN匹配需要占用大量内存,容易打爆内存,同时执行效率很低。
**优化一:**缩小base表的取数范围:将base表写成带WHERE条件的子查询,而不是把WHERE语句写在整个查询的最后。经验证,性能可以提升40%左右(单表千万以上的数据量);但由于b、c、d表数据量很大,LEFT JOIN依然会打爆内存。
-- Optimization 1: filter the base table inside a derived table (ccutwo) so that
-- only rows in the requested created_date range take part in the LEFT JOINs.
-- NOTE(review): ods.coupon_trade_record / ods.order_master / ods.order_foe appear
-- to be the tables called b / c / d in the surrounding text (same aliases and
-- join keys as the baseline query) — confirm.
SELECT ccutwo.*, ctr.*, om.*, of.*
FROM (
    SELECT ccu.*
    FROM ods.a ccu
    WHERE ccu.created_date BETWEEN $start_time AND $end_time
) AS ccutwo
LEFT JOIN ods.coupon_trade_record ctr
    ON ccutwo.coupon_code = ctr.coupon_code
   AND ctr.is_deleted = 0
LEFT JOIN ods.order_master om
    ON ctr.bill_code = om.order_id
   AND om.deleted = 0
LEFT JOIN ods.order_foe of
    ON om.order_id = of.order_id
   AND of.deleted = 0
   AND of.foe_type = 1
   AND of.status = 20;
**优化二:**在JOIN条件中增加时间范围过滤,减少b、c、d表参与关联的数据量,性能又提升一个级别;各表统一基于created_date限定数据时间范围。注意:过滤条件写在ON中,b、c、d表里created_date不在该时间范围内的记录将无法关联上(对应列为NULL),需确认业务上关联数据都落在同一时间范围内。
-- Optimization 2: in addition to filtering the base table, restrict every joined
-- table to the same created_date range so less data takes part in each join.
-- The range predicates live in the ON clauses, so base rows are always kept;
-- a joined row whose created_date is outside the range simply does not match
-- and its columns come back as NULL.
SELECT ccutwo.*, ctr.*, om.*, of.*
FROM (
    SELECT ccu.*
    FROM ods.a ccu
    WHERE ccu.created_date BETWEEN $start_time AND $end_time
) AS ccutwo
LEFT JOIN ods.coupon_trade_record ctr
    ON ccutwo.coupon_code = ctr.coupon_code
   AND ctr.is_deleted = 0
   AND ctr.created_date BETWEEN $start_time AND $end_time
LEFT JOIN ods.order_master om
    ON ctr.bill_code = om.order_id
   AND om.deleted = 0
   AND om.created_date BETWEEN $start_time AND $end_time
LEFT JOIN ods.order_foe of
    ON om.order_id = of.order_id
   AND of.deleted = 0
   AND of.foe_type = 1
   AND of.status = 20
   AND of.created_date BETWEEN $start_time AND $end_time;
**优化三:**基于Doris表的分区特性,通过分区字段限制数据范围(分区裁剪),性能更高;trans_date为分区字段。
-- Optimization 3: filter every table on trans_date, which the accompanying notes
-- describe as the partition column, instead of on a created_date range.
-- The derived table must be closed and aliased as ccutwo, because both the
-- SELECT list and the first join condition reference that alias.
SELECT ccutwo.*, ctr.*, om.*, of.*
FROM (
    SELECT ccu.*
    FROM ods.a ccu
    WHERE ccu.trans_date = $trans_date
) AS ccutwo
LEFT JOIN ods.coupon_trade_record ctr
    ON ccutwo.coupon_code = ctr.coupon_code
   AND ctr.is_deleted = 0
   AND ctr.trans_date = $trans_date
LEFT JOIN ods.order_master om
    ON ctr.bill_code = om.order_id
   AND om.deleted = 0
   AND om.trans_date = $trans_date
LEFT JOIN ods.order_foe of
    ON om.order_id = of.order_id
   AND of.deleted = 0
   AND of.foe_type = 1
   AND of.status = 20
   AND of.trans_date = $trans_date;
**优化四:**按照SQL开发规范,多表LEFT JOIN不能超过三个表,因此拆分成两个SQL执行;同时利用Doris主键模型(该模型保证Key列的唯一性,插入或更新数据时,新数据会覆盖具有相同Key的旧数据,确保数据记录为最新),新建主键模型表。
老表带分区字段
-- Old table: Unique Key table keyed on (coupon_code, trans_date),
-- range-partitioned by trans_date with dynamic daily partitions.
CREATE TABLE `dwd_table` (
    -- fields (column definitions are omitted in these notes)
)
ENGINE = OLAP
UNIQUE KEY (`coupon_code`, `trans_date`)
COMMENT '领券生命周期dwd宽表'
PARTITION BY RANGE (`trans_date`) (
    PARTITION p20250809 VALUES [('2025-08-09'), ('2025-08-10')),
    PARTITION p20250810 VALUES [('2025-08-10'), ('2025-08-11'))
)
DISTRIBUTED BY HASH (`coupon_code`) BUCKETS 1
PROPERTIES (
    -- replication / load
    "replication_allocation" = "tag.location.default: 3",
    "min_load_replica_num" = "-1",
    "is_being_synced" = "false",
    -- dynamic partition settings
    "dynamic_partition.enable" = "true",
    "dynamic_partition.time_unit" = "DAY",
    "dynamic_partition.time_zone" = "Asia/Shanghai",
    "dynamic_partition.start" = "-2147483648",
    "dynamic_partition.end" = "32",
    "dynamic_partition.prefix" = "p",
    "dynamic_partition.replication_allocation" = "tag.location.default: 3",
    "dynamic_partition.buckets" = "1",
    "dynamic_partition.create_history_partition" = "true",
    "dynamic_partition.history_partition_num" = "350",
    "dynamic_partition.hot_partition_num" = "0",
    "dynamic_partition.reserved_history_periods" = "NULL",
    "dynamic_partition.storage_policy" = "",
    -- storage
    "storage_medium" = "hdd",
    "storage_format" = "V2",
    "inverted_index_storage_format" = "V1",
    -- unique-key behaviour, schema change and compaction
    "enable_unique_key_merge_on_write" = "true",
    "light_schema_change" = "true",
    "disable_auto_compaction" = "false",
    "enable_single_replica_compaction" = "false",
    -- group commit
    "group_commit_interval_ms" = "10000",
    "group_commit_data_bytes" = "134217728",
    "enable_mow_light_delete" = "false"
);
新表,不带分区字段
-- New table: Unique Key table keyed on coupon_code only, with no PARTITION BY
-- clause, hash-distributed on coupon_code into 8 buckets.
CREATE TABLE `dwd_table_new` (
    -- fields (column definitions are omitted in these notes)
)
ENGINE = OLAP
UNIQUE KEY (`coupon_code`)
COMMENT '生命周期dwd宽表'
DISTRIBUTED BY HASH (`coupon_code`) BUCKETS 8
PROPERTIES (
    -- replication / load
    "replication_allocation" = "tag.location.default: 3",
    "min_load_replica_num" = "-1",
    "is_being_synced" = "false",
    -- storage
    "storage_medium" = "hdd",
    "storage_format" = "V2",
    "inverted_index_storage_format" = "V1",
    -- unique-key behaviour, schema change and compaction
    "enable_unique_key_merge_on_write" = "true",
    "light_schema_change" = "true",
    "disable_auto_compaction" = "false",
    "enable_single_replica_compaction" = "false",
    -- group commit
    "group_commit_interval_ms" = "10000",
    "group_commit_data_bytes" = "134217728",
    "enable_mow_light_delete" = "false"
);
写入时先写base表(a)的数据,再写附表(b、c、d)的数据;如果UNIQUE KEY相同,新数据会覆盖老数据,而不会新插入一条。注意(需确认):第二次写入若只包含部分列,应使用Doris的部分列更新,否则未写入的列可能被默认值/NULL覆盖。