SQL 公用表表达式(CTE)
公用表表达式(Common Table Expression,简称 CTE)是一种临时结果集,可以在 SELECT、INSERT、UPDATE、DELETE 语句中引用。CTE 使复杂查询更易读、更易维护。
什么是 CTE?
CTE 就像是为查询定义的临时视图,它:
- 只在当前 SQL 语句执行期间存在
- 可以被多次引用
- 提高代码可读性
- 支持递归查询
基本语法
WITH cte_name AS (
-- CTE 查询定义
SELECT ...
)
-- 主查询(引用 CTE)
SELECT * FROM cte_name;
简单示例
-- 定义一个 CTE
WITH high_value_orders AS (
SELECT * FROM orders WHERE amount > 1000
)
-- 主查询使用 CTE
SELECT
user_id,
COUNT(*) AS order_count,
SUM(amount) AS total_amount
FROM high_value_orders
GROUP BY user_id;
为什么使用 CTE?
1. 提高可读性
-- 不使用 CTE(难以阅读)
SELECT
u.name,
stats.total_orders,
stats.total_amount
FROM users u
JOIN (
SELECT
user_id,
COUNT(*) AS total_orders,
SUM(amount) AS total_amount
FROM orders
WHERE status = 'completed'
GROUP BY user_id
) stats ON u.id = stats.user_id
WHERE stats.total_amount > 10000;
-- 使用 CTE(清晰易读)
WITH completed_orders AS (
SELECT * FROM orders WHERE status = 'completed'
),
user_stats AS (
SELECT
user_id,
COUNT(*) AS total_orders,
SUM(amount) AS total_amount
FROM completed_orders
GROUP BY user_id
)
SELECT
u.name,
s.total_orders,
s.total_amount
FROM users u
JOIN user_stats s ON u.id = s.user_id
WHERE s.total_amount > 10000;
2. 避免重复代码
-- 不使用 CTE(重复相同的子查询)
SELECT
(SELECT AVG(salary) FROM employees WHERE department = 'IT') AS it_avg,
(SELECT AVG(salary) FROM employees WHERE department = 'IT') -
(SELECT MIN(salary) FROM employees WHERE department = 'IT') AS it_range,
(SELECT MAX(salary) FROM employees WHERE department = 'IT') AS it_max;
-- 使用 CTE(只写一次)
WITH it_employees AS (
SELECT salary FROM employees WHERE department = 'IT'
)
SELECT
AVG(salary) AS it_avg,
AVG(salary) - MIN(salary) AS it_range,
MAX(salary) AS it_max
FROM it_employees;
3. 便于调试
可以单独测试 CTE 中的查询,逐步构建复杂查询。
多个 CTE
可以在一个 WITH 子句中定义多个 CTE,用逗号分隔:
WITH
-- 第一个 CTE:活跃用户
active_users AS (
SELECT * FROM users WHERE last_login > DATE_SUB(NOW(), INTERVAL 30 DAY)
),
-- 第二个 CTE:高价值订单
high_value_orders AS (
SELECT * FROM orders WHERE amount > 5000
),
-- 第三个 CTE:用户统计
user_stats AS (
SELECT
user_id,
COUNT(*) AS order_count,
SUM(amount) AS total_amount
FROM high_value_orders
GROUP BY user_id
)
-- 主查询
SELECT
u.name,
u.email,
COALESCE(s.order_count, 0) AS high_value_order_count,
COALESCE(s.total_amount, 0) AS high_value_total
FROM active_users u
LEFT JOIN user_stats s ON u.id = s.user_id
ORDER BY s.total_amount DESC;
CTE 与子查询对比
| 特性 | CTE | 子查询 |
|---|---|---|
| 可读性 | 好 | 较差 |
| 可复用 | 可多次引用 | 需重复编写 |
| 调试 | 易于单独测试 | 较难调试 |
| 性能 | 通常相同 | 通常相同 |
| 递归 | 支持 | 不支持 |
递归 CTE
递归 CTE 是一种特殊的 CTE,它可以引用自身,用于处理层级数据或递归问题。
递归 CTE 语法
WITH RECURSIVE cte_name AS (
-- 基础查询(锚点)
SELECT ...
UNION ALL
-- 递归查询(引用自身)
SELECT ...
FROM cte_name ...
)
SELECT * FROM cte_name;
示例:组织架构层级
假设有一个员工表,包含上下级关系:
-- employees 表
| id | name | manager_id |
|----|--------|------------|
| 1 | CEO | NULL |
| 2 | VP1 | 1 |
| 3 | VP2 | 1 |
| 4 | M1 | 2 |
| 5 | M2 | 3 |
| 6 | E1 | 4 |
| 7 | E2 | 4 |
| 8 | E3 | 5 |
WITH RECURSIVE org_hierarchy AS (
-- 基础查询:顶级经理(没有上级)
SELECT
id,
name,
manager_id,
1 AS level,
CAST(name AS CHAR(1000)) AS path
FROM employees
WHERE manager_id IS NULL
UNION ALL
-- 递归查询:找到下属
SELECT
e.id,
e.name,
e.manager_id,
h.level + 1 AS level,
CONCAT(h.path, ' -> ', e.name) AS path
FROM employees e
JOIN org_hierarchy h ON e.manager_id = h.id
)
SELECT
level,
name,
path
FROM org_hierarchy
ORDER BY level, name;
结果:
| level | name | path |
|-------|------|-------------------------|
| 1 | CEO | CEO |
| 2 | VP1 | CEO -> VP1 |
| 2 | VP2 | CEO -> VP2 |
| 3 | M1 | CEO -> VP1 -> M1 |
| 3 | M2 | CEO -> VP2 -> M2 |
| 4 | E1 | CEO -> VP1 -> M1 -> E1 |
| 4 | E2 | CEO -> VP1 -> M1 -> E2 |
| 4 | E3 | CEO -> VP2 -> M2 -> E3 |
示例:生成数字序列
WITH RECURSIVE numbers AS (
-- 基础查询:从 1 开始
SELECT 1 AS n
UNION ALL
-- 递归查询:每次加 1
SELECT n + 1 FROM numbers WHERE n < 10
)
SELECT * FROM numbers;
结果:
| n |
|----|
| 1 |
| 2 |
| 3 |
| 4 |
| 5 |
| 6 |
| 7 |
| 8 |
| 9 |
| 10 |
示例:生成日期序列
WITH RECURSIVE date_series AS (
SELECT DATE('2024-01-01') AS date
UNION ALL
SELECT DATE_ADD(date, INTERVAL 1 DAY)
FROM date_series
WHERE date < '2024-01-10'
)
SELECT date FROM date_series;
示例:物料清单(BOM)展开
-- 产品组成表:product_id 由 component_id 组成,数量为 quantity
WITH RECURSIVE bom_explosion AS (
-- 基础查询:顶级组件
SELECT
product_id,
component_id,
quantity,
1 AS level
FROM bill_of_materials
WHERE product_id = 'PRODUCT_A'
UNION ALL
-- 递归查询:展开子组件
SELECT
b.product_id,
b.component_id,
b.quantity * bom.quantity AS quantity,
bom.level + 1 AS level
FROM bill_of_materials b
JOIN bom_explosion bom ON b.product_id = bom.component_id
)
SELECT * FROM bom_explosion;
CTE 在数据修改中的应用
INSERT 使用 CTE
WITH new_users AS (
SELECT
name,
email,
created_at
FROM temp_users
WHERE valid = 1
)
INSERT INTO users (name, email, created_at)
SELECT * FROM new_users;
UPDATE 使用 CTE
WITH user_totals AS (
SELECT
user_id,
SUM(amount) AS total_amount
FROM orders
GROUP BY user_id
)
UPDATE users u
JOIN user_totals t ON u.id = t.user_id
SET u.total_spent = t.total_amount,
u.customer_level = CASE
WHEN t.total_amount >= 10000 THEN 'VIP'
WHEN t.total_amount >= 5000 THEN 'Gold'
ELSE 'Regular'
END;
DELETE 使用 CTE
WITH duplicate_records AS (
SELECT id
FROM (
SELECT
id,
ROW_NUMBER() OVER (PARTITION BY email ORDER BY created_at) AS rn
FROM users
) t
WHERE rn > 1
)
DELETE FROM users
WHERE id IN (SELECT id FROM duplicate_records);
实战案例
案例1:多级聚合统计
-- 计算每个用户的月度统计,然后找出月度消费最高的用户
WITH monthly_stats AS (
SELECT
user_id,
DATE_FORMAT(order_date, '%Y-%m') AS month,
COUNT(*) AS order_count,
SUM(amount) AS total_amount
FROM orders
GROUP BY user_id, DATE_FORMAT(order_date, '%Y-%m')
),
top_users AS (
SELECT
user_id,
SUM(total_amount) AS yearly_total,
AVG(total_amount) AS avg_monthly
FROM monthly_stats
GROUP BY user_id
)
SELECT
u.name,
t.yearly_total,
t.avg_monthly,
RANK() OVER (ORDER BY t.yearly_total DESC) AS spending_rank
FROM top_users t
JOIN users u ON t.user_id = u.id
ORDER BY t.yearly_total DESC
LIMIT 10;
案例2:同比环比分析
WITH monthly_sales AS (
SELECT
DATE_FORMAT(order_date, '%Y-%m') AS month,
SUM(amount) AS total_sales
FROM orders
GROUP BY DATE_FORMAT(order_date, '%Y-%m')
),
with_comparison AS (
SELECT
month,
total_sales,
LAG(total_sales, 1) OVER (ORDER BY month) AS prev_month,
LAG(total_sales, 12) OVER (ORDER BY month) AS prev_year_month
FROM monthly_sales
)
SELECT
month,
total_sales,
prev_month,
ROUND((total_sales - prev_month) / prev_month * 100, 2) AS mom_growth,
prev_year_month,
ROUND((total_sales - prev_year_month) / prev_year_month * 100, 2) AS yoy_growth
FROM with_comparison
WHERE prev_month IS NOT NULL;
案例3:分类树结构
-- 商品分类树
WITH RECURSIVE category_tree AS (
-- 顶级分类
SELECT
id,
name,
parent_id,
0 AS depth,
CAST(name AS CHAR(1000)) AS full_path
FROM categories
WHERE parent_id IS NULL
UNION ALL
-- 子分类
SELECT
c.id,
c.name,
c.parent_id,
t.depth + 1,
CONCAT(t.full_path, ' > ', c.name)
FROM categories c
JOIN category_tree t ON c.parent_id = t.id
)
SELECT
depth,
REPEAT(' ', depth) AS indent,
name,
full_path
FROM category_tree
ORDER BY full_path;
案例4:分页统计
WITH paginated_data AS (
SELECT
*,
ROW_NUMBER() OVER (ORDER BY created_at DESC) AS row_num
FROM articles
WHERE status = 'published'
)
SELECT * FROM paginated_data
WHERE row_num BETWEEN 11 AND 20;
注意事项
1. CTE 必须被引用
定义的 CTE 如果不在主查询中使用,会导致错误(某些数据库)或警告。
-- 错误:unused_cte 未被使用
WITH used_cte AS (SELECT 1),
unused_cte AS (SELECT 2)
SELECT * FROM used_cte;
2. CTE 的作用域
CTE 只能在定义后的主查询中引用,不能在其他 CTE 之前引用(除非是递归 CTE)。
-- 错误:cte2 不能引用后面定义的 cte3
WITH
cte1 AS (SELECT 1),
cte2 AS (SELECT * FROM cte3), -- 错误!
cte3 AS (SELECT 2)
SELECT * FROM cte1;
3. 递归 CTE 必须有终止条件
递归 CTE 的递归部分必须有条件限制,否则会无限循环。
-- 危险:可能无限循环!
WITH RECURSIVE infinite AS (
SELECT 1 AS n
UNION ALL
SELECT n + 1 FROM infinite -- 没有 WHERE 条件!
)
SELECT * FROM infinite LIMIT 100; -- LIMIT 只能限制最终结果
4. 性能考虑
虽然 CTE 提高了可读性,但在某些数据库中可能会影响性能:
- 某些数据库会将 CTE 物化(存储临时结果)
- 多次引用同一 CTE 可能导致多次计算
- 对于简单查询,子查询可能更高效
小结
本章我们学习了:
- CTE 基本语法:使用 WITH 子句定义临时结果集
- 多个 CTE:用逗号分隔,可相互引用
- 递归 CTE:处理层级数据、生成序列
- CTE 在 DML 中使用:INSERT、UPDATE、DELETE
- 实战案例:多级聚合、同比环比、分类树
练习
- 使用 CTE 重写一个复杂的嵌套子查询
- 使用递归 CTE 展示组织架构层级
- 使用 CTE 计算用户月度消费的同比和环比
- 使用递归 CTE 生成一个日期范围
- 使用 CTE 实现分页查询