跳到主要内容

SQL 公用表表达式(CTE)

公用表表达式(Common Table Expression,简称 CTE)是一种临时结果集,可以在 SELECT、INSERT、UPDATE、DELETE 语句中引用。CTE 使复杂查询更易读、更易维护。

什么是 CTE?

CTE 就像是为查询定义的临时视图,它:

  • 只在当前 SQL 语句执行期间存在
  • 可以被多次引用
  • 提高代码可读性
  • 支持递归查询

基本语法

WITH cte_name AS (
-- CTE 查询定义
SELECT ...
)
-- 主查询(引用 CTE)
SELECT * FROM cte_name;

简单示例

-- 定义一个 CTE
WITH high_value_orders AS (
SELECT * FROM orders WHERE amount > 1000
)
-- 主查询使用 CTE
SELECT
user_id,
COUNT(*) AS order_count,
SUM(amount) AS total_amount
FROM high_value_orders
GROUP BY user_id;

为什么使用 CTE?

1. 提高可读性

-- 不使用 CTE(难以阅读)
SELECT
u.name,
stats.total_orders,
stats.total_amount
FROM users u
JOIN (
SELECT
user_id,
COUNT(*) AS total_orders,
SUM(amount) AS total_amount
FROM orders
WHERE status = 'completed'
GROUP BY user_id
) stats ON u.id = stats.user_id
WHERE stats.total_amount > 10000;

-- 使用 CTE(清晰易读)
WITH completed_orders AS (
SELECT * FROM orders WHERE status = 'completed'
),
user_stats AS (
SELECT
user_id,
COUNT(*) AS total_orders,
SUM(amount) AS total_amount
FROM completed_orders
GROUP BY user_id
)
SELECT
u.name,
s.total_orders,
s.total_amount
FROM users u
JOIN user_stats s ON u.id = s.user_id
WHERE s.total_amount > 10000;

2. 避免重复代码

-- 不使用 CTE(重复相同的子查询)
SELECT
(SELECT AVG(salary) FROM employees WHERE department = 'IT') AS it_avg,
(SELECT AVG(salary) FROM employees WHERE department = 'IT') -
(SELECT MIN(salary) FROM employees WHERE department = 'IT') AS it_range,
(SELECT MAX(salary) FROM employees WHERE department = 'IT') AS it_max;

-- 使用 CTE(只写一次)
WITH it_employees AS (
SELECT salary FROM employees WHERE department = 'IT'
)
SELECT
AVG(salary) AS it_avg,
AVG(salary) - MIN(salary) AS it_range,
MAX(salary) AS it_max
FROM it_employees;

3. 便于调试

可以单独测试 CTE 中的查询,逐步构建复杂查询。

多个 CTE

可以在一个 WITH 子句中定义多个 CTE,用逗号分隔:

WITH 
-- 第一个 CTE:活跃用户
active_users AS (
SELECT * FROM users WHERE last_login > DATE_SUB(NOW(), INTERVAL 30 DAY)
),
-- 第二个 CTE:高价值订单
high_value_orders AS (
SELECT * FROM orders WHERE amount > 5000
),
-- 第三个 CTE:用户统计
user_stats AS (
SELECT
user_id,
COUNT(*) AS order_count,
SUM(amount) AS total_amount
FROM high_value_orders
GROUP BY user_id
)
-- 主查询
SELECT
u.name,
u.email,
COALESCE(s.order_count, 0) AS high_value_order_count,
COALESCE(s.total_amount, 0) AS high_value_total
FROM active_users u
LEFT JOIN user_stats s ON u.id = s.user_id
ORDER BY s.total_amount DESC;

CTE 与子查询对比

特性CTE子查询
可读性较差
可复用可多次引用需重复编写
调试易于单独测试较难调试
性能通常相同通常相同
递归支持不支持

递归 CTE

递归 CTE 是一种特殊的 CTE,它可以引用自身,用于处理层级数据或递归问题。

递归 CTE 语法

WITH RECURSIVE cte_name AS (
-- 基础查询(锚点)
SELECT ...

UNION ALL

-- 递归查询(引用自身)
SELECT ...
FROM cte_name ...
)
SELECT * FROM cte_name;

示例:组织架构层级

假设有一个员工表,包含上下级关系:

-- employees 表
| id | name | manager_id |
|----|--------|------------|
| 1 | CEO | NULL |
| 2 | VP1 | 1 |
| 3 | VP2 | 1 |
| 4 | M1 | 2 |
| 5 | M2 | 3 |
| 6 | E1 | 4 |
| 7 | E2 | 4 |
| 8 | E3 | 5 |
WITH RECURSIVE org_hierarchy AS (
-- 基础查询:顶级经理(没有上级)
SELECT
id,
name,
manager_id,
1 AS level,
CAST(name AS CHAR(1000)) AS path
FROM employees
WHERE manager_id IS NULL

UNION ALL

-- 递归查询:找到下属
SELECT
e.id,
e.name,
e.manager_id,
h.level + 1 AS level,
CONCAT(h.path, ' -> ', e.name) AS path
FROM employees e
JOIN org_hierarchy h ON e.manager_id = h.id
)
SELECT
level,
name,
path
FROM org_hierarchy
ORDER BY level, name;

结果:

| level | name | path                    |
|-------|------|-------------------------|
| 1 | CEO | CEO |
| 2 | VP1 | CEO -> VP1 |
| 2 | VP2 | CEO -> VP2 |
| 3 | M1 | CEO -> VP1 -> M1 |
| 3 | M2 | CEO -> VP2 -> M2 |
| 4 | E1 | CEO -> VP1 -> M1 -> E1 |
| 4 | E2 | CEO -> VP1 -> M1 -> E2 |
| 4 | E3 | CEO -> VP2 -> M2 -> E3 |

示例:生成数字序列

WITH RECURSIVE numbers AS (
-- 基础查询:从 1 开始
SELECT 1 AS n

UNION ALL

-- 递归查询:每次加 1
SELECT n + 1 FROM numbers WHERE n < 10
)
SELECT * FROM numbers;

结果:

| n  |
|----|
| 1 |
| 2 |
| 3 |
| 4 |
| 5 |
| 6 |
| 7 |
| 8 |
| 9 |
| 10 |

示例:生成日期序列

WITH RECURSIVE date_series AS (
SELECT DATE('2024-01-01') AS date
UNION ALL
SELECT DATE_ADD(date, INTERVAL 1 DAY)
FROM date_series
WHERE date < '2024-01-10'
)
SELECT date FROM date_series;

示例:物料清单(BOM)展开

-- 产品组成表:product_id 由 component_id 组成,数量为 quantity
WITH RECURSIVE bom_explosion AS (
-- 基础查询:顶级组件
SELECT
product_id,
component_id,
quantity,
1 AS level
FROM bill_of_materials
WHERE product_id = 'PRODUCT_A'

UNION ALL

-- 递归查询:展开子组件
SELECT
b.product_id,
b.component_id,
b.quantity * bom.quantity AS quantity,
bom.level + 1 AS level
FROM bill_of_materials b
JOIN bom_explosion bom ON b.product_id = bom.component_id
)
SELECT * FROM bom_explosion;

CTE 在数据修改中的应用

INSERT 使用 CTE

WITH new_users AS (
SELECT
name,
email,
created_at
FROM temp_users
WHERE valid = 1
)
INSERT INTO users (name, email, created_at)
SELECT * FROM new_users;

UPDATE 使用 CTE

WITH user_totals AS (
SELECT
user_id,
SUM(amount) AS total_amount
FROM orders
GROUP BY user_id
)
UPDATE users u
JOIN user_totals t ON u.id = t.user_id
SET u.total_spent = t.total_amount,
u.customer_level = CASE
WHEN t.total_amount >= 10000 THEN 'VIP'
WHEN t.total_amount >= 5000 THEN 'Gold'
ELSE 'Regular'
END;

DELETE 使用 CTE

WITH duplicate_records AS (
SELECT id
FROM (
SELECT
id,
ROW_NUMBER() OVER (PARTITION BY email ORDER BY created_at) AS rn
FROM users
) t
WHERE rn > 1
)
DELETE FROM users
WHERE id IN (SELECT id FROM duplicate_records);

实战案例

案例1:多级聚合统计

-- 计算每个用户的月度统计,然后找出月度消费最高的用户
WITH monthly_stats AS (
SELECT
user_id,
DATE_FORMAT(order_date, '%Y-%m') AS month,
COUNT(*) AS order_count,
SUM(amount) AS total_amount
FROM orders
GROUP BY user_id, DATE_FORMAT(order_date, '%Y-%m')
),
top_users AS (
SELECT
user_id,
SUM(total_amount) AS yearly_total,
AVG(total_amount) AS avg_monthly
FROM monthly_stats
GROUP BY user_id
)
SELECT
u.name,
t.yearly_total,
t.avg_monthly,
RANK() OVER (ORDER BY t.yearly_total DESC) AS spending_rank
FROM top_users t
JOIN users u ON t.user_id = u.id
ORDER BY t.yearly_total DESC
LIMIT 10;

案例2:同比环比分析

WITH monthly_sales AS (
SELECT
DATE_FORMAT(order_date, '%Y-%m') AS month,
SUM(amount) AS total_sales
FROM orders
GROUP BY DATE_FORMAT(order_date, '%Y-%m')
),
with_comparison AS (
SELECT
month,
total_sales,
LAG(total_sales, 1) OVER (ORDER BY month) AS prev_month,
LAG(total_sales, 12) OVER (ORDER BY month) AS prev_year_month
FROM monthly_sales
)
SELECT
month,
total_sales,
prev_month,
ROUND((total_sales - prev_month) / prev_month * 100, 2) AS mom_growth,
prev_year_month,
ROUND((total_sales - prev_year_month) / prev_year_month * 100, 2) AS yoy_growth
FROM with_comparison
WHERE prev_month IS NOT NULL;

案例3:分类树结构

-- 商品分类树
WITH RECURSIVE category_tree AS (
-- 顶级分类
SELECT
id,
name,
parent_id,
0 AS depth,
CAST(name AS CHAR(1000)) AS full_path
FROM categories
WHERE parent_id IS NULL

UNION ALL

-- 子分类
SELECT
c.id,
c.name,
c.parent_id,
t.depth + 1,
CONCAT(t.full_path, ' > ', c.name)
FROM categories c
JOIN category_tree t ON c.parent_id = t.id
)
SELECT
depth,
REPEAT(' ', depth) AS indent,
name,
full_path
FROM category_tree
ORDER BY full_path;

案例4:分页统计

WITH paginated_data AS (
SELECT
*,
ROW_NUMBER() OVER (ORDER BY created_at DESC) AS row_num
FROM articles
WHERE status = 'published'
)
SELECT * FROM paginated_data
WHERE row_num BETWEEN 11 AND 20;

注意事项

1. CTE 必须被引用

定义的 CTE 如果不在主查询中使用,会导致错误(某些数据库)或警告。

-- 错误:unused_cte 未被使用
WITH used_cte AS (SELECT 1),
unused_cte AS (SELECT 2)
SELECT * FROM used_cte;

2. CTE 的作用域

CTE 只能在定义后的主查询中引用,不能在其他 CTE 之前引用(除非是递归 CTE)。

-- 错误:cte2 不能引用后面定义的 cte3
WITH
cte1 AS (SELECT 1),
cte2 AS (SELECT * FROM cte3), -- 错误!
cte3 AS (SELECT 2)
SELECT * FROM cte1;

3. 递归 CTE 必须有终止条件

递归 CTE 的递归部分必须有条件限制,否则会无限循环。

-- 危险:可能无限循环!
WITH RECURSIVE infinite AS (
SELECT 1 AS n
UNION ALL
SELECT n + 1 FROM infinite -- 没有 WHERE 条件!
)
SELECT * FROM infinite LIMIT 100; -- LIMIT 只能限制最终结果

4. 性能考虑

虽然 CTE 提高了可读性,但在某些数据库中可能会影响性能:

  • 某些数据库会将 CTE 物化(存储临时结果)
  • 多次引用同一 CTE 可能导致多次计算
  • 对于简单查询,子查询可能更高效

小结

本章我们学习了:

  1. CTE 基本语法:使用 WITH 子句定义临时结果集
  2. 多个 CTE:用逗号分隔,可相互引用
  3. 递归 CTE:处理层级数据、生成序列
  4. CTE 在 DML 中使用:INSERT、UPDATE、DELETE
  5. 实战案例:多级聚合、同比环比、分类树

练习

  1. 使用 CTE 重写一个复杂的嵌套子查询
  2. 使用递归 CTE 展示组织架构层级
  3. 使用 CTE 计算用户月度消费的同比和环比
  4. 使用递归 CTE 生成一个日期范围
  5. 使用 CTE 实现分页查询