跳到主要内容

PostgreSQL 递归查询与层级数据处理

递归查询是 PostgreSQL 最强大的特性之一,它能够处理树形结构、图遍历、层级数据等传统 SQL 难以解决的问题。通过 WITH RECURSIVE 语法,你可以在 SQL 中实现类似编程语言中递归函数的效果。

递归查询的核心概念

为什么需要递归查询?

在关系型数据库中,处理层级数据(如组织架构、分类树、评论回复)一直是个挑战。传统方法需要多次查询或复杂的自连接,而递归查询提供了一种优雅的解决方案。

考虑一个简单的场景:查询某个员工的所有下属(包括直接下属和间接下属)。如果没有递归查询,你可能需要:

  • 方法一:预先知道最大层级,写 N 层自连接
  • 方法二:在应用层循环查询
  • 方法三:使用存储过程

递归查询让这一切变得简单:一条 SQL 语句就能完成。

递归 CTE 的基本结构

递归 CTE 使用 WITH RECURSIVE 语法,包含两个部分:

WITH RECURSIVE cte_name AS (
-- 基础查询(非递归部分):锚点,提供初始数据
SELECT ...

UNION [ALL]

-- 递归查询:引用自身,基于上一轮结果生成新数据
SELECT ... FROM cte_name WHERE ...
)
SELECT * FROM cte_name;

关键要点

  1. 基础查询:非递归部分,只执行一次,提供初始数据
  2. UNION ALL:连接基础查询和递归查询,UNION ALL 保留所有行,UNION 会去重
  3. 递归查询:引用 CTE 自身,基于上一轮结果生成新数据
  4. 终止条件:递归查询返回空结果集时停止

执行过程详解

理解递归查询的关键在于理解它的执行过程:

第 1 步:执行基础查询
结果:初始数据 → 放入工作表(working table)

第 2 步:执行递归查询(使用工作表数据)
结果:新数据 → 放入中间表(intermediate table)

第 3 步:检查中间表是否为空
- 不为空:将中间表数据追加到最终结果,工作表 = 中间表,重复第 2 步
- 为空:结束递归

最终:返回所有累积的结果

用一个简单的例子来说明:

-- 生成 1 到 5 的数字序列
WITH RECURSIVE numbers AS (
-- 基础查询:从 1 开始
SELECT 1 AS n

UNION ALL

-- 递归查询:每次加 1
SELECT n + 1 FROM numbers WHERE n < 5
)
SELECT * FROM numbers;

执行过程

迭代工作表递归查询结果中间表累积结果
11SELECT 1+1 WHERE 1<521
22SELECT 2+1 WHERE 2<531,2
33SELECT 3+1 WHERE 3<541,2,3
44SELECT 4+1 WHERE 4<551,2,3,4
55SELECT 5+1 WHERE 5<51,2,3,4,5
6停止-1,2,3,4,5

树形结构查询

树形结构是递归查询最常见的应用场景。让我们通过一个组织架构的例子来深入理解。

准备测试数据

-- 创建员工表(包含上级关系)
CREATE TABLE employees (
id SERIAL PRIMARY KEY,
name VARCHAR(50) NOT NULL,
title VARCHAR(50),
manager_id INTEGER REFERENCES employees(id),
hire_date DATE
);

-- 插入组织架构数据
INSERT INTO employees (name, title, manager_id, hire_date) VALUES
('张CEO', '首席执行官', NULL, '2010-01-01'),
('李CTO', '首席技术官', 1, '2011-03-15'),
('王CFO', '首席财务官', 1, '2011-06-01'),
('赵总监', '技术总监', 2, '2012-02-01'),
('钱总监', '产品总监', 2, '2012-05-01'),
('孙经理', '财务经理', 3, '2013-01-10'),
('周主管', '开发主管', 4, '2014-03-20'),
('吴主管', '测试主管', 4, '2014-06-15'),
('郑工程师', '高级工程师', 7, '2015-01-05'),
('冯工程师', '工程师', 7, '2016-07-01'),
('陈工程师', '工程师', 7, '2017-03-15'),
('楚工程师', '测试工程师', 8, '2016-09-01');

查询下属树

查询某个员工的所有下属(包括直接和间接下属):

-- 查询 CTO 的所有下属
WITH RECURSIVE subordinate_tree AS (
-- 基础查询:找到 CTO(id = 2)
SELECT
id,
name,
title,
manager_id,
1 AS level,
name::TEXT AS path,
ARRAY[id] AS id_path
FROM employees
WHERE id = 2

UNION ALL

-- 递归查询:找到每个上级的直接下属
SELECT
e.id,
e.name,
e.title,
e.manager_id,
st.level + 1,
st.path || ' > ' || e.name,
st.id_path || e.id
FROM employees e
INNER JOIN subordinate_tree st ON e.manager_id = st.id
)
SELECT
level,
REPEAT(' ', level - 1) || name AS org_chart,
title,
path
FROM subordinate_tree
ORDER BY id_path;

结果

 level |        org_chart         |    title     |             path
-------+--------------------------+--------------+-------------------------------
1 | 李CTO | 首席技术官 | 李CTO
2 | 赵总监 | 技术总监 | 李CTO > 赵总监
2 | 钱总监 | 产品总监 | 李CTO > 钱总监
3 | 周主管 | 开发主管 | 李CTO > 赵总监 > 周主管
3 | 吴主管 | 测试主管 | 李CTO > 赵总监 > 吴主管
4 | 郑工程师 | 高级工程师 | 李CTO > 赵总监 > 周主管 > 郑工程师
4 | 冯工程师 | 工程师 | 李CTO > 赵总监 > 周主管 > 冯工程师
4 | 陈工程师 | 工程师 | 李CTO > 赵总监 > 周主管 > 陈工程师
4 | 楚工程师 | 测试工程师 | 李CTO > 赵总监 > 吴主管 > 楚工程师

查询上级链

查询某个员工的所有上级(从直接上级到顶级):

-- 查询某员工的所有上级
WITH RECURSIVE manager_chain AS (
-- 基础查询:从目标员工开始
SELECT
id,
name,
title,
manager_id,
1 AS level
FROM employees
WHERE id = 10 -- 冯工程师

UNION ALL

-- 递归查询:向上查找上级
SELECT
e.id,
e.name,
e.title,
e.manager_id,
mc.level + 1
FROM employees e
INNER JOIN manager_chain mc ON e.id = mc.manager_id
)
SELECT
level,
name,
title
FROM manager_chain
ORDER BY level DESC;

结果

 level |  name   |    title
-------+---------+--------------
4 | 张CEO | 首席执行官
3 | 李CTO | 首席技术官
2 | 赵总监 | 技术总监
1 | 周主管 | 开发主管

计算层级深度

-- 计算每个员工到 CEO 的层级深度
WITH RECURSIVE depth_calc AS (
-- 基础查询:CEO 深度为 0
SELECT
id,
name,
manager_id,
0 AS depth
FROM employees
WHERE manager_id IS NULL

UNION ALL

-- 递归查询:每层深度加 1
SELECT
e.id,
e.name,
e.manager_id,
dc.depth + 1
FROM employees e
INNER JOIN depth_calc dc ON e.manager_id = dc.id
)
SELECT
name,
title,
depth
FROM depth_calc d
JOIN employees e ON d.id = e.id
ORDER BY depth, name;

分类树查询

电商系统中的商品分类是典型的树形结构。

准备测试数据

-- 创建分类表
CREATE TABLE categories (
id SERIAL PRIMARY KEY,
name VARCHAR(50) NOT NULL,
parent_id INTEGER REFERENCES categories(id),
level INTEGER DEFAULT 1
);

-- 插入分类数据
INSERT INTO categories (name, parent_id, level) VALUES
('电子产品', NULL, 1),
('服装', NULL, 1),
('食品', NULL, 1),
('手机', 1, 2),
('电脑', 1, 2),
('平板', 1, 2),
('男装', 2, 2),
('女装', 2, 2),
('智能手机', 4, 3),
('功能手机', 4, 3),
('笔记本', 5, 3),
('台式机', 5, 3),
('T恤', 7, 3),
('衬衫', 7, 3),
('连衣裙', 8, 3);

查询分类树

-- 查询完整分类树
WITH RECURSIVE category_tree AS (
-- 基础查询:顶级分类
SELECT
id,
name,
parent_id,
level,
name::TEXT AS path,
ARRAY[id] AS id_path
FROM categories
WHERE parent_id IS NULL

UNION ALL

-- 递归查询:子分类
SELECT
c.id,
c.name,
c.parent_id,
c.level,
ct.path || ' > ' || c.name,
ct.id_path || c.id
FROM categories c
INNER JOIN category_tree ct ON c.parent_id = ct.id
)
SELECT
REPEAT(' ', level - 1) || name AS tree_view,
path
FROM category_tree
ORDER BY id_path;

查询某分类的所有父级

-- 查询"智能手机"分类的完整路径
WITH RECURSIVE parent_chain AS (
SELECT id, name, parent_id
FROM categories
WHERE id = 9 -- 智能手机

UNION ALL

SELECT c.id, c.name, c.parent_id
FROM categories c
INNER JOIN parent_chain pc ON c.id = pc.parent_id
)
SELECT string_agg(name, ' > ' ORDER BY id) AS full_path
FROM parent_chain;
-- 结果:电子产品 > 手机 > 智能手机

查询某分类的所有子分类

-- 查询"电子产品"下的所有子分类
WITH RECURSIVE subcategories AS (
SELECT id, name, parent_id, level
FROM categories
WHERE id = 1 -- 电子产品

UNION ALL

SELECT c.id, c.name, c.parent_id, c.level
FROM categories c
INNER JOIN subcategories sc ON c.parent_id = sc.id
)
SELECT * FROM subcategories WHERE id != 1; -- 排除自身

图遍历

图结构比树结构更复杂,因为节点之间可能存在循环引用。递归查询同样可以处理图遍历问题。

准备测试数据

-- 创建图结构表(社交网络关注关系)
CREATE TABLE follows (
user_id INTEGER,
follows_id INTEGER,
created_at TIMESTAMP DEFAULT NOW(),
PRIMARY KEY (user_id, follows_id)
);

-- 插入关注关系(可能存在循环)
INSERT INTO follows (user_id, follows_id) VALUES
(1, 2),
(1, 3),
(2, 3),
(2, 4),
(3, 4),
(3, 5),
(4, 5),
(5, 1); -- 形成循环:5 -> 1

CREATE TABLE users (
id INTEGER PRIMARY KEY,
name VARCHAR(50)
);

INSERT INTO users (id, name) VALUES
(1, '张三'),
(2, '李四'),
(3, '王五'),
(4, '赵六'),
(5, '孙七');

简单图遍历(可能无限循环)

-- 警告:这个查询会无限循环!
WITH RECURSIVE reach AS (
SELECT user_id, follows_id, 1 AS depth
FROM follows
WHERE user_id = 1

UNION ALL

SELECT f.user_id, f.follows_id, r.depth + 1
FROM follows f
INNER JOIN reach r ON f.user_id = r.follows_id
WHERE r.depth < 10 -- 必须限制深度
)
SELECT u1.name AS follower, u2.name AS following, depth
FROM reach r
JOIN users u1 ON r.user_id = u1.id
JOIN users u2 ON r.follows_id = u2.id
ORDER BY depth;

使用循环检测

PostgreSQL 提供了内置的 CYCLE 子句来检测循环:

-- 使用 CYCLE 子句自动检测循环
WITH RECURSIVE reach AS (
SELECT user_id, follows_id, 1 AS depth
FROM follows
WHERE user_id = 1

UNION ALL

SELECT f.user_id, f.follows_id, r.depth + 1
FROM follows f
INNER JOIN reach r ON f.user_id = r.follows_id
WHERE r.depth < 10
) CYCLE follows_id SET is_cycle USING path
SELECT
u1.name AS from_user,
u2.name AS to_user,
depth,
is_cycle,
path
FROM reach r
JOIN users u1 ON r.user_id = u1.id
JOIN users u2 ON r.follows_id = u2.id
WHERE NOT is_cycle -- 排除循环部分
ORDER BY depth;

手动实现循环检测

-- 手动实现循环检测
WITH RECURSIVE reach AS (
SELECT
user_id,
follows_id,
1 AS depth,
ARRAY[follows_id] AS visited,
FALSE AS is_cycle
FROM follows
WHERE user_id = 1

UNION ALL

SELECT
f.user_id,
f.follows_id,
r.depth + 1,
r.visited || f.follows_id,
f.follows_id = ANY(r.visited) -- 检查是否已访问
FROM follows f
INNER JOIN reach r ON f.user_id = r.follows_id
WHERE NOT r.is_cycle -- 如果已检测到循环,停止
AND r.depth < 10
)
SELECT
u1.name AS from_user,
u2.name AS to_user,
depth,
visited AS path_ids
FROM reach r
JOIN users u1 ON r.user_id = u1.id
JOIN users u2 ON r.follows_id = u2.id
WHERE NOT is_cycle
ORDER BY depth;

搜索顺序控制

递归查询默认按广度优先顺序执行。但你可以控制结果的输出顺序。

深度优先搜索

使用 SEARCH DEPTH FIRST 子句:

-- 深度优先搜索
WITH RECURSIVE org_tree AS (
SELECT
id,
name,
manager_id,
1 AS level
FROM employees
WHERE manager_id IS NULL

UNION ALL

SELECT
e.id,
e.name,
e.manager_id,
ot.level + 1
FROM employees e
INNER JOIN org_tree ot ON e.manager_id = ot.id
) SEARCH DEPTH FIRST BY id SET order_col
SELECT
REPEAT(' ', level - 1) || name AS tree_view,
level
FROM org_tree
ORDER BY order_col;

深度优先的特点:先遍历完一个分支再遍历下一个分支。

广度优先搜索

使用 SEARCH BREADTH FIRST 子句:

-- 广度优先搜索
WITH RECURSIVE org_tree AS (
SELECT
id,
name,
manager_id,
1 AS level
FROM employees
WHERE manager_id IS NULL

UNION ALL

SELECT
e.id,
e.name,
e.manager_id,
ot.level + 1
FROM employees e
INNER JOIN org_tree ot ON e.manager_id = ot.id
) SEARCH BREADTH FIRST BY id SET order_col
SELECT
REPEAT(' ', level - 1) || name AS tree_view,
level
FROM org_tree
ORDER BY order_col;

广度优先的特点:按层级顺序遍历,先遍历同一层级的所有节点。

手动实现搜索顺序

-- 手动实现深度优先(使用路径数组)
WITH RECURSIVE org_tree AS (
SELECT
id,
name,
manager_id,
1 AS level,
ARRAY[id] AS path
FROM employees
WHERE manager_id IS NULL

UNION ALL

SELECT
e.id,
e.name,
e.manager_id,
ot.level + 1,
ot.path || e.id
FROM employees e
INNER JOIN org_tree ot ON e.manager_id = ot.id
)
SELECT
REPEAT(' ', level - 1) || name AS tree_view,
level
FROM org_tree
ORDER BY path;

-- 手动实现广度优先(使用层级)
WITH RECURSIVE org_tree AS (
SELECT
id,
name,
manager_id,
1 AS level
FROM employees
WHERE manager_id IS NULL

UNION ALL

SELECT
e.id,
e.name,
e.manager_id,
ot.level + 1
FROM employees e
INNER JOIN org_tree ot ON e.manager_id = ot.id
)
SELECT
REPEAT(' ', level - 1) || name AS tree_view,
level
FROM org_tree
ORDER BY level, name;

实际应用场景

1. 菜单权限继承

-- 菜单表
CREATE TABLE menus (
id SERIAL PRIMARY KEY,
name VARCHAR(50),
parent_id INTEGER REFERENCES menus(id),
permission_code VARCHAR(50)
);

-- 查询用户可访问的所有菜单(包含继承的权限)
WITH RECURSIVE menu_tree AS (
-- 基础查询:用户直接有权限的菜单
SELECT m.*, 1 AS level
FROM menus m
JOIN user_permissions up ON m.permission_code = up.permission_code
WHERE up.user_id = :user_id

UNION

-- 递归查询:父菜单(有子菜单权限则可以访问父菜单)
SELECT m.*, mt.level + 1
FROM menus m
INNER JOIN menu_tree mt ON m.id = mt.parent_id
)
SELECT DISTINCT * FROM menu_tree ORDER BY level DESC;

2. 物料清单展开

-- 产品组件表
CREATE TABLE bill_of_materials (
product_id INTEGER,
component_id INTEGER,
quantity NUMERIC(10, 2),
PRIMARY KEY (product_id, component_id)
);

-- 展开产品所需的所有组件
WITH RECURSIVE component_explosion AS (
SELECT
product_id,
component_id,
quantity,
1 AS level,
ARRAY[component_id] AS path
FROM bill_of_materials
WHERE product_id = :product_id

UNION ALL

SELECT
bom.product_id,
bom.component_id,
ce.quantity * bom.quantity,
ce.level + 1,
ce.path || bom.component_id
FROM bill_of_materials bom
INNER JOIN component_explosion ce ON bom.product_id = ce.component_id
WHERE bom.component_id != ALL(ce.path) -- 防止循环
)
SELECT
component_id,
SUM(quantity) AS total_quantity,
MAX(level) AS max_depth
FROM component_explosion
GROUP BY component_id;

3. 评论回复树

-- 评论表
CREATE TABLE comments (
id SERIAL PRIMARY KEY,
article_id INTEGER,
parent_id INTEGER REFERENCES comments(id),
user_id INTEGER,
content TEXT,
created_at TIMESTAMP
);

-- 查询文章的所有评论(树形结构)
WITH RECURSIVE comment_tree AS (
SELECT
id,
parent_id,
user_id,
content,
created_at,
1 AS level,
ARRAY[id] AS path
FROM comments
WHERE article_id = :article_id AND parent_id IS NULL

UNION ALL

SELECT
c.id,
c.parent_id,
c.user_id,
c.content,
c.created_at,
ct.level + 1,
ct.path || c.id
FROM comments c
INNER JOIN comment_tree ct ON c.parent_id = ct.id
)
SELECT
REPEAT(' ', level - 1) || content AS comment_tree,
created_at
FROM comment_tree
ORDER BY path;

4. 生成时间序列

-- 生成日期序列
WITH RECURSIVE date_series AS (
SELECT '2024-01-01'::DATE AS date

UNION ALL

SELECT date + INTERVAL '1 day'
FROM date_series
WHERE date < '2024-12-31'
)
SELECT date FROM date_series;

-- 生成时间槽(每 15 分钟)
WITH RECURSIVE time_slots AS (
SELECT '00:00:00'::TIME AS slot

UNION ALL

SELECT slot + INTERVAL '15 minutes'
FROM time_slots
WHERE slot < '23:45:00'
)
SELECT slot FROM time_slots;

5. 分解数字

-- 分解整数为各位数字
WITH RECURSIVE digits AS (
SELECT
12345 AS original,
12345 AS remaining,
ARRAY[]::INTEGER[] AS result

UNION ALL

SELECT
original,
remaining / 10,
result || (remaining % 10)
FROM digits
WHERE remaining > 0
)
SELECT original, array_reverse(result) AS digits
FROM digits
WHERE remaining = 0;

性能优化

添加深度限制

-- 始终添加深度限制,防止无限递归
WITH RECURSIVE tree AS (
SELECT id, parent_id, 1 AS depth
FROM nodes
WHERE parent_id IS NULL

UNION ALL

SELECT n.id, n.parent_id, t.depth + 1
FROM nodes n
INNER JOIN tree t ON n.parent_id = t.id
WHERE t.depth < 20 -- 限制最大深度
)
SELECT * FROM tree;

使用索引

-- 为递归查询中的连接条件创建索引
CREATE INDEX idx_employees_manager ON employees(manager_id);
CREATE INDEX idx_categories_parent ON categories(parent_id);

物化控制

-- 对于多次引用的 CTE,考虑强制物化
WITH RECURSIVE tree AS MATERIALIZED (
SELECT id, parent_id FROM nodes WHERE parent_id IS NULL
UNION ALL
SELECT n.id, n.parent_id FROM nodes n, tree t WHERE n.parent_id = t.id
)
SELECT * FROM tree t1 JOIN tree t2 ON ...;

使用 LIMIT 测试

-- 开发测试时使用 LIMIT 防止无限循环
WITH RECURSIVE infinite AS (
SELECT 1 AS n
UNION ALL
SELECT n + 1 FROM infinite
)
SELECT * FROM infinite LIMIT 100;

最佳实践

1. 明确终止条件

-- 好的做法:明确的终止条件
WITH RECURSIVE tree AS (
SELECT id, parent_id, 1 AS depth
FROM nodes WHERE condition

UNION ALL

SELECT n.id, n.parent_id, t.depth + 1
FROM nodes n, tree t
WHERE n.parent_id = t.id
AND t.depth < 20 -- 明确限制
AND some_other_condition -- 其他终止条件
)
SELECT * FROM tree;

2. 使用路径数组

-- 路径数组用于排序和循环检测
WITH RECURSIVE tree AS (
SELECT
id,
parent_id,
ARRAY[id] AS path
FROM nodes WHERE parent_id IS NULL

UNION ALL

SELECT n.id, n.parent_id, t.path || n.id
FROM nodes n, tree t
WHERE n.parent_id = t.id
AND n.id != ALL(t.path) -- 防止循环
)
SELECT * FROM tree ORDER BY path;

3. 返回层级信息

-- 返回层级信息便于前端展示
WITH RECURSIVE tree AS (
SELECT
id,
parent_id,
name,
1 AS level,
name::TEXT AS path_str
FROM nodes WHERE parent_id IS NULL

UNION ALL

SELECT
n.id,
n.parent_id,
n.name,
t.level + 1,
t.path_str || ' > ' || n.name
FROM nodes n, tree t
WHERE n.parent_id = t.id
)
SELECT
REPEAT(' ', level - 1) || name AS indented_name,
level,
path_str
FROM tree;

小结

本章我们学习了 PostgreSQL 递归查询的核心概念和应用:

  1. 基本结构:基础查询 + UNION ALL + 递归查询
  2. 执行过程:迭代执行,直到递归查询返回空结果
  3. 树形结构:组织架构、分类树、评论回复
  4. 图遍历:社交网络、引用关系、循环检测
  5. 搜索顺序:深度优先、广度优先
  6. 实际应用:权限继承、物料清单、时间序列生成
  7. 性能优化:深度限制、索引、物化控制

递归查询是处理层级数据的强大工具,掌握它能让你在面对复杂的数据结构时游刃有余。

练习

  1. 创建一个递归查询,计算斐波那契数列的前 20 项
  2. 查询员工表中某员工的所有上级链,并显示完整路径
  3. 使用递归查询生成一个日历表,包含 2024 年每一天及其所属的周、月、季度
  4. 实现一个递归查询,找出社交网络中两个人之间的最短路径
  5. 使用递归查询将一个 JSON 字符串解析为键值对

参考资源