跳到主要内容

PostgreSQL 查询详解

SELECT 语句是 SQL 最核心的操作,本章将详细介绍 PostgreSQL 中的各种查询技术。

基础查询

SELECT 语法结构

SELECT [DISTINCT] columns [AS alias]
FROM table_name
[JOIN ...]
[WHERE conditions]
[GROUP BY columns]
[HAVING conditions]
[ORDER BY columns [ASC|DESC]]
[LIMIT number] [OFFSET number];

基本查询示例

首先创建测试数据:

-- 创建示例表
CREATE TABLE employees (
id SERIAL PRIMARY KEY,
name VARCHAR(50) NOT NULL,
department VARCHAR(50),
salary NUMERIC(10, 2),
hire_date DATE,
status VARCHAR(20) DEFAULT 'active'
);

-- 插入测试数据
INSERT INTO employees (name, department, salary, hire_date, status) VALUES
('张三', '技术部', 15000, '2020-01-15', 'active'),
('李四', '技术部', 12000, '2021-03-20', 'active'),
('王五', '市场部', 10000, '2019-06-10', 'active'),
('赵六', '市场部', 11000, '2022-02-05', 'inactive'),
('孙七', '财务部', 13000, '2020-08-01', 'active'),
('周八', '财务部', 9000, '2023-01-10', 'active');

查询所有列

-- 查询所有数据
SELECT * FROM employees;

-- 查询指定列
SELECT name, department, salary FROM employees;

使用表达式

-- 计算字段
SELECT
name,
salary,
salary * 12 AS annual_salary
FROM employees;

-- 使用函数
SELECT
name,
UPPER(name) AS name_upper,
LOWER(department) AS dept_lower
FROM employees;

-- 条件表达式
SELECT
name,
salary,
CASE
WHEN salary >= 12000 THEN '高收入'
WHEN salary >= 10000 THEN '中等收入'
ELSE '低收入'
END AS salary_level
FROM employees;

DISTINCT 去重

-- 查询所有部门(去重)
SELECT DISTINCT department FROM employees;

-- 多列去重
SELECT DISTINCT department, status FROM employees;

-- 使用 DISTINCT ON(PostgreSQL 特有)
SELECT DISTINCT ON (department) * FROM employees
ORDER BY department, salary DESC;

WHERE 条件查询

基础条件

-- 等于
SELECT * FROM employees WHERE department = '技术部';

-- 不等于
SELECT * FROM employees WHERE department != '财务部';

-- 大于、小于
SELECT * FROM employees WHERE salary > 10000;
SELECT * FROM employees WHERE hire_date < '2021-01-01';

-- 大于等于、小于等于
SELECT * FROM employees WHERE salary >= 10000;
SELECT * FROM employees WHERE salary <= 15000;

组合条件

-- AND 条件
SELECT * FROM employees
WHERE department = '技术部' AND salary > 10000;

-- OR 条件
SELECT * FROM employees
WHERE department = '技术部' OR department = '市场部';

-- NOT 条件
SELECT * FROM employees WHERE NOT status = 'active';

-- 复杂组合
SELECT * FROM employees
WHERE (department = '技术部' OR department = '市场部')
AND salary >= 10000
AND status = 'active';

IN 和 BETWEEN

-- IN:多个值匹配
SELECT * FROM employees
WHERE department IN ('技术部', '市场部', '财务部');

-- NOT IN
SELECT * FROM employees
WHERE department NOT IN ('技术部');

-- BETWEEN:范围查询(包含边界)
SELECT * FROM employees
WHERE salary BETWEEN 10000 AND 15000;

-- NOT BETWEEN
SELECT * FROM employees
WHERE salary NOT BETWEEN 10000 AND 15000;

LIKE 和 ILIKE

-- LIKE:区分大小写的模式匹配
-- %:任意字符(0个或多个)
-- _:任意单个字符

-- 以"张"开头
SELECT * FROM employees WHERE name LIKE '张%';

-- 以"张"开头且名字只有2个字
SELECT * FROM employees WHERE name LIKE '张_';

-- 包含"三"
SELECT * FROM employees WHERE name LIKE '%三%';

-- 不以"张"开头
SELECT * FROM employees WHERE name NOT LIKE '张%';

-- ILIKE:不区分大小写
SELECT * FROM employees WHERE name ILIKE '%ZHANG%';

NULL 处理

-- 创建带 NULL 的测试表
CREATE TABLE products (
id SERIAL PRIMARY KEY,
name VARCHAR(50),
price NUMERIC(10, 2),
discount NUMERIC(10, 2)
);

INSERT INTO products (name, price, discount) VALUES
('A', 100, 10),
('B', 200, NULL),
('C', NULL, 20);

-- IS NULL:查找 NULL 值
SELECT * FROM products WHERE discount IS NULL;

-- IS NOT NULL:查找非 NULL 值
SELECT * FROM products WHERE discount IS NOT NULL;

-- COALESCE:返回第一个非 NULL 值
SELECT name, COALESCE(price, 0) AS price FROM products;
SELECT name, COALESCE(price, 0) - COALESCE(discount, 0) AS final_price FROM products;

-- NULLIF:返回 NULL 如果两个值相等
SELECT NULLIF(10, 10); -- NULL
SELECT NULLIF(10, 20); -- 10

ORDER BY 排序

-- 升序(默认)
SELECT * FROM employees ORDER BY salary;

-- 降序
SELECT * FROM employees ORDER BY salary DESC;

-- 多列排序
SELECT * FROM employees
ORDER BY department ASC, salary DESC;

-- 按表达式排序
SELECT * FROM employees
ORDER BY LENGTH(name);

-- 按列位置排序(第2列)
SELECT * FROM employees ORDER BY 2;

-- NULL 值排序位置
SELECT * FROM employees ORDER BY discount NULLS LAST;
SELECT * FROM employees ORDER BY discount NULLS FIRST;

LIMIT 和 OFFSET 分页

-- LIMIT:限制返回行数
SELECT * FROM employees LIMIT 3;

-- OFFSET:跳过前 N 行
SELECT * FROM employees OFFSET 3;

-- 分页查询
SELECT * FROM employees
ORDER BY id
LIMIT 10 OFFSET 0; -- 第1页

SELECT * FROM employees
ORDER BY id
LIMIT 10 OFFSET 10; -- 第2页

-- PostgreSQL 特有:FETCH(兼容 SQL 标准)
SELECT * FROM employees
ORDER BY id
FETCH FIRST 10 ROWS ONLY;

-- 获取随机行
SELECT * FROM employees ORDER BY RANDOM() LIMIT 1;

GROUP BY 分组

基本分组

-- 按部门分组
SELECT department FROM employees GROUP BY department;

-- 分组 + 聚合
SELECT
department,
COUNT(*) AS employee_count,
AVG(salary) AS avg_salary,
MAX(salary) AS max_salary,
MIN(salary) AS min_salary,
SUM(salary) AS total_salary
FROM employees
GROUP BY department;

HAVING 过滤分组

-- WHERE 和 HAVING 的区别
-- WHERE:过滤行(在分组前)
-- HAVING:过滤分组(在分组后)

-- 统计各部门人数,只显示人数大于1的部门
SELECT
department,
COUNT(*) AS count
FROM employees
GROUP BY department
HAVING COUNT(*) > 1;

-- 统计各部门平均工资,只显示平均工资大于10000的
SELECT
department,
AVG(salary) AS avg_salary
FROM employees
WHERE status = 'active'
GROUP BY department
HAVING AVG(salary) > 10000
ORDER BY avg_salary DESC;

GROUP BY 扩展

-- GROUP BY ROLLUP:生成小计和总计
SELECT
COALESCE(department, '总计') AS department,
COUNT(*) AS count,
SUM(salary) AS total
FROM employees
GROUP BY ROLLUP(department);

-- GROUP BY CUBE:生成所有组合的小计
SELECT
department,
status,
COUNT(*) AS count
FROM employees
GROUP BY CUBE(department, status);

-- GROUPING SETS:指定分组集合
SELECT
department,
status,
COUNT(*) AS count
FROM employees
GROUP BY GROUPING SETS (
(department), -- 按部门
(status), -- 按状态
() -- 总计
);

-- 使用 GROUPING 判断是否是小计行
SELECT
GROUPING(department) AS is_dept_subtotal,
department,
COUNT(*) AS count
FROM employees
GROUP BY ROLLUP(department);

聚合函数

常用聚合函数

-- COUNT:计数
SELECT COUNT(*) FROM employees; -- 总行数
SELECT COUNT(discount) FROM products; -- 非 NULL 行数
SELECT COUNT(DISTINCT department) FROM employees; -- 不同值数量

-- SUM:求和
SELECT SUM(salary) FROM employees;

-- AVG:平均值
SELECT AVG(salary) FROM employees;

-- MAX/MIN:最大值/最小值
SELECT MAX(salary), MIN(salary) FROM employees;

-- 字符串聚合
SELECT STRING_AGG(name, ',') FROM employees;
SELECT STRING_AGG(name, ',' ORDER BY salary DESC) FROM employees;
SELECT STRING_AGG(DISTINCT department, ',') FROM employees;

数组聚合

-- ARRAY_AGG:聚合为数组
SELECT ARRAY_AGG(name) FROM employees;
SELECT ARRAY_AGG(name ORDER BY salary DESC) FROM employees;

-- JSON_AGG:聚合为 JSON 数组
SELECT JSON_AGG(name) FROM employees;
SELECT JSON_AGG(*) FROM employees;

-- JSONB_AGG:聚合为 JSONB 数组
SELECT JSONB_AGG(name) FROM employees;

-- JSON_OBJECT_AGG:聚合为 JSON 对象
SELECT JSON_OBJECT_AGG(name, salary) FROM employees;

窗口聚合

-- 窗口函数:在不分组的情况下计算聚合
SELECT
name,
department,
salary,
AVG(salary) OVER () AS avg_salary, -- 全部平均
SUM(salary) OVER (PARTITION BY department) AS dept_total -- 部门合计
FROM employees;

-- 排名函数
SELECT
name,
department,
salary,
RANK() OVER (ORDER BY salary DESC) AS rank, -- 跳跃排名
DENSE_RANK() OVER (ORDER BY salary DESC) AS dense_rank, -- 连续排名
ROW_NUMBER() OVER (ORDER BY salary DESC) AS row_num -- 行号
FROM employees;

-- 前后值
SELECT
name,
salary,
LAG(salary) OVER (ORDER BY salary DESC) AS prev_salary,
LEAD(salary) OVER (ORDER BY salary DESC) AS next_salary
FROM employees;

-- 首尾值
SELECT
name,
salary,
FIRST_VALUE(salary) OVER (ORDER BY salary DESC) AS highest,
LAST_VALUE(salary) OVER (ORDER BY salary DESC) AS lowest
FROM employees;

-- 百分位数
SELECT
department,
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY salary) AS median,
PERCENTILE_DISC(0.5) WITHIN GROUP (ORDER BY salary) AS median_disc
FROM employees
GROUP BY department;```

## 窗口函数详解

窗口函数是 PostgreSQL 最强大的特性之一,它允许你在保持原有行数不变的情况下进行聚合计算。理解窗口函数的关键在于理解"窗口"的概念——窗口定义了函数操作的数据范围。

### 窗口函数的基本语法

```sql
function_name([arguments]) OVER (
[PARTITION BY partition_expression]
[ORDER BY sort_expression [ASC|DESC] [NULLS {FIRST|LAST}]]
[frame_clause]
)

各部分说明

  • PARTITION BY:将数据分组,每个分组独立计算窗口函数。如果不指定,整个结果集作为一个分组。
  • ORDER BY:在分组内排序,影响某些窗口函数的计算结果(如排名函数)。
  • frame_clause:定义窗口帧,进一步限制窗口内的数据范围。

PARTITION BY 的作用

PARTITION BY 类似于 GROUP BY,但它不会减少结果行数。每个分区内的行独立计算:

-- 计算每个员工的工资在其部门中的排名
SELECT
name,
department,
salary,
RANK() OVER (PARTITION BY department ORDER BY salary DESC) AS dept_rank,
-- 部门平均工资
ROUND(AVG(salary) OVER (PARTITION BY department), 2) AS dept_avg,
-- 部门人数
COUNT(*) OVER (PARTITION BY department) AS dept_count
FROM employees;

-- 结果说明:
-- name | department | salary | dept_rank | dept_avg | dept_count
-- ------+------------+---------+-----------+----------+------------
-- 张三 | 技术部 | 15000 | 1 | 13500 | 2
-- 李四 | 技术部 | 12000 | 2 | 13500 | 2
-- 王五 | 市场部 | 10000 | 2 | 10500 | 2
-- 赵六 | 市场部 | 11000 | 1 | 10500 | 2

窗口帧(Window Frame)

窗口帧是窗口函数中最灵活但也最容易被忽视的部分。它定义了在当前行周围计算聚合的行范围。

帧语法

{ROWS | RANGE | GROUPS} frame_start [frame_exclusion]
{ROWS | RANGE | GROUPS} BETWEEN frame_start AND frame_end [frame_exclusion]

帧边界选项

边界说明
UNBOUNDED PRECEDING分区第一行
n PRECEDING当前行之前的第 n 行
CURRENT ROW当前行
n FOLLOWING当前行之后的第 n 行
UNBOUNDED FOLLOWING分区最后一行

ROWS vs RANGE 的关键区别

  • ROWS:基于物理行位置,与实际数据值无关
  • RANGE:基于逻辑值范围,考虑 ORDER BY 列的值
-- ROWS:计算当前行及前两行的移动平均
SELECT
name,
salary,
ROUND(AVG(salary) OVER (
ORDER BY salary
ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
), 2) AS moving_avg
FROM employees;

-- 结果示例:
-- name | salary | moving_avg
-- ------+--------+------------
-- 周八 | 9000 | 9000.00 -- 只有当前行
-- 王五 | 10000 | 9500.00 -- (9000+10000)/2
-- 赵六 | 11000 | 10000.00 -- (9000+10000+11000)/3
-- 李四 | 12000 | 11000.00 -- (10000+11000+12000)/3
-- 孙七 | 13000 | 12000.00 -- (11000+12000+13000)/3
-- 张三 | 15000 | 13333.33 -- (12000+13000+15000)/3

-- RANGE:基于值的范围计算
-- 查找工资在当前值 ±1000 范围内的平均值
SELECT
name,
salary,
ROUND(AVG(salary) OVER (
ORDER BY salary
RANGE BETWEEN 1000 PRECEDING AND 1000 FOLLOWING
), 2) AS range_avg
FROM employees;

常用窗口帧模式

-- 1. 累计求和(从分区开始到当前行)
SELECT
name,
department,
salary,
SUM(salary) OVER (
PARTITION BY department
ORDER BY salary
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
) AS running_total
FROM employees;

-- 2. 移动平均(最近 3 行)
SELECT
name,
salary,
ROUND(AVG(salary) OVER (
ORDER BY salary
ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
), 2) AS moving_avg_3
FROM employees;

-- 3. 滑动窗口(前后各一行)
SELECT
name,
salary,
MIN(salary) OVER (
ORDER BY salary
ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING
) AS local_min,
MAX(salary) OVER (
ORDER BY salary
ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING
) AS local_max
FROM employees;

-- 4. 累计最大值
SELECT
name,
salary,
MAX(salary) OVER (
ORDER BY salary
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
) AS running_max
FROM employees;

排名函数详解

三种排名函数的区别:

SELECT 
name,
salary,
ROW_NUMBER() OVER (ORDER BY salary DESC) AS row_num,
RANK() OVER (ORDER BY salary DESC) AS rank,
DENSE_RANK() OVER (ORDER BY salary DESC) AS dense_rank
FROM employees;

-- 假设有两个员工工资相同(比如都是 12000)
-- 结果对比:
-- name | salary | row_num | rank | dense_rank
-- -----+--------+---------+------+-----------
-- 张三 | 15000 | 1 | 1 | 1
-- 李四 | 12000 | 2 | 2 | 2
-- 王五 | 12000 | 3 | 2 | 2 ← 相同工资,排名相同
-- 赵六 | 11000 | 4 | 4 | 3 ← RANK 跳跃,DENSE_RANK 连续
-- 孙七 | 10000 | 5 | 5 | 4

-- ROW_NUMBER:每行唯一编号,不考虑重复值
-- RANK:相同值排名相同,但会跳跃(1,2,2,4,5)
-- DENSE_RANK:相同值排名相同,不跳跃(1,2,2,3,4)

LAG 和 LEAD 函数

这两个函数用于访问相邻行的数据,非常适合计算增长率、比较前后值:

-- 计算工资增长率
WITH salary_history AS (
SELECT
name,
EXTRACT(YEAR FROM hire_date) AS year,
salary
FROM employees
)
SELECT
name,
year,
salary,
LAG(salary) OVER (PARTITION BY name ORDER BY year) AS prev_salary,
ROUND(
(salary - LAG(salary) OVER (PARTITION BY name ORDER BY year)) /
LAG(salary) OVER (PARTITION BY name ORDER BY year) * 100,
2) AS growth_rate
FROM salary_history;

-- LAG/LEAD 带默认值
SELECT
name,
salary,
LAG(salary, 1, 0) OVER (ORDER BY salary) AS prev_with_default,
LEAD(salary, 2, NULL) OVER (ORDER BY salary) AS next_next
FROM employees;

-- LAG(salary, 1, 0):取前一行的 salary,如果没有则默认为 0
-- LEAD(salary, 2, NULL):取后两行的 salary,如果没有则默认为 NULL

NTILE 函数:数据分组

NTILE(n) 将数据分成 n 个大致相等的组:

-- 将员工按工资分成 4 个等级(每组人数大致相等)
SELECT
name,
salary,
NTILE(4) OVER (ORDER BY salary DESC) AS quartile
FROM employees;

-- 结果:
-- name | salary | quartile
-- -----+--------+----------
-- 张三 | 15000 | 1 -- 前 25%
-- 李四 | 12000 | 1
-- 孙七 | 13000 | 2 -- 25%-50%
-- 赵六 | 11000 | 2
-- 王五 | 10000 | 3 -- 50%-75%
-- 周八 | 9000 | 4 -- 后 25%

窗口函数与聚合函数结合

-- 计算每个员工工资占部门总工资的比例
SELECT
name,
department,
salary,
ROUND(salary / SUM(salary) OVER (PARTITION BY department) * 100, 2) AS pct_of_dept,
-- 工资占公司总工资的比例
ROUND(salary / SUM(salary) OVER () * 100, 2) AS pct_of_total
FROM employees;

-- 计算员工工资与部门平均工资的差异
SELECT
name,
department,
salary,
ROUND(AVG(salary) OVER (PARTITION BY department), 2) AS dept_avg,
ROUND(salary - AVG(salary) OVER (PARTITION BY department), 2) AS diff_from_avg
FROM employees;

使用命名窗口

当多个窗口函数使用相同的窗口定义时,可以使用命名窗口简化代码:

SELECT 
name,
department,
salary,
AVG(salary) OVER w AS avg_salary,
SUM(salary) OVER w AS total_salary,
COUNT(*) OVER w AS emp_count,
RANK() OVER w AS salary_rank
FROM employees
WINDOW w AS (PARTITION BY department ORDER BY salary DESC);

-- 也可以定义多个窗口
SELECT
name,
department,
salary,
AVG(salary) OVER w1 AS dept_avg,
RANK() OVER w2 AS overall_rank
FROM employees
WINDOW
w1 AS (PARTITION BY department),
w2 AS (ORDER BY salary DESC);

CASE 表达式

简单 CASE

SELECT 
name,
department,
CASE department
WHEN '技术部' THEN '研发'
WHEN '市场部' THEN '营销'
WHEN '财务部' THEN '财务'
ELSE '其他'
END AS dept_cn
FROM employees;

搜索 CASE

SELECT 
name,
salary,
CASE
WHEN salary >= 15000 THEN '高薪'
WHEN salary >= 12000 THEN '中等'
WHEN salary >= 10000 THEN '一般'
ELSE '偏低'
END AS salary_level
FROM employees;

CASE 在聚合中应用

-- 统计各部门男女比例
SELECT
department,
COUNT(*) FILTER (WHERE gender = '男') AS male_count,
COUNT(*) FILTER (WHERE gender = '女') AS female_count,
COUNT(*) FILTER (WHERE gender = '其他') AS other_count
FROM employees
GROUP BY department;

-- 使用 SUM 替代 COUNT...FILTER
SELECT
SUM(CASE WHEN gender = '男' THEN 1 ELSE 0 END) AS male_count
FROM employees;

联合查询

UNION 和 UNION ALL

-- UNION:合并并去重
SELECT name, salary FROM employees WHERE department = '技术部'
UNION
SELECT name, salary FROM employees WHERE department = '市场部';

-- UNION ALL:合并但不去重(更快)
SELECT name, salary FROM employees WHERE department = '技术部'
UNION ALL
SELECT name, salary FROM employees WHERE department = '市场部';

-- UNION 排序
SELECT name, salary FROM employees WHERE department = '技术部'
UNION
SELECT name, salary FROM employees WHERE department = '市场部'
ORDER BY salary DESC;

INTERSECT 和 EXCEPT

-- INTERSECT:交集
SELECT name FROM employees WHERE salary > 10000
INTERSECT
SELECT name FROM employees WHERE department = '技术部';

-- EXCEPT:差集
SELECT name FROM employees
EXCEPT
SELECT name FROM employees WHERE status = 'inactive';

LATERAL JOIN(横向连接)

LATERAL JOIN 是 PostgreSQL 中一个强大但常被忽视的查询特性。它允许子查询引用其左侧表中的列,就像编程语言中的循环一样,为每一行执行一次子查询。理解 LATERAL JOIN 的关键在于理解"横向"的概念——普通的 JOIN 是纵向组合数据(增加行),而 LATERAL JOIN 可以横向展开数据(为每行生成多行)。

为什么需要 LATERAL JOIN

考虑这样一个需求:找出每个客户最近的两笔订单。传统的解决方案面临两难选择:

方案一:使用窗口函数

SELECT * FROM (
SELECT
c.name,
o.id AS order_id,
o.amount,
ROW_NUMBER() OVER (PARTITION BY c.id ORDER BY o.created_at DESC) AS rn
FROM customers c
LEFT JOIN orders o ON c.id = o.customer_id
) t
WHERE rn <= 2;

这种方式需要扫描所有订单,然后在内存中排序和编号,当数据量大时效率不高。

方案二:使用 correlated subquery(相关子查询)

SELECT 
c.name,
(SELECT o.id FROM orders o WHERE o.customer_id = c.id ORDER BY o.created_at DESC LIMIT 1) AS latest_order,
(SELECT o.id FROM orders o WHERE o.customer_id = c.id ORDER BY o.created_at DESC LIMIT 1 OFFSET 1) AS second_order
FROM customers c;

这种方式每个客户需要执行两次子查询,当需要 N 条记录时就需要 N 个子查询,代码冗长且性能差。

LATERAL JOIN 提供了完美的解决方案:一次性为每个客户执行子查询,返回任意数量的行。

LATERAL JOIN 的工作原理

LATERAL 关键字使得右侧的子查询可以引用左侧表中的列。PostgreSQL 会依次处理左侧表的每一行,为每行执行一次右侧的子查询。

普通 JOIN:
左表 ──┐
├── 组合 ── 结果
右表 ──┘

LATERAL JOIN:
左表第1行 ──→ 执行子查询 ──→ 生成多行结果
左表第2行 ──→ 执行子查询 ──→ 生成多行结果
...

基本语法

SELECT columns
FROM table1
[LEFT] JOIN LATERAL (subquery) alias ON true;

关键点

  • LATERAL 关键字放在子查询前面
  • 子查询可以引用 table1 中的列
  • 通常使用 ON true 作为连接条件
  • 支持 JOIN LATERALLEFT JOIN LATERAL(后者保证左表行即使子查询无结果也会保留)

经典应用:Top-N 查询

获取每个客户最近的两笔订单:

-- 创建测试数据
CREATE TABLE customers (
id SERIAL PRIMARY KEY,
name VARCHAR(50)
);

CREATE TABLE orders (
id SERIAL PRIMARY KEY,
customer_id INTEGER REFERENCES customers(id),
amount NUMERIC(10, 2),
created_at TIMESTAMP DEFAULT NOW()
);

INSERT INTO customers (name) VALUES ('张三'), ('李四'), ('王五');
INSERT INTO orders (customer_id, amount, created_at) VALUES
(1, 100, '2024-01-01'),
(1, 200, '2024-01-15'),
(1, 50, '2024-02-01'),
(2, 300, '2024-01-20'),
(2, 150, '2024-02-10');

-- 使用 LATERAL JOIN 获取每个客户最近两笔订单
SELECT
c.name,
o.id AS order_id,
o.amount,
o.created_at
FROM customers c
LEFT JOIN LATERAL (
SELECT id, amount, created_at
FROM orders
WHERE customer_id = c.id
ORDER BY created_at DESC
LIMIT 2
) o ON true
ORDER BY c.name, o.created_at DESC;

-- 结果:
-- name | order_id | amount | created_at
-- -----+----------+--------+------------
-- 张三 | 3 | 50.00 | 2024-02-01
-- 张三 | 2 | 200.00 | 2024-01-15
-- 李四 | 5 | 150.00 | 2024-02-10
-- 李四 | 4 | 300.00 | 2024-01-20
-- 王五 | NULL | NULL | NULL -- 无订单的客户也会显示

与集合返回函数配合

PostgreSQL 有许多集合返回函数(set-returning functions),如 generate_seriesunnestjsonb_array_elements 等。LATERAL JOIN 可以让这些函数基于每一行的数据执行。

示例:生成时间序列数据

-- 为每个员工生成未来 7 天的排班表
CREATE TABLE employees (
id SERIAL PRIMARY KEY,
name VARCHAR(50)
);

INSERT INTO employees (name) VALUES ('张三'), ('李四');

SELECT
e.name,
schedule_date,
'排班' AS task
FROM employees e
CROSS JOIN LATERAL generate_series(
CURRENT_DATE,
CURRENT_DATE + INTERVAL '6 days',
INTERVAL '1 day'
) AS schedule_date
ORDER BY e.name, schedule_date;

-- 结果(部分):
-- name | schedule_date | task
-- -----+---------------+------
-- 张三 | 2024-03-01 | 排班
-- 张三 | 2024-03-02 | 排班
-- ...
-- 李四 | 2024-03-01 | 排班
-- ...

-- 注:CROSS JOIN LATERAL 等价于 JOIN LATERAL ... ON true
-- 当确定子查询总是返回结果时可以使用

示例:展开 JSON 数组

-- 每个产品的标签展开为多行
CREATE TABLE products (
id SERIAL PRIMARY KEY,
name VARCHAR(50),
tags JSONB
);

INSERT INTO products (name, tags) VALUES
('笔记本电脑', '["电子", "办公", "高端"]'::jsonb),
('鼠标', '["电子", "配件"]'::jsonb);

SELECT
p.name,
tag
FROM products p
JOIN LATERAL jsonb_array_elements_text(p.tags) AS tag ON true
ORDER BY p.name, tag;

-- 结果:
-- name | tag
-- ------------+------
-- 笔记本电脑 | 办公
-- 笔记本电脑 | 电子
-- 笔记本电脑 | 高端
-- 鼠标 | 电子
-- 鼠标 | 配件

复杂计算场景

LATERAL JOIN 非常适合需要为每行执行复杂计算的场景。

示例:计算每个客户的统计信息

-- 获取每个客户的订单统计和最近订单信息
SELECT
c.name,
stats.total_orders,
stats.total_amount,
stats.avg_amount,
latest.order_id AS latest_order_id,
latest.amount AS latest_amount
FROM customers c
LEFT JOIN LATERAL (
SELECT
COUNT(*) AS total_orders,
SUM(amount) AS total_amount,
AVG(amount) AS avg_amount
FROM orders
WHERE customer_id = c.id
) stats ON true
LEFT JOIN LATERAL (
SELECT id AS order_id, amount
FROM orders
WHERE customer_id = c.id
ORDER BY created_at DESC
LIMIT 1
) latest ON true;

-- 结果:
-- name | total_orders | total_amount | avg_amount | latest_order_id | latest_amount
-- -----+--------------+--------------+------------+-----------------+---------------
-- 张三 | 3 | 350.00 | 116.67 | 3 | 50.00
-- 李四 | 2 | 450.00 | 225.00 | 5 | 150.00
-- 王五 | 0 | NULL | NULL | NULL | NULL

分组后取 Top-N

这是一个非常实用的模式:先分组,然后在每个组内取前 N 条记录。

-- 获取每个部门工资最高的两名员工
SELECT
d.dept_name,
top_employees.name,
top_employees.salary
FROM departments d
LEFT JOIN LATERAL (
SELECT e.name, e.salary
FROM employees e
WHERE e.department_id = d.id
ORDER BY e.salary DESC
LIMIT 2
) top_employees ON true
ORDER BY d.dept_name, top_employees.salary DESC;

LATERAL JOIN 与递归 CTE 配合

LATERAL JOIN 可以与递归 CTE 结合,处理树形结构数据。

-- 查找每个分类及其所有子分类的路径
CREATE TABLE categories (
id SERIAL PRIMARY KEY,
name VARCHAR(50),
parent_id INTEGER REFERENCES categories(id)
);

INSERT INTO categories (name, parent_id) VALUES
('电子产品', NULL),
('手机', 1),
('笔记本电脑', 1),
('智能手机', 2),
('功能机', 2);

WITH RECURSIVE category_tree AS (
-- 基础查询:顶级分类
SELECT id, name, parent_id, name::TEXT AS path, 1 AS depth
FROM categories
WHERE parent_id IS NULL

UNION ALL

-- 递归查询:子分类
SELECT c.id, c.name, c.parent_id, ct.path || ' > ' || c.name, ct.depth + 1
FROM categories c
INNER JOIN category_tree ct ON c.parent_id = ct.id
)
SELECT
ct.name,
ct.path,
product_count.count
FROM category_tree ct
LEFT JOIN LATERAL (
SELECT COUNT(*) AS count
FROM products p
WHERE p.category_id = ct.id
) product_count ON true
ORDER BY ct.path;

性能优化建议

使用索引优化 LATERAL 子查询

-- 为 LATERAL 子查询中的条件创建索引
CREATE INDEX idx_orders_customer_date ON orders(customer_id, created_at DESC);

-- 这样 LATERAL 子查询可以使用索引快速定位
SELECT c.name, o.amount
FROM customers c
JOIN LATERAL (
SELECT amount
FROM orders
WHERE customer_id = c.id
ORDER BY created_at DESC
LIMIT 1
) o ON true;

避免 LATERAL 子查询返回过多行

-- 好的做法:使用 LIMIT 限制返回行数
SELECT * FROM customers c
JOIN LATERAL (
SELECT * FROM orders
WHERE customer_id = c.id
ORDER BY created_at DESC
LIMIT 10 -- 限制每行最多返回 10 条
) o ON true;

-- 不好的做法:可能返回大量数据
SELECT * FROM customers c
JOIN LATERAL (
SELECT * FROM orders WHERE customer_id = c.id
) o ON true; -- 某些客户可能有成千上万订单

LATERAL JOIN vs 其他方案对比

方案优点缺点
LATERAL JOIN灵活、可读性好、支持任意 N 条记录每行执行一次子查询
窗口函数单次扫描、性能好需要扫描全部数据
相关子查询简单场景下简洁只能返回单个值
递归 CTE适合层级数据语法复杂

选择建议

  • 需要为每行返回少量相关记录:使用 LATERAL JOIN
  • 需要对所有数据进行排名或聚合:使用窗口函数
  • 只需要单个值:使用相关子查询

公用表表达式(CTE)

公用表表达式(Common Table Expression,简称 CTE)是 PostgreSQL 中一种强大的查询构造方式。它允许你为复杂查询定义临时的命名结果集,这些结果集只在当前查询中有效。CTE 不仅可以简化复杂查询的编写,还能实现递归查询等普通 SQL 无法完成的功能。

CTE 的核心价值

理解 CTE 的关键在于理解它解决的问题。当我们面对复杂的查询需求时,往往会遇到以下困境:

问题一:嵌套子查询难以阅读和维护

-- 嵌套子查询:难以理解每层的作用
SELECT * FROM (
SELECT * FROM (
SELECT department, AVG(salary) AS avg_sal
FROM employees
GROUP BY department
) dept_avg
WHERE avg_sal > 10000
) high_paid_depts;

问题二:相同的子查询需要重复编写

-- 同一个子查询写了两次
SELECT
(SELECT AVG(salary) FROM employees WHERE department = '技术部') AS tech_avg,
(SELECT AVG(salary) FROM employees WHERE department = '技术部') + 1000 AS tech_target;

CTE 优雅地解决了这些问题:

-- 使用 CTE:结构清晰,易于理解
WITH dept_avg AS (
SELECT department, AVG(salary) AS avg_sal
FROM employees
GROUP BY department
),
high_paid_depts AS (
SELECT * FROM dept_avg WHERE avg_sal > 10000
)
SELECT * FROM high_paid_depts;

基本 CTE 语法

CTE 使用 WITH 关键字定义,基本语法结构如下:

WITH cte_name AS (
-- CTE 查询定义
SELECT ...
)
-- 主查询,可以引用 CTE
SELECT ... FROM cte_name;

简单示例

-- 定义一个简单的 CTE
WITH high_salary_employees AS (
SELECT name, department, salary
FROM employees
WHERE salary > 12000
)
-- 在主查询中使用 CTE
SELECT * FROM high_salary_employees
WHERE department = '技术部';

多个 CTE

可以在一个 WITH 子句中定义多个 CTE,后面的 CTE 可以引用前面的 CTE:

-- 多个 CTE 示例:分析销售数据
WITH
-- 第一个 CTE:计算各地区销售额
regional_sales AS (
SELECT region, SUM(amount) AS total_sales
FROM orders
GROUP BY region
),
-- 第二个 CTE:找出高销售地区(引用第一个 CTE)
top_regions AS (
SELECT region
FROM regional_sales
WHERE total_sales > (SELECT AVG(total_sales) FROM regional_sales)
)
-- 主查询:返回高销售地区的产品销售明细
SELECT
o.region,
o.product,
SUM(o.quantity) AS product_units,
SUM(o.amount) AS product_sales
FROM orders o
WHERE o.region IN (SELECT region FROM top_regions)
GROUP BY o.region, o.product;

CTE 的执行顺序:多个 CTE 按定义顺序依次计算,后面的 CTE 可以引用前面的结果,但前面的 CTE 不能引用后面的。

CTE vs 子查询 vs 临时表

特性CTE子查询临时表
可读性
可复用性同一查询内可复用需要重复编写整个会话可复用
性能可能物化通常内联持久化存储
作用域当前查询当前位置当前会话
索引支持无(除非物化)
-- CTE 可以在同一个查询中多次引用
WITH active_users AS (
SELECT * FROM users WHERE status = 'active'
)
SELECT
(SELECT COUNT(*) FROM active_users) AS total,
(SELECT AVG(age) FROM active_users) AS avg_age;
-- active_users 只计算一次,两次引用共享结果

递归 CTE(WITH RECURSIVE)

递归 CTE 是 CTE 最强大的功能,它可以引用自身的输出,用于处理层级数据、图遍历等场景。

递归 CTE 的基本结构

WITH RECURSIVE cte_name AS (
-- 基础查询(非递归部分):初始数据
SELECT ...

UNION ALL

-- 递归查询:引用自身
SELECT ... FROM cte_name WHERE ...
)
SELECT * FROM cte_name;

执行过程

  1. 执行基础查询,得到初始结果集
  2. 将初始结果作为"工作表"
  3. 递归查询使用工作表数据,产生新结果
  4. 新结果成为新的工作表
  5. 重复步骤 3-4,直到递归查询不再产生新数据

示例一:生成数字序列

-- 生成 1 到 10 的数字序列
WITH RECURSIVE numbers AS (
-- 基础查询:从 1 开始
SELECT 1 AS n

UNION ALL

-- 递归查询:每次加 1
SELECT n + 1 FROM numbers WHERE n < 10
)
SELECT * FROM numbers;

-- 结果:1, 2, 3, 4, 5, 6, 7, 8, 9, 10

示例二:组织架构层级查询

-- 创建员工表(包含上级关系)
CREATE TABLE staff (
id INTEGER PRIMARY KEY,
name VARCHAR(50),
manager_id INTEGER REFERENCES staff(id)
);

INSERT INTO staff VALUES
(1, 'CEO', NULL),
(2, 'CTO', 1),
(3, 'CFO', 1),
(4, '开发经理', 2),
(5, '测试经理', 2),
(6, '会计', 3),
(7, '开发工程师A', 4),
(8, '开发工程师B', 4),
(9, '测试工程师', 5);

-- 递归查询完整的组织架构
WITH RECURSIVE org_tree AS (
-- 基础查询:顶级员工(没有上级)
SELECT
id,
name,
manager_id,
1 AS level,
name::TEXT AS path
FROM staff
WHERE manager_id IS NULL

UNION ALL

-- 递归查询:下级员工
SELECT
s.id,
s.name,
s.manager_id,
ot.level + 1,
ot.path || ' > ' || s.name
FROM staff s
INNER JOIN org_tree ot ON s.manager_id = ot.id
)
SELECT
REPEAT(' ', level - 1) || name AS org_chart,
level,
path
FROM org_tree
ORDER BY path;

-- 结果:
-- org_chart | level | path
-- ---------------+-------+-----------------------------
-- CEO | 1 | CEO
-- CTO | 2 | CEO > CTO
-- 开发经理 | 3 | CEO > CTO > 开发经理
-- 开发工程师A| 4 | CEO > CTO > 开发经理 > 开发工程师A
-- 开发工程师B| 4 | CEO > CTO > 开发经理 > 开发工程师B
-- 测试经理 | 3 | CEO > CTO > 测试经理
-- 测试工程师| 4 | CEO > CTO > 测试经理 > 测试工程师
-- CFO | 2 | CEO > CFO
-- 会计 | 3 | CEO > CFO > 会计

示例三:计算阶乘

-- 计算 5 的阶乘
WITH RECURSIVE factorial AS (
SELECT 1 AS n, 1 AS result

UNION ALL

SELECT n + 1, result * (n + 1)
FROM factorial
WHERE n < 5
)
SELECT result FROM factorial WHERE n = 5;
-- 结果:120 (即 5! = 1 * 2 * 3 * 4 * 5)

搜索顺序:深度优先与广度优先

处理树形结构时,可能需要控制遍历顺序。PostgreSQL 提供了 SEARCH 子句来指定搜索顺序。

深度优先搜索(DFS)

-- 深度优先:先遍历完一个分支再遍历下一个
WITH RECURSIVE org_tree AS (
SELECT id, name, manager_id
FROM staff
WHERE manager_id IS NULL

UNION ALL

SELECT s.id, s.name, s.manager_id
FROM staff s
INNER JOIN org_tree ot ON s.manager_id = ot.id
) SEARCH DEPTH FIRST BY id SET order_col
SELECT name, order_col FROM org_tree ORDER BY order_col;

广度优先搜索(BFS)

-- 广度优先:按层级顺序遍历
WITH RECURSIVE org_tree AS (
SELECT id, name, manager_id
FROM staff
WHERE manager_id IS NULL

UNION ALL

SELECT s.id, s.name, s.manager_id
FROM staff s
INNER JOIN org_tree ot ON s.manager_id = ot.id
) SEARCH BREADTH FIRST BY id SET order_col
SELECT name, order_col FROM org_tree ORDER BY order_col;

手动实现搜索顺序

-- 手动实现深度优先:使用路径数组
WITH RECURSIVE org_tree AS (
SELECT
id,
name,
manager_id,
ARRAY[id] AS path -- 初始路径
FROM staff
WHERE manager_id IS NULL

UNION ALL

SELECT
s.id,
s.name,
s.manager_id,
ot.path || s.id -- 追加到路径
FROM staff s
INNER JOIN org_tree ot ON s.manager_id = ot.id
)
SELECT name, path FROM org_tree ORDER BY path;

-- 手动实现广度优先:使用层级计数
WITH RECURSIVE org_tree AS (
SELECT
id,
name,
manager_id,
0 AS depth -- 顶层深度为 0
FROM staff
WHERE manager_id IS NULL

UNION ALL

SELECT
s.id,
s.name,
s.manager_id,
ot.depth + 1 -- 子节点深度加 1
FROM staff s
INNER JOIN org_tree ot ON s.manager_id = ot.id
)
SELECT name, depth FROM org_tree ORDER BY depth, name;

循环检测

处理图数据时,可能存在循环引用。PostgreSQL 提供 CYCLE 子句自动检测循环。

使用 CYCLE 子句

-- 创建图数据表
CREATE TABLE graph (
id INTEGER,
link INTEGER, -- 指向的节点
data TEXT
);

INSERT INTO graph VALUES
(1, 2, 'A'),
(2, 3, 'B'),
(3, 1, 'C'), -- 形成循环:1 -> 2 -> 3 -> 1
(4, 5, 'D'),
(5, NULL, 'E');

-- 使用 CYCLE 检测循环
WITH RECURSIVE search_graph AS (
SELECT id, link, data
FROM graph
WHERE id = 1 -- 从节点 1 开始

UNION ALL

SELECT g.id, g.link, g.data
FROM graph g
INNER JOIN search_graph sg ON g.id = sg.link
) CYCLE id SET is_cycle USING path
SELECT * FROM search_graph;

-- 结果:
-- id | link | data | is_cycle | path
-- ---+------+------+----------+-------
-- 1 | 2 | A | false | {1}
-- 2 | 3 | B | false | {1,2}
-- 3 | 1 | C | false | {1,2,3}
-- 1 | 2 | A | true | {1,2,3,1} -- 检测到循环!

手动实现循环检测

WITH RECURSIVE search_graph AS (
SELECT
id,
link,
data,
ARRAY[id] AS path, -- 已访问的节点
FALSE AS is_cycle
FROM graph
WHERE id = 1

UNION ALL

SELECT
g.id,
g.link,
g.data,
sg.path || g.id,
g.id = ANY(sg.path) -- 检查是否已访问
FROM graph g
INNER JOIN search_graph sg ON g.id = sg.link
WHERE NOT sg.is_cycle -- 如果已检测到循环,停止递归
)
SELECT * FROM search_graph;

CTE 物化(MATERIALIZED)

PostgreSQL 优化器会自动决定是否将 CTE 结果物化(存储到临时存储)。你可以使用 MATERIALIZEDNOT MATERIALIZED 显式控制此行为。

MATERIALIZED(物化)

-- 强制物化:CTE 结果存储到临时存储
-- 适合:CTE 被多次引用,或包含昂贵计算
WITH expensive_calc AS MATERIALIZED (
SELECT id, very_expensive_function(data) AS result
FROM large_table
)
SELECT * FROM expensive_calc e1
JOIN expensive_calc e2 ON e1.result = e2.result;
-- very_expensive_function 只执行一次

NOT MATERIALIZED(不物化)

-- 不物化:CTE 内联到主查询
-- 适合:CTE 只被引用一次,且主查询有过滤条件
WITH w AS NOT MATERIALIZED (
SELECT * FROM large_table
)
SELECT * FROM w WHERE key = 123;
-- 等价于:SELECT * FROM large_table WHERE key = 123
-- 如果有索引,会使用索引

默认行为

-- 默认行为:
-- - CTE 只被引用一次:不物化(PostgreSQL 12+)
-- - CTE 被多次引用:物化

-- 被多次引用,默认物化
WITH w AS (
SELECT * FROM large_table
)
SELECT * FROM w w1 JOIN w w2 ON w1.key = w2.ref;
-- 没有索引优化,全表扫描后自连接

-- 使用 NOT MATERIALIZED 优化
WITH w AS NOT MATERIALIZED (
SELECT * FROM large_table
)
SELECT * FROM w w1 JOIN w w2 ON w1.key = w2.ref
WHERE w2.key = 123;
-- 可以利用索引

数据修改语句中的 CTE

CTE 不仅可以用于 SELECT,还可以包含 INSERT、UPDATE、DELETE 语句,实现复杂的数据操作。

基本示例

-- 将数据从一个表移动到另一个表
WITH moved_rows AS (
DELETE FROM products
WHERE created_at < '2020-01-01'
RETURNING *
)
INSERT INTO products_archive
SELECT * FROM moved_rows;

复杂操作示例

-- 更新并返回新旧值对比
WITH
old_data AS (
SELECT id, price FROM products WHERE id = 1
),
update_result AS (
UPDATE products
SET price = price * 1.1
WHERE id = 1
RETURNING id, price
)
SELECT
o.id,
o.price AS old_price,
u.price AS new_price,
u.price - o.price AS price_diff
FROM old_data o
CROSS JOIN update_result u;

多表操作

-- 复杂的订单处理流程
WITH
-- 标记订单为已处理
processed AS (
UPDATE orders
SET status = 'processed', processed_at = NOW()
WHERE status = 'pending'
RETURNING id, customer_id, total_amount
),
-- 更新客户统计
customer_update AS (
UPDATE customers c
SET
total_orders = total_orders + 1,
total_spent = total_spent + p.total_amount
FROM processed p
WHERE c.id = p.customer_id
RETURNING c.id, c.name
),
-- 记录处理日志
log_insert AS (
INSERT INTO order_logs (order_id, action, created_at)
SELECT id, 'processed', NOW() FROM processed
RETURNING order_id
)
SELECT
p.id AS order_id,
cu.name AS customer_name,
p.total_amount
FROM processed p
JOIN customer_update cu ON p.customer_id = cu.id;

注意事项

-- 数据修改 CTE 中的语句总是执行完毕
-- 即使主查询没有读取所有结果

WITH t AS (
UPDATE products SET price = price * 1.05 RETURNING *
)
SELECT * FROM products; -- 返回更新前的数据

WITH t AS (
UPDATE products SET price = price * 1.05 RETURNING *
)
SELECT * FROM t; -- 返回更新后的数据

-- 多个数据修改 CTE 同时执行,使用相同的快照
-- 因此它们不能"看到"彼此的修改
-- 只有通过 RETURNING 传递的数据才能被其他部分看到

CTE 最佳实践

1. 使用 CTE 提高可读性

-- 好的做法:分步骤清晰表达逻辑
WITH
monthly_sales AS (
SELECT
DATE_TRUNC('month', order_date) AS month,
SUM(amount) AS total
FROM orders
GROUP BY month
),
avg_sales AS (
SELECT AVG(total) AS avg_total FROM monthly_sales
)
SELECT m.month, m.total,
ROUND(m.total / a.avg_total * 100, 2) AS pct_of_avg
FROM monthly_sales m
CROSS JOIN avg_sales a
ORDER BY m.month;

2. 避免过度使用 CTE

-- 过度使用:简单的查询不需要 CTE
WITH users AS (
SELECT * FROM employees WHERE department = '技术部'
)
SELECT * FROM users WHERE salary > 10000;

-- 直接写更简洁
SELECT * FROM employees
WHERE department = '技术部' AND salary > 10000;

3. 使用 CTE 避免重复计算

-- 不好的做法:重复计算
SELECT
(SELECT COUNT(*) FROM orders WHERE status = 'completed') AS completed,
(SELECT COUNT(*) FROM orders WHERE status = 'completed') * 100.0 /
(SELECT COUNT(*) FROM orders) AS completed_pct;

-- 好的做法:使用 CTE 避免重复
WITH order_stats AS (
SELECT
COUNT(*) AS total,
COUNT(*) FILTER (WHERE status = 'completed') AS completed
FROM orders
)
SELECT
completed,
ROUND(completed * 100.0 / total, 2) AS completed_pct
FROM order_stats;

4. 递归 CTE 添加深度限制

-- 添加深度限制防止无限递归
WITH RECURSIVE org_tree AS (
SELECT id, name, manager_id, 1 AS depth
FROM staff
WHERE manager_id IS NULL

UNION ALL

SELECT s.id, s.name, s.manager_id, ot.depth + 1
FROM staff s
INNER JOIN org_tree ot ON s.manager_id = ot.id
WHERE ot.depth < 10 -- 限制最大深度
)
SELECT * FROM org_tree;

JSON_TABLE:将 JSON 转换为表(PostgreSQL 17+)

PostgreSQL 17 引入了 SQL 标准的 JSON_TABLE 函数,它可以将 JSON 数据转换为关系表格式,使得查询 JSON 数据变得更加直观和高效。

基本语法

JSON_TABLE(
json_data, -- JSON 数据源
'$.path' -- JSON 路径表达式
COLUMNS (
column_name data_type PATH 'json_path',
...
)
) AS alias

基本示例

-- 创建包含 JSON 数据的表
CREATE TABLE orders_json (
id SERIAL PRIMARY KEY,
data JSONB
);

INSERT INTO orders_json (data) VALUES
('{
"order_id": "ORD-001",
"customer": {
"name": "张三",
"email": "[email protected]"
},
"items": [
{"product": "笔记本电脑", "quantity": 1, "price": 5999},
{"product": "鼠标", "quantity": 2, "price": 99}
],
"total": 6197
}');

-- 使用 JSON_TABLE 解析 JSON 为表格
SELECT *
FROM orders_json o,
JSON_TABLE(
o.data,
'$'
COLUMNS (
order_id TEXT PATH '$.order_id',
customer_name TEXT PATH '$.customer.name',
customer_email TEXT PATH '$.customer.email',
total NUMERIC PATH '$.total'
)
) AS jt;

-- 结果:
-- order_id | customer_name | customer_email | total
-- ----------+---------------+----------------------+-------
-- ORD-001 | 张三 | [email protected] | 6197

处理 JSON 数组

-- 使用 JSON_TABLE 解析数组中的每个元素
SELECT
o.id,
jt.*
FROM orders_json o,
JSON_TABLE(
o.data,
'$.items[*]'
COLUMNS (
product TEXT PATH '$.product',
quantity INTEGER PATH '$.quantity',
price NUMERIC PATH '$.price'
)
) AS jt;

-- 结果:
-- id | product | quantity | price
-- ---+--------------+----------+------
-- 1 | 笔记本电脑 | 1 | 5999
-- 1 | 鼠标 | 2 | 99

嵌套 JSON 解析

-- 使用 NESTED 解析嵌套结构
SELECT *
FROM orders_json o,
JSON_TABLE(
o.data,
'$'
COLUMNS (
order_id TEXT PATH '$.order_id',
NESTED PATH '$.items[*]'
COLUMNS (
product TEXT PATH '$.product',
quantity INTEGER PATH '$.quantity',
price NUMERIC PATH '$.price'
)
)
) AS jt;

-- 结果包含订单信息和每个商品明细

处理可能缺失的字段

-- 使用 DEFAULT ON EMPTY 处理缺失值
SELECT *
FROM orders_json o,
JSON_TABLE(
o.data,
'$'
COLUMNS (
order_id TEXT PATH '$.order_id',
-- 如果字段不存在,使用默认值
discount NUMERIC PATH '$.discount' DEFAULT 0 ON EMPTY,
-- 如果值不是有效数字,使用默认值
note TEXT PATH '$.note' DEFAULT '无备注' ON EMPTY
)
) AS jt;

错误处理

-- 使用 ERROR ON ERROR 在数据类型转换失败时报错
SELECT *
FROM orders_json o,
JSON_TABLE(
o.data,
'$'
COLUMNS (
order_id TEXT PATH '$.order_id',
-- 如果转换失败则报错
total INTEGER PATH '$.total' ERROR ON ERROR
)
) AS jt;

-- 使用 NULL ON ERROR 在转换失败时返回 NULL
SELECT *
FROM orders_json o,
JSON_TABLE(
o.data,
'$'
COLUMNS (
-- 如果转换失败返回 NULL
some_value INTEGER PATH '$.maybe_number' NULL ON ERROR
)
) AS jt;

实际应用场景

-- 场景:解析 API 返回的复杂 JSON 响应
CREATE TABLE api_responses (
id SERIAL PRIMARY KEY,
endpoint VARCHAR(100),
response JSONB,
created_at TIMESTAMP DEFAULT NOW()
);

INSERT INTO api_responses (endpoint, response) VALUES
('users', '{
"data": [
{"id": 1, "name": "Alice", "email": "[email protected]", "roles": ["admin", "user"]},
{"id": 2, "name": "Bob", "email": "[email protected]", "roles": ["user"]}
],
"meta": {"total": 2, "page": 1}
}');

-- 解析用户数据
SELECT
r.endpoint,
jt.*
FROM api_responses r,
JSON_TABLE(
r.response,
'$.data[*]'
COLUMNS (
user_id INTEGER PATH '$.id',
name TEXT PATH '$.name',
email TEXT PATH '$.email',
-- 将数组转换为文本
roles TEXT PATH '$.roles'
)
) AS jt;

JSON_TABLE 与其他 JSON 函数结合

-- 结合 JSON_ARRAY_ELEMENTS 和 JSON_TABLE
WITH flattened AS (
SELECT
id,
JSON_ARRAY_ELEMENTS(data->'items') AS item
FROM orders_json
)
SELECT
f.id,
jt.*
FROM flattened f,
JSON_TABLE(
f.item,
'$'
COLUMNS (
product TEXT PATH '$.product',
quantity INTEGER PATH '$.quantity',
price NUMERIC PATH '$.price'
)
) AS jt;

RETURNING 增强(PostgreSQL 18+)

PostgreSQL 18 增强了 RETURNING 子句,允许在 INSERT、UPDATE、DELETE 和 MERGE 语句中使用 OLD 和 NEW 别名。

UPDATE 返回新旧值

-- 创建测试表
CREATE TABLE products (
id SERIAL PRIMARY KEY,
name VARCHAR(100),
price NUMERIC(10, 2),
updated_at TIMESTAMP DEFAULT NOW()
);

INSERT INTO products (name, price) VALUES ('笔记本电脑', 5999);

-- 更新并返回修改前后的值
UPDATE products
SET price = price * 1.1
WHERE name = '笔记本电脑'
RETURNING
name,
OLD.price AS old_price,
NEW.price AS new_price,
NEW.price - OLD.price AS price_change;

-- 结果:
-- name | old_price | new_price | price_change
-- ------------+-----------+-----------+-------------
-- 笔记本电脑 | 5999.00 | 6598.90 | 599.90

DELETE 返回被删除的值

-- 删除并返回被删除的数据
DELETE FROM products
WHERE id = 1
RETURNING
OLD.id,
OLD.name,
OLD.price;

-- 对于 DELETE,NEW 返回 NULL
DELETE FROM products
WHERE price < 100
RETURNING
OLD.name AS deleted_name,
NEW.name AS new_name; -- NULL

INSERT 返回新值

-- 对于 INSERT,OLD 返回 NULL,NEW 返回新值
INSERT INTO products (name, price)
VALUES ('键盘', 299)
RETURNING
OLD.id AS old_id, -- NULL
NEW.id AS new_id, -- 新生成的 ID
NEW.name,
NEW.price;

MERGE 返回操作信息

-- 创建目标表和源表
CREATE TABLE target_products (
id INTEGER PRIMARY KEY,
name VARCHAR(100),
price NUMERIC(10, 2)
);

CREATE TABLE source_products (
id INTEGER PRIMARY KEY,
name VARCHAR(100),
price NUMERIC(10, 2)
);

INSERT INTO target_products VALUES (1, '产品A', 100);
INSERT INTO source_products VALUES (1, '产品A-更新', 120);
INSERT INTO source_products VALUES (2, '产品B', 200);

-- 使用 MERGE 并返回详细信息
MERGE INTO target_products t
USING source_products s ON t.id = s.id
WHEN MATCHED THEN
UPDATE SET name = s.name, price = s.price
WHEN NOT MATCHED THEN
INSERT (id, name, price) VALUES (s.id, s.name, s.price)
RETURNING
action,
OLD.id AS old_id,
OLD.name AS old_name,
OLD.price AS old_price,
NEW.id AS new_id,
NEW.name AS new_name,
NEW.price AS new_price;

实际应用:审计日志

-- 创建审计日志表
CREATE TABLE price_audit_log (
id SERIAL PRIMARY KEY,
product_id INTEGER,
old_price NUMERIC,
new_price NUMERIC,
change_time TIMESTAMP DEFAULT NOW()
);

-- 更新价格并自动记录审计日志
WITH updated AS (
UPDATE products
SET price = price * 1.05
WHERE id = 1
RETURNING
id,
OLD.price AS old_price,
NEW.price AS new_price
)
INSERT INTO price_audit_log (product_id, old_price, new_price)
SELECT id, old_price, new_price FROM updated;

-- 查看审计日志
SELECT * FROM price_audit_log;

小结

本章我们学习了 PostgreSQL 查询的各个方面:

  1. 基础查询:SELECT、列选择、表达式计算
  2. 条件查询:WHERE、LIKE、NULL 处理
  3. 排序:ORDER BY、多列排序、NULL 位置
  4. 分页:LIMIT、OFFSET
  5. 分组:GROUP BY、HAVING、ROLLUP、CUBE
  6. 聚合函数:COUNT、SUM、AVG、MAX、MIN、字符串聚合
  7. 窗口函数:排名、LAG/LEAD、FIRST/LAST_VALUE、PERCENTILE
  8. CASE 表达式:条件转换
  9. 联合查询:UNION、INTERSECT、EXCEPT
  10. LATERAL JOIN:横向连接、Top-N 查询、与集合返回函数配合
  11. 公用表表达式:基本 CTE、递归查询、循环检测、物化控制、数据修改语句中的 CTE
  12. JSON_TABLE:将 JSON 数据转换为关系表(PostgreSQL 17+)
  13. RETURNING 增强:使用 OLD/NEW 获取修改前后的值(PostgreSQL 18+)

练习

  1. 查询所有工资在 10000-15000 之间的员工
  2. 按部门统计平均工资,只显示平均工资最高的部门
  3. 查询每个部门的工资前三名
  4. 计算每个员工与所在部门平均工资的差距
  5. 统计各部门不同状态员工的数量
  6. 使用 CTE 重写一个包含多层嵌套子查询的复杂查询
  7. 使用递归 CTE 查询员工的完整汇报链(从 CEO 到最底层员工)
  8. 使用数据修改 CTE 实现数据归档:将一年前的订单移动到历史表
  9. 使用 LATERAL JOIN 查询每个客户金额最大的三笔订单
  10. 使用 LATERAL JOIN 配合 jsonb_array_elements 展开每个产品标签为独立行