聚合函数
聚合函数对一组值进行计算,返回单个值。本章将详细介绍常用的聚合函数。
什么是聚合函数?
聚合函数对多行数据进行汇总计算,常用于统计和分析:
| 函数 | 说明 |
|---|---|
| COUNT() | 计数 |
| SUM() | 求和 |
| AVG() | 平均值 |
| MAX() | 最大值 |
| MIN() | 最小值 |
COUNT 计数
基本用法
-- 统计行数
SELECT COUNT(*) FROM users; -- 统计所有行
SELECT COUNT(id) FROM users; -- 统计非 NULL 的 id
SELECT COUNT(phone) FROM users; -- 统计有电话的用户
-- 统计不同值
SELECT COUNT(DISTINCT city) FROM users; -- 统计不同城市数
COUNT(*) vs COUNT(column)
-- COUNT(*):统计所有行,包括 NULL
SELECT COUNT(*) FROM users; -- 100
-- COUNT(column):统计非 NULL 值
SELECT COUNT(phone) FROM users; -- 85(有 15 个用户没有电话)
-- COUNT(1):与 COUNT(*) 等价
SELECT COUNT(1) FROM users; -- 100
条件计数
-- 使用 CASE WHEN
SELECT
COUNT(*) AS total,
COUNT(CASE WHEN status = 'active' THEN 1 END) AS active_count,
COUNT(CASE WHEN status = 'inactive' THEN 1 END) AS inactive_count
FROM users;
-- MySQL 特有:SUM(条件)
SELECT
COUNT(*) AS total,
SUM(status = 'active') AS active_count,
SUM(status = 'inactive') AS inactive_count
FROM users;
SUM 求和
-- 计算总和
SELECT SUM(amount) FROM orders;
-- 条件求和
SELECT SUM(CASE WHEN status = 'completed' THEN amount ELSE 0 END)
FROM orders;
-- 计算总价
SELECT SUM(price * quantity) FROM order_items;
处理 NULL
-- SUM 忽略 NULL 值
SELECT SUM(bonus) FROM employees; -- 只计算非 NULL 的奖金
-- 使用 COALESCE 处理 NULL
SELECT SUM(COALESCE(bonus, 0)) FROM employees;
AVG 平均值
-- 计算平均值
SELECT AVG(age) FROM users;
SELECT AVG(price) FROM products;
-- 保留小数位
SELECT ROUND(AVG(price), 2) FROM products;
-- 条件平均值
SELECT AVG(CASE WHEN category = '电子产品' THEN price END)
FROM products;
NULL 对 AVG 的影响
-- 数据: 10, 20, NULL, 30
SELECT AVG(score); -- (10 + 20 + 30) / 3 = 20,NULL 不参与计算
-- 如果希望 NULL 视为 0
SELECT AVG(COALESCE(score, 0)); -- (10 + 20 + 0 + 30) / 4 = 15
MAX 和 MIN
-- 最大值和最小值
SELECT MAX(price), MIN(price) FROM products;
-- 最新和最早日期
SELECT MAX(created_at), MIN(created_at) FROM orders;
-- 最长和最短名称
SELECT MAX(LENGTH(name)), MIN(LENGTH(name)) FROM products;
获取极值对应的行
-- 方法1:子查询
SELECT * FROM products
WHERE price = (SELECT MAX(price) FROM products);
-- 方法2:ORDER BY + LIMIT
SELECT * FROM products ORDER BY price DESC LIMIT 1;
GROUP BY 分组
聚合函数通常与 GROUP BY 配合使用:
-- 按城市统计用户数
SELECT city, COUNT(*) AS user_count
FROM users
GROUP BY city;
-- 按分类统计平均价格
SELECT category, AVG(price) AS avg_price
FROM products
GROUP BY category;
-- 多列分组
SELECT city, status, COUNT(*)
FROM users
GROUP BY city, status;
GROUP BY 规则
-- 错误:SELECT 中的非聚合列必须在 GROUP BY 中
SELECT city, name, COUNT(*) FROM users GROUP BY city;
-- Error: name 不在 GROUP BY 中
-- 正确
SELECT city, COUNT(*) FROM users GROUP BY city;
-- 或
SELECT city, name, COUNT(*) FROM users GROUP BY city, name;
HAVING 过滤分组
HAVING 用于过滤分组后的结果:
-- 查询用户数超过 100 的城市
SELECT city, COUNT(*) AS user_count
FROM users
GROUP BY city
HAVING COUNT(*) > 100;
-- HAVING vs WHERE
SELECT city, COUNT(*) AS user_count
FROM users
WHERE age > 18 -- 过滤行(分组前)
GROUP BY city
HAVING COUNT(*) > 100; -- 过滤分组(分组后)
HAVING 中使用聚合函数
-- 平均价格超过 100 的分类
SELECT category, AVG(price) AS avg_price
FROM products
GROUP BY category
HAVING AVG(price) > 100;
-- 总销售额超过 10000 的用户
SELECT user_id, SUM(amount) AS total_amount
FROM orders
GROUP BY user_id
HAVING SUM(amount) > 10000;
组合使用
完整的聚合查询
SELECT
city,
COUNT(*) AS user_count,
AVG(age) AS avg_age,
MAX(age) AS max_age,
MIN(age) AS min_age
FROM users
WHERE status = 'active'
GROUP BY city
HAVING COUNT(*) >= 10
ORDER BY user_count DESC
LIMIT 10;
执行顺序
高级用法
ROLLUP 分组汇总
-- MySQL / PostgreSQL
SELECT
city,
status,
COUNT(*) AS count
FROM users
GROUP BY ROLLUP(city, status);
-- 结果包含:
-- (city, status) 各组统计
-- (city, NULL) 每个城市的小计
-- (NULL, NULL) 总计
CUBE 多维汇总
-- PostgreSQL
SELECT
city,
status,
COUNT(*) AS count
FROM users
GROUP BY CUBE(city, status);
-- 结果包含所有组合的汇总
GROUPING SETS
-- PostgreSQL
SELECT
city,
status,
COUNT(*) AS count
FROM users
GROUP BY GROUPING SETS (
(city, status),
(city),
(status),
()
);
统计分析示例
用户统计
SELECT
DATE(created_at) AS date,
COUNT(*) AS new_users,
COUNT(DISTINCT city) AS cities
FROM users
GROUP BY DATE(created_at)
ORDER BY date;
销售统计
SELECT
DATE(order_date) AS date,
COUNT(*) AS order_count,
SUM(amount) AS total_amount,
AVG(amount) AS avg_amount
FROM orders
WHERE status = 'completed'
GROUP BY DATE(order_date)
HAVING SUM(amount) > 10000
ORDER BY date;
排名统计
-- MySQL 8.0+ / PostgreSQL
SELECT
user_id,
SUM(amount) AS total_amount,
RANK() OVER (ORDER BY SUM(amount) DESC) AS rank
FROM orders
GROUP BY user_id
ORDER BY total_amount DESC;
NULL 值处理
-- 聚合函数通常忽略 NULL
SELECT COUNT(bonus) FROM employees; -- 不计算 NULL
SELECT AVG(bonus) FROM employees; -- NULL 不参与平均
-- 使用 COALESCE 替换 NULL
SELECT AVG(COALESCE(bonus, 0)) FROM employees;
-- 分组时 NULL 作为一组
SELECT manager_id, COUNT(*)
FROM employees
GROUP BY manager_id; -- NULL(没有上级的员工)作为一组
小结
本章我们学习了:
- COUNT 计数
- SUM 求和
- AVG 平均值
- MAX/MIN 最大/最小值
- GROUP BY 分组
- HAVING 过滤分组
- ROLLUP/CUBE 高级分组
- NULL 值处理
练习
- 统计用户总数和有电话的用户数
- 计算每个分类的产品数量和平均价格
- 查询订单数超过 10 的用户
- 统计每天的销售总额
- 查询每个城市用户年龄的最大值、最小值和平均值