跳到主要内容

聚合函数

聚合函数对一组值进行计算,返回单个值。本章将详细介绍常用的聚合函数。

什么是聚合函数?

聚合函数对多行数据进行汇总计算,常用于统计和分析:

函数说明
COUNT()计数
SUM()求和
AVG()平均值
MAX()最大值
MIN()最小值

COUNT 计数

基本用法

-- 统计行数
SELECT COUNT(*) FROM users; -- 统计所有行
SELECT COUNT(id) FROM users; -- 统计非 NULL 的 id
SELECT COUNT(phone) FROM users; -- 统计有电话的用户

-- 统计不同值
SELECT COUNT(DISTINCT city) FROM users; -- 统计不同城市数

COUNT(*) vs COUNT(column)

-- COUNT(*):统计所有行,包括 NULL
SELECT COUNT(*) FROM users; -- 100

-- COUNT(column):统计非 NULL 值
SELECT COUNT(phone) FROM users; -- 85(有 15 个用户没有电话)

-- COUNT(1):与 COUNT(*) 等价
SELECT COUNT(1) FROM users; -- 100

条件计数

-- 使用 CASE WHEN
SELECT
COUNT(*) AS total,
COUNT(CASE WHEN status = 'active' THEN 1 END) AS active_count,
COUNT(CASE WHEN status = 'inactive' THEN 1 END) AS inactive_count
FROM users;

-- MySQL 特有:SUM(条件)
SELECT
COUNT(*) AS total,
SUM(status = 'active') AS active_count,
SUM(status = 'inactive') AS inactive_count
FROM users;

SUM 求和

-- 计算总和
SELECT SUM(amount) FROM orders;

-- 条件求和
SELECT SUM(CASE WHEN status = 'completed' THEN amount ELSE 0 END)
FROM orders;

-- 计算总价
SELECT SUM(price * quantity) FROM order_items;

处理 NULL

-- SUM 忽略 NULL 值
SELECT SUM(bonus) FROM employees; -- 只计算非 NULL 的奖金

-- 使用 COALESCE 处理 NULL
SELECT SUM(COALESCE(bonus, 0)) FROM employees;

AVG 平均值

-- 计算平均值
SELECT AVG(age) FROM users;
SELECT AVG(price) FROM products;

-- 保留小数位
SELECT ROUND(AVG(price), 2) FROM products;

-- 条件平均值
SELECT AVG(CASE WHEN category = '电子产品' THEN price END)
FROM products;

NULL 对 AVG 的影响

-- 数据: 10, 20, NULL, 30
SELECT AVG(score); -- (10 + 20 + 30) / 3 = 20,NULL 不参与计算

-- 如果希望 NULL 视为 0
SELECT AVG(COALESCE(score, 0)); -- (10 + 20 + 0 + 30) / 4 = 15

MAX 和 MIN

-- 最大值和最小值
SELECT MAX(price), MIN(price) FROM products;

-- 最新和最早日期
SELECT MAX(created_at), MIN(created_at) FROM orders;

-- 最长和最短名称
SELECT MAX(LENGTH(name)), MIN(LENGTH(name)) FROM products;

获取极值对应的行

-- 方法1:子查询
SELECT * FROM products
WHERE price = (SELECT MAX(price) FROM products);

-- 方法2:ORDER BY + LIMIT
SELECT * FROM products ORDER BY price DESC LIMIT 1;

GROUP BY 分组

聚合函数通常与 GROUP BY 配合使用:

-- 按城市统计用户数
SELECT city, COUNT(*) AS user_count
FROM users
GROUP BY city;

-- 按分类统计平均价格
SELECT category, AVG(price) AS avg_price
FROM products
GROUP BY category;

-- 多列分组
SELECT city, status, COUNT(*)
FROM users
GROUP BY city, status;

GROUP BY 规则

-- 错误:SELECT 中的非聚合列必须在 GROUP BY 中
SELECT city, name, COUNT(*) FROM users GROUP BY city;
-- Error: name 不在 GROUP BY 中

-- 正确
SELECT city, COUNT(*) FROM users GROUP BY city;
-- 或
SELECT city, name, COUNT(*) FROM users GROUP BY city, name;

HAVING 过滤分组

HAVING 用于过滤分组后的结果:

-- 查询用户数超过 100 的城市
SELECT city, COUNT(*) AS user_count
FROM users
GROUP BY city
HAVING COUNT(*) > 100;

-- HAVING vs WHERE
SELECT city, COUNT(*) AS user_count
FROM users
WHERE age > 18 -- 过滤行(分组前)
GROUP BY city
HAVING COUNT(*) > 100; -- 过滤分组(分组后)

HAVING 中使用聚合函数

-- 平均价格超过 100 的分类
SELECT category, AVG(price) AS avg_price
FROM products
GROUP BY category
HAVING AVG(price) > 100;

-- 总销售额超过 10000 的用户
SELECT user_id, SUM(amount) AS total_amount
FROM orders
GROUP BY user_id
HAVING SUM(amount) > 10000;

组合使用

完整的聚合查询

SELECT 
city,
COUNT(*) AS user_count,
AVG(age) AS avg_age,
MAX(age) AS max_age,
MIN(age) AS min_age
FROM users
WHERE status = 'active'
GROUP BY city
HAVING COUNT(*) >= 10
ORDER BY user_count DESC
LIMIT 10;

执行顺序

高级用法

ROLLUP 分组汇总

-- MySQL / PostgreSQL
SELECT
city,
status,
COUNT(*) AS count
FROM users
GROUP BY ROLLUP(city, status);

-- 结果包含:
-- (city, status) 各组统计
-- (city, NULL) 每个城市的小计
-- (NULL, NULL) 总计

CUBE 多维汇总

-- PostgreSQL
SELECT
city,
status,
COUNT(*) AS count
FROM users
GROUP BY CUBE(city, status);

-- 结果包含所有组合的汇总

GROUPING SETS

-- PostgreSQL
SELECT
city,
status,
COUNT(*) AS count
FROM users
GROUP BY GROUPING SETS (
(city, status),
(city),
(status),
()
);

统计分析示例

用户统计

SELECT 
DATE(created_at) AS date,
COUNT(*) AS new_users,
COUNT(DISTINCT city) AS cities
FROM users
GROUP BY DATE(created_at)
ORDER BY date;

销售统计

SELECT 
DATE(order_date) AS date,
COUNT(*) AS order_count,
SUM(amount) AS total_amount,
AVG(amount) AS avg_amount
FROM orders
WHERE status = 'completed'
GROUP BY DATE(order_date)
HAVING SUM(amount) > 10000
ORDER BY date;

排名统计

-- MySQL 8.0+ / PostgreSQL
SELECT
user_id,
SUM(amount) AS total_amount,
RANK() OVER (ORDER BY SUM(amount) DESC) AS rank
FROM orders
GROUP BY user_id
ORDER BY total_amount DESC;

NULL 值处理

-- 聚合函数通常忽略 NULL
SELECT COUNT(bonus) FROM employees; -- 不计算 NULL
SELECT AVG(bonus) FROM employees; -- NULL 不参与平均

-- 使用 COALESCE 替换 NULL
SELECT AVG(COALESCE(bonus, 0)) FROM employees;

-- 分组时 NULL 作为一组
SELECT manager_id, COUNT(*)
FROM employees
GROUP BY manager_id; -- NULL(没有上级的员工)作为一组

小结

本章我们学习了:

  1. COUNT 计数
  2. SUM 求和
  3. AVG 平均值
  4. MAX/MIN 最大/最小值
  5. GROUP BY 分组
  6. HAVING 过滤分组
  7. ROLLUP/CUBE 高级分组
  8. NULL 值处理

练习

  1. 统计用户总数和有电话的用户数
  2. 计算每个分类的产品数量和平均价格
  3. 查询订单数超过 10 的用户
  4. 统计每天的销售总额
  5. 查询每个城市用户年龄的最大值、最小值和平均值