MySQL 分组函数全面详解与最佳实践
MySQL 分组函数(聚合函数)的核心知识、注意事项和高级应用技巧:
📊 分组函数核心列表
函数 | 描述 | 示例 |
---|---|---|
COUNT() | 计算行数 | COUNT(*) |
SUM() | 计算数值总和 | SUM(salary) |
AVG() | 计算平均值 | AVG(score) |
MAX() | 获取最大值 | MAX(price) |
MIN() | 获取最小值 | MIN(price) |
GROUP_CONCAT() | 连接分组字符串 | GROUP_CONCAT(name) |
STDDEV() | 计算标准差 | STDDEV(price) |
VAR_POP() | 计算总体方差 | VAR_POP(sales) |
⚠️ 分组函数十大注意事项
1. NULL 值处理
SELECT COUNT(*), -- 所有行数(包含NULL)COUNT(bonus), -- 非NULL行数AVG(COALESCE(bonus, 0)) -- NULL转为0计算
FROM employees;
2. 分组字段选择
-- 错误:非分组字段出现在SELECT
SELECT department, name, AVG(salary)
FROM employees; -- 报错或未定义行为-- 正确:所有非聚合字段必须出现在GROUP BY
SELECT department, name, AVG(salary)
FROM employees
GROUP BY department, name;
3. WHERE vs HAVING
-- WHERE:分组前过滤行
SELECT department, AVG(salary)
FROM employees
WHERE hire_date > '2020-01-01' -- 先过滤
GROUP BY department;-- HAVING:分组后过滤组
SELECT department, AVG(salary) avg_sal
FROM employees
GROUP BY department
HAVING avg_sal > 5000; -- 后过滤
4. 性能优化策略
-- 低效:全表扫描
SELECT department, AVG(salary)
FROM employees
GROUP BY department;-- 高效:添加索引
ALTER TABLE employees ADD INDEX idx_dept (department);
5. 隐式排序问题
-- 结果顺序不保证
SELECT department, COUNT(*)
FROM employees
GROUP BY department;-- 显式排序
SELECT department, COUNT(*) AS emp_count
FROM employees
GROUP BY department
ORDER BY emp_count DESC;
6. 分组函数嵌套限制
-- 允许:单层分组函数
SELECT AVG(MAX(salary)) -- ❌ 错误嵌套-- 正确:使用子查询
SELECT AVG(max_sal)
FROM (SELECT department, MAX(salary) AS max_salFROM employeesGROUP BY department
) dept_max;
7. DISTINCT 用法
-- 统计不重复值
SELECT COUNT(DISTINCT department), -- 不同部门数量COUNT(DISTINCT CASE WHEN salary > 5000 THEN 1 END) -- 高薪人数
FROM employees;
8. 空分组处理
-- 使用 COALESCE 处理空分组
SELECT COALESCE(department, '未分配') AS dept,COUNT(*)
FROM employees
GROUP BY department;
9. 多列分组顺序
-- 分组顺序影响结果
SELECT YEAR(hire_date) AS hire_year,department,COUNT(*)
FROM employees
GROUP BY hire_year, department; -- 先按年再按部门
10. GROUP_CONCAT 限制
-- 默认截断长度1024字符
SET SESSION group_concat_max_len = 10000;SELECT department,GROUP_CONCAT(name ORDER BY salary DESC SEPARATOR '|')
FROM employees
GROUP BY department;
🚀 高级分组技巧
1. 多级分组分析
SELECT YEAR(order_date) AS order_year,QUARTER(order_date) AS quarter,product_category,SUM(amount) AS total_sales,COUNT(DISTINCT customer_id) AS customers
FROM orders
GROUP BY order_year, quarter, product_category
WITH ROLLUP; -- 添加小计和总计行
2. 分组百分比计算
SELECT department,COUNT(*) AS emp_count,ROUND(COUNT(*) * 100.0 / SUM(COUNT(*)) OVER (), 2) AS pct
FROM employees
GROUP BY department;
3. 分组排名
SELECT department,name,salary,RANK() OVER (PARTITION BY department ORDER BY salary DESC) AS dept_rank
FROM employees;
4. 分组比较分析
SELECT department,AVG(salary) AS avg_salary,AVG(salary) - (SELECT AVG(salary) FROM employees) AS diff_from_avg
FROM employees
GROUP BY department;
5. 时间序列分组
SELECT DATE_FORMAT(order_date, '%Y-%m') AS month,SUM(amount) AS monthly_sales,LAG(SUM(amount)) OVER (ORDER BY DATE_FORMAT(order_date, '%Y-%m')) AS prev_month
FROM orders
GROUP BY month;
🔧 性能优化指南
1. 索引策略
-- 复合索引优化分组
ALTER TABLE orders ADD INDEX idx_category_date (product_category, order_date);-- 覆盖索引
EXPLAIN SELECT product_category, COUNT(*)
FROM orders
GROUP BY product_category; -- 使用索引
2. 临时表优化
-- 增大临时表内存
SET tmp_table_size = 256*1024*1024; -- 256MB
SET max_heap_table_size = 256*1024*1024;-- 监控临时表使用
SHOW STATUS LIKE 'Created_tmp%';
3. 分区表优化
-- 按日期分区
CREATE TABLE sales (sale_id INT,sale_date DATE,amount DECIMAL(10,2)
) PARTITION BY RANGE (YEAR(sale_date)) (PARTITION p2020 VALUES LESS THAN (2021),PARTITION p2021 VALUES LESS THAN (2022),PARTITION p2022 VALUES LESS THAN (2023)
);-- 分区分组查询
SELECT YEAR(sale_date), SUM(amount)
FROM sales
GROUP BY YEAR(sale_date); -- 仅扫描相关分区
4. 物化视图(MySQL 8.0+)
-- 创建分组结果缓存
CREATE TABLE sales_summary AS
SELECT product_id, YEAR(order_date) AS year, SUM(amount) AS total
FROM orders
GROUP BY product_id, year;-- 定期刷新
REPLACE INTO sales_summary
SELECT product_id, YEAR(order_date), SUM(amount)
FROM orders
WHERE order_date > (SELECT MAX(order_date) FROM sales_summary)
GROUP BY product_id, YEAR(order_date);
💡 最佳实践总结
1. 分组设计原则
-- 明确分组粒度
SELECT DATE(order_date) AS day, -- 按天HOUR(order_time) AS hour, -- 按小时COUNT(*)
FROM orders
GROUP BY day, hour;
2. 安全处理大数据集
-- 分页处理大结果集
SELECT department, AVG(salary)
FROM employees
GROUP BY department
LIMIT 10 OFFSET 20; -- 第三页
3. 结果验证技巧
-- 验证分组总数
SELECT COUNT(DISTINCT department)
FROM employees; -- 应与分组行数一致-- 交叉验证
SELECT (SELECT COUNT(*) FROM employees) AS total,SUM(emp_count) AS group_total
FROM (SELECT department, COUNT(*) AS emp_countFROM employeesGROUP BY department
) dept_groups;
4. 执行计划分析
-- 检查分组性能
EXPLAIN
SELECT department, AVG(salary)
FROM employees
GROUP BY department;-- 关注以下指标:
-- 1. Using temporary (是否使用临时表)
-- 2. Using filesort (是否文件排序)
-- 3. key (使用的索引)
5. 避免常见陷阱
-- 陷阱1:错误处理NULL
SELECT department, AVG(bonus) -- 忽略NULL
FROM employees;-- 陷阱2:混淆WHERE和HAVING
SELECT department, AVG(salary)
FROM employees
WHERE AVG(salary) > 5000; -- 错误!WHERE不能使用聚合函数-- 陷阱3:未排序的分页
SELECT department, COUNT(*)
FROM employees
GROUP BY department
LIMIT 10; -- 结果随机
🚀 综合应用案例
销售分析报告
SELECT c.country,p.category,YEAR(o.order_date) AS order_year,COUNT(DISTINCT o.customer_id) AS customers,COUNT(*) AS orders,SUM(o.amount) AS revenue,AVG(o.amount) AS avg_order_value,GROUP_CONCAT(DISTINCT p.product_name ORDER BY p.product_name SEPARATOR ', ') AS products
FROM orders o
JOIN products p ON o.product_id = p.id
JOIN customers c ON o.customer_id = c.id
WHERE o.order_date BETWEEN '2023-01-01' AND '2023-12-31'
GROUP BY c.country, p.category, order_year WITH ROLLUP
HAVING revenue > 10000
ORDER BY country, category, order_year DESC;