一种基于MYSQL、MariaDB计算均值、中位数、众数的方法
需求说明
使用Mysql、MariaDB按照部门维度统计工资的均值、中位数、众数。数据及表结构见下:
CREATE TABLE `emp2` (
`sal` decimal(7, 2) NULL DEFAULT NULL COMMENT '雇员的基本工资',
`deptno` int(10) UNSIGNED NULL DEFAULT NULL COMMENT '所在部门'
) ENGINE = InnoDB CHARACTER SET = utf8mb4;
INSERT INTO `emp2` VALUES (800.00, 20);
INSERT INTO `emp2` VALUES (1600.00, 30);
INSERT INTO `emp2` VALUES (1250.00, 30);
INSERT INTO `emp2` VALUES (2975.00, 20);
INSERT INTO `emp2` VALUES (1250.00, 30);
INSERT INTO `emp2` VALUES (2850.00, 30);
INSERT INTO `emp2` VALUES (2450.00, 10);
INSERT INTO `emp2` VALUES (3000.00, 20);
INSERT INTO `emp2` VALUES (5000.00, 10);
INSERT INTO `emp2` VALUES (1500.00, 30);
INSERT INTO `emp2` VALUES (1100.00, 20);
INSERT INTO `emp2` VALUES (950.00, 30);
INSERT INTO `emp2` VALUES (3000.00, 20);
INSERT INTO `emp2` VALUES (1300.00, 10);
INSERT INTO `emp2` VALUES (4500.00, 20);
INSERT INTO `emp2` VALUES (4500.00, 20);
代码实现
/*
1、取所有部门信息
2、统计部门工资重复次数
3、按照重复次数降序排序
4、对有无众数打标识
5、取出有众数的结果并合并重复值
6、部门所有信息(1)关联5得最终结果
*/
WITH d AS(
SELECT DISTINCT deptno FROM emp -- 取出所有的部门信息,有的部门对应的sal可能没有众数
), d_cnt AS (
SELECT
count(sal) over(PARTITION BY deptno,sal) as cnt, -- 对每个部门计算工资重复次数
deptno,
sal
FROM emp2
),d_deny AS
(
SELECT
deptno,
sal,
dense_rank() over(PARTITION BY deptno order by cnt DESC ) as rank,-- 对每个部门按照工资重复次数降序排列,rank为1即对应众数
AVG(cnt) OVER(PARTITION BY deptno) continuous -- 计算工资重复次数的均值,如果众数则该值为1
FROM d_cnt
),d_flag AS(
SELECT *,
CASE WHEN continuous =1 THEN 0 ELSE 1 END contflag -- 给有众数的打上标签(无为0,有为1)
FROM d_deny),
d_mode AS(
SELECT deptno,
GROUP_CONCAT(DISTINCT ROUND(sal,0) ORDER BY sal DESC) modeval
FROM d_flag WHERE contflag =1 AND rank = 1 -- 取有众数的结果集,并合重复的众数
GROUP BY deptno)
SELECT d.deptno,NVL(e.modeval,'NA') finamode from d -- 通过部门表关联上一步的“众数”,得最终结果。部门的工资没有众数的显示为"NA"
LEFT JOIN d_mode e
ON d.deptno = e.deptno