Let us see how we can perform aggregations with in a partition or group using Windowing/Analytics Functions.
For simple aggregations where we have to get grouping key and aggregated results we can use GROUP BY.
If we want to get the raw data along with aggregated results, then using GROUP BY is not possible or overly complicated.
Using aggregate functions with OVER Clause not only simplifies the process of writing query, but also better with respect to performance.
Let us take an example of getting employee salary percentage when compared to department salary expense.
%%sql
SELECT e.employee_id, e.department_id, e.salary,
ae.department_salary_expense,
ae.avg_salary_expense
FROM employees e JOIN (
SELECT department_id,
sum(salary) AS department_salary_expense,
round(avg(salary)::numeric, 2) AS avg_salary_expense
FROM employees
GROUP BY department_id
) ae
ON e.department_id = ae.department_id
ORDER BY department_id, salary
LIMIT 10
employee_id | department_id | salary | department_salary_expense | avg_salary_expense |
---|---|---|---|---|
200 | 10 | 4400.00 | 4400.00 | 4400.00 |
202 | 20 | 6000.00 | 19000.00 | 9500.00 |
201 | 20 | 13000.00 | 19000.00 | 9500.00 |
119 | 30 | 2500.00 | 24900.00 | 4150.00 |
118 | 30 | 2600.00 | 24900.00 | 4150.00 |
117 | 30 | 2800.00 | 24900.00 | 4150.00 |
116 | 30 | 2900.00 | 24900.00 | 4150.00 |
115 | 30 | 3100.00 | 24900.00 | 4150.00 |
114 | 30 | 11000.00 | 24900.00 | 4150.00 |
203 | 40 | 6500.00 | 6500.00 | 6500.00 |
%%sql
SELECT e.employee_id, e.department_id, e.salary,
ae.department_salary_expense,
ae.avg_salary_expense,
round(e.salary/ae.department_salary_expense * 100, 2) pct_salary
FROM employees e JOIN (
SELECT department_id,
sum(salary) AS department_salary_expense,
round(avg(salary)::numeric, 2) AS avg_salary_expense
FROM employees
GROUP BY department_id
) ae
ON e.department_id = ae.department_id
ORDER BY department_id, salary
LIMIT 10
employee_id | department_id | salary | department_salary_expense | avg_salary_expense | pct_salary |
---|---|---|---|---|---|
200 | 10 | 4400.00 | 4400.00 | 4400.00 | 100.00 |
202 | 20 | 6000.00 | 19000.00 | 9500.00 | 31.58 |
201 | 20 | 13000.00 | 19000.00 | 9500.00 | 68.42 |
119 | 30 | 2500.00 | 24900.00 | 4150.00 | 10.04 |
118 | 30 | 2600.00 | 24900.00 | 4150.00 | 10.44 |
117 | 30 | 2800.00 | 24900.00 | 4150.00 | 11.24 |
116 | 30 | 2900.00 | 24900.00 | 4150.00 | 11.65 |
115 | 30 | 3100.00 | 24900.00 | 4150.00 | 12.45 |
114 | 30 | 11000.00 | 24900.00 | 4150.00 | 44.18 |
203 | 40 | 6500.00 | 6500.00 | 6500.00 | 100.00 |
We can use all standard aggregate functions such as count
, sum
, min
, max
, avg
etc.
employee_id | department_id | salary | department_salary_expense |
---|---|---|---|
200 | 10 | 4400.00 | 4400.00 |
201 | 20 | 13000.00 | 19000.00 |
202 | 20 | 6000.00 | 19000.00 |
114 | 30 | 11000.00 | 24900.00 |
115 | 30 | 3100.00 | 24900.00 |
116 | 30 | 2900.00 | 24900.00 |
117 | 30 | 2800.00 | 24900.00 |
118 | 30 | 2600.00 | 24900.00 |
119 | 30 | 2500.00 | 24900.00 |
203 | 40 | 6500.00 | 6500.00 |
employee_id | department_id | salary | department_salary_expense | pct_salary |
---|---|---|---|---|
200 | 10 | 4400.00 | 4400.00 | 100.00 |
202 | 20 | 6000.00 | 19000.00 | 31.58 |
201 | 20 | 13000.00 | 19000.00 | 68.42 |
119 | 30 | 2500.00 | 24900.00 | 10.04 |
118 | 30 | 2600.00 | 24900.00 | 10.44 |
117 | 30 | 2800.00 | 24900.00 | 11.24 |
116 | 30 | 2900.00 | 24900.00 | 11.65 |
115 | 30 | 3100.00 | 24900.00 | 12.45 |
114 | 30 | 11000.00 | 24900.00 | 44.18 |
203 | 40 | 6500.00 | 6500.00 | 100.00 |
%%sql
SELECT e.employee_id, e.department_id, e.salary,
sum(e.salary) OVER (
PARTITION BY e.department_id
) AS sum_sal_expense,
round(avg(e.salary) OVER (
PARTITION BY e.department_id
), 2) AS avg_sal_expense,
min(e.salary) OVER (
PARTITION BY e.department_id
) AS min_sal_expense,
max(e.salary) OVER (
PARTITION BY e.department_id
) AS max_sal_expense,
count(e.salary) OVER (
PARTITION BY e.department_id
) AS cnt_sal_expense
FROM employees e
ORDER BY e.department_id,
e.salary
LIMIT 10
employee_id | department_id | salary | sum_sal_expense | avg_sal_expense | min_sal_expense | max_sal_expense | cnt_sal_expense |
---|---|---|---|---|---|---|---|
200 | 10 | 4400.00 | 4400.00 | 4400.00 | 4400.00 | 4400.00 | 1 |
202 | 20 | 6000.00 | 19000.00 | 9500.00 | 6000.00 | 13000.00 | 2 |
201 | 20 | 13000.00 | 19000.00 | 9500.00 | 6000.00 | 13000.00 | 2 |
119 | 30 | 2500.00 | 24900.00 | 4150.00 | 2500.00 | 11000.00 | 6 |
118 | 30 | 2600.00 | 24900.00 | 4150.00 | 2500.00 | 11000.00 | 6 |
117 | 30 | 2800.00 | 24900.00 | 4150.00 | 2500.00 | 11000.00 | 6 |
116 | 30 | 2900.00 | 24900.00 | 4150.00 | 2500.00 | 11000.00 | 6 |
115 | 30 | 3100.00 | 24900.00 | 4150.00 | 2500.00 | 11000.00 | 6 |
114 | 30 | 11000.00 | 24900.00 | 4150.00 | 2500.00 | 11000.00 | 6 |
203 | 40 | 6500.00 | 6500.00 | 6500.00 | 6500.00 | 6500.00 | 1 |
%%sql
SELECT
order_date,
order_item_product_id,
revenue,
sum(revenue) OVER (PARTITION BY order_date) AS sum_revenue,
min(revenue) OVER (PARTITION BY order_date) AS min_revenue,
max(revenue) OVER (PARTITION BY order_date) AS max_revenue
FROM daily_product_revenue
ORDER BY order_date,
revenue DESC
LIMIT 10
order_date | order_item_product_id | revenue | sum_revenue | min_revenue | max_revenue |
---|---|---|---|---|---|
2013-07-25 00:00:00 | 1004 | 5599.72 | 31547.23 | 49.98 | 5599.72 |
2013-07-25 00:00:00 | 191 | 5099.49 | 31547.23 | 49.98 | 5599.72 |
2013-07-25 00:00:00 | 957 | 4499.70 | 31547.23 | 49.98 | 5599.72 |
2013-07-25 00:00:00 | 365 | 3359.44 | 31547.23 | 49.98 | 5599.72 |
2013-07-25 00:00:00 | 1073 | 2999.85 | 31547.23 | 49.98 | 5599.72 |
2013-07-25 00:00:00 | 1014 | 2798.88 | 31547.23 | 49.98 | 5599.72 |
2013-07-25 00:00:00 | 403 | 1949.85 | 31547.23 | 49.98 | 5599.72 |
2013-07-25 00:00:00 | 502 | 1650.00 | 31547.23 | 49.98 | 5599.72 |
2013-07-25 00:00:00 | 627 | 1079.73 | 31547.23 | 49.98 | 5599.72 |
2013-07-25 00:00:00 | 226 | 599.99 | 31547.23 | 49.98 | 5599.72 |