Solutions – Managing Partitioning Tables using Postgresql

Partitioning Tables

Here is the exercise to get comfort with partitioning. We will be using range partitioning.

  • Use retail database. Make sure orders table already exists.
In [1]:
%%HTML
<iframe width="560" height="315" src="https://www.youtube.com/embed/uAkrpaJmbx0?rel=0&amp;controls=1&amp;showinfo=0" frameborder="0" allowfullscreen></iframe>

In [1]:
%load_ext sql
In [2]:
%env DATABASE_URL=postgresql://itversity_retail_user:retail_password@localhost:5432/itversity_retail_db
env: DATABASE_URL=postgresql://itversity_retail_user:retail_password@localhost:5432/itversity_retail_db

Exercise 1

Create table orders_part with the same columns as orders.

  • Partition the table by month using range partitioning on order_date.
  • Add 14 partitions – 13 based up on the data and 1 default. Here is the naming convention.
    • Default – orders_part_default
    • Partition for 2014 January – orders_part_201401
In [117]:
%sql DROP TABLE IF EXISTS orders_part
 * postgresql://itversity_retail_user:***@localhost:5432/itversity_retail_db
Done.
Out[117]:
[]
In [118]:
%%sql

CREATE TABLE orders_part (
    order_id INT,
    order_date DATE,
    order_customer_id INT,
    order_status VARCHAR(45)
) PARTITION BY RANGE (order_date)
 * postgresql://itversity_retail_user:***@localhost:5432/itversity_retail_db
Done.
Out[118]:
[]
In [119]:
%sql SELECT * FROM orders LIMIT 10
 * postgresql://itversity_retail_user:***@localhost:5432/itversity_retail_db
10 rows affected.
Out[119]:
order_id order_date order_customer_id order_status
1 2013-07-25 00:00:00 11599 CLOSED
2 2013-07-25 00:00:00 256 PENDING_PAYMENT
3 2013-07-25 00:00:00 12111 COMPLETE
4 2013-07-25 00:00:00 8827 CLOSED
5 2013-07-25 00:00:00 11318 COMPLETE
6 2013-07-25 00:00:00 7130 COMPLETE
7 2013-07-25 00:00:00 4530 COMPLETE
8 2013-07-25 00:00:00 2911 PROCESSING
9 2013-07-25 00:00:00 5657 PENDING_PAYMENT
10 2013-07-25 00:00:00 5648 PENDING_PAYMENT
In [120]:
%%sql

SELECT count(1)
FROM orders
WHERE order_date >= '2014-01-01' AND order_date < '2014-02-01'
 * postgresql://itversity_retail_user:***@localhost:5432/itversity_retail_db
1 rows affected.
Out[120]:
count
5908
In [121]:
%%sql

CREATE TABLE orders_part_default
PARTITION OF orders_part DEFAULT
 * postgresql://itversity_retail_user:***@localhost:5432/itversity_retail_db
Done.
Out[121]:
[]
In [122]:
%%sql

CREATE TABLE orders_part_201307
PARTITION OF orders_part
FOR VALUES FROM ('2013-07-01') TO ('2013-08-01')
 * postgresql://itversity_retail_user:***@localhost:5432/itversity_retail_db
Done.
Out[122]:
[]
In [123]:
%%sql

CREATE TABLE orders_part_201308
PARTITION OF orders_part
FOR VALUES FROM ('2013-08-01') TO ('2013-09-01')
 * postgresql://itversity_retail_user:***@localhost:5432/itversity_retail_db
Done.
Out[123]:
[]
In [124]:
%%sql

CREATE TABLE orders_part_201309
PARTITION OF orders_part
FOR VALUES FROM ('2013-09-01') TO ('2013-10-01')
 * postgresql://itversity_retail_user:***@localhost:5432/itversity_retail_db
Done.
Out[124]:
[]
In [125]:
%%sql

CREATE TABLE orders_part_201310
PARTITION OF orders_part
FOR VALUES FROM ('2013-10-01') TO ('2013-11-01')
 * postgresql://itversity_retail_user:***@localhost:5432/itversity_retail_db
Done.
Out[125]:
[]
In [126]:
%%sql

CREATE TABLE orders_part_201311
PARTITION OF orders_part
FOR VALUES FROM ('2013-11-01') TO ('2013-12-01')
 * postgresql://itversity_retail_user:***@localhost:5432/itversity_retail_db
Done.
Out[126]:
[]
In [127]:
%%sql

CREATE TABLE orders_part_201312
PARTITION OF orders_part
FOR VALUES FROM ('2013-12-01') TO ('2014-01-01')
 * postgresql://itversity_retail_user:***@localhost:5432/itversity_retail_db
Done.
Out[127]:
[]
In [128]:
%%sql

CREATE TABLE orders_part_201401
PARTITION OF orders_part
FOR VALUES FROM ('2014-01-01') TO ('2014-02-01')
 * postgresql://itversity_retail_user:***@localhost:5432/itversity_retail_db
Done.
Out[128]:
[]
In [129]:
%%sql

CREATE TABLE orders_part_201402
PARTITION OF orders_part
FOR VALUES FROM ('2014-02-01') TO ('2014-03-01')
 * postgresql://itversity_retail_user:***@localhost:5432/itversity_retail_db
Done.
Out[129]:
[]
In [130]:
%%sql

CREATE TABLE orders_part_201403
PARTITION OF orders_part
FOR VALUES FROM ('2014-03-01') TO ('2014-04-01')
 * postgresql://itversity_retail_user:***@localhost:5432/itversity_retail_db
Done.
Out[130]:
[]
In [131]:
%%sql

CREATE TABLE orders_part_201404
PARTITION OF orders_part
FOR VALUES FROM ('2014-04-01') TO ('2014-05-01')
 * postgresql://itversity_retail_user:***@localhost:5432/itversity_retail_db
Done.
Out[131]:
[]
In [132]:
%%sql

CREATE TABLE orders_part_201405
PARTITION OF orders_part
FOR VALUES FROM ('2014-05-01') TO ('2014-06-01')
 * postgresql://itversity_retail_user:***@localhost:5432/itversity_retail_db
Done.
Out[132]:
[]
In [133]:
%%sql

CREATE TABLE orders_part_201406
PARTITION OF orders_part
FOR VALUES FROM ('2014-06-01') TO ('2014-07-01')
 * postgresql://itversity_retail_user:***@localhost:5432/itversity_retail_db
Done.
Out[133]:
[]
In [134]:
%%sql

CREATE TABLE orders_part_201407
PARTITION OF orders_part
FOR VALUES FROM ('2014-07-01') TO ('2014-08-01')
 * postgresql://itversity_retail_user:***@localhost:5432/itversity_retail_db
Done.
Out[134]:
[]

Exercise 2

Let us load and validate data in the partitioned table.

  • Load the data from orders into orders_part.
  • Get count on orders_part as well as all the 14 partitions. You should get 0 for default partition and all the records should be distributed using the other 13 partitions.
In [135]:
%%sql

INSERT INTO orders_part
SELECT * FROM orders
 * postgresql://itversity_retail_user:***@localhost:5432/itversity_retail_db
68883 rows affected.
Out[135]:
[]
In [136]:
%%sql

SELECT count(1) FROM orders
WHERE to_char(order_date, 'yyyy-MM') = '2014-01'
 * postgresql://itversity_retail_user:***@localhost:5432/itversity_retail_db
1 rows affected.
Out[136]:
count
5908
In [137]:
%%sql

SELECT count(1) FROM orders_part
WHERE to_char(order_date, 'yyyy-MM') = '2014-01'
 * postgresql://itversity_retail_user:***@localhost:5432/itversity_retail_db
1 rows affected.
Out[137]:
count
5908
In [138]:
%sql SELECT count(1) FROM orders
 * postgresql://itversity_retail_user:***@localhost:5432/itversity_retail_db
1 rows affected.
Out[138]:
count
68883
In [139]:
%sql SELECT count(1) FROM orders_part
 * postgresql://itversity_retail_user:***@localhost:5432/itversity_retail_db
1 rows affected.
Out[139]:
count
68883
In [140]:
%sql SELECT * FROM orders_part_default LIMIT 10
 * postgresql://itversity_retail_user:***@localhost:5432/itversity_retail_db
0 rows affected.
Out[140]:
order_id order_date order_customer_id order_status
In [141]:
%sql SELECT count(1) FROM orders_part_default
 * postgresql://itversity_retail_user:***@localhost:5432/itversity_retail_db
1 rows affected.
Out[141]:
count
0
In [142]:
%sql SELECT count(1) FROM orders_part_201307
 * postgresql://itversity_retail_user:***@localhost:5432/itversity_retail_db
1 rows affected.
Out[142]:
count
1533
In [143]:
%sql SELECT count(1) FROM orders_part_201308
 * postgresql://itversity_retail_user:***@localhost:5432/itversity_retail_db
1 rows affected.
Out[143]:
count
5680
In [144]:
%sql SELECT count(1) FROM orders_part_201309
 * postgresql://itversity_retail_user:***@localhost:5432/itversity_retail_db
1 rows affected.
Out[144]:
count
5841
In [145]:
%sql SELECT count(1) FROM orders_part_201310
 * postgresql://itversity_retail_user:***@localhost:5432/itversity_retail_db
1 rows affected.
Out[145]:
count
5335
In [146]:
%sql SELECT count(1) FROM orders_part_201311
 * postgresql://itversity_retail_user:***@localhost:5432/itversity_retail_db
1 rows affected.
Out[146]:
count
6381
In [147]:
%sql SELECT count(1) FROM orders_part_201312
 * postgresql://itversity_retail_user:***@localhost:5432/itversity_retail_db
1 rows affected.
Out[147]:
count
5892
In [148]:
%sql SELECT count(1) FROM orders_part_201401
 * postgresql://itversity_retail_user:***@localhost:5432/itversity_retail_db
1 rows affected.
Out[148]:
count
5908
In [149]:
%sql SELECT count(1) FROM orders_part_201402
 * postgresql://itversity_retail_user:***@localhost:5432/itversity_retail_db
1 rows affected.
Out[149]:
count
5635
In [150]:
%sql SELECT count(1) FROM orders_part_201403
 * postgresql://itversity_retail_user:***@localhost:5432/itversity_retail_db
1 rows affected.
Out[150]:
count
5778
In [151]:
%sql SELECT count(1) FROM orders_part_201404
 * postgresql://itversity_retail_user:***@localhost:5432/itversity_retail_db
1 rows affected.
Out[151]:
count
5657
In [152]:
%sql SELECT count(1) FROM orders_part_201405
 * postgresql://itversity_retail_user:***@localhost:5432/itversity_retail_db
1 rows affected.
Out[152]:
count
5467
In [153]:
%sql SELECT count(1) FROM orders_part_201406
 * postgresql://itversity_retail_user:***@localhost:5432/itversity_retail_db
1 rows affected.
Out[153]:
count
5308
In [154]:
%sql SELECT count(1) FROM orders_part_201407
 * postgresql://itversity_retail_user:***@localhost:5432/itversity_retail_db
1 rows affected.
Out[154]:
count
4468

Share this post