Validate myFilter Function
Let us validate `myFilter` using the same examples that were used earlier in "Processing Collections using loops".
In [1]:
# Execute the companion notebook in this session; myFilter, used in the cells below, is expected to be defined by it.
%run 02_develop_myFilter_function.ipynb
In [2]:
# Confirm that myFilter is available in the current session before using it.
myFilter
Out[2]:
<function __main__.myFilter(c, f)>
- Read orders data
In [3]:
orders_path = "/data/retail_db/orders/part-00000"
# Read the whole file inside a context manager so the file handle is closed
# as soon as the data is loaded, instead of being left open.
with open(orders_path) as orders_file:
    # One list element per line of the file, without trailing newline characters.
    orders = orders_file.read().splitlines()
In [4]:
# Preview the first 10 records to see the comma-separated layout of each order.
orders[:10]
Out[4]:
['1,2013-07-25 00:00:00.0,11599,CLOSED', '2,2013-07-25 00:00:00.0,256,PENDING_PAYMENT', '3,2013-07-25 00:00:00.0,12111,COMPLETE', '4,2013-07-25 00:00:00.0,8827,CLOSED', '5,2013-07-25 00:00:00.0,11318,COMPLETE', '6,2013-07-25 00:00:00.0,7130,COMPLETE', '7,2013-07-25 00:00:00.0,4530,COMPLETE', '8,2013-07-25 00:00:00.0,2911,PROCESSING', '9,2013-07-25 00:00:00.0,5657,PENDING_PAYMENT', '10,2013-07-25 00:00:00.0,5648,PENDING_PAYMENT']
In [5]:
# Try the customer-id check on a single sample record before using it as a filter.
order = "1,2013-07-25 00:00:00.0,11599,CLOSED"
# The field at index 2 is compared as an integer; the boolean is the cell's output.
11599 == int(order.split(",")[2])
Out[5]:
True
- Get orders placed by customer id 12431
In [6]:
# Keep only the records whose field at index 2 (customer id) equals 12431.
customer_orders = myFilter(
    orders,
    lambda order_line: 12431 == int(order_line.split(",")[2]),
)
In [7]:
# Display the records that passed the customer-id filter.
customer_orders
Out[7]:
['3774,2013-08-16 00:00:00.0,12431,CANCELED', '3870,2013-08-17 00:00:00.0,12431,PENDING_PAYMENT', '4032,2013-08-17 00:00:00.0,12431,ON_HOLD', '22812,2013-12-12 00:00:00.0,12431,PENDING', '22927,2013-12-13 00:00:00.0,12431,CLOSED', '25614,2013-12-30 00:00:00.0,12431,CLOSED', '27585,2014-01-12 00:00:00.0,12431,PROCESSING', '28244,2014-01-15 00:00:00.0,12431,PENDING_PAYMENT', '29109,2014-01-21 00:00:00.0,12431,ON_HOLD', '29232,2014-01-21 00:00:00.0,12431,ON_HOLD', '45894,2014-05-06 00:00:00.0,12431,CLOSED', '46217,2014-05-07 00:00:00.0,12431,CLOSED', '49678,2014-05-31 00:00:00.0,12431,PENDING', '51865,2014-06-15 00:00:00.0,12431,PROCESSING', '63146,2014-02-13 00:00:00.0,12431,PENDING_PAYMENT', '67110,2014-07-14 00:00:00.0,12431,PENDING']
- Get orders placed by customer id 12431 in January 2014
In [8]:
# Try the combined customer-id and month check on a single sample record first.
order = "1,2013-07-25 00:00:00.0,11599,CLOSED"
# Both conditions must hold: customer id at index 2, date prefix at index 1.
(
    int(order.split(",")[2]) == 11599
    and order.split(",")[1].startswith("2013-07")
)
Out[8]:
True
In [9]:
# Keep the records of customer 12431 whose date field starts with '2014-01'.
customer_orders_for_month = myFilter(
    orders,
    lambda order_line: (
        int(order_line.split(",")[2]) == 12431
        and order_line.split(",")[1].startswith("2014-01")
    ),
)
# Show the filtered records as the cell output.
customer_orders_for_month
Out[9]:
['27585,2014-01-12 00:00:00.0,12431,PROCESSING', '28244,2014-01-15 00:00:00.0,12431,PENDING_PAYMENT', '29109,2014-01-21 00:00:00.0,12431,ON_HOLD', '29232,2014-01-21 00:00:00.0,12431,ON_HOLD']
- Get orders placed by customer id 12431 with status PROCESSING or PENDING_PAYMENT in January 2014
In [10]:
# Same customer and month filter as before, additionally restricted to the
# statuses PENDING_PAYMENT and PROCESSING (field at index 3).
customer_orders_for_month = myFilter(
    orders,
    lambda order_line: (
        int(order_line.split(",")[2]) == 12431
        and order_line.split(",")[1].startswith("2014-01")
        and order_line.split(",")[3] in ("PENDING_PAYMENT", "PROCESSING")
    ),
)
In [11]:
# Display the records that passed the customer, month, and status filter.
customer_orders_for_month
Out[11]:
['27585,2014-01-12 00:00:00.0,12431,PROCESSING', '28244,2014-01-15 00:00:00.0,12431,PENDING_PAYMENT']