Book a Demo!
CoCalc Logo Icon
Store · Features · Docs · Share · Support · News · About · Policies · Sign Up · Sign In
YStrano
GitHub Repository: YStrano/DataScience_GA
Path: blob/master/april_18/lessons/lesson-15/code/Demo.ipynb
1904 views
Kernel: Python 2
import pandas as pd
# Load the Rossmann sales data. low_memory=False makes pandas infer each
# column's dtype from the whole file instead of chunk by chunk, which is the
# remedy the DtypeWarning about mixed types in column 7 recommends.
data = pd.read_csv(
    '../assets/dataset/rossmann.csv',
    skipinitialspace=True,
    low_memory=False,
)

# Parse the dates and use them as the row index.
data['Date'] = pd.to_datetime(data['Date'])
data.set_index('Date', inplace=True)

# Expose the calendar year and month of the index as ordinary columns.
data['Year'] = data.index.year
data['Month'] = data.index.month

# Rows belonging to store 1 only.
store1_data = data[data.Store == 1]

data.head()
/home/user/anaconda3/envs/py2/lib/python2.7/site-packages/IPython/core/interactiveshell.py:2723: DtypeWarning: Columns (7) have mixed types. Specify dtype option on import or set low_memory=False. interactivity=interactivity, compiler=compiler, result=result)
# Show the year of every entry in the date index.
data.index.year
array([2015, 2015, 2015, ..., 2013, 2013, 2013], dtype=int32)
import seaborn as sb %matplotlib inline
# Mean sales per calendar month, ordered from the lowest monthly mean to the
# highest.
(
    data[['Sales']]
    .resample('M')
    .mean()
    .sort_values(by='Sales')
)
# Show the date index of the store 1 subset.
store1_data.index
DatetimeIndex(['2015-07-31', '2015-07-30', '2015-07-29', '2015-07-28', '2015-07-27', '2015-07-26', '2015-07-25', '2015-07-24', '2015-07-23', '2015-07-22', ... '2013-01-10', '2013-01-09', '2013-01-08', '2013-01-07', '2013-01-06', '2013-01-05', '2013-01-04', '2013-01-03', '2013-01-02', '2013-01-01'], dtype='datetime64[ns]', name=u'Date', length=942, freq=None)
# Store 1 rows dated in 2015. Rows are selected with .loc because selecting
# rows by a date string through plain DataFrame[] is deprecated in pandas 1.x
# and removed in pandas 2.0.
store1_data_2015 = store1_data.loc['2015']

# Plot sales for the rows where the store was open.
store1_data_2015[store1_data_2015.Open == 1][['Sales']].plot()
<matplotlib.axes._subplots.AxesSubplot at 0x7f79e826de10>
Image in a Jupyter notebook
# Box plots of store 1 sales per month: one panel per Open value, boxes
# coloured by Promo.
# NOTE(review): seaborn 0.9 renamed factorplot to catplot - confirm the
# installed seaborn version before switching.
sb.factorplot(
    data=store1_data,
    x='Month',
    y='Sales',
    hue='Promo',
    col='Open',
    kind='box',
)
<seaborn.axisgrid.FacetGrid at 0x7f79ff1c6e50>
Image in a Jupyter notebook
# Box plots of store 1 customer counts per day of week, one panel per Open
# value.
# NOTE(review): seaborn 0.9 renamed factorplot to catplot - confirm the
# installed seaborn version before switching.
sb.factorplot(
    data=store1_data,
    x='DayOfWeek',
    y='Customers',
    col='Open',
    kind='box',
)
<seaborn.axisgrid.FacetGrid at 0x7f79e6199ad0>
Image in a Jupyter notebook
# Day-over-day change in the 15-day rolling mean of average daily sales,
# ordered from the most negative change to the most positive.
(
    data[['Sales']]
    .resample('D').mean()
    .rolling(window=15, center=False).mean()
    .diff(1)
    .sort_values(by='Sales')
)
# Per-day means over all rows that share a date.
average_daily_sales = data[['Sales', 'Open']].resample('D').mean()
average_daily_customers = data[['Customers']].resample('D').mean()

# Change against the value seven daily periods (one week) earlier.
average_daily_sales['DiffVsLastWeek'] = (
    average_daily_sales['Sales'].diff(periods=7)
)
average_daily_customers['DiffVsLastWeek'] = (
    average_daily_customers['Customers'].diff(periods=7)
)

# Keep the days whose mean Open value is exactly 1, ordered by weekly change.
# NOTE(review): Open is a daily mean here, so == 1 presumably keeps only days
# on which every row was open - confirm that this is the intended filter.
average_daily_sales[average_daily_sales.Open == 1].sort_values(
    by='DiffVsLastWeek'
)
# Rebuild the per-day means; this replaces the earlier frame, so the
# DiffVsLastWeek column added to it is no longer present.
average_daily_sales = (
    data[['Sales', 'Open']]
    .resample('D')
    .mean()
)

# Autocorrelation of average daily sales at a one-day lag.
average_daily_sales.Sales.autocorr(lag=1)
-0.02585827600638357
# Autocorrelation of average daily sales at a 30-day lag.
average_daily_sales['Sales'].autocorr(lag=30)
-0.12984245822530616
# Running total of the per-day MEAN of sales (not of total sales): each day
# contributes its average over that day's rows.
(
    data['Sales']
    .resample('D')
    .mean()
    .expanding()
    .sum()
)
Date 2013-01-01 8.728456e+01 2013-01-02 6.320315e+03 2013-01-03 1.201343e+04 2013-01-04 1.796765e+04 2013-01-05 2.330540e+04 2013-01-06 2.343446e+04 2013-01-07 3.314464e+04 2013-01-08 4.099167e+04 2013-01-09 4.793929e+04 2013-01-10 5.489130e+04 2013-01-11 6.210144e+04 2013-01-12 6.749829e+04 2013-01-13 6.762748e+04 2013-01-14 7.290711e+04 2013-01-15 7.785114e+04 2013-01-16 8.259825e+04 2013-01-17 8.742924e+04 2013-01-18 9.293329e+04 2013-01-19 9.832360e+04 2013-01-20 9.845956e+04 2013-01-21 1.063170e+05 2013-01-22 1.137792e+05 2013-01-23 1.207268e+05 2013-01-24 1.273993e+05 2013-01-25 1.343775e+05 2013-01-26 1.396231e+05 2013-01-27 1.397512e+05 2013-01-28 1.450236e+05 2013-01-29 1.499139e+05 2013-01-30 1.552141e+05 ... 2015-07-02 5.272519e+06 2015-07-03 5.280170e+06 2015-07-04 5.285600e+06 2015-07-05 5.285841e+06 2015-07-06 5.292604e+06 2015-07-07 5.298337e+06 2015-07-08 5.304540e+06 2015-07-09 5.310762e+06 2015-07-10 5.317099e+06 2015-07-11 5.322715e+06 2015-07-12 5.322963e+06 2015-07-13 5.332686e+06 2015-07-14 5.341063e+06 2015-07-15 5.348916e+06 2015-07-16 5.356427e+06 2015-07-17 5.363556e+06 2015-07-18 5.369340e+06 2015-07-19 5.369608e+06 2015-07-20 5.376190e+06 2015-07-21 5.381891e+06 2015-07-22 5.387371e+06 2015-07-23 5.393368e+06 2015-07-24 5.399289e+06 2015-07-25 5.404689e+06 2015-07-26 5.404946e+06 2015-07-27 5.414549e+06 2015-07-28 5.422724e+06 2015-07-29 5.430347e+06 2015-07-30 5.438238e+06 2015-07-31 5.447305e+06 Freq: D, Name: Sales, dtype: float64
# Total sales over every row dated 2013-01-01. Rows are selected with .loc
# because selecting rows by a date string through plain DataFrame[] is
# deprecated in pandas 1.x and removed in pandas 2.0.
data.loc['2013-01-01', 'Sales'].sum()
97235
# Total sales per calendar day.
total_daily_sales = data[['Sales']].resample('D').sum()

# Running total of daily sales, shown for December 2014. Rows are selected
# with .loc because selecting rows by a date string through plain DataFrame[]
# is deprecated in pandas 1.x and removed in pandas 2.0.
total_daily_sales.expanding().sum().loc['2014-12']
# Show the date index of the daily totals.
total_daily_sales.index
DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04', '2013-01-05', '2013-01-06', '2013-01-07', '2013-01-08', '2013-01-09', '2013-01-10', ... '2015-07-22', '2015-07-23', '2015-07-24', '2015-07-25', '2015-07-26', '2015-07-27', '2015-07-28', '2015-07-29', '2015-07-30', '2015-07-31'], dtype='datetime64[ns]', name=u'Date', length=942, freq='D')