Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Download

GEP475GROUPINEEDANAP

2890 views
Kernel: Python 3 (Anaconda)
Future question for Dr. Soto

Should I concatenate the dataframes on integer indices or on date-time indices? I don't think it will make a difference; I'm just not sure which one is easier.

import pandas as pd import numpy as np
# Load the 2016 NetAtmo export.
# NOTE(review): parse_dates=True without index_col only asks pandas to parse
# the index, so the timestamp column still arrives as strings -- confirm this
# is intended (it is converted explicitly with pd.to_datetime further down).
df1 = pd.read_csv('NetAtmo_2016.csv', parse_dates=True)
df1.describe()

# Label the rows 1..len(df1). Deriving the range from the frame itself
# (instead of the hard-coded range(1, 90144)) keeps the index free of NaN
# labels if the CSV ever has a different number of rows.
df1.index = pd.RangeIndex(1, len(df1) + 1, name='Numbered_index')
df1.head()

# Drop the columns at positions 0, 2, 3, 5 and 6; the later cells only use
# the two columns that remain.
df1.drop(df1.columns[[0, 2, 3, 5, 6]], axis=1, inplace=True)
df1.head(1)
# Load the 2017 NetAtmo export.
df2 = pd.read_csv('NetAtmo_2017.csv', parse_dates=True)

# Continue the integer row labels where df1 stops, so the two frames can be
# stacked without duplicate labels. The bounds are derived from the frames
# (instead of the hard-coded range(90144, 100992)) so they stay correct if
# either CSV changes length. Requires df1 to be loaded first.
df2_start = len(df1) + 1
df2.index = pd.RangeIndex(df2_start, df2_start + len(df2), name='numbered_index')

# Same positional column drop as for df1.
df2.drop(df2.columns[[0, 2, 3, 5, 6]], axis=1, inplace=True)
df2.head()
df1.head()

# Give the 2016 timestamp column the short name 'Time'.
time_column_names = {'Timezone : America/Los_Angeles': 'Time'}
df1 = df1.rename(columns=time_column_names)
df1.head()

df2.tail()

# Stack the 2016 rows on top of the 2017 rows into one frame.
yearly_frames = [df1, df2]
df3 = pd.concat(yearly_frames)
df3.head()
df3.tail()

# Quick-look plot of the combined frame.
df3.plot()
<matplotlib.axes._subplots.AxesSubplot at 0x7fc30f81eb00>
Image in a Jupyter notebook
# Disabled experiments, kept for reference: index df3 by 'Time' and re-plot
# (line plot and histogram).
#df3.set_index('Time', inplace = True)
#df3.head()
#df3.plot()
#df3.plot.hist()
# Inspect the column dtypes of the combined frame.
df3.dtypes
Time object CO2 float64 dtype: object
df3.head()
df3.isnull().head()

# 'Time' is still dtype object at this point; convert it to real timestamps
# so the .dt accessor and datetime arithmetic work.
df3['Time'] = pd.to_datetime(df3.Time)
df3.head()

# Weekday name of each reading. Series.dt.weekday_name was removed from
# pandas; dt.day_name() is its replacement and returns the same strings.
df3.Time.dt.day_name().head()
1 Friday 2 Friday 3 Friday 4 Friday 5 Friday Name: Time, dtype: object
# Isolate the earliest readings: everything up to the last second of
# 2/20/2016. The assignment must sit on its own line -- when it trails the
# comment it is never executed and the filter below raises NameError.
Firstday = pd.to_datetime('2/20/2016 23:59:59')
df3.loc[df3.Time <= Firstday, :].tail()

# Almost a full year of data!
(df3.Time.max() - df3.Time.min())
Timedelta('359 days 05:21:00')
# Store the weekday name of every reading in its own column.
# dt.day_name() replaces the removed Series.dt.weekday_name attribute.
df3['Day'] = df3.Time.dt.day_name()
df3.head()

# so many questions
# Number of readings recorded on each weekday.
df3.Day.value_counts()
Saturday 14598 Tuesday 14589 Sunday 14539 Monday 14488 Wednesday 14472 Friday 14438 Thursday 13867 Name: Day, dtype: int64
# Line plot (the Series.plot default) of readings per weekday.
df3['Day'].value_counts().plot(kind='line')
<matplotlib.axes._subplots.AxesSubplot at 0x7fc30f0ed1d0>
Image in a Jupyter notebook

Switching to df2 because it is still not recognized by datetime

df2.head()
#df3['Time2'].head()
df3.head()

# Time2 is the timestamp of the following row: shift(-1) moves every value
# up one position, leaving NaT in the last row.
df3['Time2'] = df3.Time.shift(-1)
df3.head()

# Gap between each reading and the next one.
df3['TimeDel'] = df3.Time2 - df3.Time
df3.head()

# Gap in seconds. dt.seconds would return only the seconds component of each
# timedelta and silently drop whole days for long gaps; total_seconds()
# reports the full duration.
df3.TimeDel.dt.total_seconds().head()
1 60.0 2 0.0 3 240.0 4 300.0 5 300.0 Name: TimeDel, dtype: float64
# Re-check the column dtypes now that Day, Time2 and TimeDel have been added.
df3.dtypes
Time datetime64[ns] CO2 float64 Day object Time2 datetime64[ns] TimeDel timedelta64[ns] dtype: object
# Turn the timedelta gaps into plain float seconds by dividing by a
# one-second timedelta.
one_second = np.timedelta64(1, 's')
df3['TimeDel'] = df3['TimeDel'] / one_second
df3.dtypes
Time datetime64[ns] CO2 float64 Day object Time2 datetime64[ns] TimeDel float64 dtype: object
# CO2 reading divided by the number of seconds until the next reading.
# NOTE(review): a gap of 0 seconds (two consecutive rows with the same
# timestamp) yields inf here, not NaN, so isnull()/dropna() will not catch
# those rows and they distort describe(). Consider mapping inf to NaN.
df3['CO2_over_TimeDiff'] = (df3.CO2 / df3.TimeDel)

# number of "not a number" in each column
# (kept on its own line so the count is actually evaluated)
df3.isnull().sum()
Time 0 CO2 6 Day 0 Time2 1 TimeDel 1 CO2_over_TimeDiff 7 dtype: int64
# Show the rows whose CO2 reading is missing, then the frame size before
# any rows are removed.
co2_is_missing = df3['CO2'].isnull()
df3[co2_is_missing]
df3.shape
(100991, 6)
# dropping rows that have "any" missing values
# The call must be on its own line; when it trails the comment it is never
# executed and no rows are removed.
df3.dropna(how='any', inplace=True)
df3.shape
(100984, 6)
# Final look at the cleaned frame: first rows, summary statistics, and the
# first few values of the derived ratio column.
df3.head()
df3.describe()
df3['CO2_over_TimeDiff'].head()
2 inf 4 1.123333 5 1.106667 6 1.093333 7 1.023333 Name: CO2_over_TimeDiff, dtype: float64