{ "cells": [ { "cell_type": "markdown", "metadata": { "collapsed": false }, "source": [ "###### Future question for Dr. Soto\n", "\n", "Should I concatenate the dataframes on integer indices or on date-time indices? I don't think it will make a difference, but I'm not sure which one is easier." ] }, { "cell_type": "code", "execution_count": 777, "metadata": { "collapsed": false }, "outputs": [ ], "source": [ "import pandas as pd\n", "import numpy as np" ] }, { "cell_type": "code", "execution_count": 778, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TimestampTemperatureHumidityCO2NoisePressure
count9.014300e+0490143.00000090143.00000090137.00000090132.00000090143.000000
mean1.469613e+0922.76672451.291637550.20479938.9647301011.348933
std7.900773e+061.5922686.929620318.3217327.1007034.217541
min1.455917e+0917.90000027.000000201.00000035.000000995.000000
25%1.462765e+0921.70000049.000000354.00000036.0000001008.300000
50%1.469657e+0922.90000052.000000416.00000036.0000001011.000000
75%1.476459e+0923.80000055.000000639.00000038.0000001014.100000
max1.483257e+0928.50000076.0000002777.00000079.0000001027.500000
\n", "
" ] }, "execution_count": 778, "metadata": { }, "output_type": "execute_result" } ], "source": [ "df1 = pd.read_csv('NetAtmo_2016.csv', parse_dates = True,)\n", "df1.describe()" ] }, { "cell_type": "code", "execution_count": 779, "metadata": { "collapsed": false }, "outputs": [ ], "source": [ "# 1-based row numbers sized from the loaded frame instead of a hard-coded row count,\n", "# so the index stays aligned if the CSV gains or loses rows\n", "new_index1 = pd.Series(range(1, len(df1) + 1))" ] }, { "cell_type": "code", "execution_count": 780, "metadata": { "collapsed": false }, "outputs": [ ], "source": [ "df1['Numbered_index'] = new_index1" ] }, { "cell_type": "code", "execution_count": 781, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TimestampTimezone : America/Los_AngelesTemperatureHumidityCO2NoisePressure
Numbered_index
114559171992/19/16 13:2618.876NaNNaN1015.7
214559172552/19/16 13:2719.275718.0NaN1015.7
314559172572/19/16 13:2719.973NaNNaN1015.7
414559175132/19/16 13:3120.373337.044.01015.8
514559178142/19/16 13:3621.270332.047.01015.7
\n", "
" ] }, "execution_count": 781, "metadata": { }, "output_type": "execute_result" } ], "source": [ "df1.set_index('Numbered_index', inplace = True)\n", "df1.head()" ] }, { "cell_type": "code", "execution_count": 782, "metadata": { "collapsed": false }, "outputs": [ ], "source": [ "# keep only the local-time string and CO2 columns, selected by name:\n", "# a positional drop depends on the CSV column order and fails if this cell is re-run\n", "df1 = df1[['Timezone : America/Los_Angeles', 'CO2']]" ] }, { "cell_type": "code", "execution_count": 783, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Timezone : America/Los_AngelesCO2
Numbered_index
12/19/16 13:26NaN
\n", "
" ] }, "execution_count": 783, "metadata": { }, "output_type": "execute_result" } ], "source": [ "df1.head(1)" ] }, { "cell_type": "code", "execution_count": 784, "metadata": { "collapsed": false }, "outputs": [ ], "source": [ "df2 = pd.read_csv('NetAtmo_2017.csv', parse_dates = True)" ] }, { "cell_type": "code", "execution_count": 785, "metadata": { "collapsed": false }, "outputs": [ ], "source": [ "# continue the row numbering right after the last 2016 row; both bounds are\n", "# derived from the frame lengths instead of hard-coded row counts\n", "first_row2 = len(df1) + 1\n", "new_index2 = pd.Series(range(first_row2, first_row2 + len(df2)))" ] }, { "cell_type": "code", "execution_count": 786, "metadata": { "collapsed": false }, "outputs": [ ], "source": [ "df2['numbered_index'] = new_index2" ] }, { "cell_type": "code", "execution_count": 787, "metadata": { "collapsed": false }, "outputs": [ ], "source": [ "df2.set_index('numbered_index', inplace = True)\n" ] }, { "cell_type": "code", "execution_count": 788, "metadata": { "collapsed": false }, "outputs": [ ], "source": [ "# keep only the Time and CO2 columns, selected by name so the cell is\n", "# independent of the CSV column order and safe to re-run\n", "df2 = df2[['Time', 'CO2']]" ] }, { "cell_type": "code", "execution_count": 789, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TimeCO2
numbered_index
901441/1/17 0:00482
901451/1/17 0:05491
901461/1/17 0:11480
901471/1/17 0:16486
901481/1/17 0:21490
\n", "
" ] }, "execution_count": 789, "metadata": { }, "output_type": "execute_result" } ], "source": [ "df2.head()" ] }, { "cell_type": "code", "execution_count": 790, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Timezone : America/Los_AngelesCO2
Numbered_index
12/19/16 13:26NaN
22/19/16 13:27718.0
32/19/16 13:27NaN
42/19/16 13:31337.0
52/19/16 13:36332.0
\n", "
" ] }, "execution_count": 790, "metadata": { }, "output_type": "execute_result" } ], "source": [ "df1.head()" ] }, { "cell_type": "code", "execution_count": 791, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TimeCO2
Numbered_index
12/19/16 13:26NaN
22/19/16 13:27718.0
32/19/16 13:27NaN
42/19/16 13:31337.0
52/19/16 13:36332.0
\n", "
" ] }, "execution_count": 791, "metadata": { }, "output_type": "execute_result" } ], "source": [ "df1 = df1.rename(columns = {'Timezone : America/Los_Angeles':'Time'})\n", "df1.head()" ] }, { "cell_type": "code", "execution_count": 792, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TimeCO2
numbered_index
1009872/12/17 18:27484
1009882/12/17 18:32486
1009892/12/17 18:37469
1009902/12/17 18:42485
1009912/12/17 18:47480
\n", "
" ] }, "execution_count": 792, "metadata": { }, "output_type": "execute_result" } ], "source": [ "df2.tail()" ] }, { "cell_type": "code", "execution_count": 793, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TimeCO2
12/19/16 13:26NaN
22/19/16 13:27718.0
32/19/16 13:27NaN
42/19/16 13:31337.0
52/19/16 13:36332.0
\n", "
" ] }, "execution_count": 793, "metadata": { }, "output_type": "execute_result" } ], "source": [ "df3 = pd.concat([df1,df2])\n", "df3.head()" ] }, { "cell_type": "code", "execution_count": 794, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TimeCO2
1009872/12/17 18:27484.0
1009882/12/17 18:32486.0
1009892/12/17 18:37469.0
1009902/12/17 18:42485.0
1009912/12/17 18:47480.0
\n", "
" ] }, "execution_count": 794, "metadata": { }, "output_type": "execute_result" } ], "source": [ "df3.tail()" ] }, { "cell_type": "code", "execution_count": 795, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 795, "metadata": { }, "output_type": "execute_result" }, { "data": { "image/png": "ca3b9873cafef467138c03a5f781afff01615a03" }, "execution_count": 795, "metadata": { }, "output_type": "execute_result" } ], "source": [ "df3.plot()" ] }, { "cell_type": "code", "execution_count": 796, "metadata": { "collapsed": false }, "outputs": [ ], "source": [ "#df3.set_index('Time', inplace = True)\n", "#df3.head()" ] }, { "cell_type": "code", "execution_count": 797, "metadata": { "collapsed": false }, "outputs": [ ], "source": [ "#df3.plot()" ] }, { "cell_type": "code", "execution_count": 798, "metadata": { "collapsed": false }, "outputs": [ ], "source": [ "#df3.plot.hist()" ] }, { "cell_type": "code", "execution_count": 799, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "Time object\n", "CO2 float64\n", "dtype: object" ] }, "execution_count": 799, "metadata": { }, "output_type": "execute_result" } ], "source": [ "df3.dtypes" ] }, { "cell_type": "code", "execution_count": 800, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TimeCO2
12/19/16 13:26NaN
22/19/16 13:27718.0
32/19/16 13:27NaN
42/19/16 13:31337.0
52/19/16 13:36332.0
\n", "
" ] }, "execution_count": 800, "metadata": { }, "output_type": "execute_result" } ], "source": [ "df3.head()" ] }, { "cell_type": "code", "execution_count": 801, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TimeCO2
1FalseTrue
2FalseFalse
3FalseTrue
4FalseFalse
5FalseFalse
\n", "
" ] }, "execution_count": 801, "metadata": { }, "output_type": "execute_result" } ], "source": [ "df3.isnull().head()" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "collapsed": false }, "outputs": [ ], "source": [ ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "collapsed": false }, "outputs": [ ], "source": [ ] }, { "cell_type": "code", "execution_count": 802, "metadata": { "collapsed": false }, "outputs": [ ], "source": [ "df3['Time'] = pd.to_datetime(df3.Time)" ] }, { "cell_type": "code", "execution_count": 803, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TimeCO2
12016-02-19 13:26:00NaN
22016-02-19 13:27:00718.0
32016-02-19 13:27:00NaN
42016-02-19 13:31:00337.0
52016-02-19 13:36:00332.0
\n", "
" ] }, "execution_count": 803, "metadata": { }, "output_type": "execute_result" } ], "source": [ "df3.head()" ] }, { "cell_type": "markdown", "metadata": { "collapsed": false }, "source": [ "https://pandas.pydata.org/pandas-docs/stable/api.html#datetimelike-properties" ] }, { "cell_type": "code", "execution_count": 804, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "1 Friday\n", "2 Friday\n", "3 Friday\n", "4 Friday\n", "5 Friday\n", "Name: Time, dtype: object" ] }, "execution_count": 804, "metadata": { }, "output_type": "execute_result" } ], "source": [ "# .dt.weekday_name was removed in pandas 1.0; .dt.day_name() returns the same weekday strings\n", "df3.Time.dt.day_name().head()" ] }, { "cell_type": "code", "execution_count": 805, "metadata": { "collapsed": false }, "outputs": [ ], "source": [ "# cutoff for isolating the data up to the end of the second day (2016-02-20)\n", "Firstday = pd.to_datetime('2/20/2016 23:59:59')" ] }, { "cell_type": "code", "execution_count": 806, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TimeCO2
4112016-02-20 23:39:00400.0
4122016-02-20 23:44:00419.0
4132016-02-20 23:49:00407.0
4142016-02-20 23:54:00417.0
4152016-02-20 23:59:00417.0
\n", "
" ] }, "execution_count": 806, "metadata": { }, "output_type": "execute_result" } ], "source": [ "df3.loc[df3.Time <= Firstday, :].tail()" ] }, { "cell_type": "code", "execution_count": 807, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "Timedelta('359 days 05:21:00')" ] }, "execution_count": 807, "metadata": { }, "output_type": "execute_result" } ], "source": [ "#almost a full year of data!\n", "(df3.Time.max() - df3.Time.min())" ] }, { "cell_type": "code", "execution_count": 808, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TimeCO2Day
12016-02-19 13:26:00NaNFriday
22016-02-19 13:27:00718.0Friday
32016-02-19 13:27:00NaNFriday
42016-02-19 13:31:00337.0Friday
52016-02-19 13:36:00332.0Friday
\n", "
" ] }, "execution_count": 808, "metadata": { }, "output_type": "execute_result" } ], "source": [ "# weekday name of each reading; .dt.weekday_name was removed in pandas 1.0,\n", "# .dt.day_name() returns the same strings\n", "df3['Day'] = df3.Time.dt.day_name()\n", "df3.head()" ] }, { "cell_type": "code", "execution_count": 809, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "Saturday 14598\n", "Tuesday 14589\n", "Sunday 14539\n", "Monday 14488\n", "Wednesday 14472\n", "Friday 14438\n", "Thursday 13867\n", "Name: Day, dtype: int64" ] }, "execution_count": 809, "metadata": { }, "output_type": "execute_result" } ], "source": [ "# so many questions\n", "df3.Day.value_counts()" ] }, { "cell_type": "code", "execution_count": 810, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 810, "metadata": { }, "output_type": "execute_result" }, { "data": { "image/png": "4d2d598854af700a098269dbb982bfd299de63df" }, "execution_count": 810, "metadata": { }, "output_type": "execute_result" } ], "source": [ "df3.Day.value_counts().plot()" ] }, { "cell_type": "markdown", "metadata": { "collapsed": false }, "source": [ "Switching to df2 because it is still not recognized by datetime" ] }, { "cell_type": "code", "execution_count": 811, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TimeCO2
numbered_index
901441/1/17 0:00482
901451/1/17 0:05491
901461/1/17 0:11480
901471/1/17 0:16486
901481/1/17 0:21490
\n", "
" ] }, "execution_count": 811, "metadata": { }, "output_type": "execute_result" } ], "source": [ "df2.head()" ] }, { "cell_type": "code", "execution_count": 812, "metadata": { "collapsed": false }, "outputs": [ ], "source": [ "#df3['Time2'].head()" ] }, { "cell_type": "code", "execution_count": 813, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TimeCO2Day
12016-02-19 13:26:00NaNFriday
22016-02-19 13:27:00718.0Friday
32016-02-19 13:27:00NaNFriday
42016-02-19 13:31:00337.0Friday
52016-02-19 13:36:00332.0Friday
\n", "
" ] }, "execution_count": 813, "metadata": { }, "output_type": "execute_result" } ], "source": [ "df3.head()" ] }, { "cell_type": "code", "execution_count": 814, "metadata": { "collapsed": false }, "outputs": [ ], "source": [ "df3['Time2'] = df3.Time.shift(-1)" ] }, { "cell_type": "code", "execution_count": 815, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TimeCO2DayTime2
12016-02-19 13:26:00NaNFriday2016-02-19 13:27:00
22016-02-19 13:27:00718.0Friday2016-02-19 13:27:00
32016-02-19 13:27:00NaNFriday2016-02-19 13:31:00
42016-02-19 13:31:00337.0Friday2016-02-19 13:36:00
52016-02-19 13:36:00332.0Friday2016-02-19 13:41:00
\n", "
" ] }, "execution_count": 815, "metadata": { }, "output_type": "execute_result" } ], "source": [ "df3.head()" ] }, { "cell_type": "code", "execution_count": 816, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TimeCO2DayTime2TimeDel
12016-02-19 13:26:00NaNFriday2016-02-19 13:27:0000:01:00
22016-02-19 13:27:00718.0Friday2016-02-19 13:27:0000:00:00
32016-02-19 13:27:00NaNFriday2016-02-19 13:31:0000:04:00
42016-02-19 13:31:00337.0Friday2016-02-19 13:36:0000:05:00
52016-02-19 13:36:00332.0Friday2016-02-19 13:41:0000:05:00
\n", "
" ] }, "execution_count": 816, "metadata": { }, "output_type": "execute_result" } ], "source": [ "df3['TimeDel'] = df3.Time2 - df3.Time\n", "df3.head()" ] }, { "cell_type": "code", "execution_count": 817, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "1 60.0\n", "2 0.0\n", "3 240.0\n", "4 300.0\n", "5 300.0\n", "Name: TimeDel, dtype: float64" ] }, "execution_count": 817, "metadata": { }, "output_type": "execute_result" } ], "source": [ "df3.TimeDel.dt.seconds.head()" ] }, { "cell_type": "code", "execution_count": 818, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "Time datetime64[ns]\n", "CO2 float64\n", "Day object\n", "Time2 datetime64[ns]\n", "TimeDel timedelta64[ns]\n", "dtype: object" ] }, "execution_count": 818, "metadata": { }, "output_type": "execute_result" } ], "source": [ "df3.dtypes" ] }, { "cell_type": "code", "execution_count": 819, "metadata": { "collapsed": false }, "outputs": [ ], "source": [ "# gap to the next reading in seconds (float); recomputed from the timestamp columns\n", "# rather than from TimeDel itself so that re-running this cell gives the same result\n", "df3['TimeDel'] = (df3.Time2 - df3.Time) / np.timedelta64(1, 's')\n" ] }, { "cell_type": "code", "execution_count": 820, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "Time datetime64[ns]\n", "CO2 float64\n", "Day object\n", "Time2 datetime64[ns]\n", "TimeDel float64\n", "dtype: object" ] }, "execution_count": 820, "metadata": { }, "output_type": "execute_result" } ], "source": [ "df3.dtypes" ] }, { "cell_type": "code", "execution_count": 821, "metadata": { "collapsed": false }, "outputs": [ ], "source": [ "# NOTE: readings with the same timestamp as the next row have TimeDel == 0,\n", "# so this division yields inf for those rows\n", "df3['CO2_over_TimeDiff'] = (df3.CO2 / df3.TimeDel)\n" ] }, { "cell_type": "code", "execution_count": 822, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "Time 0\n", "CO2 6\n", "Day 0\n", "Time2 1\n", "TimeDel 1\n", "CO2_over_TimeDiff 7\n", "dtype: int64" ] }, "execution_count": 822, "metadata": { }, "output_type": "execute_result" } ], "source": [ "# number of \"not a number\" in each column\n", "df3.isnull().sum()" ] }, { "cell_type": "code", 
"execution_count": 823, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TimeCO2DayTime2TimeDelCO2_over_TimeDiff
12016-02-19 13:26:00NaNFriday2016-02-19 13:27:0060.0NaN
32016-02-19 13:27:00NaNFriday2016-02-19 13:31:00240.0NaN
29112016-02-29 17:03:00NaNMonday2016-02-29 17:05:00120.0NaN
329312016-06-14 05:09:00NaNTuesday2016-06-14 05:10:0060.0NaN
486782016-08-09 05:21:00NaNTuesday2016-08-09 05:22:0060.0NaN
725652016-10-31 15:40:00NaNMonday2016-10-31 15:44:00240.0NaN
\n", "
" ] }, "execution_count": 823, "metadata": { }, "output_type": "execute_result" } ], "source": [ "df3[df3.CO2.isnull()]" ] }, { "cell_type": "code", "execution_count": 824, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "(100991, 6)" ] }, "execution_count": 824, "metadata": { }, "output_type": "execute_result" } ], "source": [ "df3.shape" ] }, { "cell_type": "code", "execution_count": 825, "metadata": { "collapsed": false }, "outputs": [ ], "source": [ "# dropping rows that have \"any\" missing values\n", "df3.dropna(how='any', inplace = True)" ] }, { "cell_type": "code", "execution_count": 826, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "(100984, 6)" ] }, "execution_count": 826, "metadata": { }, "output_type": "execute_result" } ], "source": [ "df3.shape" ] }, { "cell_type": "code", "execution_count": 827, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TimeCO2DayTime2TimeDelCO2_over_TimeDiff
22016-02-19 13:27:00718.0Friday2016-02-19 13:27:000.0inf
42016-02-19 13:31:00337.0Friday2016-02-19 13:36:00300.01.123333
52016-02-19 13:36:00332.0Friday2016-02-19 13:41:00300.01.106667
62016-02-19 13:41:00328.0Friday2016-02-19 13:46:00300.01.093333
72016-02-19 13:46:00307.0Friday2016-02-19 13:51:00300.01.023333
\n", "
" ] }, "execution_count": 827, "metadata": { }, "output_type": "execute_result" } ], "source": [ "df3.head()" ] }, { "cell_type": "code", "execution_count": 828, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CO2TimeDelCO2_over_TimeDiff
count100984.000000100984.0000001.009840e+05
mean547.647548307.336608inf
std304.5127401369.027580NaN
min201.000000-3300.000000-3.500000e-01
25%362.000000300.0000001.200000e+00
50%438.000000300.0000001.453333e+00
75%615.000000300.0000002.040000e+00
max2777.000000424320.000000inf
\n", "
" ] }, "execution_count": 828, "metadata": { }, "output_type": "execute_result" } ], "source": [ "df3.describe()" ] }, { "cell_type": "code", "execution_count": 829, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "2 inf\n", "4 1.123333\n", "5 1.106667\n", "6 1.093333\n", "7 1.023333\n", "Name: CO2_over_TimeDiff, dtype: float64" ] }, "execution_count": 829, "metadata": { }, "output_type": "execute_result" } ], "source": [ "df3.CO2_over_TimeDiff.head()" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "collapsed": false }, "outputs": [ ], "source": [ ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (Anaconda)", "language": "python", "name": "anaconda3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.4" } }, "nbformat": 4, "nbformat_minor": 0 }