In [2]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
data = pd.read_csv('202February',
                   skiprows=3,       # ignore the first 3 rows of data
                   sep=';',          # semicolon is used to separate data values
                   index_col=1,      # use column 1 as the dates to index the data
                   parse_dates=True) # convert the date string into a date object
In [3]:
data['CO2'].plot()
Out[3]:
<matplotlib.axes.AxesSubplot at 0x7ff2814ed990>
In [10]:
def process_data(begin, end, data, plot=False):
    x = data['Timestamp'][begin:end]
    x = x - x[0]
    y = data['CO2'][begin:end]

    fit = np.polyfit(x,y,1)
    room_ppm_per_second = fit[0]
    room_ppm_per_minute = room_ppm_per_second * 60
    ppm_per_student_per_minute = 0.176
    num_students = room_ppm_per_minute / ppm_per_student_per_minute

    if plot:
        yfit = np.polyval(fit, x)
        plt.plot(x, y)
        plt.plot(x, yfit)
        plt.xlabel ("seconds")
        plt.ylabel ("ppm")

    print('Start time = {}'.format(begin))
    print('End time = {}'.format(end))
    print('Carbon dioxide rate of increase {:.2f} ppm per minute'.format(room_ppm_per_minute))    
    print('Estimated number of students: {:.0f}'.format(num_students))
    print()

class_starts = (
                ('2015-02-2 10:35', '2015-02-2 12:10'),
                ('2015-02-4 10:35', '2015-02-4 12:10'),
                ('2015-02-9 10:35', '2015-02-9 12:10'),
                ('2015-02-11 10:35', '2015-02-11 12:10'),
                ('2015-02-16 10:35', '2015-02-16 12:10'),
                ('2015-02-18 10:35', '2015-02-18 12:10'),
                ('2015-02-23 10:35', '2015-02-23 12:10'),
                ('2015-02-25 10:35', '2015-02-25 12:10'),
               )

for begin, end in class_starts:
    process_data(begin, end, data, plot=True)
Start time = 2015-02-2 10:35
End time = 2015-02-2 12:10
Carbon dioxide rate of increase 1.45 ppm per minute
Estimated number of students: 8
()
Start time = 2015-02-4 10:35
End time = 2015-02-4 12:10
Carbon dioxide rate of increase 1.23 ppm per minute
Estimated number of students: 7
()
Start time = 2015-02-9 10:35
End time = 2015-02-9 12:10
Carbon dioxide rate of increase 3.14 ppm per minute
Estimated number of students: 18
()
Start time = 2015-02-11 10:35
End time = 2015-02-11 12:10
Carbon dioxide rate of increase 2.19 ppm per minute
Estimated number of students: 12
()
Start time = 2015-02-16 10:35
End time = 2015-02-16 12:10
Carbon dioxide rate of increase -1.97 ppm per minute
Estimated number of students: -11
()
Start time = 2015-02-18 10:35
End time = 2015-02-18 12:10
Carbon dioxide rate of increase 1.51 ppm per minute
Estimated number of students: 9
()
Start time = 2015-02-23 10:35
End time = 2015-02-23 12:10
Carbon dioxide rate of increase 5.48 ppm per minute
Estimated number of students: 31
()
Start time = 2015-02-25 10:35
End time = 2015-02-25 12:10
Carbon dioxide rate of increase 2.68 ppm per minute
Estimated number of students: 15
()

In [13]:
def process_data(begin, end, data, plot=False):
    x = data['Timestamp'][begin:end]
    x = x - x[0]
    y = data['CO2'][begin:end]

    fit = np.polyfit(x,y,1)
    room_ppm_per_second = fit[0]
    room_ppm_per_minute = room_ppm_per_second * 60
    ppm_per_student_per_minute = 0.176
    num_students = room_ppm_per_minute / ppm_per_student_per_minute

    if plot:
        yfit = np.polyval(fit, x)
        plt.plot(x, y)
        plt.plot(x, yfit)
        plt.xlabel ("seconds")
        plt.ylabel ("ppm")

    print('Start time = {}'.format(begin))
    print('End time = {}'.format(end))
    print('Carbon dioxide rate of increase {:.2f} ppm per minute'.format(room_ppm_per_minute))    
    print('Estimated number of students: {:.0f}'.format(num_students))
    print()

class_starts = (
                ('2015-02-3 12:50', '2015-02-3 2:40'),
                ('2015-02-5 12:50', '2015-02-5 2:40'),
                ('2015-02-10 12:50', '2015-02-10 2:40'),
                ('2015-02-12 12:50', '2015-02-12 2:40'),
                ('2015-02-17 12:50', '2015-02-17 2:40'),
                ('2015-02-19 12:50', '2015-02-19 2:40'),
                ('2015-02-24 12:50', '2015-02-24 2:40'),
                ('2015-02-26 12:50', '2015-02-26 2:40'),
               )

for begin, end in class_starts:
    process_data(begin, end, data, plot=True)
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-13-7f9600c1aef9> in <module>()
     35 
     36 for begin, end in class_starts:
---> 37     process_data(begin, end, data, plot=True)

<ipython-input-13-7f9600c1aef9> in process_data(begin, end, data, plot)
      1 def process_data(begin, end, data, plot=False):
      2     x = data['Timestamp'][begin:end]
----> 3     x = x - x[0]
      4     y = data['CO2'][begin:end]
      5 

/usr/local/sage/sage-6.5/local/lib/python2.7/site-packages/pandas/core/series.py in __getitem__(self, key)
    507     def __getitem__(self, key):
    508         try:
--> 509             result = self.index.get_value(self, key)
    510 
    511             if not np.isscalar(result):

/usr/local/sage/sage-6.5/local/lib/python2.7/site-packages/pandas/tseries/index.py in get_value(self, series, key)
   1221 
   1222         try:
-> 1223             return _maybe_box(self, Index.get_value(self, series, key), series, key)
   1224         except KeyError:
   1225             try:

/usr/local/sage/sage-6.5/local/lib/python2.7/site-packages/pandas/core/index.py in get_value(self, series, key)
   1421 
   1422             try:
-> 1423                 return tslib.get_value_box(s, key)
   1424             except IndexError:
   1425                 raise

pandas/tslib.pyx in pandas.tslib.get_value_box (pandas/tslib.c:14029)()

pandas/tslib.pyx in pandas.tslib.get_value_box (pandas/tslib.c:13796)()

IndexError: index out of bounds
In []: