Contact Us!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Sign Up | Sign In

Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place. Commercial Alternative to JupyterHub.

| Download
Views: 136
Kernel: Python 3 (Anaconda 5)
import pandas as pd import numpy as np import matplotlib.pyplot as plt from datetime import timedelta from sklearn.model_selection import train_test_split from sklearn.linear_model import LogisticRegression from sklearn.metrics import confusion_matrix
# Load the raw weather observations (one row per station per day).
df_all_weather = pd.read_csv('2008-2018 Weather Data.csv')
df_all_weather.head()

# Load the beach water-quality samples.
df_beaches = pd.read_csv('DOHMH_Beach_Water_Quality_Data.csv')
df_beaches.head()

# NOTE(review): this zero-fills EVERY column, so a missing
# 'Enterococci Results' value becomes a reading of 0 (i.e. "no warning")
# further down -- confirm that is the intended treatment of missing samples.
df_beaches = df_beaches.fillna(0)

# Parse the sample date so it can later be joined against the weather dates.
df_beaches['Sample Date'] = pd.to_datetime(df_beaches['Sample Date'])
df_beaches.head()
# Build the Manhattan Beach sample table: two columns (DATE, Enterococci),
# ordered by date, with a fresh 0..n-1 index.
#
# Everything is done in a single chain that produces a new DataFrame, so no
# column assignment or in-place sort is ever performed on a slice of
# df_beaches (which is what triggers pandas' SettingWithCopyWarning).
df_man = (
    df_beaches.loc[
        df_beaches['Beach Name'] == 'MANHATTAN BEACH',
        ['Sample Date', 'Enterococci Results'],
    ]
    .rename(columns={'Sample Date': 'DATE',
                     'Enterococci Results': 'Enterococci'})
    # Idempotent if the column is already datetime; kept so this cell does
    # not depend on an earlier conversion having run.
    .assign(DATE=lambda frame: pd.to_datetime(frame['DATE']))
    .sort_values(by='DATE')
    .reset_index(drop=True)
)
df_man.head()
# Keep only the rows recorded at the JFK airport weather station.
df_jfk = df_all_weather.loc[
    df_all_weather['NAME'] == 'JFK INTERNATIONAL AIRPORT, NY US'
]
df_jfk.head()
# Inspect which measurement columns are available for this station.
df_jfk.columns
Index(['STATION', 'NAME', 'LATITUDE', 'LONGITUDE', 'ELEVATION', 'DATE', 'AWND', 'DAPR', 'FMTM', 'MDPR', 'MDSF', 'PGTM', 'PRCP', 'SNOW', 'SNWD', 'TAVG', 'TMAX', 'TMIN', 'TOBS', 'TSUN', 'WDF2', 'WDF5', 'WESD', 'WESF', 'WSF2', 'WSF5', 'WT01', 'WT02', 'WT03', 'WT04', 'WT05', 'WT06', 'WT07', 'WT08', 'WT09', 'WT11', 'WT13', 'WT14', 'WT15', 'WT16', 'WT17', 'WT18', 'WT19', 'WT21', 'WT22'], dtype='object')
# Reduce the JFK weather table to date + precipitation. The explicit .copy()
# makes df_jfk an independent frame, so the column assignment below does not
# write into a slice of df_all_weather (SettingWithCopyWarning).
df_jfk = df_jfk[['DATE', 'PRCP']].copy()
df_jfk['DATE'] = pd.to_datetime(df_jfk['DATE'])
df_jfk = df_jfk.sort_values(by='DATE')

# Inner join: keep only the dates that have both a beach sample and a JFK
# precipitation record.
df_merged = pd.merge(df_man, df_jfk, how='inner', on='DATE')
df_merged.head()
def eWarn(m: float, threshold: float = 104) -> int:
    """Return 1 if the reading ``m`` is strictly above ``threshold``, else 0.

    Used to turn a numeric Enterococci result into a binary warning flag.

    Args:
        m: The measured value to classify.
        threshold: Cutoff above which a warning is raised. Defaults to 104,
            the value the original code hard-coded (presumably the
            Enterococci advisory limit -- confirm units against the data
            source).

    Returns:
        1 when ``m > threshold``; 0 otherwise (including when ``m`` equals
        the threshold or is NaN, since NaN comparisons are False).
    """
    return 1 if m > threshold else 0
# Derive the binary target: 1 when the sample's Enterococci reading trips the
# eWarn cutoff, 0 otherwise.
df_merged['Warning'] = df_merged['Enterococci'].map(eWarn)
df_merged.head()
# Class balance of the target (how many warning vs. non-warning days).
df_merged['Warning'].value_counts()
0    649
1     38
Name: Warning, dtype: int64
# Scatter of the binary warning flag against daily precipitation.
df_merged.plot.scatter(x='PRCP', y='Warning', color='black')
plt.title('Warning vs. PRCP')
Text(0.5,1,'Warning vs. PRCP')
Image in a Jupyter notebook
# Single feature (precipitation) and binary target (warning flag).
# X is kept 1-D here; it is reshaped to a column wherever scikit-learn needs
# a 2-D feature matrix.
X = np.array(df_merged['PRCP'])
y = df_merged['Warning']

# Hold out a third of the samples; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=101
)

# NOTE(review): the target is heavily imbalanced (649 zeros vs. 38 ones in the
# recorded value_counts) and the recorded confusion matrix shows the fitted
# model never predicts a warning. Consider stratify=y in the split and/or
# class_weight='balanced' on the model; left unchanged here so the recorded
# outputs still match.
logmodel = LogisticRegression(solver='liblinear')
logmodel.fit(X_train.reshape(-1, 1), y_train)
LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True, intercept_scaling=1, max_iter=100, multi_class='warn', n_jobs=None, penalty='l2', random_state=None, solver='liblinear', tol=0.0001, verbose=0, warm_start=False)
# Fitted parameters as returned by scikit-learn: for a binary problem with a
# single feature, intercept_ has shape (1,) and coef_ has shape (1, 1).
b0 = logmodel.intercept_
b1 = logmodel.coef_

# Sorted precipitation values so the fitted curve is drawn left to right.
# np.sort yields an ndarray, so the arithmetic below is plain element-wise
# math rather than a (1, 1)-array-times-list broadcast that then needs a
# reshape to recover the right shape.
X2 = np.sort(X)

# Logistic curve P(Warning = 1 | PRCP) evaluated with the scalar coefficients.
fitted_prob = 1 / (1 + np.exp(-(b0[0] + b1[0, 0] * X2)))

df_merged.plot.scatter(x='PRCP', y='Warning', color='black')
plt.plot(X2, fitted_prob, 'r')
plt.title('Warning vs. PRCP')
Text(0.5,1,'Warning vs. PRCP')
Image in a Jupyter notebook
# Predict once on the held-out set and reuse the result; the original ran
# predict() a second time and never used `predictions`.
predictions = logmodel.predict(X_test.reshape(-1, 1))

# Rows are true classes, columns are predicted classes.
print(confusion_matrix(y_test, predictions))
[[213   0]
 [ 14   0]]