{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [
],
"source": [
"#Load the data:\n",
"# Notice: if reproduction of this code is of interst, delete blank columns to achieve this result.\n",
"PATH <- \"weekly_in_situ_co2_mlo.csv\"\n",
"Data <- read.csv(PATH, header = TRUE)\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"image/png": "6d1e728413d3439ce3dafea9ed2c7f941afac72e"
},
"execution_count": 2,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"#Change the time\n",
"Time <- as.Date(Data$Date)\n",
"plot(Time,Data$CO2, xlab = \"Year\", ylab = \"CO2 Levels\")\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"image/png": "8f108a7146da293d548d35f82c118ddef21ffcfd"
},
"execution_count": 3,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"lm <- lm(CO2~Time, data = Data)\n",
"plot(Time,Data$CO2, xlab = \"Year\", ylab = \"CO2 Levels\")\n",
"abline(lm, col=\"red\")\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"3040"
]
},
"execution_count": 4,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"#turn the time parameter into days from day one:\n",
"Time <- c(Time-Time[1])\n",
"#Sanity check: we should have a length of 3040 (weeks between )\n",
"length(Time) \n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"21770"
]
},
"execution_count": 5,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"#turn time to numeric vector:\n",
"Time <- as.numeric(Time, units=\"days\")\n",
"Time[3040]\n"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"CO2 | Time |
\n",
"\n",
"\t316.19 | 0 |
\n",
"\t317.31 | 7 |
\n",
"\t317.69 | 14 |
\n",
"\t317.58 | 21 |
\n",
"\t316.48 | 28 |
\n",
"\t316.95 | 35 |
\n",
"\n",
"
\n"
]
},
"execution_count": 6,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"#Create a numeric dataframe to work with:\n",
"numeric_data <- data.frame(Data$CO2,Time)\n",
"colnames(numeric_data) <-c(\"CO2\",\"Time\")\n",
"#Expect to get dbl type (int) for both variables:\n",
"head(numeric_data)\n"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
" | CO2 | Time |
\n",
"\n",
"\t2129 | 369.61 | 15309 |
\n",
"\t2130 | 370.11 | 15316 |
\n",
"\t2131 | 370.09 | 15323 |
\n",
"\t2132 | 370.98 | 15330 |
\n",
"\t2133 | 370.86 | 15337 |
\n",
"\t2134 | 371.04 | 15344 |
\n",
"\n",
"
\n"
]
},
"execution_count": 7,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"#model test on the esixting data:\n",
"#use the first 70% to fit and 30% to predict:\n",
"set.seed(23)\n",
"train_size <- nrow(numeric_data)* .7 #<- train size\n",
"#let's split the data to train and test:\n",
"#train <- numeric_data[1:train_size, ]\n",
"train <- numeric_data[1:train_size,]\n",
"test_rows <- (nrow(numeric_data) - nrow(train))\n",
"test <- tail(numeric_data, test_rows)\n",
"#Sanity check: we should expect the test data to start at 2129 (one item after 70% of observations , and to have 3040-2128 = 912 )\n",
"head(test)\n"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"912"
]
},
"execution_count": 8,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"nrow(test)\n"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"Time | seasonal_var | CO2 |
\n",
"\n",
"\t 0 | 1.0000000 | 316.19 |
\n",
"\t 7 | 0.9927586 | 317.31 |
\n",
"\t14 | 0.9711394 | 317.69 |
\n",
"\t21 | 0.9354554 | 317.58 |
\n",
"\t28 | 0.8862235 | 316.48 |
\n",
"\t35 | 0.8241566 | 316.95 |
\n",
"\n",
"
\n"
]
},
"execution_count": 9,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"#Add the seasonal parameter:\n",
"seasonal_var <- ts(cos((2*pi*train$Time)/365.25))\n",
"train <- data.frame(train$Time, seasonal_var, CO2=train$CO2)\n",
"colnames(train) <-c(\"Time\",\"seasonal_var\",\"CO2\")\n",
"head(train)\n"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"\n",
"Call:\n",
"lm(formula = CO2 ~ Time + seasonal_var, data = train)\n",
"\n",
"Residuals:\n",
" Min 1Q Median 3Q Max \n",
"-4.2844 -1.6280 -0.2005 1.2968 6.1090 \n",
"\n",
"Coefficients:\n",
" Estimate Std. Error t value Pr(>|t|) \n",
"(Intercept) 3.105e+02 9.116e-02 3406.23 <2e-16 ***\n",
"Time 3.633e-03 1.020e-05 356.23 <2e-16 ***\n",
"seasonal_var 2.544e+00 6.316e-02 40.27 <2e-16 ***\n",
"---\n",
"Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1\n",
"\n",
"Residual standard error: 2.055 on 2125 degrees of freedom\n",
"Multiple R-squared: 0.9837,\tAdjusted R-squared: 0.9837 \n",
"F-statistic: 6.422e+04 on 2 and 2125 DF, p-value: < 2.2e-16\n"
]
},
"execution_count": 10,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"#Fit the model:\n",
"tslm <- lm(CO2~Time+seasonal_var, data = train)\n",
"#Later we'd want to plot the fit of the model:\n",
"seasonal_var_numeric <- as.numeric(seasonal_var)\n",
"tslm_var_for_plot <-(Time[1:nrow(train)]+seasonal_var_numeric)\n",
"summary(tslm)\n"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"image/png": "b2d46b012701857b60818e5edaadc19523161924"
},
"execution_count": 11,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"plot(Time[1:nrow(train)],Data$CO2[1:nrow(train)], xlab = \"Indexed Time\", ylab = \"CO2 Levels\", main=\"CO2 (in black) predicted by the updated model(red) - Not bad at all!\")\n",
"lines(tslm_var_for_plot, train$CO2, col=\"red\")\n"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"Time | seasonal_var | CO2 |
\n",
"\n",
"\t15309 | 0.8567425 | 369.61 |
\n",
"\t15316 | 0.9124929 | 370.11 |
\n",
"\t15323 | 0.9550279 | 370.09 |
\n",
"\t15330 | 0.9837315 | 370.98 |
\n",
"\t15337 | 0.9981880 | 370.86 |
\n",
"\t15344 | 0.9981880 | 371.04 |
\n",
"\n",
"
\n"
]
},
"execution_count": 12,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"#use the model and its predictor (time, seasonal variation) to predict CO2 for different times:\n",
"#X input for the model - copmlement the test set with the seasonal var:\n",
"seasonal_var <- ts(cos((2*pi*test$Time)/365.25))\n",
"test <- data.frame(test$Time, seasonal_var, CO2=test$CO2)\n",
"colnames(test) <-c(\"Time\",\"seasonal_var\",\"CO2\")\n",
"head(test)\n"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"\n",
"Call:\n",
"lm(formula = CO2 ~ Time + seasonal_var, data = train)\n",
"\n",
"Residuals:\n",
" Min 1Q Median 3Q Max \n",
"-4.2844 -1.6280 -0.2005 1.2968 6.1090 \n",
"\n",
"Coefficients:\n",
" Estimate Std. Error t value Pr(>|t|) \n",
"(Intercept) 3.105e+02 9.116e-02 3406.23 <2e-16 ***\n",
"Time 3.633e-03 1.020e-05 356.23 <2e-16 ***\n",
"seasonal_var 2.544e+00 6.316e-02 40.27 <2e-16 ***\n",
"---\n",
"Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1\n",
"\n",
"Residual standard error: 2.055 on 2125 degrees of freedom\n",
"Multiple R-squared: 0.9837,\tAdjusted R-squared: 0.9837 \n",
"F-statistic: 6.422e+04 on 2 and 2125 DF, p-value: < 2.2e-16\n"
]
},
"execution_count": 13,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"tslm <- lm(CO2~Time+seasonal_var, data = train)\n",
"prediction<-predict(tslm, test)\n",
"summary(tslm)\n"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"image/png": "6d489645a26852e3fda727993d5332534b763a07"
},
"execution_count": 14,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"\n",
"pred_data <- data.frame(test$Time, prediction)\n",
"plot(test$Time,test$CO2, xlab = \"Indexed Time\", ylab = \"CO2 Levels\", main=\"Prediction: CO2 (in black) predicted by the updated model(red), \\nnot so great...\")\n",
"lines(pred_data, col=\"red\")\n"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {
"collapsed": false
},
"outputs": [
],
"source": [
"#This model builds on the linear model that we have used so far as a mean to a normal distribution:\n",
"#priors (taken from the linear model):\n",
"c0 = 310.5; c1 = .003; c2 = 2.53; c3 = .01; c4=.5\n",
"counter = 0\n",
"seasonal_var_df <- as.array(seasonal_var_numeric)\n",
"#constract the Bayes-Gaussian Model:\n",
"bayesian_model <- function(num_days){\n",
" bys <- rnorm(1,(c0+c1*num_days+c2*cos(2*pi*num_days/365.25 + c3)),c4)\n",
" if (is.numeric(bys)){ #this subblock is just for the counter\n",
" counter <- counter+1\n",
" }\n",
" return(rnorm(1,(c0+c1*num_days+c2*cos(2*pi*num_days/365.25 + c3)),c4))\n",
"}\n",
"#Let's see how this model fits the training data:\n",
"bayesian_results_func <- function(X_input) {\n",
" #generate list for iteration:\n",
" items_for_iteration <- length(X_input)\n",
" l <- vector(\"integer\", items_for_iteration)\n",
" for (i in 1:length(X_input)){\n",
" #see: https://stackoverflow.com/questions/26508519/how-to-add-elements-to-a-list-in-r-loop\n",
" l[i] <- bayesian_model(X_input[i])\n",
" }\n",
" return (subset(l,!is.na(l)) )\n",
" #l = l[!is.na(l)]\n",
" #return(l)\n",
"}\n",
"bayesian_results <- bayesian_results_func(c(train$Time))\n"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\t- 313.18011608506
\n",
"\t- 312.082467171125
\n",
"\t- 313.553690562177
\n",
"\t- 312.590890964596
\n",
"\t- 312.291578041677
\n",
"\t- 313.506027261337
\n",
"
\n"
]
},
"execution_count": 36,
"metadata": {
},
"output_type": "execute_result"
},
{
"data": {
"text/html": [
"2128"
]
},
"execution_count": 36,
"metadata": {
},
"output_type": "execute_result"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[1] 0\n"
]
}
],
"source": [
"#Sanity Check: we should expect 2128 obs / 7 = 304\n",
"head(bayesian_results)\n",
"length(bayesian_results)\n",
"print(counter)"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"TRUE"
]
},
"execution_count": 41,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"is.numeric(bayesian_results)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"0"
]
},
"execution_count": 25,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"Time[1]"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"1"
]
},
"execution_count": 26,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"seasonal_var_df[1]"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"0"
]
},
"execution_count": 32,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"sum(is.na(rnorm(1000,(c0+c1*Time[1]+c2*seasonal_var_df[1]),c4)))"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {
"collapsed": false
},
"outputs": [
{
"ename": "ERROR",
"evalue": "Error in eval(expr, envir, enclos): object 'X_input' not found\n",
"output_type": "error",
"traceback": [
"Error in eval(expr, envir, enclos): object 'X_input' not found\nTraceback:\n"
]
}
],
"source": [
"X_input"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING: Some output was deleted.\n"
]
}
],
"source": [
"train$Time"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"collapsed": false
},
"outputs": [
],
"source": [
]
}
],
"metadata": {
"kernelspec": {
"display_name": "SageMath (stable)",
"name": "sagemath"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.15"
}
},
"nbformat": 4,
"nbformat_minor": 0
}