{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
   ],
   "source": [
    "#Load the data:\n",
    "# Notice: if reproduction of this code is of interst, delete blank columns to achieve this result.\n",
    "PATH <- \"weekly_in_situ_co2_mlo.csv\"\n",
    "Data <- read.csv(PATH, header = TRUE)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "image/png": "6d1e728413d3439ce3dafea9ed2c7f941afac72e"
     },
     "execution_count": 2,
     "metadata": {
     },
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#Change the time\n",
    "Time <- as.Date(Data$Date)\n",
    "plot(Time,Data$CO2, xlab = \"Year\", ylab = \"CO2 Levels\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "image/png": "8f108a7146da293d548d35f82c118ddef21ffcfd"
     },
     "execution_count": 3,
     "metadata": {
     },
     "output_type": "execute_result"
    }
   ],
   "source": [
    "lm <- lm(CO2~Time, data = Data)\n",
    "plot(Time,Data$CO2, xlab = \"Year\", ylab = \"CO2 Levels\")\n",
    "abline(lm, col=\"red\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "3040"
      ]
     },
     "execution_count": 4,
     "metadata": {
     },
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#turn the time parameter into days from day one:\n",
    "Time <- c(Time-Time[1])\n",
    "#Sanity check: we should have a length of 3040 (weeks between )\n",
    "length(Time) \n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "21770"
      ]
     },
     "execution_count": 5,
     "metadata": {
     },
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#turn time to numeric vector:\n",
    "Time <- as.numeric(Time, units=\"days\")\n",
    "Time[3040]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table>\n",
       "<thead><tr><th scope=col>CO2</th><th scope=col>Time</th></tr></thead>\n",
       "<tbody>\n",
       "\t<tr><td>316.19</td><td> 0    </td></tr>\n",
       "\t<tr><td>317.31</td><td> 7    </td></tr>\n",
       "\t<tr><td>317.69</td><td>14    </td></tr>\n",
       "\t<tr><td>317.58</td><td>21    </td></tr>\n",
       "\t<tr><td>316.48</td><td>28    </td></tr>\n",
       "\t<tr><td>316.95</td><td>35    </td></tr>\n",
       "</tbody>\n",
       "</table>\n"
      ]
     },
     "execution_count": 6,
     "metadata": {
     },
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#Create a numeric dataframe to work with:\n",
    "numeric_data <- data.frame(Data$CO2,Time)\n",
    "colnames(numeric_data) <-c(\"CO2\",\"Time\")\n",
    "#Expect to get dbl type (int) for both variables:\n",
    "head(numeric_data)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table>\n",
       "<thead><tr><th></th><th scope=col>CO2</th><th scope=col>Time</th></tr></thead>\n",
       "<tbody>\n",
       "\t<tr><th scope=row>2129</th><td>369.61</td><td>15309 </td></tr>\n",
       "\t<tr><th scope=row>2130</th><td>370.11</td><td>15316 </td></tr>\n",
       "\t<tr><th scope=row>2131</th><td>370.09</td><td>15323 </td></tr>\n",
       "\t<tr><th scope=row>2132</th><td>370.98</td><td>15330 </td></tr>\n",
       "\t<tr><th scope=row>2133</th><td>370.86</td><td>15337 </td></tr>\n",
       "\t<tr><th scope=row>2134</th><td>371.04</td><td>15344 </td></tr>\n",
       "</tbody>\n",
       "</table>\n"
      ]
     },
     "execution_count": 7,
     "metadata": {
     },
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#model test on the esixting data:\n",
    "#use the first 70% to fit and 30% to predict:\n",
    "set.seed(23)\n",
    "train_size <- nrow(numeric_data)* .7 #<- train size\n",
    "#let's split the data to train and test:\n",
    "#train <- numeric_data[1:train_size, ]\n",
    "train <- numeric_data[1:train_size,]\n",
    "test_rows <- (nrow(numeric_data) - nrow(train))\n",
    "test <- tail(numeric_data, test_rows)\n",
    "#Sanity check: we should expect the test data to start at 2129 (one item after 70% of observations , and to have 3040-2128 = 912 )\n",
    "head(test)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "912"
      ]
     },
     "execution_count": 8,
     "metadata": {
     },
     "output_type": "execute_result"
    }
   ],
   "source": [
    "nrow(test)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table>\n",
       "<thead><tr><th scope=col>Time</th><th scope=col>seasonal_var</th><th scope=col>CO2</th></tr></thead>\n",
       "<tbody>\n",
       "\t<tr><td> 0       </td><td>1.0000000</td><td>316.19   </td></tr>\n",
       "\t<tr><td> 7       </td><td>0.9927586</td><td>317.31   </td></tr>\n",
       "\t<tr><td>14       </td><td>0.9711394</td><td>317.69   </td></tr>\n",
       "\t<tr><td>21       </td><td>0.9354554</td><td>317.58   </td></tr>\n",
       "\t<tr><td>28       </td><td>0.8862235</td><td>316.48   </td></tr>\n",
       "\t<tr><td>35       </td><td>0.8241566</td><td>316.95   </td></tr>\n",
       "</tbody>\n",
       "</table>\n"
      ]
     },
     "execution_count": 9,
     "metadata": {
     },
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#Add the seasonal parameter:\n",
    "seasonal_var <- ts(cos((2*pi*train$Time)/365.25))\n",
    "train <- data.frame(train$Time, seasonal_var, CO2=train$CO2)\n",
    "colnames(train) <-c(\"Time\",\"seasonal_var\",\"CO2\")\n",
    "head(train)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\n",
       "Call:\n",
       "lm(formula = CO2 ~ Time + seasonal_var, data = train)\n",
       "\n",
       "Residuals:\n",
       "    Min      1Q  Median      3Q     Max \n",
       "-4.2844 -1.6280 -0.2005  1.2968  6.1090 \n",
       "\n",
       "Coefficients:\n",
       "              Estimate Std. Error t value Pr(>|t|)    \n",
       "(Intercept)  3.105e+02  9.116e-02 3406.23   <2e-16 ***\n",
       "Time         3.633e-03  1.020e-05  356.23   <2e-16 ***\n",
       "seasonal_var 2.544e+00  6.316e-02   40.27   <2e-16 ***\n",
       "---\n",
       "Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1\n",
       "\n",
       "Residual standard error: 2.055 on 2125 degrees of freedom\n",
       "Multiple R-squared:  0.9837,\tAdjusted R-squared:  0.9837 \n",
       "F-statistic: 6.422e+04 on 2 and 2125 DF,  p-value: < 2.2e-16\n"
      ]
     },
     "execution_count": 10,
     "metadata": {
     },
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#Fit the model:\n",
    "tslm <- lm(CO2~Time+seasonal_var, data = train)\n",
    "#Later we'd want to plot the fit of the model:\n",
    "seasonal_var_numeric <- as.numeric(seasonal_var)\n",
    "tslm_var_for_plot <-(Time[1:nrow(train)]+seasonal_var_numeric)\n",
    "summary(tslm)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "image/png": "b2d46b012701857b60818e5edaadc19523161924"
     },
     "execution_count": 11,
     "metadata": {
     },
     "output_type": "execute_result"
    }
   ],
   "source": [
    "plot(Time[1:nrow(train)],Data$CO2[1:nrow(train)], xlab = \"Indexed Time\", ylab = \"CO2 Levels\", main=\"CO2 (in black) predicted by the updated model(red) - Not bad at all!\")\n",
    "lines(tslm_var_for_plot, train$CO2, col=\"red\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table>\n",
       "<thead><tr><th scope=col>Time</th><th scope=col>seasonal_var</th><th scope=col>CO2</th></tr></thead>\n",
       "<tbody>\n",
       "\t<tr><td>15309    </td><td>0.8567425</td><td>369.61   </td></tr>\n",
       "\t<tr><td>15316    </td><td>0.9124929</td><td>370.11   </td></tr>\n",
       "\t<tr><td>15323    </td><td>0.9550279</td><td>370.09   </td></tr>\n",
       "\t<tr><td>15330    </td><td>0.9837315</td><td>370.98   </td></tr>\n",
       "\t<tr><td>15337    </td><td>0.9981880</td><td>370.86   </td></tr>\n",
       "\t<tr><td>15344    </td><td>0.9981880</td><td>371.04   </td></tr>\n",
       "</tbody>\n",
       "</table>\n"
      ]
     },
     "execution_count": 12,
     "metadata": {
     },
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#use the model and its predictor (time, seasonal variation) to predict CO2 for different times:\n",
    "#X input for the model - copmlement the test set with the seasonal var:\n",
    "seasonal_var <- ts(cos((2*pi*test$Time)/365.25))\n",
    "test <- data.frame(test$Time, seasonal_var, CO2=test$CO2)\n",
    "colnames(test) <-c(\"Time\",\"seasonal_var\",\"CO2\")\n",
    "head(test)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\n",
       "Call:\n",
       "lm(formula = CO2 ~ Time + seasonal_var, data = train)\n",
       "\n",
       "Residuals:\n",
       "    Min      1Q  Median      3Q     Max \n",
       "-4.2844 -1.6280 -0.2005  1.2968  6.1090 \n",
       "\n",
       "Coefficients:\n",
       "              Estimate Std. Error t value Pr(>|t|)    \n",
       "(Intercept)  3.105e+02  9.116e-02 3406.23   <2e-16 ***\n",
       "Time         3.633e-03  1.020e-05  356.23   <2e-16 ***\n",
       "seasonal_var 2.544e+00  6.316e-02   40.27   <2e-16 ***\n",
       "---\n",
       "Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1\n",
       "\n",
       "Residual standard error: 2.055 on 2125 degrees of freedom\n",
       "Multiple R-squared:  0.9837,\tAdjusted R-squared:  0.9837 \n",
       "F-statistic: 6.422e+04 on 2 and 2125 DF,  p-value: < 2.2e-16\n"
      ]
     },
     "execution_count": 13,
     "metadata": {
     },
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tslm <- lm(CO2~Time+seasonal_var, data = train)\n",
    "prediction<-predict(tslm, test)\n",
    "summary(tslm)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "image/png": "6d489645a26852e3fda727993d5332534b763a07"
     },
     "execution_count": 14,
     "metadata": {
     },
     "output_type": "execute_result"
    }
   ],
   "source": [
    "\n",
    "pred_data <- data.frame(test$Time, prediction)\n",
    "plot(test$Time,test$CO2, xlab = \"Indexed Time\", ylab = \"CO2 Levels\", main=\"Prediction: CO2 (in black) predicted by the updated model(red), \\nnot so great...\")\n",
    "lines(pred_data, col=\"red\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
   ],
   "source": [
    "#This model builds on the linear model that we have used so far as a mean to a normal distribution:\n",
    "#priors (taken from the linear model):\n",
    "c0 = 310.5; c1 = .003; c2 = 2.53; c3 = .01; c4=.5\n",
    "counter = 0\n",
    "seasonal_var_df <- as.array(seasonal_var_numeric)\n",
    "#constract the Bayes-Gaussian Model:\n",
    "bayesian_model <- function(num_days){\n",
    "  bys <- rnorm(1,(c0+c1*num_days+c2*cos(2*pi*num_days/365.25 + c3)),c4)\n",
    "  if (is.numeric(bys)){   #this subblock is just for the counter\n",
    "     counter <- counter+1\n",
    "  }\n",
    "  return(rnorm(1,(c0+c1*num_days+c2*cos(2*pi*num_days/365.25 + c3)),c4))\n",
    "}\n",
    "#Let's see how this model fits the training data:\n",
    "bayesian_results_func <- function(X_input) {\n",
    "  #generate list for iteration:\n",
    "  items_for_iteration <- length(X_input)\n",
    "  l <- vector(\"integer\", items_for_iteration)\n",
    "  for (i in 1:length(X_input)){\n",
    "    #see: https://stackoverflow.com/questions/26508519/how-to-add-elements-to-a-list-in-r-loop\n",
    "    l[i] <- bayesian_model(X_input[i])\n",
    "  }\n",
    "  return (subset(l,!is.na(l)) )\n",
    "  #l = l[!is.na(l)]\n",
    "  #return(l)\n",
    "}\n",
    "bayesian_results <- bayesian_results_func(c(train$Time))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<ol class=list-inline>\n",
       "\t<li>313.18011608506</li>\n",
       "\t<li>312.082467171125</li>\n",
       "\t<li>313.553690562177</li>\n",
       "\t<li>312.590890964596</li>\n",
       "\t<li>312.291578041677</li>\n",
       "\t<li>313.506027261337</li>\n",
       "</ol>\n"
      ]
     },
     "execution_count": 36,
     "metadata": {
     },
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/html": [
       "2128"
      ]
     },
     "execution_count": 36,
     "metadata": {
     },
     "output_type": "execute_result"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1] 0\n"
     ]
    }
   ],
   "source": [
    "#Sanity Check: we should expect 2128 obs / 7 = 304\n",
    "head(bayesian_results)\n",
    "length(bayesian_results)\n",
    "print(counter)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "TRUE"
      ]
     },
     "execution_count": 41,
     "metadata": {
     },
     "output_type": "execute_result"
    }
   ],
   "source": [
    "is.numeric(bayesian_results)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "0"
      ]
     },
     "execution_count": 25,
     "metadata": {
     },
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Time[1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "1"
      ]
     },
     "execution_count": 26,
     "metadata": {
     },
     "output_type": "execute_result"
    }
   ],
   "source": [
    "seasonal_var_df[1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "0"
      ]
     },
     "execution_count": 32,
     "metadata": {
     },
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sum(is.na(rnorm(1000,(c0+c1*Time[1]+c2*seasonal_var_df[1]),c4)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "ename": "ERROR",
     "evalue": "Error in eval(expr, envir, enclos): object 'X_input' not found\n",
     "output_type": "error",
     "traceback": [
      "Error in eval(expr, envir, enclos): object 'X_input' not found\nTraceback:\n"
     ]
    }
   ],
   "source": [
    "X_input"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "WARNING: Some output was deleted.\n"
     ]
    }
   ],
   "source": [
    "train$Time"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
   ],
   "source": [
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "SageMath (stable)",
   "name": "sagemath"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.15"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}