CoCalc -- 2.3 Case Study Stock Price Prediction using AR .ipynb

GitHub Repository: suyashi29/python-su
Path: blob/master/Time Forecasting using Python/2.3 Case Study Stock Price Prediction using AR .ipynb
³⁰⁷⁴ views

Kernel: Python 3 (ipykernel)

pip install wordcloud --trusted-host pypi.org --trusted-host files.pythonhosted.org pandas_datareader

In [1]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.tsa.stattools import acf, pacf
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

import pandas_datareader as pdr
# Request data via Yahoo public API
data = pdr.get_data_yahoo(r'NVDA')
# Display Info
print(data.info())

In [2]:


# Load your stock price data
# Assuming you have a CSV file named 'stock_data.csv' with columns ['Date', 'Close']
data = pd.read_csv('stockdata.csv')
data['Date'] = pd.to_datetime(data['Date'])
data.set_index('Date', inplace=True)

In [3]:

data.head(2)
data.shape

Out[3]:

(260, 1)

In [4]:

# Check for missing values
print("Number of missing values:", data.isnull().sum().sum())

Out[4]:

Number of missing values: 0

In [5]:

# Visualize the data
plt.figure(figsize=(10, 6))
plt.plot(data.index, data['Close'], label='Stock Price')
plt.title('Stock Price Over Time')
plt.xlabel('Date')
plt.ylabel('Price')
plt.legend()
plt.show()

Out[5]:

In [6]:

# Check for stationarity using Augmented Dickey-Fuller test
adf_result = sm.tsa.adfuller(data['Close'])
print("ADF Statistic:", adf_result[0])
print("p-value:", adf_result[1])
print("Critical Values:", adf_result[4])

Out[6]:

ADF Statistic: -1.633311290790917
p-value: 0.46575793126617404
Critical Values: {'1%': -3.4558530692911504, '5%': -2.872764881778665, '10%': -2.572751643088207}

In [7]:

# Plot ACF and PACF to determine the lag order
plt.figure(figsize=(18, 6))
plot_acf(data['Close'], lags=20, alpha=0.05)
plt.title('Autocorrelation Function (ACF)')
plt.xlabel('Lag')
plt.ylabel('ACF')
plt.show()

plt.figure(figsize=(18, 6))
plot_pacf(data['Close'], lags=20, alpha=0.05)
plt.title('Partial Autocorrelation Function (PACF)')
plt.xlabel('Lag')
plt.ylabel('PACF')
plt.show()

Out[7]:

<Figure size 1296x432 with 0 Axes>

<Figure size 1296x432 with 0 Axes>

In [8]:

# Choose the best lag using AIC
best_aic = np.inf
best_order = None

for p in range(1, 6):  # Maximum lag considered is 5
    model = sm.tsa.ARIMA(data['Close'], order=(p, 0, 0))
    results = model.fit()
    aic = results.aic
    if aic < best_aic:
        best_aic = aic
        best_order = (p, 0, 0)

print("Best AIC:", best_aic)
print("Best Order (p, d, q):", best_order)

Out[8]:

C:\Users\suyashi144893\Anaconda3\lib\site-packages\statsmodels\tsa\base\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency B will be used.
  self._init_dates(dates, freq)
C:\Users\suyashi144893\Anaconda3\lib\site-packages\statsmodels\tsa\base\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency B will be used.
  self._init_dates(dates, freq)
C:\Users\suyashi144893\Anaconda3\lib\site-packages\statsmodels\tsa\base\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency B will be used.
  self._init_dates(dates, freq)
C:\Users\suyashi144893\Anaconda3\lib\site-packages\statsmodels\tsa\base\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency B will be used.
  self._init_dates(dates, freq)
C:\Users\suyashi144893\Anaconda3\lib\site-packages\statsmodels\tsa\base\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency B will be used.
  self._init_dates(dates, freq)
C:\Users\suyashi144893\Anaconda3\lib\site-packages\statsmodels\tsa\base\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency B will be used.
  self._init_dates(dates, freq)
C:\Users\suyashi144893\Anaconda3\lib\site-packages\statsmodels\tsa\base\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency B will be used.
  self._init_dates(dates, freq)
C:\Users\suyashi144893\Anaconda3\lib\site-packages\statsmodels\tsa\base\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency B will be used.
  self._init_dates(dates, freq)
C:\Users\suyashi144893\Anaconda3\lib\site-packages\statsmodels\tsa\base\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency B will be used.
  self._init_dates(dates, freq)
C:\Users\suyashi144893\Anaconda3\lib\site-packages\statsmodels\tsa\base\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency B will be used.
  self._init_dates(dates, freq)
C:\Users\suyashi144893\Anaconda3\lib\site-packages\statsmodels\tsa\base\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency B will be used.
  self._init_dates(dates, freq)
C:\Users\suyashi144893\Anaconda3\lib\site-packages\statsmodels\tsa\base\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency B will be used.
  self._init_dates(dates, freq)
C:\Users\suyashi144893\Anaconda3\lib\site-packages\statsmodels\tsa\base\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency B will be used.
  self._init_dates(dates, freq)
C:\Users\suyashi144893\Anaconda3\lib\site-packages\statsmodels\tsa\base\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency B will be used.
  self._init_dates(dates, freq)
C:\Users\suyashi144893\Anaconda3\lib\site-packages\statsmodels\tsa\base\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency B will be used.
  self._init_dates(dates, freq)

Best AIC: 738.7069741881983
Best Order (p, d, q): (1, 0, 0)

In [9]:

# Fit the AR model with the best order
model = sm.tsa.ARIMA(data['Close'], order=best_order)
results = model.fit()

Out[9]:

C:\Users\suyashi144893\Anaconda3\lib\site-packages\statsmodels\tsa\base\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency B will be used.
  self._init_dates(dates, freq)
C:\Users\suyashi144893\Anaconda3\lib\site-packages\statsmodels\tsa\base\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency B will be used.
  self._init_dates(dates, freq)
C:\Users\suyashi144893\Anaconda3\lib\site-packages\statsmodels\tsa\base\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency B will be used.
  self._init_dates(dates, freq)

In [10]:


# If not stationary, apply differencing
# data_diff = data['Close'].diff().dropna()





# Predict future values
forecast_steps = 30
forecast = results.forecast(steps=forecast_steps)

# Visualize the results
plt.figure(figsize=(18, 6))
plt.plot(data.index, data['Close'], label='Actual')
plt.plot(pd.date_range(start=data.index[-1], periods=forecast_steps + 1, freq='B')[1:], forecast, label='Forecast')
plt.title('Stock Price Prediction')
plt.xlabel('Date')
plt.ylabel('Price')
plt.legend()
plt.show()

Out[10]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.ar_model import AutoReg
from sklearn.metrics import mean_squared_error

# Load your stock price data into a pandas DataFrame
# Assuming the data has a column named 'Close' for stock prices
# Replace 'your_stock_data.csv' with your actual data file
data = pd.read_csv('stockdata.csv')

# Convert the 'Date' column to datetime format
data['Date'] = pd.to_datetime(data['Date'])

# Set the 'Date' column as the index
data.set_index('Date', inplace=True)

# Sort the data by date in ascending order
data.sort_index(inplace=True)

# Split the data into training and testing sets
train_data = data['Close'][:-30]  # Use all but the last 30 days for training
test_data = data['Close'][-30:]   # Use the last 30 days for testing

# Fit an Autoregressive (AR) model to the training data
# Choose the appropriate lag order for the AR model (e.g., 1 for AR(1), 2 for AR(2), etc.)
# Here, I'm using lag_order=1 for demonstration
model = AutoReg(train_data, lags=1)
model_fit = model.fit()

# Make predictions on the test data
predictions = model_fit.predict(start=len(train_data), end=len(train_data)+len(test_data)-1, dynamic=False)

# Visualize the actual vs. predicted stock prices
plt.figure(figsize=(10, 6))
plt.plot(data.index[-30:], test_data, label='Actual')
plt.plot(data.index[-30:], predictions, color='red', linestyle='--', label='Predicted')
plt.title('Actual vs. Predicted Stock Prices')
plt.xlabel('Date')
plt.ylabel('Stock Price')
plt.legend()
plt.show()

# Evaluate the model using Mean Squared Error (MSE)
mse = mean_squared_error(test_data, predictions)
print(f'Mean Squared Error (MSE): {mse}')

Product

Resources

Company