Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
suyashi29
GitHub Repository: suyashi29/python-su
Path: blob/master/Time Forecasting using Python/2.3 Case Study Stock Price Prediction using AR .ipynb
3074 views
Kernel: Python 3 (ipykernel)

pip install wordcloud --trusted-host pypi.org --trusted-host files.pythonhosted.org pandas_datareader

import numpy as np import pandas as pd import matplotlib.pyplot as plt import statsmodels.api as sm from statsmodels.tsa.stattools import acf, pacf from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import pandas_datareader as pdr

# Request NVDA price history via the Yahoo public API.
data = pdr.get_data_yahoo('NVDA')

# Display info. DataFrame.info() writes its summary to stdout itself and
# returns None, so it is called directly; wrapping it in print() would
# append a stray "None" line to the output.
data.info()
# Load the stock price data.
# Expects a CSV file named 'stockdata.csv' with columns ['Date', 'Close'].
data = pd.read_csv('stockdata.csv')

# Parse the dates and use them as the index so the series is time-indexed.
data['Date'] = pd.to_datetime(data['Date'])
data.set_index('Date', inplace=True)

# Keep the observations in chronological order: the time-series models and
# the forecast plot below treat the last row as the most recent observation.
data.sort_index(inplace=True)
# Peek at the first two rows, then report the (rows, columns) dimensions.
# In a notebook only the last expression of a cell is displayed.
data.head(n=2)
data.shape
(260, 1)
# Check for missing values: count the null cells in each column, then add
# the per-column counts together for a single total over the whole frame.
missing_per_column = data.isnull().sum()
print("Number of missing values:", missing_per_column.sum())
Number of missing values: 0
# Visualize the data: closing price against the date index.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(data.index, data['Close'], label='Stock Price')
ax.set_title('Stock Price Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Price')
ax.legend()
plt.show()
Image in a Jupyter notebook
# Check for stationarity using the Augmented Dickey-Fuller test.
adf_result = sm.tsa.adfuller(data['Close'])

# Pull out the fields that get reported: the test statistic, its p-value,
# and the dictionary of critical values keyed by significance level.
adf_statistic, adf_p_value = adf_result[0], adf_result[1]
adf_critical_values = adf_result[4]

print("ADF Statistic:", adf_statistic)
print("p-value:", adf_p_value)
print("Critical Values:", adf_critical_values)
ADF Statistic: -1.633311290790917 p-value: 0.46575793126617404 Critical Values: {'1%': -3.4558530692911504, '5%': -2.872764881778665, '10%': -2.572751643088207}
# Plot ACF and PACF to determine the lag order.
#
# plot_acf / plot_pacf draw on a brand-new figure unless an Axes is handed
# to them through `ax=`. Creating a figure with plt.figure() beforehand only
# leaves an empty "Figure ... with 0 Axes" behind and the requested figsize
# is ignored, so the Axes is created explicitly and passed in.
fig, ax = plt.subplots(figsize=(18, 6))
plot_acf(data['Close'], lags=20, alpha=0.05, ax=ax)
ax.set_title('Autocorrelation Function (ACF)')
ax.set_xlabel('Lag')
ax.set_ylabel('ACF')
plt.show()

fig, ax = plt.subplots(figsize=(18, 6))
plot_pacf(data['Close'], lags=20, alpha=0.05, ax=ax)
ax.set_title('Partial Autocorrelation Function (PACF)')
ax.set_xlabel('Lag')
ax.set_ylabel('PACF')
plt.show()
<Figure size 1296x432 with 0 Axes>
Image in a Jupyter notebook
<Figure size 1296x432 with 0 Axes>
Image in a Jupyter notebook
# Choose the best lag using AIC: fit an AR(p) model (ARIMA with d = q = 0)
# for each candidate p and keep the order with the lowest AIC. The strict
# "<" comparison means that on a tie the smaller p is kept.
best_aic = np.inf
best_order = None

for p in range(1, 6):  # Maximum lag considered is 5
    candidate_order = (p, 0, 0)
    candidate_aic = sm.tsa.ARIMA(data['Close'], order=candidate_order).fit().aic
    if candidate_aic < best_aic:
        best_aic, best_order = candidate_aic, candidate_order

print("Best AIC:", best_aic)
print("Best Order (p, d, q):", best_order)
C:\Users\suyashi144893\Anaconda3\lib\site-packages\statsmodels\tsa\base\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency B will be used. self._init_dates(dates, freq) C:\Users\suyashi144893\Anaconda3\lib\site-packages\statsmodels\tsa\base\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency B will be used. self._init_dates(dates, freq) C:\Users\suyashi144893\Anaconda3\lib\site-packages\statsmodels\tsa\base\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency B will be used. self._init_dates(dates, freq) C:\Users\suyashi144893\Anaconda3\lib\site-packages\statsmodels\tsa\base\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency B will be used. self._init_dates(dates, freq) C:\Users\suyashi144893\Anaconda3\lib\site-packages\statsmodels\tsa\base\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency B will be used. self._init_dates(dates, freq) C:\Users\suyashi144893\Anaconda3\lib\site-packages\statsmodels\tsa\base\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency B will be used. self._init_dates(dates, freq) C:\Users\suyashi144893\Anaconda3\lib\site-packages\statsmodels\tsa\base\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency B will be used. self._init_dates(dates, freq) C:\Users\suyashi144893\Anaconda3\lib\site-packages\statsmodels\tsa\base\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency B will be used. self._init_dates(dates, freq) C:\Users\suyashi144893\Anaconda3\lib\site-packages\statsmodels\tsa\base\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency B will be used. 
self._init_dates(dates, freq) C:\Users\suyashi144893\Anaconda3\lib\site-packages\statsmodels\tsa\base\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency B will be used. self._init_dates(dates, freq) C:\Users\suyashi144893\Anaconda3\lib\site-packages\statsmodels\tsa\base\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency B will be used. self._init_dates(dates, freq) C:\Users\suyashi144893\Anaconda3\lib\site-packages\statsmodels\tsa\base\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency B will be used. self._init_dates(dates, freq) C:\Users\suyashi144893\Anaconda3\lib\site-packages\statsmodels\tsa\base\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency B will be used. self._init_dates(dates, freq) C:\Users\suyashi144893\Anaconda3\lib\site-packages\statsmodels\tsa\base\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency B will be used. self._init_dates(dates, freq) C:\Users\suyashi144893\Anaconda3\lib\site-packages\statsmodels\tsa\base\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency B will be used. self._init_dates(dates, freq)
Best AIC: 738.7069741881983 Best Order (p, d, q): (1, 0, 0)
# Fit the AR model on the closing prices using the order chosen by the
# AIC search (best_order).
close_prices = data['Close']
model = sm.tsa.ARIMA(endog=close_prices, order=best_order)
results = model.fit()
C:\Users\suyashi144893\Anaconda3\lib\site-packages\statsmodels\tsa\base\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency B will be used. self._init_dates(dates, freq) C:\Users\suyashi144893\Anaconda3\lib\site-packages\statsmodels\tsa\base\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency B will be used. self._init_dates(dates, freq) C:\Users\suyashi144893\Anaconda3\lib\site-packages\statsmodels\tsa\base\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency B will be used. self._init_dates(dates, freq)
# If the series is not stationary, apply differencing before modelling, e.g.
#   data_diff = data['Close'].diff().dropna()

# Predict future values.
forecast_steps = 30
forecast = results.forecast(steps=forecast_steps)

# Business-day dates for the forecast horizon. Stepping one business day past
# the last observation yields the correct first forecast date even when that
# observation does not fall on a business day. Starting a 'B' range at the
# last observation and slicing off the first element would, in that case,
# discard the first real forecast date instead.
first_forecast_date = data.index[-1] + pd.offsets.BDay(1)
forecast_dates = pd.bdate_range(start=first_forecast_date, periods=forecast_steps)

# Visualize the results: observed history followed by the forecast.
plt.figure(figsize=(18, 6))
plt.plot(data.index, data['Close'], label='Actual')
plt.plot(forecast_dates, forecast, label='Forecast')
plt.title('Stock Price Prediction')
plt.xlabel('Date')
plt.ylabel('Price')
plt.legend()
plt.show()
Image in a Jupyter notebook
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.ar_model import AutoReg
from sklearn.metrics import mean_squared_error

# Number of trailing observations (rows, not calendar days) held out for
# testing. Defined once so the split, the prediction window and the plot
# cannot drift apart.
test_size = 30

# Load the stock price data into a pandas DataFrame.
# The file 'stockdata.csv' must provide a 'Date' column and a 'Close' column
# holding the stock prices; replace the file name with your actual data file.
data = pd.read_csv('stockdata.csv')

# Convert the 'Date' column to datetime format
data['Date'] = pd.to_datetime(data['Date'])

# Set the 'Date' column as the index
data.set_index('Date', inplace=True)

# Sort the data by date in ascending order
data.sort_index(inplace=True)

# A non-positive test_size would make the [:-test_size] slice below empty,
# and a test_size covering every row would leave nothing to train on.
if not 0 < test_size < len(data):
    raise ValueError(
        f"test_size must be between 1 and {len(data) - 1}, got {test_size}"
    )

# Split the data into training and testing sets
train_data = data['Close'][:-test_size]  # Everything except the hold-out rows
test_data = data['Close'][-test_size:]   # The last test_size rows

# Fit an Autoregressive (AR) model to the training data.
# Choose the appropriate lag order for the AR model (e.g., 1 for AR(1),
# 2 for AR(2), etc.); lags=1 is used here for demonstration.
model = AutoReg(train_data, lags=1)
model_fit = model.fit()

# Make predictions for the positions immediately following the training
# data, one per held-out observation.
predictions = model_fit.predict(
    start=len(train_data),
    end=len(train_data) + len(test_data) - 1,
    dynamic=False,
)

# Visualize the actual vs. predicted stock prices over the hold-out dates
plt.figure(figsize=(10, 6))
plt.plot(test_data.index, test_data, label='Actual')
plt.plot(test_data.index, predictions, color='red', linestyle='--', label='Predicted')
plt.title('Actual vs. Predicted Stock Prices')
plt.xlabel('Date')
plt.ylabel('Stock Price')
plt.legend()
plt.show()

# Evaluate the model using Mean Squared Error (MSE)
mse = mean_squared_error(test_data, predictions)
print(f'Mean Squared Error (MSE): {mse}')