Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
suyashi29
GitHub Repository: suyashi29/python-su
Path: blob/master/Time Forecasting using Python/Day 8 Headcount using Forecasting Analysis multivariant .ipynb
3074 views
Kernel: Python 3 (ipykernel)
# Third-party dependencies for data handling, modelling, evaluation and plotting.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# Load the raw headcount data from the CSV file in the working directory.
headcount_data = pd.read_csv("headcount_data.csv")
# Preview the first rows of the loaded headcount data.
headcount_data.head()
# Summary statistics for the numeric columns.
headcount_data.describe()
# Count missing values per column.
headcount_data.isnull().sum()
Avg_Exp 0 count_emp 0 Year 0 dtype: int64
# Build one-step lag features: each row receives the previous row's value.
# NOTE(review): assumes rows are already in chronological order -- confirm.
lag_sources = (
    ('count_emp', 'count_emp_lag1'),  # previous row's employee count
    ('Avg_Exp', 'AvgExp_lag1'),       # previous row's average experience
)
for source_column, lag_column in lag_sources:
    headcount_data[lag_column] = headcount_data[source_column].shift(1)

# The first row has no predecessor, so its lag values are NaN; drop such rows.
headcount_data.dropna(inplace=True)
# Preview the frame after the lag columns were added and incomplete rows dropped.
headcount_data.head()
# Predictors: lagged headcount, lagged average experience, and the year.
feature_columns = ['count_emp_lag1', 'AvgExp_lag1', 'Year']
# Response: the current employee count.
target_column = 'count_emp'

X = headcount_data[feature_columns]
y = headcount_data[target_column]
# Hold out the final 20% of rows for testing. shuffle=False keeps the row
# order intact, so the test set is the tail of the data rather than a
# random sample.
holdout_fraction = 0.2
ordered_split = train_test_split(
    X, y, test_size=holdout_fraction, shuffle=False
)
X_train, X_test, y_train, y_test = ordered_split
# Fit an ordinary least-squares model on the training split.
# fit() returns the estimator itself, so construction and training chain.
headcount_pred = LinearRegression().fit(X_train, y_train)
# Intercept term of the fitted linear regression.
headcount_pred.intercept_
587.2474967834119
# Fitted coefficients, in feature order: count_emp_lag1, AvgExp_lag1, Year.
headcount_pred.coef_
array([-0.0675335 , 0.16847947, -0.2546715 ])
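Fitted model (coefficients rounded to four decimals): $\widehat{\text{count\_emp}}_t = 587.25 - 0.0675\,\text{count\_emp}_{t-1} + 0.1685\,\text{Avg\_Exp}_{t-1} - 0.2547\,\text{Year}_t$, where $\text{count\_emp}_{t-1}$ is `count_emp_lag1` and $\text{Avg\_Exp}_{t-1}$ is `AvgExp_lag1`.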
# Re-select the feature matrix: lagged headcount, lagged average experience, year.
X = headcount_data[['count_emp_lag1', 'AvgExp_lag1', 'Year']]
# Fitted equation (coefficients rounded):
#   Head_pred = -0.0675*count_emp_lag1 + 0.1685*AvgExp_lag1 - 0.2547*Year + 587.25
# NOTE(review): the original note ended with "+587+212"; 212 looks like the
# train MSE, which is not a term of the regression equation -- confirm intent.
# Score both splits with the fitted model: training rows first, then test rows.
y_pred_train, y_pred_test = (
    headcount_pred.predict(features) for features in (X_train, X_test)
)
# Mean squared error on each split, in squared headcount units.
train_mse = mean_squared_error(y_true=y_train, y_pred=y_pred_train)
test_mse = mean_squared_error(y_true=y_test, y_pred=y_pred_test)

# Report both errors to four decimal places.
print(f'Train MSE: {train_mse:.4f}')
print(f'Test MSE: {test_mse:.4f}')
Train MSE: 212.8808 Test MSE: 178.1756
# Pair each held-out actual value with its forecast. The table keeps the
# index of y_test, so rows line up with the original data.
forecast_columns = {'Actual': y_test, 'Forecasted': y_pred_test}
forecast_table = pd.DataFrame(forecast_columns)

# Optional visualisation, left disabled by the notebook author:
# plt.figure(figsize=(14, 7))
# plt.plot(headcount_data['Year'], headcount_data['count_emp'], label='True')
# plt.plot(X_train['Year'], y_pred_train, label='Train Prediction')
# plt.plot(X_test['Year'], y_pred_test, label='Test Prediction', color='r')
# plt.title('Headcount Forecasting with Linear Regression')
# plt.xlabel('Time')
# plt.ylabel('Employee Count')
# plt.legend()
# plt.show()

# Show the heading and the table; sep="\n" puts the table on its own line.
print("\nActual and Forecasted Values:", forecast_table, sep="\n")
Actual and Forecasted Values: Actual Forecasted 293 79 76.418850 294 99 75.484546 295 83 73.641381 296 68 76.521465 297 67 77.041972 .. ... ... 362 64 75.242928 363 72 77.914728 364 78 78.155322 365 70 78.021641 366 68 78.578757 [74 rows x 2 columns]