Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
suyashi29
GitHub Repository: suyashi29/python-su
Path: blob/master/ML Regression Analysis/Lab 1 Salary Prediction using Regression.ipynb
3074 views
Kernel: Python 3 (ipykernel)

Salary Prediction using Linear Regression

# Select matplotlib's interactive "notebook" backend so figures render inline
# in the notebook. This call is the plain-Python spelling of the IPython line
# magic `%matplotlib notebook`; it only works inside an IPython/Jupyter session.
get_ipython().run_line_magic("matplotlib", "notebook")
# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# Importing the dataset from the Excel workbook in the working directory
sal = pd.read_excel('Salary.xlsx')

# Preview the first two rows (last expression, so the notebook displays it)
# sal.shape  # uncomment to inspect the number of rows and columns instead
sal.head(2)
# Matrix of features: every column except the last one.
X = sal.iloc[:, :-1].values

# Dependent variable vector: the last column. Indexing with -1 (rather than a
# hard-coded 1) keeps the target selection consistent with the feature slice
# above, so the two never overlap if more feature columns are added.
y = sal.iloc[:, -1].values
# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split

# One third of the rows are held out for testing; random_state fixes the
# shuffle so the split is reproducible. Change test_size to experiment.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=1/3, random_state=0
)
## Fitting Simple Linear Regression to the Training Set
from sklearn.linear_model import LinearRegression

## Create object of Linear Regression class by calling it
reg = LinearRegression()
## Fit regressor object to training set by using the .fit method of the
## LinearRegression class.
# Fitting Simple Linear Regression to the Training set:
# we created a machine (simple linear regression model) and made it learn
# correlations on the training set so that the machine can predict salary
# based on its learning experience.
model = reg.fit(X_train, y_train)
## Predicting the Test set results
y_pred = reg.predict(X_test)  # vector of predictions of dependent variable
# Display the fitted parameters as a pair: the intercept followed by the
# array of coefficients (one entry per feature column).
reg.intercept_, reg.coef_
(13311.00883470787, array([5361.78229353]))
# Visualizing the Training set results
# Start a fresh figure: with the interactive notebook backend plt.show() does
# not close the current figure, so later plotting calls would otherwise draw
# on top of an earlier plot.
plt.figure()
plt.scatter(X_train, y_train, color='red')  # plot real values
# plot regression line of predicted values on training set
plt.plot(X_train, reg.predict(X_train), color='blue')
plt.title('Salary vs Experience (Training set)')
plt.xlabel('Years of Experience')
plt.ylabel('Salary')
plt.show()

# Visualizing the Test set results
plt.figure()  # separate figure so the test points are not mixed with the training plot
plt.scatter(X_test, y_test, color='red')  # plot real values of test set
# Same fitted regression line as above (drawn over the training inputs); the
# model is unchanged, only the scattered points differ.
plt.plot(X_train, reg.predict(X_train), color='blue')
plt.title('Salary vs Experience (Test Set)')
plt.xlabel('Years of Experience')
plt.ylabel('Salary')
plt.show()
<IPython.core.display.Javascript object>
# Evaluate results
from sklearn import metrics

# Calculate metrics on the held-out test set. MSE is computed once and reused
# for RMSE instead of being recomputed.
mae = metrics.mean_absolute_error(y_test, y_pred)
mse = metrics.mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)

# MAE is the easiest to understand, because it's the average error
print('MAE', mae)
# MSE is more popular than MAE, because MSE "punishes" larger errors, which
# tends to be useful in the real world
print('MSE', mse)
# RMSE is even more popular than MSE, because RMSE is interpretable in the
# "y" units (target units)
print('RMSE', rmse)
MAE 5653.761026635242 MSE 55105733.870278984 RMSE 7423.3236404106065
## Calculated R Squared
# Use r2_score (coefficient of determination) for the value labelled R^2.
# explained_variance_score is a different metric: it ignores any systematic
# offset (non-zero mean) in the residuals, so it can report a higher number
# than the true R^2.
print('R^2 =', metrics.r2_score(y_test, y_pred))
R^2 = 0.9829807213710279