Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
suyashi29
GitHub Repository: suyashi29/python-su
Path: blob/master/ML/Revise Linear Regression.ipynb
3074 views
Kernel: Python 3

Linear regression is a statistical approach for modelling the relationship between a dependent variable and a given set of independent variables.

image.png

# IPython magic to plot interactively on the notebook %matplotlib notebook
# Linear regression example
# A very simple example of two ways to fit a straight line to noisy data:
# numpy.polyfit and scipy.stats.linregress.
#
# The NumPy helpers are imported from NumPy itself. The copies that used to be
# re-exported from the top-level scipy namespace (linspace, polyval, polyfit,
# sqrt, randn) are deprecated and are not provided by newer SciPy releases.
from numpy import linspace, polyfit, polyval, sqrt
from numpy.random import randn
from scipy import stats
from matplotlib.pyplot import plot, title, show, legend

# Sample data creation
n = 50  # number of points
t = linspace(-5, 5, n)

# Parameters of the true line x = a*t + b
a = 0.8
b = -4
x = polyval([a, b], t)

# Add some noise
xn = x + randn(n)

# Linear regression with polyfit (degree 1; polyfit also fits higher orders)
(ar, br) = polyfit(t, xn, 1)
xr = polyval([ar, br], t)

# Root-mean-square error between the fitted line and the noisy samples.
# The array's own mean() is used instead of the builtin sum() / n.
err = sqrt(((xr - xn) ** 2).mean())

print('Linear regression using polyfit')
print('parameters: a=%.2f b=%.2f \nregression: a=%.2f b=%.2f, rms error= %.3f' % (a, b, ar, br, err))
print('\n')

# Linear regression using stats.linregress
(a_s, b_s, r, tt, stderr) = stats.linregress(t, xn)
print('Linear regression using stats.linregress')
print('parameters: a=%.2f b=%.2f \nregression: a=%.2f b=%.2f, std error= %.3f' % (a, b, a_s, b_s, stderr))
print('\n')

# matplotlib plotting: true line, noisy samples, fitted line
title('Linear Regression Example')
plot(t, x, 'g.--')
plot(t, xn, 'k.')
plot(t, xr, 'r.-')
legend(['original', 'plus noise', 'regression'])
show()
Linear regression using polyfit parameters: a=0.80 b=-4.00 regression: a=0.81 b=-3.76, ms error= 0.965 Linear regression using stats.linregress parameters: a=0.80 b=-4.00 regression: a=0.81 b=-3.76, std error= 0.047
<IPython.core.display.Javascript object>
# Importing the libraries import numpy as np import matplotlib.pyplot as plt import pandas as pd
# Importing the dataset
sal = pd.read_excel('Salary.xlsx')

# Matrix of features: every column except the last one
X = sal.iloc[:, :-1].values

# Dependent variable vector: the last column. Indexing with -1 (instead of a
# hard-coded 1) keeps y the exact complement of X's slice, so the target can
# never also appear among the features if more columns are added.
y = sal.iloc[:, -1].values
# Split the dataset into a training set and a test set.
# One third of the rows go to the test set (change test_size to adjust the
# ratio); random_state is pinned to 0.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=1 / 3,
    random_state=0,
)
## Feature Scaling not needed because the library used to build simple linear regressionc model will take care of that """from sklearn.preprocessing import StandardScaler sc_X = StandardScaler() X_train = sc_X.fit_transform(X_train) X_test = sc_X.transform(X_test) sc_y = StandardScaler() y_train = sc_y.fit_transform(y_train)"""
'from sklearn.preprocessing import StandardScaler\nsc_X = StandardScaler()\nX_train = sc_X.fit_transform(X_train)\nX_test = sc_X.transform(X_test)\nsc_y = StandardScaler()\ny_train = sc_y.fit_transform(y_train)'
## Fit a simple linear regression to the training set
from sklearn.linear_model import LinearRegression

# Instantiate the LinearRegression estimator
reg = LinearRegression()

# Train the estimator on the training split so that it can predict salary
# from years of experience; the value returned by .fit() is kept as `model`.
model = reg.fit(X_train, y_train)

# Vector of predicted values of the dependent variable for the test split
y_pred = reg.predict(X_test)

# Fitted intercept and coefficient(s)
(reg.intercept_, reg.coef_)
(13311.00883470787, array([5361.78229353]))
# Visualize the training set and the test set results, one figure each.
# Red dots are the real values of the split being shown; the blue line is the
# same regression line on both figures, evaluated over the training inputs.
for X_pts, y_pts, heading in (
    (X_train, y_train, 'Salary vs Experience (Training set)'),
    (X_test, y_test, 'Salary vs Experience (Test Set)'),
):
    plt.scatter(X_pts, y_pts, color='red')
    plt.plot(X_train, reg.predict(X_train), color='blue')
    plt.title(heading)
    plt.xlabel('Years of Experience')
    plt.ylabel('Salary')
    plt.show()
Image in a Jupyter notebookImage in a Jupyter notebook
# Evaluate the predictions on the test set
from sklearn import metrics

# MAE is the easiest to understand, because it's the average error
mae = metrics.mean_absolute_error(y_test, y_pred)

# MSE is more popular than MAE, because MSE "punishes" larger errors, which
# tends to be useful in the real world
mse = metrics.mean_squared_error(y_test, y_pred)

# RMSE is even more popular than MSE, because RMSE is interpretable in the
# "y" units (target units)
rmse = np.sqrt(mse)

print('MAE', mae)
print('MSE', mse)
print('RMSE', rmse)
MAE 5653.761026635242 MSE 55105733.870278984 RMSE 7423.3236404106065
## Calculate R squared (coefficient of determination) on the test set.
# r2_score is the metric that matches the printed "R^2" label.
# explained_variance_score ignores any constant offset in the residuals, so
# it only equals R^2 when the prediction errors average to exactly zero.
print('R^2 =', metrics.r2_score(y_test, y_pred))
R^2 = 0.9829807213710279