Path: blob/master/Part 1 - Data Preprocessing/categorical_data.py
1002 views
# Data Preprocessing
#
# Loads 'Data.csv', fills missing values in feature columns 1-2 with the
# column mean, one-hot encodes feature column 0, and label-encodes the target.
# Results are left in the module-level names X (features) and y (target).

# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# Importing the dataset
dataset = pd.read_csv('Data.csv')
# Features are every column except the last; the target is the last column.
# Indexing the target with -1 (instead of a hard-coded 3) keeps it in step
# with the feature slice if the file ever gains or loses a column.
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

# Taking care of missing data
# `Imputer` was removed from scikit-learn; `SimpleImputer` is its replacement.
# It always works column-wise (the old `axis=0`) and expects the real NaN
# value rather than the string 'NaN'.
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
imputer = imputer.fit(X[:, 1:3])
X[:, 1:3] = imputer.transform(X[:, 1:3])

# Encoding categorical data
# Encoding the Independent Variable
labelencoder_X = LabelEncoder()
X[:, 0] = labelencoder_X.fit_transform(X[:, 0])
# `OneHotEncoder(categorical_features=[0])` no longer exists, so column 0 is
# encoded on its own (as a 2-D single-column slice) and the result is placed
# in front of the remaining columns. This is the same layout the old
# `categorical_features` option produced: encoded columns first, the other
# columns after them, everything as floats.
onehotencoder = OneHotEncoder()
encoded_first_column = onehotencoder.fit_transform(X[:, [0]]).toarray()
X = np.hstack((encoded_first_column, X[:, 1:].astype(float)))
# Encoding the Dependent Variable
labelencoder_y = LabelEncoder()
y = labelencoder_y.fit_transform(y)