Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
debakarr
GitHub Repository: debakarr/machinelearning
Path: blob/master/Part 1 - Data Preprocessing/categorical_data.py
1002 views
1
# Data Preprocessing
2
3
# Importing the libraries
4
import numpy as np
5
import matplotlib.pyplot as plt
6
import pandas as pd
7
8
# Importing the dataset
# X holds every column except the last one (the features); y holds the last
# column (the target).  Both slices are expressed relative to the end of the
# frame so they stay disjoint regardless of how many feature columns the CSV
# has (a hard-coded target index only lines up for a four-column file).
dataset = pd.read_csv('Data.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values
12
13
# Taking care of missing data
# `sklearn.preprocessing.Imputer` was deprecated in scikit-learn 0.20 and
# removed in 0.22; `sklearn.impute.SimpleImputer` is its replacement.  It
# always imputes column-wise (the old `axis=0` behaviour) and expects the
# real `np.nan` value as the missing-value marker, not the string 'NaN'.
from sklearn.impute import SimpleImputer
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
# Fit on columns 1 and 2 of X and write the imputed values back in place;
# `imputer` stays bound to the fitted estimator.
X[:, 1:3] = imputer.fit_transform(X[:, 1:3])
18
19
# Encoding categorical data
# Encoding the Independent Variable
# The `categorical_features` argument of OneHotEncoder was deprecated in
# scikit-learn 0.20 and removed in 0.22; selecting which columns to encode is
# now done with a ColumnTransformer.
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
# Column 0 is still replaced by integer codes first, so `labelencoder_X`
# remains a fitted encoder that can map codes back to the original labels.
labelencoder_X = LabelEncoder()
X[:, 0] = labelencoder_X.fit_transform(X[:, 0])
# One-hot encode column 0 and pass the remaining columns through unchanged.
# The encoded columns come first, followed by the untouched ones.
# sparse_threshold=0 forces a dense array, standing in for the `.toarray()`
# call that the old API required.
column_transformer = ColumnTransformer(
    transformers=[('onehot', OneHotEncoder(), [0])],
    remainder='passthrough',
    sparse_threshold=0,
)
# Cast to float so X is a numeric array rather than an object array.
X = column_transformer.fit_transform(X).astype(float)
# Expose the fitted encoder under the name this script has always used.
onehotencoder = column_transformer.named_transformers_['onehot']
# Encoding the Dependent Variable
labelencoder_y = LabelEncoder()
y = labelencoder_y.fit_transform(y)
29