CoCalc -- xgboost.py

GitHub Repository: debakarr/machinelearning
Path: blob/master/Part 10 - Model Selection And Boosting/XGBoost/xgboost.py
1341 views

# XGBoost
#
# End-to-end tutorial script: load the churn dataset, encode its categorical
# feature columns, train an XGBoost classifier, evaluate it on a held-out test
# set with a confusion matrix, and estimate its accuracy with 10-fold
# cross-validation on the training set.

# Install xgboost following the instructions on this link: http://xgboost.readthedocs.io/en/latest/build.html#

# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Importing the dataset
# Columns 3..12 (positional) are used as features, column 13 as the target.
dataset = pd.read_csv('Churn_Modelling.csv')
X = dataset.iloc[:, 3:13].values
y = dataset.iloc[:, 13].values

# Encoding categorical data
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
# Feature columns 1 and 2 are treated as categorical and mapped to integer codes.
labelencoder_X_1 = LabelEncoder()
X[:, 1] = labelencoder_X_1.fit_transform(X[:, 1])
labelencoder_X_2 = LabelEncoder()
X[:, 2] = labelencoder_X_2.fit_transform(X[:, 2])
# One-hot encode feature column 1 only. The `categorical_features` argument of
# OneHotEncoder was deprecated in scikit-learn 0.20 and removed in 0.22, so the
# column selection is expressed with a ColumnTransformer instead. The encoded
# columns come first and the untouched columns follow in their original order,
# which is the same layout the old argument produced.
onehotencoder = ColumnTransformer(
    transformers=[('onehot', OneHotEncoder(), [1])],
    remainder='passthrough',
    sparse_threshold=0,  # always return a dense array (replaces .toarray())
)
# X is an object array (mixed column types), so cast the result to float for
# the estimator.
X = onehotencoder.fit_transform(X).astype(np.float64)
# Drop the first dummy column: it is redundant given the remaining dummies.
X = X[:, 1:]

# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

# Fitting XGBoost to the Training set
from xgboost import XGBClassifier
classifier = XGBClassifier()
classifier.fit(X_train, y_train)

# Predicting the Test set results
y_pred = classifier.predict(X_test)

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)

# Applying k-Fold Cross Validation
from sklearn.model_selection import cross_val_score
accuracies = cross_val_score(estimator = classifier, X = X_train, y = y_train, cv = 10)
# Keep the summary statistics in named variables and report them; as bare
# expressions their values are discarded when this file is run as a script.
mean_accuracy = accuracies.mean()
std_accuracy = accuracies.std()
print('10-fold CV accuracy: mean = {:.4f}, std = {:.4f}'.format(mean_accuracy, std_accuracy))

Product

Resources

Company