Path: blob/master/Part 10 - Model Selection And Boosting/XGBoost/xgboost.py
1341 views
# XGBoost
#
# End-to-end script: load the churn dataset, encode its categorical feature
# columns, fit an XGBoost classifier, evaluate it on a held-out test set with
# a confusion matrix, and estimate its accuracy with 10-fold cross validation.

# Install xgboost following the instructions on this link:
# http://xgboost.readthedocs.io/en/latest/build.html#

# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Importing the dataset
# Feature matrix = CSV columns 3..12, target vector = CSV column 13.
dataset = pd.read_csv('Churn_Modelling.csv')
X = dataset.iloc[:, 3:13].values
y = dataset.iloc[:, 13].values

# Encoding categorical data
# Feature columns 1 and 2 are replaced in place by integer codes
# (presumably Geography and Gender in this dataset -- confirm against the CSV).
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
labelencoder_X_1 = LabelEncoder()
X[:, 1] = labelencoder_X_1.fit_transform(X[:, 1])
labelencoder_X_2 = LabelEncoder()
X[:, 2] = labelencoder_X_2.fit_transform(X[:, 2])
# One-hot encode feature column 1. The `categorical_features` argument of
# OneHotEncoder was removed in scikit-learn 0.22, so the column selection is
# done with a ColumnTransformer instead. The encoded columns come first in the
# output, followed by the untouched (passthrough) columns, which is the same
# layout the old API produced. `sparse_threshold = 0` forces a dense result.
onehotencoder = OneHotEncoder()
columntransformer = ColumnTransformer(
    transformers = [('onehot', onehotencoder, [1])],
    remainder = 'passthrough',
    sparse_threshold = 0)
# X is an object array at this point; cast to float so the result matches the
# float matrix the original `.toarray()` call returned.
X = np.asarray(columntransformer.fit_transform(X), dtype = float)
# Drop the first dummy column to avoid the dummy variable trap.
X = X[:, 1:]

# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

# Fitting XGBoost to the Training set
from xgboost import XGBClassifier
classifier = XGBClassifier()
classifier.fit(X_train, y_train)

# Predicting the Test set results
y_pred = classifier.predict(X_test)

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)

# Applying k-Fold Cross Validation
from sklearn.model_selection import cross_val_score
accuracies = cross_val_score(estimator = classifier, X = X_train, y = y_train, cv = 10)
# Bind and print the summary statistics; as bare expressions their values
# were discarded when the file is run as a script.
mean_accuracy = accuracies.mean()
std_accuracy = accuracies.std()
print('Confusion matrix:')
print(cm)
print('10-fold CV accuracy: mean = {:.4f}, std = {:.4f}'.format(mean_accuracy, std_accuracy))