GitHub Repository: DataScienceUWL/DS775
Path: blob/main/Lessons/Lesson 08 - Hyperparameter Optimization (Project)/tpot_XGBregressor.py
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor

# NOTE: Make sure that the outcome column is labeled 'target' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
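# For illustration only (hypothetical file name and separator, not part of the
# exported script): a comma-separated file would be loaded as
#   tpot_data = pd.read_csv('my_data.csv', sep=',', dtype=np.float64)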
features = tpot_data.drop('target', axis=1)
training_features, testing_features, training_target, testing_target = \
    train_test_split(features, tpot_data['target'], random_state=8675309)
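# With no test_size given, train_test_split holds out 25% of the rows by
# default; random_state=8675309 makes the shuffle, and thus the split, reproducible.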

# Average CV score on the training set was: 0.9744238058725992
exported_pipeline = XGBRegressor(
    learning_rate=0.1,
    max_depth=7,
    min_child_weight=1,
    n_estimators=100,
    objective="reg:squarederror",
    reg_alpha=2.75,
    reg_lambda=2.5,
    subsample=0.6500000000000001,
)
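# The hyperparameter values above were selected by TPOT's search; the long
# trailing digits in subsample are almost certainly a floating-point artifact
# of the search grid (the intended value is 0.65).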
# Fix random state in exported estimator
if hasattr(exported_pipeline, 'random_state'):
    setattr(exported_pipeline, 'random_state', 8675309)

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
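
# The exported script stops at prediction. A minimal sketch of how the held-out
# set could be scored (an addition here, not part of the TPOT export), assuming
# the standard sklearn.metrics API:
from sklearn.metrics import mean_squared_error, r2_score

# Compare test-set performance against the cross-validated training score above.
print("Test R^2: ", r2_score(testing_target, results))
print("Test RMSE:", mean_squared_error(testing_target, results) ** 0.5)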