# GitHub Repository: probml/pyprobml
# Path: blob/master/deprecated/scripts/bayes_opt_demo.py
# Bayesian optimization of a 1d continuous function
# Modified from Martin Krasser's code:
# https://github.com/krasserm/bayesian-machine-learning/blob/dev/bayesian-optimization/bayesian_optimization.ipynb


import superimport

import numpy as np
from bayes_opt_utils import BayesianOptimizer, MultiRestartGradientOptimizer, expected_improvement

import matplotlib.pyplot as plt
import pyprobml_utils as pml
save_figures = True  # set to False to skip writing PDF figures

from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import ConstantKernel, Matern

np.random.seed(0)

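# ---------------------------------------------------------------------------
# Note on the acquisition function (illustrative only; the demo itself uses
# expected_improvement imported from bayes_opt_utils above, whose details may
# differ). For maximization, expected improvement has the standard closed form
#   EI(x) = (mu(x) - f_best - xi) * Phi(Z) + sigma(x) * phi(Z),
#   Z     = (mu(x) - f_best - xi) / sigma(x),
# where f_best is the largest surrogate mean at the points sampled so far and
# xi >= 0 controls the exploration/exploitation trade-off. A minimal sketch,
# under a deliberately different name so it does not shadow the import:
from scipy.stats import norm

def expected_improvement_sketch(X_query, X_sample, Y_sample, gpr, xi=0.01):
    # Hypothetical helper, not part of the original script.
    mu, sigma = gpr.predict(np.atleast_2d(X_query), return_std=True)
    mu_sample = gpr.predict(np.atleast_2d(X_sample))
    f_best = np.max(mu_sample)
    imp = mu.ravel() - f_best - xi
    Z = np.divide(imp, sigma, out=np.zeros_like(imp), where=sigma > 0)
    ei = imp * norm.cdf(Z) + sigma * norm.pdf(Z)
    ei[sigma == 0.0] = 0.0
    return ei
# ---------------------------------------------------------------------------
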
def plot_approximation(gpr, X, Y, X_sample, Y_sample, X_next=None, show_legend=False):
    # Plot the GP posterior mean with a 95% credible band, the noise-free
    # objective, and the samples collected so far.
    X = np.atleast_2d(X)
    mu, std = gpr.predict(X, return_std=True)
    plt.fill_between(X.ravel(),
                     mu.ravel() + 1.96 * std,
                     mu.ravel() - 1.96 * std,
                     alpha=0.1)
    plt.plot(X, Y, 'y--', lw=1, label='Noise-free objective')
    plt.plot(X, mu, 'b-', lw=1, label='Surrogate function')
    plt.plot(X_sample, Y_sample, 'kx', mew=3, label='Noisy samples')
    if X_next is not None:
        plt.axvline(x=X_next, ls='--', c='k', lw=1)
    if show_legend:
        plt.legend()

def plot_acquisition(X, Y, X_next, show_legend=False):
    plt.plot(X.ravel(), Y.ravel(), 'r-', lw=1, label='Acquisition function')
    plt.axvline(x=X_next, ls='--', c='k', lw=1, label='Next sampling location')
    if show_legend:
        plt.legend()

def plot_convergence(X_sample, Y_sample, n_init=2):
    # Left panel: distance between consecutive query points (should shrink as the
    # optimizer settles). Right panel: running maximum of the observed objective.
    plt.figure(figsize=(12, 3))

    x = X_sample[n_init:].ravel()
    y = Y_sample[n_init:].ravel()
    r = range(1, len(x)+1)

    x_neighbor_dist = [np.abs(a-b) for a, b in zip(x, x[1:])]
    y_max_watermark = np.maximum.accumulate(y)

    plt.subplot(1, 2, 1)
    plt.plot(r[1:], x_neighbor_dist, 'bo-')
    plt.xlabel('Iteration')
    plt.ylabel('Distance')
    plt.title("Distance between consecutive x's")

    plt.subplot(1, 2, 2)
    plt.plot(r, y_max_watermark, 'ro-')
    plt.xlabel('Iteration')
    plt.ylabel('Best Y')


##################
# Objective function and initial design

bounds = np.array([[-1.0, 2.0]])
noise = 0.2

def f(X, noise=noise):
    # Noisy 1d objective; pass noise=0 for the noise-free function.
    return -np.sin(3*X) - X**2 + 0.7*X + noise * np.random.randn(*X.shape)

X_init = np.array([[-0.9], [1.1]])
Y_init = f(X_init)


# Dense grid of points within bounds
X = np.arange(bounds[:, 0], bounds[:, 1], 0.01).reshape(-1, 1)

# Noise-free objective function values at X
Y = f(X, 0)

# Plot optimization objective with noise level
plt.plot(X, Y, 'y--', lw=2, label='Noise-free objective')
plt.plot(X, f(X), 'bx', lw=1, alpha=0.1, label='Noisy samples')
plt.plot(X_init, Y_init, 'kx', mew=3, label='Initial samples')
plt.legend()
if save_figures: pml.savefig('bayes-opt-init.pdf')
plt.show()
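
# For reference only (illustrative addition, not used by the optimizer): the
# location and value of the noise-free optimum on the dense grid, which is handy
# when reading the convergence plot at the end of the script.
ix_best = int(np.argmax(Y))
print(f'grid optimum of noise-free objective: x* ~ {X[ix_best, 0]:.3f}, f(x*) ~ {Y[ix_best, 0]:.3f}')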


################
# GP surrogate model

kernel = ConstantKernel(1.0) * Matern(length_scale=1.0, nu=2.5)
gpr = GaussianProcessRegressor(kernel=kernel, alpha=noise**2)

"""
From the scikit-learn Matern kernel documentation:
https://github.com/scikit-learn/scikit-learn/blob/7b136e9/sklearn/gaussian_process/kernels.py#L1287
The parameter nu controls the smoothness of the learned function.
The smaller nu, the less smooth the approximated function is.
For nu=inf, the kernel becomes equivalent to the RBF kernel, and for
nu=0.5 to the absolute exponential kernel. Important intermediate
values are nu=1.5 (once-differentiable functions) and nu=2.5
(twice-differentiable functions). Note that values of nu not in
[0.5, 1.5, 2.5, inf] incur a considerably higher computational cost
(approx. 10 times higher), since they require evaluating the modified
Bessel function. Furthermore, in contrast to length_scale, nu is kept
fixed to its initial value and not optimized.
"""

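# Illustrative check of the nu parameter described in the excerpt above (not part
# of the demo; variable names here are ad hoc): fit the two initial samples with a
# rough (nu=0.5) and a smooth (nu=2.5) Matern kernel and compare the fitted kernels.
for nu_val in [0.5, 2.5]:
    k_nu = ConstantKernel(1.0) * Matern(length_scale=1.0, nu=nu_val)
    gpr_nu = GaussianProcessRegressor(kernel=k_nu, alpha=noise**2)
    gpr_nu.fit(X_init, Y_init)
    print(f'nu={nu_val}: fitted kernel = {gpr_nu.kernel_}')
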
# Keep track of visited points for plotting purposes
# (module-level variables, updated by the callbacks below)
X_sample = X_init
Y_sample = Y_init

def callback(X_next, Y_next, i):
    global X_sample, Y_sample
    # Plot samples, surrogate function, noise-free objective and next sampling location
    plt.figure()
    plot_approximation(gpr, X, Y, X_sample, Y_sample, X_next, show_legend=i==0)
    plt.title(f'Iteration {i+1}')
    if save_figures: pml.savefig('bayes-opt-surrogate-{}.pdf'.format(i+1))
    plt.show()

    # Plot the acquisition function and the chosen next sampling location
    plt.figure()
    plot_acquisition(X, expected_improvement(X, X_sample, Y_sample, gpr), X_next, show_legend=i==0)
    if save_figures: pml.savefig('bayes-opt-acquisition-{}.pdf'.format(i+1))
    plt.show()

    # Add sample to previous samples
    X_sample = np.append(X_sample, np.atleast_2d(X_next), axis=0)
    Y_sample = np.append(Y_sample, np.atleast_2d(Y_next), axis=0)


def callback_noplot(X_next, Y_next, i):
    # Record the new sample without generating any plots.
    global X_sample, Y_sample
    X_next = np.atleast_2d(X_next)
    Y_next = np.atleast_2d(Y_next)
    X_sample = np.vstack((X_sample, X_next))
    Y_sample = np.vstack((Y_sample, Y_next))

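# Conceptual sketch of one iteration of the loop that solver.maximize(f) runs
# below. The real loop lives in bayes_opt_utils.BayesianOptimizer /
# MultiRestartGradientOptimizer; this self-contained version uses scipy directly,
# the expected_improvement_sketch defined near the top of the file, and ad-hoc
# names, so treat it as an illustration rather than the actual implementation.
from scipy.optimize import minimize as scipy_minimize

def bo_step_sketch(X_obs, Y_obs, gpr_model, n_restarts=25):
    # Refit the surrogate, then maximize EI from several random restarts.
    gpr_model.fit(X_obs, Y_obs)
    best_x, best_neg_ei = None, np.inf
    for x0 in np.random.uniform(bounds[:, 0], bounds[:, 1], size=(n_restarts, 1)):
        res = scipy_minimize(
            lambda x: -expected_improvement_sketch(x.reshape(1, -1), X_obs, Y_obs, gpr_model)[0],
            x0=x0, bounds=bounds, method='L-BFGS-B')
        if res.fun < best_neg_ei:
            best_x, best_neg_ei = res.x, res.fun
    return np.atleast_2d(best_x)  # next point at which to evaluate f
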
# Run the Bayesian optimization loop
n_restarts = 25
np.random.seed(0)
noise = 0.2
n_iter = 10
acq_fn = expected_improvement
acq_solver = MultiRestartGradientOptimizer(dim=1, bounds=bounds, n_restarts=n_restarts)
solver = BayesianOptimizer(X_init, Y_init, gpr, acq_fn, acq_solver, n_iter=n_iter, callback=callback)

solver.maximize(f)


plot_convergence(X_sample, Y_sample)
if save_figures: pml.savefig('bayes-opt-convergence.pdf')
plt.show()

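# Summarize the run (illustrative addition; the original script only produces the
# convergence plot above).
ix = int(np.argmax(Y_sample))
print(f'best observed sample: x = {X_sample[ix, 0]:.3f}, y = {Y_sample[ix, 0]:.3f}')
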
####################
# Alternative 1: skopt, https://scikit-optimize.github.io/
"""
from skopt import gp_minimize

np.random.seed(0)
r = gp_minimize(lambda x: -f(np.array(x))[0],
                bounds.tolist(),
                base_estimator=gpr,
                acq_func='EI',       # expected improvement
                xi=0.01,             # exploitation-exploration trade-off
                n_calls=10,          # number of iterations
                n_random_starts=0,   # initial samples are provided
                x0=X_init.tolist(),  # initial samples
                y0=-Y_init.ravel())

# Fit GP model to samples for plotting results. Note negation of f.
gpr.fit(r.x_iters, -r.func_vals)
plot_approximation(gpr, X, Y, r.x_iters, -r.func_vals, show_legend=True)
pml.savefig('bayes-opt-skopt.pdf')
plt.show()

plot_convergence(np.array(r.x_iters), -r.func_vals)


###############
# Alternative 2: GPyOpt, https://github.com/SheffieldML/GPyOpt

import GPy
from GPyOpt.methods import BayesianOptimization

kernel = GPy.kern.Matern52(input_dim=1, variance=1.0, lengthscale=1.0)
bds = [{'name': 'X', 'type': 'continuous', 'domain': bounds.ravel()}]

np.random.seed(2)
optimizer = BayesianOptimization(f=lambda X: -f(X),
                                 domain=bds,
                                 model_type='GP',
                                 kernel=kernel,
                                 acquisition_type='EI',
                                 acquisition_jitter=0.01,
                                 X=X_init,
                                 Y=-Y_init,
                                 noise_var=noise**2,
                                 exact_feval=False,
                                 normalize_Y=False,
                                 maximize=False)

optimizer.run_optimization(max_iter=10)
optimizer.plot_acquisition()
pml.savefig('bayes-opt-gpyopt.pdf')
plt.show()

optimizer.plot_convergence()
"""