import superimport
import numpy as np
from bayes_opt_utils import BayesianOptimizer, MultiRestartGradientOptimizer, expected_improvement
import matplotlib.pyplot as plt
import pyprobml_utils as pml
save_figures = True
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import ConstantKernel, Matern
np.random.seed(0)
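# Demo of Bayesian optimization on a noisy 1d objective: a Gaussian-process
# surrogate is fitted to the samples collected so far, and the expected-improvement
# acquisition function chooses where to evaluate next.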
def plot_approximation(gpr, X, Y, X_sample, Y_sample, X_next=None, show_legend=False):
    X = np.atleast_2d(X)
    mu, std = gpr.predict(X, return_std=True)
    plt.fill_between(X.ravel(),
                     mu.ravel() + 1.96 * std,
                     mu.ravel() - 1.96 * std,
                     alpha=0.1)
    plt.plot(X, Y, 'y--', lw=1, label='Noise-free objective')
    plt.plot(X, mu, 'b-', lw=1, label='Surrogate function')
    plt.plot(X_sample, Y_sample, 'kx', mew=3, label='Noisy samples')
    if X_next is not None:  # explicit None check: X_next == 0 is falsy but valid
        plt.axvline(x=X_next, ls='--', c='k', lw=1)
    if show_legend:
        plt.legend()
def plot_acquisition(X, Y, X_next, show_legend=False):
    plt.plot(X.ravel(), Y.ravel(), 'r-', lw=1, label='Acquisition function')
    plt.axvline(x=X_next, ls='--', c='k', lw=1, label='Next sampling location')
    if show_legend:
        plt.legend()
def plot_convergence(X_sample, Y_sample, n_init=2):
    plt.figure(figsize=(12, 3))
    x = X_sample[n_init:].ravel()
    y = Y_sample[n_init:].ravel()
    r = range(1, len(x) + 1)
    x_neighbor_dist = [np.abs(a - b) for a, b in zip(x, x[1:])]
    y_max_watermark = np.maximum.accumulate(y)
    plt.subplot(1, 2, 1)
    plt.plot(r[1:], x_neighbor_dist, 'bo-')
    plt.xlabel('Iteration')
    plt.ylabel('Distance')
    plt.title("Distance between consecutive x's")
    plt.subplot(1, 2, 2)
    plt.plot(r, y_max_watermark, 'ro-')
    plt.xlabel('Iteration')
    plt.ylabel('Best Y')
bounds = np.array([[-1.0, 2.0]])
noise = 0.2
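# Search domain is the interval [-1, 2]; noise is the observation-noise stddev.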
def f(X, noise=noise):
    return -np.sin(3 * X) - X**2 + 0.7 * X + noise * np.random.randn(*X.shape)
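# For reference: the noise-free objective -sin(3x) - x^2 + 0.7x attains its global
# maximum on [-1, 2] at roughly x ~ -0.36, which the optimizer should approach.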
X_init = np.array([[-0.9], [1.1]])
Y_init = f(X_init)
X = np.arange(bounds[0, 0], bounds[0, 1], 0.01).reshape(-1, 1)
Y = f(X, 0)
plt.plot(X, Y, 'y--', lw=2, label='Noise-free objective')
plt.plot(X, f(X), 'bx', lw=1, alpha=0.1, label='Noisy samples')
plt.plot(X_init, Y_init, 'kx', mew=3, label='Initial samples')
plt.legend()
if save_figures: pml.savefig('bayes-opt-init.pdf')
plt.show()
kernel = ConstantKernel(1.0) * Matern(length_scale=1.0, nu=2.5)
gpr = GaussianProcessRegressor(kernel=kernel, alpha=noise**2)
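# alpha=noise**2 adds the (assumed known) observation-noise variance to the
# diagonal of the kernel matrix, so the GP models noisy function values.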
"""
https://github.com/scikit-learn/scikit-learn/blob/7b136e9/sklearn/gaussian_process/kernels.py#L1287
The parameter nu controlling the smoothness of the learned function.
The smaller nu, the less smooth the approximated function is.
For nu=inf, the kernel becomes equivalent to the RBF kernel and for
nu=0.5 to the absolute exponential kernel. Important intermediate
values are nu=1.5 (once differentiable functions) and nu=2.5
(twice differentiable functions). Note that values of nu not in
[0.5, 1.5, 2.5, inf] incur a considerably higher computational cost
(appr. 10 times higher) since they require to evaluate the modified
Bessel function. Furthermore, in contrast to l, nu is kept fixed to
its initial value and not optimized.
"""
X_sample = X_init
Y_sample = Y_init
def callback(X_next, Y_next, i):
    global X_sample, Y_sample
    plt.figure()
    plot_approximation(gpr, X, Y, X_sample, Y_sample, X_next, show_legend=(i == 0))
    plt.title(f'Iteration {i+1}')
    if save_figures: pml.savefig(f'bayes-opt-surrogate-{i+1}.pdf')
    plt.show()
    plt.figure()
    plot_acquisition(X, expected_improvement(X, X_sample, Y_sample, gpr), X_next, show_legend=(i == 0))
    if save_figures: pml.savefig(f'bayes-opt-acquisition-{i+1}.pdf')
    plt.show()
    X_sample = np.append(X_sample, np.atleast_2d(X_next), axis=0)
    Y_sample = np.append(Y_sample, np.atleast_2d(Y_next), axis=0)
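# callback_noplot is a drop-in alternative to callback that only records the
# samples, for running the loop without producing figures.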
def callback_noplot(X_next, Y_next, i):
    global X_sample, Y_sample
    X_next = np.atleast_2d(X_next)
    Y_next = np.atleast_2d(Y_next)
    X_sample = np.vstack((X_sample, X_next))
    Y_sample = np.vstack((Y_sample, Y_next))
n_restarts = 25
np.random.seed(0)
noise = 0.2
n_iter = 10
acq_fn = expected_improvement
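# expected_improvement (from bayes_opt_utils) presumably implements the standard
# closed form of EI for maximization, with exploration parameter xi:
#   EI(x) = (mu(x) - f_best - xi) * Phi(Z) + sigma(x) * phi(Z),
#   Z     = (mu(x) - f_best - xi) / sigma(x),
# where Phi/phi are the standard normal CDF/PDF and f_best is the best sample
# value so far; see bayes_opt_utils for the actual definition.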
acq_solver = MultiRestartGradientOptimizer(dim=1, bounds=bounds, n_restarts=n_restarts)
solver = BayesianOptimizer(X_init, Y_init, gpr, acq_fn, acq_solver, n_iter=n_iter, callback=callback)
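# Roughly, solver.maximize(f) is expected to run the usual BO loop (an
# illustrative sketch, not the exact bayes_opt_utils code):
#   for i in range(n_iter):
#       fit gpr to (X_sample, Y_sample)            # update the surrogate
#       X_next = acq_solver maximizer of acq_fn    # propose the next query point
#       Y_next = f(X_next)                         # evaluate the noisy objective
#       callback(X_next, Y_next, i)                # plot and record the sample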
solver.maximize(f)
plot_convergence(X_sample, Y_sample)
if save_figures: pml.savefig('bayes-opt-convergence.pdf')
plt.show()
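# The string below contains two alternative implementations of the same
# experiment, using skopt (gp_minimize) and GPyOpt; it is deliberately kept
# as a non-executed reference.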
"""
from skopt import gp_minimize
np.random.seed(0)
r = gp_minimize(lambda x: -f(np.array(x))[0],
                bounds.tolist(),
                base_estimator=gpr,
                acq_func='EI',       # expected improvement
                xi=0.01,             # exploitation-exploration trade-off
                n_calls=10,          # number of iterations
                n_random_starts=0,   # initial samples are provided
                x0=X_init.tolist(),  # initial samples
                y0=-Y_init.ravel())
# Fit GP model to samples for plotting results. Note negation of f.
gpr.fit(r.x_iters, -r.func_vals)
plot_approximation(gpr, X, Y, r.x_iters, -r.func_vals, show_legend=True)
if save_figures: pml.savefig('bayes-opt-skopt.pdf')
plt.show()
plot_convergence(np.array(r.x_iters), -r.func_vals)
###############
# https://github.com/SheffieldML/GPyOpt
import GPy
from GPyOpt.methods import BayesianOptimization
kernel = GPy.kern.Matern52(input_dim=1, variance=1.0, lengthscale=1.0)
bds = [{'name': 'X', 'type': 'continuous', 'domain': bounds.ravel()}]
np.random.seed(2)
optimizer = BayesianOptimization(f=lambda X: -f(X),
                                 domain=bds,
                                 model_type='GP',
                                 kernel=kernel,
                                 acquisition_type='EI',
                                 acquisition_jitter=0.01,
                                 X=X_init,
                                 Y=-Y_init,
                                 noise_var=noise**2,
                                 exact_feval=False,
                                 normalize_Y=False,
                                 maximize=False)
optimizer.run_optimization(max_iter=10)
optimizer.plot_acquisition()
if save_figures: pml.savefig('bayes-opt-gpyopt.pdf')
plt.show()
optimizer.plot_convergence()
"""