Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
debakarr
GitHub Repository: debakarr/machinelearning
Path: blob/master/Part 6 - Reinforcement Learning/Upper Confidence Bound/upper_confidence_bound.py
1009 views
1
# Upper Confidence Bound
2
3
# Importing the libraries
4
import numpy as np
5
import matplotlib.pyplot as plt
6
import pandas as pd
7
8
# Importing the dataset
9
# The file name is relative, so it is resolved against the current working
# directory.  The UCB loop further down reads dataset.values[n, ad] for
# n < 10000 and ad < 10, so the resulting DataFrame needs at least 10000 rows
# and 10 columns.
dataset = pd.read_csv('Ads_CTR_Optimisation.csv')
10
11
# Implementing UCB
#
# Replays the dataset one row (round) at a time.  In every round each ad gets
# an upper confidence bound: its observed average reward plus an exploration
# term that shrinks as the ad is selected more often.  The ad with the largest
# bound is selected and its reward is read from the current row.
import math

N = 10000  # number of rounds; row n of the dataset is used in round n
d = 10  # number of ads; column i of the dataset is read for ad i
ads_selected = []  # index of the ad chosen in each round, in order
numbers_of_selections = [0] * d  # times each ad has been chosen so far
sums_of_rewards = [0] * d  # accumulated reward per ad
total_reward = 0  # accumulated reward over all rounds
rewards = dataset.values  # looked up once instead of on every round

for n in range(N):
    ad = 0
    max_upper_bound = 0
    for i in range(d):
        if numbers_of_selections[i] > 0:
            average_reward = sums_of_rewards[i] / numbers_of_selections[i]
            # 1.5 is spelled as a float literal so the exploration term does
            # not depend on how the interpreter divides 3 by 2.
            delta_i = math.sqrt(
                1.5 * math.log(n + 1) / numbers_of_selections[i]
            )
            upper_bound = average_reward + delta_i
        else:
            # An ad that has never been selected gets an infinite bound, so
            # it is picked ahead of every ad whose bound is finite.
            upper_bound = float('inf')
        # Strict comparison: on ties the lowest-indexed ad is kept.
        if upper_bound > max_upper_bound:
            max_upper_bound = upper_bound
            ad = i
    ads_selected.append(ad)
    numbers_of_selections[ad] += 1
    reward = rewards[n, ad]
    sums_of_rewards[ad] += reward
    total_reward += reward
37
38
# Visualising the results
# Draw on the current Axes directly; this is the object the pyplot helper
# functions operate on.
axes = plt.gca()
axes.hist(ads_selected)
axes.set_title('Histogram of ads selections')
axes.set_xlabel('Ads')
axes.set_ylabel('Number of times each ad was selected')
plt.show()
44