CoCalc -- upper_confidence

GitHub Repository: debakarr/machinelearning
Path: blob/master/Part 6 - Reinforcement Learning/Upper Confidence Bound/upper_confidence_bound.py
1009 views

1
# Upper Confidence Bound
2

3
# Importing the libraries
4
import numpy as np
5
import matplotlib.pyplot as plt
6
import pandas as pd
7

8
# Importing the dataset
# The table is indexed below as [round, ad]: one row per round, one column
# per ad, each cell holding the reward obtained if that ad is shown in that
# round.  (Presumably 0/1 click indicators -- confirm against the CSV.)
dataset = pd.read_csv('Ads_CTR_Optimisation.csv')

# Implementing UCB
import math

N = 10000  # number of rounds played (rows of the dataset consumed)
d = 10     # number of ads competing (columns of the dataset considered)
ads_selected = []                # index of the ad chosen in each round, in order
numbers_of_selections = [0] * d  # how many times each ad has been chosen so far
sums_of_rewards = [0] * d        # cumulative reward collected by each ad
total_reward = 0                 # cumulative reward over all rounds

# Fetch the underlying reward array once instead of going through the
# DataFrame's .values accessor on every round.
reward_matrix = dataset.values

for n in range(N):
    ad = 0
    max_upper_bound = 0
    for i in range(d):
        if numbers_of_selections[i] > 0:
            # Empirical mean reward plus a confidence radius that shrinks as
            # the ad is selected more often and grows slowly with the round.
            average_reward = sums_of_rewards[i] / numbers_of_selections[i]
            delta_i = math.sqrt(3/2 * math.log(n + 1) / numbers_of_selections[i])
            upper_bound = average_reward + delta_i
        else:
            # An ad that has never been shown gets an infinite bound so that
            # it is tried before any already-explored ad.
            upper_bound = float('inf')
        # Strict '>' keeps the lowest-indexed ad when several bounds tie.
        if upper_bound > max_upper_bound:
            max_upper_bound = upper_bound
            ad = i
    ads_selected.append(ad)
    numbers_of_selections[ad] += 1
    reward = reward_matrix[n, ad]
    sums_of_rewards[ad] += reward
    total_reward += reward
37

38
# Visualising the results
# Histogram of the ad index chosen in each round, drawn on the current axes.
axes = plt.gca()
axes.hist(ads_selected)
axes.set_title('Histogram of ads selections')
axes.set_xlabel('Ads')
axes.set_ylabel('Number of times each ad was selected')
plt.show()
44

Product

Resources

Company