Path: blob/master/Part 6 - Reinforcement Learning/Upper Confidence Bound/upper_confidence_bound.py
1009 views
# Upper Confidence Bound
#
# Simulates showing one of `d` ads in each of `N` rounds, choosing the ad with
# the highest upper confidence bound on its average reward, and then plots how
# often each ad was chosen.

# Importing the libraries
import math

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Importing the dataset
# NOTE(review): the code below indexes the data as [round, ad], so the CSV is
# assumed to have at least N rows and d columns of numeric rewards -- confirm
# against the actual file.
dataset = pd.read_csv('Ads_CTR_Optimisation.csv')

# Implementing UCB
N = 10000  # number of rounds to simulate
d = 10     # number of ads to choose between

# Materialise the reward matrix once; `dataset.values` is loop-invariant and
# was previously re-evaluated on every round.
rewards = dataset.values

ads_selected = []                # ad index chosen in each round, in order
numbers_of_selections = [0] * d  # how many times each ad has been chosen
sums_of_rewards = [0] * d        # accumulated reward per ad
total_reward = 0                 # accumulated reward over all rounds

for n in range(N):
    ad = 0
    max_upper_bound = 0
    for i in range(d):
        if numbers_of_selections[i] > 0:
            average_reward = sums_of_rewards[i] / numbers_of_selections[i]
            # Exploration bonus: shrinks as ad i is selected more often and
            # grows slowly with the round number. 1.5 is the 3/2 factor,
            # written as a float so it does not depend on division semantics.
            delta_i = math.sqrt(
                1.5 * math.log(n + 1) / numbers_of_selections[i]
            )
            upper_bound = average_reward + delta_i
        else:
            # An ad that has never been shown gets an infinite bound so it is
            # tried before any already-tried ad is repeated.
            upper_bound = float("inf")
        # Strict comparison: on ties the lowest-indexed ad wins.
        if upper_bound > max_upper_bound:
            max_upper_bound = upper_bound
            ad = i
    ads_selected.append(ad)
    numbers_of_selections[ad] += 1
    reward = rewards[n, ad]
    sums_of_rewards[ad] += reward
    total_reward += reward

# Visualising the results
plt.hist(ads_selected)
plt.title('Histogram of ads selections')
plt.xlabel('Ads')
plt.ylabel('Number of times each ad was selected')
plt.show()