# Path: blob/master/Part 6 - Reinforcement Learning/Upper Confidence Bound/upper_confidence_bound.R
# 1009 views
# Upper Confidence Bound
#
# Replays the rounds stored in 'Ads_CTR_Optimisation.csv' and, in each round,
# shows the ad with the highest upper confidence bound on its average reward.
# Leaves `ads_selected`, `numbers_of_selections`, `sums_of_rewards` and
# `total_reward` in the workspace and plots how often each ad was chosen.

# Importing the dataset
# dataset[n, ad] is read below as the reward of showing `ad` in round `n`.
dataset <- read.csv("Ads_CTR_Optimisation.csv")

# Implementing UCB
N <- 10000  # number of rounds to play
d <- 10     # number of ads (columns of the dataset that are used)

# Indexing past the data would silently yield NA rewards, so fail early.
stopifnot(nrow(dataset) >= N, ncol(dataset) >= d)

ads_selected <- integer(N)           # ad chosen in each round (preallocated)
numbers_of_selections <- integer(d)  # times each ad has been shown so far
sums_of_rewards <- integer(d)        # cumulative reward collected per ad
total_reward <- 0

for (n in seq_len(N)) {
  # An ad that has never been shown gets an infinite bound, so every ad is
  # tried once before the confidence bounds start to drive the choice.
  upper_bounds <- rep(Inf, d)
  tried <- numbers_of_selections > 0
  average_rewards <- sums_of_rewards[tried] / numbers_of_selections[tried]
  deltas <- sqrt(3 / 2 * log(n) / numbers_of_selections[tried])
  upper_bounds[tried] <- average_rewards + deltas

  # which.max() returns the first maximum, i.e. ties go to the lowest index,
  # matching a left-to-right scan with a strict `>` comparison.
  ad <- which.max(upper_bounds)

  ads_selected[n] <- ad
  numbers_of_selections[ad] <- numbers_of_selections[ad] + 1L
  reward <- dataset[n, ad]
  sums_of_rewards[ad] <- sums_of_rewards[ad] + reward
  total_reward <- total_reward + reward
}

# Visualising the results
# Half-integer breaks give every ad its own bar; hist()'s default breaks on
# integer data would merge ads 1 and 2 into the first bin.
hist(ads_selected,
     breaks = seq(0.5, d + 0.5, by = 1),
     col = "blue",
     main = "Histogram of ads selections",
     xlab = "Ads",
     ylab = "Number of times each ad was selected")