Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
allendowney
GitHub Repository: allendowney/thinkbayes2
Path: blob/master/scripts/hockey.py
1901 views
1
"""This file contains code for use with "Think Bayes",
2
by Allen B. Downey, available from greenteapress.com
3
4
Copyright 2012 Allen B. Downey
5
License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html
6
"""
7
8
from __future__ import print_function, division
9
10
import math
11
12
import columns
13
import thinkbayes2
14
import thinkbayes2
15
import thinkplot
16
17
18
# Selects the prior used by Hockey.__init__: True picks the prior based on
# each team's average goals scored; False picks the wider prior based on
# each pair-wise match-up.
USE_SUMMARY_DATA = True
19
20
class Hockey(thinkbayes2.Suite):
    """Suite of hypotheses about a team's goal-scoring rate."""

    def __init__(self, label=None):
        """Builds the prior over scoring rates and sets up the suite.

        The prior comes from thinkbayes2.MakeNormalPmf; which spread is
        used depends on the module-level USE_SUMMARY_DATA flag.

        label: string
        """
        # Both priors share the same mean; only the spread differs:
        #   0.3  -> prior based on each team's average goals scored
        #   0.85 -> prior based on each pair-wise match-up
        mu = 2.8
        sigma = 0.3 if USE_SUMMARY_DATA else 0.85

        # NOTE(review): the third argument (4) is presumably the number of
        # sigmas covered on each side of mu -- confirm against thinkbayes2.
        prior = thinkbayes2.MakeNormalPmf(mu, sigma, 4)
        super(Hockey, self).__init__(prior, label=label)

    def Likelihood(self, data, hypo):
        """Returns the likelihood of the data under the hypothesis.

        The likelihood is the Poisson PMF evaluated at k=data with
        rate lambda=hypo.

        hypo: goal scoring rate in goals per game
        data: goals scored in one game
        """
        return thinkbayes2.EvalPoissonPmf(data, hypo)
52
53
54
def MakeGoalPmf(suite, high=10):
    """Builds the distribution of goals scored from a distribution of rates.

    For every rate in the suite a Poisson Pmf is made, and the Pmfs are
    combined into a mixture weighted by the probability of each rate.

    suite: distribution of goal-scoring rate
    high: upper bound passed to thinkbayes2.MakePoissonPmf

    returns: Pmf of goals per game
    """
    # meta-Pmf: maps each Poisson Pmf to the probability of its rate
    components = thinkbayes2.Pmf()
    for rate, weight in suite.Items():
        components.Set(thinkbayes2.MakePoissonPmf(rate, high), weight)

    return thinkbayes2.MakeMixture(components, label=suite.label)
70
71
72
def MakeGoalTimePmf(suite):
    """Builds the distribution of time until the first goal.

    For every rate in the suite an exponential Pmf is made, and the Pmfs
    are combined into a mixture weighted by the probability of each rate.

    suite: distribution of goal-scoring rate

    returns: Pmf of time until the first goal
    """
    # meta-Pmf: maps each exponential Pmf to the probability of its rate
    components = thinkbayes2.Pmf()
    for rate, weight in suite.Items():
        waiting_time = thinkbayes2.MakeExponentialPmf(rate, high=2, n=2001)
        components.Set(waiting_time, weight)

    return thinkbayes2.MakeMixture(components, label=suite.label)
87
88
89
class Game(object):
    """One row of game data.

    Attributes are set in columns.read_csv.
    """
    # No per-column conversions are declared for this record type.
    convert = {}

    def clean(self):
        """Sets self.goals to the sum of the three period scores."""
        first, second, third = self.pd1, self.pd2, self.pd3
        self.goals = first + second + third
98
99
100
def ReadHockeyData(filename='hockey_data.csv'):
    """Reads game scores from the data file and summarizes them.

    Only records from the 2011 season are used.  The scores are grouped
    by match-up and handed to ProcessScoresTeamwise and
    ProcessScoresPairwise, which plot and print summary statistics.

    filename: string

    raises: ValueError if a game ID does not have exactly two records
    """
    game_list = columns.read_csv(filename, Game)

    # map from gameID to list of two games (one record per team)
    games = {}
    for game in game_list:
        if game.season != 2011:
            continue
        games.setdefault(game.game, []).append(game)

    # map from (team1, team2) to list of (score1, score2)
    pairs = {}
    # Only the values are needed; .values() also works on Python 3, where
    # the original .iteritems() raises AttributeError.
    for t1, t2 in games.values():
        matchup = t1.team, t2.team
        score = t1.total, t2.total
        pairs.setdefault(matchup, []).append(score)

    ProcessScoresTeamwise(pairs)
    ProcessScoresPairwise(pairs)
125
126
127
def ProcessScoresPairwise(pairs):
    """Average number of goals for each team against each opponent.

    Plots the CDF of the per-match-up averages, prints their mean and
    standard deviation, and prints the raw scores for BOS v VAN.

    pairs: map from (team1, team2) to list of (score1, score2)

    raises: KeyError if pairs has no ('BOS', 'VAN') entry
    """
    # map from (scoring team, opponent) to list of goals scored
    goals_scored = {}
    # .items() works on Python 2 and 3; the original .iteritems() raises
    # AttributeError on Python 3.
    for (t1, t2), entries in pairs.items():
        for g1, g2 in entries:
            goals_scored.setdefault((t1, t2), []).append(g1)
            goals_scored.setdefault((t2, t1), []).append(g2)

    # make a list of average goals scored, skipping match-ups with
    # fewer than three games
    lams = [thinkbayes2.Mean(goals)
            for goals in goals_scored.values()
            if len(goals) >= 3]

    # make the distribution of average goals scored
    cdf = thinkbayes2.MakeCdfFromList(lams)
    thinkplot.Cdf(cdf)
    thinkplot.Show()

    mu, var = thinkbayes2.MeanVar(lams)
    print('mu, sig', mu, math.sqrt(var))

    print('BOS v VAN', pairs['BOS', 'VAN'])
158
159
160
def ProcessScoresTeamwise(pairs):
    """Average number of goals for each team.

    Plots the CDF of the per-team averages and prints their mean and
    standard deviation.

    pairs: map from (team1, team2) to list of (score1, score2)
    """
    # map from team to list of goals scored
    goals_scored = {}
    # .items() works on Python 2 and 3; the original .iteritems() raises
    # AttributeError on Python 3.
    for (t1, t2), entries in pairs.items():
        for g1, g2 in entries:
            goals_scored.setdefault(t1, []).append(g1)
            goals_scored.setdefault(t2, []).append(g2)

    # make a list of average goals scored, one entry per team
    lams = [thinkbayes2.Mean(goals) for goals in goals_scored.values()]

    # make the distribution of average goals scored
    cdf = thinkbayes2.MakeCdfFromList(lams)
    thinkplot.Cdf(cdf)
    thinkplot.Show()

    mu, var = thinkbayes2.MeanVar(lams)
    print('mu, sig', mu, math.sqrt(var))
187
188
189
def main():
    """Runs the Bruins v Canucks analysis.

    Saves four figures (hockey0 through hockey3) in pdf and eps format
    and prints the most likely scoring rates, the win/loss/tie
    probabilities, the overtime adjustment, and the probability of
    winning the series.
    """
    #ReadHockeyData()
    #return

    formats = ['pdf', 'eps']

    def SavePair(first, second, root, xlabel):
        """Plots two Pmfs on a cleared figure and saves it under root."""
        thinkplot.Clf()
        thinkplot.PrePlot(num=2)
        thinkplot.Pmf(first)
        thinkplot.Pmf(second)
        thinkplot.Save(root=root,
                       xlabel=xlabel,
                       ylabel='Probability',
                       formats=formats)

    bruins = Hockey('bruins')
    canucks = Hockey('canucks')

    # prior distributions of the scoring rate
    SavePair(bruins, canucks, 'hockey0', 'Goals per game')

    # update each suite with the goals scored in four games
    bruins.UpdateSet([0, 2, 8, 4])
    canucks.UpdateSet([1, 3, 1, 0])

    # posterior distributions of the scoring rate
    SavePair(bruins, canucks, 'hockey1', 'Goals per game')

    # distributions of goals scored in one game
    bruins_goals = MakeGoalPmf(bruins)
    canucks_goals = MakeGoalPmf(canucks)
    SavePair(bruins_goals, canucks_goals, 'hockey2', 'Goals')

    # distributions of time until the first goal
    bruins_time = MakeGoalTimePmf(bruins)
    canucks_time = MakeGoalTimePmf(canucks)

    print('MLE bruins', bruins.MaximumLikelihood())
    print('MLE canucks', canucks.MaximumLikelihood())

    SavePair(bruins_time, canucks_time, 'hockey3', 'Games until goal')

    # distribution of the goal differential in one game
    diff = bruins_goals - canucks_goals
    p_win = diff.ProbGreater(0)
    p_loss = diff.ProbLess(0)
    p_tie = diff.Prob(0)

    print(p_win, p_loss, p_tie)

    # chance the bruins score first in overtime; equal times count half
    p_overtime = thinkbayes2.PmfProbLess(bruins_time, canucks_time)
    p_adjust = thinkbayes2.PmfProbEqual(bruins_time, canucks_time)
    p_overtime += p_adjust / 2
    print('p_overtime', p_overtime)

    # a tie followed by an overtime win also counts as a win
    overtime_win = p_overtime * p_tie
    print(overtime_win)
    p_win += overtime_win
    print('p_win', p_win)

    # win the next two, or split the next two and win the third
    p_series = p_win**2 + 2 * p_win * (1-p_win) * p_win

    print('p_series', p_series)
270
271
272
# Run the analysis only when this file is executed as a script.
if __name__ == '__main__':
    main()
274
275