CoCalc -- sat.py

GitHub Repository: allendowney/thinkbayes2
Path: blob/master/scripts/sat.py
¹⁹⁰¹ views
1
"""This file contains code used in "Think Bayes",
2
by Allen B. Downey, available from greenteapress.com
3

4
Copyright 2012 Allen B. Downey
5
License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html
6
"""
7

8
from __future__ import print_function, division
9

10
import csv
11
import math
12
import numpy
13
import sys
14

15
import matplotlib
16
import matplotlib.pyplot as pyplot
17

18
import thinkbayes2
19
import thinkplot
20

21

22
def ReadScale(filename='sat_scale.csv', col=2):
23
    """Reads a CSV file of SAT scales (maps from raw score to standard score).
24

25
    Args:
26
      filename: string filename
27
      col: which column to start with (0=Reading, 2=Math, 4=Writing)
28

29
    Returns: thinkbayes2.Interpolator object
30
    """
31
    def ParseRange(s):
32
        """Parse a range of values in the form 123-456
33

34
        s: string
35
        """
36
        t = [int(x) for x in s.split('-')]
37
        return 1.0 * sum(t) / len(t)
38

39
    fp = open(filename)
40
    reader = csv.reader(fp)
41
    raws = []
42
    scores = []
43

44
    for t in reader:
45
        try:
46
            raw = int(t[col])
47
            raws.append(raw)
48
            score = ParseRange(t[col+1])
49
            scores.append(score)
50
        except ValueError:
51
            pass
52

53
    raws.sort()
54
    scores.sort()
55
    return thinkbayes2.Interpolator(raws, scores)
56

57

58
def ReadRanks(filename='sat_ranks.csv'):
59
    """Reads a CSV file of SAT scores.
60

61
    Args:
62
      filename: string filename
63

64
    Returns:
65
      list of (score, freq) pairs
66
    """
67
    fp = open(filename)
68
    reader = csv.reader(fp)
69
    res = []
70

71
    for t in reader:
72
        try:
73
            score = int(t[0])
74
            freq = int(t[1])
75
            res.append((score, freq))
76
        except ValueError:
77
            pass
78

79
    return res
80

81

82
def DivideValues(pmf, denom):
83
    """Divides the values in a Pmf by denom.
84

85
    Returns a new Pmf.
86
    """
87
    new = thinkbayes2.Pmf()
88
    denom = float(denom)
89
    for val, prob in pmf.Items():
90
        x = val / denom
91
        new.Set(x, prob)
92
    return new
93

94

95
class Exam(object):
96
    """Encapsulates information about an exam.
97

98
    Contains the distribution of scaled scores and an
99
    Interpolator that maps between scaled and raw scores.
100
    """
101
    def __init__(self):
102
        self.scale = ReadScale()
103

104
        scores = ReadRanks()
105
        score_pmf = thinkbayes2.Pmf(dict(scores))
106

107
        self.raw = self.ReverseScale(score_pmf)
108
        self.max_score = max(self.raw.Values())
109
        self.prior = DivideValues(self.raw, denom=self.max_score)
110
        
111
        center = -0.05
112
        width = 1.8
113
        self.difficulties = MakeDifficulties(center, width, self.max_score)
114

115
    def CompareScores(self, a_score, b_score, constructor):
116
        """Computes posteriors for two test scores and the likelihood ratio.
117

118
        a_score, b_score: scales SAT scores
119
        constructor: function that instantiates an Sat or Sat2 object
120
        """
121
        a_sat = constructor(self, a_score)
122
        b_sat = constructor(self, b_score)
123

124
        a_sat.PlotPosteriors(b_sat)
125

126
        if constructor is Sat:
127
            PlotJointDist(a_sat, b_sat)
128

129
        top = TopLevel('AB')
130
        top.Update((a_sat, b_sat))
131
        top.Print()
132

133
        ratio = top.Prob('A') / top.Prob('B')
134
        
135
        print('Likelihood ratio', ratio)
136

137
        posterior = ratio / (ratio + 1)
138
        print('Posterior', posterior)
139

140
        if constructor is Sat2:
141
            ComparePosteriorPredictive(a_sat, b_sat)
142

143
    def MakeRawScoreDist(self, efficacies):
144
        """Makes the distribution of raw scores for given difficulty.
145

146
        efficacies: Pmf of efficacy
147
        """
148
        pmfs = thinkbayes2.Pmf()
149
        for efficacy, prob in efficacies.Items():
150
            scores = self.PmfCorrect(efficacy)
151
            pmfs.Set(scores, prob)
152

153
        mix = thinkbayes2.MakeMixture(pmfs)
154
        return mix
155

156
    def CalibrateDifficulty(self):
157
        """Make a plot showing the model distribution of raw scores."""
158
        thinkplot.Clf()
159
        thinkplot.PrePlot(num=2)
160

161
        cdf = thinkbayes2.Cdf(self.raw, label='data')
162
        thinkplot.Cdf(cdf)
163

164
        efficacies = thinkbayes2.MakeNormalPmf(0, 1.5, 3)
165
        pmf = self.MakeRawScoreDist(efficacies)
166
        cdf = thinkbayes2.Cdf(pmf, label='model')
167
        thinkplot.Cdf(cdf)
168
        
169
        thinkplot.Save(root='sat_calibrate',
170
                    xlabel='raw score',
171
                    ylabel='CDF',
172
                    formats=['pdf', 'eps'])
173

174
    def PmfCorrect(self, efficacy):
175
        """Returns the PMF of number of correct responses.
176

177
        efficacy: float
178
        """
179
        pmf = PmfCorrect(efficacy, self.difficulties)
180
        return pmf
181

182
    def Lookup(self, raw):
183
        """Looks up a raw score and returns a scaled score."""
184
        return self.scale.Lookup(raw)
185
        
186
    def Reverse(self, score):
187
        """Looks up a scaled score and returns a raw score.
188

189
        Since we ignore the penalty, negative scores round up to zero.
190
        """
191
        raw = self.scale.Reverse(score)
192
        return raw if raw > 0 else 0
193
        
194
    def ReverseScale(self, pmf):
195
        """Applies the reverse scale to the values of a PMF.
196

197
        Args:
198
            pmf: Pmf object
199
            scale: Interpolator object
200

201
        Returns:
202
            new Pmf
203
        """
204
        new = thinkbayes2.Pmf()
205
        for val, prob in pmf.Items():
206
            raw = self.Reverse(val)
207
            new.Incr(raw, prob)
208
        return new
209

210

211
class Sat(thinkbayes2.Suite):
212
    """Represents the distribution of p_correct for a test-taker."""
213

214
    def __init__(self, exam, score):
215
        self.exam = exam
216
        self.score = score
217

218
        # start with the prior distribution
219
        thinkbayes2.Suite.__init__(self, exam.prior)
220

221
        # update based on an exam score
222
        self.Update(score)
223

224
    def Likelihood(self, data, hypo):
225
        """Computes the likelihood of a test score, given efficacy."""
226
        p_correct = hypo
227
        score = data
228

229
        k = self.exam.Reverse(score)
230
        n = self.exam.max_score
231
        like = thinkbayes2.EvalBinomialPmf(k, n, p_correct)
232
        return like
233

234
    def PlotPosteriors(self, other):
235
        """Plots posterior distributions of efficacy.
236

237
        self, other: Sat objects.
238
        """
239
        thinkplot.Clf()
240
        thinkplot.PrePlot(num=2)
241

242
        cdf1 = thinkbayes2.Cdf(self, label='posterior %d' % self.score)
243
        cdf2 = thinkbayes2.Cdf(other, label='posterior %d' % other.score)
244

245
        thinkplot.Cdfs([cdf1, cdf2])
246
        thinkplot.Save(xlabel='p_correct', 
247
                    ylabel='CDF', 
248
                    axis=[0.7, 1.0, 0.0, 1.0],
249
                    root='sat_posteriors_p_corr',
250
                    formats=['pdf', 'eps'])
251

252

253
class Sat2(thinkbayes2.Suite):
254
    """Represents the distribution of efficacy for a test-taker."""
255

256
    def __init__(self, exam, score):
257
        self.exam = exam
258
        self.score = score
259

260
        # start with the Normal prior
261
        efficacies = thinkbayes2.MakeNormalPmf(0, 1.5, 3)
262
        thinkbayes2.Suite.__init__(self, efficacies)
263

264
        # update based on an exam score
265
        self.Update(score)
266

267
    def Likelihood(self, data, hypo):
268
        """Computes the likelihood of a test score, given efficacy."""
269
        efficacy = hypo
270
        score = data
271
        raw = self.exam.Reverse(score)
272

273
        pmf = self.exam.PmfCorrect(efficacy)
274
        like = pmf.Prob(raw)
275
        return like
276

277
    def MakePredictiveDist(self):
278
        """Returns the distribution of raw scores expected on a re-test."""
279
        raw_pmf = self.exam.MakeRawScoreDist(self)
280
        return raw_pmf
281
    
282
    def PlotPosteriors(self, other):
283
        """Plots posterior distributions of efficacy.
284

285
        self, other: Sat objects.
286
        """
287
        thinkplot.Clf()
288
        thinkplot.PrePlot(num=2)
289

290
        cdf1 = thinkbayes2.Cdf(self, label='posterior %d' % self.score)
291
        cdf2 = thinkbayes2.Cdf(other, label='posterior %d' % other.score)
292

293
        thinkplot.Cdfs([cdf1, cdf2])
294
        thinkplot.Save(xlabel='efficacy', 
295
                    ylabel='CDF', 
296
                    axis=[0, 4.6, 0.0, 1.0],
297
                    root='sat_posteriors_eff',
298
                    formats=['pdf', 'eps'])
299

300

301
def PlotJointDist(pmf1, pmf2, thresh=0.8):
302
    """Plot the joint distribution of p_correct.
303

304
    pmf1, pmf2: posterior distributions
305
    thresh: lower bound of the range to be plotted
306
    """
307
    def Clean(pmf):
308
        """Removes values below thresh."""
309
        vals = [val for val in pmf.Values() if val < thresh]
310
        [pmf.Remove(val) for val in vals]
311

312
    Clean(pmf1)
313
    Clean(pmf2)
314
    pmf = thinkbayes2.MakeJoint(pmf1, pmf2)
315

316
    thinkplot.Figure(figsize=(6, 6))    
317
    thinkplot.Contour(pmf, contour=False, pcolor=True)
318

319
    thinkplot.Plot([thresh, 1.0], [thresh, 1.0],
320
                color='gray', alpha=0.2, linewidth=4)
321

322
    thinkplot.Save(root='sat_joint',
323
                   xlabel='p_correct Alice', 
324
                   ylabel='p_correct Bob',
325
                   axis=[thresh, 1.0, thresh, 1.0],
326
                   formats=['pdf', 'eps'])
327

328

329
def ComparePosteriorPredictive(a_sat, b_sat):
330
    """Compares the predictive distributions of raw scores.
331

332
    a_sat: posterior distribution
333
    b_sat:
334
    """
335
    a_pred = a_sat.MakePredictiveDist()
336
    b_pred = b_sat.MakePredictiveDist()
337

338
    #thinkplot.Clf()
339
    #thinkplot.Pmfs([a_pred, b_pred])
340
    #thinkplot.Show()
341

342
    a_like = thinkbayes2.PmfProbGreater(a_pred, b_pred)
343
    b_like = thinkbayes2.PmfProbLess(a_pred, b_pred)
344
    c_like = thinkbayes2.PmfProbEqual(a_pred, b_pred)
345

346
    print('Posterior predictive')
347
    print('A', a_like)
348
    print('B', b_like)
349
    print('C', c_like)
350

351

352
def PlotPriorDist(pmf):
353
    """Plot the prior distribution of p_correct.
354

355
    pmf: prior
356
    """
357
    thinkplot.Clf()
358
    thinkplot.PrePlot(num=1)
359

360
    cdf1 = thinkbayes2.Cdf(pmf, label='prior')
361

362
    thinkplot.Cdf(cdf1)
363
    thinkplot.Save(root='sat_prior',
364
                   xlabel='p_correct', 
365
                   ylabel='CDF',
366
                   formats=['pdf', 'eps'])
367

368

369
class TopLevel(thinkbayes2.Suite):
370
    """Evaluates the top-level hypotheses about Alice and Bob.
371

372
    Uses the bottom-level posterior distribution about p_correct
373
    (or efficacy).
374
    """
375

376
    def Update(self, data):
377
        a_sat, b_sat = data
378

379
        a_like = thinkbayes2.PmfProbGreater(a_sat, b_sat)
380
        b_like = thinkbayes2.PmfProbLess(a_sat, b_sat)
381
        c_like = thinkbayes2.PmfProbEqual(a_sat, b_sat)
382

383
        a_like += c_like / 2
384
        b_like += c_like / 2
385

386
        self.Mult('A', a_like)
387
        self.Mult('B', b_like)
388

389
        self.Normalize()
390

391

392
def ProbCorrect(efficacy, difficulty, a=1):
393
    """Returns the probability that a person gets a question right.
394

395
    efficacy: personal ability to answer questions
396
    difficulty: how hard the question is
397

398
    Returns: float prob
399
    """
400
    return 1 / (1 + math.exp(-a * (efficacy - difficulty)))
401

402

403
def BinaryPmf(p):
404
    """Makes a Pmf with values 1 and 0.
405
    
406
    p: probability given to 1
407
    
408
    Returns: Pmf object
409
    """
410
    pmf = thinkbayes2.Pmf()
411
    pmf.Set(1, p)
412
    pmf.Set(0, 1-p)
413
    return pmf
414

415

416
def PmfCorrect(efficacy, difficulties):
417
    """Computes the distribution of correct responses.
418

419
    efficacy: personal ability to answer questions
420
    difficulties: list of difficulties, one for each question
421

422
    Returns: new Pmf object
423
    """
424
    pmf0 = thinkbayes2.Pmf([0])
425

426
    ps = [ProbCorrect(efficacy, difficulty) for difficulty in difficulties]
427
    pmfs = [BinaryPmf(p) for p in ps]
428
    dist = sum(pmfs, pmf0)
429
    return dist
430

431

432
def MakeDifficulties(center, width, n):
433
    """Makes a list of n difficulties with a given center and width.
434

435
    Returns: list of n floats between center-width and center+width
436
    """
437
    low, high = center-width, center+width
438
    return numpy.linspace(low, high, n)
439

440

441
def ProbCorrectTable():
442
    """Makes a table of p_correct for a range of efficacy and difficulty."""
443
    efficacies = [3, 1.5, 0, -1.5, -3]
444
    difficulties = [-1.85, -0.05, 1.75]
445

446
    for eff in efficacies:
447
        print('%0.2f & ' % eff, end=' ') 
448
        for diff in difficulties:
449
            p = ProbCorrect(eff, diff)
450
            print('%0.2f & ' % p, end=' ') 
451
        print(r'\\')
452

453

454
def main(script):
455
    ProbCorrectTable()
456

457
    exam = Exam()
458

459
    PlotPriorDist(exam.prior)
460
    exam.CalibrateDifficulty()
461

462
    exam.CompareScores(780, 740, constructor=Sat)
463

464
    exam.CompareScores(780, 740, constructor=Sat2)
465

466

467
if __name__ == '__main__':
468
    main(*sys.argv)
469

470
Product

Resources

Company