CoCalc -- stringbench.py

GitHub Repository: allendowney/cpython
Path: blob/main/Tools/stringbench/stringbench.py
¹² views
1

2
# Various microbenchmarks comparing unicode and byte string performance
3
# Please keep this file both 2.x and 3.x compatible!
4

5
import timeit
6
import itertools
7
import operator
8
import re
9
import sys
10
import datetime
11
import optparse
12

13
# Version string of this benchmark suite; printed in the start-up banner.
VERSION = '2.0'
14

15
def p(*args):
    """Write the str() of every argument, space-separated, to stdout.

    A single newline is appended.  Used instead of print so the file works
    unchanged on both Python 2.x and 3.x.
    """
    line = ' '.join(map(str, args))
    sys.stdout.write(line + '\n')
17

18
# Converters from a native str literal to each string flavour.  BYTES and
# UNICODE double as identity tokens: benchmarks receive one of them as STR
# and helper functions compare against them with `is`.
if sys.version_info < (3,):
    # Python 2: the native str is already a byte string.
    BYTES = bytes_from_str = lambda x: x
    UNICODE = unicode_from_str = lambda x: x.decode('ascii')
else:
    # Python 3: the native str is already a Unicode string.
    BYTES = bytes_from_str = lambda x: x.encode('ascii')
    UNICODE = unicode_from_str = lambda x: x
24

25
class UnsupportedType(TypeError):
    """Signals that a benchmark cannot run for the requested string type."""
27

28

29
# Start-up banner: suite version, interpreter version, current time.
p('stringbench v' + VERSION)
p(sys.version)
p(datetime.datetime.now())
32

33
# NOTE(review): presumably the number of timing repetitions used by the
# driver code later in this file -- confirm.  Other values that have been
# used here are 1 and 7.
REPEAT = 3
36

37
# Refuse to be imported: everything below assumes the file is run as a script.
if __name__ != "__main__":
    sys.exit("Must run as main program")
39

40
# Command-line interface: three independent on/off switches.
parser = optparse.OptionParser()
for _flags, _dest, _help in (
        (("-R", "--skip-re"), "skip_re",
         "skip regular expression tests"),
        (("-8", "--8-bit"), "bytes_only",
         "only do 8-bit string benchmarks"),
        (("-u", "--unicode"), "unicode_only",
         "only do Unicode string benchmarks"),
):
    parser.add_option(*_flags, dest=_dest, action="store_true", help=_help)
del _flags, _dest, _help
50

51

52
# Pre-built index lists so benchmark loops do not pay for range() creation.
_RANGE_1000 = list(range(1000))
_RANGE_100 = _RANGE_1000[:100]
_RANGE_10 = _RANGE_100[:10]
55

56
# Names of all functions registered so far, used to detect duplicates.
dups = {}
def bench(s, group, repeat_count):
    """Return a decorator that tags a function as a benchmark.

    The decorated function is returned unchanged apart from four new
    attributes: comment (s), is_bench (True), group and repeat_count.

    Raises AssertionError if a function with the same __name__ has already
    been registered.
    """
    def decorate(func):
        name = func.__name__
        if name in dups:
            raise AssertionError("Multiple functions with same name: %r" %
                                 (name,))
        dups[name] = 1
        func.comment = s
        func.is_bench = True
        func.group = group
        func.repeat_count = repeat_count
        return func
    return decorate
69

70
def uses_re(f):
    """Decorator marking a benchmark as depending on the `re` module.

    Sets f.uses_re = True and returns f.  The function must be returned:
    a decorator that returns None rebinds the decorated name to None, so
    the benchmark would be lost.
    """
    f.uses_re = True
    return f
72

73
####### 'in' comparisons
74

75
@bench('"A" in "A"*1000', "early match, single character", 1000)
def in_test_quick_match_single_character(STR):
    """Time 1000 `in` tests: single-character needle, early match."""
    haystack = STR("A" * 1000)
    needle = STR("A")
    for _ in _RANGE_1000:
        needle in haystack
81

82
@bench('"B" in "A"*1000', "no match, single character", 1000)
def in_test_no_match_single_character(STR):
    """Time 1000 `in` tests: single-character needle, no match."""
    haystack = STR("A" * 1000)
    needle = STR("B")
    for _ in _RANGE_1000:
        needle in haystack
88

89

90
@bench('"AB" in "AB"*1000', "early match, two characters", 1000)
def in_test_quick_match_two_characters(STR):
    """Time 1000 `in` tests: two-character needle, early match."""
    haystack = STR("AB" * 1000)
    needle = STR("AB")
    for _ in _RANGE_1000:
        needle in haystack
96

97
@bench('"BC" in "AB"*1000', "no match, two characters", 1000)
def in_test_no_match_two_character(STR):
    """Time 1000 `in` tests: two-character needle, no match."""
    haystack = STR("AB" * 1000)
    needle = STR("BC")
    for _ in _RANGE_1000:
        needle in haystack
103

104
@bench('"BC" in ("AB"*300+"C")', "late match, two characters", 1000)
def in_test_slow_match_two_characters(STR):
    """Time 1000 `in` tests: two-character needle, late match."""
    haystack = STR("AB" * 300+"C")
    needle = STR("BC")
    for _ in _RANGE_1000:
        needle in haystack
110

111
@bench('s="ABC"*33; (s+"E") in ((s+"D")*300+s+"E")',
       "late match, 100 characters", 100)
def in_test_slow_match_100_characters(STR):
    """Time 100 `in` tests: 100-character needle, late match."""
    core = STR("ABC"*33)
    dee = STR("D")
    eee = STR("E")
    haystack = (core+dee)*300 + core+eee
    needle = core+eee
    for _ in _RANGE_100:
        needle in haystack
121

122
# Try with regex
123
@uses_re
@bench('s="ABC"*33; re.compile(s+"E").search((s+"D")*300+s+"E")',
       "late match, 100 characters", 100)
def re_test_slow_match_100_characters(STR):
    """Time 100 regex searches: 100-character pattern, late match.

    Regex counterpart of in_test_slow_match_100_characters.  The label
    names s+"E" because that is the pattern actually compiled below.
    """
    core = STR("ABC"*33)
    dee = STR("D")
    eee = STR("E")
    haystack = (core+dee)*300 + core+eee
    needle = core+eee
    pat = re.compile(needle)
    search = pat.search
    for _ in _RANGE_100:
        search(haystack)
136

137

138
#### same tests as 'in' but use 'find'
139

140
@bench('("A"*1000).find("A")', "early match, single character", 1000)
def find_test_quick_match_single_character(STR):
    """Time 1000 find() calls: single-character needle, early match."""
    haystack = STR("A" * 1000)
    needle = STR("A")
    find = haystack.find
    for _ in _RANGE_1000:
        find(needle)
147

148
@bench('("A"*1000).find("B")', "no match, single character", 1000)
def find_test_no_match_single_character(STR):
    """Time 1000 find() calls: single-character needle, no match."""
    haystack = STR("A" * 1000)
    needle = STR("B")
    find = haystack.find
    for _ in _RANGE_1000:
        find(needle)
155

156

157
@bench('("AB"*1000).find("AB")', "early match, two characters", 1000)
def find_test_quick_match_two_characters(STR):
    """Time 1000 find() calls: two-character needle, early match."""
    haystack = STR("AB" * 1000)
    needle = STR("AB")
    find = haystack.find
    for _ in _RANGE_1000:
        find(needle)
164

165
@bench('("AB"*1000).find("BC")', "no match, two characters", 1000)
def find_test_no_match_two_character(STR):
    """Time 1000 find() calls: two-character needle "BC", no match."""
    haystack = STR("AB" * 1000)
    needle = STR("BC")
    find = haystack.find
    for _ in _RANGE_1000:
        find(needle)
172

173
@bench('("AB"*1000).find("CA")', "no match, two characters", 1000)
def find_test_no_match_two_character_bis(STR):
    """Time 1000 find() calls: two-character needle "CA", no match."""
    haystack = STR("AB" * 1000)
    needle = STR("CA")
    find = haystack.find
    for _ in _RANGE_1000:
        find(needle)
180

181
@bench('("AB"*300+"C").find("BC")', "late match, two characters", 1000)
def find_test_slow_match_two_characters(STR):
    """Time 1000 find() calls: two-character needle "BC", late match."""
    haystack = STR("AB" * 300+"C")
    needle = STR("BC")
    find = haystack.find
    for _ in _RANGE_1000:
        find(needle)
188

189
@bench('("AB"*300+"CA").find("CA")', "late match, two characters", 1000)
def find_test_slow_match_two_characters_bis(STR):
    """Time 1000 find() calls: two-character needle "CA", late match."""
    haystack = STR("AB" * 300+"CA")
    needle = STR("CA")
    find = haystack.find
    for _ in _RANGE_1000:
        find(needle)
196

197
@bench('s="ABC"*33; ((s+"D")*500+s+"E").find(s+"E")',
       "late match, 100 characters", 100)
def find_test_slow_match_100_characters(STR):
    """Time 100 find() calls: 100-character needle ending in "E", late match."""
    core = STR("ABC"*33)
    dee = STR("D")
    eee = STR("E")
    haystack = (core+dee)*500 + core+eee
    needle = core+eee
    find = haystack.find
    for _ in _RANGE_100:
        find(needle)
208

209
@bench('s="ABC"*33; ((s+"D")*500+"E"+s).find("E"+s)',
       "late match, 100 characters", 100)
def find_test_slow_match_100_characters_bis(STR):
    """Time 100 find() calls: 100-character needle starting with "E", late match."""
    core = STR("ABC"*33)
    dee = STR("D")
    eee = STR("E")
    haystack = (core+dee)*500 + eee+core
    needle = eee+core
    find = haystack.find
    for _ in _RANGE_100:
        find(needle)
220

221

222
#### Same tests for 'rfind'
223

224
@bench('("A"*1000).rfind("A")', "early match, single character", 1000)
def rfind_test_quick_match_single_character(STR):
    """Time 1000 rfind() calls: single-character needle, early match."""
    haystack = STR("A" * 1000)
    needle = STR("A")
    rfind = haystack.rfind
    for _ in _RANGE_1000:
        rfind(needle)
231

232
@bench('("A"*1000).rfind("B")', "no match, single character", 1000)
def rfind_test_no_match_single_character(STR):
    """Time 1000 rfind() calls: single-character needle, no match."""
    haystack = STR("A" * 1000)
    needle = STR("B")
    rfind = haystack.rfind
    for _ in _RANGE_1000:
        rfind(needle)
239

240

241
@bench('("AB"*1000).rfind("AB")', "early match, two characters", 1000)
def rfind_test_quick_match_two_characters(STR):
    """Time 1000 rfind() calls: two-character needle, early match."""
    haystack = STR("AB" * 1000)
    needle = STR("AB")
    rfind = haystack.rfind
    for _ in _RANGE_1000:
        rfind(needle)
248

249
@bench('("AB"*1000).rfind("BC")', "no match, two characters", 1000)
def rfind_test_no_match_two_character(STR):
    """Time 1000 rfind() calls: two-character needle "BC", no match."""
    haystack = STR("AB" * 1000)
    needle = STR("BC")
    rfind = haystack.rfind
    for _ in _RANGE_1000:
        rfind(needle)
256

257
@bench('("AB"*1000).rfind("CA")', "no match, two characters", 1000)
def rfind_test_no_match_two_character_bis(STR):
    """Time 1000 rfind() calls: two-character needle "CA", no match."""
    haystack = STR("AB" * 1000)
    needle = STR("CA")
    rfind = haystack.rfind
    for _ in _RANGE_1000:
        rfind(needle)
264

265
@bench('("C"+"AB"*300).rfind("CA")', "late match, two characters", 1000)
def rfind_test_slow_match_two_characters(STR):
    """Time 1000 rfind() calls: two-character needle "CA", late match."""
    haystack = STR("C" + "AB" * 300)
    needle = STR("CA")
    rfind = haystack.rfind
    for _ in _RANGE_1000:
        rfind(needle)
272

273
@bench('("BC"+"AB"*300).rfind("BC")', "late match, two characters", 1000)
def rfind_test_slow_match_two_characters_bis(STR):
    """Time 1000 rfind() calls: two-character needle "BC", late match."""
    haystack = STR("BC" + "AB" * 300)
    needle = STR("BC")
    rfind = haystack.rfind
    for _ in _RANGE_1000:
        rfind(needle)
280

281
@bench('s="ABC"*33; ("E"+s+("D"+s)*500).rfind("E"+s)',
       "late match, 100 characters", 100)
def rfind_test_slow_match_100_characters(STR):
    """Time 100 rfind() calls: 100-character needle starting with "E", late match."""
    core = STR("ABC"*33)
    dee = STR("D")
    eee = STR("E")
    haystack = eee+core + (dee+core)*500
    needle = eee+core
    rfind = haystack.rfind
    for _ in _RANGE_100:
        rfind(needle)
292

293
@bench('s="ABC"*33; (s+"E"+("D"+s)*500).rfind(s+"E")',
       "late match, 100 characters", 100)
def rfind_test_slow_match_100_characters_bis(STR):
    """Time 100 rfind() calls: 100-character needle ending in "E", late match."""
    core = STR("ABC"*33)
    dee = STR("D")
    eee = STR("E")
    haystack = core+eee + (dee+core)*500
    needle = core+eee
    rfind = haystack.rfind
    for _ in _RANGE_100:
        rfind(needle)
304

305

306
#### Now with index.
307
# Skip the ones which fail because that would include exception overhead.
308

309
@bench('("A"*1000).index("A")', "early match, single character", 1000)
def index_test_quick_match_single_character(STR):
    """Time 1000 index() calls: single-character needle, early match."""
    haystack = STR("A" * 1000)
    needle = STR("A")
    index = haystack.index
    for _ in _RANGE_1000:
        index(needle)
316

317
@bench('("AB"*1000).index("AB")', "early match, two characters", 1000)
def index_test_quick_match_two_characters(STR):
    """Time 1000 index() calls: two-character needle, early match."""
    haystack = STR("AB" * 1000)
    needle = STR("AB")
    index = haystack.index
    for _ in _RANGE_1000:
        index(needle)
324

325
@bench('("AB"*300+"C").index("BC")', "late match, two characters", 1000)
def index_test_slow_match_two_characters(STR):
    """Time 1000 index() calls: two-character needle, late match."""
    haystack = STR("AB" * 300+"C")
    needle = STR("BC")
    index = haystack.index
    for _ in _RANGE_1000:
        index(needle)
332

333
@bench('s="ABC"*33; ((s+"D")*500+s+"E").index(s+"E")',
       "late match, 100 characters", 100)
def index_test_slow_match_100_characters(STR):
    """Time 100 index() calls: 100-character needle, late match."""
    core = STR("ABC"*33)
    dee = STR("D")
    eee = STR("E")
    haystack = (core+dee)*500 + core+eee
    needle = core+eee
    index = haystack.index
    for _ in _RANGE_100:
        index(needle)
344

345

346
#### Same for rindex
347

348
@bench('("A"*1000).rindex("A")', "early match, single character", 1000)
def rindex_test_quick_match_single_character(STR):
    """Time 1000 rindex() calls: single-character needle, early match."""
    haystack = STR("A" * 1000)
    needle = STR("A")
    rindex = haystack.rindex
    for _ in _RANGE_1000:
        rindex(needle)
355

356
@bench('("AB"*1000).rindex("AB")', "early match, two characters", 1000)
def rindex_test_quick_match_two_characters(STR):
    """Time 1000 rindex() calls: two-character needle, early match."""
    haystack = STR("AB" * 1000)
    needle = STR("AB")
    rindex = haystack.rindex
    for _ in _RANGE_1000:
        rindex(needle)
363

364
@bench('("C"+"AB"*300).rindex("CA")', "late match, two characters", 1000)
def rindex_test_slow_match_two_characters(STR):
    """Time 1000 rindex() calls: two-character needle, late match."""
    haystack = STR("C" + "AB" * 300)
    needle = STR("CA")
    rindex = haystack.rindex
    for _ in _RANGE_1000:
        rindex(needle)
371

372
@bench('s="ABC"*33; ("E"+s+("D"+s)*500).rindex("E"+s)',
       "late match, 100 characters", 100)
def rindex_test_slow_match_100_characters(STR):
    """Time 100 rindex() calls: 100-character needle, late match."""
    core = STR("ABC"*33)
    dee = STR("D")
    eee = STR("E")
    haystack = eee + core + (dee+core)*500
    needle = eee + core
    rindex = haystack.rindex
    for _ in _RANGE_100:
        rindex(needle)
383

384

385
#### Same for partition
386

387
@bench('("A"*1000).partition("A")', "early match, single character", 1000)
def partition_test_quick_match_single_character(STR):
    """Time 1000 partition() calls: single-character separator, early match."""
    text = STR("A" * 1000)
    sep = STR("A")
    partition = text.partition
    for _ in _RANGE_1000:
        partition(sep)
394

395
@bench('("A"*1000).partition("B")', "no match, single character", 1000)
def partition_test_no_match_single_character(STR):
    """Time 1000 partition() calls: single-character separator, no match."""
    text = STR("A" * 1000)
    sep = STR("B")
    partition = text.partition
    for _ in _RANGE_1000:
        partition(sep)
402

403

404
@bench('("AB"*1000).partition("AB")', "early match, two characters", 1000)
def partition_test_quick_match_two_characters(STR):
    """Time 1000 partition() calls: two-character separator, early match."""
    text = STR("AB" * 1000)
    sep = STR("AB")
    partition = text.partition
    for _ in _RANGE_1000:
        partition(sep)
411

412
@bench('("AB"*1000).partition("BC")', "no match, two characters", 1000)
def partition_test_no_match_two_character(STR):
    """Time 1000 partition() calls: two-character separator, no match."""
    text = STR("AB" * 1000)
    sep = STR("BC")
    partition = text.partition
    for _ in _RANGE_1000:
        partition(sep)
419

420
@bench('("AB"*300+"C").partition("BC")', "late match, two characters", 1000)
def partition_test_slow_match_two_characters(STR):
    """Time 1000 partition() calls: two-character separator, late match."""
    text = STR("AB" * 300+"C")
    sep = STR("BC")
    partition = text.partition
    for _ in _RANGE_1000:
        partition(sep)
427

428
@bench('s="ABC"*33; ((s+"D")*500+s+"E").partition(s+"E")',
       "late match, 100 characters", 100)
def partition_test_slow_match_100_characters(STR):
    """Time 100 partition() calls: 100-character separator, late match."""
    core = STR("ABC"*33)
    dee = STR("D")
    eee = STR("E")
    text = (core+dee)*500 + core+eee
    sep = core+eee
    partition = text.partition
    for _ in _RANGE_100:
        partition(sep)
439

440

441
#### Same for rpartition
442

443
@bench('("A"*1000).rpartition("A")', "early match, single character", 1000)
def rpartition_test_quick_match_single_character(STR):
    """Time 1000 rpartition() calls: single-character separator, early match."""
    text = STR("A" * 1000)
    sep = STR("A")
    rpartition = text.rpartition
    for _ in _RANGE_1000:
        rpartition(sep)
450

451
@bench('("A"*1000).rpartition("B")', "no match, single character", 1000)
def rpartition_test_no_match_single_character(STR):
    """Time 1000 rpartition() calls: single-character separator, no match."""
    text = STR("A" * 1000)
    sep = STR("B")
    rpartition = text.rpartition
    for _ in _RANGE_1000:
        rpartition(sep)
458

459

460
@bench('("AB"*1000).rpartition("AB")', "early match, two characters", 1000)
def rpartition_test_quick_match_two_characters(STR):
    """Time 1000 rpartition() calls: two-character separator, early match."""
    text = STR("AB" * 1000)
    sep = STR("AB")
    rpartition = text.rpartition
    for _ in _RANGE_1000:
        rpartition(sep)
467

468
@bench('("AB"*1000).rpartition("BC")', "no match, two characters", 1000)
def rpartition_test_no_match_two_character(STR):
    """Time 1000 rpartition() calls: two-character separator, no match."""
    text = STR("AB" * 1000)
    sep = STR("BC")
    rpartition = text.rpartition
    for _ in _RANGE_1000:
        rpartition(sep)
475

476
@bench('("C"+"AB"*300).rpartition("CA")', "late match, two characters", 1000)
def rpartition_test_slow_match_two_characters(STR):
    """Time 1000 rpartition() calls: two-character separator, late match."""
    text = STR("C" + "AB" * 300)
    sep = STR("CA")
    rpartition = text.rpartition
    for _ in _RANGE_1000:
        rpartition(sep)
483

484
@bench('s="ABC"*33; ("E"+s+("D"+s)*500).rpartition("E"+s)',
       "late match, 100 characters", 100)
def rpartition_test_slow_match_100_characters(STR):
    """Time 100 rpartition() calls: 100-character separator, late match."""
    core = STR("ABC"*33)
    dee = STR("D")
    eee = STR("E")
    text = eee + core + (dee+core)*500
    sep = eee + core
    rpartition = text.rpartition
    for _ in _RANGE_100:
        rpartition(sep)
495

496

497
#### Same for split(s, 1)
498

499
@bench('("A"*1000).split("A", 1)', "early match, single character", 1000)
def split_test_quick_match_single_character(STR):
    """Time 1000 split(sep, 1) calls: single-character separator, early match."""
    text = STR("A" * 1000)
    sep = STR("A")
    split = text.split
    for _ in _RANGE_1000:
        split(sep, 1)
506

507
@bench('("A"*1000).split("B", 1)', "no match, single character", 1000)
def split_test_no_match_single_character(STR):
    """Time 1000 split(sep, 1) calls: single-character separator, no match."""
    text = STR("A" * 1000)
    sep = STR("B")
    split = text.split
    for _ in _RANGE_1000:
        split(sep, 1)
514

515

516
@bench('("AB"*1000).split("AB", 1)', "early match, two characters", 1000)
def split_test_quick_match_two_characters(STR):
    """Time 1000 split(sep, 1) calls: two-character separator, early match."""
    text = STR("AB" * 1000)
    sep = STR("AB")
    split = text.split
    for _ in _RANGE_1000:
        split(sep, 1)
523

524
@bench('("AB"*1000).split("BC", 1)', "no match, two characters", 1000)
def split_test_no_match_two_character(STR):
    """Time 1000 split(sep, 1) calls: two-character separator, no match."""
    text = STR("AB" * 1000)
    sep = STR("BC")
    split = text.split
    for _ in _RANGE_1000:
        split(sep, 1)
531

532
@bench('("AB"*300+"C").split("BC", 1)', "late match, two characters", 1000)
def split_test_slow_match_two_characters(STR):
    """Time 1000 split(sep, 1) calls: two-character separator, late match."""
    text = STR("AB" * 300+"C")
    sep = STR("BC")
    split = text.split
    for _ in _RANGE_1000:
        split(sep, 1)
539

540
@bench('s="ABC"*33; ((s+"D")*500+s+"E").split(s+"E", 1)',
       "late match, 100 characters", 100)
def split_test_slow_match_100_characters(STR):
    """Time 100 split(sep, 1) calls: 100-character separator, late match."""
    core = STR("ABC"*33)
    dee = STR("D")
    eee = STR("E")
    text = (core+dee)*500 + core+eee
    sep = core+eee
    split = text.split
    for _ in _RANGE_100:
        split(sep, 1)
551

552

553
#### Same for rsplit(s, 1)
554

555
@bench('("A"*1000).rsplit("A", 1)', "early match, single character", 1000)
def rsplit_test_quick_match_single_character(STR):
    """Time 1000 rsplit(sep, 1) calls: single-character separator, early match."""
    text = STR("A" * 1000)
    sep = STR("A")
    rsplit = text.rsplit
    for _ in _RANGE_1000:
        rsplit(sep, 1)
562

563
@bench('("A"*1000).rsplit("B", 1)', "no match, single character", 1000)
def rsplit_test_no_match_single_character(STR):
    """Time 1000 rsplit(sep, 1) calls: single-character separator, no match."""
    text = STR("A" * 1000)
    sep = STR("B")
    rsplit = text.rsplit
    for _ in _RANGE_1000:
        rsplit(sep, 1)
570

571

572
@bench('("AB"*1000).rsplit("AB", 1)', "early match, two characters", 1000)
def rsplit_test_quick_match_two_characters(STR):
    """Time 1000 rsplit(sep, 1) calls: two-character separator, early match."""
    text = STR("AB" * 1000)
    sep = STR("AB")
    rsplit = text.rsplit
    for _ in _RANGE_1000:
        rsplit(sep, 1)
579

580
@bench('("AB"*1000).rsplit("BC", 1)', "no match, two characters", 1000)
def rsplit_test_no_match_two_character(STR):
    """Time 1000 rsplit(sep, 1) calls: two-character separator, no match."""
    text = STR("AB" * 1000)
    sep = STR("BC")
    rsplit = text.rsplit
    for _ in _RANGE_1000:
        rsplit(sep, 1)
587

588
@bench('("C"+"AB"*300).rsplit("CA", 1)', "late match, two characters", 1000)
def rsplit_test_slow_match_two_characters(STR):
    """Time 1000 rsplit(sep, 1) calls: two-character separator, late match."""
    text = STR("C" + "AB" * 300)
    sep = STR("CA")
    rsplit = text.rsplit
    for _ in _RANGE_1000:
        rsplit(sep, 1)
595

596
@bench('s="ABC"*33; ("E"+s+("D"+s)*500).rsplit("E"+s, 1)',
       "late match, 100 characters", 100)
def rsplit_test_slow_match_100_characters(STR):
    """Time 100 rsplit(sep, 1) calls: 100-character separator, late match."""
    core = STR("ABC"*33)
    dee = STR("D")
    eee = STR("E")
    text = eee + core + (dee+core)*500
    sep = eee + core
    rsplit = text.rsplit
    for _ in _RANGE_100:
        rsplit(sep, 1)
607

608

609
#### Benchmark the operator-based methods
610

611
@bench('"A"*10', "repeat 1 character 10 times", 1000)
def repeat_single_10_times(STR):
    """Time 1000 evaluations of a 1-character string multiplied by 10."""
    unit = STR("A")
    for _ in _RANGE_1000:
        unit * 10
616

617
@bench('"A"*1000', "repeat 1 character 1000 times", 1000)
def repeat_single_1000_times(STR):
    """Time 1000 evaluations of a 1-character string multiplied by 1000."""
    unit = STR("A")
    for _ in _RANGE_1000:
        unit * 1000
622

623
@bench('"ABCDE"*10', "repeat 5 characters 10 times", 1000)
def repeat_5_10_times(STR):
    """Time 1000 evaluations of a 5-character string multiplied by 10."""
    unit = STR("ABCDE")
    for _ in _RANGE_1000:
        unit * 10
628

629
@bench('"ABCDE"*1000', "repeat 5 characters 1000 times", 1000)
def repeat_5_1000_times(STR):
    """Time 1000 evaluations of a 5-character string multiplied by 1000."""
    unit = STR("ABCDE")
    for _ in _RANGE_1000:
        unit * 1000
634

635
# + for concat
636

637
@bench('"Andrew"+"Dalke"', "concat two strings", 1000)
def concat_two_strings(STR):
    """Time 1000 evaluations of `+` on two short strings."""
    left = STR("Andrew")
    right = STR("Dalke")
    for _ in _RANGE_1000:
        left+right
643

644
@bench('s1+s2+s3+s4+...+s20', "concat 20 strings of words length 4 to 15",
       1000)
def concat_many_strings(STR):
    """Time 1000 evaluations of one chained `+` over 20 short strings."""
    w1 = STR('TIXSGYNREDCVBHJ')
    w2 = STR('PUMTLXBZVDO')
    w3 = STR('FVZNJ')
    w4 = STR('OGDXUW')
    w5 = STR('WEIMRNCOYVGHKB')
    w6 = STR('FCQTNMXPUZH')
    w7 = STR('TICZJYRLBNVUEAK')
    w8 = STR('REYB')
    w9 = STR('PWUOQ')
    w10 = STR('EQHCMKBS')
    w11 = STR('AEVDFOH')
    w12 = STR('IFHVD')
    w13 = STR('JGTCNLXWOHQ')
    w14 = STR('ITSKEPYLROZAWXF')
    w15 = STR('THEK')
    w16 = STR('GHPZFBUYCKMNJIT')
    w17 = STR('JMUZ')
    w18 = STR('WLZQMTB')
    w19 = STR('KPADCBW')
    w20 = STR('TNJHZQAGBU')
    for _ in _RANGE_1000:
        (w1 + w2 + w3 + w4 + w5 + w6 + w7 + w8 + w9 + w10 +
         w11 + w12 + w13 + w14 + w15 + w16 + w17 + w18 + w19 + w20)
670

671

672
#### Benchmark join
673

674
def get_bytes_yielding_seq(STR, arg):
    """Return STR(arg) for use as the iterable argument of a join().

    Raises UnsupportedType when STR is BYTES on Python 3, so the calling
    benchmark is not run for that combination.
    """
    on_py3 = sys.version_info >= (3,)
    if on_py3 and STR is BYTES:
        raise UnsupportedType
    return STR(arg)
678

679
@bench('"A".join("")',
       "join empty string, with 1 character sep", 100)
def join_empty_single(STR):
    """Time 100 join() calls on an empty string with a 1-character separator."""
    sep = STR("A")
    items = get_bytes_yielding_seq(STR, "")
    join = sep.join
    for _ in _RANGE_100:
        join(items)
687

688
@bench('"ABCDE".join("")',
       "join empty string, with 5 character sep", 100)
def join_empty_5(STR):
    """Time 100 join() calls on an empty string with a 5-character separator."""
    sep = STR("ABCDE")
    items = get_bytes_yielding_seq(STR, "")
    join = sep.join
    for _ in _RANGE_100:
        join(items)
696

697
@bench('"A".join("ABC..Z")',
       "join string with 26 characters, with 1 character sep", 1000)
def join_alphabet_single(STR):
    """Time 1000 join() calls over a 26-character string, 1-character separator."""
    sep = STR("A")
    items = get_bytes_yielding_seq(STR, "ABCDEFGHIJKLMnOPQRSTUVWXYZ")
    join = sep.join
    for _ in _RANGE_1000:
        join(items)
705

706
@bench('"ABCDE".join("ABC..Z")',
       "join string with 26 characters, with 5 character sep", 1000)
def join_alphabet_5(STR):
    """Time 1000 join() calls over a 26-character string, 5-character separator."""
    sep = STR("ABCDE")
    items = get_bytes_yielding_seq(STR, "ABCDEFGHIJKLMnOPQRSTUVWXYZ")
    join = sep.join
    for _ in _RANGE_1000:
        join(items)
714

715
@bench('"A".join(list("ABC..Z"))',
       "join list of 26 characters, with 1 character sep", 1000)
def join_alphabet_list_single(STR):
    """Time 1000 join() calls over a list of 26 one-character strings, 1-character separator."""
    sep = STR("A")
    items = [STR(ch) for ch in "ABCDEFGHIJKLMnOPQRSTUVWXYZ"]
    join = sep.join
    for _ in _RANGE_1000:
        join(items)
723

724
@bench('"ABCDE".join(list("ABC..Z"))',
       "join list of 26 characters, with 5 character sep", 1000)
def join_alphabet_list_five(STR):
    """Time 1000 join() calls over a list of 26 one-character strings, 5-character separator."""
    sep = STR("ABCDE")
    items = [STR(ch) for ch in "ABCDEFGHIJKLMnOPQRSTUVWXYZ"]
    join = sep.join
    for _ in _RANGE_1000:
        join(items)
732

733
@bench('"A".join(["Bob"]*100)',
       "join list of 100 words, with 1 character sep", 1000)
def join_100_words_single(STR):
    """Time 1000 join() calls over a list of 100 words, 1-character separator."""
    sep = STR("A")
    items = [STR("Bob")]*100
    join = sep.join
    for _ in _RANGE_1000:
        join(items)
741

742
@bench('"ABCDE".join(["Bob"]*100)',
       "join list of 100 words, with 5 character sep", 1000)
def join_100_words_5(STR):
    """Time 1000 join() calls over a list of 100 words, 5-character separator.

    The label previously carried an unbalanced trailing ")"; it now matches
    the form used by join_100_words_single.
    """
    sep = STR("ABCDE")
    items = [STR("Bob")]*100
    join = sep.join
    for _ in _RANGE_1000:
        join(items)
750

751
#### split tests
752

753
@bench('("Here are some words. "*2).split()', "split whitespace (small)", 1000)
def whitespace_split(STR):
    """Time 1000 argument-less split() calls on a short sentence."""
    text = STR("Here are some words. "*2)
    split = text.split
    for _ in _RANGE_1000:
        split()
759

760
@bench('("Here are some words. "*2).rsplit()', "split whitespace (small)", 1000)
def whitespace_rsplit(STR):
    """Time 1000 argument-less rsplit() calls on a short sentence."""
    text = STR("Here are some words. "*2)
    rsplit = text.rsplit
    for _ in _RANGE_1000:
        rsplit()
766

767
@bench('("Here are some words. "*2).split(None, 1)',
       "split 1 whitespace", 1000)
def whitespace_split_1(STR):
    """Time 1000 split(None, 1) calls on a short sentence."""
    text = STR("Here are some words. "*2)
    split = text.split
    no_sep = None  # held in a local, as the original did
    for _ in _RANGE_1000:
        split(no_sep, 1)
775

776
@bench('("Here are some words. "*2).rsplit(None, 1)',
       "split 1 whitespace", 1000)
def whitespace_rsplit_1(STR):
    """Time 1000 rsplit(None, 1) calls on a short sentence."""
    text = STR("Here are some words. "*2)
    rsplit = text.rsplit
    no_sep = None  # held in a local, as the original did
    for _ in _RANGE_1000:
        rsplit(no_sep, 1)
784

785
@bench('("Here are some words. "*2).partition(" ")',
       "split 1 whitespace", 1000)
def whitespace_partition(STR):
    """Time 1000 partition(" ") calls on a short sentence."""
    space = STR(" ")
    text = STR("Here are some words. "*2)
    partition = text.partition
    for _ in _RANGE_1000:
        partition(space)
793

794
@bench('("Here are some words. "*2).rpartition(" ")',
       "split 1 whitespace", 1000)
def whitespace_rpartition(STR):
    """Time 1000 rpartition(" ") calls on a short sentence."""
    space = STR(" ")
    text = STR("Here are some words. "*2)
    rpartition = text.rpartition
    for _ in _RANGE_1000:
        rpartition(space)
802

803
human_text = """\
804
Python is a dynamic object-oriented programming language that can be
805
used for many kinds of software development. It offers strong support
806
for integration with other languages and tools, comes with extensive
807
standard libraries, and can be learned in a few days. Many Python
808
programmers report substantial productivity gains and feel the language
809
encourages the development of higher quality, more maintainable code.
810

811
Python runs on Windows, Linux/Unix, Mac OS X, Amiga, Palm
812
Handhelds, and Nokia mobile phones. Python has also been ported to the
813
Java and .NET virtual machines.
814

815
Python is distributed under an OSI-approved open source license that
816
makes it free to use, even for commercial products.
817
"""*25
818
human_text_bytes = bytes_from_str(human_text)
819
human_text_unicode = unicode_from_str(human_text)
820
def _get_human_text(STR):
    """Return the pre-converted human_text matching the STR converter.

    Raises AssertionError if STR is neither BYTES nor UNICODE.
    """
    if STR is BYTES:
        return human_text_bytes
    if STR is UNICODE:
        return human_text_unicode
    raise AssertionError
826

827
@bench('human_text.split()', "split whitespace (huge)", 10)
def whitespace_split_huge(STR):
    """Time 10 argument-less split() calls on the large prose sample."""
    text = _get_human_text(STR)
    split = text.split
    for _ in _RANGE_10:
        split()
833

834
@bench('human_text.rsplit()', "split whitespace (huge)", 10)
def whitespace_rsplit_huge(STR):
    """Time 10 argument-less rsplit() calls on the large prose sample."""
    text = _get_human_text(STR)
    rsplit = text.rsplit
    for _ in _RANGE_10:
        rsplit()
840

841

842

843
@bench('"this\\nis\\na\\ntest\\n".split("\\n")', "split newlines", 1000)
def newlines_split(STR):
    """Time 1000 split("\\n") calls on a four-line string."""
    newline = STR("\n")
    text = STR("this\nis\na\ntest\n")
    split = text.split
    for _ in _RANGE_1000:
        split(newline)
850

851

852
@bench('"this\\nis\\na\\ntest\\n".rsplit("\\n")', "split newlines", 1000)
def newlines_rsplit(STR):
    """Time 1000 rsplit("\\n") calls on a four-line string."""
    newline = STR("\n")
    text = STR("this\nis\na\ntest\n")
    rsplit = text.rsplit
    for _ in _RANGE_1000:
        rsplit(newline)
859

860
@bench('"this\\nis\\na\\ntest\\n".splitlines()', "split newlines", 1000)
def newlines_splitlines(STR):
    """Time 1000 splitlines() calls on a four-line string."""
    text = STR("this\nis\na\ntest\n")
    splitlines = text.splitlines
    for _ in _RANGE_1000:
        splitlines()
866

867
## split text with 2000 newlines
868

869
def _make_2000_lines():
    """Build a deterministic text of 2000 newline-terminated lines.

    A fixed-seed random generator is used, so every call returns the same
    text.  Each line is a slice of 5 to 65 characters from a string of
    printable ASCII with spaces scattered through it.
    """
    import random
    rng = random.Random(100)

    # Code points 32..127, with a space written at randomly spaced offsets.
    alphabet = [chr(code) for code in range(32, 128)]
    pos = 0
    while pos < len(alphabet):
        alphabet[pos] = " "
        pos += rng.randrange(9)

    # Four copies, so a slice starting below 96 never runs off the end.
    pool = "".join(alphabet) * 4

    lines = []
    for _ in range(2000):
        # Draw order (start first, then length) fixes the generated text.
        begin = rng.randrange(96)
        length = rng.randint(5, 65)
        lines.append(pool[begin:begin+length])
    return "\n".join(lines)+"\n"
885

886
_text_with_2000_lines = _make_2000_lines()
887
_text_with_2000_lines_bytes = bytes_from_str(_text_with_2000_lines)
888
_text_with_2000_lines_unicode = unicode_from_str(_text_with_2000_lines)
889
def _get_2000_lines(STR):
    """Return the pre-converted 2000-line text matching the STR converter.

    Raises AssertionError if STR is neither BYTES nor UNICODE.
    """
    if STR is BYTES:
        return _text_with_2000_lines_bytes
    if STR is UNICODE:
        return _text_with_2000_lines_unicode
    raise AssertionError
895

896

897
@bench('"...text...".split("\\n")', "split 2000 newlines", 10)
def newlines_split_2000(STR):
    """Time 10 split("\\n") calls on the 2000-line text."""
    newline = STR("\n")
    text = _get_2000_lines(STR)
    split = text.split
    for _ in _RANGE_10:
        split(newline)
904

905
@bench('"...text...".rsplit("\\n")', "split 2000 newlines", 10)
def newlines_rsplit_2000(STR):
    """Time 10 rsplit("\\n") calls on the 2000-line text."""
    newline = STR("\n")
    text = _get_2000_lines(STR)
    rsplit = text.rsplit
    for _ in _RANGE_10:
        rsplit(newline)
912

913
@bench('"...text...".splitlines()', "split 2000 newlines", 10)
def newlines_splitlines_2000(STR):
    """Time 10 splitlines() calls on the 2000-line text."""
    text = _get_2000_lines(STR)
    splitlines = text.splitlines
    for _ in _RANGE_10:
        splitlines()
919

920

921
## split text on "--" characters
922
@bench(
    '"this--is--a--test--of--the--emergency--broadcast--system".split("--")',
    "split on multicharacter separator (small)", 1000)
def split_multichar_sep_small(STR):
    """Time 1000 split("--") calls on a short "--"-delimited string."""
    dashes = STR("--")
    text = STR("this--is--a--test--of--the--emergency--broadcast--system")
    split = text.split
    for _ in _RANGE_1000:
        split(dashes)
931
@bench(
    '"this--is--a--test--of--the--emergency--broadcast--system".rsplit("--")',
    "split on multicharacter separator (small)", 1000)
def rsplit_multichar_sep_small(STR):
    """Time 1000 rsplit("--") calls on a short "--"-delimited string."""
    dashes = STR("--")
    text = STR("this--is--a--test--of--the--emergency--broadcast--system")
    rsplit = text.rsplit
    for _ in _RANGE_1000:
        rsplit(dashes)
940

941
## split dna text on "ACTAT" characters
942
@bench('dna.split("ACTAT")',
       "split on multicharacter separator (dna)", 10)
def split_multichar_sep_dna(STR):
    """Time 10 split("ACTAT") calls on the DNA sample text."""
    motif = STR("ACTAT")
    dna = _get_dna(STR)
    split = dna.split
    for _ in _RANGE_10:
        split(motif)
950

951
@bench('dna.rsplit("ACTAT")',
       "split on multicharacter separator (dna)", 10)
def rsplit_multichar_sep_dna(STR):
    """Time 10 rsplit("ACTAT") calls on the DNA sample text."""
    motif = STR("ACTAT")
    dna = _get_dna(STR)
    rsplit = dna.rsplit
    for _ in _RANGE_10:
        rsplit(motif)
959

960

961

962
## split with limits
963

964
# One tab-separated record with nine columns, used by the tab-split tests.
GFF3_example = "\t".join((
    "I", "Genomic_canonical", "region", "357208", "396183", ".", "+", ".",
    "ID=Sequence:R119;note=Clone R119%3B Genbank AF063007;Name=R119",
))
967

968
@bench('GFF3_example.split("\\t")', "tab split", 1000)
def tab_split_no_limit(STR):
    """Time 1000 split("\\t") calls on the GFF3 record, no split limit."""
    tab = STR("\t")
    record = STR(GFF3_example)
    split = record.split
    for _ in _RANGE_1000:
        split(tab)
975

976
@bench('GFF3_example.split("\\t", 8)', "tab split", 1000)
def tab_split_limit(STR):
    """Time 1000 split("\\t", 8) calls on the GFF3 record."""
    tab = STR("\t")
    record = STR(GFF3_example)
    split = record.split
    for _ in _RANGE_1000:
        split(tab, 8)
983

984
@bench('GFF3_example.rsplit("\\t")', "tab split", 1000)
def tab_rsplit_no_limit(STR):
    """Time 1000 rsplit("\\t") calls on the GFF3 record, no split limit."""
    tab = STR("\t")
    record = STR(GFF3_example)
    rsplit = record.rsplit
    for _ in _RANGE_1000:
        rsplit(tab)
991

992
@bench('GFF3_example.rsplit("\\t", 8)', "tab split", 1000)
def tab_rsplit_limit(STR):
    """Time 1000 rsplit("\\t", 8) calls on the GFF3 record."""
    tab = STR("\t")
    record = STR(GFF3_example)
    rsplit = record.rsplit
    for _ in _RANGE_1000:
        rsplit(tab, 8)
999

1000
#### Count characters
1001

1002
@bench('...text.with.2000.newlines.count("\\n")',
       "count newlines", 10)
def count_newlines(STR):
    """Time 10 count("\\n") calls on the 2000-line text."""
    newline = STR("\n")
    text = _get_2000_lines(STR)
    count = text.count
    for _ in _RANGE_10:
        count(newline)
1010

1011
# Orchid sequences concatenated, from Biopython
1012
_dna = """
1013
CGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTGTTGAGATCACATAATAATTGATCGGGTT
1014
AATCTGGAGGATCTGTTTACTTTGGTCACCCATGAGCATTTGCTGTTGAAGTGACCTAGAATTGCCATCG
1015
AGCCTCCTTGGGAGCTTTCTTGTTGGCGAGATCTAAACCCTTGCCCGGCGCAGTTTTGCTCCAAGTCGTT
1016
TGACACATAATTGGTGAAGGGGGTGGCATCCTTCCCTGACCCTCCCCCAACTATTTTTTTAACAACTCTC
1017
AGCAACGGAGACTCAGTCTTCGGCAAATGCGATAAATGGTGTGAATTGCAGAATCCCGTGCACCATCGAG
1018
TCTTTGAACGCAAGTTGCGCCCGAGGCCATCAGGCCAAGGGCACGCCTGCCTGGGCATTGCGAGTCATAT
1019
CTCTCCCTTAACGAGGCTGTCCATACATACTGTTCAGCCGGTGCGGATGTGAGTTTGGCCCCTTGTTCTT
1020
TGGTACGGGGGGTCTAAGAGCTGCATGGGCTTTTGATGGTCCTAAATACGGCAAGAGGTGGACGAACTAT
1021
GCTACAACAAAATTGTTGTGCAGAGGCCCCGGGTTGTCGTATTAGATGGGCCACCGTAATCTGAAGACCC
1022
TTTTGAACCCCATTGGAGGCCCATCAACCCATGATCAGTTGATGGCCATTTGGTTGCGACCCCAGGTCAG
1023
GTGAGCAACAGCTGTCGTAACAAGGTTTCCGTAGGGTGAACTGCGGAAGGATCATTGTTGAGATCACATA
1024
ATAATTGATCGAGTTAATCTGGAGGATCTGTTTACTTGGGTCACCCATGGGCATTTGCTGTTGAAGTGAC
1025
CTAGATTTGCCATCGAGCCTCCTTGGGAGCATCCTTGTTGGCGATATCTAAACCCTCAATTTTTCCCCCA
1026
ATCAAATTACACAAAATTGGTGGAGGGGGTGGCATTCTTCCCTTACCCTCCCCCAAATATTTTTTTAACA
1027
ACTCTCAGCAACGGATATCTCAGCTCTTGCATCGATGAAGAACCCACCGAAATGCGATAAATGGTGTGAA
1028
TTGCAGAATCCCGTGAACCATCGAGTCTTTGAACGCAAGTTGCGCCCGAGGCCATCAGGCCAAGGGCACG
1029
CCTGCCTGGGCATTGCGAGTCATATCTCTCCCTTAACGAGGCTGTCCATACATACTGTTCAGCCGGTGCG
1030
GATGTGAGTTTGGCCCCTTGTTCTTTGGTACGGGGGGTCTAAGAGATGCATGGGCTTTTGATGGTCCTAA
1031
ATACGGCAAGAGGTGGACGAACTATGCTACAACAAAATTGTTGTGCAAAGGCCCCGGGTTGTCGTATAAG
1032
ATGGGCCACCGATATCTGAAGACCCTTTTGGACCCCATTGGAGCCCATCAACCCATGTCAGTTGATGGCC
1033
ATTCGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTGTTGAGATCACATAATAATTGATCGA
1034
GTTAATCTGGAGGATCTGTTTACTTGGGTCACCCATGGGCATTTGCTGTTGAAGTGACCTAGATTTGCCA
1035
TCGAGCCTCCTTGGGAGCTTTCTTGTTGGCGATATCTAAACCCTTGCCCGGCAGAGTTTTGGGAATCCCG
1036
TGAACCATCGAGTCTTTGAACGCAAGTTGCGCCCGAGGCCATCAGGCCAAGGGCACGCCTGCCTGGGCAT
1037
TGCGAGTCATATCTCTCCCTTAACGAGGCTGTCCATACACACCTGTTCAGCCGGTGCGGATGTGAGTTTG
1038
GCCCCTTGTTCTTTGGTACGGGGGGTCTAAGAGCTGCATGGGCTTTTGATGGTCCTAAATACGGCAAGAG
1039
GTGGACGAACTATGCTACAACAAAATTGTTGTGCAAAGGCCCCGGGTTGTCGTATTAGATGGGCCACCAT
1040
AATCTGAAGACCCTTTTGAACCCCATTGGAGGCCCATCAACCCATGATCAGTTGATGGCCATTTGGTTGC
1041
GACCCAGTCAGGTGAGGGTAGGTGAACCTGCGGAAGGATCATTGTTGAGATCACATAATAATTGATCGAG
1042
TTAATCTGGAGGATCTGTTTACTTTGGTCACCCATGGGCATTTGCTGTTGAAGTGACCTAGATTTGCCAT
1043
CGAGCCTCCTTGGGAGCTTTCTTGTTGGCGAGATCTAAACCCTTGCCCGGCGGAGTTTGGCGCCAAGTCA
1044
TATGACACATAATTGGTGAAGGGGGTGGCATCCTGCCCTGACCCTCCCCAAATTATTTTTTTAACAACTC
1045
TCAGCAACGGATATCTCGGCTCTTGCATCGATGAAGAACGCAGCGAAATGCGATAAATGGTGTGAATTGC
1046
AGAATCCCGTGAACCATCGAGTCTTTGGAACGCAAGTTGCGCCCGAGGCCATCAGGCCAAGGGCACGCCT
1047
GCCTGGGCATTGGGAATCATATCTCTCCCCTAACGAGGCTATCCAAACATACTGTTCATCCGGTGCGGAT
1048
GTGAGTTTGGCCCCTTGTTCTTTGGTACCGGGGGTCTAAGAGCTGCATGGGCATTTGATGGTCCTCAAAA
1049
CGGCAAGAGGTGGACGAACTATGCCACAACAAAATTGTTGTCCCAAGGCCCCGGGTTGTCGTATTAGATG
1050
GGCCACCGTAACCTGAAGACCCTTTTGAACCCCATTGGAGGCCCATCAACCCATGATCAGTTGATGACCA
1051
TTTGTTGCGACCCCAGTCAGCTGAGCAACCCGCTGAGTGGAAGGTCATTGCCGATATCACATAATAATTG
1052
ATCGAGTTAATCTGGAGGATCTGTTTACTTGGTCACCCATGAGCATTTGCTGTTGAAGTGACCTAGATTT
1053
GCCATCGAGCCTCCTTGGGAGTTTTCTTGTTGGCGAGATCTAAACCCTTGCCCGGCGGAGTTGTGCGCCA
1054
AGTCATATGACACATAATTGGTGAAGGGGGTGGCATCCTGCCCTGACCCTCCCCAAATTATTTTTTTAAC
1055
AACTCTCAGCAACGGATATCTCGGCTCTTGCATCGATGAAGAACGCAGCGAAATGCGATAAATGGTGTGA
1056
ATTGCAGAATCCCGTGAACCATCGAGTCTTTGAACGCAAGTTGCGCCCGAGGCCATCAGGCCAAGGGCAC
1057
GCCTGCCTGGGCATTGCGAGTCATATCTCTCCCTTAACGAGGCTGTCCATACATACTGTTCATCCGGTGC
1058
GGATGTGAGTTTGGCCCCTTGTTCTTTGGTACGGGGGGTCTAAGAGCTGCATGGGCATTTGATGGTCCTC
1059
AAAACGGCAAGAGGTGGACGAACTATGCTACAACCAAATTGTTGTCCCAAGGCCCCGGGTTGTCGTATTA
1060
GATGGGCCACCGTAACCTGAAGACCCTTTTGAACCCCATTGGAGGCCCATCAACCCATGATCAGTTGATG
1061
ACCATGTGTTGCGACCCCAGTCAGCTGAGCAACGCGCTGAGCGTAACAAGGTTTCCGTAGGTGGACCTCC
1062
GGGAGGATCATTGTTGAGATCACATAATAATTGATCGAGGTAATCTGGAGGATCTGCATATTTTGGTCAC
1063
"""
1064
# Collapse the multi-line literal into one unbroken sequence, repeat it
# 25 times, and keep one copy per string type.
_dna = "".join(_dna.splitlines()) * 25
_dna_unicode = unicode_from_str(_dna)
_dna_bytes = bytes_from_str(_dna)
1068

1069
def _get_dna(STR):
    """Return the prebuilt DNA sample matching the requested string type.

    STR must be the UNICODE or BYTES converter object itself; anything
    else raises AssertionError.
    """
    if STR is not UNICODE and STR is not BYTES:
        raise AssertionError
    return _dna_unicode if STR is UNICODE else _dna_bytes
1075

1076
@bench('dna.count("AACT")', "count AACT substrings in DNA example", 10)
def count_aact(STR):
    """Count occurrences of "AACT" in the DNA sample, ten times per call."""
    target = STR("AACT")
    count = _get_dna(STR).count
    for _ in _RANGE_10:
        count(target)
1083

1084
##### startswith and endswith
1085

1086
@bench('"Andrew".startswith("A")', 'startswith single character', 1000)
def startswith_single(STR):
    """Test a one-character prefix that matches, 1000 times."""
    prefix = STR("A")
    startswith = STR("Andrew").startswith
    for _ in _RANGE_1000:
        startswith(prefix)
1093

1094
@bench('"Andrew".startswith("Andrew")', 'startswith multiple characters',
       1000)
def startswith_multiple(STR):
    """Test a full-length prefix that matches, 1000 times."""
    prefix = STR("Andrew")
    startswith = STR("Andrew").startswith
    for _ in _RANGE_1000:
        startswith(prefix)
1102

1103
@bench('"Andrew".startswith("Anders")',
       'startswith multiple characters - not!', 1000)
def startswith_multiple_not(STR):
    """Test a same-length prefix that does not match, 1000 times."""
    prefix = STR("Anders")
    startswith = STR("Andrew").startswith
    for _ in _RANGE_1000:
        startswith(prefix)
1111

1112

1113
# endswith
1114

1115
@bench('"Andrew".endswith("w")', 'endswith single character', 1000)
def endswith_single(STR):
    """Test a one-character suffix that matches, 1000 times."""
    suffix = STR("w")
    endswith = STR("Andrew").endswith
    for _ in _RANGE_1000:
        endswith(suffix)
1122

1123
@bench('"Andrew".endswith("Andrew")', 'endswith multiple characters', 1000)
def endswith_multiple(STR):
    """Test a full-length suffix that matches, 1000 times."""
    suffix = STR("Andrew")
    endswith = STR("Andrew").endswith
    for _ in _RANGE_1000:
        endswith(suffix)
1130

1131
@bench('"Andrew".endswith("Anders")',
       'endswith multiple characters - not!', 1000)
def endswith_multiple_not(STR):
    """Test a same-length suffix that does not match, 1000 times."""
    suffix = STR("Anders")
    endswith = STR("Andrew").endswith
    for _ in _RANGE_1000:
        endswith(suffix)
1139

1140
#### Strip
1141

1142
@bench('"Hello!\\n".strip()', 'strip terminal newline', 1000)
def terminal_newline_strip_right(STR):
    """Call strip() on text with a trailing newline, 1000 times."""
    strip = STR("Hello!\n").strip
    for _ in _RANGE_1000:
        strip()
1148

1149
@bench('"Hello!\\n".rstrip()', 'strip terminal newline', 1000)
def terminal_newline_rstrip(STR):
    """Call rstrip() on text with a trailing newline, 1000 times."""
    rstrip = STR("Hello!\n").rstrip
    for _ in _RANGE_1000:
        rstrip()
1155

1156
@bench('"\\nHello!".strip()', 'strip terminal newline', 1000)
def terminal_newline_strip_left(STR):
    """Call strip() on text with a leading newline, 1000 times."""
    strip = STR("\nHello!").strip
    for _ in _RANGE_1000:
        strip()
1162

1163
@bench('"\\nHello!\\n".strip()', 'strip terminal newline', 1000)
def terminal_newline_strip_both(STR):
    """Call strip() on text with newlines at both ends, 1000 times."""
    strip = STR("\nHello!\n").strip
    for _ in _RANGE_1000:
        strip()
1169

1170
@bench('"\\nHello!".lstrip()', 'strip terminal newline', 1000)
def terminal_newline_lstrip(STR):
    """Call lstrip() on text with a leading newline, 1000 times.

    STR converts the sample literal to the string type under test.
    The benchmark label now names lstrip(), matching the method that is
    actually timed (it previously said rstrip()).
    """
    s = STR("\nHello!")
    # Bind the method once so the loop measures only the call.
    s_lstrip = s.lstrip
    for x in _RANGE_1000:
        s_lstrip()
1176

1177
@bench('s="Hello!\\n"; s[:-1] if s[-1:]=="\\n" else s',
       'strip terminal newline', 1000)
def terminal_newline_if_else(STR):
    """Drop a trailing newline with a conditional slice, 1000 times.

    STR converts the sample literals to the string type under test.
    The last character is taken with a slice (s[-1:]) rather than an
    index: indexing a Python 3 bytes object yields an int, which never
    equals the one-byte NL value, so the bytes run would always take the
    "else" branch and never exercise the s[:-1] slice.
    """
    s = STR("Hello!\n")
    NL = STR("\n")
    for x in _RANGE_1000:
        s[:-1] if (s[-1:] == NL) else s
1184

1185

1186
# Strip multiple spaces or tabs
1187

1188
@bench('"Hello\\t   \\t".strip()', 'strip terminal spaces and tabs', 1000)
def terminal_space_strip(STR):
    """Call strip() on text ending in spaces and tabs, 1000 times.

    STR converts the sample literal to the string type under test.
    The whitespace run is placed at the very end of the sample (after
    the "!"), as in the rstrip benchmark of this group; with the "!"
    last, strip() had nothing to remove.
    """
    s = STR("Hello!\t   \t")
    # Bind the method once so the loop measures only the call.
    s_strip = s.strip
    for x in _RANGE_1000:
        s_strip()
1194

1195
@bench('"Hello\\t   \\t".rstrip()', 'strip terminal spaces and tabs', 1000)
def terminal_space_rstrip(STR):
    """Call rstrip() on text ending in spaces and tabs, 1000 times."""
    rstrip = STR("Hello!\t   \t").rstrip
    for _ in _RANGE_1000:
        rstrip()
1201

1202
@bench('"\\t   \\tHello".lstrip()', 'strip terminal spaces and tabs', 1000)
def terminal_space_lstrip(STR):
    """Call lstrip() on text starting with spaces and tabs, 1000 times.

    STR converts the sample literal to the string type under test.
    The benchmark label now names lstrip(), matching the method that is
    actually timed (it previously said rstrip()).
    """
    s = STR("\t   \tHello!")
    # Bind the method once so the loop measures only the call.
    s_lstrip = s.lstrip
    for x in _RANGE_1000:
        s_lstrip()
1208

1209

1210
#### replace
1211
@bench('"This is a test".replace(" ", "\\t")', 'replace single character',
       1000)
def replace_single_character(STR):
    """Replace every space in a short sentence with a tab, 1000 times."""
    old = STR(" ")
    new = STR("\t")
    replace = STR("This is a test!").replace
    for _ in _RANGE_1000:
        replace(old, new)
1220

1221
@uses_re
@bench('re.sub(" ", "\\t", "This is a test")', 'replace single character',
       1000)
def replace_single_character_re(STR):
    """Replace every space in a short sentence via a regex, 1000 times.

    STR converts the sample literals to the string type under test.
    The pattern is compiled once outside the loop, so only the
    substitution is timed.  The benchmark label now has its closing
    parenthesis.
    """
    s = STR("This is a test!")
    pat = re.compile(STR(" "))
    to_str = STR("\t")
    # Bind the method once so the loop measures only the call.
    pat_sub = pat.sub
    for x in _RANGE_1000:
        pat_sub(to_str, s)
1231

1232
@bench('"...text.with.2000.lines...replace("\\n", " ")',
       'replace single character, big string', 10)
def replace_single_character_big(STR):
    """Replace each newline in the 2000-line text with a space, ten times."""
    old = STR("\n")
    new = STR(" ")
    replace = _get_2000_lines(STR).replace
    for _ in _RANGE_10:
        replace(old, new)
1241

1242
@uses_re
@bench('re.sub("\\n", " ", "...text.with.2000.lines...")',
       'replace single character, big string', 10)
def replace_single_character_big_re(STR):
    """Regex-replace each newline in the 2000-line text, ten times."""
    text = _get_2000_lines(STR)
    # Compile outside the loop so only the substitution is timed.
    substitute = re.compile(STR("\n")).sub
    replacement = STR(" ")
    for _ in _RANGE_10:
        substitute(replacement, text)
1252

1253

1254
@bench('dna.replace("ATC", "ATT")',
       'replace multiple characters, dna', 10)
def replace_multiple_characters_dna(STR):
    """Replace "ATC" with "ATT" throughout the DNA sample, ten times."""
    old = STR("ATC")
    new = STR("ATT")
    replace = _get_dna(STR).replace
    for _ in _RANGE_10:
        replace(old, new)
1263

1264
# This increases the character count
1265
@bench('"...text.with.2000.newlines...replace("\\n", "\\r\\n")',
       'replace and expand multiple characters, big string', 10)
def replace_multiple_character_big(STR):
    """Expand each newline in the 2000-line text to CR LF, ten times."""
    old = STR("\n")
    new = STR("\r\n")
    replace = _get_2000_lines(STR).replace
    for _ in _RANGE_10:
        replace(old, new)
1274

1275

1276
# This decreases the character count
1277
@bench('"When shall we three meet again?".replace("ee", "")',
       'replace/remove multiple characters', 1000)
def replace_multiple_character_remove(STR):
    """Delete every "ee" from a short sentence, 1000 times."""
    old = STR("ee")
    new = STR("")
    replace = STR("When shall we three meet again?").replace
    for _ in _RANGE_1000:
        replace(old, new)
1286

1287

1288
# A single "A" followed by 128 KiB of "Z", kept once per string type.
big_s = "A" + "Z" * (128 * 1024)
big_s_unicode = unicode_from_str(big_s)
big_s_bytes = bytes_from_str(big_s)
1291
def _get_big_s(STR):
    """Return the prebuilt big string matching the requested string type.

    STR must be the UNICODE or BYTES converter object itself; anything
    else raises AssertionError.
    """
    if STR is not UNICODE and STR is not BYTES:
        raise AssertionError
    return big_s_unicode if STR is UNICODE else big_s_bytes
1295

1296
# The older replace implementation counted all matches in
1297
# the string even when it only needed to make one replacement.
1298
@bench('("A" + ("Z"*128*1024)).replace("A", "BB", 1)',
       'quick replace single character match', 10)
def quick_replace_single_match(STR):
    """Replace the leading "A" of the big string once (count=1), ten times."""
    old = STR("A")
    new = STR("BB")
    replace = _get_big_s(STR).replace
    for _ in _RANGE_10:
        replace(old, new, 1)
1307

1308
@bench('("A" + ("Z"*128*1024)).replace("AZZ", "BBZZ", 1)',
       'quick replace multiple character match', 10)
def quick_replace_multiple_match(STR):
    """Replace the leading "AZZ" of the big string once (count=1), ten times."""
    old = STR("AZZ")
    new = STR("BBZZ")
    replace = _get_big_s(STR).replace
    for _ in _RANGE_10:
        replace(old, new, 1)
1317

1318

1319
####
1320

1321
# CCP does a lot of this, for internationalisation of ingame messages.
1322
# Template and mapping for the %-formatting benchmark, kept once per
# string type.
_format = "The %(thing)s is %(place)s the %(location)s."
_format_dict = {
    "thing": "THING",
    "place": "PLACE",
    "location": "LOCATION",
}
_format_bytes = bytes_from_str(_format)
_format_unicode = unicode_from_str(_format)
_format_dict_bytes = dict(
    (bytes_from_str(key), bytes_from_str(value))
    for key, value in _format_dict.items())
_format_dict_unicode = dict(
    (unicode_from_str(key), unicode_from_str(value))
    for key, value in _format_dict.items())
1328

1329
def _get_format(STR):
    """Return the format template for the requested string type.

    Raises UnsupportedType for BYTES when running on Python 3, and
    AssertionError if STR is neither UNICODE nor BYTES.
    """
    if STR is UNICODE:
        return _format_unicode
    if STR is not BYTES:
        raise AssertionError
    if sys.version_info >= (3,):
        raise UnsupportedType
    return _format_bytes
1337

1338
def _get_format_dict(STR):
    """Return the format mapping for the requested string type.

    Raises UnsupportedType for BYTES when running on Python 3, and
    AssertionError if STR is neither UNICODE nor BYTES.
    """
    if STR is UNICODE:
        return _format_dict_unicode
    if STR is not BYTES:
        raise AssertionError
    if sys.version_info >= (3,):
        raise UnsupportedType
    return _format_dict_bytes
1346

1347
# Formatting.
1348
@bench('"The %(k1)s is %(k2)s the %(k3)s."%{"k1":"x","k2":"y","k3":"z",}',
       'formatting a string type with a dict', 1000)
def format_with_dict(STR):
    """Apply %-formatting with a mapping to the template, 1000 times."""
    template = _get_format(STR)
    mapping = _get_format_dict(STR)
    for _ in _RANGE_1000:
        template % mapping
1355

1356

1357
#### Upper- and lower- case conversion
1358

1359
@bench('("Where in the world is Carmen San Deigo?"*10).lower()',
       "case conversion -- rare", 1000)
def lower_conversion_rare(STR):
    """Lower-case mostly-lowercase text, 1000 times."""
    lower = STR("Where in the world is Carmen San Deigo?"*10).lower
    for _ in _RANGE_1000:
        lower()
1366

1367
@bench('("WHERE IN THE WORLD IS CARMEN SAN DEIGO?"*10).lower()',
       "case conversion -- dense", 1000)
def lower_conversion_dense(STR):
    """Lower-case all-uppercase text, 1000 times."""
    lower = STR("WHERE IN THE WORLD IS CARMEN SAN DEIGO?"*10).lower
    for _ in _RANGE_1000:
        lower()
1374

1375

1376
@bench('("wHERE IN THE WORLD IS cARMEN sAN dEIGO?"*10).upper()',
       "case conversion -- rare", 1000)
def upper_conversion_rare(STR):
    """Upper-case mostly-uppercase text, 1000 times.

    STR converts the sample literal to the string type under test.
    The sample is the mostly-uppercase string shown in the benchmark
    label, so only a few characters need converting.  The previous
    mostly-lowercase sample made this "rare" case convert almost every
    letter.
    """
    s = STR("wHERE IN THE WORLD IS cARMEN sAN dEIGO?"*10)
    # Bind the method once so the loop measures only the call.
    s_upper = s.upper
    for x in _RANGE_1000:
        s_upper()
1383

1384
@bench('("where in the world is carmen san deigo?"*10).upper()',
       "case conversion -- dense", 1000)
def upper_conversion_dense(STR):
    """Upper-case all-lowercase text, 1000 times."""
    upper = STR("where in the world is carmen san deigo?"*10).upper
    for _ in _RANGE_1000:
        upper()
1391

1392

1393
# end of benchmarks
1394

1395
#################
1396

1397
class BenchTimer(timeit.Timer):
    """timeit.Timer that picks its own loop count."""

    def best(self, repeat=1):
        """Return the fastest per-loop time, in timer units.

        The loop count starts at 10 and grows tenfold until one timing
        exceeds 0.02 (capped at 10**9 loops).  That calibration timing
        counts as the first sample; up to ``repeat - 1`` further samples
        are taken with the same loop count, and the minimum is divided
        by the loop count.
        """
        number = 10
        elapsed = self.timeit(number)
        while not elapsed > 0.02 and number < 10**9:
            number *= 10
            elapsed = self.timeit(number)
        samples = [elapsed]
        samples.extend(self.timeit(number) for _ in range(repeat - 1))
        return min(samples) / number
1408

1409
def main():
    """Run the selected benchmarks and print a bytes-vs-unicode table.

    Positional command-line arguments are substrings matched against
    each benchmark's group name; with none given, every benchmark runs.
    Raises SystemExit when both --8-bit and --unicode are requested.
    """
    options, wanted = parser.parse_args()
    if options.bytes_only and options.unicode_only:
        raise SystemExit("Only one of --8-bit and --unicode are allowed")

    def measure(func_name, type_name):
        # Time one benchmark for one string type and return
        # (seconds, display text).  An unsupported type contributes no
        # time and is shown as "N/A".
        stmt = "__main__.%s(__main__.%s)" % (func_name, type_name)
        try:
            seconds = BenchTimer(stmt, "import __main__").best(REPEAT)
        except UnsupportedType:
            return 0.0, "N/A"
        return seconds, "%.2f" % (1000 * seconds)

    # Collect the module-level functions marked as benchmarks, filtered
    # by the requested group names and the --skip-re option.
    selected = []
    for func_name, func in globals().items():
        if not hasattr(func, "is_bench"):
            continue
        if wanted and not any(word in func.group for word in wanted):
            # Not selected, ignore
            continue
        if options.skip_re and hasattr(func, "uses_re"):
            continue
        selected.append((func.group, func_name, func))
    selected.sort()

    p("bytes\tunicode")
    p("(in ms)\t(in ms)\t%\tcomment")

    bytes_total = uni_total = 0.0

    by_group = itertools.groupby(selected, operator.itemgetter(0))
    for group_name, members in by_group:
        # Flush buffer before each group
        sys.stdout.flush()
        p("=" * 10, group_name)
        for _, func_name, func in members:
            if options.unicode_only:
                bytes_time, bytes_text = 0.0, " - "
            else:
                bytes_time, bytes_text = measure(func_name, "BYTES")
            bytes_total += bytes_time

            if options.bytes_only:
                uni_time, uni_text = 0.0, " - "
            else:
                uni_time, uni_text = measure(func_name, "UNICODE")
            uni_total += uni_time

            # Bytes time relative to unicode time; 0 when unicode was
            # not measured.
            try:
                relative = bytes_time / uni_time
            except (TypeError, ZeroDivisionError):
                relative = 0.0
            p("%s\t%s\t%.1f\t%s (*%d)" % (
                bytes_text, uni_text, 100. * relative,
                func.comment, func.repeat_count))

    if bytes_total == uni_total == 0.0:
        p("That was zippy!")
        return

    try:
        ratio = bytes_total / uni_total
    except ZeroDivisionError:
        ratio = 0.0
    p("%.2f\t%.2f\t%.1f\t%s" % (
        1000 * bytes_total, 1000 * uni_total, 100. * ratio,
        "TOTAL"))
1480

1481
# Script entry point: run the benchmark suite when executed directly.
if __name__ == "__main__":
    main()
1483

1484
Product

Resources

Company