Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
allendowney
GitHub Repository: allendowney/cpython
Path: blob/main/Tools/stringbench/stringbench.py
12 views
1
2
# Various microbenchmarks comparing unicode and byte string performance
3
# Please keep this file both 2.x and 3.x compatible!
4
5
import timeit
6
import itertools
7
import operator
8
import re
9
import sys
10
import datetime
11
import optparse
12
13
# Version of the benchmark suite itself; shown in the report banner.
VERSION = "2.0"
14
15
def p(*args):
    # Minimal print() stand-in that behaves the same on Python 2 and 3:
    # str() each argument, join with single spaces, end with a newline.
    line = ' '.join(map(str, args))
    sys.stdout.write(line + '\n')
17
18
# Converters from a native str literal to the two string flavours under test.
# BYTES / UNICODE are the very same function objects as bytes_from_str /
# unicode_from_str; helpers in this file compare them by identity.
if sys.version_info < (3,):
    # Python 2: str is already the 8-bit type.
    bytes_from_str = lambda x: x
    unicode_from_str = lambda x: x.decode('ascii')
else:
    # Python 3: str is already the Unicode type.
    bytes_from_str = lambda x: x.encode('ascii')
    unicode_from_str = lambda x: x
BYTES = bytes_from_str
UNICODE = unicode_from_str
24
25
class UnsupportedType(TypeError):
    """Raised when a benchmark cannot be run for the requested string type."""
27
28
29
# Report banner: suite version, interpreter version, and the start time.
p('stringbench v%s' % (VERSION,))
p(sys.version)
p(datetime.datetime.now())
32
33
# Repeat count for the timing runs (presumably handed to timeit by the
# driver code, which is outside this part of the file -- confirm there).
# Alternative values that have been used here: 1 and 7.
REPEAT = 3
36
37
# This file is a script, not an importable module: bail out on import.
if __name__ != "__main__":
    sys.exit("Must run as main program")
39
40
# Command-line interface: three independent boolean switches.
parser = optparse.OptionParser()
parser.add_option(
    "-R", "--skip-re",
    dest="skip_re",
    action="store_true",
    help="skip regular expression tests",
)
parser.add_option(
    "-8", "--8-bit",
    dest="bytes_only",
    action="store_true",
    help="only do 8-bit string benchmarks",
)
parser.add_option(
    "-u", "--unicode",
    dest="unicode_only",
    action="store_true",
    help="only do Unicode string benchmarks",
)
50
51
52
_RANGE_1000 = list(range(1000))
53
_RANGE_100 = list(range(100))
54
_RANGE_10 = list(range(10))
55
56
# Names of every function registered through @bench so far.
dups = {}


def bench(s, group, repeat_count):
    # Decorator factory that tags a function as a benchmark.
    #   s            -- human-readable description, stored as ``comment``
    #   group        -- label stored as ``group``
    #   repeat_count -- stored as ``repeat_count``; the benchmarks in this
    #                   file pass the number of iterations of their own loop
    # The function itself is returned unchanged apart from the new
    # attributes.  Registering two functions with the same __name__ raises
    # AssertionError.
    def decorate(func):
        name = func.__name__
        if name in dups:
            raise AssertionError("Multiple functions with same name: %r" %
                                 (name,))
        dups[name] = 1
        func.comment = s
        func.group = group
        func.repeat_count = repeat_count
        func.is_bench = True
        return func
    return decorate
69
70
def uses_re(f):
    # Decorator marking a benchmark as regular-expression based by setting
    # ``f.uses_re = True`` (presumably what the --skip-re option looks at;
    # the consumer is outside this part of the file).
    # The function must be handed back: a decorator that returns None would
    # rebind the decorated name to None.
    f.uses_re = True
    return f
72
73
####### 'in' comparisons
74
75
@bench('"A" in "A"*1000', "early match, single character", 1000)
def in_test_quick_match_single_character(STR):
    # One-character needle that is present at index 0 of the haystack.
    needle = STR("A")
    haystack = STR("A" * 1000)
    for _ in _RANGE_1000:
        needle in haystack
81
82
@bench('"B" in "A"*1000', "no match, single character", 1000)
def in_test_no_match_single_character(STR):
    # One-character needle that never occurs in the haystack.
    needle = STR("B")
    haystack = STR("A" * 1000)
    for _ in _RANGE_1000:
        needle in haystack
88
89
90
@bench('"AB" in "AB"*1000', "early match, two characters", 1000)
def in_test_quick_match_two_characters(STR):
    # Two-character needle that is present at index 0 of the haystack.
    needle = STR("AB")
    haystack = STR("AB" * 1000)
    for _ in _RANGE_1000:
        needle in haystack
96
97
@bench('"BC" in "AB"*1000', "no match, two characters", 1000)
def in_test_no_match_two_character(STR):
    # Two-character needle whose first character occurs but the pair never does.
    needle = STR("BC")
    haystack = STR("AB" * 1000)
    for _ in _RANGE_1000:
        needle in haystack
103
104
@bench('"BC" in ("AB"*300+"C")', "late match, two characters", 1000)
def in_test_slow_match_two_characters(STR):
    # The only "BC" is formed by the final two characters of the haystack.
    needle = STR("BC")
    haystack = STR("AB" * 300 + "C")
    for _ in _RANGE_1000:
        needle in haystack
110
111
@bench('s="ABC"*33; (s+"E") in ((s+"D")*300+s+"E")',
       "late match, 100 characters", 100)
def in_test_slow_match_100_characters(STR):
    # 100-character needle (99-character stem + "E") that only matches at
    # the very end, after 300 near-misses ending in "D".
    stem = STR("ABC" * 33)
    d_ch = STR("D")
    e_ch = STR("E")
    needle = stem + e_ch
    haystack = (stem + d_ch) * 300 + needle
    for _ in _RANGE_100:
        needle in haystack
121
122
# Try with regex
123
@uses_re
@bench('s="ABC"*33; re.compile(s+"E").search((s+"D")*300+s+"E")',
       "late match, 100 characters", 100)
def re_test_slow_match_100_characters(STR):
    # Regular-expression counterpart of the 100-character late-match "in"
    # test: the compiled pattern is s+"E", which occurs only at the very end
    # of the text, after 300 near-misses ending in "D".
    m = STR("ABC"*33)
    d = STR("D")
    e = STR("E")
    s1 = (m+d)*300 + m+e
    s2 = m+e
    pat = re.compile(s2)
    # Bind the method once so the loop times only the search itself.
    search = pat.search
    for x in _RANGE_100:
        search(s1)
136
137
138
#### same tests as 'in' but use 'find'
139
140
@bench('("A"*1000).find("A")', "early match, single character", 1000)
def find_test_quick_match_single_character(STR):
    # One-character needle found at index 0.
    needle = STR("A")
    haystack = STR("A" * 1000)
    find = haystack.find
    for _ in _RANGE_1000:
        find(needle)
147
148
@bench('("A"*1000).find("B")', "no match, single character", 1000)
def find_test_no_match_single_character(STR):
    # One-character needle that never occurs.
    needle = STR("B")
    haystack = STR("A" * 1000)
    find = haystack.find
    for _ in _RANGE_1000:
        find(needle)
155
156
157
@bench('("AB"*1000).find("AB")', "early match, two characters", 1000)
def find_test_quick_match_two_characters(STR):
    # Two-character needle found at index 0.
    needle = STR("AB")
    haystack = STR("AB" * 1000)
    find = haystack.find
    for _ in _RANGE_1000:
        find(needle)
164
165
@bench('("AB"*1000).find("BC")', "no match, two characters", 1000)
def find_test_no_match_two_character(STR):
    # Needle's first character occurs in the haystack, its second never does.
    needle = STR("BC")
    haystack = STR("AB" * 1000)
    find = haystack.find
    for _ in _RANGE_1000:
        find(needle)
172
173
@bench('("AB"*1000).find("CA")', "no match, two characters", 1000)
def find_test_no_match_two_character_bis(STR):
    # Variant where the needle's first character never occurs but its
    # second one does.
    needle = STR("CA")
    haystack = STR("AB" * 1000)
    find = haystack.find
    for _ in _RANGE_1000:
        find(needle)
180
181
@bench('("AB"*300+"C").find("BC")', "late match, two characters", 1000)
def find_test_slow_match_two_characters(STR):
    # The only "BC" is formed by the last two characters of the haystack.
    needle = STR("BC")
    haystack = STR("AB" * 300 + "C")
    find = haystack.find
    for _ in _RANGE_1000:
        find(needle)
188
189
@bench('("AB"*300+"CA").find("CA")', "late match, two characters", 1000)
def find_test_slow_match_two_characters_bis(STR):
    # The only "CA" is the last two characters of the haystack.
    needle = STR("CA")
    haystack = STR("AB" * 300 + "CA")
    find = haystack.find
    for _ in _RANGE_1000:
        find(needle)
196
197
@bench('s="ABC"*33; ((s+"D")*500+s+"E").find(s+"E")',
       "late match, 100 characters", 100)
def find_test_slow_match_100_characters(STR):
    # 100-character needle differing from 500 preceding near-misses only in
    # its final character; the match is at the very end.
    stem = STR("ABC" * 33)
    d_ch = STR("D")
    e_ch = STR("E")
    needle = stem + e_ch
    haystack = (stem + d_ch) * 500 + needle
    find = haystack.find
    for _ in _RANGE_100:
        find(needle)
208
209
@bench('s="ABC"*33; ((s+"D")*500+"E"+s).find("E"+s)',
       "late match, 100 characters", 100)
def find_test_slow_match_100_characters_bis(STR):
    # Variant where the distinguishing "E" is the needle's first character.
    stem = STR("ABC" * 33)
    d_ch = STR("D")
    e_ch = STR("E")
    needle = e_ch + stem
    haystack = (stem + d_ch) * 500 + needle
    find = haystack.find
    for _ in _RANGE_100:
        find(needle)
220
221
222
#### Same tests for 'rfind'
223
224
@bench('("A"*1000).rfind("A")', "early match, single character", 1000)
def rfind_test_quick_match_single_character(STR):
    # The last character of the haystack already matches.
    needle = STR("A")
    haystack = STR("A" * 1000)
    rfind = haystack.rfind
    for _ in _RANGE_1000:
        rfind(needle)
231
232
@bench('("A"*1000).rfind("B")', "no match, single character", 1000)
def rfind_test_no_match_single_character(STR):
    # One-character needle that never occurs.
    needle = STR("B")
    haystack = STR("A" * 1000)
    rfind = haystack.rfind
    for _ in _RANGE_1000:
        rfind(needle)
239
240
241
@bench('("AB"*1000).rfind("AB")', "early match, two characters", 1000)
def rfind_test_quick_match_two_characters(STR):
    # The last two characters of the haystack already match.
    needle = STR("AB")
    haystack = STR("AB" * 1000)
    rfind = haystack.rfind
    for _ in _RANGE_1000:
        rfind(needle)
248
249
@bench('("AB"*1000).rfind("BC")', "no match, two characters", 1000)
def rfind_test_no_match_two_character(STR):
    # Needle's first character occurs in the haystack, its second never does.
    needle = STR("BC")
    haystack = STR("AB" * 1000)
    rfind = haystack.rfind
    for _ in _RANGE_1000:
        rfind(needle)
256
257
@bench('("AB"*1000).rfind("CA")', "no match, two characters", 1000)
def rfind_test_no_match_two_character_bis(STR):
    # Variant where the needle's first character never occurs but its
    # second one does.
    needle = STR("CA")
    haystack = STR("AB" * 1000)
    rfind = haystack.rfind
    for _ in _RANGE_1000:
        rfind(needle)
264
265
@bench('("C"+"AB"*300).rfind("CA")', "late match, two characters", 1000)
def rfind_test_slow_match_two_characters(STR):
    # The only "CA" is at index 0, the far end for a right-to-left search.
    needle = STR("CA")
    haystack = STR("C" + "AB" * 300)
    rfind = haystack.rfind
    for _ in _RANGE_1000:
        rfind(needle)
272
273
@bench('("BC"+"AB"*300).rfind("BC")', "late match, two characters", 1000)
def rfind_test_slow_match_two_characters_bis(STR):
    # The only "BC" is at index 0 of the haystack.
    needle = STR("BC")
    haystack = STR("BC" + "AB" * 300)
    rfind = haystack.rfind
    for _ in _RANGE_1000:
        rfind(needle)
280
281
@bench('s="ABC"*33; ("E"+s+("D"+s)*500).rfind("E"+s)',
       "late match, 100 characters", 100)
def rfind_test_slow_match_100_characters(STR):
    # 100-character needle located at index 0, followed by 500 near-misses
    # that differ only in their first character.
    stem = STR("ABC" * 33)
    d_ch = STR("D")
    e_ch = STR("E")
    needle = e_ch + stem
    haystack = needle + (d_ch + stem) * 500
    rfind = haystack.rfind
    for _ in _RANGE_100:
        rfind(needle)
292
293
@bench('s="ABC"*33; (s+"E"+("D"+s)*500).rfind(s+"E")',
       "late match, 100 characters", 100)
def rfind_test_slow_match_100_characters_bis(STR):
    # Variant where the distinguishing "E" is the needle's last character.
    stem = STR("ABC" * 33)
    d_ch = STR("D")
    e_ch = STR("E")
    needle = stem + e_ch
    haystack = needle + (d_ch + stem) * 500
    rfind = haystack.rfind
    for _ in _RANGE_100:
        rfind(needle)
304
305
306
#### Now with index.
307
# Skip the ones which fail because that would include exception overhead.
308
309
@bench('("A"*1000).index("A")', "early match, single character", 1000)
def index_test_quick_match_single_character(STR):
    # One-character needle found at index 0.
    needle = STR("A")
    haystack = STR("A" * 1000)
    index = haystack.index
    for _ in _RANGE_1000:
        index(needle)
316
317
@bench('("AB"*1000).index("AB")', "early match, two characters", 1000)
def index_test_quick_match_two_characters(STR):
    # Two-character needle found at index 0.
    needle = STR("AB")
    haystack = STR("AB" * 1000)
    index = haystack.index
    for _ in _RANGE_1000:
        index(needle)
324
325
@bench('("AB"*300+"C").index("BC")', "late match, two characters", 1000)
def index_test_slow_match_two_characters(STR):
    # The only "BC" is formed by the last two characters of the haystack.
    needle = STR("BC")
    haystack = STR("AB" * 300 + "C")
    index = haystack.index
    for _ in _RANGE_1000:
        index(needle)
332
333
@bench('s="ABC"*33; ((s+"D")*500+s+"E").index(s+"E")',
       "late match, 100 characters", 100)
def index_test_slow_match_100_characters(STR):
    # 100-character needle that matches only at the very end, after 500
    # near-misses ending in "D".
    stem = STR("ABC" * 33)
    d_ch = STR("D")
    e_ch = STR("E")
    needle = stem + e_ch
    haystack = (stem + d_ch) * 500 + needle
    index = haystack.index
    for _ in _RANGE_100:
        index(needle)
344
345
346
#### Same for rindex
347
348
@bench('("A"*1000).rindex("A")', "early match, single character", 1000)
def rindex_test_quick_match_single_character(STR):
    # The last character of the haystack already matches.
    needle = STR("A")
    haystack = STR("A" * 1000)
    rindex = haystack.rindex
    for _ in _RANGE_1000:
        rindex(needle)
355
356
@bench('("AB"*1000).rindex("AB")', "early match, two characters", 1000)
def rindex_test_quick_match_two_characters(STR):
    # The last two characters of the haystack already match.
    needle = STR("AB")
    haystack = STR("AB" * 1000)
    rindex = haystack.rindex
    for _ in _RANGE_1000:
        rindex(needle)
363
364
@bench('("C"+"AB"*300).rindex("CA")', "late match, two characters", 1000)
def rindex_test_slow_match_two_characters(STR):
    # The only "CA" is at index 0, the far end for a right-to-left search.
    needle = STR("CA")
    haystack = STR("C" + "AB" * 300)
    rindex = haystack.rindex
    for _ in _RANGE_1000:
        rindex(needle)
371
372
@bench('s="ABC"*33; ("E"+s+("D"+s)*500).rindex("E"+s)',
       "late match, 100 characters", 100)
def rindex_test_slow_match_100_characters(STR):
    # 100-character needle located at index 0, followed by 500 near-misses
    # that differ only in their first character.
    stem = STR("ABC" * 33)
    d_ch = STR("D")
    e_ch = STR("E")
    needle = e_ch + stem
    haystack = needle + (d_ch + stem) * 500
    rindex = haystack.rindex
    for _ in _RANGE_100:
        rindex(needle)
383
384
385
#### Same for partition
386
387
@bench('("A"*1000).partition("A")', "early match, single character", 1000)
def partition_test_quick_match_single_character(STR):
    # Separator found at index 0.
    sep = STR("A")
    text = STR("A" * 1000)
    partition = text.partition
    for _ in _RANGE_1000:
        partition(sep)
394
395
@bench('("A"*1000).partition("B")', "no match, single character", 1000)
def partition_test_no_match_single_character(STR):
    # Separator never occurs in the text.
    sep = STR("B")
    text = STR("A" * 1000)
    partition = text.partition
    for _ in _RANGE_1000:
        partition(sep)
402
403
404
@bench('("AB"*1000).partition("AB")', "early match, two characters", 1000)
def partition_test_quick_match_two_characters(STR):
    # Two-character separator found at index 0.
    sep = STR("AB")
    text = STR("AB" * 1000)
    partition = text.partition
    for _ in _RANGE_1000:
        partition(sep)
411
412
@bench('("AB"*1000).partition("BC")', "no match, two characters", 1000)
def partition_test_no_match_two_character(STR):
    # Two-character separator that never occurs in the text.
    sep = STR("BC")
    text = STR("AB" * 1000)
    partition = text.partition
    for _ in _RANGE_1000:
        partition(sep)
419
420
@bench('("AB"*300+"C").partition("BC")', "late match, two characters", 1000)
def partition_test_slow_match_two_characters(STR):
    # The only "BC" is formed by the last two characters of the text.
    sep = STR("BC")
    text = STR("AB" * 300 + "C")
    partition = text.partition
    for _ in _RANGE_1000:
        partition(sep)
427
428
@bench('s="ABC"*33; ((s+"D")*500+s+"E").partition(s+"E")',
       "late match, 100 characters", 100)
def partition_test_slow_match_100_characters(STR):
    # 100-character separator that matches only at the very end of the text.
    stem = STR("ABC" * 33)
    d_ch = STR("D")
    e_ch = STR("E")
    sep = stem + e_ch
    text = (stem + d_ch) * 500 + sep
    partition = text.partition
    for _ in _RANGE_100:
        partition(sep)
439
440
441
#### Same for rpartition
442
443
@bench('("A"*1000).rpartition("A")', "early match, single character", 1000)
def rpartition_test_quick_match_single_character(STR):
    # The last character of the text is already the separator.
    sep = STR("A")
    text = STR("A" * 1000)
    rpartition = text.rpartition
    for _ in _RANGE_1000:
        rpartition(sep)
450
451
@bench('("A"*1000).rpartition("B")', "no match, single character", 1000)
def rpartition_test_no_match_single_character(STR):
    # Separator never occurs in the text.
    sep = STR("B")
    text = STR("A" * 1000)
    rpartition = text.rpartition
    for _ in _RANGE_1000:
        rpartition(sep)
458
459
460
@bench('("AB"*1000).rpartition("AB")', "early match, two characters", 1000)
def rpartition_test_quick_match_two_characters(STR):
    # The last two characters of the text are already the separator.
    sep = STR("AB")
    text = STR("AB" * 1000)
    rpartition = text.rpartition
    for _ in _RANGE_1000:
        rpartition(sep)
467
468
@bench('("AB"*1000).rpartition("BC")', "no match, two characters", 1000)
def rpartition_test_no_match_two_character(STR):
    # Two-character separator that never occurs in the text.
    sep = STR("BC")
    text = STR("AB" * 1000)
    rpartition = text.rpartition
    for _ in _RANGE_1000:
        rpartition(sep)
475
476
@bench('("C"+"AB"*300).rpartition("CA")', "late match, two characters", 1000)
def rpartition_test_slow_match_two_characters(STR):
    # The only "CA" is at index 0, the far end for a right-to-left search.
    sep = STR("CA")
    text = STR("C" + "AB" * 300)
    rpartition = text.rpartition
    for _ in _RANGE_1000:
        rpartition(sep)
483
484
@bench('s="ABC"*33; ("E"+s+("D"+s)*500).rpartition("E"+s)',
       "late match, 100 characters", 100)
def rpartition_test_slow_match_100_characters(STR):
    # 100-character separator located at index 0 of the text.
    stem = STR("ABC" * 33)
    d_ch = STR("D")
    e_ch = STR("E")
    sep = e_ch + stem
    text = sep + (d_ch + stem) * 500
    rpartition = text.rpartition
    for _ in _RANGE_100:
        rpartition(sep)
495
496
497
#### Same for split(s, 1)
498
499
@bench('("A"*1000).split("A", 1)', "early match, single character", 1000)
def split_test_quick_match_single_character(STR):
    # At most one split; the separator is found at index 0.
    sep = STR("A")
    text = STR("A" * 1000)
    split = text.split
    for _ in _RANGE_1000:
        split(sep, 1)
506
507
@bench('("A"*1000).split("B", 1)', "no match, single character", 1000)
def split_test_no_match_single_character(STR):
    # At most one split; the separator never occurs.
    sep = STR("B")
    text = STR("A" * 1000)
    split = text.split
    for _ in _RANGE_1000:
        split(sep, 1)
514
515
516
@bench('("AB"*1000).split("AB", 1)', "early match, two characters", 1000)
def split_test_quick_match_two_characters(STR):
    # At most one split; the two-character separator is found at index 0.
    sep = STR("AB")
    text = STR("AB" * 1000)
    split = text.split
    for _ in _RANGE_1000:
        split(sep, 1)
523
524
@bench('("AB"*1000).split("BC", 1)', "no match, two characters", 1000)
def split_test_no_match_two_character(STR):
    # At most one split; the two-character separator never occurs.
    sep = STR("BC")
    text = STR("AB" * 1000)
    split = text.split
    for _ in _RANGE_1000:
        split(sep, 1)
531
532
@bench('("AB"*300+"C").split("BC", 1)', "late match, two characters", 1000)
def split_test_slow_match_two_characters(STR):
    # At most one split; the only "BC" is at the very end of the text.
    sep = STR("BC")
    text = STR("AB" * 300 + "C")
    split = text.split
    for _ in _RANGE_1000:
        split(sep, 1)
539
540
@bench('s="ABC"*33; ((s+"D")*500+s+"E").split(s+"E", 1)',
       "late match, 100 characters", 100)
def split_test_slow_match_100_characters(STR):
    # At most one split; the 100-character separator matches only at the end.
    stem = STR("ABC" * 33)
    d_ch = STR("D")
    e_ch = STR("E")
    sep = stem + e_ch
    text = (stem + d_ch) * 500 + sep
    split = text.split
    for _ in _RANGE_100:
        split(sep, 1)
551
552
553
#### Same for rsplit(s, 1)
554
555
@bench('("A"*1000).rsplit("A", 1)', "early match, single character", 1000)
def rsplit_test_quick_match_single_character(STR):
    # At most one split; the last character is already the separator.
    sep = STR("A")
    text = STR("A" * 1000)
    rsplit = text.rsplit
    for _ in _RANGE_1000:
        rsplit(sep, 1)
562
563
@bench('("A"*1000).rsplit("B", 1)', "no match, single character", 1000)
def rsplit_test_no_match_single_character(STR):
    # At most one split; the separator never occurs.
    sep = STR("B")
    text = STR("A" * 1000)
    rsplit = text.rsplit
    for _ in _RANGE_1000:
        rsplit(sep, 1)
570
571
572
@bench('("AB"*1000).rsplit("AB", 1)', "early match, two characters", 1000)
def rsplit_test_quick_match_two_characters(STR):
    # At most one split; the last two characters are already the separator.
    sep = STR("AB")
    text = STR("AB" * 1000)
    rsplit = text.rsplit
    for _ in _RANGE_1000:
        rsplit(sep, 1)
579
580
@bench('("AB"*1000).rsplit("BC", 1)', "no match, two characters", 1000)
def rsplit_test_no_match_two_character(STR):
    # At most one split; the two-character separator never occurs.
    sep = STR("BC")
    text = STR("AB" * 1000)
    rsplit = text.rsplit
    for _ in _RANGE_1000:
        rsplit(sep, 1)
587
588
@bench('("C"+"AB"*300).rsplit("CA", 1)', "late match, two characters", 1000)
def rsplit_test_slow_match_two_characters(STR):
    # At most one split; the only "CA" is at index 0 of the text.
    sep = STR("CA")
    text = STR("C" + "AB" * 300)
    rsplit = text.rsplit
    for _ in _RANGE_1000:
        rsplit(sep, 1)
595
596
@bench('s="ABC"*33; ("E"+s+("D"+s)*500).rsplit("E"+s, 1)',
       "late match, 100 characters", 100)
def rsplit_test_slow_match_100_characters(STR):
    # At most one split; the 100-character separator sits at index 0.
    stem = STR("ABC" * 33)
    d_ch = STR("D")
    e_ch = STR("E")
    sep = e_ch + stem
    text = sep + (d_ch + stem) * 500
    rsplit = text.rsplit
    for _ in _RANGE_100:
        rsplit(sep, 1)
607
608
609
#### Benchmark the operator-based methods
610
611
@bench('"A"*10', "repeat 1 character 10 times", 1000)
def repeat_single_10_times(STR):
    # Sequence repetition of a one-character string, result discarded.
    unit = STR("A")
    for _ in _RANGE_1000:
        unit * 10
616
617
@bench('"A"*1000', "repeat 1 character 1000 times", 1000)
def repeat_single_1000_times(STR):
    # Sequence repetition of a one-character string, result discarded.
    unit = STR("A")
    for _ in _RANGE_1000:
        unit * 1000
622
623
@bench('"ABCDE"*10', "repeat 5 characters 10 times", 1000)
def repeat_5_10_times(STR):
    # Sequence repetition of a five-character string, result discarded.
    unit = STR("ABCDE")
    for _ in _RANGE_1000:
        unit * 10
628
629
@bench('"ABCDE"*1000', "repeat 5 characters 1000 times", 1000)
def repeat_5_1000_times(STR):
    # Sequence repetition of a five-character string, result discarded.
    unit = STR("ABCDE")
    for _ in _RANGE_1000:
        unit * 1000
634
635
# + for concat
636
637
@bench('"Andrew"+"Dalke"', "concat two strings", 1000)
def concat_two_strings(STR):
    # A single binary "+" on two short strings, result discarded.
    left = STR("Andrew")
    right = STR("Dalke")
    for _ in _RANGE_1000:
        left + right
643
644
@bench('s1+s2+s3+s4+...+s20', "concat 20 strings of words length 4 to 15",
       1000)
def concat_many_strings(STR):
    # Twenty distinct words are bound to twenty locals up front; the timed
    # expression is one chain of nineteen "+" operations on those locals.
    (s1, s2, s3, s4, s5, s6, s7, s8, s9, s10,
     s11, s12, s13, s14, s15, s16, s17, s18, s19, s20) = [
        STR(word) for word in (
            'TIXSGYNREDCVBHJ', 'PUMTLXBZVDO', 'FVZNJ', 'OGDXUW',
            'WEIMRNCOYVGHKB', 'FCQTNMXPUZH', 'TICZJYRLBNVUEAK', 'REYB',
            'PWUOQ', 'EQHCMKBS', 'AEVDFOH', 'IFHVD',
            'JGTCNLXWOHQ', 'ITSKEPYLROZAWXF', 'THEK', 'GHPZFBUYCKMNJIT',
            'JMUZ', 'WLZQMTB', 'KPADCBW', 'TNJHZQAGBU',
        )
    ]
    for _ in _RANGE_1000:
        (s1 + s2 + s3 + s4 + s5 + s6 + s7 + s8 + s9 + s10 +
         s11 + s12 + s13 + s14 + s15 + s16 + s17 + s18 + s19 + s20)
670
671
672
#### Benchmark join
673
674
def get_bytes_yielding_seq(STR, arg):
    # Return STR(arg) for use as the iterable handed to join().
    # Raises UnsupportedType for BYTES on Python 3, where iterating a bytes
    # object yields ints rather than one-byte strings.
    bytes_on_py3 = sys.version_info >= (3,) and STR is BYTES
    if bytes_on_py3:
        raise UnsupportedType
    return STR(arg)
678
679
@bench('"A".join("")',
       "join empty string, with 1 character sep", 100)
def join_empty_single(STR):
    # Joining an empty string-like iterable with a one-character separator.
    glue = STR("A")
    items = get_bytes_yielding_seq(STR, "")
    join = glue.join
    for _ in _RANGE_100:
        join(items)
687
688
@bench('"ABCDE".join("")',
       "join empty string, with 5 character sep", 100)
def join_empty_5(STR):
    # Joining an empty string-like iterable with a five-character separator.
    glue = STR("ABCDE")
    items = get_bytes_yielding_seq(STR, "")
    join = glue.join
    for _ in _RANGE_100:
        join(items)
696
697
@bench('"A".join("ABC..Z")',
       "join string with 26 characters, with 1 character sep", 1000)
def join_alphabet_single(STR):
    # The iterable is itself a 26-character string, joined character by
    # character with a one-character separator.
    glue = STR("A")
    items = get_bytes_yielding_seq(STR, "ABCDEFGHIJKLMnOPQRSTUVWXYZ")
    join = glue.join
    for _ in _RANGE_1000:
        join(items)
705
706
@bench('"ABCDE".join("ABC..Z")',
       "join string with 26 characters, with 5 character sep", 1000)
def join_alphabet_5(STR):
    # The iterable is itself a 26-character string, joined character by
    # character with a five-character separator.
    glue = STR("ABCDE")
    items = get_bytes_yielding_seq(STR, "ABCDEFGHIJKLMnOPQRSTUVWXYZ")
    join = glue.join
    for _ in _RANGE_1000:
        join(items)
714
715
@bench('"A".join(list("ABC..Z"))',
       "join list of 26 characters, with 1 character sep", 1000)
def join_alphabet_list_single(STR):
    # A list of 26 one-character strings, one-character separator.
    glue = STR("A")
    items = [STR(ch) for ch in "ABCDEFGHIJKLMnOPQRSTUVWXYZ"]
    join = glue.join
    for _ in _RANGE_1000:
        join(items)
723
724
@bench('"ABCDE".join(list("ABC..Z"))',
       "join list of 26 characters, with 5 character sep", 1000)
def join_alphabet_list_five(STR):
    # A list of 26 one-character strings, five-character separator.
    glue = STR("ABCDE")
    items = [STR(ch) for ch in "ABCDEFGHIJKLMnOPQRSTUVWXYZ"]
    join = glue.join
    for _ in _RANGE_1000:
        join(items)
732
733
@bench('"A".join(["Bob"]*100)',
       "join list of 100 words, with 1 character sep", 1000)
def join_100_words_single(STR):
    # A list holding the same three-character word 100 times.
    glue = STR("A")
    items = [STR("Bob")] * 100
    join = glue.join
    for _ in _RANGE_1000:
        join(items)
741
742
@bench('"ABCDE".join(["Bob"]*100)',
       "join list of 100 words, with 5 character sep", 1000)
def join_100_words_5(STR):
    # Join a list holding the same three-character word 100 times, using a
    # five-character separator.
    sep = STR("ABCDE")
    s2 = [STR("Bob")]*100
    # Bind the method once so the loop times only the join call.
    sep_join = sep.join
    for x in _RANGE_1000:
        sep_join(s2)
750
751
#### split tests
752
753
@bench('("Here are some words. "*2).split()', "split whitespace (small)", 1000)
def whitespace_split(STR):
    # Argument-less split() on a short sentence repeated twice.
    text = STR("Here are some words. " * 2)
    split = text.split
    for _ in _RANGE_1000:
        split()
759
760
@bench('("Here are some words. "*2).rsplit()', "split whitespace (small)", 1000)
def whitespace_rsplit(STR):
    # Argument-less rsplit() on a short sentence repeated twice.
    text = STR("Here are some words. " * 2)
    rsplit = text.rsplit
    for _ in _RANGE_1000:
        rsplit()
766
767
@bench('("Here are some words. "*2).split(None, 1)',
       "split 1 whitespace", 1000)
def whitespace_split_1(STR):
    # Whitespace split limited to one split; None is kept in a local so the
    # loop passes it as an ordinary variable.
    text = STR("Here are some words. " * 2)
    no_sep = None
    split = text.split
    for _ in _RANGE_1000:
        split(no_sep, 1)
775
776
@bench('("Here are some words. "*2).rsplit(None, 1)',
       "split 1 whitespace", 1000)
def whitespace_rsplit_1(STR):
    # Whitespace rsplit limited to one split; None is kept in a local so
    # the loop passes it as an ordinary variable.
    text = STR("Here are some words. " * 2)
    no_sep = None
    rsplit = text.rsplit
    for _ in _RANGE_1000:
        rsplit(no_sep, 1)
784
785
@bench('("Here are some words. "*2).partition(" ")',
       "split 1 whitespace", 1000)
def whitespace_partition(STR):
    # partition() on a single space.
    text = STR("Here are some words. " * 2)
    space = STR(" ")
    partition = text.partition
    for _ in _RANGE_1000:
        partition(space)
793
794
@bench('("Here are some words. "*2).rpartition(" ")',
       "split 1 whitespace", 1000)
def whitespace_rpartition(STR):
    # rpartition() on a single space.
    text = STR("Here are some words. " * 2)
    space = STR(" ")
    rpartition = text.rpartition
    for _ in _RANGE_1000:
        rpartition(space)
802
803
human_text = """\
804
Python is a dynamic object-oriented programming language that can be
805
used for many kinds of software development. It offers strong support
806
for integration with other languages and tools, comes with extensive
807
standard libraries, and can be learned in a few days. Many Python
808
programmers report substantial productivity gains and feel the language
809
encourages the development of higher quality, more maintainable code.
810
811
Python runs on Windows, Linux/Unix, Mac OS X, Amiga, Palm
812
Handhelds, and Nokia mobile phones. Python has also been ported to the
813
Java and .NET virtual machines.
814
815
Python is distributed under an OSI-approved open source license that
816
makes it free to use, even for commercial products.
817
"""*25
818
# Both flavours of the sample prose are converted once, at import time.
human_text_bytes = bytes_from_str(human_text)
human_text_unicode = unicode_from_str(human_text)


def _get_human_text(STR):
    # Return the pre-converted prose sample matching the requested string
    # type.  STR must be the BYTES or UNICODE converter object itself (the
    # check is by identity); anything else raises AssertionError.
    if STR is BYTES:
        return human_text_bytes
    if STR is UNICODE:
        return human_text_unicode
    raise AssertionError
826
827
@bench('human_text.split()', "split whitespace (huge)", 10)
def whitespace_split_huge(STR):
    # Argument-less split() on the large prose sample.
    text = _get_human_text(STR)
    split = text.split
    for _ in _RANGE_10:
        split()
833
834
@bench('human_text.rsplit()', "split whitespace (huge)", 10)
def whitespace_rsplit_huge(STR):
    # Argument-less rsplit() on the large prose sample.
    text = _get_human_text(STR)
    rsplit = text.rsplit
    for _ in _RANGE_10:
        rsplit()
840
841
842
843
@bench('"this\\nis\\na\\ntest\\n".split("\\n")', "split newlines", 1000)
def newlines_split(STR):
    # Explicit-separator split of a four-line text.
    newline = STR("\n")
    text = STR("this\nis\na\ntest\n")
    split = text.split
    for _ in _RANGE_1000:
        split(newline)
850
851
852
@bench('"this\\nis\\na\\ntest\\n".rsplit("\\n")', "split newlines", 1000)
def newlines_rsplit(STR):
    # Explicit-separator rsplit of a four-line text.
    newline = STR("\n")
    text = STR("this\nis\na\ntest\n")
    rsplit = text.rsplit
    for _ in _RANGE_1000:
        rsplit(newline)
859
860
@bench('"this\\nis\\na\\ntest\\n".splitlines()', "split newlines", 1000)
def newlines_splitlines(STR):
    # splitlines() on a four-line text.
    text = STR("this\nis\na\ntest\n")
    splitlines = text.splitlines
    for _ in _RANGE_1000:
        splitlines()
866
867
## split text with 2000 newlines
868
869
def _make_2000_lines():
870
import random
871
r = random.Random(100)
872
chars = list(map(chr, range(32, 128)))
873
i = 0
874
while i < len(chars):
875
chars[i] = " "
876
i += r.randrange(9)
877
s = "".join(chars)
878
s = s*4
879
words = []
880
for i in range(2000):
881
start = r.randrange(96)
882
n = r.randint(5, 65)
883
words.append(s[start:start+n])
884
return "\n".join(words)+"\n"
885
886
# The 2000-line sample is generated once and converted to both flavours.
_text_with_2000_lines = _make_2000_lines()
_text_with_2000_lines_bytes = bytes_from_str(_text_with_2000_lines)
_text_with_2000_lines_unicode = unicode_from_str(_text_with_2000_lines)


def _get_2000_lines(STR):
    # Return the pre-converted 2000-line sample for the requested string
    # type.  STR must be the BYTES or UNICODE converter object itself (the
    # check is by identity); anything else raises AssertionError.
    if STR is BYTES:
        return _text_with_2000_lines_bytes
    if STR is UNICODE:
        return _text_with_2000_lines_unicode
    raise AssertionError
895
896
897
@bench('"...text...".split("\\n")', "split 2000 newlines", 10)
def newlines_split_2000(STR):
    # Explicit-separator split of the 2000-line sample.
    newline = STR("\n")
    text = _get_2000_lines(STR)
    split = text.split
    for _ in _RANGE_10:
        split(newline)
904
905
@bench('"...text...".rsplit("\\n")', "split 2000 newlines", 10)
def newlines_rsplit_2000(STR):
    # Explicit-separator rsplit of the 2000-line sample.
    newline = STR("\n")
    text = _get_2000_lines(STR)
    rsplit = text.rsplit
    for _ in _RANGE_10:
        rsplit(newline)
912
913
@bench('"...text...".splitlines()', "split 2000 newlines", 10)
def newlines_splitlines_2000(STR):
    # splitlines() on the 2000-line sample.
    text = _get_2000_lines(STR)
    splitlines = text.splitlines
    for _ in _RANGE_10:
        splitlines()
919
920
921
## split text on "--" characters
922
@bench(
    '"this--is--a--test--of--the--emergency--broadcast--system".split("--")',
    "split on multicharacter separator (small)", 1000)
def split_multichar_sep_small(STR):
    # Unlimited split of a short text on the two-character separator "--".
    dashes = STR("--")
    text = STR("this--is--a--test--of--the--emergency--broadcast--system")
    split = text.split
    for _ in _RANGE_1000:
        split(dashes)
931
@bench(
    '"this--is--a--test--of--the--emergency--broadcast--system".rsplit("--")',
    "split on multicharacter separator (small)", 1000)
def rsplit_multichar_sep_small(STR):
    # Unlimited rsplit of a short text on the two-character separator "--".
    dashes = STR("--")
    text = STR("this--is--a--test--of--the--emergency--broadcast--system")
    rsplit = text.rsplit
    for _ in _RANGE_1000:
        rsplit(dashes)
940
941
## split dna text on "ACTAT" characters
942
@bench('dna.split("ACTAT")',
       "split on multicharacter separator (dna)", 10)
def split_multichar_sep_dna(STR):
    # Unlimited split of the DNA sample on a five-character separator.
    motif = STR("ACTAT")
    seq = _get_dna(STR)
    split = seq.split
    for _ in _RANGE_10:
        split(motif)
950
951
@bench('dna.rsplit("ACTAT")',
       "split on multicharacter separator (dna)", 10)
def rsplit_multichar_sep_dna(STR):
    # Unlimited rsplit of the DNA sample on a five-character separator.
    motif = STR("ACTAT")
    seq = _get_dna(STR)
    rsplit = seq.rsplit
    for _ in _RANGE_10:
        rsplit(motif)
959
960
961
962
## split with limits
963
964
# A single tab-separated record with nine fields, used by the tab-split
# benchmarks.
GFF3_example = "\t".join((
    "I",
    "Genomic_canonical",
    "region",
    "357208",
    "396183",
    ".",
    "+",
    ".",
    "ID=Sequence:R119;note=Clone R119%3B Genbank AF063007;Name=R119",
))
967
968
@bench('GFF3_example.split("\\t")', "tab split", 1000)
def tab_split_no_limit(STR):
    # Unlimited split of the sample record on tabs.
    record = STR(GFF3_example)
    tab = STR("\t")
    split = record.split
    for _ in _RANGE_1000:
        split(tab)
975
976
@bench('GFF3_example.split("\\t", 8)', "tab split", 1000)
def tab_split_limit(STR):
    # Tab split with maxsplit=8, which equals the number of tabs present.
    record = STR(GFF3_example)
    tab = STR("\t")
    split = record.split
    for _ in _RANGE_1000:
        split(tab, 8)
983
984
@bench('GFF3_example.rsplit("\\t")', "tab split", 1000)
def tab_rsplit_no_limit(STR):
    # Unlimited rsplit of the sample record on tabs.
    record = STR(GFF3_example)
    tab = STR("\t")
    rsplit = record.rsplit
    for _ in _RANGE_1000:
        rsplit(tab)
991
992
@bench('GFF3_example.rsplit("\\t", 8)', "tab split", 1000)
def tab_rsplit_limit(STR):
    # Tab rsplit with maxsplit=8, which equals the number of tabs present.
    record = STR(GFF3_example)
    tab = STR("\t")
    rsplit = record.rsplit
    for _ in _RANGE_1000:
        rsplit(tab, 8)
999
1000
#### Count characters
1001
1002
@bench('...text.with.2000.newlines.count("\\n")',
       "count newlines", 10)
def count_newlines(STR):
    # count() of a one-character needle over the 2000-line sample.
    newline = STR("\n")
    text = _get_2000_lines(STR)
    count = text.count
    for _ in _RANGE_10:
        count(newline)
1010
1011
# Orchid sequences concatenated, from Biopython
1012
_dna = """
1013
CGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTGTTGAGATCACATAATAATTGATCGGGTT
1014
AATCTGGAGGATCTGTTTACTTTGGTCACCCATGAGCATTTGCTGTTGAAGTGACCTAGAATTGCCATCG
1015
AGCCTCCTTGGGAGCTTTCTTGTTGGCGAGATCTAAACCCTTGCCCGGCGCAGTTTTGCTCCAAGTCGTT
1016
TGACACATAATTGGTGAAGGGGGTGGCATCCTTCCCTGACCCTCCCCCAACTATTTTTTTAACAACTCTC
1017
AGCAACGGAGACTCAGTCTTCGGCAAATGCGATAAATGGTGTGAATTGCAGAATCCCGTGCACCATCGAG
1018
TCTTTGAACGCAAGTTGCGCCCGAGGCCATCAGGCCAAGGGCACGCCTGCCTGGGCATTGCGAGTCATAT
1019
CTCTCCCTTAACGAGGCTGTCCATACATACTGTTCAGCCGGTGCGGATGTGAGTTTGGCCCCTTGTTCTT
1020
TGGTACGGGGGGTCTAAGAGCTGCATGGGCTTTTGATGGTCCTAAATACGGCAAGAGGTGGACGAACTAT
1021
GCTACAACAAAATTGTTGTGCAGAGGCCCCGGGTTGTCGTATTAGATGGGCCACCGTAATCTGAAGACCC
1022
TTTTGAACCCCATTGGAGGCCCATCAACCCATGATCAGTTGATGGCCATTTGGTTGCGACCCCAGGTCAG
1023
GTGAGCAACAGCTGTCGTAACAAGGTTTCCGTAGGGTGAACTGCGGAAGGATCATTGTTGAGATCACATA
1024
ATAATTGATCGAGTTAATCTGGAGGATCTGTTTACTTGGGTCACCCATGGGCATTTGCTGTTGAAGTGAC
1025
CTAGATTTGCCATCGAGCCTCCTTGGGAGCATCCTTGTTGGCGATATCTAAACCCTCAATTTTTCCCCCA
1026
ATCAAATTACACAAAATTGGTGGAGGGGGTGGCATTCTTCCCTTACCCTCCCCCAAATATTTTTTTAACA
1027
ACTCTCAGCAACGGATATCTCAGCTCTTGCATCGATGAAGAACCCACCGAAATGCGATAAATGGTGTGAA
1028
TTGCAGAATCCCGTGAACCATCGAGTCTTTGAACGCAAGTTGCGCCCGAGGCCATCAGGCCAAGGGCACG
1029
CCTGCCTGGGCATTGCGAGTCATATCTCTCCCTTAACGAGGCTGTCCATACATACTGTTCAGCCGGTGCG
1030
GATGTGAGTTTGGCCCCTTGTTCTTTGGTACGGGGGGTCTAAGAGATGCATGGGCTTTTGATGGTCCTAA
1031
ATACGGCAAGAGGTGGACGAACTATGCTACAACAAAATTGTTGTGCAAAGGCCCCGGGTTGTCGTATAAG
1032
ATGGGCCACCGATATCTGAAGACCCTTTTGGACCCCATTGGAGCCCATCAACCCATGTCAGTTGATGGCC
1033
ATTCGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTGTTGAGATCACATAATAATTGATCGA
1034
GTTAATCTGGAGGATCTGTTTACTTGGGTCACCCATGGGCATTTGCTGTTGAAGTGACCTAGATTTGCCA
1035
TCGAGCCTCCTTGGGAGCTTTCTTGTTGGCGATATCTAAACCCTTGCCCGGCAGAGTTTTGGGAATCCCG
1036
TGAACCATCGAGTCTTTGAACGCAAGTTGCGCCCGAGGCCATCAGGCCAAGGGCACGCCTGCCTGGGCAT
1037
TGCGAGTCATATCTCTCCCTTAACGAGGCTGTCCATACACACCTGTTCAGCCGGTGCGGATGTGAGTTTG
1038
GCCCCTTGTTCTTTGGTACGGGGGGTCTAAGAGCTGCATGGGCTTTTGATGGTCCTAAATACGGCAAGAG
1039
GTGGACGAACTATGCTACAACAAAATTGTTGTGCAAAGGCCCCGGGTTGTCGTATTAGATGGGCCACCAT
1040
AATCTGAAGACCCTTTTGAACCCCATTGGAGGCCCATCAACCCATGATCAGTTGATGGCCATTTGGTTGC
1041
GACCCAGTCAGGTGAGGGTAGGTGAACCTGCGGAAGGATCATTGTTGAGATCACATAATAATTGATCGAG
1042
TTAATCTGGAGGATCTGTTTACTTTGGTCACCCATGGGCATTTGCTGTTGAAGTGACCTAGATTTGCCAT
1043
CGAGCCTCCTTGGGAGCTTTCTTGTTGGCGAGATCTAAACCCTTGCCCGGCGGAGTTTGGCGCCAAGTCA
1044
TATGACACATAATTGGTGAAGGGGGTGGCATCCTGCCCTGACCCTCCCCAAATTATTTTTTTAACAACTC
1045
TCAGCAACGGATATCTCGGCTCTTGCATCGATGAAGAACGCAGCGAAATGCGATAAATGGTGTGAATTGC
1046
AGAATCCCGTGAACCATCGAGTCTTTGGAACGCAAGTTGCGCCCGAGGCCATCAGGCCAAGGGCACGCCT
1047
GCCTGGGCATTGGGAATCATATCTCTCCCCTAACGAGGCTATCCAAACATACTGTTCATCCGGTGCGGAT
1048
GTGAGTTTGGCCCCTTGTTCTTTGGTACCGGGGGTCTAAGAGCTGCATGGGCATTTGATGGTCCTCAAAA
1049
CGGCAAGAGGTGGACGAACTATGCCACAACAAAATTGTTGTCCCAAGGCCCCGGGTTGTCGTATTAGATG
1050
GGCCACCGTAACCTGAAGACCCTTTTGAACCCCATTGGAGGCCCATCAACCCATGATCAGTTGATGACCA
1051
TTTGTTGCGACCCCAGTCAGCTGAGCAACCCGCTGAGTGGAAGGTCATTGCCGATATCACATAATAATTG
1052
ATCGAGTTAATCTGGAGGATCTGTTTACTTGGTCACCCATGAGCATTTGCTGTTGAAGTGACCTAGATTT
1053
GCCATCGAGCCTCCTTGGGAGTTTTCTTGTTGGCGAGATCTAAACCCTTGCCCGGCGGAGTTGTGCGCCA
1054
AGTCATATGACACATAATTGGTGAAGGGGGTGGCATCCTGCCCTGACCCTCCCCAAATTATTTTTTTAAC
1055
AACTCTCAGCAACGGATATCTCGGCTCTTGCATCGATGAAGAACGCAGCGAAATGCGATAAATGGTGTGA
1056
ATTGCAGAATCCCGTGAACCATCGAGTCTTTGAACGCAAGTTGCGCCCGAGGCCATCAGGCCAAGGGCAC
1057
GCCTGCCTGGGCATTGCGAGTCATATCTCTCCCTTAACGAGGCTGTCCATACATACTGTTCATCCGGTGC
1058
GGATGTGAGTTTGGCCCCTTGTTCTTTGGTACGGGGGGTCTAAGAGCTGCATGGGCATTTGATGGTCCTC
1059
AAAACGGCAAGAGGTGGACGAACTATGCTACAACCAAATTGTTGTCCCAAGGCCCCGGGTTGTCGTATTA
1060
GATGGGCCACCGTAACCTGAAGACCCTTTTGAACCCCATTGGAGGCCCATCAACCCATGATCAGTTGATG
1061
ACCATGTGTTGCGACCCCAGTCAGCTGAGCAACGCGCTGAGCGTAACAAGGTTTCCGTAGGTGGACCTCC
1062
GGGAGGATCATTGTTGAGATCACATAATAATTGATCGAGGTAATCTGGAGGATCTGCATATTTTGGTCAC
1063
"""
1064
_dna = "".join(_dna.splitlines())
1065
_dna = _dna * 25
1066
_dna_bytes = bytes_from_str(_dna)
1067
_dna_unicode = unicode_from_str(_dna)
1068
1069
def _get_dna(STR):
    """Return the DNA sample in the string type selected by STR.

    STR is compared by identity against the BYTES and UNICODE converters;
    any other object raises AssertionError.
    """
    if STR is BYTES:
        return _dna_bytes
    if STR is UNICODE:
        return _dna_unicode
    raise AssertionError
1075
1076
@bench('dna.count("AACT")', "count AACT substrings in DNA example", 10)
def count_aact(STR):
    """Count occurrences of "AACT" in the DNA sample, ten times over."""
    count_in_dna = _get_dna(STR).count
    pattern = STR("AACT")
    for _ in _RANGE_10:
        count_in_dna(pattern)
1083
1084
##### startswith and endswith
1085
1086
@bench('"Andrew".startswith("A")', 'startswith single character', 1000)
def startswith_single(STR):
    """Call startswith 1000 times with a matching one-character prefix."""
    subject = STR("Andrew")
    prefix = STR("A")
    has_prefix = subject.startswith
    for _ in _RANGE_1000:
        has_prefix(prefix)
1093
1094
@bench('"Andrew".startswith("Andrew")', 'startswith multiple characters',
       1000)
def startswith_multiple(STR):
    """Call startswith 1000 times with a prefix equal to the whole string."""
    subject = STR("Andrew")
    prefix = STR("Andrew")
    has_prefix = subject.startswith
    for _ in _RANGE_1000:
        has_prefix(prefix)
1102
1103
@bench('"Andrew".startswith("Anders")',
       'startswith multiple characters - not!', 1000)
def startswith_multiple_not(STR):
    """Call startswith 1000 times with a same-length prefix that fails.

    The prefix shares only its first three characters with the subject.
    """
    subject = STR("Andrew")
    prefix = STR("Anders")
    has_prefix = subject.startswith
    for _ in _RANGE_1000:
        has_prefix(prefix)
1111
1112
1113
# endswith
1114
1115
@bench('"Andrew".endswith("w")', 'endswith single character', 1000)
def endswith_single(STR):
    """Call endswith 1000 times with a matching one-character suffix."""
    subject = STR("Andrew")
    suffix = STR("w")
    has_suffix = subject.endswith
    for _ in _RANGE_1000:
        has_suffix(suffix)
1122
1123
@bench('"Andrew".endswith("Andrew")', 'endswith multiple characters', 1000)
def endswith_multiple(STR):
    """Call endswith 1000 times with a suffix equal to the whole string."""
    subject = STR("Andrew")
    suffix = STR("Andrew")
    has_suffix = subject.endswith
    for _ in _RANGE_1000:
        has_suffix(suffix)
1130
1131
@bench('"Andrew".endswith("Anders")',
       'endswith multiple characters - not!', 1000)
def endswith_multiple_not(STR):
    """Call endswith 1000 times with a same-length suffix that fails."""
    subject = STR("Andrew")
    suffix = STR("Anders")
    has_suffix = subject.endswith
    for _ in _RANGE_1000:
        has_suffix(suffix)
1139
1140
#### Strip
1141
1142
@bench('"Hello!\\n".strip()', 'strip terminal newline', 1000)
def terminal_newline_strip_right(STR):
    """Call strip() 1000 times on text with a single trailing newline."""
    text = STR("Hello!\n")
    do_strip = text.strip
    for _ in _RANGE_1000:
        do_strip()
1148
1149
@bench('"Hello!\\n".rstrip()', 'strip terminal newline', 1000)
def terminal_newline_rstrip(STR):
    """Call rstrip() 1000 times on text with a single trailing newline."""
    text = STR("Hello!\n")
    do_rstrip = text.rstrip
    for _ in _RANGE_1000:
        do_rstrip()
1155
1156
@bench('"\\nHello!".strip()', 'strip terminal newline', 1000)
def terminal_newline_strip_left(STR):
    """Call strip() 1000 times on text with a single leading newline."""
    text = STR("\nHello!")
    do_strip = text.strip
    for _ in _RANGE_1000:
        do_strip()
1162
1163
@bench('"\\nHello!\\n".strip()', 'strip terminal newline', 1000)
def terminal_newline_strip_both(STR):
    """Call strip() 1000 times on text with a newline at both ends."""
    text = STR("\nHello!\n")
    do_strip = text.strip
    for _ in _RANGE_1000:
        do_strip()
1169
1170
@bench('"\\nHello!".lstrip()', 'strip terminal newline', 1000)
def terminal_newline_lstrip(STR):
    """Call lstrip() 1000 times on text with a single leading newline.

    The label passed to ``bench`` names ``lstrip`` so that the report line
    describes the method that is actually invoked here.
    """
    s = STR("\nHello!")
    s_lstrip = s.lstrip
    for x in _RANGE_1000:
        s_lstrip()
1176
1177
@bench('s="Hello!\\n"; s[:-1] if s[-1]=="\\n" else s',
       'strip terminal newline', 1000)
def terminal_newline_if_else(STR):
    """Drop a trailing newline 1000 times with a conditional expression.

    The newline sentinel is obtained by indexing a one-character string of
    the same type as the subject.  Indexing Python 3 bytes yields an int, so
    comparing ``s[-1]`` against a length-one bytes object would always be
    false and the slicing branch would never run for the bytes variant.
    """
    s = STR("Hello!\n")
    # Same kind of value that s[-1] produces: an int for Python 3 bytes,
    # a one-character string in every other case.
    NL = STR("\n")[0]
    for x in _RANGE_1000:
        s[:-1] if (s[-1] == NL) else s
1184
1185
1186
# Strip multiple spaces or tabs
1187
1188
@bench('"Hello\\t \\t".strip()', 'strip terminal spaces and tabs', 1000)
def terminal_space_strip(STR):
    """Call strip() 1000 times on text that ends in tabs and a space.

    The whitespace run must be the last thing in the string; with any
    non-whitespace character after it, strip() would have nothing to remove.
    """
    s = STR("Hello!\t \t")
    s_strip = s.strip
    for x in _RANGE_1000:
        s_strip()
1194
1195
@bench('"Hello\\t \\t".rstrip()', 'strip terminal spaces and tabs', 1000)
def terminal_space_rstrip(STR):
    """Call rstrip() 1000 times on text that ends in tabs and a space."""
    text = STR("Hello!\t \t")
    do_rstrip = text.rstrip
    for _ in _RANGE_1000:
        do_rstrip()
1201
1202
@bench('"\\t \\tHello".lstrip()', 'strip terminal spaces and tabs', 1000)
def terminal_space_lstrip(STR):
    """Call lstrip() 1000 times on text that starts with tabs and a space.

    The label passed to ``bench`` names ``lstrip`` so that the report line
    describes the method that is actually invoked here.
    """
    s = STR("\t \tHello!")
    s_lstrip = s.lstrip
    for x in _RANGE_1000:
        s_lstrip()
1208
1209
1210
#### replace
1211
@bench('"This is a test".replace(" ", "\\t")', 'replace single character',
       1000)
def replace_single_character(STR):
    """Replace every space in a short sentence with a tab, 1000 times."""
    sentence = STR("This is a test!")
    old = STR(" ")
    new = STR("\t")
    do_replace = sentence.replace
    for _ in _RANGE_1000:
        do_replace(old, new)
1220
1221
@uses_re
@bench('re.sub(" ", "\\t", "This is a test")', 'replace single character',
       1000)
def replace_single_character_re(STR):
    """Replace every space in a short sentence with a tab via re, 1000 times.

    The pattern is compiled once and its ``sub`` method is bound before the
    loop, so only the substitution calls are repeated.
    """
    s = STR("This is a test!")
    pat = re.compile(STR(" "))
    to_str = STR("\t")
    pat_sub = pat.sub
    for x in _RANGE_1000:
        pat_sub(to_str, s)
1231
1232
@bench('"...text.with.2000.lines...replace("\\n", " ")',
       'replace single character, big string', 10)
def replace_single_character_big(STR):
    """Replace each newline with a space in the _get_2000_lines text, ten times."""
    text = _get_2000_lines(STR)
    old = STR("\n")
    new = STR(" ")
    do_replace = text.replace
    for _ in _RANGE_10:
        do_replace(old, new)
1241
1242
@uses_re
@bench('re.sub("\\n", " ", "...text.with.2000.lines...")',
       'replace single character, big string', 10)
def replace_single_character_big_re(STR):
    """Replace each newline with a space via a compiled regex, ten times."""
    text = _get_2000_lines(STR)
    newline_re = re.compile(STR("\n"))
    replacement = STR(" ")
    substitute = newline_re.sub
    for _ in _RANGE_10:
        substitute(replacement, text)
1252
1253
1254
@bench('dna.replace("ATC", "ATT")',
       'replace multiple characters, dna', 10)
def replace_multiple_characters_dna(STR):
    """Replace "ATC" with the equally long "ATT" in the DNA sample, ten times."""
    dna = _get_dna(STR)
    old = STR("ATC")
    new = STR("ATT")
    do_replace = dna.replace
    for _ in _RANGE_10:
        do_replace(old, new)
1263
1264
# This increases the character count
1265
@bench('"...text.with.2000.newlines...replace("\\n", "\\r\\n")',
       'replace and expand multiple characters, big string', 10)
def replace_multiple_character_big(STR):
    """Expand each newline to CR+LF in the _get_2000_lines text, ten times."""
    text = _get_2000_lines(STR)
    old = STR("\n")
    new = STR("\r\n")
    do_replace = text.replace
    for _ in _RANGE_10:
        do_replace(old, new)
1274
1275
1276
# This decreases the character count
1277
@bench('"When shall we three meet again?".replace("ee", "")',
       'replace/remove multiple characters', 1000)
def replace_multiple_character_remove(STR):
    """Delete every "ee" from a short sentence, 1000 times."""
    sentence = STR("When shall we three meet again?")
    old = STR("ee")
    new = STR("")
    do_replace = sentence.replace
    for _ in _RANGE_1000:
        do_replace(old, new)
1286
1287
1288
# A single "A" followed by 128*1024 "Z" characters, kept in both string types.
big_s = "A" + "Z" * (128 * 1024)
big_s_unicode = unicode_from_str(big_s)
big_s_bytes = bytes_from_str(big_s)
1291
def _get_big_s(STR):
    """Return the large "A" + "Z"... string in the type selected by STR.

    STR is compared by identity against the BYTES and UNICODE converters;
    any other object raises AssertionError.
    """
    if STR is BYTES:
        return big_s_bytes
    if STR is UNICODE:
        return big_s_unicode
    raise AssertionError
1295
1296
# The older replace implementation counted all matches in
1297
# the string even when it only needed to make one replacement.
1298
@bench('("A" + ("Z"*128*1024)).replace("A", "BB", 1)',
       'quick replace single character match', 10)
def quick_replace_single_match(STR):
    """Do a count-limited (1) replace of "A" by "BB" in the big string, ten times."""
    text = _get_big_s(STR)
    old = STR("A")
    new = STR("BB")
    do_replace = text.replace
    for _ in _RANGE_10:
        do_replace(old, new, 1)
1307
1308
@bench('("A" + ("Z"*128*1024)).replace("AZZ", "BBZZ", 1)',
       'quick replace multiple character match', 10)
def quick_replace_multiple_match(STR):
    """Do a count-limited (1) replace of "AZZ" by "BBZZ" in the big string, ten times."""
    text = _get_big_s(STR)
    old = STR("AZZ")
    new = STR("BBZZ")
    do_replace = text.replace
    for _ in _RANGE_10:
        do_replace(old, new, 1)
1317
1318
1319
####
1320
1321
# CCP does a lot of this, for internationalisation of in-game messages.
1322
# %-style template with named fields, the values to substitute, and
# per-string-type conversions of both.
_format = "The %(thing)s is %(place)s the %(location)s."
_format_dict = {
    "thing": "THING",
    "place": "PLACE",
    "location": "LOCATION",
}
_format_unicode = unicode_from_str(_format)
_format_bytes = bytes_from_str(_format)
_format_dict_unicode = dict(
    (unicode_from_str(key), unicode_from_str(value))
    for key, value in _format_dict.items())
_format_dict_bytes = dict(
    (bytes_from_str(key), bytes_from_str(value))
    for key, value in _format_dict.items())
1328
1329
def _get_format(STR):
    """Return the format template in the string type selected by STR.

    Raises:
        UnsupportedType: STR is BYTES and the interpreter is Python 3 or later.
        AssertionError: STR is neither UNICODE nor BYTES.
    """
    if STR is UNICODE:
        return _format_unicode
    if STR is not BYTES:
        raise AssertionError
    if sys.version_info >= (3,):
        raise UnsupportedType
    return _format_bytes
1337
1338
def _get_format_dict(STR):
    """Return the substitution mapping in the string type selected by STR.

    Raises:
        UnsupportedType: STR is BYTES and the interpreter is Python 3 or later.
        AssertionError: STR is neither UNICODE nor BYTES.
    """
    if STR is UNICODE:
        return _format_dict_unicode
    if STR is not BYTES:
        raise AssertionError
    if sys.version_info >= (3,):
        raise UnsupportedType
    return _format_dict_bytes
1346
1347
# Formatting.
1348
@bench('"The %(k1)s is %(k2)s the %(k3)s."%{"k1":"x","k2":"y","k3":"z",}',
       'formatting a string type with a dict', 1000)
def format_with_dict(STR):
    """Apply %-formatting with a mapping to the format template, 1000 times."""
    template = _get_format(STR)
    values = _get_format_dict(STR)
    for _ in _RANGE_1000:
        template % values
1355
1356
1357
#### Upper- and lower- case conversion
1358
1359
@bench('("Where in the world is Carmen San Deigo?"*10).lower()',
       "case conversion -- rare", 1000)
def lower_conversion_rare(STR):
    """Call lower() 1000 times on text that is already mostly lowercase."""
    text = STR("Where in the world is Carmen San Deigo?"*10)
    to_lower = text.lower
    for _ in _RANGE_1000:
        to_lower()
1366
1367
@bench('("WHERE IN THE WORLD IS CARMEN SAN DEIGO?"*10).lower()',
       "case conversion -- dense", 1000)
def lower_conversion_dense(STR):
    """Call lower() 1000 times on text whose letters are all uppercase."""
    text = STR("WHERE IN THE WORLD IS CARMEN SAN DEIGO?"*10)
    to_lower = text.lower
    for _ in _RANGE_1000:
        to_lower()
1374
1375
1376
@bench('("wHERE IN THE WORLD IS cARMEN sAN dEIGO?"*10).upper()',
       "case conversion -- rare", 1000)
def upper_conversion_rare(STR):
    """Call upper() 1000 times on text that is already mostly uppercase.

    Only a handful of letters in the subject are lowercase, which is what
    makes conversions "rare"; the subject is the same string the label shows.
    """
    s = STR("wHERE IN THE WORLD IS cARMEN sAN dEIGO?"*10)
    s_upper = s.upper
    for x in _RANGE_1000:
        s_upper()
1383
1384
@bench('("where in the world is carmen san deigo?"*10).upper()',
       "case conversion -- dense", 1000)
def upper_conversion_dense(STR):
    """Call upper() 1000 times on text whose letters are all lowercase."""
    text = STR("where in the world is carmen san deigo?"*10)
    to_upper = text.upper
    for _ in _RANGE_1000:
        to_upper()
1391
1392
1393
# end of benchmarks
1394
1395
#################
1396
1397
class BenchTimer(timeit.Timer):
    """timeit.Timer that chooses its own loop count and reports per-loop time."""

    def best(self, repeat=1):
        """Return the smallest measured time for one execution of the statement.

        The loop count starts at 10 and grows tenfold (up to 10**9) until a
        single timing run takes longer than 0.02.  That run plus
        ``repeat - 1`` further runs at the same loop count are compared, and
        the smallest total divided by the loop count is returned.
        """
        exponent = 1
        while True:
            number = 10 ** exponent
            elapsed = self.timeit(number)
            if elapsed > 0.02 or exponent == 9:
                break
            exponent += 1
        # The calibration run already counts as the first sample.
        samples = [elapsed]
        samples.extend(self.timeit(number) for _ in range(repeat - 1))
        return min(samples) / number
1408
1409
def main():
    """Parse the command line, time the selected benchmarks and print a report.

    Positional command-line arguments are matched as substrings of benchmark
    group names; when any are given, only benchmarks whose group contains at
    least one of them are run.

    Raises:
        SystemExit: if both --8-bit and --unicode were requested.
    """
    options, wanted = parser.parse_args()
    if options.bytes_only and options.unicode_only:
        raise SystemExit("Only one of --8-bit and --unicode are allowed")

    def measure(stmt_template, func_name, skip):
        """Return (time, column text) for one string type of one benchmark."""
        if skip:
            return 0.0, " - "
        try:
            elapsed = BenchTimer(stmt_template % (func_name,),
                                 "import __main__").best(REPEAT)
        except UnsupportedType:
            return 0.0, "N/A"
        return elapsed, "%.2f" % (1000 * elapsed)

    # Gather (group, name, function) triples for every selected benchmark;
    # sorting them puts members of the same group next to each other.
    selected = []
    for name, obj in globals().items():
        if not hasattr(obj, "is_bench"):
            continue
        if wanted and not any(word in obj.group for word in wanted):
            # Not selected, ignore
            continue
        if options.skip_re and hasattr(obj, "uses_re"):
            continue
        selected.append((obj.group, name, obj))
    selected.sort()

    p("bytes\tunicode")
    p("(in ms)\t(in ms)\t%\tcomment")

    bytes_total = 0.0
    uni_total = 0.0

    group_of = operator.itemgetter(0)
    for title, members in itertools.groupby(selected, group_of):
        # Flush buffer before each group
        sys.stdout.flush()
        p("="*10, title)
        for _, name, func in members:
            bytes_time, bytes_text = measure(
                "__main__.%s(__main__.BYTES)", name, options.unicode_only)
            bytes_total += bytes_time
            uni_time, uni_text = measure(
                "__main__.%s(__main__.UNICODE)", name, options.bytes_only)
            uni_total += uni_time
            try:
                relative = bytes_time / uni_time
            except (TypeError, ZeroDivisionError):
                relative = 0.0
            p("%s\t%s\t%.1f\t%s (*%d)" % (
                bytes_text, uni_text, 100.*relative,
                func.comment, func.repeat_count))

    if bytes_total == uni_total == 0.0:
        p("That was zippy!")
        return

    try:
        ratio = bytes_total / uni_total
    except ZeroDivisionError:
        ratio = 0.0
    p("%.2f\t%.2f\t%.1f\t%s" % (
        1000*bytes_total, 1000*uni_total, 100.*ratio,
        "TOTAL"))

if __name__ == "__main__":
    main()
1483
1484