Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
allendowney
GitHub Repository: allendowney/cpython
Path: blob/main/Objects/setobject.c
12 views
1
2
/* set object implementation
3
4
Written and maintained by Raymond D. Hettinger <[email protected]>
5
Derived from Lib/sets.py and Objects/dictobject.c.
6
7
The basic lookup function used by all operations.
8
This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4.
9
10
The initial probe index is computed as hash mod the table size.
11
Subsequent probe indices are computed as explained in Objects/dictobject.c.
12
13
To improve cache locality, each probe inspects a series of consecutive
14
nearby entries before moving on to probes elsewhere in memory. This leaves
15
us with a hybrid of linear probing and randomized probing. The linear probing
16
reduces the cost of hash collisions because consecutive memory accesses
17
tend to be much cheaper than scattered probes. After LINEAR_PROBES steps,
18
we then use more of the upper bits from the hash value and apply a simple
19
linear congruential random number generator. This helps break-up long
20
chains of collisions.
21
22
All arithmetic on hash should ignore overflow.
23
24
Unlike the dictionary implementation, the lookkey function can return
25
NULL if the rich comparison returns an error.
26
27
Use cases for sets differ considerably from dictionaries where looked-up
28
keys are more likely to be present. In contrast, sets are primarily
29
about membership testing where the presence of an element is not known in
30
advance. Accordingly, the set implementation needs to optimize for both
31
the found and not-found case.
32
*/
33
34
#include "Python.h"
35
#include "pycore_object.h" // _PyObject_GC_UNTRACK()
36
#include <stddef.h> // offsetof()
37
38
/* Object used as dummy key to fill deleted entries */
39
static PyObject _dummy_struct;
40
41
#define dummy (&_dummy_struct)
42
43
44
/* ======================================================================== */
45
/* ======= Begin logic for probing the hash table ========================= */
46
47
/* Set this to zero to turn-off linear probing */
48
#ifndef LINEAR_PROBES
49
#define LINEAR_PROBES 9
50
#endif
51
52
/* This must be >= 1 */
53
#define PERTURB_SHIFT 5
54
55
static setentry *
56
set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash)
57
{
58
setentry *table;
59
setentry *entry;
60
size_t perturb = hash;
61
size_t mask = so->mask;
62
size_t i = (size_t)hash & mask; /* Unsigned for defined overflow behavior */
63
int probes;
64
int cmp;
65
66
while (1) {
67
entry = &so->table[i];
68
probes = (i + LINEAR_PROBES <= mask) ? LINEAR_PROBES: 0;
69
do {
70
if (entry->hash == 0 && entry->key == NULL)
71
return entry;
72
if (entry->hash == hash) {
73
PyObject *startkey = entry->key;
74
assert(startkey != dummy);
75
if (startkey == key)
76
return entry;
77
if (PyUnicode_CheckExact(startkey)
78
&& PyUnicode_CheckExact(key)
79
&& _PyUnicode_EQ(startkey, key))
80
return entry;
81
table = so->table;
82
Py_INCREF(startkey);
83
cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
84
Py_DECREF(startkey);
85
if (cmp < 0)
86
return NULL;
87
if (table != so->table || entry->key != startkey)
88
return set_lookkey(so, key, hash);
89
if (cmp > 0)
90
return entry;
91
mask = so->mask;
92
}
93
entry++;
94
} while (probes--);
95
perturb >>= PERTURB_SHIFT;
96
i = (i * 5 + 1 + perturb) & mask;
97
}
98
}
99
100
static int set_table_resize(PySetObject *, Py_ssize_t);
101
102
static int
103
set_add_entry(PySetObject *so, PyObject *key, Py_hash_t hash)
104
{
105
setentry *table;
106
setentry *freeslot;
107
setentry *entry;
108
size_t perturb;
109
size_t mask;
110
size_t i; /* Unsigned for defined overflow behavior */
111
int probes;
112
int cmp;
113
114
/* Pre-increment is necessary to prevent arbitrary code in the rich
115
comparison from deallocating the key just before the insertion. */
116
Py_INCREF(key);
117
118
restart:
119
120
mask = so->mask;
121
i = (size_t)hash & mask;
122
freeslot = NULL;
123
perturb = hash;
124
125
while (1) {
126
entry = &so->table[i];
127
probes = (i + LINEAR_PROBES <= mask) ? LINEAR_PROBES: 0;
128
do {
129
if (entry->hash == 0 && entry->key == NULL)
130
goto found_unused_or_dummy;
131
if (entry->hash == hash) {
132
PyObject *startkey = entry->key;
133
assert(startkey != dummy);
134
if (startkey == key)
135
goto found_active;
136
if (PyUnicode_CheckExact(startkey)
137
&& PyUnicode_CheckExact(key)
138
&& _PyUnicode_EQ(startkey, key))
139
goto found_active;
140
table = so->table;
141
Py_INCREF(startkey);
142
cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
143
Py_DECREF(startkey);
144
if (cmp > 0)
145
goto found_active;
146
if (cmp < 0)
147
goto comparison_error;
148
if (table != so->table || entry->key != startkey)
149
goto restart;
150
mask = so->mask;
151
}
152
else if (entry->hash == -1) {
153
assert (entry->key == dummy);
154
freeslot = entry;
155
}
156
entry++;
157
} while (probes--);
158
perturb >>= PERTURB_SHIFT;
159
i = (i * 5 + 1 + perturb) & mask;
160
}
161
162
found_unused_or_dummy:
163
if (freeslot == NULL)
164
goto found_unused;
165
so->used++;
166
freeslot->key = key;
167
freeslot->hash = hash;
168
return 0;
169
170
found_unused:
171
so->fill++;
172
so->used++;
173
entry->key = key;
174
entry->hash = hash;
175
if ((size_t)so->fill*5 < mask*3)
176
return 0;
177
return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4);
178
179
found_active:
180
Py_DECREF(key);
181
return 0;
182
183
comparison_error:
184
Py_DECREF(key);
185
return -1;
186
}
187
188
/*
189
Internal routine used by set_table_resize() to insert an item which is
190
known to be absent from the set. Besides the performance benefit,
191
there is also safety benefit since using set_add_entry() risks making
192
a callback in the middle of a set_table_resize(), see issue 1456209.
193
The caller is responsible for updating the key's reference count and
194
the setobject's fill and used fields.
195
*/
196
static void
197
set_insert_clean(setentry *table, size_t mask, PyObject *key, Py_hash_t hash)
198
{
199
setentry *entry;
200
size_t perturb = hash;
201
size_t i = (size_t)hash & mask;
202
size_t j;
203
204
while (1) {
205
entry = &table[i];
206
if (entry->key == NULL)
207
goto found_null;
208
if (i + LINEAR_PROBES <= mask) {
209
for (j = 0; j < LINEAR_PROBES; j++) {
210
entry++;
211
if (entry->key == NULL)
212
goto found_null;
213
}
214
}
215
perturb >>= PERTURB_SHIFT;
216
i = (i * 5 + 1 + perturb) & mask;
217
}
218
found_null:
219
entry->key = key;
220
entry->hash = hash;
221
}
222
223
/* ======== End logic for probing the hash table ========================== */
224
/* ======================================================================== */
225
226
/*
227
Restructure the table by allocating a new table and reinserting all
228
keys again. When entries have been deleted, the new table may
229
actually be smaller than the old one.
230
*/
231
static int
232
set_table_resize(PySetObject *so, Py_ssize_t minused)
233
{
234
setentry *oldtable, *newtable, *entry;
235
Py_ssize_t oldmask = so->mask;
236
size_t newmask;
237
int is_oldtable_malloced;
238
setentry small_copy[PySet_MINSIZE];
239
240
assert(minused >= 0);
241
242
/* Find the smallest table size > minused. */
243
/* XXX speed-up with intrinsics */
244
size_t newsize = PySet_MINSIZE;
245
while (newsize <= (size_t)minused) {
246
newsize <<= 1; // The largest possible value is PY_SSIZE_T_MAX + 1.
247
}
248
249
/* Get space for a new table. */
250
oldtable = so->table;
251
assert(oldtable != NULL);
252
is_oldtable_malloced = oldtable != so->smalltable;
253
254
if (newsize == PySet_MINSIZE) {
255
/* A large table is shrinking, or we can't get any smaller. */
256
newtable = so->smalltable;
257
if (newtable == oldtable) {
258
if (so->fill == so->used) {
259
/* No dummies, so no point doing anything. */
260
return 0;
261
}
262
/* We're not going to resize it, but rebuild the
263
table anyway to purge old dummy entries.
264
Subtle: This is *necessary* if fill==size,
265
as set_lookkey needs at least one virgin slot to
266
terminate failing searches. If fill < size, it's
267
merely desirable, as dummies slow searches. */
268
assert(so->fill > so->used);
269
memcpy(small_copy, oldtable, sizeof(small_copy));
270
oldtable = small_copy;
271
}
272
}
273
else {
274
newtable = PyMem_NEW(setentry, newsize);
275
if (newtable == NULL) {
276
PyErr_NoMemory();
277
return -1;
278
}
279
}
280
281
/* Make the set empty, using the new table. */
282
assert(newtable != oldtable);
283
memset(newtable, 0, sizeof(setentry) * newsize);
284
so->mask = newsize - 1;
285
so->table = newtable;
286
287
/* Copy the data over; this is refcount-neutral for active entries;
288
dummy entries aren't copied over, of course */
289
newmask = (size_t)so->mask;
290
if (so->fill == so->used) {
291
for (entry = oldtable; entry <= oldtable + oldmask; entry++) {
292
if (entry->key != NULL) {
293
set_insert_clean(newtable, newmask, entry->key, entry->hash);
294
}
295
}
296
} else {
297
so->fill = so->used;
298
for (entry = oldtable; entry <= oldtable + oldmask; entry++) {
299
if (entry->key != NULL && entry->key != dummy) {
300
set_insert_clean(newtable, newmask, entry->key, entry->hash);
301
}
302
}
303
}
304
305
if (is_oldtable_malloced)
306
PyMem_Free(oldtable);
307
return 0;
308
}
309
310
static int
311
set_contains_entry(PySetObject *so, PyObject *key, Py_hash_t hash)
312
{
313
setentry *entry;
314
315
entry = set_lookkey(so, key, hash);
316
if (entry != NULL)
317
return entry->key != NULL;
318
return -1;
319
}
320
321
#define DISCARD_NOTFOUND 0
322
#define DISCARD_FOUND 1
323
324
static int
325
set_discard_entry(PySetObject *so, PyObject *key, Py_hash_t hash)
326
{
327
setentry *entry;
328
PyObject *old_key;
329
330
entry = set_lookkey(so, key, hash);
331
if (entry == NULL)
332
return -1;
333
if (entry->key == NULL)
334
return DISCARD_NOTFOUND;
335
old_key = entry->key;
336
entry->key = dummy;
337
entry->hash = -1;
338
so->used--;
339
Py_DECREF(old_key);
340
return DISCARD_FOUND;
341
}
342
343
static int
344
set_add_key(PySetObject *so, PyObject *key)
345
{
346
Py_hash_t hash;
347
348
if (!PyUnicode_CheckExact(key) ||
349
(hash = _PyASCIIObject_CAST(key)->hash) == -1) {
350
hash = PyObject_Hash(key);
351
if (hash == -1)
352
return -1;
353
}
354
return set_add_entry(so, key, hash);
355
}
356
357
static int
358
set_contains_key(PySetObject *so, PyObject *key)
359
{
360
Py_hash_t hash;
361
362
if (!PyUnicode_CheckExact(key) ||
363
(hash = _PyASCIIObject_CAST(key)->hash) == -1) {
364
hash = PyObject_Hash(key);
365
if (hash == -1)
366
return -1;
367
}
368
return set_contains_entry(so, key, hash);
369
}
370
371
static int
372
set_discard_key(PySetObject *so, PyObject *key)
373
{
374
Py_hash_t hash;
375
376
if (!PyUnicode_CheckExact(key) ||
377
(hash = _PyASCIIObject_CAST(key)->hash) == -1) {
378
hash = PyObject_Hash(key);
379
if (hash == -1)
380
return -1;
381
}
382
return set_discard_entry(so, key, hash);
383
}
384
385
static void
386
set_empty_to_minsize(PySetObject *so)
387
{
388
memset(so->smalltable, 0, sizeof(so->smalltable));
389
so->fill = 0;
390
so->used = 0;
391
so->mask = PySet_MINSIZE - 1;
392
so->table = so->smalltable;
393
so->hash = -1;
394
}
395
396
static int
397
set_clear_internal(PySetObject *so)
398
{
399
setentry *entry;
400
setentry *table = so->table;
401
Py_ssize_t fill = so->fill;
402
Py_ssize_t used = so->used;
403
int table_is_malloced = table != so->smalltable;
404
setentry small_copy[PySet_MINSIZE];
405
406
assert (PyAnySet_Check(so));
407
assert(table != NULL);
408
409
/* This is delicate. During the process of clearing the set,
410
* decrefs can cause the set to mutate. To avoid fatal confusion
411
* (voice of experience), we have to make the set empty before
412
* clearing the slots, and never refer to anything via so->ref while
413
* clearing.
414
*/
415
if (table_is_malloced)
416
set_empty_to_minsize(so);
417
418
else if (fill > 0) {
419
/* It's a small table with something that needs to be cleared.
420
* Afraid the only safe way is to copy the set entries into
421
* another small table first.
422
*/
423
memcpy(small_copy, table, sizeof(small_copy));
424
table = small_copy;
425
set_empty_to_minsize(so);
426
}
427
/* else it's a small table that's already empty */
428
429
/* Now we can finally clear things. If C had refcounts, we could
430
* assert that the refcount on table is 1 now, i.e. that this function
431
* has unique access to it, so decref side-effects can't alter it.
432
*/
433
for (entry = table; used > 0; entry++) {
434
if (entry->key && entry->key != dummy) {
435
used--;
436
Py_DECREF(entry->key);
437
}
438
}
439
440
if (table_is_malloced)
441
PyMem_Free(table);
442
return 0;
443
}
444
445
/*
446
* Iterate over a set table. Use like so:
447
*
448
* Py_ssize_t pos;
449
* setentry *entry;
450
* pos = 0; # important! pos should not otherwise be changed by you
451
* while (set_next(yourset, &pos, &entry)) {
452
* Refer to borrowed reference in entry->key.
453
* }
454
*
455
* CAUTION: In general, it isn't safe to use set_next in a loop that
456
* mutates the table.
457
*/
458
static int
459
set_next(PySetObject *so, Py_ssize_t *pos_ptr, setentry **entry_ptr)
460
{
461
Py_ssize_t i;
462
Py_ssize_t mask;
463
setentry *entry;
464
465
assert (PyAnySet_Check(so));
466
i = *pos_ptr;
467
assert(i >= 0);
468
mask = so->mask;
469
entry = &so->table[i];
470
while (i <= mask && (entry->key == NULL || entry->key == dummy)) {
471
i++;
472
entry++;
473
}
474
*pos_ptr = i+1;
475
if (i > mask)
476
return 0;
477
assert(entry != NULL);
478
*entry_ptr = entry;
479
return 1;
480
}
481
482
static void
483
set_dealloc(PySetObject *so)
484
{
485
setentry *entry;
486
Py_ssize_t used = so->used;
487
488
/* bpo-31095: UnTrack is needed before calling any callbacks */
489
PyObject_GC_UnTrack(so);
490
Py_TRASHCAN_BEGIN(so, set_dealloc)
491
if (so->weakreflist != NULL)
492
PyObject_ClearWeakRefs((PyObject *) so);
493
494
for (entry = so->table; used > 0; entry++) {
495
if (entry->key && entry->key != dummy) {
496
used--;
497
Py_DECREF(entry->key);
498
}
499
}
500
if (so->table != so->smalltable)
501
PyMem_Free(so->table);
502
Py_TYPE(so)->tp_free(so);
503
Py_TRASHCAN_END
504
}
505
506
static PyObject *
507
set_repr(PySetObject *so)
508
{
509
PyObject *result=NULL, *keys, *listrepr, *tmp;
510
int status = Py_ReprEnter((PyObject*)so);
511
512
if (status != 0) {
513
if (status < 0)
514
return NULL;
515
return PyUnicode_FromFormat("%s(...)", Py_TYPE(so)->tp_name);
516
}
517
518
/* shortcut for the empty set */
519
if (!so->used) {
520
Py_ReprLeave((PyObject*)so);
521
return PyUnicode_FromFormat("%s()", Py_TYPE(so)->tp_name);
522
}
523
524
keys = PySequence_List((PyObject *)so);
525
if (keys == NULL)
526
goto done;
527
528
/* repr(keys)[1:-1] */
529
listrepr = PyObject_Repr(keys);
530
Py_DECREF(keys);
531
if (listrepr == NULL)
532
goto done;
533
tmp = PyUnicode_Substring(listrepr, 1, PyUnicode_GET_LENGTH(listrepr)-1);
534
Py_DECREF(listrepr);
535
if (tmp == NULL)
536
goto done;
537
listrepr = tmp;
538
539
if (!PySet_CheckExact(so))
540
result = PyUnicode_FromFormat("%s({%U})",
541
Py_TYPE(so)->tp_name,
542
listrepr);
543
else
544
result = PyUnicode_FromFormat("{%U}", listrepr);
545
Py_DECREF(listrepr);
546
done:
547
Py_ReprLeave((PyObject*)so);
548
return result;
549
}
550
551
static Py_ssize_t
552
set_len(PyObject *so)
553
{
554
return ((PySetObject *)so)->used;
555
}
556
557
static int
558
set_merge(PySetObject *so, PyObject *otherset)
559
{
560
PySetObject *other;
561
PyObject *key;
562
Py_ssize_t i;
563
setentry *so_entry;
564
setentry *other_entry;
565
566
assert (PyAnySet_Check(so));
567
assert (PyAnySet_Check(otherset));
568
569
other = (PySetObject*)otherset;
570
if (other == so || other->used == 0)
571
/* a.update(a) or a.update(set()); nothing to do */
572
return 0;
573
/* Do one big resize at the start, rather than
574
* incrementally resizing as we insert new keys. Expect
575
* that there will be no (or few) overlapping keys.
576
*/
577
if ((so->fill + other->used)*5 >= so->mask*3) {
578
if (set_table_resize(so, (so->used + other->used)*2) != 0)
579
return -1;
580
}
581
so_entry = so->table;
582
other_entry = other->table;
583
584
/* If our table is empty, and both tables have the same size, and
585
there are no dummies to eliminate, then just copy the pointers. */
586
if (so->fill == 0 && so->mask == other->mask && other->fill == other->used) {
587
for (i = 0; i <= other->mask; i++, so_entry++, other_entry++) {
588
key = other_entry->key;
589
if (key != NULL) {
590
assert(so_entry->key == NULL);
591
so_entry->key = Py_NewRef(key);
592
so_entry->hash = other_entry->hash;
593
}
594
}
595
so->fill = other->fill;
596
so->used = other->used;
597
return 0;
598
}
599
600
/* If our table is empty, we can use set_insert_clean() */
601
if (so->fill == 0) {
602
setentry *newtable = so->table;
603
size_t newmask = (size_t)so->mask;
604
so->fill = other->used;
605
so->used = other->used;
606
for (i = other->mask + 1; i > 0 ; i--, other_entry++) {
607
key = other_entry->key;
608
if (key != NULL && key != dummy) {
609
set_insert_clean(newtable, newmask, Py_NewRef(key),
610
other_entry->hash);
611
}
612
}
613
return 0;
614
}
615
616
/* We can't assure there are no duplicates, so do normal insertions */
617
for (i = 0; i <= other->mask; i++) {
618
other_entry = &other->table[i];
619
key = other_entry->key;
620
if (key != NULL && key != dummy) {
621
if (set_add_entry(so, key, other_entry->hash))
622
return -1;
623
}
624
}
625
return 0;
626
}
627
628
static PyObject *
629
set_pop(PySetObject *so, PyObject *Py_UNUSED(ignored))
630
{
631
/* Make sure the search finger is in bounds */
632
setentry *entry = so->table + (so->finger & so->mask);
633
setentry *limit = so->table + so->mask;
634
PyObject *key;
635
636
if (so->used == 0) {
637
PyErr_SetString(PyExc_KeyError, "pop from an empty set");
638
return NULL;
639
}
640
while (entry->key == NULL || entry->key==dummy) {
641
entry++;
642
if (entry > limit)
643
entry = so->table;
644
}
645
key = entry->key;
646
entry->key = dummy;
647
entry->hash = -1;
648
so->used--;
649
so->finger = entry - so->table + 1; /* next place to start */
650
return key;
651
}
652
653
PyDoc_STRVAR(pop_doc, "Remove and return an arbitrary set element.\n\
654
Raises KeyError if the set is empty.");
655
656
static int
657
set_traverse(PySetObject *so, visitproc visit, void *arg)
658
{
659
Py_ssize_t pos = 0;
660
setentry *entry;
661
662
while (set_next(so, &pos, &entry))
663
Py_VISIT(entry->key);
664
return 0;
665
}
666
667
/* Work to increase the bit dispersion for closely spaced hash values.
668
This is important because some use cases have many combinations of a
669
small number of elements with nearby hashes so that many distinct
670
combinations collapse to only a handful of distinct hash values. */
671
672
static Py_uhash_t
673
_shuffle_bits(Py_uhash_t h)
674
{
675
return ((h ^ 89869747UL) ^ (h << 16)) * 3644798167UL;
676
}
677
678
/* Most of the constants in this hash algorithm are randomly chosen
679
large primes with "interesting bit patterns" and that passed tests
680
for good collision statistics on a variety of problematic datasets
681
including powersets and graph structures (such as David Eppstein's
682
graph recipes in Lib/test/test_set.py) */
683
684
static Py_hash_t
685
frozenset_hash(PyObject *self)
686
{
687
PySetObject *so = (PySetObject *)self;
688
Py_uhash_t hash = 0;
689
setentry *entry;
690
691
if (so->hash != -1)
692
return so->hash;
693
694
/* Xor-in shuffled bits from every entry's hash field because xor is
695
commutative and a frozenset hash should be independent of order.
696
697
For speed, include null entries and dummy entries and then
698
subtract out their effect afterwards so that the final hash
699
depends only on active entries. This allows the code to be
700
vectorized by the compiler and it saves the unpredictable
701
branches that would arise when trying to exclude null and dummy
702
entries on every iteration. */
703
704
for (entry = so->table; entry <= &so->table[so->mask]; entry++)
705
hash ^= _shuffle_bits(entry->hash);
706
707
/* Remove the effect of an odd number of NULL entries */
708
if ((so->mask + 1 - so->fill) & 1)
709
hash ^= _shuffle_bits(0);
710
711
/* Remove the effect of an odd number of dummy entries */
712
if ((so->fill - so->used) & 1)
713
hash ^= _shuffle_bits(-1);
714
715
/* Factor in the number of active entries */
716
hash ^= ((Py_uhash_t)PySet_GET_SIZE(self) + 1) * 1927868237UL;
717
718
/* Disperse patterns arising in nested frozensets */
719
hash ^= (hash >> 11) ^ (hash >> 25);
720
hash = hash * 69069U + 907133923UL;
721
722
/* -1 is reserved as an error code */
723
if (hash == (Py_uhash_t)-1)
724
hash = 590923713UL;
725
726
so->hash = hash;
727
return hash;
728
}
729
730
/***** Set iterator type ***********************************************/
731
732
typedef struct {
733
PyObject_HEAD
734
PySetObject *si_set; /* Set to NULL when iterator is exhausted */
735
Py_ssize_t si_used;
736
Py_ssize_t si_pos;
737
Py_ssize_t len;
738
} setiterobject;
739
740
static void
741
setiter_dealloc(setiterobject *si)
742
{
743
/* bpo-31095: UnTrack is needed before calling any callbacks */
744
_PyObject_GC_UNTRACK(si);
745
Py_XDECREF(si->si_set);
746
PyObject_GC_Del(si);
747
}
748
749
static int
750
setiter_traverse(setiterobject *si, visitproc visit, void *arg)
751
{
752
Py_VISIT(si->si_set);
753
return 0;
754
}
755
756
static PyObject *
757
setiter_len(setiterobject *si, PyObject *Py_UNUSED(ignored))
758
{
759
Py_ssize_t len = 0;
760
if (si->si_set != NULL && si->si_used == si->si_set->used)
761
len = si->len;
762
return PyLong_FromSsize_t(len);
763
}
764
765
PyDoc_STRVAR(length_hint_doc, "Private method returning an estimate of len(list(it)).");
766
767
static PyObject *setiter_iternext(setiterobject *si);
768
769
static PyObject *
770
setiter_reduce(setiterobject *si, PyObject *Py_UNUSED(ignored))
771
{
772
/* copy the iterator state */
773
setiterobject tmp = *si;
774
Py_XINCREF(tmp.si_set);
775
776
/* iterate the temporary into a list */
777
PyObject *list = PySequence_List((PyObject*)&tmp);
778
Py_XDECREF(tmp.si_set);
779
if (list == NULL) {
780
return NULL;
781
}
782
return Py_BuildValue("N(N)", _PyEval_GetBuiltin(&_Py_ID(iter)), list);
783
}
784
785
PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
786
787
static PyMethodDef setiter_methods[] = {
788
{"__length_hint__", (PyCFunction)setiter_len, METH_NOARGS, length_hint_doc},
789
{"__reduce__", (PyCFunction)setiter_reduce, METH_NOARGS, reduce_doc},
790
{NULL, NULL} /* sentinel */
791
};
792
793
static PyObject *setiter_iternext(setiterobject *si)
794
{
795
PyObject *key;
796
Py_ssize_t i, mask;
797
setentry *entry;
798
PySetObject *so = si->si_set;
799
800
if (so == NULL)
801
return NULL;
802
assert (PyAnySet_Check(so));
803
804
if (si->si_used != so->used) {
805
PyErr_SetString(PyExc_RuntimeError,
806
"Set changed size during iteration");
807
si->si_used = -1; /* Make this state sticky */
808
return NULL;
809
}
810
811
i = si->si_pos;
812
assert(i>=0);
813
entry = so->table;
814
mask = so->mask;
815
while (i <= mask && (entry[i].key == NULL || entry[i].key == dummy))
816
i++;
817
si->si_pos = i+1;
818
if (i > mask)
819
goto fail;
820
si->len--;
821
key = entry[i].key;
822
return Py_NewRef(key);
823
824
fail:
825
si->si_set = NULL;
826
Py_DECREF(so);
827
return NULL;
828
}
829
830
PyTypeObject PySetIter_Type = {
831
PyVarObject_HEAD_INIT(&PyType_Type, 0)
832
"set_iterator", /* tp_name */
833
sizeof(setiterobject), /* tp_basicsize */
834
0, /* tp_itemsize */
835
/* methods */
836
(destructor)setiter_dealloc, /* tp_dealloc */
837
0, /* tp_vectorcall_offset */
838
0, /* tp_getattr */
839
0, /* tp_setattr */
840
0, /* tp_as_async */
841
0, /* tp_repr */
842
0, /* tp_as_number */
843
0, /* tp_as_sequence */
844
0, /* tp_as_mapping */
845
0, /* tp_hash */
846
0, /* tp_call */
847
0, /* tp_str */
848
PyObject_GenericGetAttr, /* tp_getattro */
849
0, /* tp_setattro */
850
0, /* tp_as_buffer */
851
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
852
0, /* tp_doc */
853
(traverseproc)setiter_traverse, /* tp_traverse */
854
0, /* tp_clear */
855
0, /* tp_richcompare */
856
0, /* tp_weaklistoffset */
857
PyObject_SelfIter, /* tp_iter */
858
(iternextfunc)setiter_iternext, /* tp_iternext */
859
setiter_methods, /* tp_methods */
860
0,
861
};
862
863
static PyObject *
864
set_iter(PySetObject *so)
865
{
866
setiterobject *si = PyObject_GC_New(setiterobject, &PySetIter_Type);
867
if (si == NULL)
868
return NULL;
869
si->si_set = (PySetObject*)Py_NewRef(so);
870
si->si_used = so->used;
871
si->si_pos = 0;
872
si->len = so->used;
873
_PyObject_GC_TRACK(si);
874
return (PyObject *)si;
875
}
876
877
static int
878
set_update_internal(PySetObject *so, PyObject *other)
879
{
880
PyObject *key, *it;
881
882
if (PyAnySet_Check(other))
883
return set_merge(so, other);
884
885
if (PyDict_CheckExact(other)) {
886
PyObject *value;
887
Py_ssize_t pos = 0;
888
Py_hash_t hash;
889
Py_ssize_t dictsize = PyDict_GET_SIZE(other);
890
891
/* Do one big resize at the start, rather than
892
* incrementally resizing as we insert new keys. Expect
893
* that there will be no (or few) overlapping keys.
894
*/
895
if (dictsize < 0)
896
return -1;
897
if ((so->fill + dictsize)*5 >= so->mask*3) {
898
if (set_table_resize(so, (so->used + dictsize)*2) != 0)
899
return -1;
900
}
901
while (_PyDict_Next(other, &pos, &key, &value, &hash)) {
902
if (set_add_entry(so, key, hash))
903
return -1;
904
}
905
return 0;
906
}
907
908
it = PyObject_GetIter(other);
909
if (it == NULL)
910
return -1;
911
912
while ((key = PyIter_Next(it)) != NULL) {
913
if (set_add_key(so, key)) {
914
Py_DECREF(it);
915
Py_DECREF(key);
916
return -1;
917
}
918
Py_DECREF(key);
919
}
920
Py_DECREF(it);
921
if (PyErr_Occurred())
922
return -1;
923
return 0;
924
}
925
926
static PyObject *
927
set_update(PySetObject *so, PyObject *args)
928
{
929
Py_ssize_t i;
930
931
for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
932
PyObject *other = PyTuple_GET_ITEM(args, i);
933
if (set_update_internal(so, other))
934
return NULL;
935
}
936
Py_RETURN_NONE;
937
}
938
939
PyDoc_STRVAR(update_doc,
940
"Update a set with the union of itself and others.");
941
942
/* XXX Todo:
943
If aligned memory allocations become available, make the
944
set object 64 byte aligned so that most of the fields
945
can be retrieved or updated in a single cache line.
946
*/
947
948
static PyObject *
949
make_new_set(PyTypeObject *type, PyObject *iterable)
950
{
951
assert(PyType_Check(type));
952
PySetObject *so;
953
954
so = (PySetObject *)type->tp_alloc(type, 0);
955
if (so == NULL)
956
return NULL;
957
958
so->fill = 0;
959
so->used = 0;
960
so->mask = PySet_MINSIZE - 1;
961
so->table = so->smalltable;
962
so->hash = -1;
963
so->finger = 0;
964
so->weakreflist = NULL;
965
966
if (iterable != NULL) {
967
if (set_update_internal(so, iterable)) {
968
Py_DECREF(so);
969
return NULL;
970
}
971
}
972
973
return (PyObject *)so;
974
}
975
976
static PyObject *
977
make_new_set_basetype(PyTypeObject *type, PyObject *iterable)
978
{
979
if (type != &PySet_Type && type != &PyFrozenSet_Type) {
980
if (PyType_IsSubtype(type, &PySet_Type))
981
type = &PySet_Type;
982
else
983
type = &PyFrozenSet_Type;
984
}
985
return make_new_set(type, iterable);
986
}
987
988
static PyObject *
989
make_new_frozenset(PyTypeObject *type, PyObject *iterable)
990
{
991
if (type != &PyFrozenSet_Type) {
992
return make_new_set(type, iterable);
993
}
994
995
if (iterable != NULL && PyFrozenSet_CheckExact(iterable)) {
996
/* frozenset(f) is idempotent */
997
return Py_NewRef(iterable);
998
}
999
return make_new_set(type, iterable);
1000
}
1001
1002
static PyObject *
1003
frozenset_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1004
{
1005
PyObject *iterable = NULL;
1006
1007
if ((type == &PyFrozenSet_Type ||
1008
type->tp_init == PyFrozenSet_Type.tp_init) &&
1009
!_PyArg_NoKeywords("frozenset", kwds)) {
1010
return NULL;
1011
}
1012
1013
if (!PyArg_UnpackTuple(args, type->tp_name, 0, 1, &iterable)) {
1014
return NULL;
1015
}
1016
1017
return make_new_frozenset(type, iterable);
1018
}
1019
1020
static PyObject *
1021
frozenset_vectorcall(PyObject *type, PyObject * const*args,
1022
size_t nargsf, PyObject *kwnames)
1023
{
1024
if (!_PyArg_NoKwnames("frozenset", kwnames)) {
1025
return NULL;
1026
}
1027
1028
Py_ssize_t nargs = PyVectorcall_NARGS(nargsf);
1029
if (!_PyArg_CheckPositional("frozenset", nargs, 0, 1)) {
1030
return NULL;
1031
}
1032
1033
PyObject *iterable = (nargs ? args[0] : NULL);
1034
return make_new_frozenset(_PyType_CAST(type), iterable);
1035
}
1036
1037
static PyObject *
1038
set_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1039
{
1040
return make_new_set(type, NULL);
1041
}
1042
1043
/* set_swap_bodies() switches the contents of any two sets by moving their
1044
internal data pointers and, if needed, copying the internal smalltables.
1045
Semantically equivalent to:
1046
1047
t=set(a); a.clear(); a.update(b); b.clear(); b.update(t); del t
1048
1049
The function always succeeds and it leaves both objects in a stable state.
1050
Useful for operations that update in-place (by allowing an intermediate
1051
result to be swapped into one of the original inputs).
1052
*/
1053
1054
static void
1055
set_swap_bodies(PySetObject *a, PySetObject *b)
1056
{
1057
Py_ssize_t t;
1058
setentry *u;
1059
setentry tab[PySet_MINSIZE];
1060
Py_hash_t h;
1061
1062
t = a->fill; a->fill = b->fill; b->fill = t;
1063
t = a->used; a->used = b->used; b->used = t;
1064
t = a->mask; a->mask = b->mask; b->mask = t;
1065
1066
u = a->table;
1067
if (a->table == a->smalltable)
1068
u = b->smalltable;
1069
a->table = b->table;
1070
if (b->table == b->smalltable)
1071
a->table = a->smalltable;
1072
b->table = u;
1073
1074
if (a->table == a->smalltable || b->table == b->smalltable) {
1075
memcpy(tab, a->smalltable, sizeof(tab));
1076
memcpy(a->smalltable, b->smalltable, sizeof(tab));
1077
memcpy(b->smalltable, tab, sizeof(tab));
1078
}
1079
1080
if (PyType_IsSubtype(Py_TYPE(a), &PyFrozenSet_Type) &&
1081
PyType_IsSubtype(Py_TYPE(b), &PyFrozenSet_Type)) {
1082
h = a->hash; a->hash = b->hash; b->hash = h;
1083
} else {
1084
a->hash = -1;
1085
b->hash = -1;
1086
}
1087
}
1088
1089
static PyObject *
1090
set_copy(PySetObject *so, PyObject *Py_UNUSED(ignored))
1091
{
1092
return make_new_set_basetype(Py_TYPE(so), (PyObject *)so);
1093
}
1094
1095
static PyObject *
1096
frozenset_copy(PySetObject *so, PyObject *Py_UNUSED(ignored))
1097
{
1098
if (PyFrozenSet_CheckExact(so)) {
1099
return Py_NewRef(so);
1100
}
1101
return set_copy(so, NULL);
1102
}
1103
1104
PyDoc_STRVAR(copy_doc, "Return a shallow copy of a set.");
1105
1106
static PyObject *
1107
set_clear(PySetObject *so, PyObject *Py_UNUSED(ignored))
1108
{
1109
set_clear_internal(so);
1110
Py_RETURN_NONE;
1111
}
1112
1113
PyDoc_STRVAR(clear_doc, "Remove all elements from this set.");
1114
1115
static PyObject *
1116
set_union(PySetObject *so, PyObject *args)
1117
{
1118
PySetObject *result;
1119
PyObject *other;
1120
Py_ssize_t i;
1121
1122
result = (PySetObject *)set_copy(so, NULL);
1123
if (result == NULL)
1124
return NULL;
1125
1126
for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
1127
other = PyTuple_GET_ITEM(args, i);
1128
if ((PyObject *)so == other)
1129
continue;
1130
if (set_update_internal(result, other)) {
1131
Py_DECREF(result);
1132
return NULL;
1133
}
1134
}
1135
return (PyObject *)result;
1136
}
1137
1138
PyDoc_STRVAR(union_doc,
1139
"Return the union of sets as a new set.\n\
1140
\n\
1141
(i.e. all elements that are in either set.)");
1142
1143
static PyObject *
1144
set_or(PySetObject *so, PyObject *other)
1145
{
1146
PySetObject *result;
1147
1148
if (!PyAnySet_Check(so) || !PyAnySet_Check(other))
1149
Py_RETURN_NOTIMPLEMENTED;
1150
1151
result = (PySetObject *)set_copy(so, NULL);
1152
if (result == NULL)
1153
return NULL;
1154
if ((PyObject *)so == other)
1155
return (PyObject *)result;
1156
if (set_update_internal(result, other)) {
1157
Py_DECREF(result);
1158
return NULL;
1159
}
1160
return (PyObject *)result;
1161
}
1162
1163
static PyObject *
1164
set_ior(PySetObject *so, PyObject *other)
1165
{
1166
if (!PyAnySet_Check(other))
1167
Py_RETURN_NOTIMPLEMENTED;
1168
1169
if (set_update_internal(so, other))
1170
return NULL;
1171
return Py_NewRef(so);
1172
}
1173
1174
static PyObject *
1175
set_intersection(PySetObject *so, PyObject *other)
1176
{
1177
PySetObject *result;
1178
PyObject *key, *it, *tmp;
1179
Py_hash_t hash;
1180
int rv;
1181
1182
if ((PyObject *)so == other)
1183
return set_copy(so, NULL);
1184
1185
result = (PySetObject *)make_new_set_basetype(Py_TYPE(so), NULL);
1186
if (result == NULL)
1187
return NULL;
1188
1189
if (PyAnySet_Check(other)) {
1190
Py_ssize_t pos = 0;
1191
setentry *entry;
1192
1193
if (PySet_GET_SIZE(other) > PySet_GET_SIZE(so)) {
1194
tmp = (PyObject *)so;
1195
so = (PySetObject *)other;
1196
other = tmp;
1197
}
1198
1199
while (set_next((PySetObject *)other, &pos, &entry)) {
1200
key = entry->key;
1201
hash = entry->hash;
1202
Py_INCREF(key);
1203
rv = set_contains_entry(so, key, hash);
1204
if (rv < 0) {
1205
Py_DECREF(result);
1206
Py_DECREF(key);
1207
return NULL;
1208
}
1209
if (rv) {
1210
if (set_add_entry(result, key, hash)) {
1211
Py_DECREF(result);
1212
Py_DECREF(key);
1213
return NULL;
1214
}
1215
}
1216
Py_DECREF(key);
1217
}
1218
return (PyObject *)result;
1219
}
1220
1221
it = PyObject_GetIter(other);
1222
if (it == NULL) {
1223
Py_DECREF(result);
1224
return NULL;
1225
}
1226
1227
while ((key = PyIter_Next(it)) != NULL) {
1228
hash = PyObject_Hash(key);
1229
if (hash == -1)
1230
goto error;
1231
rv = set_contains_entry(so, key, hash);
1232
if (rv < 0)
1233
goto error;
1234
if (rv) {
1235
if (set_add_entry(result, key, hash))
1236
goto error;
1237
if (PySet_GET_SIZE(result) >= PySet_GET_SIZE(so)) {
1238
Py_DECREF(key);
1239
break;
1240
}
1241
}
1242
Py_DECREF(key);
1243
}
1244
Py_DECREF(it);
1245
if (PyErr_Occurred()) {
1246
Py_DECREF(result);
1247
return NULL;
1248
}
1249
return (PyObject *)result;
1250
error:
1251
Py_DECREF(it);
1252
Py_DECREF(result);
1253
Py_DECREF(key);
1254
return NULL;
1255
}
1256
1257
static PyObject *
1258
set_intersection_multi(PySetObject *so, PyObject *args)
1259
{
1260
Py_ssize_t i;
1261
1262
if (PyTuple_GET_SIZE(args) == 0)
1263
return set_copy(so, NULL);
1264
1265
PyObject *result = Py_NewRef(so);
1266
for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
1267
PyObject *other = PyTuple_GET_ITEM(args, i);
1268
PyObject *newresult = set_intersection((PySetObject *)result, other);
1269
if (newresult == NULL) {
1270
Py_DECREF(result);
1271
return NULL;
1272
}
1273
Py_SETREF(result, newresult);
1274
}
1275
return result;
1276
}
1277
1278
PyDoc_STRVAR(intersection_doc,
1279
"Return the intersection of two sets as a new set.\n\
1280
\n\
1281
(i.e. all elements that are in both sets.)");
1282
1283
static PyObject *
1284
set_intersection_update(PySetObject *so, PyObject *other)
1285
{
1286
PyObject *tmp;
1287
1288
tmp = set_intersection(so, other);
1289
if (tmp == NULL)
1290
return NULL;
1291
set_swap_bodies(so, (PySetObject *)tmp);
1292
Py_DECREF(tmp);
1293
Py_RETURN_NONE;
1294
}
1295
1296
static PyObject *
1297
set_intersection_update_multi(PySetObject *so, PyObject *args)
1298
{
1299
PyObject *tmp;
1300
1301
tmp = set_intersection_multi(so, args);
1302
if (tmp == NULL)
1303
return NULL;
1304
set_swap_bodies(so, (PySetObject *)tmp);
1305
Py_DECREF(tmp);
1306
Py_RETURN_NONE;
1307
}
1308
1309
PyDoc_STRVAR(intersection_update_doc,
1310
"Update a set with the intersection of itself and another.");
1311
1312
static PyObject *
1313
set_and(PySetObject *so, PyObject *other)
1314
{
1315
if (!PyAnySet_Check(so) || !PyAnySet_Check(other))
1316
Py_RETURN_NOTIMPLEMENTED;
1317
return set_intersection(so, other);
1318
}
1319
1320
static PyObject *
1321
set_iand(PySetObject *so, PyObject *other)
1322
{
1323
PyObject *result;
1324
1325
if (!PyAnySet_Check(other))
1326
Py_RETURN_NOTIMPLEMENTED;
1327
result = set_intersection_update(so, other);
1328
if (result == NULL)
1329
return NULL;
1330
Py_DECREF(result);
1331
return Py_NewRef(so);
1332
}
1333
1334
static PyObject *
1335
set_isdisjoint(PySetObject *so, PyObject *other)
1336
{
1337
PyObject *key, *it, *tmp;
1338
int rv;
1339
1340
if ((PyObject *)so == other) {
1341
if (PySet_GET_SIZE(so) == 0)
1342
Py_RETURN_TRUE;
1343
else
1344
Py_RETURN_FALSE;
1345
}
1346
1347
if (PyAnySet_CheckExact(other)) {
1348
Py_ssize_t pos = 0;
1349
setentry *entry;
1350
1351
if (PySet_GET_SIZE(other) > PySet_GET_SIZE(so)) {
1352
tmp = (PyObject *)so;
1353
so = (PySetObject *)other;
1354
other = tmp;
1355
}
1356
while (set_next((PySetObject *)other, &pos, &entry)) {
1357
PyObject *key = entry->key;
1358
Py_INCREF(key);
1359
rv = set_contains_entry(so, key, entry->hash);
1360
Py_DECREF(key);
1361
if (rv < 0) {
1362
return NULL;
1363
}
1364
if (rv) {
1365
Py_RETURN_FALSE;
1366
}
1367
}
1368
Py_RETURN_TRUE;
1369
}
1370
1371
it = PyObject_GetIter(other);
1372
if (it == NULL)
1373
return NULL;
1374
1375
while ((key = PyIter_Next(it)) != NULL) {
1376
rv = set_contains_key(so, key);
1377
Py_DECREF(key);
1378
if (rv < 0) {
1379
Py_DECREF(it);
1380
return NULL;
1381
}
1382
if (rv) {
1383
Py_DECREF(it);
1384
Py_RETURN_FALSE;
1385
}
1386
}
1387
Py_DECREF(it);
1388
if (PyErr_Occurred())
1389
return NULL;
1390
Py_RETURN_TRUE;
1391
}
1392
1393
PyDoc_STRVAR(isdisjoint_doc,
1394
"Return True if two sets have a null intersection.");
1395
1396
static int
1397
set_difference_update_internal(PySetObject *so, PyObject *other)
1398
{
1399
if ((PyObject *)so == other)
1400
return set_clear_internal(so);
1401
1402
if (PyAnySet_Check(other)) {
1403
setentry *entry;
1404
Py_ssize_t pos = 0;
1405
1406
/* Optimization: When the other set is more than 8 times
1407
larger than the base set, replace the other set with
1408
intersection of the two sets.
1409
*/
1410
if ((PySet_GET_SIZE(other) >> 3) > PySet_GET_SIZE(so)) {
1411
other = set_intersection(so, other);
1412
if (other == NULL)
1413
return -1;
1414
} else {
1415
Py_INCREF(other);
1416
}
1417
1418
while (set_next((PySetObject *)other, &pos, &entry)) {
1419
PyObject *key = entry->key;
1420
Py_INCREF(key);
1421
if (set_discard_entry(so, key, entry->hash) < 0) {
1422
Py_DECREF(other);
1423
Py_DECREF(key);
1424
return -1;
1425
}
1426
Py_DECREF(key);
1427
}
1428
1429
Py_DECREF(other);
1430
} else {
1431
PyObject *key, *it;
1432
it = PyObject_GetIter(other);
1433
if (it == NULL)
1434
return -1;
1435
1436
while ((key = PyIter_Next(it)) != NULL) {
1437
if (set_discard_key(so, key) < 0) {
1438
Py_DECREF(it);
1439
Py_DECREF(key);
1440
return -1;
1441
}
1442
Py_DECREF(key);
1443
}
1444
Py_DECREF(it);
1445
if (PyErr_Occurred())
1446
return -1;
1447
}
1448
/* If more than 1/4th are dummies, then resize them away. */
1449
if ((size_t)(so->fill - so->used) <= (size_t)so->mask / 4)
1450
return 0;
1451
return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4);
1452
}
1453
1454
static PyObject *
1455
set_difference_update(PySetObject *so, PyObject *args)
1456
{
1457
Py_ssize_t i;
1458
1459
for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
1460
PyObject *other = PyTuple_GET_ITEM(args, i);
1461
if (set_difference_update_internal(so, other))
1462
return NULL;
1463
}
1464
Py_RETURN_NONE;
1465
}
1466
1467
PyDoc_STRVAR(difference_update_doc,
1468
"Remove all elements of another set from this set.");
1469
1470
static PyObject *
1471
set_copy_and_difference(PySetObject *so, PyObject *other)
1472
{
1473
PyObject *result;
1474
1475
result = set_copy(so, NULL);
1476
if (result == NULL)
1477
return NULL;
1478
if (set_difference_update_internal((PySetObject *) result, other) == 0)
1479
return result;
1480
Py_DECREF(result);
1481
return NULL;
1482
}
1483
1484
static PyObject *
1485
set_difference(PySetObject *so, PyObject *other)
1486
{
1487
PyObject *result;
1488
PyObject *key;
1489
Py_hash_t hash;
1490
setentry *entry;
1491
Py_ssize_t pos = 0, other_size;
1492
int rv;
1493
1494
if (PyAnySet_Check(other)) {
1495
other_size = PySet_GET_SIZE(other);
1496
}
1497
else if (PyDict_CheckExact(other)) {
1498
other_size = PyDict_GET_SIZE(other);
1499
}
1500
else {
1501
return set_copy_and_difference(so, other);
1502
}
1503
1504
/* If len(so) much more than len(other), it's more efficient to simply copy
1505
* so and then iterate other looking for common elements. */
1506
if ((PySet_GET_SIZE(so) >> 2) > other_size) {
1507
return set_copy_and_difference(so, other);
1508
}
1509
1510
result = make_new_set_basetype(Py_TYPE(so), NULL);
1511
if (result == NULL)
1512
return NULL;
1513
1514
if (PyDict_CheckExact(other)) {
1515
while (set_next(so, &pos, &entry)) {
1516
key = entry->key;
1517
hash = entry->hash;
1518
Py_INCREF(key);
1519
rv = _PyDict_Contains_KnownHash(other, key, hash);
1520
if (rv < 0) {
1521
Py_DECREF(result);
1522
Py_DECREF(key);
1523
return NULL;
1524
}
1525
if (!rv) {
1526
if (set_add_entry((PySetObject *)result, key, hash)) {
1527
Py_DECREF(result);
1528
Py_DECREF(key);
1529
return NULL;
1530
}
1531
}
1532
Py_DECREF(key);
1533
}
1534
return result;
1535
}
1536
1537
/* Iterate over so, checking for common elements in other. */
1538
while (set_next(so, &pos, &entry)) {
1539
key = entry->key;
1540
hash = entry->hash;
1541
Py_INCREF(key);
1542
rv = set_contains_entry((PySetObject *)other, key, hash);
1543
if (rv < 0) {
1544
Py_DECREF(result);
1545
Py_DECREF(key);
1546
return NULL;
1547
}
1548
if (!rv) {
1549
if (set_add_entry((PySetObject *)result, key, hash)) {
1550
Py_DECREF(result);
1551
Py_DECREF(key);
1552
return NULL;
1553
}
1554
}
1555
Py_DECREF(key);
1556
}
1557
return result;
1558
}
1559
1560
static PyObject *
1561
set_difference_multi(PySetObject *so, PyObject *args)
1562
{
1563
Py_ssize_t i;
1564
PyObject *result, *other;
1565
1566
if (PyTuple_GET_SIZE(args) == 0)
1567
return set_copy(so, NULL);
1568
1569
other = PyTuple_GET_ITEM(args, 0);
1570
result = set_difference(so, other);
1571
if (result == NULL)
1572
return NULL;
1573
1574
for (i=1 ; i<PyTuple_GET_SIZE(args) ; i++) {
1575
other = PyTuple_GET_ITEM(args, i);
1576
if (set_difference_update_internal((PySetObject *)result, other)) {
1577
Py_DECREF(result);
1578
return NULL;
1579
}
1580
}
1581
return result;
1582
}
1583
1584
PyDoc_STRVAR(difference_doc,
1585
"Return the difference of two or more sets as a new set.\n\
1586
\n\
1587
(i.e. all elements that are in this set but not the others.)");
1588
static PyObject *
1589
set_sub(PySetObject *so, PyObject *other)
1590
{
1591
if (!PyAnySet_Check(so) || !PyAnySet_Check(other))
1592
Py_RETURN_NOTIMPLEMENTED;
1593
return set_difference(so, other);
1594
}
1595
1596
static PyObject *
1597
set_isub(PySetObject *so, PyObject *other)
1598
{
1599
if (!PyAnySet_Check(other))
1600
Py_RETURN_NOTIMPLEMENTED;
1601
if (set_difference_update_internal(so, other))
1602
return NULL;
1603
return Py_NewRef(so);
1604
}
1605
1606
static PyObject *
1607
set_symmetric_difference_update(PySetObject *so, PyObject *other)
1608
{
1609
PySetObject *otherset;
1610
PyObject *key;
1611
Py_ssize_t pos = 0;
1612
Py_hash_t hash;
1613
setentry *entry;
1614
int rv;
1615
1616
if ((PyObject *)so == other)
1617
return set_clear(so, NULL);
1618
1619
if (PyDict_CheckExact(other)) {
1620
PyObject *value;
1621
while (_PyDict_Next(other, &pos, &key, &value, &hash)) {
1622
Py_INCREF(key);
1623
rv = set_discard_entry(so, key, hash);
1624
if (rv < 0) {
1625
Py_DECREF(key);
1626
return NULL;
1627
}
1628
if (rv == DISCARD_NOTFOUND) {
1629
if (set_add_entry(so, key, hash)) {
1630
Py_DECREF(key);
1631
return NULL;
1632
}
1633
}
1634
Py_DECREF(key);
1635
}
1636
Py_RETURN_NONE;
1637
}
1638
1639
if (PyAnySet_Check(other)) {
1640
otherset = (PySetObject *)Py_NewRef(other);
1641
} else {
1642
otherset = (PySetObject *)make_new_set_basetype(Py_TYPE(so), other);
1643
if (otherset == NULL)
1644
return NULL;
1645
}
1646
1647
while (set_next(otherset, &pos, &entry)) {
1648
key = entry->key;
1649
hash = entry->hash;
1650
Py_INCREF(key);
1651
rv = set_discard_entry(so, key, hash);
1652
if (rv < 0) {
1653
Py_DECREF(otherset);
1654
Py_DECREF(key);
1655
return NULL;
1656
}
1657
if (rv == DISCARD_NOTFOUND) {
1658
if (set_add_entry(so, key, hash)) {
1659
Py_DECREF(otherset);
1660
Py_DECREF(key);
1661
return NULL;
1662
}
1663
}
1664
Py_DECREF(key);
1665
}
1666
Py_DECREF(otherset);
1667
Py_RETURN_NONE;
1668
}
1669
1670
PyDoc_STRVAR(symmetric_difference_update_doc,
1671
"Update a set with the symmetric difference of itself and another.");
1672
1673
static PyObject *
1674
set_symmetric_difference(PySetObject *so, PyObject *other)
1675
{
1676
PyObject *rv;
1677
PySetObject *otherset;
1678
1679
otherset = (PySetObject *)make_new_set_basetype(Py_TYPE(so), other);
1680
if (otherset == NULL)
1681
return NULL;
1682
rv = set_symmetric_difference_update(otherset, (PyObject *)so);
1683
if (rv == NULL) {
1684
Py_DECREF(otherset);
1685
return NULL;
1686
}
1687
Py_DECREF(rv);
1688
return (PyObject *)otherset;
1689
}
1690
1691
PyDoc_STRVAR(symmetric_difference_doc,
1692
"Return the symmetric difference of two sets as a new set.\n\
1693
\n\
1694
(i.e. all elements that are in exactly one of the sets.)");
1695
1696
static PyObject *
1697
set_xor(PySetObject *so, PyObject *other)
1698
{
1699
if (!PyAnySet_Check(so) || !PyAnySet_Check(other))
1700
Py_RETURN_NOTIMPLEMENTED;
1701
return set_symmetric_difference(so, other);
1702
}
1703
1704
static PyObject *
1705
set_ixor(PySetObject *so, PyObject *other)
1706
{
1707
PyObject *result;
1708
1709
if (!PyAnySet_Check(other))
1710
Py_RETURN_NOTIMPLEMENTED;
1711
result = set_symmetric_difference_update(so, other);
1712
if (result == NULL)
1713
return NULL;
1714
Py_DECREF(result);
1715
return Py_NewRef(so);
1716
}
1717
1718
static PyObject *
1719
set_issubset(PySetObject *so, PyObject *other)
1720
{
1721
setentry *entry;
1722
Py_ssize_t pos = 0;
1723
int rv;
1724
1725
if (!PyAnySet_Check(other)) {
1726
PyObject *tmp = set_intersection(so, other);
1727
if (tmp == NULL) {
1728
return NULL;
1729
}
1730
int result = (PySet_GET_SIZE(tmp) == PySet_GET_SIZE(so));
1731
Py_DECREF(tmp);
1732
return PyBool_FromLong(result);
1733
}
1734
if (PySet_GET_SIZE(so) > PySet_GET_SIZE(other))
1735
Py_RETURN_FALSE;
1736
1737
while (set_next(so, &pos, &entry)) {
1738
PyObject *key = entry->key;
1739
Py_INCREF(key);
1740
rv = set_contains_entry((PySetObject *)other, key, entry->hash);
1741
Py_DECREF(key);
1742
if (rv < 0) {
1743
return NULL;
1744
}
1745
if (!rv) {
1746
Py_RETURN_FALSE;
1747
}
1748
}
1749
Py_RETURN_TRUE;
1750
}
1751
1752
PyDoc_STRVAR(issubset_doc, "Report whether another set contains this set.");
1753
1754
static PyObject *
1755
set_issuperset(PySetObject *so, PyObject *other)
1756
{
1757
if (PyAnySet_Check(other)) {
1758
return set_issubset((PySetObject *)other, (PyObject *)so);
1759
}
1760
1761
PyObject *key, *it = PyObject_GetIter(other);
1762
if (it == NULL) {
1763
return NULL;
1764
}
1765
while ((key = PyIter_Next(it)) != NULL) {
1766
int rv = set_contains_key(so, key);
1767
Py_DECREF(key);
1768
if (rv < 0) {
1769
Py_DECREF(it);
1770
return NULL;
1771
}
1772
if (!rv) {
1773
Py_DECREF(it);
1774
Py_RETURN_FALSE;
1775
}
1776
}
1777
Py_DECREF(it);
1778
if (PyErr_Occurred()) {
1779
return NULL;
1780
}
1781
Py_RETURN_TRUE;
1782
}
1783
1784
PyDoc_STRVAR(issuperset_doc, "Report whether this set contains another set.");
1785
1786
static PyObject *
1787
set_richcompare(PySetObject *v, PyObject *w, int op)
1788
{
1789
PyObject *r1;
1790
int r2;
1791
1792
if(!PyAnySet_Check(w))
1793
Py_RETURN_NOTIMPLEMENTED;
1794
1795
switch (op) {
1796
case Py_EQ:
1797
if (PySet_GET_SIZE(v) != PySet_GET_SIZE(w))
1798
Py_RETURN_FALSE;
1799
if (v->hash != -1 &&
1800
((PySetObject *)w)->hash != -1 &&
1801
v->hash != ((PySetObject *)w)->hash)
1802
Py_RETURN_FALSE;
1803
return set_issubset(v, w);
1804
case Py_NE:
1805
r1 = set_richcompare(v, w, Py_EQ);
1806
if (r1 == NULL)
1807
return NULL;
1808
r2 = PyObject_IsTrue(r1);
1809
Py_DECREF(r1);
1810
if (r2 < 0)
1811
return NULL;
1812
return PyBool_FromLong(!r2);
1813
case Py_LE:
1814
return set_issubset(v, w);
1815
case Py_GE:
1816
return set_issuperset(v, w);
1817
case Py_LT:
1818
if (PySet_GET_SIZE(v) >= PySet_GET_SIZE(w))
1819
Py_RETURN_FALSE;
1820
return set_issubset(v, w);
1821
case Py_GT:
1822
if (PySet_GET_SIZE(v) <= PySet_GET_SIZE(w))
1823
Py_RETURN_FALSE;
1824
return set_issuperset(v, w);
1825
}
1826
Py_RETURN_NOTIMPLEMENTED;
1827
}
1828
1829
static PyObject *
1830
set_add(PySetObject *so, PyObject *key)
1831
{
1832
if (set_add_key(so, key))
1833
return NULL;
1834
Py_RETURN_NONE;
1835
}
1836
1837
PyDoc_STRVAR(add_doc,
1838
"Add an element to a set.\n\
1839
\n\
1840
This has no effect if the element is already present.");
1841
1842
static int
1843
set_contains(PySetObject *so, PyObject *key)
1844
{
1845
PyObject *tmpkey;
1846
int rv;
1847
1848
rv = set_contains_key(so, key);
1849
if (rv < 0) {
1850
if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
1851
return -1;
1852
PyErr_Clear();
1853
tmpkey = make_new_set(&PyFrozenSet_Type, key);
1854
if (tmpkey == NULL)
1855
return -1;
1856
rv = set_contains_key(so, tmpkey);
1857
Py_DECREF(tmpkey);
1858
}
1859
return rv;
1860
}
1861
1862
static PyObject *
1863
set_direct_contains(PySetObject *so, PyObject *key)
1864
{
1865
long result;
1866
1867
result = set_contains(so, key);
1868
if (result < 0)
1869
return NULL;
1870
return PyBool_FromLong(result);
1871
}
1872
1873
PyDoc_STRVAR(contains_doc, "x.__contains__(y) <==> y in x.");
1874
1875
static PyObject *
1876
set_remove(PySetObject *so, PyObject *key)
1877
{
1878
PyObject *tmpkey;
1879
int rv;
1880
1881
rv = set_discard_key(so, key);
1882
if (rv < 0) {
1883
if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
1884
return NULL;
1885
PyErr_Clear();
1886
tmpkey = make_new_set(&PyFrozenSet_Type, key);
1887
if (tmpkey == NULL)
1888
return NULL;
1889
rv = set_discard_key(so, tmpkey);
1890
Py_DECREF(tmpkey);
1891
if (rv < 0)
1892
return NULL;
1893
}
1894
1895
if (rv == DISCARD_NOTFOUND) {
1896
_PyErr_SetKeyError(key);
1897
return NULL;
1898
}
1899
Py_RETURN_NONE;
1900
}
1901
1902
PyDoc_STRVAR(remove_doc,
1903
"Remove an element from a set; it must be a member.\n\
1904
\n\
1905
If the element is not a member, raise a KeyError.");
1906
1907
static PyObject *
1908
set_discard(PySetObject *so, PyObject *key)
1909
{
1910
PyObject *tmpkey;
1911
int rv;
1912
1913
rv = set_discard_key(so, key);
1914
if (rv < 0) {
1915
if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
1916
return NULL;
1917
PyErr_Clear();
1918
tmpkey = make_new_set(&PyFrozenSet_Type, key);
1919
if (tmpkey == NULL)
1920
return NULL;
1921
rv = set_discard_key(so, tmpkey);
1922
Py_DECREF(tmpkey);
1923
if (rv < 0)
1924
return NULL;
1925
}
1926
Py_RETURN_NONE;
1927
}
1928
1929
PyDoc_STRVAR(discard_doc,
1930
"Remove an element from a set if it is a member.\n\
1931
\n\
1932
Unlike set.remove(), the discard() method does not raise\n\
1933
an exception when an element is missing from the set.");
1934
1935
static PyObject *
1936
set_reduce(PySetObject *so, PyObject *Py_UNUSED(ignored))
1937
{
1938
PyObject *keys=NULL, *args=NULL, *result=NULL, *state=NULL;
1939
1940
keys = PySequence_List((PyObject *)so);
1941
if (keys == NULL)
1942
goto done;
1943
args = PyTuple_Pack(1, keys);
1944
if (args == NULL)
1945
goto done;
1946
state = _PyObject_GetState((PyObject *)so);
1947
if (state == NULL)
1948
goto done;
1949
result = PyTuple_Pack(3, Py_TYPE(so), args, state);
1950
done:
1951
Py_XDECREF(args);
1952
Py_XDECREF(keys);
1953
Py_XDECREF(state);
1954
return result;
1955
}
1956
1957
static PyObject *
1958
set_sizeof(PySetObject *so, PyObject *Py_UNUSED(ignored))
1959
{
1960
size_t res = _PyObject_SIZE(Py_TYPE(so));
1961
if (so->table != so->smalltable) {
1962
res += ((size_t)so->mask + 1) * sizeof(setentry);
1963
}
1964
return PyLong_FromSize_t(res);
1965
}
1966
1967
PyDoc_STRVAR(sizeof_doc, "S.__sizeof__() -> size of S in memory, in bytes");
1968
static int
1969
set_init(PySetObject *self, PyObject *args, PyObject *kwds)
1970
{
1971
PyObject *iterable = NULL;
1972
1973
if (!_PyArg_NoKeywords("set", kwds))
1974
return -1;
1975
if (!PyArg_UnpackTuple(args, Py_TYPE(self)->tp_name, 0, 1, &iterable))
1976
return -1;
1977
if (self->fill)
1978
set_clear_internal(self);
1979
self->hash = -1;
1980
if (iterable == NULL)
1981
return 0;
1982
return set_update_internal(self, iterable);
1983
}
1984
1985
static PyObject*
1986
set_vectorcall(PyObject *type, PyObject * const*args,
1987
size_t nargsf, PyObject *kwnames)
1988
{
1989
assert(PyType_Check(type));
1990
1991
if (!_PyArg_NoKwnames("set", kwnames)) {
1992
return NULL;
1993
}
1994
1995
Py_ssize_t nargs = PyVectorcall_NARGS(nargsf);
1996
if (!_PyArg_CheckPositional("set", nargs, 0, 1)) {
1997
return NULL;
1998
}
1999
2000
if (nargs) {
2001
return make_new_set(_PyType_CAST(type), args[0]);
2002
}
2003
2004
return make_new_set(_PyType_CAST(type), NULL);
2005
}
2006
2007
static PySequenceMethods set_as_sequence = {
2008
set_len, /* sq_length */
2009
0, /* sq_concat */
2010
0, /* sq_repeat */
2011
0, /* sq_item */
2012
0, /* sq_slice */
2013
0, /* sq_ass_item */
2014
0, /* sq_ass_slice */
2015
(objobjproc)set_contains, /* sq_contains */
2016
};
2017
2018
/* set object ********************************************************/
2019
2020
#ifdef Py_DEBUG
2021
static PyObject *test_c_api(PySetObject *so, PyObject *Py_UNUSED(ignored));
2022
2023
PyDoc_STRVAR(test_c_api_doc, "Exercises C API. Returns True.\n\
2024
All is well if assertions don't fail.");
2025
#endif
2026
2027
static PyMethodDef set_methods[] = {
2028
{"add", (PyCFunction)set_add, METH_O,
2029
add_doc},
2030
{"clear", (PyCFunction)set_clear, METH_NOARGS,
2031
clear_doc},
2032
{"__contains__",(PyCFunction)set_direct_contains, METH_O | METH_COEXIST,
2033
contains_doc},
2034
{"copy", (PyCFunction)set_copy, METH_NOARGS,
2035
copy_doc},
2036
{"discard", (PyCFunction)set_discard, METH_O,
2037
discard_doc},
2038
{"difference", (PyCFunction)set_difference_multi, METH_VARARGS,
2039
difference_doc},
2040
{"difference_update", (PyCFunction)set_difference_update, METH_VARARGS,
2041
difference_update_doc},
2042
{"intersection",(PyCFunction)set_intersection_multi, METH_VARARGS,
2043
intersection_doc},
2044
{"intersection_update",(PyCFunction)set_intersection_update_multi, METH_VARARGS,
2045
intersection_update_doc},
2046
{"isdisjoint", (PyCFunction)set_isdisjoint, METH_O,
2047
isdisjoint_doc},
2048
{"issubset", (PyCFunction)set_issubset, METH_O,
2049
issubset_doc},
2050
{"issuperset", (PyCFunction)set_issuperset, METH_O,
2051
issuperset_doc},
2052
{"pop", (PyCFunction)set_pop, METH_NOARGS,
2053
pop_doc},
2054
{"__reduce__", (PyCFunction)set_reduce, METH_NOARGS,
2055
reduce_doc},
2056
{"remove", (PyCFunction)set_remove, METH_O,
2057
remove_doc},
2058
{"__sizeof__", (PyCFunction)set_sizeof, METH_NOARGS,
2059
sizeof_doc},
2060
{"symmetric_difference",(PyCFunction)set_symmetric_difference, METH_O,
2061
symmetric_difference_doc},
2062
{"symmetric_difference_update",(PyCFunction)set_symmetric_difference_update, METH_O,
2063
symmetric_difference_update_doc},
2064
#ifdef Py_DEBUG
2065
{"test_c_api", (PyCFunction)test_c_api, METH_NOARGS,
2066
test_c_api_doc},
2067
#endif
2068
{"union", (PyCFunction)set_union, METH_VARARGS,
2069
union_doc},
2070
{"update", (PyCFunction)set_update, METH_VARARGS,
2071
update_doc},
2072
{"__class_getitem__", Py_GenericAlias, METH_O|METH_CLASS, PyDoc_STR("See PEP 585")},
2073
{NULL, NULL} /* sentinel */
2074
};
2075
2076
static PyNumberMethods set_as_number = {
2077
0, /*nb_add*/
2078
(binaryfunc)set_sub, /*nb_subtract*/
2079
0, /*nb_multiply*/
2080
0, /*nb_remainder*/
2081
0, /*nb_divmod*/
2082
0, /*nb_power*/
2083
0, /*nb_negative*/
2084
0, /*nb_positive*/
2085
0, /*nb_absolute*/
2086
0, /*nb_bool*/
2087
0, /*nb_invert*/
2088
0, /*nb_lshift*/
2089
0, /*nb_rshift*/
2090
(binaryfunc)set_and, /*nb_and*/
2091
(binaryfunc)set_xor, /*nb_xor*/
2092
(binaryfunc)set_or, /*nb_or*/
2093
0, /*nb_int*/
2094
0, /*nb_reserved*/
2095
0, /*nb_float*/
2096
0, /*nb_inplace_add*/
2097
(binaryfunc)set_isub, /*nb_inplace_subtract*/
2098
0, /*nb_inplace_multiply*/
2099
0, /*nb_inplace_remainder*/
2100
0, /*nb_inplace_power*/
2101
0, /*nb_inplace_lshift*/
2102
0, /*nb_inplace_rshift*/
2103
(binaryfunc)set_iand, /*nb_inplace_and*/
2104
(binaryfunc)set_ixor, /*nb_inplace_xor*/
2105
(binaryfunc)set_ior, /*nb_inplace_or*/
2106
};
2107
2108
PyDoc_STRVAR(set_doc,
2109
"set() -> new empty set object\n\
2110
set(iterable) -> new set object\n\
2111
\n\
2112
Build an unordered collection of unique elements.");
2113
2114
PyTypeObject PySet_Type = {
2115
PyVarObject_HEAD_INIT(&PyType_Type, 0)
2116
"set", /* tp_name */
2117
sizeof(PySetObject), /* tp_basicsize */
2118
0, /* tp_itemsize */
2119
/* methods */
2120
(destructor)set_dealloc, /* tp_dealloc */
2121
0, /* tp_vectorcall_offset */
2122
0, /* tp_getattr */
2123
0, /* tp_setattr */
2124
0, /* tp_as_async */
2125
(reprfunc)set_repr, /* tp_repr */
2126
&set_as_number, /* tp_as_number */
2127
&set_as_sequence, /* tp_as_sequence */
2128
0, /* tp_as_mapping */
2129
PyObject_HashNotImplemented, /* tp_hash */
2130
0, /* tp_call */
2131
0, /* tp_str */
2132
PyObject_GenericGetAttr, /* tp_getattro */
2133
0, /* tp_setattro */
2134
0, /* tp_as_buffer */
2135
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
2136
Py_TPFLAGS_BASETYPE |
2137
_Py_TPFLAGS_MATCH_SELF, /* tp_flags */
2138
set_doc, /* tp_doc */
2139
(traverseproc)set_traverse, /* tp_traverse */
2140
(inquiry)set_clear_internal, /* tp_clear */
2141
(richcmpfunc)set_richcompare, /* tp_richcompare */
2142
offsetof(PySetObject, weakreflist), /* tp_weaklistoffset */
2143
(getiterfunc)set_iter, /* tp_iter */
2144
0, /* tp_iternext */
2145
set_methods, /* tp_methods */
2146
0, /* tp_members */
2147
0, /* tp_getset */
2148
0, /* tp_base */
2149
0, /* tp_dict */
2150
0, /* tp_descr_get */
2151
0, /* tp_descr_set */
2152
0, /* tp_dictoffset */
2153
(initproc)set_init, /* tp_init */
2154
PyType_GenericAlloc, /* tp_alloc */
2155
set_new, /* tp_new */
2156
PyObject_GC_Del, /* tp_free */
2157
.tp_vectorcall = set_vectorcall,
2158
};
2159
2160
/* frozenset object ********************************************************/
2161
2162
2163
static PyMethodDef frozenset_methods[] = {
2164
{"__contains__",(PyCFunction)set_direct_contains, METH_O | METH_COEXIST,
2165
contains_doc},
2166
{"copy", (PyCFunction)frozenset_copy, METH_NOARGS,
2167
copy_doc},
2168
{"difference", (PyCFunction)set_difference_multi, METH_VARARGS,
2169
difference_doc},
2170
{"intersection", (PyCFunction)set_intersection_multi, METH_VARARGS,
2171
intersection_doc},
2172
{"isdisjoint", (PyCFunction)set_isdisjoint, METH_O,
2173
isdisjoint_doc},
2174
{"issubset", (PyCFunction)set_issubset, METH_O,
2175
issubset_doc},
2176
{"issuperset", (PyCFunction)set_issuperset, METH_O,
2177
issuperset_doc},
2178
{"__reduce__", (PyCFunction)set_reduce, METH_NOARGS,
2179
reduce_doc},
2180
{"__sizeof__", (PyCFunction)set_sizeof, METH_NOARGS,
2181
sizeof_doc},
2182
{"symmetric_difference",(PyCFunction)set_symmetric_difference, METH_O,
2183
symmetric_difference_doc},
2184
{"union", (PyCFunction)set_union, METH_VARARGS,
2185
union_doc},
2186
{"__class_getitem__", Py_GenericAlias, METH_O|METH_CLASS, PyDoc_STR("See PEP 585")},
2187
{NULL, NULL} /* sentinel */
2188
};
2189
2190
static PyNumberMethods frozenset_as_number = {
2191
0, /*nb_add*/
2192
(binaryfunc)set_sub, /*nb_subtract*/
2193
0, /*nb_multiply*/
2194
0, /*nb_remainder*/
2195
0, /*nb_divmod*/
2196
0, /*nb_power*/
2197
0, /*nb_negative*/
2198
0, /*nb_positive*/
2199
0, /*nb_absolute*/
2200
0, /*nb_bool*/
2201
0, /*nb_invert*/
2202
0, /*nb_lshift*/
2203
0, /*nb_rshift*/
2204
(binaryfunc)set_and, /*nb_and*/
2205
(binaryfunc)set_xor, /*nb_xor*/
2206
(binaryfunc)set_or, /*nb_or*/
2207
};
2208
2209
PyDoc_STRVAR(frozenset_doc,
2210
"frozenset() -> empty frozenset object\n\
2211
frozenset(iterable) -> frozenset object\n\
2212
\n\
2213
Build an immutable unordered collection of unique elements.");
2214
2215
PyTypeObject PyFrozenSet_Type = {
2216
PyVarObject_HEAD_INIT(&PyType_Type, 0)
2217
"frozenset", /* tp_name */
2218
sizeof(PySetObject), /* tp_basicsize */
2219
0, /* tp_itemsize */
2220
/* methods */
2221
(destructor)set_dealloc, /* tp_dealloc */
2222
0, /* tp_vectorcall_offset */
2223
0, /* tp_getattr */
2224
0, /* tp_setattr */
2225
0, /* tp_as_async */
2226
(reprfunc)set_repr, /* tp_repr */
2227
&frozenset_as_number, /* tp_as_number */
2228
&set_as_sequence, /* tp_as_sequence */
2229
0, /* tp_as_mapping */
2230
frozenset_hash, /* tp_hash */
2231
0, /* tp_call */
2232
0, /* tp_str */
2233
PyObject_GenericGetAttr, /* tp_getattro */
2234
0, /* tp_setattro */
2235
0, /* tp_as_buffer */
2236
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
2237
Py_TPFLAGS_BASETYPE |
2238
_Py_TPFLAGS_MATCH_SELF, /* tp_flags */
2239
frozenset_doc, /* tp_doc */
2240
(traverseproc)set_traverse, /* tp_traverse */
2241
(inquiry)set_clear_internal, /* tp_clear */
2242
(richcmpfunc)set_richcompare, /* tp_richcompare */
2243
offsetof(PySetObject, weakreflist), /* tp_weaklistoffset */
2244
(getiterfunc)set_iter, /* tp_iter */
2245
0, /* tp_iternext */
2246
frozenset_methods, /* tp_methods */
2247
0, /* tp_members */
2248
0, /* tp_getset */
2249
0, /* tp_base */
2250
0, /* tp_dict */
2251
0, /* tp_descr_get */
2252
0, /* tp_descr_set */
2253
0, /* tp_dictoffset */
2254
0, /* tp_init */
2255
PyType_GenericAlloc, /* tp_alloc */
2256
frozenset_new, /* tp_new */
2257
PyObject_GC_Del, /* tp_free */
2258
.tp_vectorcall = frozenset_vectorcall,
2259
};
2260
2261
2262
/***** C API functions *************************************************/
2263
2264
PyObject *
2265
PySet_New(PyObject *iterable)
2266
{
2267
return make_new_set(&PySet_Type, iterable);
2268
}
2269
2270
PyObject *
2271
PyFrozenSet_New(PyObject *iterable)
2272
{
2273
return make_new_set(&PyFrozenSet_Type, iterable);
2274
}
2275
2276
Py_ssize_t
2277
PySet_Size(PyObject *anyset)
2278
{
2279
if (!PyAnySet_Check(anyset)) {
2280
PyErr_BadInternalCall();
2281
return -1;
2282
}
2283
return PySet_GET_SIZE(anyset);
2284
}
2285
2286
int
2287
PySet_Clear(PyObject *set)
2288
{
2289
if (!PySet_Check(set)) {
2290
PyErr_BadInternalCall();
2291
return -1;
2292
}
2293
return set_clear_internal((PySetObject *)set);
2294
}
2295
2296
int
2297
PySet_Contains(PyObject *anyset, PyObject *key)
2298
{
2299
if (!PyAnySet_Check(anyset)) {
2300
PyErr_BadInternalCall();
2301
return -1;
2302
}
2303
return set_contains_key((PySetObject *)anyset, key);
2304
}
2305
2306
int
2307
PySet_Discard(PyObject *set, PyObject *key)
2308
{
2309
if (!PySet_Check(set)) {
2310
PyErr_BadInternalCall();
2311
return -1;
2312
}
2313
return set_discard_key((PySetObject *)set, key);
2314
}
2315
2316
int
2317
PySet_Add(PyObject *anyset, PyObject *key)
2318
{
2319
if (!PySet_Check(anyset) &&
2320
(!PyFrozenSet_Check(anyset) || Py_REFCNT(anyset) != 1)) {
2321
PyErr_BadInternalCall();
2322
return -1;
2323
}
2324
return set_add_key((PySetObject *)anyset, key);
2325
}
2326
2327
int
2328
_PySet_NextEntry(PyObject *set, Py_ssize_t *pos, PyObject **key, Py_hash_t *hash)
2329
{
2330
setentry *entry;
2331
2332
if (!PyAnySet_Check(set)) {
2333
PyErr_BadInternalCall();
2334
return -1;
2335
}
2336
if (set_next((PySetObject *)set, pos, &entry) == 0)
2337
return 0;
2338
*key = entry->key;
2339
*hash = entry->hash;
2340
return 1;
2341
}
2342
2343
PyObject *
2344
PySet_Pop(PyObject *set)
2345
{
2346
if (!PySet_Check(set)) {
2347
PyErr_BadInternalCall();
2348
return NULL;
2349
}
2350
return set_pop((PySetObject *)set, NULL);
2351
}
2352
2353
int
2354
_PySet_Update(PyObject *set, PyObject *iterable)
2355
{
2356
if (!PySet_Check(set)) {
2357
PyErr_BadInternalCall();
2358
return -1;
2359
}
2360
return set_update_internal((PySetObject *)set, iterable);
2361
}
2362
2363
/* Exported for the gdb plugin's benefit. */
2364
PyObject *_PySet_Dummy = dummy;
2365
2366
#ifdef Py_DEBUG
2367
2368
/* Test code to be called with any three element set.
2369
Returns True and original set is restored. */
2370
2371
#define assertRaises(call_return_value, exception) \
2372
do { \
2373
assert(call_return_value); \
2374
assert(PyErr_ExceptionMatches(exception)); \
2375
PyErr_Clear(); \
2376
} while(0)
2377
2378
static PyObject *
2379
test_c_api(PySetObject *so, PyObject *Py_UNUSED(ignored))
2380
{
2381
Py_ssize_t count;
2382
const char *s;
2383
Py_ssize_t i;
2384
PyObject *elem=NULL, *dup=NULL, *t, *f, *dup2, *x=NULL;
2385
PyObject *ob = (PyObject *)so;
2386
Py_hash_t hash;
2387
PyObject *str;
2388
2389
/* Verify preconditions */
2390
assert(PyAnySet_Check(ob));
2391
assert(PyAnySet_CheckExact(ob));
2392
assert(!PyFrozenSet_CheckExact(ob));
2393
2394
/* so.clear(); so |= set("abc"); */
2395
str = PyUnicode_FromString("abc");
2396
if (str == NULL)
2397
return NULL;
2398
set_clear_internal(so);
2399
if (set_update_internal(so, str)) {
2400
Py_DECREF(str);
2401
return NULL;
2402
}
2403
Py_DECREF(str);
2404
2405
/* Exercise type/size checks */
2406
assert(PySet_Size(ob) == 3);
2407
assert(PySet_GET_SIZE(ob) == 3);
2408
2409
/* Raise TypeError for non-iterable constructor arguments */
2410
assertRaises(PySet_New(Py_None) == NULL, PyExc_TypeError);
2411
assertRaises(PyFrozenSet_New(Py_None) == NULL, PyExc_TypeError);
2412
2413
/* Raise TypeError for unhashable key */
2414
dup = PySet_New(ob);
2415
assertRaises(PySet_Discard(ob, dup) == -1, PyExc_TypeError);
2416
assertRaises(PySet_Contains(ob, dup) == -1, PyExc_TypeError);
2417
assertRaises(PySet_Add(ob, dup) == -1, PyExc_TypeError);
2418
2419
/* Exercise successful pop, contains, add, and discard */
2420
elem = PySet_Pop(ob);
2421
assert(PySet_Contains(ob, elem) == 0);
2422
assert(PySet_GET_SIZE(ob) == 2);
2423
assert(PySet_Add(ob, elem) == 0);
2424
assert(PySet_Contains(ob, elem) == 1);
2425
assert(PySet_GET_SIZE(ob) == 3);
2426
assert(PySet_Discard(ob, elem) == 1);
2427
assert(PySet_GET_SIZE(ob) == 2);
2428
assert(PySet_Discard(ob, elem) == 0);
2429
assert(PySet_GET_SIZE(ob) == 2);
2430
2431
/* Exercise clear */
2432
dup2 = PySet_New(dup);
2433
assert(PySet_Clear(dup2) == 0);
2434
assert(PySet_Size(dup2) == 0);
2435
Py_DECREF(dup2);
2436
2437
/* Raise SystemError on clear or update of frozen set */
2438
f = PyFrozenSet_New(dup);
2439
assertRaises(PySet_Clear(f) == -1, PyExc_SystemError);
2440
assertRaises(_PySet_Update(f, dup) == -1, PyExc_SystemError);
2441
assert(PySet_Add(f, elem) == 0);
2442
Py_INCREF(f);
2443
assertRaises(PySet_Add(f, elem) == -1, PyExc_SystemError);
2444
Py_DECREF(f);
2445
Py_DECREF(f);
2446
2447
/* Exercise direct iteration */
2448
i = 0, count = 0;
2449
while (_PySet_NextEntry((PyObject *)dup, &i, &x, &hash)) {
2450
s = PyUnicode_AsUTF8(x);
2451
assert(s && (s[0] == 'a' || s[0] == 'b' || s[0] == 'c'));
2452
count++;
2453
}
2454
assert(count == 3);
2455
2456
/* Exercise updates */
2457
dup2 = PySet_New(NULL);
2458
assert(_PySet_Update(dup2, dup) == 0);
2459
assert(PySet_Size(dup2) == 3);
2460
assert(_PySet_Update(dup2, dup) == 0);
2461
assert(PySet_Size(dup2) == 3);
2462
Py_DECREF(dup2);
2463
2464
/* Raise SystemError when self argument is not a set or frozenset. */
2465
t = PyTuple_New(0);
2466
assertRaises(PySet_Size(t) == -1, PyExc_SystemError);
2467
assertRaises(PySet_Contains(t, elem) == -1, PyExc_SystemError);
2468
Py_DECREF(t);
2469
2470
/* Raise SystemError when self argument is not a set. */
2471
f = PyFrozenSet_New(dup);
2472
assert(PySet_Size(f) == 3);
2473
assert(PyFrozenSet_CheckExact(f));
2474
assertRaises(PySet_Discard(f, elem) == -1, PyExc_SystemError);
2475
assertRaises(PySet_Pop(f) == NULL, PyExc_SystemError);
2476
Py_DECREF(f);
2477
2478
/* Raise KeyError when popping from an empty set */
2479
assert(PyNumber_InPlaceSubtract(ob, ob) == ob);
2480
Py_DECREF(ob);
2481
assert(PySet_GET_SIZE(ob) == 0);
2482
assertRaises(PySet_Pop(ob) == NULL, PyExc_KeyError);
2483
2484
/* Restore the set from the copy using the PyNumber API */
2485
assert(PyNumber_InPlaceOr(ob, dup) == ob);
2486
Py_DECREF(ob);
2487
2488
/* Verify constructors accept NULL arguments */
2489
f = PySet_New(NULL);
2490
assert(f != NULL);
2491
assert(PySet_GET_SIZE(f) == 0);
2492
Py_DECREF(f);
2493
f = PyFrozenSet_New(NULL);
2494
assert(f != NULL);
2495
assert(PyFrozenSet_CheckExact(f));
2496
assert(PySet_GET_SIZE(f) == 0);
2497
Py_DECREF(f);
2498
2499
Py_DECREF(elem);
2500
Py_DECREF(dup);
2501
Py_RETURN_TRUE;
2502
}
2503
2504
#undef assertRaises
2505
2506
#endif
2507
2508
/***** Dummy Struct *************************************************/
2509
2510
static PyObject *
2511
dummy_repr(PyObject *op)
2512
{
2513
return PyUnicode_FromString("<dummy key>");
2514
}
2515
2516
static void _Py_NO_RETURN
2517
dummy_dealloc(PyObject* ignore)
2518
{
2519
Py_FatalError("deallocating <dummy key>");
2520
}
2521
2522
static PyTypeObject _PySetDummy_Type = {
2523
PyVarObject_HEAD_INIT(&PyType_Type, 0)
2524
"<dummy key> type",
2525
0,
2526
0,
2527
dummy_dealloc, /*tp_dealloc*/ /*never called*/
2528
0, /*tp_vectorcall_offset*/
2529
0, /*tp_getattr*/
2530
0, /*tp_setattr*/
2531
0, /*tp_as_async*/
2532
dummy_repr, /*tp_repr*/
2533
0, /*tp_as_number*/
2534
0, /*tp_as_sequence*/
2535
0, /*tp_as_mapping*/
2536
0, /*tp_hash */
2537
0, /*tp_call */
2538
0, /*tp_str */
2539
0, /*tp_getattro */
2540
0, /*tp_setattro */
2541
0, /*tp_as_buffer */
2542
Py_TPFLAGS_DEFAULT, /*tp_flags */
2543
};
2544
2545
static PyObject _dummy_struct = {
2546
_PyObject_EXTRA_INIT
2547
{ _Py_IMMORTAL_REFCNT },
2548
&_PySetDummy_Type
2549
};
2550
2551