Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/lib/libc/db/btree/bt_delete.c
39535 views
1
/*-
2
* SPDX-License-Identifier: BSD-3-Clause
3
*
4
* Copyright (c) 1990, 1993, 1994
5
* The Regents of the University of California. All rights reserved.
6
*
7
* This code is derived from software contributed to Berkeley by
8
* Mike Olson.
9
*
10
* Redistribution and use in source and binary forms, with or without
11
* modification, are permitted provided that the following conditions
12
* are met:
13
* 1. Redistributions of source code must retain the above copyright
14
* notice, this list of conditions and the following disclaimer.
15
* 2. Redistributions in binary form must reproduce the above copyright
16
* notice, this list of conditions and the following disclaimer in the
17
* documentation and/or other materials provided with the distribution.
18
* 3. Neither the name of the University nor the names of its contributors
19
* may be used to endorse or promote products derived from this software
20
* without specific prior written permission.
21
*
22
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32
* SUCH DAMAGE.
33
*/
34
35
#include <sys/types.h>
36
37
#include <errno.h>
38
#include <stdio.h>
39
#include <string.h>
40
41
#include <db.h>
42
#include "btree.h"
43
44
static int __bt_bdelete(BTREE *, const DBT *);
45
static int __bt_curdel(BTREE *, const DBT *, PAGE *, u_int);
46
static int __bt_pdelete(BTREE *, PAGE *);
47
static int __bt_relink(BTREE *, PAGE *);
48
static int __bt_stkacq(BTREE *, PAGE **, CURSOR *);
49
50
/*
51
* __bt_delete
52
* Delete the item(s) referenced by a key.
53
*
54
* Return RET_SPECIAL if the key is not found.
55
*/
56
int
57
__bt_delete(const DB *dbp, const DBT *key, u_int flags)
58
{
59
BTREE *t;
60
CURSOR *c;
61
PAGE *h;
62
int status;
63
64
t = dbp->internal;
65
66
/* Toss any page pinned across calls. */
67
if (t->bt_pinned != NULL) {
68
mpool_put(t->bt_mp, t->bt_pinned, 0);
69
t->bt_pinned = NULL;
70
}
71
72
/* Check for change to a read-only tree. */
73
if (F_ISSET(t, B_RDONLY)) {
74
errno = EPERM;
75
return (RET_ERROR);
76
}
77
78
switch (flags) {
79
case 0:
80
status = __bt_bdelete(t, key);
81
break;
82
case R_CURSOR:
83
/*
84
* If flags is R_CURSOR, delete the cursor. Must already
85
* have started a scan and not have already deleted it.
86
*/
87
c = &t->bt_cursor;
88
if (F_ISSET(c, CURS_INIT)) {
89
if (F_ISSET(c, CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE))
90
return (RET_SPECIAL);
91
if ((h = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL)
92
return (RET_ERROR);
93
94
/*
95
* If the page is about to be emptied, we'll need to
96
* delete it, which means we have to acquire a stack.
97
*/
98
if (NEXTINDEX(h) == 1)
99
if (__bt_stkacq(t, &h, &t->bt_cursor))
100
return (RET_ERROR);
101
102
status = __bt_dleaf(t, NULL, h, c->pg.index);
103
104
if (NEXTINDEX(h) == 0 && status == RET_SUCCESS) {
105
if (__bt_pdelete(t, h))
106
return (RET_ERROR);
107
} else
108
mpool_put(t->bt_mp,
109
h, status == RET_SUCCESS ? MPOOL_DIRTY : 0);
110
break;
111
}
112
/* FALLTHROUGH */
113
default:
114
errno = EINVAL;
115
return (RET_ERROR);
116
}
117
if (status == RET_SUCCESS)
118
F_SET(t, B_MODIFIED);
119
return (status);
120
}
121
122
/*
123
* __bt_stkacq --
124
* Acquire a stack so we can delete a cursor entry.
125
*
126
* Parameters:
127
* t: tree
128
* hp: pointer to current, pinned PAGE pointer
129
* c: pointer to the cursor
130
*
131
* Returns:
132
* 0 on success, 1 on failure
133
*/
134
static int
135
__bt_stkacq(BTREE *t, PAGE **hp, CURSOR *c)
136
{
137
BINTERNAL *bi;
138
EPG *e;
139
EPGNO *parent;
140
PAGE *h;
141
indx_t idx;
142
pgno_t pgno;
143
recno_t nextpg, prevpg;
144
int exact, level;
145
146
/*
147
* Find the first occurrence of the key in the tree. Toss the
148
* currently locked page so we don't hit an already-locked page.
149
*/
150
h = *hp;
151
mpool_put(t->bt_mp, h, 0);
152
if ((e = __bt_search(t, &c->key, &exact)) == NULL)
153
return (1);
154
h = e->page;
155
156
/* See if we got it in one shot. */
157
if (h->pgno == c->pg.pgno)
158
goto ret;
159
160
/*
161
* Move right, looking for the page. At each move we have to move
162
* up the stack until we don't have to move to the next page. If
163
* we have to change pages at an internal level, we have to fix the
164
* stack back up.
165
*/
166
while (h->pgno != c->pg.pgno) {
167
if ((nextpg = h->nextpg) == P_INVALID)
168
break;
169
mpool_put(t->bt_mp, h, 0);
170
171
/* Move up the stack. */
172
for (level = 0; (parent = BT_POP(t)) != NULL; ++level) {
173
/* Get the parent page. */
174
if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL)
175
return (1);
176
177
/* Move to the next index. */
178
if (parent->index != NEXTINDEX(h) - 1) {
179
idx = parent->index + 1;
180
BT_PUSH(t, h->pgno, idx);
181
break;
182
}
183
mpool_put(t->bt_mp, h, 0);
184
}
185
186
/* Restore the stack. */
187
while (level--) {
188
/* Push the next level down onto the stack. */
189
bi = GETBINTERNAL(h, idx);
190
pgno = bi->pgno;
191
BT_PUSH(t, pgno, 0);
192
193
/* Lose the currently pinned page. */
194
mpool_put(t->bt_mp, h, 0);
195
196
/* Get the next level down. */
197
if ((h = mpool_get(t->bt_mp, pgno, 0)) == NULL)
198
return (1);
199
idx = 0;
200
}
201
mpool_put(t->bt_mp, h, 0);
202
if ((h = mpool_get(t->bt_mp, nextpg, 0)) == NULL)
203
return (1);
204
}
205
206
if (h->pgno == c->pg.pgno)
207
goto ret;
208
209
/* Reacquire the original stack. */
210
mpool_put(t->bt_mp, h, 0);
211
if ((e = __bt_search(t, &c->key, &exact)) == NULL)
212
return (1);
213
h = e->page;
214
215
/*
216
* Move left, looking for the page. At each move we have to move
217
* up the stack until we don't have to change pages to move to the
218
* next page. If we have to change pages at an internal level, we
219
* have to fix the stack back up.
220
*/
221
while (h->pgno != c->pg.pgno) {
222
if ((prevpg = h->prevpg) == P_INVALID)
223
break;
224
mpool_put(t->bt_mp, h, 0);
225
226
/* Move up the stack. */
227
for (level = 0; (parent = BT_POP(t)) != NULL; ++level) {
228
/* Get the parent page. */
229
if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL)
230
return (1);
231
232
/* Move to the next index. */
233
if (parent->index != 0) {
234
idx = parent->index - 1;
235
BT_PUSH(t, h->pgno, idx);
236
break;
237
}
238
mpool_put(t->bt_mp, h, 0);
239
}
240
241
/* Restore the stack. */
242
while (level--) {
243
/* Push the next level down onto the stack. */
244
bi = GETBINTERNAL(h, idx);
245
pgno = bi->pgno;
246
247
/* Lose the currently pinned page. */
248
mpool_put(t->bt_mp, h, 0);
249
250
/* Get the next level down. */
251
if ((h = mpool_get(t->bt_mp, pgno, 0)) == NULL)
252
return (1);
253
254
idx = NEXTINDEX(h) - 1;
255
BT_PUSH(t, pgno, idx);
256
}
257
mpool_put(t->bt_mp, h, 0);
258
if ((h = mpool_get(t->bt_mp, prevpg, 0)) == NULL)
259
return (1);
260
}
261
262
263
ret: mpool_put(t->bt_mp, h, 0);
264
return ((*hp = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL);
265
}
266
267
/*
268
* __bt_bdelete --
269
* Delete all key/data pairs matching the specified key.
270
*
271
* Parameters:
272
* t: tree
273
* key: key to delete
274
*
275
* Returns:
276
* RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found.
277
*/
278
static int
279
__bt_bdelete(BTREE *t, const DBT *key)
280
{
281
EPG *e;
282
PAGE *h;
283
int deleted, exact, redo;
284
285
deleted = 0;
286
287
/* Find any matching record; __bt_search pins the page. */
288
loop: if ((e = __bt_search(t, key, &exact)) == NULL)
289
return (deleted ? RET_SUCCESS : RET_ERROR);
290
if (!exact) {
291
mpool_put(t->bt_mp, e->page, 0);
292
return (deleted ? RET_SUCCESS : RET_SPECIAL);
293
}
294
295
/*
296
* Delete forward, then delete backward, from the found key. If
297
* there are duplicates and we reach either side of the page, do
298
* the key search again, so that we get them all.
299
*/
300
redo = 0;
301
h = e->page;
302
do {
303
if (__bt_dleaf(t, key, h, e->index)) {
304
mpool_put(t->bt_mp, h, 0);
305
return (RET_ERROR);
306
}
307
if (F_ISSET(t, B_NODUPS)) {
308
if (NEXTINDEX(h) == 0) {
309
if (__bt_pdelete(t, h))
310
return (RET_ERROR);
311
} else
312
mpool_put(t->bt_mp, h, MPOOL_DIRTY);
313
return (RET_SUCCESS);
314
}
315
deleted = 1;
316
} while (e->index < NEXTINDEX(h) && __bt_cmp(t, key, e) == 0);
317
318
/* Check for right-hand edge of the page. */
319
if (e->index == NEXTINDEX(h))
320
redo = 1;
321
322
/* Delete from the key to the beginning of the page. */
323
while (e->index-- > 0) {
324
if (__bt_cmp(t, key, e) != 0)
325
break;
326
if (__bt_dleaf(t, key, h, e->index) == RET_ERROR) {
327
mpool_put(t->bt_mp, h, 0);
328
return (RET_ERROR);
329
}
330
if (e->index == 0)
331
redo = 1;
332
}
333
334
/* Check for an empty page. */
335
if (NEXTINDEX(h) == 0) {
336
if (__bt_pdelete(t, h))
337
return (RET_ERROR);
338
goto loop;
339
}
340
341
/* Put the page. */
342
mpool_put(t->bt_mp, h, MPOOL_DIRTY);
343
344
if (redo)
345
goto loop;
346
return (RET_SUCCESS);
347
}
348
349
/*
350
* __bt_pdelete --
351
* Delete a single page from the tree.
352
*
353
* Parameters:
354
* t: tree
355
* h: leaf page
356
*
357
* Returns:
358
* RET_SUCCESS, RET_ERROR.
359
*
360
* Side-effects:
361
* mpool_put's the page
362
*/
363
static int
364
__bt_pdelete(BTREE *t, PAGE *h)
365
{
366
BINTERNAL *bi;
367
PAGE *pg;
368
EPGNO *parent;
369
indx_t cnt, idx, *ip, offset;
370
u_int32_t nksize;
371
char *from;
372
373
/*
374
* Walk the parent page stack -- a LIFO stack of the pages that were
375
* traversed when we searched for the page where the delete occurred.
376
* Each stack entry is a page number and a page index offset. The
377
* offset is for the page traversed on the search. We've just deleted
378
* a page, so we have to delete the key from the parent page.
379
*
380
* If the delete from the parent page makes it empty, this process may
381
* continue all the way up the tree. We stop if we reach the root page
382
* (which is never deleted, it's just not worth the effort) or if the
383
* delete does not empty the page.
384
*/
385
while ((parent = BT_POP(t)) != NULL) {
386
/* Get the parent page. */
387
if ((pg = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL)
388
return (RET_ERROR);
389
390
idx = parent->index;
391
bi = GETBINTERNAL(pg, idx);
392
393
/* Free any overflow pages. */
394
if (bi->flags & P_BIGKEY &&
395
__ovfl_delete(t, bi->bytes) == RET_ERROR) {
396
mpool_put(t->bt_mp, pg, 0);
397
return (RET_ERROR);
398
}
399
400
/*
401
* Free the parent if it has only the one key and it's not the
402
* root page. If it's the rootpage, turn it back into an empty
403
* leaf page.
404
*/
405
if (NEXTINDEX(pg) == 1) {
406
if (pg->pgno == P_ROOT) {
407
pg->lower = BTDATAOFF;
408
pg->upper = t->bt_psize;
409
pg->flags = P_BLEAF;
410
} else {
411
if (__bt_relink(t, pg) || __bt_free(t, pg))
412
return (RET_ERROR);
413
continue;
414
}
415
} else {
416
/* Pack remaining key items at the end of the page. */
417
nksize = NBINTERNAL(bi->ksize);
418
from = (char *)pg + pg->upper;
419
memmove(from + nksize, from, (char *)bi - from);
420
pg->upper += nksize;
421
422
/* Adjust indices' offsets, shift the indices down. */
423
offset = pg->linp[idx];
424
for (cnt = idx, ip = &pg->linp[0]; cnt--; ++ip)
425
if (ip[0] < offset)
426
ip[0] += nksize;
427
for (cnt = NEXTINDEX(pg) - idx; --cnt; ++ip)
428
ip[0] = ip[1] < offset ? ip[1] + nksize : ip[1];
429
pg->lower -= sizeof(indx_t);
430
}
431
432
mpool_put(t->bt_mp, pg, MPOOL_DIRTY);
433
break;
434
}
435
436
/* Free the leaf page, as long as it wasn't the root. */
437
if (h->pgno == P_ROOT) {
438
mpool_put(t->bt_mp, h, MPOOL_DIRTY);
439
return (RET_SUCCESS);
440
}
441
return (__bt_relink(t, h) || __bt_free(t, h));
442
}
443
444
/*
445
* __bt_dleaf --
446
* Delete a single record from a leaf page.
447
*
448
* Parameters:
449
* t: tree
450
* key: referenced key
451
* h: page
452
* idx: index on page to delete
453
*
454
* Returns:
455
* RET_SUCCESS, RET_ERROR.
456
*/
457
int
458
__bt_dleaf(BTREE *t, const DBT *key, PAGE *h, u_int idx)
459
{
460
BLEAF *bl;
461
indx_t cnt, *ip, offset;
462
u_int32_t nbytes;
463
void *to;
464
char *from;
465
466
/* If this record is referenced by the cursor, delete the cursor. */
467
if (F_ISSET(&t->bt_cursor, CURS_INIT) &&
468
!F_ISSET(&t->bt_cursor, CURS_ACQUIRE) &&
469
t->bt_cursor.pg.pgno == h->pgno && t->bt_cursor.pg.index == idx &&
470
__bt_curdel(t, key, h, idx))
471
return (RET_ERROR);
472
473
/* If the entry uses overflow pages, make them available for reuse. */
474
to = bl = GETBLEAF(h, idx);
475
if (bl->flags & P_BIGKEY && __ovfl_delete(t, bl->bytes) == RET_ERROR)
476
return (RET_ERROR);
477
if (bl->flags & P_BIGDATA &&
478
__ovfl_delete(t, bl->bytes + bl->ksize) == RET_ERROR)
479
return (RET_ERROR);
480
481
/* Pack the remaining key/data items at the end of the page. */
482
nbytes = NBLEAF(bl);
483
from = (char *)h + h->upper;
484
memmove(from + nbytes, from, (char *)to - from);
485
h->upper += nbytes;
486
487
/* Adjust the indices' offsets, shift the indices down. */
488
offset = h->linp[idx];
489
for (cnt = idx, ip = &h->linp[0]; cnt--; ++ip)
490
if (ip[0] < offset)
491
ip[0] += nbytes;
492
for (cnt = NEXTINDEX(h) - idx; --cnt; ++ip)
493
ip[0] = ip[1] < offset ? ip[1] + nbytes : ip[1];
494
h->lower -= sizeof(indx_t);
495
496
/* If the cursor is on this page, adjust it as necessary. */
497
if (F_ISSET(&t->bt_cursor, CURS_INIT) &&
498
!F_ISSET(&t->bt_cursor, CURS_ACQUIRE) &&
499
t->bt_cursor.pg.pgno == h->pgno && t->bt_cursor.pg.index > idx)
500
--t->bt_cursor.pg.index;
501
502
return (RET_SUCCESS);
503
}
504
505
/*
506
* __bt_curdel --
507
* Delete the cursor.
508
*
509
* Parameters:
510
* t: tree
511
* key: referenced key (or NULL)
512
* h: page
513
* idx: index on page to delete
514
*
515
* Returns:
516
* RET_SUCCESS, RET_ERROR.
517
*/
518
static int
519
__bt_curdel(BTREE *t, const DBT *key, PAGE *h, u_int idx)
520
{
521
CURSOR *c;
522
EPG e;
523
PAGE *pg;
524
int curcopy, status;
525
526
/*
527
* If there are duplicates, move forward or backward to one.
528
* Otherwise, copy the key into the cursor area.
529
*/
530
c = &t->bt_cursor;
531
F_CLR(c, CURS_AFTER | CURS_BEFORE | CURS_ACQUIRE);
532
533
curcopy = 0;
534
if (!F_ISSET(t, B_NODUPS)) {
535
/*
536
* We're going to have to do comparisons. If we weren't
537
* provided a copy of the key, i.e. the user is deleting
538
* the current cursor position, get one.
539
*/
540
if (key == NULL) {
541
e.page = h;
542
e.index = idx;
543
if ((status = __bt_ret(t, &e,
544
&c->key, &c->key, NULL, NULL, 1)) != RET_SUCCESS)
545
return (status);
546
curcopy = 1;
547
key = &c->key;
548
}
549
/* Check previous key, if not at the beginning of the page. */
550
if (idx > 0) {
551
e.page = h;
552
e.index = idx - 1;
553
if (__bt_cmp(t, key, &e) == 0) {
554
F_SET(c, CURS_BEFORE);
555
goto dup2;
556
}
557
}
558
/* Check next key, if not at the end of the page. */
559
if (idx < NEXTINDEX(h) - 1) {
560
e.page = h;
561
e.index = idx + 1;
562
if (__bt_cmp(t, key, &e) == 0) {
563
F_SET(c, CURS_AFTER);
564
goto dup2;
565
}
566
}
567
/* Check previous key if at the beginning of the page. */
568
if (idx == 0 && h->prevpg != P_INVALID) {
569
if ((pg = mpool_get(t->bt_mp, h->prevpg, 0)) == NULL)
570
return (RET_ERROR);
571
e.page = pg;
572
e.index = NEXTINDEX(pg) - 1;
573
if (__bt_cmp(t, key, &e) == 0) {
574
F_SET(c, CURS_BEFORE);
575
goto dup1;
576
}
577
mpool_put(t->bt_mp, pg, 0);
578
}
579
/* Check next key if at the end of the page. */
580
if (idx == NEXTINDEX(h) - 1 && h->nextpg != P_INVALID) {
581
if ((pg = mpool_get(t->bt_mp, h->nextpg, 0)) == NULL)
582
return (RET_ERROR);
583
e.page = pg;
584
e.index = 0;
585
if (__bt_cmp(t, key, &e) == 0) {
586
F_SET(c, CURS_AFTER);
587
dup1: mpool_put(t->bt_mp, pg, 0);
588
dup2: c->pg.pgno = e.page->pgno;
589
c->pg.index = e.index;
590
return (RET_SUCCESS);
591
}
592
mpool_put(t->bt_mp, pg, 0);
593
}
594
}
595
e.page = h;
596
e.index = idx;
597
if (curcopy || (status =
598
__bt_ret(t, &e, &c->key, &c->key, NULL, NULL, 1)) == RET_SUCCESS) {
599
F_SET(c, CURS_ACQUIRE);
600
return (RET_SUCCESS);
601
}
602
return (status);
603
}
604
605
/*
606
* __bt_relink --
607
* Link around a deleted page.
608
*
609
* Parameters:
610
* t: tree
611
* h: page to be deleted
612
*/
613
static int
614
__bt_relink(BTREE *t, PAGE *h)
615
{
616
PAGE *pg;
617
618
if (h->nextpg != P_INVALID) {
619
if ((pg = mpool_get(t->bt_mp, h->nextpg, 0)) == NULL)
620
return (RET_ERROR);
621
pg->prevpg = h->prevpg;
622
mpool_put(t->bt_mp, pg, MPOOL_DIRTY);
623
}
624
if (h->prevpg != P_INVALID) {
625
if ((pg = mpool_get(t->bt_mp, h->prevpg, 0)) == NULL)
626
return (RET_ERROR);
627
pg->nextpg = h->nextpg;
628
mpool_put(t->bt_mp, pg, MPOOL_DIRTY);
629
}
630
return (0);
631
}
632
633