/*-1* SPDX-License-Identifier: BSD-3-Clause2*3* Copyright (c) 1990, 1993, 19944* The Regents of the University of California. All rights reserved.5*6* This code is derived from software contributed to Berkeley by7* Mike Olson.8*9* Redistribution and use in source and binary forms, with or without10* modification, are permitted provided that the following conditions11* are met:12* 1. Redistributions of source code must retain the above copyright13* notice, this list of conditions and the following disclaimer.14* 2. Redistributions in binary form must reproduce the above copyright15* notice, this list of conditions and the following disclaimer in the16* documentation and/or other materials provided with the distribution.17* 3. Neither the name of the University nor the names of its contributors18* may be used to endorse or promote products derived from this software19* without specific prior written permission.20*21* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND22* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE23* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE24* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE25* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL26* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS27* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)28* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT29* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY30* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF31* SUCH DAMAGE.32*/3334#include <sys/types.h>3536#include <errno.h>37#include <stddef.h>38#include <stdio.h>39#include <stdlib.h>4041#include <db.h>42#include "btree.h"4344static int __bt_first(BTREE *, const DBT *, EPG *, int *);45static int __bt_seqadv(BTREE *, EPG *, int);46static int __bt_seqset(BTREE *, EPG *, DBT *, int);4748/*49* Sequential scan support.50*51* The tree can be scanned sequentially, starting from either end of the52* tree or from any specific key. A scan request before any scanning is53* done is initialized as starting from the least node.54*/5556/*57* __bt_seq --58* Btree sequential scan interface.59*60* Parameters:61* dbp: pointer to access method62* key: key for positioning and return value63* data: data return value64* flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV.65*66* Returns:67* RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key.68*/69int70__bt_seq(const DB *dbp, DBT *key, DBT *data, u_int flags)71{72BTREE *t;73EPG e;74int status;7576t = dbp->internal;7778/* Toss any page pinned across calls. */79if (t->bt_pinned != NULL) {80mpool_put(t->bt_mp, t->bt_pinned, 0);81t->bt_pinned = NULL;82}8384/*85* If scan uninitialized as yet, or starting at a specific record, set86* the scan to a specific key. Both __bt_seqset and __bt_seqadv pin87* the page the cursor references if they're successful.88*/89switch (flags) {90case R_NEXT:91case R_PREV:92if (F_ISSET(&t->bt_cursor, CURS_INIT)) {93status = __bt_seqadv(t, &e, flags);94break;95}96/* FALLTHROUGH */97case R_FIRST:98case R_LAST:99case R_CURSOR:100status = __bt_seqset(t, &e, key, flags);101break;102default:103errno = EINVAL;104return (RET_ERROR);105}106107if (status == RET_SUCCESS) {108__bt_setcur(t, e.page->pgno, e.index);109110status =111__bt_ret(t, &e, key, &t->bt_rkey, data, &t->bt_rdata, 0);112113/*114* If the user is doing concurrent access, we copied the115* key/data, toss the page.116*/117if (F_ISSET(t, B_DB_LOCK))118mpool_put(t->bt_mp, e.page, 0);119else120t->bt_pinned = e.page;121}122return (status);123}124125/*126* __bt_seqset --127* Set the sequential scan to a specific key.128*129* Parameters:130* t: tree131* ep: storage for returned key132* key: key for initial scan position133* flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV134*135* Side effects:136* Pins the page the cursor references.137*138* Returns:139* RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key.140*/141static int142__bt_seqset(BTREE *t, EPG *ep, DBT *key, int flags)143{144PAGE *h;145pgno_t pg;146int exact;147148/*149* Find the first, last or specific key in the tree and point the150* cursor at it. The cursor may not be moved until a new key has151* been found.152*/153switch (flags) {154case R_CURSOR: /* Keyed scan. */155/*156* Find the first instance of the key or the smallest key157* which is greater than or equal to the specified key.158*/159if (key->data == NULL || key->size == 0) {160errno = EINVAL;161return (RET_ERROR);162}163return (__bt_first(t, key, ep, &exact));164case R_FIRST: /* First record. */165case R_NEXT:166/* Walk down the left-hand side of the tree. */167for (pg = P_ROOT;;) {168if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)169return (RET_ERROR);170171/* Check for an empty tree. */172if (NEXTINDEX(h) == 0) {173mpool_put(t->bt_mp, h, 0);174return (RET_SPECIAL);175}176177if (h->flags & (P_BLEAF | P_RLEAF))178break;179pg = GETBINTERNAL(h, 0)->pgno;180mpool_put(t->bt_mp, h, 0);181}182ep->page = h;183ep->index = 0;184break;185case R_LAST: /* Last record. */186case R_PREV:187/* Walk down the right-hand side of the tree. */188for (pg = P_ROOT;;) {189if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)190return (RET_ERROR);191192/* Check for an empty tree. */193if (NEXTINDEX(h) == 0) {194mpool_put(t->bt_mp, h, 0);195return (RET_SPECIAL);196}197198if (h->flags & (P_BLEAF | P_RLEAF))199break;200pg = GETBINTERNAL(h, NEXTINDEX(h) - 1)->pgno;201mpool_put(t->bt_mp, h, 0);202}203204ep->page = h;205ep->index = NEXTINDEX(h) - 1;206break;207}208return (RET_SUCCESS);209}210211/*212* __bt_seqadvance --213* Advance the sequential scan.214*215* Parameters:216* t: tree217* flags: R_NEXT, R_PREV218*219* Side effects:220* Pins the page the new key/data record is on.221*222* Returns:223* RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key.224*/225static int226__bt_seqadv(BTREE *t, EPG *ep, int flags)227{228CURSOR *c;229PAGE *h;230indx_t idx;231pgno_t pg;232int exact;233234/*235* There are a couple of states that we can be in. The cursor has236* been initialized by the time we get here, but that's all we know.237*/238c = &t->bt_cursor;239240/*241* The cursor was deleted where there weren't any duplicate records,242* so the key was saved. Find out where that key would go in the243* current tree. It doesn't matter if the returned key is an exact244* match or not -- if it's an exact match, the record was added after245* the delete so we can just return it. If not, as long as there's246* a record there, return it.247*/248if (F_ISSET(c, CURS_ACQUIRE))249return (__bt_first(t, &c->key, ep, &exact));250251/* Get the page referenced by the cursor. */252if ((h = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL)253return (RET_ERROR);254255/*256* Find the next/previous record in the tree and point the cursor at257* it. The cursor may not be moved until a new key has been found.258*/259switch (flags) {260case R_NEXT: /* Next record. */261/*262* The cursor was deleted in duplicate records, and moved263* forward to a record that has yet to be returned. Clear264* that flag, and return the record.265*/266if (F_ISSET(c, CURS_AFTER))267goto usecurrent;268idx = c->pg.index;269if (++idx == NEXTINDEX(h)) {270pg = h->nextpg;271mpool_put(t->bt_mp, h, 0);272if (pg == P_INVALID)273return (RET_SPECIAL);274if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)275return (RET_ERROR);276idx = 0;277}278break;279case R_PREV: /* Previous record. */280/*281* The cursor was deleted in duplicate records, and moved282* backward to a record that has yet to be returned. Clear283* that flag, and return the record.284*/285if (F_ISSET(c, CURS_BEFORE)) {286usecurrent: F_CLR(c, CURS_AFTER | CURS_BEFORE);287ep->page = h;288ep->index = c->pg.index;289return (RET_SUCCESS);290}291idx = c->pg.index;292if (idx == 0) {293pg = h->prevpg;294mpool_put(t->bt_mp, h, 0);295if (pg == P_INVALID)296return (RET_SPECIAL);297if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)298return (RET_ERROR);299idx = NEXTINDEX(h) - 1;300} else301--idx;302break;303}304305ep->page = h;306ep->index = idx;307return (RET_SUCCESS);308}309310/*311* __bt_first --312* Find the first entry.313*314* Parameters:315* t: the tree316* key: the key317* erval: return EPG318* exactp: pointer to exact match flag319*320* Returns:321* The first entry in the tree greater than or equal to key,322* or RET_SPECIAL if no such key exists.323*/324static int325__bt_first(BTREE *t, const DBT *key, EPG *erval, int *exactp)326{327PAGE *h;328EPG *ep, save;329pgno_t pg;330331/*332* Find any matching record; __bt_search pins the page.333*334* If it's an exact match and duplicates are possible, walk backwards335* in the tree until we find the first one. Otherwise, make sure it's336* a valid key (__bt_search may return an index just past the end of a337* page) and return it.338*/339if ((ep = __bt_search(t, key, exactp)) == NULL)340return (0);341if (*exactp) {342if (F_ISSET(t, B_NODUPS)) {343*erval = *ep;344return (RET_SUCCESS);345}346347/*348* Walk backwards, as long as the entry matches and there are349* keys left in the tree. Save a copy of each match in case350* we go too far.351*/352save = *ep;353h = ep->page;354do {355if (save.page->pgno != ep->page->pgno) {356mpool_put(t->bt_mp, save.page, 0);357save = *ep;358} else359save.index = ep->index;360361/*362* Don't unpin the page the last (or original) match363* was on, but make sure it's unpinned if an error364* occurs.365*/366if (ep->index == 0) {367if (h->prevpg == P_INVALID)368break;369if (h->pgno != save.page->pgno)370mpool_put(t->bt_mp, h, 0);371if ((h = mpool_get(t->bt_mp,372h->prevpg, 0)) == NULL) {373if (h->pgno == save.page->pgno)374mpool_put(t->bt_mp,375save.page, 0);376return (RET_ERROR);377}378ep->page = h;379ep->index = NEXTINDEX(h);380}381--ep->index;382} while (__bt_cmp(t, key, ep) == 0);383384/*385* Reach here with the last page that was looked at pinned,386* which may or may not be the same as the last (or original)387* match page. If it's not useful, release it.388*/389if (h->pgno != save.page->pgno)390mpool_put(t->bt_mp, h, 0);391392*erval = save;393return (RET_SUCCESS);394}395396/* If at the end of a page, find the next entry. */397if (ep->index == NEXTINDEX(ep->page)) {398h = ep->page;399pg = h->nextpg;400mpool_put(t->bt_mp, h, 0);401if (pg == P_INVALID)402return (RET_SPECIAL);403if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)404return (RET_ERROR);405ep->index = 0;406ep->page = h;407}408*erval = *ep;409return (RET_SUCCESS);410}411412/*413* __bt_setcur --414* Set the cursor to an entry in the tree.415*416* Parameters:417* t: the tree418* pgno: page number419* idx: page index420*/421void422__bt_setcur(BTREE *t, pgno_t pgno, u_int idx)423{424/* Lose any already deleted key. */425if (t->bt_cursor.key.data != NULL) {426free(t->bt_cursor.key.data);427t->bt_cursor.key.size = 0;428t->bt_cursor.key.data = NULL;429}430F_CLR(&t->bt_cursor, CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE);431432/* Update the cursor. */433t->bt_cursor.pg.pgno = pgno;434t->bt_cursor.pg.index = idx;435F_SET(&t->bt_cursor, CURS_INIT);436}437438439