/* Path: blob/main/contrib/libdivsufsort/lib/utils.c */
/* 103373 views */
/*1* utils.c for libdivsufsort2* Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.3*4* Permission is hereby granted, free of charge, to any person5* obtaining a copy of this software and associated documentation6* files (the "Software"), to deal in the Software without7* restriction, including without limitation the rights to use,8* copy, modify, merge, publish, distribute, sublicense, and/or sell9* copies of the Software, and to permit persons to whom the10* Software is furnished to do so, subject to the following11* conditions:12*13* The above copyright notice and this permission notice shall be14* included in all copies or substantial portions of the Software.15*16* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,17* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES18* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND19* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT20* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,21* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING22* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR23* OTHER DEALINGS IN THE SOFTWARE.24*/2526#include "divsufsort_private.h"272829/*- Private Function -*/3031/* Binary search for inverse bwt. */32static33saidx_t34binarysearch_lower(const saidx_t *A, saidx_t size, saidx_t value) {35saidx_t half, i;36for(i = 0, half = size >> 1;370 < size;38size = half, half >>= 1) {39if(A[i + half] < value) {40i += half + 1;41half -= (size & 1) ^ 1;42}43}44return i;45}464748/*- Functions -*/4950/* Burrows-Wheeler transform. */51saint_t52bw_transform(const sauchar_t *T, sauchar_t *U, saidx_t *SA,53saidx_t n, saidx_t *idx) {54saidx_t *A, i, j, p, t;55saint_t c;5657/* Check arguments. 
*/58if((T == NULL) || (U == NULL) || (n < 0) || (idx == NULL)) { return -1; }59if(n <= 1) {60if(n == 1) { U[0] = T[0]; }61*idx = n;62return 0;63}6465if((A = SA) == NULL) {66i = divbwt(T, U, NULL, n);67if(0 <= i) { *idx = i; i = 0; }68return (saint_t)i;69}7071/* BW transform. */72if(T == U) {73t = n;74for(i = 0, j = 0; i < n; ++i) {75p = t - 1;76t = A[i];77if(0 <= p) {78c = T[j];79U[j] = (j <= p) ? T[p] : (sauchar_t)A[p];80A[j] = c;81j++;82} else {83*idx = i;84}85}86p = t - 1;87if(0 <= p) {88c = T[j];89U[j] = (j <= p) ? T[p] : (sauchar_t)A[p];90A[j] = c;91} else {92*idx = i;93}94} else {95U[0] = T[n - 1];96for(i = 0; A[i] != 0; ++i) { U[i + 1] = T[A[i] - 1]; }97*idx = i + 1;98for(++i; i < n; ++i) { U[i] = T[A[i] - 1]; }99}100101if(SA == NULL) {102/* Deallocate memory. */103free(A);104}105106return 0;107}108109/* Inverse Burrows-Wheeler transform. */110saint_t111inverse_bw_transform(const sauchar_t *T, sauchar_t *U, saidx_t *A,112saidx_t n, saidx_t idx) {113saidx_t C[ALPHABET_SIZE];114sauchar_t D[ALPHABET_SIZE];115saidx_t *B;116saidx_t i, p;117saint_t c, d;118119/* Check arguments. */120if((T == NULL) || (U == NULL) || (n < 0) || (idx < 0) ||121(n < idx) || ((0 < n) && (idx == 0))) {122return -1;123}124if(n <= 1) { return 0; }125126if((B = A) == NULL) {127/* Allocate n*sizeof(saidx_t) bytes of memory. */128if((B = (saidx_t *)malloc((size_t)n * sizeof(saidx_t))) == NULL) { return -2; }129}130131/* Inverse BW transform. */132for(c = 0; c < ALPHABET_SIZE; ++c) { C[c] = 0; }133for(i = 0; i < n; ++i) { ++C[T[i]]; }134for(c = 0, d = 0, i = 0; c < ALPHABET_SIZE; ++c) {135p = C[c];136if(0 < p) {137C[c] = i;138D[d++] = (sauchar_t)c;139i += p;140}141}142for(i = 0; i < idx; ++i) { B[C[T[i]]++] = i; }143for( ; i < n; ++i) { B[C[T[i]]++] = i + 1; }144for(c = 0; c < d; ++c) { C[c] = C[D[c]]; }145for(i = 0, p = idx; i < n; ++i) {146U[i] = D[binarysearch_lower(C, d, p)];147p = B[p - 1];148}149150if(A == NULL) {151/* Deallocate memory. 
*/152free(B);153}154155return 0;156}157158/* Checks the suffix array SA of the string T. */159saint_t160sufcheck(const sauchar_t *T, const saidx_t *SA,161saidx_t n, saint_t verbose) {162saidx_t C[ALPHABET_SIZE];163saidx_t i, p, q, t;164saint_t c;165166if(verbose) { fprintf(stderr, "sufcheck: "); }167168/* Check arguments. */169if((T == NULL) || (SA == NULL) || (n < 0)) {170if(verbose) { fprintf(stderr, "Invalid arguments.\n"); }171return -1;172}173if(n == 0) {174if(verbose) { fprintf(stderr, "Done.\n"); }175return 0;176}177178/* check range: [0..n-1] */179for(i = 0; i < n; ++i) {180if((SA[i] < 0) || (n <= SA[i])) {181if(verbose) {182fprintf(stderr, "Out of the range [0,%" PRIdSAIDX_T "].\n"183" SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "\n",184n - 1, i, SA[i]);185}186return -2;187}188}189190/* check first characters. */191for(i = 1; i < n; ++i) {192if(T[SA[i - 1]] > T[SA[i]]) {193if(verbose) {194fprintf(stderr, "Suffixes in wrong order.\n"195" T[SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "]=%d"196" > T[SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "]=%d\n",197i - 1, SA[i - 1], T[SA[i - 1]], i, SA[i], T[SA[i]]);198}199return -3;200}201}202203/* check suffixes. */204for(i = 0; i < ALPHABET_SIZE; ++i) { C[i] = 0; }205for(i = 0; i < n; ++i) { ++C[T[i]]; }206for(i = 0, p = 0; i < ALPHABET_SIZE; ++i) {207t = C[i];208C[i] = p;209p += t;210}211212q = C[T[n - 1]];213C[T[n - 1]] += 1;214for(i = 0; i < n; ++i) {215p = SA[i];216if(0 < p) {217c = T[--p];218t = C[c];219} else {220c = T[p = n - 1];221t = q;222}223if((t < 0) || (p != SA[t])) {224if(verbose) {225fprintf(stderr, "Suffix in wrong position.\n"226" SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T " or\n"227" SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "\n",228t, (0 <= t) ? 
SA[t] : -1, i, SA[i]);229}230return -4;231}232if(t != q) {233++C[c];234if((n <= C[c]) || (T[SA[C[c]]] != c)) { C[c] = -1; }235}236}237238if(1 <= verbose) { fprintf(stderr, "Done.\n"); }239return 0;240}241242243static244int245_compare(const sauchar_t *T, saidx_t Tsize,246const sauchar_t *P, saidx_t Psize,247saidx_t suf, saidx_t *match) {248saidx_t i, j;249saint_t r;250for(i = suf + *match, j = *match, r = 0;251(i < Tsize) && (j < Psize) && ((r = T[i] - P[j]) == 0); ++i, ++j) { }252*match = j;253return (r == 0) ? -(j != Psize) : r;254}255256/* Search for the pattern P in the string T. */257saidx_t258sa_search(const sauchar_t *T, saidx_t Tsize,259const sauchar_t *P, saidx_t Psize,260const saidx_t *SA, saidx_t SAsize,261saidx_t *idx) {262saidx_t size, lsize, rsize, half;263saidx_t match, lmatch, rmatch;264saidx_t llmatch, lrmatch, rlmatch, rrmatch;265saidx_t i, j, k;266saint_t r;267268if(idx != NULL) { *idx = -1; }269if((T == NULL) || (P == NULL) || (SA == NULL) ||270(Tsize < 0) || (Psize < 0) || (SAsize < 0)) { return -1; }271if((Tsize == 0) || (SAsize == 0)) { return 0; }272if(Psize == 0) { if(idx != NULL) { *idx = 0; } return SAsize; }273274for(i = j = k = 0, lmatch = rmatch = 0, size = SAsize, half = size >> 1;2750 < size;276size = half, half >>= 1) {277match = MIN(lmatch, rmatch);278r = _compare(T, Tsize, P, Psize, SA[i + half], &match);279if(r < 0) {280i += half + 1;281half -= (size & 1) ^ 1;282lmatch = match;283} else if(r > 0) {284rmatch = match;285} else {286lsize = half, j = i, rsize = size - half - 1, k = i + half + 1;287288/* left part */289for(llmatch = lmatch, lrmatch = match, half = lsize >> 1;2900 < lsize;291lsize = half, half >>= 1) {292lmatch = MIN(llmatch, lrmatch);293r = _compare(T, Tsize, P, Psize, SA[j + half], &lmatch);294if(r < 0) {295j += half + 1;296half -= (lsize & 1) ^ 1;297llmatch = lmatch;298} else {299lrmatch = lmatch;300}301}302303/* right part */304for(rlmatch = match, rrmatch = rmatch, half = rsize >> 1;3050 < rsize;306rsize = half, 
half >>= 1) {307rmatch = MIN(rlmatch, rrmatch);308r = _compare(T, Tsize, P, Psize, SA[k + half], &rmatch);309if(r <= 0) {310k += half + 1;311half -= (rsize & 1) ^ 1;312rlmatch = rmatch;313} else {314rrmatch = rmatch;315}316}317318break;319}320}321322if(idx != NULL) { *idx = (0 < (k - j)) ? j : i; }323return k - j;324}325326/* Search for the character c in the string T. */327saidx_t328sa_simplesearch(const sauchar_t *T, saidx_t Tsize,329const saidx_t *SA, saidx_t SAsize,330saint_t c, saidx_t *idx) {331saidx_t size, lsize, rsize, half;332saidx_t i, j, k, p;333saint_t r;334335if(idx != NULL) { *idx = -1; }336if((T == NULL) || (SA == NULL) || (Tsize < 0) || (SAsize < 0)) { return -1; }337if((Tsize == 0) || (SAsize == 0)) { return 0; }338339for(i = j = k = 0, size = SAsize, half = size >> 1;3400 < size;341size = half, half >>= 1) {342p = SA[i + half];343r = (p < Tsize) ? T[p] - c : -1;344if(r < 0) {345i += half + 1;346half -= (size & 1) ^ 1;347} else if(r == 0) {348lsize = half, j = i, rsize = size - half - 1, k = i + half + 1;349350/* left part */351for(half = lsize >> 1;3520 < lsize;353lsize = half, half >>= 1) {354p = SA[j + half];355r = (p < Tsize) ? T[p] - c : -1;356if(r < 0) {357j += half + 1;358half -= (lsize & 1) ^ 1;359}360}361362/* right part */363for(half = rsize >> 1;3640 < rsize;365rsize = half, half >>= 1) {366p = SA[k + half];367r = (p < Tsize) ? T[p] - c : -1;368if(r <= 0) {369k += half + 1;370half -= (rsize & 1) ^ 1;371}372}373374break;375}376}377378if(idx != NULL) { *idx = (0 < (k - j)) ? j : i; }379return k - j;380}381382383