Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/libdivsufsort/lib/utils.c
39536 views
1
/*
2
* utils.c for libdivsufsort
3
* Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
4
*
5
* Permission is hereby granted, free of charge, to any person
6
* obtaining a copy of this software and associated documentation
7
* files (the "Software"), to deal in the Software without
8
* restriction, including without limitation the rights to use,
9
* copy, modify, merge, publish, distribute, sublicense, and/or sell
10
* copies of the Software, and to permit persons to whom the
11
* Software is furnished to do so, subject to the following
12
* conditions:
13
*
14
* The above copyright notice and this permission notice shall be
15
* included in all copies or substantial portions of the Software.
16
*
17
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
19
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
21
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
22
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
24
* OTHER DEALINGS IN THE SOFTWARE.
25
*/
26
27
#include "divsufsort_private.h"
28
29
30
/*- Private Function -*/
31
32
/* Binary search for inverse bwt. */
33
static
34
saidx_t
35
binarysearch_lower(const saidx_t *A, saidx_t size, saidx_t value) {
36
saidx_t half, i;
37
for(i = 0, half = size >> 1;
38
0 < size;
39
size = half, half >>= 1) {
40
if(A[i + half] < value) {
41
i += half + 1;
42
half -= (size & 1) ^ 1;
43
}
44
}
45
return i;
46
}
47
48
49
/*- Functions -*/
50
51
/* Burrows-Wheeler transform. */
52
saint_t
53
bw_transform(const sauchar_t *T, sauchar_t *U, saidx_t *SA,
54
saidx_t n, saidx_t *idx) {
55
saidx_t *A, i, j, p, t;
56
saint_t c;
57
58
/* Check arguments. */
59
if((T == NULL) || (U == NULL) || (n < 0) || (idx == NULL)) { return -1; }
60
if(n <= 1) {
61
if(n == 1) { U[0] = T[0]; }
62
*idx = n;
63
return 0;
64
}
65
66
if((A = SA) == NULL) {
67
i = divbwt(T, U, NULL, n);
68
if(0 <= i) { *idx = i; i = 0; }
69
return (saint_t)i;
70
}
71
72
/* BW transform. */
73
if(T == U) {
74
t = n;
75
for(i = 0, j = 0; i < n; ++i) {
76
p = t - 1;
77
t = A[i];
78
if(0 <= p) {
79
c = T[j];
80
U[j] = (j <= p) ? T[p] : (sauchar_t)A[p];
81
A[j] = c;
82
j++;
83
} else {
84
*idx = i;
85
}
86
}
87
p = t - 1;
88
if(0 <= p) {
89
c = T[j];
90
U[j] = (j <= p) ? T[p] : (sauchar_t)A[p];
91
A[j] = c;
92
} else {
93
*idx = i;
94
}
95
} else {
96
U[0] = T[n - 1];
97
for(i = 0; A[i] != 0; ++i) { U[i + 1] = T[A[i] - 1]; }
98
*idx = i + 1;
99
for(++i; i < n; ++i) { U[i] = T[A[i] - 1]; }
100
}
101
102
if(SA == NULL) {
103
/* Deallocate memory. */
104
free(A);
105
}
106
107
return 0;
108
}
109
110
/* Inverse Burrows-Wheeler transform. */
111
saint_t
112
inverse_bw_transform(const sauchar_t *T, sauchar_t *U, saidx_t *A,
113
saidx_t n, saidx_t idx) {
114
saidx_t C[ALPHABET_SIZE];
115
sauchar_t D[ALPHABET_SIZE];
116
saidx_t *B;
117
saidx_t i, p;
118
saint_t c, d;
119
120
/* Check arguments. */
121
if((T == NULL) || (U == NULL) || (n < 0) || (idx < 0) ||
122
(n < idx) || ((0 < n) && (idx == 0))) {
123
return -1;
124
}
125
if(n <= 1) { return 0; }
126
127
if((B = A) == NULL) {
128
/* Allocate n*sizeof(saidx_t) bytes of memory. */
129
if((B = (saidx_t *)malloc((size_t)n * sizeof(saidx_t))) == NULL) { return -2; }
130
}
131
132
/* Inverse BW transform. */
133
for(c = 0; c < ALPHABET_SIZE; ++c) { C[c] = 0; }
134
for(i = 0; i < n; ++i) { ++C[T[i]]; }
135
for(c = 0, d = 0, i = 0; c < ALPHABET_SIZE; ++c) {
136
p = C[c];
137
if(0 < p) {
138
C[c] = i;
139
D[d++] = (sauchar_t)c;
140
i += p;
141
}
142
}
143
for(i = 0; i < idx; ++i) { B[C[T[i]]++] = i; }
144
for( ; i < n; ++i) { B[C[T[i]]++] = i + 1; }
145
for(c = 0; c < d; ++c) { C[c] = C[D[c]]; }
146
for(i = 0, p = idx; i < n; ++i) {
147
U[i] = D[binarysearch_lower(C, d, p)];
148
p = B[p - 1];
149
}
150
151
if(A == NULL) {
152
/* Deallocate memory. */
153
free(B);
154
}
155
156
return 0;
157
}
158
159
/* Checks the suffix array SA of the string T. */
160
saint_t
161
sufcheck(const sauchar_t *T, const saidx_t *SA,
162
saidx_t n, saint_t verbose) {
163
saidx_t C[ALPHABET_SIZE];
164
saidx_t i, p, q, t;
165
saint_t c;
166
167
if(verbose) { fprintf(stderr, "sufcheck: "); }
168
169
/* Check arguments. */
170
if((T == NULL) || (SA == NULL) || (n < 0)) {
171
if(verbose) { fprintf(stderr, "Invalid arguments.\n"); }
172
return -1;
173
}
174
if(n == 0) {
175
if(verbose) { fprintf(stderr, "Done.\n"); }
176
return 0;
177
}
178
179
/* check range: [0..n-1] */
180
for(i = 0; i < n; ++i) {
181
if((SA[i] < 0) || (n <= SA[i])) {
182
if(verbose) {
183
fprintf(stderr, "Out of the range [0,%" PRIdSAIDX_T "].\n"
184
" SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "\n",
185
n - 1, i, SA[i]);
186
}
187
return -2;
188
}
189
}
190
191
/* check first characters. */
192
for(i = 1; i < n; ++i) {
193
if(T[SA[i - 1]] > T[SA[i]]) {
194
if(verbose) {
195
fprintf(stderr, "Suffixes in wrong order.\n"
196
" T[SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "]=%d"
197
" > T[SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "]=%d\n",
198
i - 1, SA[i - 1], T[SA[i - 1]], i, SA[i], T[SA[i]]);
199
}
200
return -3;
201
}
202
}
203
204
/* check suffixes. */
205
for(i = 0; i < ALPHABET_SIZE; ++i) { C[i] = 0; }
206
for(i = 0; i < n; ++i) { ++C[T[i]]; }
207
for(i = 0, p = 0; i < ALPHABET_SIZE; ++i) {
208
t = C[i];
209
C[i] = p;
210
p += t;
211
}
212
213
q = C[T[n - 1]];
214
C[T[n - 1]] += 1;
215
for(i = 0; i < n; ++i) {
216
p = SA[i];
217
if(0 < p) {
218
c = T[--p];
219
t = C[c];
220
} else {
221
c = T[p = n - 1];
222
t = q;
223
}
224
if((t < 0) || (p != SA[t])) {
225
if(verbose) {
226
fprintf(stderr, "Suffix in wrong position.\n"
227
" SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T " or\n"
228
" SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "\n",
229
t, (0 <= t) ? SA[t] : -1, i, SA[i]);
230
}
231
return -4;
232
}
233
if(t != q) {
234
++C[c];
235
if((n <= C[c]) || (T[SA[C[c]]] != c)) { C[c] = -1; }
236
}
237
}
238
239
if(1 <= verbose) { fprintf(stderr, "Done.\n"); }
240
return 0;
241
}
242
243
244
static
245
int
246
_compare(const sauchar_t *T, saidx_t Tsize,
247
const sauchar_t *P, saidx_t Psize,
248
saidx_t suf, saidx_t *match) {
249
saidx_t i, j;
250
saint_t r;
251
for(i = suf + *match, j = *match, r = 0;
252
(i < Tsize) && (j < Psize) && ((r = T[i] - P[j]) == 0); ++i, ++j) { }
253
*match = j;
254
return (r == 0) ? -(j != Psize) : r;
255
}
256
257
/* Search for the pattern P in the string T. */
258
saidx_t
259
sa_search(const sauchar_t *T, saidx_t Tsize,
260
const sauchar_t *P, saidx_t Psize,
261
const saidx_t *SA, saidx_t SAsize,
262
saidx_t *idx) {
263
saidx_t size, lsize, rsize, half;
264
saidx_t match, lmatch, rmatch;
265
saidx_t llmatch, lrmatch, rlmatch, rrmatch;
266
saidx_t i, j, k;
267
saint_t r;
268
269
if(idx != NULL) { *idx = -1; }
270
if((T == NULL) || (P == NULL) || (SA == NULL) ||
271
(Tsize < 0) || (Psize < 0) || (SAsize < 0)) { return -1; }
272
if((Tsize == 0) || (SAsize == 0)) { return 0; }
273
if(Psize == 0) { if(idx != NULL) { *idx = 0; } return SAsize; }
274
275
for(i = j = k = 0, lmatch = rmatch = 0, size = SAsize, half = size >> 1;
276
0 < size;
277
size = half, half >>= 1) {
278
match = MIN(lmatch, rmatch);
279
r = _compare(T, Tsize, P, Psize, SA[i + half], &match);
280
if(r < 0) {
281
i += half + 1;
282
half -= (size & 1) ^ 1;
283
lmatch = match;
284
} else if(r > 0) {
285
rmatch = match;
286
} else {
287
lsize = half, j = i, rsize = size - half - 1, k = i + half + 1;
288
289
/* left part */
290
for(llmatch = lmatch, lrmatch = match, half = lsize >> 1;
291
0 < lsize;
292
lsize = half, half >>= 1) {
293
lmatch = MIN(llmatch, lrmatch);
294
r = _compare(T, Tsize, P, Psize, SA[j + half], &lmatch);
295
if(r < 0) {
296
j += half + 1;
297
half -= (lsize & 1) ^ 1;
298
llmatch = lmatch;
299
} else {
300
lrmatch = lmatch;
301
}
302
}
303
304
/* right part */
305
for(rlmatch = match, rrmatch = rmatch, half = rsize >> 1;
306
0 < rsize;
307
rsize = half, half >>= 1) {
308
rmatch = MIN(rlmatch, rrmatch);
309
r = _compare(T, Tsize, P, Psize, SA[k + half], &rmatch);
310
if(r <= 0) {
311
k += half + 1;
312
half -= (rsize & 1) ^ 1;
313
rlmatch = rmatch;
314
} else {
315
rrmatch = rmatch;
316
}
317
}
318
319
break;
320
}
321
}
322
323
if(idx != NULL) { *idx = (0 < (k - j)) ? j : i; }
324
return k - j;
325
}
326
327
/* Search for the character c in the string T. */
328
saidx_t
329
sa_simplesearch(const sauchar_t *T, saidx_t Tsize,
330
const saidx_t *SA, saidx_t SAsize,
331
saint_t c, saidx_t *idx) {
332
saidx_t size, lsize, rsize, half;
333
saidx_t i, j, k, p;
334
saint_t r;
335
336
if(idx != NULL) { *idx = -1; }
337
if((T == NULL) || (SA == NULL) || (Tsize < 0) || (SAsize < 0)) { return -1; }
338
if((Tsize == 0) || (SAsize == 0)) { return 0; }
339
340
for(i = j = k = 0, size = SAsize, half = size >> 1;
341
0 < size;
342
size = half, half >>= 1) {
343
p = SA[i + half];
344
r = (p < Tsize) ? T[p] - c : -1;
345
if(r < 0) {
346
i += half + 1;
347
half -= (size & 1) ^ 1;
348
} else if(r == 0) {
349
lsize = half, j = i, rsize = size - half - 1, k = i + half + 1;
350
351
/* left part */
352
for(half = lsize >> 1;
353
0 < lsize;
354
lsize = half, half >>= 1) {
355
p = SA[j + half];
356
r = (p < Tsize) ? T[p] - c : -1;
357
if(r < 0) {
358
j += half + 1;
359
half -= (lsize & 1) ^ 1;
360
}
361
}
362
363
/* right part */
364
for(half = rsize >> 1;
365
0 < rsize;
366
rsize = half, half >>= 1) {
367
p = SA[k + half];
368
r = (p < Tsize) ? T[p] - c : -1;
369
if(r <= 0) {
370
k += half + 1;
371
half -= (rsize & 1) ^ 1;
372
}
373
}
374
375
break;
376
}
377
}
378
379
if(idx != NULL) { *idx = (0 < (k - j)) ? j : i; }
380
return k - j;
381
}
382
383