Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/icu4c/common/bmpset.cpp
9903 views
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
******************************************************************************
5
*
6
* Copyright (C) 2007-2012, International Business Machines
7
* Corporation and others. All Rights Reserved.
8
*
9
******************************************************************************
10
* file name: bmpset.cpp
11
* encoding: UTF-8
12
* tab size: 8 (not used)
13
* indentation:4
14
*
15
* created on: 2007jan29
16
* created by: Markus W. Scherer
17
*/
18
19
#include "unicode/utypes.h"
20
#include "unicode/uniset.h"
21
#include "unicode/utf8.h"
22
#include "unicode/utf16.h"
23
#include "cmemory.h"
24
#include "bmpset.h"
25
#include "uassert.h"
26
27
U_NAMESPACE_BEGIN
28
29
/*
 * Builds the lookup tables for the given parent range list.
 * The list is referenced, not copied: only the pointer and length are stored.
 *
 * parentList        boundary list that findCodePoint()/initBits() index into
 * parentListLength  number of entries in parentList
 */
BMPSet::BMPSet(const int32_t *parentList, int32_t parentListLength) :
        list(parentList), listLength(parentListLength) {
    // All three tables start out empty; initBits() fills them in afterwards.
    uprv_memset(bmpBlockBits, 0, sizeof(bmpBlockBits));
    uprv_memset(table7FF, 0, sizeof(table7FF));
    uprv_memset(latin1Contains, 0, sizeof(latin1Contains));

    /*
     * Record where binary searches should start for each 4k block:
     * U+0800, U+1000, U+2000, .., U+F000, U+10000.
     * U+0800 is the first code point that needs three UTF-8 bytes; everything
     * below it is answered from the bit tables instead.
     * The final pair of entries brackets the supplementary code points.
     */
    const int32_t lastIndex = listLength - 1;
    list4kStarts[0] = findCodePoint(0x800, 0, lastIndex);
    for (int32_t block = 1; block <= 0x10; ++block) {
        // Each search resumes from the previous block's result.
        list4kStarts[block] = findCodePoint(block << 12, list4kStarts[block - 1], lastIndex);
    }
    list4kStarts[0x11] = lastIndex;

    // U+FFFD lives in the 4k block starting at U+F000.
    containsFFFD = containsSlow(0xfffd, list4kStarts[0xf], list4kStarts[0x10]);

    initBits();
    overrideIllegal();
}
53
54
/*
 * Creates a BMPSet that reuses the tables already computed in otherBMPSet
 * but refers to a different parent list.
 * Nothing is recomputed here: every table is copied verbatim.
 *
 * otherBMPSet          source of containsFFFD and all lookup tables
 * newParentList        list pointer stored in the new object
 * newParentListLength  number of entries in newParentList
 */
BMPSet::BMPSet(const BMPSet &otherBMPSet, const int32_t *newParentList, int32_t newParentListLength) :
        containsFFFD(otherBMPSet.containsFFFD),
        list(newParentList), listLength(newParentListLength) {
    uprv_memcpy(list4kStarts, otherBMPSet.list4kStarts, sizeof(list4kStarts));
    uprv_memcpy(bmpBlockBits, otherBMPSet.bmpBlockBits, sizeof(bmpBlockBits));
    uprv_memcpy(table7FF, otherBMPSet.table7FF, sizeof(table7FF));
    uprv_memcpy(latin1Contains, otherBMPSet.latin1Contains, sizeof(latin1Contains));
}
62
63
// Nothing to release here: the constructors only store the list pointer.
BMPSet::~BMPSet() = default;
65
66
/*
 * Set bits in a bit rectangle in "vertical" bit organization:
 * for a value v, the bit with index (v>>6) is set in table[v&0x3f].
 * All values in [start, limit) are marked.
 *
 * table  64 words of 32 bits each; bits are only ever ORed in
 * start  first value to set
 * limit  one past the last value to set; requires start<limit<=0x800
 */
static void set32x64Bits(uint32_t table[64], int32_t start, int32_t limit) {
    U_ASSERT(start<limit);
    U_ASSERT(limit<=0x800);

    int32_t lead=start>>6;  // Named for UTF-8 2-byte lead byte with upper 5 bits.
    int32_t trail=start&0x3f;  // Named for UTF-8 2-byte trail byte with lower 6 bits.

    // The bit that belongs to the starting column.
    uint32_t bits = static_cast<uint32_t>(1) << lead;
    if((start+1)==limit) {  // Single-value shortcut.
        table[trail]|=bits;
        return;
    }

    const int32_t limitLead=limit>>6;
    const int32_t limitTrail=limit&0x3f;

    if(lead==limitLead) {
        // Everything lies in one partial vertical bit column.
        while(trail<limitTrail) {
            table[trail++]|=bits;
        }
        return;
    }

    // Partial vertical bit column,
    // followed by a bit rectangle,
    // followed by another partial vertical bit column.
    if(trail>0) {
        do {
            table[trail++]|=bits;
        } while(trail<64);
        ++lead;
    }
    if(lead<limitLead) {
        // Columns lead..limitLead-1 are completely filled.
        // lead<limitLead<=32 here, so the shift count is at most 31.
        bits = ~((static_cast<uint32_t>(1) << lead) - 1);
        if(limitLead<0x20) {
            bits &= (static_cast<uint32_t>(1) << limitLead) - 1;
        }
        for(trail=0; trail<64; ++trail) {
            table[trail]|=bits;
        }
    }
    // limit<=0x800, so limitTrail>0 implies limitLead<=31 and the shift is
    // well defined. When limit==0x800 (limitLead==32, limitTrail==0) there is
    // no trailing partial column and no shift is evaluated at all.
    if(limitTrail>0) {
        bits = static_cast<uint32_t>(1) << limitLead;
        for(trail=0; trail<limitTrail; ++trail) {
            table[trail]|=bits;
        }
    }
}
120
121
void BMPSet::initBits() {
122
UChar32 start, limit;
123
int32_t listIndex=0;
124
125
// Set latin1Contains[].
126
do {
127
start=list[listIndex++];
128
if(listIndex<listLength) {
129
limit=list[listIndex++];
130
} else {
131
limit=0x110000;
132
}
133
if(start>=0x100) {
134
break;
135
}
136
do {
137
latin1Contains[start++]=1;
138
} while(start<limit && start<0x100);
139
} while(limit<=0x100);
140
141
// Find the first range overlapping with (or after) 80..FF again,
142
// to include them in table7FF as well.
143
for(listIndex=0;;) {
144
start=list[listIndex++];
145
if(listIndex<listLength) {
146
limit=list[listIndex++];
147
} else {
148
limit=0x110000;
149
}
150
if(limit>0x80) {
151
if(start<0x80) {
152
start=0x80;
153
}
154
break;
155
}
156
}
157
158
// Set table7FF[].
159
while(start<0x800) {
160
set32x64Bits(table7FF, start, limit<=0x800 ? limit : 0x800);
161
if(limit>0x800) {
162
start=0x800;
163
break;
164
}
165
166
start=list[listIndex++];
167
if(listIndex<listLength) {
168
limit=list[listIndex++];
169
} else {
170
limit=0x110000;
171
}
172
}
173
174
// Set bmpBlockBits[].
175
int32_t minStart=0x800;
176
while(start<0x10000) {
177
if(limit>0x10000) {
178
limit=0x10000;
179
}
180
181
if(start<minStart) {
182
start=minStart;
183
}
184
if(start<limit) { // Else: Another range entirely in a known mixed-value block.
185
if(start&0x3f) {
186
// Mixed-value block of 64 code points.
187
start>>=6;
188
bmpBlockBits[start&0x3f]|=0x10001<<(start>>6);
189
start=(start+1)<<6; // Round up to the next block boundary.
190
minStart=start; // Ignore further ranges in this block.
191
}
192
if(start<limit) {
193
if(start<(limit&~0x3f)) {
194
// Multiple all-ones blocks of 64 code points each.
195
set32x64Bits(bmpBlockBits, start>>6, limit>>6);
196
}
197
198
if(limit&0x3f) {
199
// Mixed-value block of 64 code points.
200
limit>>=6;
201
bmpBlockBits[limit&0x3f]|=0x10001<<(limit>>6);
202
limit=(limit+1)<<6; // Round up to the next block boundary.
203
minStart=limit; // Ignore further ranges in this block.
204
}
205
}
206
}
207
208
if(limit==0x10000) {
209
break;
210
}
211
212
start=list[listIndex++];
213
if(listIndex<listLength) {
214
limit=list[listIndex++];
215
} else {
216
limit=0x110000;
217
}
218
}
219
}
220
221
/*
222
* Override some bits and bytes to the result of contains(FFFD)
223
* for faster validity checking at runtime.
224
* No need to set 0 values where they were reset to 0 in the constructor
225
* and not modified by initBits().
226
* (table7FF[] 0..7F, bmpBlockBits[] 0..7FF)
227
* Need to set 0 values for surrogates D800..DFFF.
228
*/
229
void BMPSet::overrideIllegal() {
230
uint32_t bits, mask;
231
int32_t i;
232
233
if(containsFFFD) {
234
bits=3; // Lead bytes 0xC0 and 0xC1.
235
for(i=0; i<64; ++i) {
236
table7FF[i]|=bits;
237
}
238
239
bits=1; // Lead byte 0xE0.
240
for(i=0; i<32; ++i) { // First half of 4k block.
241
bmpBlockBits[i]|=bits;
242
}
243
244
mask= static_cast<uint32_t>(~(0x10001<<0xd)); // Lead byte 0xED.
245
bits=1<<0xd;
246
for(i=32; i<64; ++i) { // Second half of 4k block.
247
bmpBlockBits[i]=(bmpBlockBits[i]&mask)|bits;
248
}
249
} else {
250
mask= static_cast<uint32_t>(~(0x10001<<0xd)); // Lead byte 0xED.
251
for(i=32; i<64; ++i) { // Second half of 4k block.
252
bmpBlockBits[i]&=mask;
253
}
254
}
255
}
256
257
/*
 * Binary search in list[] between the indexes lo and hi.
 * Returns the smallest index i such that c < list[i].
 * Assumes list[len - 1] == HIGH and that c is legal (0..HIGH-1).
 *
 * Examples:
 *                                     findCodePoint(c)
 *   set              list[]           c=0 1 3 4 7 8
 *   ===              ==============   ===========
 *   []               [110000]           0 0 0 0 0 0
 *   [\u0000-\u0003]  [0, 4, 110000]     1 1 1 2 2 2
 *   [\u0004-\u0007]  [4, 8, 110000]     0 0 0 1 1 2
 *   [:Any:]          [0, 110000]        1 1 1 1 1 1
 */
int32_t BMPSet::findCodePoint(UChar32 c, int32_t lo, int32_t hi) const {
    if (c < list[lo]) {
        return lo;
    }
    // High runner test. c is often after the last range, so checking
    // for that up front avoids the search loop entirely.
    if (lo >= hi || c >= list[hi - 1]) {
        return hi;
    }
    // From here on: list[lo] <= c < list[hi].
    int32_t mid = (lo + hi) >> 1;
    while (mid != lo) {
        if (c < list[mid]) {
            hi = mid;
        } else {
            lo = mid;
        }
        mid = (lo + hi) >> 1;
    }
    // lo and hi are adjacent now, so hi is the answer.
    return hi;
}
290
291
/*
 * Tests whether the code point c is in the set.
 * Values up to U+07FF are answered from the bit tables, other BMP
 * non-surrogates from bmpBlockBits[] (falling back to a list search for
 * mixed blocks), surrogates and supplementary code points from the list.
 * Anything outside 0..0x10ffff yields false.
 */
UBool
BMPSet::contains(UChar32 c) const {
    // Unsigned view so that negative values fail every range check below.
    const uint32_t cp = static_cast<uint32_t>(c);

    if (cp <= 0xff) {
        return latin1Contains[c];
    }
    if (cp <= 0x7ff) {
        return (table7FF[c & 0x3f] & (static_cast<uint32_t>(1) << (c >> 6))) != 0;
    }
    if (cp < 0xd800 || (c >= 0xe000 && c <= 0xffff)) {
        const int lead = c >> 12;
        const uint32_t twoBits = (bmpBlockBits[(c >> 6) & 0x3f] >> lead) & 0x10001;
        if (twoBits > 1) {
            // Mixed block: look up the code point in its 4k block of code points.
            return containsSlow(c, list4kStarts[lead], list4kStarts[lead + 1]);
        }
        // All 64 code points with the same bits 15..6
        // are either in the set or not.
        return twoBits;
    }
    if (cp <= 0x10ffff) {
        // surrogate or supplementary code point
        return containsSlow(c, list4kStarts[0xd], list4kStarts[0x11]);
    }
    // Out-of-range code points get false, consistent with long-standing
    // behavior of UnicodeSet::contains(c).
    return false;
}
317
318
/*
319
* Check for sufficient length for trail unit for each surrogate pair.
320
* Handle single surrogates as surrogate code points as usual in ICU.
321
*/
322
const char16_t *
323
BMPSet::span(const char16_t *s, const char16_t *limit, USetSpanCondition spanCondition) const {
324
char16_t c, c2;
325
326
if(spanCondition) {
327
// span
328
do {
329
c=*s;
330
if(c<=0xff) {
331
if(!latin1Contains[c]) {
332
break;
333
}
334
} else if(c<=0x7ff) {
335
if ((table7FF[c & 0x3f] & (static_cast<uint32_t>(1) << (c >> 6))) == 0) {
336
break;
337
}
338
} else if(c<0xd800 || c>=0xe000) {
339
int lead=c>>12;
340
uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
341
if(twoBits<=1) {
342
// All 64 code points with the same bits 15..6
343
// are either in the set or not.
344
if(twoBits==0) {
345
break;
346
}
347
} else {
348
// Look up the code point in its 4k block of code points.
349
if(!containsSlow(c, list4kStarts[lead], list4kStarts[lead+1])) {
350
break;
351
}
352
}
353
} else if(c>=0xdc00 || (s+1)==limit || (c2=s[1])<0xdc00 || c2>=0xe000) {
354
// surrogate code point
355
if(!containsSlow(c, list4kStarts[0xd], list4kStarts[0xe])) {
356
break;
357
}
358
} else {
359
// surrogate pair
360
if(!containsSlow(U16_GET_SUPPLEMENTARY(c, c2), list4kStarts[0x10], list4kStarts[0x11])) {
361
break;
362
}
363
++s;
364
}
365
} while(++s<limit);
366
} else {
367
// span not
368
do {
369
c=*s;
370
if(c<=0xff) {
371
if(latin1Contains[c]) {
372
break;
373
}
374
} else if(c<=0x7ff) {
375
if ((table7FF[c & 0x3f] & (static_cast<uint32_t>(1) << (c >> 6))) != 0) {
376
break;
377
}
378
} else if(c<0xd800 || c>=0xe000) {
379
int lead=c>>12;
380
uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
381
if(twoBits<=1) {
382
// All 64 code points with the same bits 15..6
383
// are either in the set or not.
384
if(twoBits!=0) {
385
break;
386
}
387
} else {
388
// Look up the code point in its 4k block of code points.
389
if(containsSlow(c, list4kStarts[lead], list4kStarts[lead+1])) {
390
break;
391
}
392
}
393
} else if(c>=0xdc00 || (s+1)==limit || (c2=s[1])<0xdc00 || c2>=0xe000) {
394
// surrogate code point
395
if(containsSlow(c, list4kStarts[0xd], list4kStarts[0xe])) {
396
break;
397
}
398
} else {
399
// surrogate pair
400
if(containsSlow(U16_GET_SUPPLEMENTARY(c, c2), list4kStarts[0x10], list4kStarts[0x11])) {
401
break;
402
}
403
++s;
404
}
405
} while(++s<limit);
406
}
407
return s;
408
}
409
410
/* Symmetrical with span(). */
411
const char16_t *
412
BMPSet::spanBack(const char16_t *s, const char16_t *limit, USetSpanCondition spanCondition) const {
413
char16_t c, c2;
414
415
if(spanCondition) {
416
// span
417
for(;;) {
418
c=*(--limit);
419
if(c<=0xff) {
420
if(!latin1Contains[c]) {
421
break;
422
}
423
} else if(c<=0x7ff) {
424
if ((table7FF[c & 0x3f] & (static_cast<uint32_t>(1) << (c >> 6))) == 0) {
425
break;
426
}
427
} else if(c<0xd800 || c>=0xe000) {
428
int lead=c>>12;
429
uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
430
if(twoBits<=1) {
431
// All 64 code points with the same bits 15..6
432
// are either in the set or not.
433
if(twoBits==0) {
434
break;
435
}
436
} else {
437
// Look up the code point in its 4k block of code points.
438
if(!containsSlow(c, list4kStarts[lead], list4kStarts[lead+1])) {
439
break;
440
}
441
}
442
} else if(c<0xdc00 || s==limit || (c2=*(limit-1))<0xd800 || c2>=0xdc00) {
443
// surrogate code point
444
if(!containsSlow(c, list4kStarts[0xd], list4kStarts[0xe])) {
445
break;
446
}
447
} else {
448
// surrogate pair
449
if(!containsSlow(U16_GET_SUPPLEMENTARY(c2, c), list4kStarts[0x10], list4kStarts[0x11])) {
450
break;
451
}
452
--limit;
453
}
454
if(s==limit) {
455
return s;
456
}
457
}
458
} else {
459
// span not
460
for(;;) {
461
c=*(--limit);
462
if(c<=0xff) {
463
if(latin1Contains[c]) {
464
break;
465
}
466
} else if(c<=0x7ff) {
467
if ((table7FF[c & 0x3f] & (static_cast<uint32_t>(1) << (c >> 6))) != 0) {
468
break;
469
}
470
} else if(c<0xd800 || c>=0xe000) {
471
int lead=c>>12;
472
uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
473
if(twoBits<=1) {
474
// All 64 code points with the same bits 15..6
475
// are either in the set or not.
476
if(twoBits!=0) {
477
break;
478
}
479
} else {
480
// Look up the code point in its 4k block of code points.
481
if(containsSlow(c, list4kStarts[lead], list4kStarts[lead+1])) {
482
break;
483
}
484
}
485
} else if(c<0xdc00 || s==limit || (c2=*(limit-1))<0xd800 || c2>=0xdc00) {
486
// surrogate code point
487
if(containsSlow(c, list4kStarts[0xd], list4kStarts[0xe])) {
488
break;
489
}
490
} else {
491
// surrogate pair
492
if(containsSlow(U16_GET_SUPPLEMENTARY(c2, c), list4kStarts[0x10], list4kStarts[0x11])) {
493
break;
494
}
495
--limit;
496
}
497
if(s==limit) {
498
return s;
499
}
500
}
501
}
502
return limit+1;
503
}
504
505
/*
506
* Precheck for sufficient trail bytes at end of string only once per span.
507
* Check validity.
508
*/
509
const uint8_t *
510
BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const {
511
const uint8_t *limit=s+length;
512
uint8_t b=*s;
513
if(U8_IS_SINGLE(b)) {
514
// Initial all-ASCII span.
515
if(spanCondition) {
516
do {
517
if(!latin1Contains[b] || ++s==limit) {
518
return s;
519
}
520
b=*s;
521
} while(U8_IS_SINGLE(b));
522
} else {
523
do {
524
if(latin1Contains[b] || ++s==limit) {
525
return s;
526
}
527
b=*s;
528
} while(U8_IS_SINGLE(b));
529
}
530
length = static_cast<int32_t>(limit - s);
531
}
532
533
if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
534
spanCondition=USET_SPAN_CONTAINED; // Pin to 0/1 values.
535
}
536
537
const uint8_t *limit0=limit;
538
539
/*
540
* Make sure that the last 1/2/3/4-byte sequence before limit is complete
541
* or runs into a lead byte.
542
* In the span loop compare s with limit only once
543
* per multi-byte character.
544
*
545
* Give a trailing illegal sequence the same value as the result of contains(FFFD),
546
* including it if that is part of the span, otherwise set limit0 to before
547
* the truncated sequence.
548
*/
549
b=*(limit-1);
550
if (static_cast<int8_t>(b) < 0) {
551
// b>=0x80: lead or trail byte
552
if(b<0xc0) {
553
// single trail byte, check for preceding 3- or 4-byte lead byte
554
if(length>=2 && (b=*(limit-2))>=0xe0) {
555
limit-=2;
556
if(containsFFFD!=spanCondition) {
557
limit0=limit;
558
}
559
} else if(b<0xc0 && b>=0x80 && length>=3 && (b=*(limit-3))>=0xf0) {
560
// 4-byte lead byte with only two trail bytes
561
limit-=3;
562
if(containsFFFD!=spanCondition) {
563
limit0=limit;
564
}
565
}
566
} else {
567
// lead byte with no trail bytes
568
--limit;
569
if(containsFFFD!=spanCondition) {
570
limit0=limit;
571
}
572
}
573
}
574
575
uint8_t t1, t2, t3;
576
577
while(s<limit) {
578
b=*s;
579
if(U8_IS_SINGLE(b)) {
580
// ASCII
581
if(spanCondition) {
582
do {
583
if(!latin1Contains[b]) {
584
return s;
585
} else if(++s==limit) {
586
return limit0;
587
}
588
b=*s;
589
} while(U8_IS_SINGLE(b));
590
} else {
591
do {
592
if(latin1Contains[b]) {
593
return s;
594
} else if(++s==limit) {
595
return limit0;
596
}
597
b=*s;
598
} while(U8_IS_SINGLE(b));
599
}
600
}
601
++s; // Advance past the lead byte.
602
if(b>=0xe0) {
603
if(b<0xf0) {
604
if( /* handle U+0000..U+FFFF inline */
605
(t1 = static_cast<uint8_t>(s[0] - 0x80)) <= 0x3f &&
606
(t2 = static_cast<uint8_t>(s[1] - 0x80)) <= 0x3f
607
) {
608
b&=0xf;
609
uint32_t twoBits=(bmpBlockBits[t1]>>b)&0x10001;
610
if(twoBits<=1) {
611
// All 64 code points with this lead byte and middle trail byte
612
// are either in the set or not.
613
if (twoBits != static_cast<uint32_t>(spanCondition)) {
614
return s-1;
615
}
616
} else {
617
// Look up the code point in its 4k block of code points.
618
UChar32 c=(b<<12)|(t1<<6)|t2;
619
if(containsSlow(c, list4kStarts[b], list4kStarts[b+1]) != spanCondition) {
620
return s-1;
621
}
622
}
623
s+=2;
624
continue;
625
}
626
} else if( /* handle U+10000..U+10FFFF inline */
627
(t1 = static_cast<uint8_t>(s[0] - 0x80)) <= 0x3f &&
628
(t2 = static_cast<uint8_t>(s[1] - 0x80)) <= 0x3f &&
629
(t3 = static_cast<uint8_t>(s[2] - 0x80)) <= 0x3f
630
) {
631
// Give an illegal sequence the same value as the result of contains(FFFD).
632
UChar32 c = (static_cast<UChar32>(b - 0xf0) << 18) | (static_cast<UChar32>(t1) << 12) | (t2 << 6) | t3;
633
if( ( (0x10000<=c && c<=0x10ffff) ?
634
containsSlow(c, list4kStarts[0x10], list4kStarts[0x11]) :
635
containsFFFD
636
) != spanCondition
637
) {
638
return s-1;
639
}
640
s+=3;
641
continue;
642
}
643
} else {
644
if( /* handle U+0000..U+07FF inline */
645
b>=0xc0 &&
646
(t1 = static_cast<uint8_t>(*s - 0x80)) <= 0x3f
647
) {
648
if (static_cast<USetSpanCondition>((table7FF[t1] & (static_cast<uint32_t>(1) << (b & 0x1f))) != 0) != spanCondition) {
649
return s-1;
650
}
651
++s;
652
continue;
653
}
654
}
655
656
// Give an illegal sequence the same value as the result of contains(FFFD).
657
// Handle each byte of an illegal sequence separately to simplify the code;
658
// no need to optimize error handling.
659
if(containsFFFD!=spanCondition) {
660
return s-1;
661
}
662
}
663
664
return limit0;
665
}
666
667
/*
668
* While going backwards through UTF-8 optimize only for ASCII.
669
* Unlike UTF-16, UTF-8 is not forward-backward symmetrical, that is, it is not
670
* possible to tell from the last byte in a multi-byte sequence how many
671
* preceding bytes there should be. Therefore, going backwards through UTF-8
672
* is much harder than going forward.
673
*/
674
int32_t
675
BMPSet::spanBackUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const {
676
if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
677
spanCondition=USET_SPAN_CONTAINED; // Pin to 0/1 values.
678
}
679
680
uint8_t b;
681
682
do {
683
b=s[--length];
684
if(U8_IS_SINGLE(b)) {
685
// ASCII sub-span
686
if(spanCondition) {
687
do {
688
if(!latin1Contains[b]) {
689
return length+1;
690
} else if(length==0) {
691
return 0;
692
}
693
b=s[--length];
694
} while(U8_IS_SINGLE(b));
695
} else {
696
do {
697
if(latin1Contains[b]) {
698
return length+1;
699
} else if(length==0) {
700
return 0;
701
}
702
b=s[--length];
703
} while(U8_IS_SINGLE(b));
704
}
705
}
706
707
int32_t prev=length;
708
UChar32 c;
709
// trail byte: collect a multi-byte character
710
// (or lead byte in last-trail position)
711
c=utf8_prevCharSafeBody(s, 0, &length, b, -3);
712
// c is a valid code point, not ASCII, not a surrogate
713
if(c<=0x7ff) {
714
if (static_cast<USetSpanCondition>((table7FF[c & 0x3f] & (static_cast<uint32_t>(1) << (c >> 6))) != 0) != spanCondition) {
715
return prev+1;
716
}
717
} else if(c<=0xffff) {
718
int lead=c>>12;
719
uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
720
if(twoBits<=1) {
721
// All 64 code points with the same bits 15..6
722
// are either in the set or not.
723
if (twoBits != static_cast<uint32_t>(spanCondition)) {
724
return prev+1;
725
}
726
} else {
727
// Look up the code point in its 4k block of code points.
728
if(containsSlow(c, list4kStarts[lead], list4kStarts[lead+1]) != spanCondition) {
729
return prev+1;
730
}
731
}
732
} else {
733
if(containsSlow(c, list4kStarts[0x10], list4kStarts[0x11]) != spanCondition) {
734
return prev+1;
735
}
736
}
737
} while(length>0);
738
return 0;
739
}
740
741
U_NAMESPACE_END
742
743