Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
wine-mirror
GitHub Repository: wine-mirror/wine
Path: blob/master/libs/icui18n/collationcompare.cpp
12343 views
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
*******************************************************************************
5
* Copyright (C) 1996-2015, International Business Machines
6
* Corporation and others. All Rights Reserved.
7
*******************************************************************************
8
* collationcompare.cpp
9
*
10
* created on: 2012feb14 with new and old collation code
11
* created by: Markus W. Scherer
12
*/
13
14
#include "unicode/utypes.h"
15
16
#if !UCONFIG_NO_COLLATION
17
18
#include "unicode/ucol.h"
19
#include "cmemory.h"
20
#include "collation.h"
21
#include "collationcompare.h"
22
#include "collationiterator.h"
23
#include "collationsettings.h"
24
#include "uassert.h"
25
26
U_NAMESPACE_BEGIN
27
28
UCollationResult
29
CollationCompare::compareUpToQuaternary(CollationIterator &left, CollationIterator &right,
30
const CollationSettings &settings,
31
UErrorCode &errorCode) {
32
if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
33
34
int32_t options = settings.options;
35
uint32_t variableTop;
36
if((options & CollationSettings::ALTERNATE_MASK) == 0) {
37
variableTop = 0;
38
} else {
39
// +1 so that we can use "<" and primary ignorables test out early.
40
variableTop = settings.variableTop + 1;
41
}
42
UBool anyVariable = false;
43
44
// Fetch CEs, compare primaries, store secondary & tertiary weights.
45
for(;;) {
46
// We fetch CEs until we get a non-ignorable primary or reach the end.
47
uint32_t leftPrimary;
48
do {
49
int64_t ce = left.nextCE(errorCode);
50
leftPrimary = (uint32_t)(ce >> 32);
51
if(leftPrimary < variableTop && leftPrimary > Collation::MERGE_SEPARATOR_PRIMARY) {
52
// Variable CE, shift it to quaternary level.
53
// Ignore all following primary ignorables, and shift further variable CEs.
54
anyVariable = true;
55
do {
56
// Store only the primary of the variable CE.
57
left.setCurrentCE(ce & INT64_C(0xffffffff00000000));
58
for(;;) {
59
ce = left.nextCE(errorCode);
60
leftPrimary = (uint32_t)(ce >> 32);
61
if(leftPrimary == 0) {
62
left.setCurrentCE(0);
63
} else {
64
break;
65
}
66
}
67
} while(leftPrimary < variableTop &&
68
leftPrimary > Collation::MERGE_SEPARATOR_PRIMARY);
69
}
70
} while(leftPrimary == 0);
71
72
uint32_t rightPrimary;
73
do {
74
int64_t ce = right.nextCE(errorCode);
75
rightPrimary = (uint32_t)(ce >> 32);
76
if(rightPrimary < variableTop && rightPrimary > Collation::MERGE_SEPARATOR_PRIMARY) {
77
// Variable CE, shift it to quaternary level.
78
// Ignore all following primary ignorables, and shift further variable CEs.
79
anyVariable = true;
80
do {
81
// Store only the primary of the variable CE.
82
right.setCurrentCE(ce & INT64_C(0xffffffff00000000));
83
for(;;) {
84
ce = right.nextCE(errorCode);
85
rightPrimary = (uint32_t)(ce >> 32);
86
if(rightPrimary == 0) {
87
right.setCurrentCE(0);
88
} else {
89
break;
90
}
91
}
92
} while(rightPrimary < variableTop &&
93
rightPrimary > Collation::MERGE_SEPARATOR_PRIMARY);
94
}
95
} while(rightPrimary == 0);
96
97
if(leftPrimary != rightPrimary) {
98
// Return the primary difference, with script reordering.
99
if(settings.hasReordering()) {
100
leftPrimary = settings.reorder(leftPrimary);
101
rightPrimary = settings.reorder(rightPrimary);
102
}
103
return (leftPrimary < rightPrimary) ? UCOL_LESS : UCOL_GREATER;
104
}
105
if(leftPrimary == Collation::NO_CE_PRIMARY) { break; }
106
}
107
if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
108
109
// Compare the buffered secondary & tertiary weights.
110
// We might skip the secondary level but continue with the case level
111
// which is turned on separately.
112
if(CollationSettings::getStrength(options) >= UCOL_SECONDARY) {
113
if((options & CollationSettings::BACKWARD_SECONDARY) == 0) {
114
int32_t leftIndex = 0;
115
int32_t rightIndex = 0;
116
for(;;) {
117
uint32_t leftSecondary;
118
do {
119
leftSecondary = ((uint32_t)left.getCE(leftIndex++)) >> 16;
120
} while(leftSecondary == 0);
121
122
uint32_t rightSecondary;
123
do {
124
rightSecondary = ((uint32_t)right.getCE(rightIndex++)) >> 16;
125
} while(rightSecondary == 0);
126
127
if(leftSecondary != rightSecondary) {
128
return (leftSecondary < rightSecondary) ? UCOL_LESS : UCOL_GREATER;
129
}
130
if(leftSecondary == Collation::NO_CE_WEIGHT16) { break; }
131
}
132
} else {
133
// The backwards secondary level compares secondary weights backwards
134
// within segments separated by the merge separator (U+FFFE, weight 02).
135
int32_t leftStart = 0;
136
int32_t rightStart = 0;
137
for(;;) {
138
// Find the merge separator or the NO_CE terminator.
139
uint32_t p;
140
int32_t leftLimit = leftStart;
141
while((p = (uint32_t)(left.getCE(leftLimit) >> 32)) >
142
Collation::MERGE_SEPARATOR_PRIMARY ||
143
p == 0) {
144
++leftLimit;
145
}
146
int32_t rightLimit = rightStart;
147
while((p = (uint32_t)(right.getCE(rightLimit) >> 32)) >
148
Collation::MERGE_SEPARATOR_PRIMARY ||
149
p == 0) {
150
++rightLimit;
151
}
152
153
// Compare the segments.
154
int32_t leftIndex = leftLimit;
155
int32_t rightIndex = rightLimit;
156
for(;;) {
157
int32_t leftSecondary = 0;
158
while(leftSecondary == 0 && leftIndex > leftStart) {
159
leftSecondary = ((uint32_t)left.getCE(--leftIndex)) >> 16;
160
}
161
162
int32_t rightSecondary = 0;
163
while(rightSecondary == 0 && rightIndex > rightStart) {
164
rightSecondary = ((uint32_t)right.getCE(--rightIndex)) >> 16;
165
}
166
167
if(leftSecondary != rightSecondary) {
168
return (leftSecondary < rightSecondary) ? UCOL_LESS : UCOL_GREATER;
169
}
170
if(leftSecondary == 0) { break; }
171
}
172
173
// Did we reach the end of either string?
174
// Both strings have the same number of merge separators,
175
// or else there would have been a primary-level difference.
176
U_ASSERT(left.getCE(leftLimit) == right.getCE(rightLimit));
177
if(p == Collation::NO_CE_PRIMARY) { break; }
178
// Skip both merge separators and continue.
179
leftStart = leftLimit + 1;
180
rightStart = rightLimit + 1;
181
}
182
}
183
}
184
185
if((options & CollationSettings::CASE_LEVEL) != 0) {
186
int32_t strength = CollationSettings::getStrength(options);
187
int32_t leftIndex = 0;
188
int32_t rightIndex = 0;
189
for(;;) {
190
uint32_t leftCase, leftLower32, rightCase;
191
if(strength == UCOL_PRIMARY) {
192
// Primary+caseLevel: Ignore case level weights of primary ignorables.
193
// Otherwise we would get a-umlaut > a
194
// which is not desirable for accent-insensitive sorting.
195
// Check for (lower 32 bits) == 0 as well because variable CEs are stored
196
// with only primary weights.
197
int64_t ce;
198
do {
199
ce = left.getCE(leftIndex++);
200
leftCase = (uint32_t)ce;
201
} while((uint32_t)(ce >> 32) == 0 || leftCase == 0);
202
leftLower32 = leftCase;
203
leftCase &= 0xc000;
204
205
do {
206
ce = right.getCE(rightIndex++);
207
rightCase = (uint32_t)ce;
208
} while((uint32_t)(ce >> 32) == 0 || rightCase == 0);
209
rightCase &= 0xc000;
210
} else {
211
// Secondary+caseLevel: By analogy with the above,
212
// ignore case level weights of secondary ignorables.
213
//
214
// Note: A tertiary CE has uppercase case bits (0.0.ut)
215
// to keep tertiary+caseFirst well-formed.
216
//
217
// Tertiary+caseLevel: Also ignore case level weights of secondary ignorables.
218
// Otherwise a tertiary CE's uppercase would be no greater than
219
// a primary/secondary CE's uppercase.
220
// (See UCA well-formedness condition 2.)
221
// We could construct a special case weight higher than uppercase,
222
// but it's simpler to always ignore case weights of secondary ignorables,
223
// turning 0.0.ut into 0.0.0.t.
224
// (See LDML Collation, Case Parameters.)
225
do {
226
leftCase = (uint32_t)left.getCE(leftIndex++);
227
} while(leftCase <= 0xffff);
228
leftLower32 = leftCase;
229
leftCase &= 0xc000;
230
231
do {
232
rightCase = (uint32_t)right.getCE(rightIndex++);
233
} while(rightCase <= 0xffff);
234
rightCase &= 0xc000;
235
}
236
237
// No need to handle NO_CE and MERGE_SEPARATOR specially:
238
// There is one case weight for each previous-level weight,
239
// so level length differences were handled there.
240
if(leftCase != rightCase) {
241
if((options & CollationSettings::UPPER_FIRST) == 0) {
242
return (leftCase < rightCase) ? UCOL_LESS : UCOL_GREATER;
243
} else {
244
return (leftCase < rightCase) ? UCOL_GREATER : UCOL_LESS;
245
}
246
}
247
if((leftLower32 >> 16) == Collation::NO_CE_WEIGHT16) { break; }
248
}
249
}
250
if(CollationSettings::getStrength(options) <= UCOL_SECONDARY) { return UCOL_EQUAL; }
251
252
uint32_t tertiaryMask = CollationSettings::getTertiaryMask(options);
253
254
int32_t leftIndex = 0;
255
int32_t rightIndex = 0;
256
uint32_t anyQuaternaries = 0;
257
for(;;) {
258
uint32_t leftLower32, leftTertiary;
259
do {
260
leftLower32 = (uint32_t)left.getCE(leftIndex++);
261
anyQuaternaries |= leftLower32;
262
U_ASSERT((leftLower32 & Collation::ONLY_TERTIARY_MASK) != 0 ||
263
(leftLower32 & 0xc0c0) == 0);
264
leftTertiary = leftLower32 & tertiaryMask;
265
} while(leftTertiary == 0);
266
267
uint32_t rightLower32, rightTertiary;
268
do {
269
rightLower32 = (uint32_t)right.getCE(rightIndex++);
270
anyQuaternaries |= rightLower32;
271
U_ASSERT((rightLower32 & Collation::ONLY_TERTIARY_MASK) != 0 ||
272
(rightLower32 & 0xc0c0) == 0);
273
rightTertiary = rightLower32 & tertiaryMask;
274
} while(rightTertiary == 0);
275
276
if(leftTertiary != rightTertiary) {
277
if(CollationSettings::sortsTertiaryUpperCaseFirst(options)) {
278
// Pass through NO_CE and keep real tertiary weights larger than that.
279
// Do not change the artificial uppercase weight of a tertiary CE (0.0.ut),
280
// to keep tertiary CEs well-formed.
281
// Their case+tertiary weights must be greater than those of
282
// primary and secondary CEs.
283
if(leftTertiary > Collation::NO_CE_WEIGHT16) {
284
if(leftLower32 > 0xffff) {
285
leftTertiary ^= 0xc000;
286
} else {
287
leftTertiary += 0x4000;
288
}
289
}
290
if(rightTertiary > Collation::NO_CE_WEIGHT16) {
291
if(rightLower32 > 0xffff) {
292
rightTertiary ^= 0xc000;
293
} else {
294
rightTertiary += 0x4000;
295
}
296
}
297
}
298
return (leftTertiary < rightTertiary) ? UCOL_LESS : UCOL_GREATER;
299
}
300
if(leftTertiary == Collation::NO_CE_WEIGHT16) { break; }
301
}
302
if(CollationSettings::getStrength(options) <= UCOL_TERTIARY) { return UCOL_EQUAL; }
303
304
if(!anyVariable && (anyQuaternaries & 0xc0) == 0) {
305
// If there are no "variable" CEs and no non-zero quaternary weights,
306
// then there are no quaternary differences.
307
return UCOL_EQUAL;
308
}
309
310
leftIndex = 0;
311
rightIndex = 0;
312
for(;;) {
313
uint32_t leftQuaternary;
314
do {
315
int64_t ce = left.getCE(leftIndex++);
316
leftQuaternary = (uint32_t)ce & 0xffff;
317
if(leftQuaternary <= Collation::NO_CE_WEIGHT16) {
318
// Variable primary or completely ignorable or NO_CE.
319
leftQuaternary = (uint32_t)(ce >> 32);
320
} else {
321
// Regular CE, not tertiary ignorable.
322
// Preserve the quaternary weight in bits 7..6.
323
leftQuaternary |= 0xffffff3f;
324
}
325
} while(leftQuaternary == 0);
326
327
uint32_t rightQuaternary;
328
do {
329
int64_t ce = right.getCE(rightIndex++);
330
rightQuaternary = (uint32_t)ce & 0xffff;
331
if(rightQuaternary <= Collation::NO_CE_WEIGHT16) {
332
// Variable primary or completely ignorable or NO_CE.
333
rightQuaternary = (uint32_t)(ce >> 32);
334
} else {
335
// Regular CE, not tertiary ignorable.
336
// Preserve the quaternary weight in bits 7..6.
337
rightQuaternary |= 0xffffff3f;
338
}
339
} while(rightQuaternary == 0);
340
341
if(leftQuaternary != rightQuaternary) {
342
// Return the difference, with script reordering.
343
if(settings.hasReordering()) {
344
leftQuaternary = settings.reorder(leftQuaternary);
345
rightQuaternary = settings.reorder(rightQuaternary);
346
}
347
return (leftQuaternary < rightQuaternary) ? UCOL_LESS : UCOL_GREATER;
348
}
349
if(leftQuaternary == Collation::NO_CE_PRIMARY) { break; }
350
}
351
return UCOL_EQUAL;
352
}
353
354
U_NAMESPACE_END
355
356
#endif // !UCONFIG_NO_COLLATION
357
358