// Origin: GitHub repository PojavLauncherTeam/openjdk-multiarch-jdk8u,
// path blob/aarch64-shenandoah-jdk8u272-b10/jdk/src/share/native/common/unicode/normalizer2.h
/*
*******************************************************************************
*
* Copyright (C) 2009-2013, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: normalizer2.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2009nov22
* created by: Markus W. Scherer
*/
17
#ifndef __NORMALIZER2_H__
18
#define __NORMALIZER2_H__
19
20
/**
 * \file
 * \brief C++ API: New API for Unicode Normalization.
 */
24
25
#include "unicode/utypes.h"
26
27
#if !UCONFIG_NO_NORMALIZATION
28
29
#include "unicode/uniset.h"
30
#include "unicode/unistr.h"
31
#include "unicode/unorm2.h"
32
33
U_NAMESPACE_BEGIN
34
35
/**
36
* Unicode normalization functionality for standard Unicode normalization or
37
* for using custom mapping tables.
38
* All instances of this class are unmodifiable/immutable.
39
* Instances returned by getInstance() are singletons that must not be deleted by the caller.
40
* The Normalizer2 class is not intended for public subclassing.
41
*
42
* The primary functions are to produce a normalized string and to detect whether
43
* a string is already normalized.
44
* The most commonly used normalization forms are those defined in
45
* http://www.unicode.org/unicode/reports/tr15/
46
* However, this API supports additional normalization forms for specialized purposes.
47
* For example, NFKC_Casefold is provided via getInstance("nfkc_cf", COMPOSE)
48
* and can be used in implementations of UTS #46.
49
*
50
* Not only are the standard compose and decompose modes supplied,
51
* but additional modes are provided as documented in the Mode enum.
52
*
53
* Some of the functions in this class identify normalization boundaries.
54
* At a normalization boundary, the portions of the string
55
* before it and starting from it do not interact and can be handled independently.
56
*
57
* The spanQuickCheckYes() stops at a normalization boundary.
58
* When the goal is a normalized string, then the text before the boundary
59
* can be copied, and the remainder can be processed with normalizeSecondAndAppend().
60
*
61
* The hasBoundaryBefore(), hasBoundaryAfter() and isInert() functions test whether
62
* a character is guaranteed to be at a normalization boundary,
63
* regardless of context.
64
* This is used for moving from one normalization boundary to the next
65
* or preceding boundary, and for performing iterative normalization.
66
*
67
* Iterative normalization is useful when only a small portion of a
68
* longer string needs to be processed.
69
* For example, in ICU, iterative normalization is used by the NormalizationTransliterator
70
* (to avoid replacing already-normalized text) and ucol_nextSortKeyPart()
71
* (to process only the substring for which sort key bytes are computed).
72
*
73
* The set of normalization boundaries returned by these functions may not be
74
* complete: There may be more boundaries that could be returned.
75
* Different functions may return different boundaries.
76
* @stable ICU 4.4
77
*/
78
class U_COMMON_API Normalizer2 : public UObject {
79
public:
80
/**
81
* Destructor.
82
* @stable ICU 4.4
83
*/
84
~Normalizer2();
85
86
/**
87
* Returns a Normalizer2 instance for Unicode NFC normalization.
88
* Same as getInstance(NULL, "nfc", UNORM2_COMPOSE, errorCode).
89
* Returns an unmodifiable singleton instance. Do not delete it.
90
* @param errorCode Standard ICU error code. Its input value must
91
* pass the U_SUCCESS() test, or else the function returns
92
* immediately. Check for U_FAILURE() on output or use with
93
* function chaining. (See User Guide for details.)
94
* @return the requested Normalizer2, if successful
95
* @stable ICU 49
96
*/
97
static const Normalizer2 *
98
getNFCInstance(UErrorCode &errorCode);
99
100
/**
101
* Returns a Normalizer2 instance for Unicode NFD normalization.
102
* Same as getInstance(NULL, "nfc", UNORM2_DECOMPOSE, errorCode).
103
* Returns an unmodifiable singleton instance. Do not delete it.
104
* @param errorCode Standard ICU error code. Its input value must
105
* pass the U_SUCCESS() test, or else the function returns
106
* immediately. Check for U_FAILURE() on output or use with
107
* function chaining. (See User Guide for details.)
108
* @return the requested Normalizer2, if successful
109
* @stable ICU 49
110
*/
111
static const Normalizer2 *
112
getNFDInstance(UErrorCode &errorCode);
113
114
/**
115
* Returns a Normalizer2 instance for Unicode NFKC normalization.
116
* Same as getInstance(NULL, "nfkc", UNORM2_COMPOSE, errorCode).
117
* Returns an unmodifiable singleton instance. Do not delete it.
118
* @param errorCode Standard ICU error code. Its input value must
119
* pass the U_SUCCESS() test, or else the function returns
120
* immediately. Check for U_FAILURE() on output or use with
121
* function chaining. (See User Guide for details.)
122
* @return the requested Normalizer2, if successful
123
* @stable ICU 49
124
*/
125
static const Normalizer2 *
126
getNFKCInstance(UErrorCode &errorCode);
127
128
/**
129
* Returns a Normalizer2 instance for Unicode NFKD normalization.
130
* Same as getInstance(NULL, "nfkc", UNORM2_DECOMPOSE, errorCode).
131
* Returns an unmodifiable singleton instance. Do not delete it.
132
* @param errorCode Standard ICU error code. Its input value must
133
* pass the U_SUCCESS() test, or else the function returns
134
* immediately. Check for U_FAILURE() on output or use with
135
* function chaining. (See User Guide for details.)
136
* @return the requested Normalizer2, if successful
137
* @stable ICU 49
138
*/
139
static const Normalizer2 *
140
getNFKDInstance(UErrorCode &errorCode);
141
142
/**
143
* Returns a Normalizer2 instance for Unicode NFKC_Casefold normalization.
144
* Same as getInstance(NULL, "nfkc_cf", UNORM2_COMPOSE, errorCode).
145
* Returns an unmodifiable singleton instance. Do not delete it.
146
* @param errorCode Standard ICU error code. Its input value must
147
* pass the U_SUCCESS() test, or else the function returns
148
* immediately. Check for U_FAILURE() on output or use with
149
* function chaining. (See User Guide for details.)
150
* @return the requested Normalizer2, if successful
151
* @stable ICU 49
152
*/
153
static const Normalizer2 *
154
getNFKCCasefoldInstance(UErrorCode &errorCode);
155
156
/**
157
* Returns a Normalizer2 instance which uses the specified data file
158
* (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle)
159
* and which composes or decomposes text according to the specified mode.
160
* Returns an unmodifiable singleton instance. Do not delete it.
161
*
162
* Use packageName=NULL for data files that are part of ICU's own data.
163
* Use name="nfc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFC/NFD.
164
* Use name="nfkc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFKC/NFKD.
165
* Use name="nfkc_cf" and UNORM2_COMPOSE for Unicode standard NFKC_CF=NFKC_Casefold.
166
*
167
* @param packageName NULL for ICU built-in data, otherwise application data package name
168
* @param name "nfc" or "nfkc" or "nfkc_cf" or name of custom data file
169
* @param mode normalization mode (compose or decompose etc.)
170
* @param errorCode Standard ICU error code. Its input value must
171
* pass the U_SUCCESS() test, or else the function returns
172
* immediately. Check for U_FAILURE() on output or use with
173
* function chaining. (See User Guide for details.)
174
* @return the requested Normalizer2, if successful
175
* @stable ICU 4.4
176
*/
177
static const Normalizer2 *
178
getInstance(const char *packageName,
179
const char *name,
180
UNormalization2Mode mode,
181
UErrorCode &errorCode);
182
183
/**
184
* Returns the normalized form of the source string.
185
* @param src source string
186
* @param errorCode Standard ICU error code. Its input value must
187
* pass the U_SUCCESS() test, or else the function returns
188
* immediately. Check for U_FAILURE() on output or use with
189
* function chaining. (See User Guide for details.)
190
* @return normalized src
191
* @stable ICU 4.4
192
*/
193
UnicodeString
194
normalize(const UnicodeString &src, UErrorCode &errorCode) const {
195
UnicodeString result;
196
normalize(src, result, errorCode);
197
return result;
198
}
199
/**
200
* Writes the normalized form of the source string to the destination string
201
* (replacing its contents) and returns the destination string.
202
* The source and destination strings must be different objects.
203
* @param src source string
204
* @param dest destination string; its contents is replaced with normalized src
205
* @param errorCode Standard ICU error code. Its input value must
206
* pass the U_SUCCESS() test, or else the function returns
207
* immediately. Check for U_FAILURE() on output or use with
208
* function chaining. (See User Guide for details.)
209
* @return dest
210
* @stable ICU 4.4
211
*/
212
virtual UnicodeString &
213
normalize(const UnicodeString &src,
214
UnicodeString &dest,
215
UErrorCode &errorCode) const = 0;
216
/**
217
* Appends the normalized form of the second string to the first string
218
* (merging them at the boundary) and returns the first string.
219
* The result is normalized if the first string was normalized.
220
* The first and second strings must be different objects.
221
* @param first string, should be normalized
222
* @param second string, will be normalized
223
* @param errorCode Standard ICU error code. Its input value must
224
* pass the U_SUCCESS() test, or else the function returns
225
* immediately. Check for U_FAILURE() on output or use with
226
* function chaining. (See User Guide for details.)
227
* @return first
228
* @stable ICU 4.4
229
*/
230
virtual UnicodeString &
231
normalizeSecondAndAppend(UnicodeString &first,
232
const UnicodeString &second,
233
UErrorCode &errorCode) const = 0;
234
/**
235
* Appends the second string to the first string
236
* (merging them at the boundary) and returns the first string.
237
* The result is normalized if both the strings were normalized.
238
* The first and second strings must be different objects.
239
* @param first string, should be normalized
240
* @param second string, should be normalized
241
* @param errorCode Standard ICU error code. Its input value must
242
* pass the U_SUCCESS() test, or else the function returns
243
* immediately. Check for U_FAILURE() on output or use with
244
* function chaining. (See User Guide for details.)
245
* @return first
246
* @stable ICU 4.4
247
*/
248
virtual UnicodeString &
249
append(UnicodeString &first,
250
const UnicodeString &second,
251
UErrorCode &errorCode) const = 0;
252
253
/**
254
* Gets the decomposition mapping of c.
255
* Roughly equivalent to normalizing the String form of c
256
* on a UNORM2_DECOMPOSE Normalizer2 instance, but much faster, and except that this function
257
* returns FALSE and does not write a string
258
* if c does not have a decomposition mapping in this instance's data.
259
* This function is independent of the mode of the Normalizer2.
260
* @param c code point
261
* @param decomposition String object which will be set to c's
262
* decomposition mapping, if there is one.
263
* @return TRUE if c has a decomposition, otherwise FALSE
264
* @stable ICU 4.6
265
*/
266
virtual UBool
267
getDecomposition(UChar32 c, UnicodeString &decomposition) const = 0;
268
269
/**
270
* Gets the raw decomposition mapping of c.
271
*
272
* This is similar to the getDecomposition() method but returns the
273
* raw decomposition mapping as specified in UnicodeData.txt or
274
* (for custom data) in the mapping files processed by the gennorm2 tool.
275
* By contrast, getDecomposition() returns the processed,
276
* recursively-decomposed version of this mapping.
277
*
278
* When used on a standard NFKC Normalizer2 instance,
279
* getRawDecomposition() returns the Unicode Decomposition_Mapping (dm) property.
280
*
281
* When used on a standard NFC Normalizer2 instance,
282
* it returns the Decomposition_Mapping only if the Decomposition_Type (dt) is Canonical (Can);
283
* in this case, the result contains either one or two code points (=1..4 UChars).
284
*
285
* This function is independent of the mode of the Normalizer2.
286
* The default implementation returns FALSE.
287
* @param c code point
288
* @param decomposition String object which will be set to c's
289
* raw decomposition mapping, if there is one.
290
* @return TRUE if c has a decomposition, otherwise FALSE
291
* @stable ICU 49
292
*/
293
virtual UBool
294
getRawDecomposition(UChar32 c, UnicodeString &decomposition) const;
295
296
/**
297
* Performs pairwise composition of a & b and returns the composite if there is one.
298
*
299
* Returns a composite code point c only if c has a two-way mapping to a+b.
300
* In standard Unicode normalization, this means that
301
* c has a canonical decomposition to a+b
302
* and c does not have the Full_Composition_Exclusion property.
303
*
304
* This function is independent of the mode of the Normalizer2.
305
* The default implementation returns a negative value.
306
* @param a A (normalization starter) code point.
307
* @param b Another code point.
308
* @return The non-negative composite code point if there is one; otherwise a negative value.
309
* @stable ICU 49
310
*/
311
virtual UChar32
312
composePair(UChar32 a, UChar32 b) const;
313
314
/**
315
* Gets the combining class of c.
316
* The default implementation returns 0
317
* but all standard implementations return the Unicode Canonical_Combining_Class value.
318
* @param c code point
319
* @return c's combining class
320
* @stable ICU 49
321
*/
322
virtual uint8_t
323
getCombiningClass(UChar32 c) const;
324
325
/**
326
* Tests if the string is normalized.
327
* Internally, in cases where the quickCheck() method would return "maybe"
328
* (which is only possible for the two COMPOSE modes) this method
329
* resolves to "yes" or "no" to provide a definitive result,
330
* at the cost of doing more work in those cases.
331
* @param s input string
332
* @param errorCode Standard ICU error code. Its input value must
333
* pass the U_SUCCESS() test, or else the function returns
334
* immediately. Check for U_FAILURE() on output or use with
335
* function chaining. (See User Guide for details.)
336
* @return TRUE if s is normalized
337
* @stable ICU 4.4
338
*/
339
virtual UBool
340
isNormalized(const UnicodeString &s, UErrorCode &errorCode) const = 0;
341
342
/**
343
* Tests if the string is normalized.
344
* For the two COMPOSE modes, the result could be "maybe" in cases that
345
* would take a little more work to resolve definitively.
346
* Use spanQuickCheckYes() and normalizeSecondAndAppend() for a faster
347
* combination of quick check + normalization, to avoid
348
* re-checking the "yes" prefix.
349
* @param s input string
350
* @param errorCode Standard ICU error code. Its input value must
351
* pass the U_SUCCESS() test, or else the function returns
352
* immediately. Check for U_FAILURE() on output or use with
353
* function chaining. (See User Guide for details.)
354
* @return UNormalizationCheckResult
355
* @stable ICU 4.4
356
*/
357
virtual UNormalizationCheckResult
358
quickCheck(const UnicodeString &s, UErrorCode &errorCode) const = 0;
359
360
/**
361
* Returns the end of the normalized substring of the input string.
362
* In other words, with <code>end=spanQuickCheckYes(s, ec);</code>
363
* the substring <code>UnicodeString(s, 0, end)</code>
364
* will pass the quick check with a "yes" result.
365
*
366
* The returned end index is usually one or more characters before the
367
* "no" or "maybe" character: The end index is at a normalization boundary.
368
* (See the class documentation for more about normalization boundaries.)
369
*
370
* When the goal is a normalized string and most input strings are expected
371
* to be normalized already, then call this method,
372
* and if it returns a prefix shorter than the input string,
373
* copy that prefix and use normalizeSecondAndAppend() for the remainder.
374
* @param s input string
375
* @param errorCode Standard ICU error code. Its input value must
376
* pass the U_SUCCESS() test, or else the function returns
377
* immediately. Check for U_FAILURE() on output or use with
378
* function chaining. (See User Guide for details.)
379
* @return "yes" span end index
380
* @stable ICU 4.4
381
*/
382
virtual int32_t
383
spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const = 0;
384
385
/**
386
* Tests if the character always has a normalization boundary before it,
387
* regardless of context.
388
* If true, then the character does not normalization-interact with
389
* preceding characters.
390
* In other words, a string containing this character can be normalized
391
* by processing portions before this character and starting from this
392
* character independently.
393
* This is used for iterative normalization. See the class documentation for details.
394
* @param c character to test
395
* @return TRUE if c has a normalization boundary before it
396
* @stable ICU 4.4
397
*/
398
virtual UBool hasBoundaryBefore(UChar32 c) const = 0;
399
400
/**
401
* Tests if the character always has a normalization boundary after it,
402
* regardless of context.
403
* If true, then the character does not normalization-interact with
404
* following characters.
405
* In other words, a string containing this character can be normalized
406
* by processing portions up to this character and after this
407
* character independently.
408
* This is used for iterative normalization. See the class documentation for details.
409
* Note that this operation may be significantly slower than hasBoundaryBefore().
410
* @param c character to test
411
* @return TRUE if c has a normalization boundary after it
412
* @stable ICU 4.4
413
*/
414
virtual UBool hasBoundaryAfter(UChar32 c) const = 0;
415
416
/**
417
* Tests if the character is normalization-inert.
418
* If true, then the character does not change, nor normalization-interact with
419
* preceding or following characters.
420
* In other words, a string containing this character can be normalized
421
* by processing portions before this character and after this
422
* character independently.
423
* This is used for iterative normalization. See the class documentation for details.
424
* Note that this operation may be significantly slower than hasBoundaryBefore().
425
* @param c character to test
426
* @return TRUE if c is normalization-inert
427
* @stable ICU 4.4
428
*/
429
virtual UBool isInert(UChar32 c) const = 0;
430
};
431
432
/**
433
* Normalization filtered by a UnicodeSet.
434
* Normalizes portions of the text contained in the filter set and leaves
435
* portions not contained in the filter set unchanged.
436
* Filtering is done via UnicodeSet::span(..., USET_SPAN_SIMPLE).
437
* Not-in-the-filter text is treated as "is normalized" and "quick check yes".
438
* This class implements all of (and only) the Normalizer2 API.
439
* An instance of this class is unmodifiable/immutable but is constructed and
440
* must be destructed by the owner.
441
* @stable ICU 4.4
442
*/
443
class U_COMMON_API FilteredNormalizer2 : public Normalizer2 {
444
public:
445
/**
446
* Constructs a filtered normalizer wrapping any Normalizer2 instance
447
* and a filter set.
448
* Both are aliased and must not be modified or deleted while this object
449
* is used.
450
* The filter set should be frozen; otherwise the performance will suffer greatly.
451
* @param n2 wrapped Normalizer2 instance
452
* @param filterSet UnicodeSet which determines the characters to be normalized
453
* @stable ICU 4.4
454
*/
455
FilteredNormalizer2(const Normalizer2 &n2, const UnicodeSet &filterSet) :
456
norm2(n2), set(filterSet) {}
457
458
/**
459
* Destructor.
460
* @stable ICU 4.4
461
*/
462
~FilteredNormalizer2();
463
464
/**
465
* Writes the normalized form of the source string to the destination string
466
* (replacing its contents) and returns the destination string.
467
* The source and destination strings must be different objects.
468
* @param src source string
469
* @param dest destination string; its contents is replaced with normalized src
470
* @param errorCode Standard ICU error code. Its input value must
471
* pass the U_SUCCESS() test, or else the function returns
472
* immediately. Check for U_FAILURE() on output or use with
473
* function chaining. (See User Guide for details.)
474
* @return dest
475
* @stable ICU 4.4
476
*/
477
virtual UnicodeString &
478
normalize(const UnicodeString &src,
479
UnicodeString &dest,
480
UErrorCode &errorCode) const;
481
/**
482
* Appends the normalized form of the second string to the first string
483
* (merging them at the boundary) and returns the first string.
484
* The result is normalized if the first string was normalized.
485
* The first and second strings must be different objects.
486
* @param first string, should be normalized
487
* @param second string, will be normalized
488
* @param errorCode Standard ICU error code. Its input value must
489
* pass the U_SUCCESS() test, or else the function returns
490
* immediately. Check for U_FAILURE() on output or use with
491
* function chaining. (See User Guide for details.)
492
* @return first
493
* @stable ICU 4.4
494
*/
495
virtual UnicodeString &
496
normalizeSecondAndAppend(UnicodeString &first,
497
const UnicodeString &second,
498
UErrorCode &errorCode) const;
499
/**
500
* Appends the second string to the first string
501
* (merging them at the boundary) and returns the first string.
502
* The result is normalized if both the strings were normalized.
503
* The first and second strings must be different objects.
504
* @param first string, should be normalized
505
* @param second string, should be normalized
506
* @param errorCode Standard ICU error code. Its input value must
507
* pass the U_SUCCESS() test, or else the function returns
508
* immediately. Check for U_FAILURE() on output or use with
509
* function chaining. (See User Guide for details.)
510
* @return first
511
* @stable ICU 4.4
512
*/
513
virtual UnicodeString &
514
append(UnicodeString &first,
515
const UnicodeString &second,
516
UErrorCode &errorCode) const;
517
518
/**
519
* Gets the decomposition mapping of c.
520
* For details see the base class documentation.
521
*
522
* This function is independent of the mode of the Normalizer2.
523
* @param c code point
524
* @param decomposition String object which will be set to c's
525
* decomposition mapping, if there is one.
526
* @return TRUE if c has a decomposition, otherwise FALSE
527
* @stable ICU 4.6
528
*/
529
virtual UBool
530
getDecomposition(UChar32 c, UnicodeString &decomposition) const;
531
532
/**
533
* Gets the raw decomposition mapping of c.
534
* For details see the base class documentation.
535
*
536
* This function is independent of the mode of the Normalizer2.
537
* @param c code point
538
* @param decomposition String object which will be set to c's
539
* raw decomposition mapping, if there is one.
540
* @return TRUE if c has a decomposition, otherwise FALSE
541
* @stable ICU 49
542
*/
543
virtual UBool
544
getRawDecomposition(UChar32 c, UnicodeString &decomposition) const;
545
546
/**
547
* Performs pairwise composition of a & b and returns the composite if there is one.
548
* For details see the base class documentation.
549
*
550
* This function is independent of the mode of the Normalizer2.
551
* @param a A (normalization starter) code point.
552
* @param b Another code point.
553
* @return The non-negative composite code point if there is one; otherwise a negative value.
554
* @stable ICU 49
555
*/
556
virtual UChar32
557
composePair(UChar32 a, UChar32 b) const;
558
559
/**
560
* Gets the combining class of c.
561
* The default implementation returns 0
562
* but all standard implementations return the Unicode Canonical_Combining_Class value.
563
* @param c code point
564
* @return c's combining class
565
* @stable ICU 49
566
*/
567
virtual uint8_t
568
getCombiningClass(UChar32 c) const;
569
570
/**
571
* Tests if the string is normalized.
572
* For details see the Normalizer2 base class documentation.
573
* @param s input string
574
* @param errorCode Standard ICU error code. Its input value must
575
* pass the U_SUCCESS() test, or else the function returns
576
* immediately. Check for U_FAILURE() on output or use with
577
* function chaining. (See User Guide for details.)
578
* @return TRUE if s is normalized
579
* @stable ICU 4.4
580
*/
581
virtual UBool
582
isNormalized(const UnicodeString &s, UErrorCode &errorCode) const;
583
/**
584
* Tests if the string is normalized.
585
* For details see the Normalizer2 base class documentation.
586
* @param s input string
587
* @param errorCode Standard ICU error code. Its input value must
588
* pass the U_SUCCESS() test, or else the function returns
589
* immediately. Check for U_FAILURE() on output or use with
590
* function chaining. (See User Guide for details.)
591
* @return UNormalizationCheckResult
592
* @stable ICU 4.4
593
*/
594
virtual UNormalizationCheckResult
595
quickCheck(const UnicodeString &s, UErrorCode &errorCode) const;
596
/**
597
* Returns the end of the normalized substring of the input string.
598
* For details see the Normalizer2 base class documentation.
599
* @param s input string
600
* @param errorCode Standard ICU error code. Its input value must
601
* pass the U_SUCCESS() test, or else the function returns
602
* immediately. Check for U_FAILURE() on output or use with
603
* function chaining. (See User Guide for details.)
604
* @return "yes" span end index
605
* @stable ICU 4.4
606
*/
607
virtual int32_t
608
spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const;
609
610
/**
611
* Tests if the character always has a normalization boundary before it,
612
* regardless of context.
613
* For details see the Normalizer2 base class documentation.
614
* @param c character to test
615
* @return TRUE if c has a normalization boundary before it
616
* @stable ICU 4.4
617
*/
618
virtual UBool hasBoundaryBefore(UChar32 c) const;
619
620
/**
621
* Tests if the character always has a normalization boundary after it,
622
* regardless of context.
623
* For details see the Normalizer2 base class documentation.
624
* @param c character to test
625
* @return TRUE if c has a normalization boundary after it
626
* @stable ICU 4.4
627
*/
628
virtual UBool hasBoundaryAfter(UChar32 c) const;
629
630
/**
631
* Tests if the character is normalization-inert.
632
* For details see the Normalizer2 base class documentation.
633
* @param c character to test
634
* @return TRUE if c is normalization-inert
635
* @stable ICU 4.4
636
*/
637
virtual UBool isInert(UChar32 c) const;
638
private:
639
UnicodeString &
640
normalize(const UnicodeString &src,
641
UnicodeString &dest,
642
USetSpanCondition spanCondition,
643
UErrorCode &errorCode) const;
644
645
UnicodeString &
646
normalizeSecondAndAppend(UnicodeString &first,
647
const UnicodeString &second,
648
UBool doNormalize,
649
UErrorCode &errorCode) const;
650
651
const Normalizer2 &norm2;
652
const UnicodeSet &set;
653
};
654
655
U_NAMESPACE_END
656
657
#endif // !UCONFIG_NO_NORMALIZATION
658
#endif // __NORMALIZER2_H__
659
660