CoCalc -- normalizer2.h

GitHub Repository: PojavLauncherTeam/openjdk-multiarch-jdk8u
Path: blob/aarch64-shenandoah-jdk8u272-b10/jdk/src/share/native/common/unicode/normalizer2.h
48006 views
1
/*
2
*******************************************************************************
3
*
4
*   Copyright (C) 2009-2013, International Business Machines
5
*   Corporation and others.  All Rights Reserved.
6
*
7
*******************************************************************************
8
*   file name:  normalizer2.h
9
*   encoding:   US-ASCII
10
*   tab size:   8 (not used)
11
*   indentation:4
12
*
13
*   created on: 2009nov22
14
*   created by: Markus W. Scherer
15
*/
16

17
#ifndef __NORMALIZER2_H__
18
#define __NORMALIZER2_H__
19

20
/**
21
 * \file
22
 * \brief C++ API: New API for Unicode Normalization.
23
 */
24

25
#include "unicode/utypes.h"
26

27
#if !UCONFIG_NO_NORMALIZATION
28

29
#include "unicode/uniset.h"
30
#include "unicode/unistr.h"
31
#include "unicode/unorm2.h"
32

33
U_NAMESPACE_BEGIN
34

35
/**
36
 * Unicode normalization functionality for standard Unicode normalization or
37
 * for using custom mapping tables.
38
 * All instances of this class are unmodifiable/immutable.
39
 * Instances returned by getInstance() are singletons that must not be deleted by the caller.
40
 * The Normalizer2 class is not intended for public subclassing.
41
 *
42
 * The primary functions are to produce a normalized string and to detect whether
43
 * a string is already normalized.
44
 * The most commonly used normalization forms are those defined in
45
 * http://www.unicode.org/unicode/reports/tr15/
46
 * However, this API supports additional normalization forms for specialized purposes.
47
 * For example, NFKC_Casefold is provided via getInstance("nfkc_cf", COMPOSE)
48
 * and can be used in implementations of UTS #46.
49
 *
50
 * Not only are the standard compose and decompose modes supplied,
51
 * but additional modes are provided as documented in the Mode enum.
52
 *
53
 * Some of the functions in this class identify normalization boundaries.
54
 * At a normalization boundary, the portions of the string
55
 * before it and starting from it do not interact and can be handled independently.
56
 *
57
 * The spanQuickCheckYes() stops at a normalization boundary.
58
 * When the goal is a normalized string, then the text before the boundary
59
 * can be copied, and the remainder can be processed with normalizeSecondAndAppend().
60
 *
61
 * The hasBoundaryBefore(), hasBoundaryAfter() and isInert() functions test whether
62
 * a character is guaranteed to be at a normalization boundary,
63
 * regardless of context.
64
 * This is used for moving from one normalization boundary to the next
65
 * or preceding boundary, and for performing iterative normalization.
66
 *
67
 * Iterative normalization is useful when only a small portion of a
68
 * longer string needs to be processed.
69
 * For example, in ICU, iterative normalization is used by the NormalizationTransliterator
70
 * (to avoid replacing already-normalized text) and ucol_nextSortKeyPart()
71
 * (to process only the substring for which sort key bytes are computed).
72
 *
73
 * The set of normalization boundaries returned by these functions may not be
74
 * complete: There may be more boundaries that could be returned.
75
 * Different functions may return different boundaries.
76
 * @stable ICU 4.4
77
 */
78
class U_COMMON_API Normalizer2 : public UObject {
79
public:
80
    /**
81
     * Destructor.
82
     * @stable ICU 4.4
83
     */
84
    ~Normalizer2();
85

86
    /**
87
     * Returns a Normalizer2 instance for Unicode NFC normalization.
88
     * Same as getInstance(NULL, "nfc", UNORM2_COMPOSE, errorCode).
89
     * Returns an unmodifiable singleton instance. Do not delete it.
90
     * @param errorCode Standard ICU error code. Its input value must
91
     *                  pass the U_SUCCESS() test, or else the function returns
92
     *                  immediately. Check for U_FAILURE() on output or use with
93
     *                  function chaining. (See User Guide for details.)
94
     * @return the requested Normalizer2, if successful
95
     * @stable ICU 49
96
     */
97
    static const Normalizer2 *
98
    getNFCInstance(UErrorCode &errorCode);
99

100
    /**
101
     * Returns a Normalizer2 instance for Unicode NFD normalization.
102
     * Same as getInstance(NULL, "nfc", UNORM2_DECOMPOSE, errorCode).
103
     * Returns an unmodifiable singleton instance. Do not delete it.
104
     * @param errorCode Standard ICU error code. Its input value must
105
     *                  pass the U_SUCCESS() test, or else the function returns
106
     *                  immediately. Check for U_FAILURE() on output or use with
107
     *                  function chaining. (See User Guide for details.)
108
     * @return the requested Normalizer2, if successful
109
     * @stable ICU 49
110
     */
111
    static const Normalizer2 *
112
    getNFDInstance(UErrorCode &errorCode);
113

114
    /**
115
     * Returns a Normalizer2 instance for Unicode NFKC normalization.
116
     * Same as getInstance(NULL, "nfkc", UNORM2_COMPOSE, errorCode).
117
     * Returns an unmodifiable singleton instance. Do not delete it.
118
     * @param errorCode Standard ICU error code. Its input value must
119
     *                  pass the U_SUCCESS() test, or else the function returns
120
     *                  immediately. Check for U_FAILURE() on output or use with
121
     *                  function chaining. (See User Guide for details.)
122
     * @return the requested Normalizer2, if successful
123
     * @stable ICU 49
124
     */
125
    static const Normalizer2 *
126
    getNFKCInstance(UErrorCode &errorCode);
127

128
    /**
129
     * Returns a Normalizer2 instance for Unicode NFKD normalization.
130
     * Same as getInstance(NULL, "nfkc", UNORM2_DECOMPOSE, errorCode).
131
     * Returns an unmodifiable singleton instance. Do not delete it.
132
     * @param errorCode Standard ICU error code. Its input value must
133
     *                  pass the U_SUCCESS() test, or else the function returns
134
     *                  immediately. Check for U_FAILURE() on output or use with
135
     *                  function chaining. (See User Guide for details.)
136
     * @return the requested Normalizer2, if successful
137
     * @stable ICU 49
138
     */
139
    static const Normalizer2 *
140
    getNFKDInstance(UErrorCode &errorCode);
141

142
    /**
143
     * Returns a Normalizer2 instance for Unicode NFKC_Casefold normalization.
144
     * Same as getInstance(NULL, "nfkc_cf", UNORM2_COMPOSE, errorCode).
145
     * Returns an unmodifiable singleton instance. Do not delete it.
146
     * @param errorCode Standard ICU error code. Its input value must
147
     *                  pass the U_SUCCESS() test, or else the function returns
148
     *                  immediately. Check for U_FAILURE() on output or use with
149
     *                  function chaining. (See User Guide for details.)
150
     * @return the requested Normalizer2, if successful
151
     * @stable ICU 49
152
     */
153
    static const Normalizer2 *
154
    getNFKCCasefoldInstance(UErrorCode &errorCode);
155

156
    /**
157
     * Returns a Normalizer2 instance which uses the specified data file
158
     * (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle)
159
     * and which composes or decomposes text according to the specified mode.
160
     * Returns an unmodifiable singleton instance. Do not delete it.
161
     *
162
     * Use packageName=NULL for data files that are part of ICU's own data.
163
     * Use name="nfc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFC/NFD.
164
     * Use name="nfkc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFKC/NFKD.
165
     * Use name="nfkc_cf" and UNORM2_COMPOSE for Unicode standard NFKC_CF=NFKC_Casefold.
166
     *
167
     * @param packageName NULL for ICU built-in data, otherwise application data package name
168
     * @param name "nfc" or "nfkc" or "nfkc_cf" or name of custom data file
169
     * @param mode normalization mode (compose or decompose etc.)
170
     * @param errorCode Standard ICU error code. Its input value must
171
     *                  pass the U_SUCCESS() test, or else the function returns
172
     *                  immediately. Check for U_FAILURE() on output or use with
173
     *                  function chaining. (See User Guide for details.)
174
     * @return the requested Normalizer2, if successful
175
     * @stable ICU 4.4
176
     */
177
    static const Normalizer2 *
178
    getInstance(const char *packageName,
179
                const char *name,
180
                UNormalization2Mode mode,
181
                UErrorCode &errorCode);
182

183
    /**
184
     * Returns the normalized form of the source string.
185
     * @param src source string
186
     * @param errorCode Standard ICU error code. Its input value must
187
     *                  pass the U_SUCCESS() test, or else the function returns
188
     *                  immediately. Check for U_FAILURE() on output or use with
189
     *                  function chaining. (See User Guide for details.)
190
     * @return normalized src
191
     * @stable ICU 4.4
192
     */
193
    UnicodeString
194
    normalize(const UnicodeString &src, UErrorCode &errorCode) const {
195
        UnicodeString result;
196
        normalize(src, result, errorCode);
197
        return result;
198
    }
199
    /**
200
     * Writes the normalized form of the source string to the destination string
201
     * (replacing its contents) and returns the destination string.
202
     * The source and destination strings must be different objects.
203
     * @param src source string
204
     * @param dest destination string; its contents is replaced with normalized src
205
     * @param errorCode Standard ICU error code. Its input value must
206
     *                  pass the U_SUCCESS() test, or else the function returns
207
     *                  immediately. Check for U_FAILURE() on output or use with
208
     *                  function chaining. (See User Guide for details.)
209
     * @return dest
210
     * @stable ICU 4.4
211
     */
212
    virtual UnicodeString &
213
    normalize(const UnicodeString &src,
214
              UnicodeString &dest,
215
              UErrorCode &errorCode) const = 0;
216
    /**
217
     * Appends the normalized form of the second string to the first string
218
     * (merging them at the boundary) and returns the first string.
219
     * The result is normalized if the first string was normalized.
220
     * The first and second strings must be different objects.
221
     * @param first string, should be normalized
222
     * @param second string, will be normalized
223
     * @param errorCode Standard ICU error code. Its input value must
224
     *                  pass the U_SUCCESS() test, or else the function returns
225
     *                  immediately. Check for U_FAILURE() on output or use with
226
     *                  function chaining. (See User Guide for details.)
227
     * @return first
228
     * @stable ICU 4.4
229
     */
230
    virtual UnicodeString &
231
    normalizeSecondAndAppend(UnicodeString &first,
232
                             const UnicodeString &second,
233
                             UErrorCode &errorCode) const = 0;
234
    /**
235
     * Appends the second string to the first string
236
     * (merging them at the boundary) and returns the first string.
237
     * The result is normalized if both the strings were normalized.
238
     * The first and second strings must be different objects.
239
     * @param first string, should be normalized
240
     * @param second string, should be normalized
241
     * @param errorCode Standard ICU error code. Its input value must
242
     *                  pass the U_SUCCESS() test, or else the function returns
243
     *                  immediately. Check for U_FAILURE() on output or use with
244
     *                  function chaining. (See User Guide for details.)
245
     * @return first
246
     * @stable ICU 4.4
247
     */
248
    virtual UnicodeString &
249
    append(UnicodeString &first,
250
           const UnicodeString &second,
251
           UErrorCode &errorCode) const = 0;
252

253
    /**
254
     * Gets the decomposition mapping of c.
255
     * Roughly equivalent to normalizing the String form of c
256
     * on a UNORM2_DECOMPOSE Normalizer2 instance, but much faster, and except that this function
257
     * returns FALSE and does not write a string
258
     * if c does not have a decomposition mapping in this instance's data.
259
     * This function is independent of the mode of the Normalizer2.
260
     * @param c code point
261
     * @param decomposition String object which will be set to c's
262
     *                      decomposition mapping, if there is one.
263
     * @return TRUE if c has a decomposition, otherwise FALSE
264
     * @stable ICU 4.6
265
     */
266
    virtual UBool
267
    getDecomposition(UChar32 c, UnicodeString &decomposition) const = 0;
268

269
    /**
270
     * Gets the raw decomposition mapping of c.
271
     *
272
     * This is similar to the getDecomposition() method but returns the
273
     * raw decomposition mapping as specified in UnicodeData.txt or
274
     * (for custom data) in the mapping files processed by the gennorm2 tool.
275
     * By contrast, getDecomposition() returns the processed,
276
     * recursively-decomposed version of this mapping.
277
     *
278
     * When used on a standard NFKC Normalizer2 instance,
279
     * getRawDecomposition() returns the Unicode Decomposition_Mapping (dm) property.
280
     *
281
     * When used on a standard NFC Normalizer2 instance,
282
     * it returns the Decomposition_Mapping only if the Decomposition_Type (dt) is Canonical (Can);
283
     * in this case, the result contains either one or two code points (=1..4 UChars).
284
     *
285
     * This function is independent of the mode of the Normalizer2.
286
     * The default implementation returns FALSE.
287
     * @param c code point
288
     * @param decomposition String object which will be set to c's
289
     *                      raw decomposition mapping, if there is one.
290
     * @return TRUE if c has a decomposition, otherwise FALSE
291
     * @stable ICU 49
292
     */
293
    virtual UBool
294
    getRawDecomposition(UChar32 c, UnicodeString &decomposition) const;
295

296
    /**
297
     * Performs pairwise composition of a & b and returns the composite if there is one.
298
     *
299
     * Returns a composite code point c only if c has a two-way mapping to a+b.
300
     * In standard Unicode normalization, this means that
301
     * c has a canonical decomposition to a+b
302
     * and c does not have the Full_Composition_Exclusion property.
303
     *
304
     * This function is independent of the mode of the Normalizer2.
305
     * The default implementation returns a negative value.
306
     * @param a A (normalization starter) code point.
307
     * @param b Another code point.
308
     * @return The non-negative composite code point if there is one; otherwise a negative value.
309
     * @stable ICU 49
310
     */
311
    virtual UChar32
312
    composePair(UChar32 a, UChar32 b) const;
313

314
    /**
315
     * Gets the combining class of c.
316
     * The default implementation returns 0
317
     * but all standard implementations return the Unicode Canonical_Combining_Class value.
318
     * @param c code point
319
     * @return c's combining class
320
     * @stable ICU 49
321
     */
322
    virtual uint8_t
323
    getCombiningClass(UChar32 c) const;
324

325
    /**
326
     * Tests if the string is normalized.
327
     * Internally, in cases where the quickCheck() method would return "maybe"
328
     * (which is only possible for the two COMPOSE modes) this method
329
     * resolves to "yes" or "no" to provide a definitive result,
330
     * at the cost of doing more work in those cases.
331
     * @param s input string
332
     * @param errorCode Standard ICU error code. Its input value must
333
     *                  pass the U_SUCCESS() test, or else the function returns
334
     *                  immediately. Check for U_FAILURE() on output or use with
335
     *                  function chaining. (See User Guide for details.)
336
     * @return TRUE if s is normalized
337
     * @stable ICU 4.4
338
     */
339
    virtual UBool
340
    isNormalized(const UnicodeString &s, UErrorCode &errorCode) const = 0;
341

342
    /**
343
     * Tests if the string is normalized.
344
     * For the two COMPOSE modes, the result could be "maybe" in cases that
345
     * would take a little more work to resolve definitively.
346
     * Use spanQuickCheckYes() and normalizeSecondAndAppend() for a faster
347
     * combination of quick check + normalization, to avoid
348
     * re-checking the "yes" prefix.
349
     * @param s input string
350
     * @param errorCode Standard ICU error code. Its input value must
351
     *                  pass the U_SUCCESS() test, or else the function returns
352
     *                  immediately. Check for U_FAILURE() on output or use with
353
     *                  function chaining. (See User Guide for details.)
354
     * @return UNormalizationCheckResult
355
     * @stable ICU 4.4
356
     */
357
    virtual UNormalizationCheckResult
358
    quickCheck(const UnicodeString &s, UErrorCode &errorCode) const = 0;
359

360
    /**
361
     * Returns the end of the normalized substring of the input string.
362
     * In other words, with <code>end=spanQuickCheckYes(s, ec);</code>
363
     * the substring <code>UnicodeString(s, 0, end)</code>
364
     * will pass the quick check with a "yes" result.
365
     *
366
     * The returned end index is usually one or more characters before the
367
     * "no" or "maybe" character: The end index is at a normalization boundary.
368
     * (See the class documentation for more about normalization boundaries.)
369
     *
370
     * When the goal is a normalized string and most input strings are expected
371
     * to be normalized already, then call this method,
372
     * and if it returns a prefix shorter than the input string,
373
     * copy that prefix and use normalizeSecondAndAppend() for the remainder.
374
     * @param s input string
375
     * @param errorCode Standard ICU error code. Its input value must
376
     *                  pass the U_SUCCESS() test, or else the function returns
377
     *                  immediately. Check for U_FAILURE() on output or use with
378
     *                  function chaining. (See User Guide for details.)
379
     * @return "yes" span end index
380
     * @stable ICU 4.4
381
     */
382
    virtual int32_t
383
    spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const = 0;
384

385
    /**
386
     * Tests if the character always has a normalization boundary before it,
387
     * regardless of context.
388
     * If true, then the character does not normalization-interact with
389
     * preceding characters.
390
     * In other words, a string containing this character can be normalized
391
     * by processing portions before this character and starting from this
392
     * character independently.
393
     * This is used for iterative normalization. See the class documentation for details.
394
     * @param c character to test
395
     * @return TRUE if c has a normalization boundary before it
396
     * @stable ICU 4.4
397
     */
398
    virtual UBool hasBoundaryBefore(UChar32 c) const = 0;
399

400
    /**
401
     * Tests if the character always has a normalization boundary after it,
402
     * regardless of context.
403
     * If true, then the character does not normalization-interact with
404
     * following characters.
405
     * In other words, a string containing this character can be normalized
406
     * by processing portions up to this character and after this
407
     * character independently.
408
     * This is used for iterative normalization. See the class documentation for details.
409
     * Note that this operation may be significantly slower than hasBoundaryBefore().
410
     * @param c character to test
411
     * @return TRUE if c has a normalization boundary after it
412
     * @stable ICU 4.4
413
     */
414
    virtual UBool hasBoundaryAfter(UChar32 c) const = 0;
415

416
    /**
417
     * Tests if the character is normalization-inert.
418
     * If true, then the character does not change, nor normalization-interact with
419
     * preceding or following characters.
420
     * In other words, a string containing this character can be normalized
421
     * by processing portions before this character and after this
422
     * character independently.
423
     * This is used for iterative normalization. See the class documentation for details.
424
     * Note that this operation may be significantly slower than hasBoundaryBefore().
425
     * @param c character to test
426
     * @return TRUE if c is normalization-inert
427
     * @stable ICU 4.4
428
     */
429
    virtual UBool isInert(UChar32 c) const = 0;
430
};
431

432
/**
433
 * Normalization filtered by a UnicodeSet.
434
 * Normalizes portions of the text contained in the filter set and leaves
435
 * portions not contained in the filter set unchanged.
436
 * Filtering is done via UnicodeSet::span(..., USET_SPAN_SIMPLE).
437
 * Not-in-the-filter text is treated as "is normalized" and "quick check yes".
438
 * This class implements all of (and only) the Normalizer2 API.
439
 * An instance of this class is unmodifiable/immutable but is constructed and
440
 * must be destructed by the owner.
441
 * @stable ICU 4.4
442
 */
443
class U_COMMON_API FilteredNormalizer2 : public Normalizer2 {
444
public:
445
    /**
446
     * Constructs a filtered normalizer wrapping any Normalizer2 instance
447
     * and a filter set.
448
     * Both are aliased and must not be modified or deleted while this object
449
     * is used.
450
     * The filter set should be frozen; otherwise the performance will suffer greatly.
451
     * @param n2 wrapped Normalizer2 instance
452
     * @param filterSet UnicodeSet which determines the characters to be normalized
453
     * @stable ICU 4.4
454
     */
455
    FilteredNormalizer2(const Normalizer2 &n2, const UnicodeSet &filterSet) :
456
            norm2(n2), set(filterSet) {}
457

458
    /**
459
     * Destructor.
460
     * @stable ICU 4.4
461
     */
462
    ~FilteredNormalizer2();
463

464
    /**
465
     * Writes the normalized form of the source string to the destination string
466
     * (replacing its contents) and returns the destination string.
467
     * The source and destination strings must be different objects.
468
     * @param src source string
469
     * @param dest destination string; its contents is replaced with normalized src
470
     * @param errorCode Standard ICU error code. Its input value must
471
     *                  pass the U_SUCCESS() test, or else the function returns
472
     *                  immediately. Check for U_FAILURE() on output or use with
473
     *                  function chaining. (See User Guide for details.)
474
     * @return dest
475
     * @stable ICU 4.4
476
     */
477
    virtual UnicodeString &
478
    normalize(const UnicodeString &src,
479
              UnicodeString &dest,
480
              UErrorCode &errorCode) const;
481
    /**
482
     * Appends the normalized form of the second string to the first string
483
     * (merging them at the boundary) and returns the first string.
484
     * The result is normalized if the first string was normalized.
485
     * The first and second strings must be different objects.
486
     * @param first string, should be normalized
487
     * @param second string, will be normalized
488
     * @param errorCode Standard ICU error code. Its input value must
489
     *                  pass the U_SUCCESS() test, or else the function returns
490
     *                  immediately. Check for U_FAILURE() on output or use with
491
     *                  function chaining. (See User Guide for details.)
492
     * @return first
493
     * @stable ICU 4.4
494
     */
495
    virtual UnicodeString &
496
    normalizeSecondAndAppend(UnicodeString &first,
497
                             const UnicodeString &second,
498
                             UErrorCode &errorCode) const;
499
    /**
500
     * Appends the second string to the first string
501
     * (merging them at the boundary) and returns the first string.
502
     * The result is normalized if both the strings were normalized.
503
     * The first and second strings must be different objects.
504
     * @param first string, should be normalized
505
     * @param second string, should be normalized
506
     * @param errorCode Standard ICU error code. Its input value must
507
     *                  pass the U_SUCCESS() test, or else the function returns
508
     *                  immediately. Check for U_FAILURE() on output or use with
509
     *                  function chaining. (See User Guide for details.)
510
     * @return first
511
     * @stable ICU 4.4
512
     */
513
    virtual UnicodeString &
514
    append(UnicodeString &first,
515
           const UnicodeString &second,
516
           UErrorCode &errorCode) const;
517

518
    /**
519
     * Gets the decomposition mapping of c.
520
     * For details see the base class documentation.
521
     *
522
     * This function is independent of the mode of the Normalizer2.
523
     * @param c code point
524
     * @param decomposition String object which will be set to c's
525
     *                      decomposition mapping, if there is one.
526
     * @return TRUE if c has a decomposition, otherwise FALSE
527
     * @stable ICU 4.6
528
     */
529
    virtual UBool
530
    getDecomposition(UChar32 c, UnicodeString &decomposition) const;
531

532
    /**
533
     * Gets the raw decomposition mapping of c.
534
     * For details see the base class documentation.
535
     *
536
     * This function is independent of the mode of the Normalizer2.
537
     * @param c code point
538
     * @param decomposition String object which will be set to c's
539
     *                      raw decomposition mapping, if there is one.
540
     * @return TRUE if c has a decomposition, otherwise FALSE
541
     * @stable ICU 49
542
     */
543
    virtual UBool
544
    getRawDecomposition(UChar32 c, UnicodeString &decomposition) const;
545

546
    /**
547
     * Performs pairwise composition of a & b and returns the composite if there is one.
548
     * For details see the base class documentation.
549
     *
550
     * This function is independent of the mode of the Normalizer2.
551
     * @param a A (normalization starter) code point.
552
     * @param b Another code point.
553
     * @return The non-negative composite code point if there is one; otherwise a negative value.
554
     * @stable ICU 49
555
     */
556
    virtual UChar32
557
    composePair(UChar32 a, UChar32 b) const;
558

559
    /**
560
     * Gets the combining class of c.
561
     * The default implementation returns 0
562
     * but all standard implementations return the Unicode Canonical_Combining_Class value.
563
     * @param c code point
564
     * @return c's combining class
565
     * @stable ICU 49
566
     */
567
    virtual uint8_t
568
    getCombiningClass(UChar32 c) const;
569

570
    /**
571
     * Tests if the string is normalized.
572
     * For details see the Normalizer2 base class documentation.
573
     * @param s input string
574
     * @param errorCode Standard ICU error code. Its input value must
575
     *                  pass the U_SUCCESS() test, or else the function returns
576
     *                  immediately. Check for U_FAILURE() on output or use with
577
     *                  function chaining. (See User Guide for details.)
578
     * @return TRUE if s is normalized
579
     * @stable ICU 4.4
580
     */
581
    virtual UBool
582
    isNormalized(const UnicodeString &s, UErrorCode &errorCode) const;
583
    /**
584
     * Tests if the string is normalized.
585
     * For details see the Normalizer2 base class documentation.
586
     * @param s input string
587
     * @param errorCode Standard ICU error code. Its input value must
588
     *                  pass the U_SUCCESS() test, or else the function returns
589
     *                  immediately. Check for U_FAILURE() on output or use with
590
     *                  function chaining. (See User Guide for details.)
591
     * @return UNormalizationCheckResult
592
     * @stable ICU 4.4
593
     */
594
    virtual UNormalizationCheckResult
595
    quickCheck(const UnicodeString &s, UErrorCode &errorCode) const;
596
    /**
597
     * Returns the end of the normalized substring of the input string.
598
     * For details see the Normalizer2 base class documentation.
599
     * @param s input string
600
     * @param errorCode Standard ICU error code. Its input value must
601
     *                  pass the U_SUCCESS() test, or else the function returns
602
     *                  immediately. Check for U_FAILURE() on output or use with
603
     *                  function chaining. (See User Guide for details.)
604
     * @return "yes" span end index
605
     * @stable ICU 4.4
606
     */
607
    virtual int32_t
608
    spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const;
609

610
    /**
611
     * Tests if the character always has a normalization boundary before it,
612
     * regardless of context.
613
     * For details see the Normalizer2 base class documentation.
614
     * @param c character to test
615
     * @return TRUE if c has a normalization boundary before it
616
     * @stable ICU 4.4
617
     */
618
    virtual UBool hasBoundaryBefore(UChar32 c) const;
619

620
    /**
621
     * Tests if the character always has a normalization boundary after it,
622
     * regardless of context.
623
     * For details see the Normalizer2 base class documentation.
624
     * @param c character to test
625
     * @return TRUE if c has a normalization boundary after it
626
     * @stable ICU 4.4
627
     */
628
    virtual UBool hasBoundaryAfter(UChar32 c) const;
629

630
    /**
631
     * Tests if the character is normalization-inert.
632
     * For details see the Normalizer2 base class documentation.
633
     * @param c character to test
634
     * @return TRUE if c is normalization-inert
635
     * @stable ICU 4.4
636
     */
637
    virtual UBool isInert(UChar32 c) const;
638
private:
639
    UnicodeString &
640
    normalize(const UnicodeString &src,
641
              UnicodeString &dest,
642
              USetSpanCondition spanCondition,
643
              UErrorCode &errorCode) const;
644

645
    UnicodeString &
646
    normalizeSecondAndAppend(UnicodeString &first,
647
                             const UnicodeString &second,
648
                             UBool doNormalize,
649
                             UErrorCode &errorCode) const;
650

651
    const Normalizer2 &norm2;
652
    const UnicodeSet &set;
653
};
654

655
U_NAMESPACE_END
656

657
#endif  // !UCONFIG_NO_NORMALIZATION
658
#endif  // __NORMALIZER2_H__
659

660
Product

Resources

Company