Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/openjdk-aarch32-jdk8u
Path: blob/jdk8u272-b10-aarch32-20201026/jdk/src/share/native/common/unicode/messagepattern.h
48773 views
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
*******************************************************************************
5
* Copyright (C) 2011-2013, International Business Machines
6
* Corporation and others. All Rights Reserved.
7
*******************************************************************************
8
* file name: messagepattern.h
9
* encoding: UTF-8
10
* tab size: 8 (not used)
11
* indentation:4
12
*
13
* created on: 2011mar14
14
* created by: Markus W. Scherer
15
*/
16
17
#ifndef __MESSAGEPATTERN_H__
18
#define __MESSAGEPATTERN_H__
19
20
/**
21
* \file
22
* \brief C++ API: MessagePattern class: Parses and represents ICU MessageFormat patterns.
23
*/
24
25
#include "unicode/utypes.h"
26
27
#if !UCONFIG_NO_FORMATTING
28
29
#include "unicode/parseerr.h"
30
#include "unicode/unistr.h"
31
32
/**
 * Mode for when an apostrophe starts quoted literal text for MessageFormat output.
 * The default is DOUBLE_OPTIONAL unless overridden via uconfig.h
 * (UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE).
 * <p>
 * A pair of adjacent apostrophes always results in a single apostrophe in the output,
 * even when the pair is between two single, text-quoting apostrophes.
 * <p>
 * The following table shows examples of desired MessageFormat.format() output
 * with the pattern strings that yield that output.
 * <p>
 * <table>
 *   <tr>
 *     <th>Desired output</th>
 *     <th>DOUBLE_OPTIONAL</th>
 *     <th>DOUBLE_REQUIRED</th>
 *   </tr>
 *   <tr>
 *     <td>I see {many}</td>
 *     <td>I see '{many}'</td>
 *     <td>(same)</td>
 *   </tr>
 *   <tr>
 *     <td>I said {'Wow!'}</td>
 *     <td>I said '{''Wow!''}'</td>
 *     <td>(same)</td>
 *   </tr>
 *   <tr>
 *     <td>I don't know</td>
 *     <td>I don't know OR<br> I don''t know</td>
 *     <td>I don''t know</td>
 *   </tr>
 * </table>
 * @stable ICU 4.8
 * @see UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE
 */
enum UMessagePatternApostropheMode {
    /**
     * A literal apostrophe is represented by
     * either a single or a double apostrophe pattern character.
     * Within a MessageFormat pattern, a single apostrophe only starts quoted literal text
     * if it immediately precedes a curly brace {},
     * or a pipe symbol | if inside a choice format,
     * or a pound symbol # if inside a plural format.
     * <p>
     * This is the default behavior starting with ICU 4.8.
     * @stable ICU 4.8
     */
    UMSGPAT_APOS_DOUBLE_OPTIONAL,
    /**
     * A literal apostrophe must be represented by
     * a double apostrophe pattern character.
     * A single apostrophe always starts quoted literal text.
     * <p>
     * This is the behavior of ICU 4.6 and earlier, and of the JDK.
     * @stable ICU 4.8
     */
    UMSGPAT_APOS_DOUBLE_REQUIRED
};
/**
 * Tag-free alias so that C code can name the enum type without the "enum" keyword.
 * @stable ICU 4.8
 */
typedef enum UMessagePatternApostropheMode UMessagePatternApostropheMode;
95
96
/**
 * MessagePattern::Part type constants.
 * The enumerators are declared without explicit values, so they are numbered
 * consecutively from 0 in declaration order.
 * @stable ICU 4.8
 */
enum UMessagePatternPartType {
    /**
     * Start of a message pattern (main or nested).
     * The length is 0 for the top-level message
     * and for a choice argument sub-message, otherwise 1 for the '{'.
     * The value indicates the nesting level, starting with 0 for the main message.
     * <p>
     * There is always a later MSG_LIMIT part.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_MSG_START,
    /**
     * End of a message pattern (main or nested).
     * The length is 0 for the top-level message and
     * the last sub-message of a choice argument,
     * otherwise 1 for the '}' or (in a choice argument style) the '|'.
     * The value indicates the nesting level, starting with 0 for the main message.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_MSG_LIMIT,
    /**
     * Indicates a substring of the pattern string which is to be skipped when formatting.
     * For example, an apostrophe that begins or ends quoted text
     * would be indicated with such a part.
     * The value is undefined and currently always 0.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_SKIP_SYNTAX,
    /**
     * Indicates that a syntax character needs to be inserted for auto-quoting.
     * The length is 0.
     * The value is the character code of the insertion character. (U+0027=APOSTROPHE)
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_INSERT_CHAR,
    /**
     * Indicates a syntactic (non-escaped) # symbol in a plural variant.
     * When formatting, replace this part's substring with the
     * (value-offset) for the plural argument value.
     * The value is undefined and currently always 0.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_REPLACE_NUMBER,
    /**
     * Start of an argument.
     * The length is 1 for the '{'.
     * The value is the ordinal value of the ArgType. Use getArgType().
     * <p>
     * This part is followed by either an ARG_NUMBER or ARG_NAME,
     * followed by optional argument sub-parts (see UMessagePatternArgType constants)
     * and finally an ARG_LIMIT part.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_START,
    /**
     * End of an argument.
     * The length is 1 for the '}'.
     * The value is the ordinal value of the ArgType. Use getArgType().
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_LIMIT,
    /**
     * The argument number, provided by the value.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_NUMBER,
    /**
     * The argument name.
     * The value is undefined and currently always 0.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_NAME,
    /**
     * The argument type.
     * The value is undefined and currently always 0.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_TYPE,
    /**
     * The argument style text.
     * The value is undefined and currently always 0.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_STYLE,
    /**
     * A selector substring in a "complex" argument style.
     * The value is undefined and currently always 0.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_SELECTOR,
    /**
     * An integer value, for example the offset or an explicit selector value
     * in a PluralFormat style.
     * The part value is the integer value.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_INT,
    /**
     * A numeric value, for example the offset or an explicit selector value
     * in a PluralFormat style.
     * The part value is an index into an internal array of numeric values;
     * use getNumericValue().
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_DOUBLE
};
/**
 * Tag-free alias so that C code can name the enum type without the "enum" keyword.
 * @stable ICU 4.8
 */
typedef enum UMessagePatternPartType UMessagePatternPartType;
210
211
/**
 * Argument type constants.
 * Returned by Part.getArgType() for ARG_START and ARG_LIMIT parts.
 *
 * Messages nested inside an argument are each delimited by MSG_START and MSG_LIMIT,
 * with a nesting level one greater than the surrounding message.
 *
 * The enumerators are declared without explicit values, so they are numbered
 * consecutively from 0 in declaration order.
 * @stable ICU 4.8
 */
enum UMessagePatternArgType {
    /**
     * The argument has no specified type.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_TYPE_NONE,
    /**
     * The argument has a "simple" type which is provided by the ARG_TYPE part.
     * An ARG_STYLE part might follow that.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_TYPE_SIMPLE,
    /**
     * The argument is a ChoiceFormat with one or more
     * ((ARG_INT | ARG_DOUBLE), ARG_SELECTOR, message) tuples.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_TYPE_CHOICE,
    /**
     * The argument is a cardinal-number PluralFormat with an optional ARG_INT or ARG_DOUBLE offset
     * (e.g., offset:1)
     * and one or more (ARG_SELECTOR [explicit-value] message) tuples.
     * If the selector has an explicit value (e.g., =2), then
     * that value is provided by the ARG_INT or ARG_DOUBLE part preceding the message.
     * Otherwise the message immediately follows the ARG_SELECTOR.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_TYPE_PLURAL,
    /**
     * The argument is a SelectFormat with one or more (ARG_SELECTOR, message) pairs.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_TYPE_SELECT,
    /**
     * The argument is an ordinal-number PluralFormat
     * with the same style parts sequence and semantics as UMSGPAT_ARG_TYPE_PLURAL.
     * @stable ICU 50
     */
    UMSGPAT_ARG_TYPE_SELECTORDINAL
};
/**
 * Tag-free alias so that C code can name the enum type without the "enum" keyword.
 * @stable ICU 4.8
 */
typedef enum UMessagePatternArgType UMessagePatternArgType;
263
264
/**
 * \def UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE
 * Returns TRUE if the argument type has a plural style part sequence and semantics,
 * for example UMSGPAT_ARG_TYPE_PLURAL and UMSGPAT_ARG_TYPE_SELECTORDINAL.
 * <p>
 * This is a function-like macro whose argument may be evaluated twice,
 * so do not pass an expression with side effects.
 * @stable ICU 50
 */
#define UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) \
    ((argType)==UMSGPAT_ARG_TYPE_PLURAL || (argType)==UMSGPAT_ARG_TYPE_SELECTORDINAL)
272
273
/**
 * Negative sentinel results of MessagePattern.validateArgumentName().
 * Non-negative results of that function are valid argument numbers.
 */
enum {
    /**
     * Return value from MessagePattern.validateArgumentName() for when
     * the string is a valid "pattern identifier" but not a number.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_NAME_NOT_NUMBER=-1,

    /**
     * Return value from MessagePattern.validateArgumentName() for when
     * the string is invalid.
     * It might not be a valid "pattern identifier",
     * or it has only ASCII digits but there is a leading zero or the number is too large.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_NAME_NOT_VALID=-2
};
290
291
/**
 * Special value that is returned by getNumericValue(Part) when no
 * numeric value is defined for a part.
 * The expansion is a fully parenthesized expression of type double.
 * @see MessagePattern.getNumericValue()
 * @stable ICU 4.8
 */
#define UMSGPAT_NO_NUMERIC_VALUE ((double)(-123456789))
298
299
U_NAMESPACE_BEGIN
300
301
// Forward declarations of the list types that MessagePattern refers to only
// through pointers (partsList, numericValuesList); they are defined elsewhere.
class MessagePatternDoubleList;
class MessagePatternPartsList;
303
304
/**
305
* Parses and represents ICU MessageFormat patterns.
306
* Also handles patterns for ChoiceFormat, PluralFormat and SelectFormat.
307
* Used in the implementations of those classes as well as in tools
308
* for message validation, translation and format conversion.
309
* <p>
310
* The parser handles all syntax relevant for identifying message arguments.
311
* This includes "complex" arguments whose style strings contain
312
* nested MessageFormat pattern substrings.
313
* For "simple" arguments (with no nested MessageFormat pattern substrings),
314
* the argument style is not parsed any further.
315
* <p>
316
* The parser handles named and numbered message arguments and allows both in one message.
317
* <p>
318
* Once a pattern has been parsed successfully, iterate through the parsed data
319
* with countParts(), getPart() and related methods.
320
* <p>
321
* The data logically represents a parse tree, but is stored and accessed
322
* as a list of "parts" for fast and simple parsing and to minimize object allocations.
323
* Arguments and nested messages are best handled via recursion.
324
* For every _START "part", MessagePattern.getLimitPartIndex() efficiently returns
325
* the index of the corresponding _LIMIT "part".
326
* <p>
327
* List of "parts":
328
* <pre>
329
* message = MSG_START (SKIP_SYNTAX | INSERT_CHAR | REPLACE_NUMBER | argument)* MSG_LIMIT
330
* argument = noneArg | simpleArg | complexArg
331
* complexArg = choiceArg | pluralArg | selectArg
332
*
333
* noneArg = ARG_START.NONE (ARG_NAME | ARG_NUMBER) ARG_LIMIT.NONE
334
* simpleArg = ARG_START.SIMPLE (ARG_NAME | ARG_NUMBER) ARG_TYPE [ARG_STYLE] ARG_LIMIT.SIMPLE
335
* choiceArg = ARG_START.CHOICE (ARG_NAME | ARG_NUMBER) choiceStyle ARG_LIMIT.CHOICE
336
* pluralArg = ARG_START.PLURAL (ARG_NAME | ARG_NUMBER) pluralStyle ARG_LIMIT.PLURAL
337
* selectArg = ARG_START.SELECT (ARG_NAME | ARG_NUMBER) selectStyle ARG_LIMIT.SELECT
338
*
339
* choiceStyle = ((ARG_INT | ARG_DOUBLE) ARG_SELECTOR message)+
340
* pluralStyle = [ARG_INT | ARG_DOUBLE] (ARG_SELECTOR [ARG_INT | ARG_DOUBLE] message)+
341
* selectStyle = (ARG_SELECTOR message)+
342
* </pre>
343
* <ul>
344
* <li>Literal output text is not represented directly by "parts" but accessed
345
* between parts of a message, from one part's getLimit() to the next part's getIndex().
346
* <li><code>ARG_START.CHOICE</code> stands for an ARG_START Part with ArgType CHOICE.
347
* <li>In the choiceStyle, the ARG_SELECTOR has the '<', the '#' or
348
* the less-than-or-equal-to sign (U+2264).
349
* <li>In the pluralStyle, the first, optional numeric Part has the "offset:" value.
350
* The optional numeric Part between each (ARG_SELECTOR, message) pair
351
* is the value of an explicit-number selector like "=2",
352
* otherwise the selector is a non-numeric identifier.
353
* <li>The REPLACE_NUMBER Part can occur only in an immediate sub-message of the pluralStyle.
354
* </ul>
355
* <p>
356
* This class is not intended for public subclassing.
357
*
358
* @stable ICU 4.8
359
*/
360
class U_COMMON_API MessagePattern : public UObject {
361
public:
362
/**
363
* Constructs an empty MessagePattern with default UMessagePatternApostropheMode.
364
* @param errorCode Standard ICU error code. Its input value must
365
* pass the U_SUCCESS() test, or else the function returns
366
* immediately. Check for U_FAILURE() on output or use with
367
* function chaining. (See User Guide for details.)
368
* @stable ICU 4.8
369
*/
370
MessagePattern(UErrorCode &errorCode);
371
372
/**
373
* Constructs an empty MessagePattern.
374
* @param mode Explicit UMessagePatternApostropheMode.
375
* @param errorCode Standard ICU error code. Its input value must
376
* pass the U_SUCCESS() test, or else the function returns
377
* immediately. Check for U_FAILURE() on output or use with
378
* function chaining. (See User Guide for details.)
379
* @stable ICU 4.8
380
*/
381
MessagePattern(UMessagePatternApostropheMode mode, UErrorCode &errorCode);
382
383
/**
384
* Constructs a MessagePattern with default UMessagePatternApostropheMode and
385
* parses the MessageFormat pattern string.
386
* @param pattern a MessageFormat pattern string
387
* @param parseError Struct to receive information on the position
388
* of an error within the pattern.
389
* Can be NULL.
390
* @param errorCode Standard ICU error code. Its input value must
391
* pass the U_SUCCESS() test, or else the function returns
392
* immediately. Check for U_FAILURE() on output or use with
393
* function chaining. (See User Guide for details.)
394
* TODO: turn @throws into UErrorCode specifics?
395
* @throws IllegalArgumentException for syntax errors in the pattern string
396
* @throws IndexOutOfBoundsException if certain limits are exceeded
397
* (e.g., argument number too high, argument name too long, etc.)
398
* @throws NumberFormatException if a number could not be parsed
399
* @stable ICU 4.8
400
*/
401
MessagePattern(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);
402
403
/**
404
* Copy constructor.
405
* @param other Object to copy.
406
* @stable ICU 4.8
407
*/
408
MessagePattern(const MessagePattern &other);
409
410
/**
411
* Assignment operator.
412
* @param other Object to copy.
413
* @return *this=other
414
* @stable ICU 4.8
415
*/
416
MessagePattern &operator=(const MessagePattern &other);
417
418
/**
419
* Destructor.
420
* @stable ICU 4.8
421
*/
422
virtual ~MessagePattern();
423
424
/**
425
* Parses a MessageFormat pattern string.
426
* @param pattern a MessageFormat pattern string
427
* @param parseError Struct to receive information on the position
428
* of an error within the pattern.
429
* Can be NULL.
430
* @param errorCode Standard ICU error code. Its input value must
431
* pass the U_SUCCESS() test, or else the function returns
432
* immediately. Check for U_FAILURE() on output or use with
433
* function chaining. (See User Guide for details.)
434
* @return *this
435
* @throws IllegalArgumentException for syntax errors in the pattern string
436
* @throws IndexOutOfBoundsException if certain limits are exceeded
437
* (e.g., argument number too high, argument name too long, etc.)
438
* @throws NumberFormatException if a number could not be parsed
439
* @stable ICU 4.8
440
*/
441
MessagePattern &parse(const UnicodeString &pattern,
442
UParseError *parseError, UErrorCode &errorCode);
443
444
/**
445
* Parses a ChoiceFormat pattern string.
446
* @param pattern a ChoiceFormat pattern string
447
* @param parseError Struct to receive information on the position
448
* of an error within the pattern.
449
* Can be NULL.
450
* @param errorCode Standard ICU error code. Its input value must
451
* pass the U_SUCCESS() test, or else the function returns
452
* immediately. Check for U_FAILURE() on output or use with
453
* function chaining. (See User Guide for details.)
454
* @return *this
455
* @throws IllegalArgumentException for syntax errors in the pattern string
456
* @throws IndexOutOfBoundsException if certain limits are exceeded
457
* (e.g., argument number too high, argument name too long, etc.)
458
* @throws NumberFormatException if a number could not be parsed
459
* @stable ICU 4.8
460
*/
461
MessagePattern &parseChoiceStyle(const UnicodeString &pattern,
462
UParseError *parseError, UErrorCode &errorCode);
463
464
/**
465
* Parses a PluralFormat pattern string.
466
* @param pattern a PluralFormat pattern string
467
* @param parseError Struct to receive information on the position
468
* of an error within the pattern.
469
* Can be NULL.
470
* @param errorCode Standard ICU error code. Its input value must
471
* pass the U_SUCCESS() test, or else the function returns
472
* immediately. Check for U_FAILURE() on output or use with
473
* function chaining. (See User Guide for details.)
474
* @return *this
475
* @throws IllegalArgumentException for syntax errors in the pattern string
476
* @throws IndexOutOfBoundsException if certain limits are exceeded
477
* (e.g., argument number too high, argument name too long, etc.)
478
* @throws NumberFormatException if a number could not be parsed
479
* @stable ICU 4.8
480
*/
481
MessagePattern &parsePluralStyle(const UnicodeString &pattern,
482
UParseError *parseError, UErrorCode &errorCode);
483
484
/**
485
* Parses a SelectFormat pattern string.
486
* @param pattern a SelectFormat pattern string
487
* @param parseError Struct to receive information on the position
488
* of an error within the pattern.
489
* Can be NULL.
490
* @param errorCode Standard ICU error code. Its input value must
491
* pass the U_SUCCESS() test, or else the function returns
492
* immediately. Check for U_FAILURE() on output or use with
493
* function chaining. (See User Guide for details.)
494
* @return *this
495
* @throws IllegalArgumentException for syntax errors in the pattern string
496
* @throws IndexOutOfBoundsException if certain limits are exceeded
497
* (e.g., argument number too high, argument name too long, etc.)
498
* @throws NumberFormatException if a number could not be parsed
499
* @stable ICU 4.8
500
*/
501
MessagePattern &parseSelectStyle(const UnicodeString &pattern,
502
UParseError *parseError, UErrorCode &errorCode);
503
504
/**
505
* Clears this MessagePattern.
506
* countParts() will return 0.
507
* @stable ICU 4.8
508
*/
509
void clear();
510
511
/**
512
* Clears this MessagePattern and sets the UMessagePatternApostropheMode.
513
* countParts() will return 0.
514
* @param mode The new UMessagePatternApostropheMode.
515
* @stable ICU 4.8
516
*/
517
void clearPatternAndSetApostropheMode(UMessagePatternApostropheMode mode) {
518
clear();
519
aposMode=mode;
520
}
521
522
/**
523
* @param other another object to compare with.
524
* @return TRUE if this object is equivalent to the other one.
525
* @stable ICU 4.8
526
*/
527
UBool operator==(const MessagePattern &other) const;
528
529
/**
530
* @param other another object to compare with.
531
* @return FALSE if this object is equivalent to the other one.
532
* @stable ICU 4.8
533
*/
534
inline UBool operator!=(const MessagePattern &other) const {
535
return !operator==(other);
536
}
537
538
/**
539
* @return A hash code for this object.
540
* @stable ICU 4.8
541
*/
542
int32_t hashCode() const;
543
544
/**
545
* @return this instance's UMessagePatternApostropheMode.
546
* @stable ICU 4.8
547
*/
548
UMessagePatternApostropheMode getApostropheMode() const {
549
return aposMode;
550
}
551
552
// Java has package-private jdkAposMode() here.
553
// In C++, this is declared in the MessageImpl class.
554
555
/**
556
* @return the parsed pattern string (null if none was parsed).
557
* @stable ICU 4.8
558
*/
559
const UnicodeString &getPatternString() const {
560
return msg;
561
}
562
563
/**
564
* Does the parsed pattern have named arguments like {first_name}?
565
* @return TRUE if the parsed pattern has at least one named argument.
566
* @stable ICU 4.8
567
*/
568
UBool hasNamedArguments() const {
569
return hasArgNames;
570
}
571
572
/**
573
* Does the parsed pattern have numbered arguments like {2}?
574
* @return TRUE if the parsed pattern has at least one numbered argument.
575
* @stable ICU 4.8
576
*/
577
UBool hasNumberedArguments() const {
578
return hasArgNumbers;
579
}
580
581
/**
582
* Validates and parses an argument name or argument number string.
583
* An argument name must be a "pattern identifier", that is, it must contain
584
* no Unicode Pattern_Syntax or Pattern_White_Space characters.
585
* If it only contains ASCII digits, then it must be a small integer with no leading zero.
586
* @param name Input string.
587
* @return &gt;=0 if the name is a valid number,
588
* ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits,
589
* ARG_NAME_NOT_VALID (-2) if it is neither.
590
* @stable ICU 4.8
591
*/
592
static int32_t validateArgumentName(const UnicodeString &name);
593
594
/**
595
* Returns a version of the parsed pattern string where each ASCII apostrophe
596
* is doubled (escaped) if it is not already, and if it is not interpreted as quoting syntax.
597
* <p>
598
* For example, this turns "I don't '{know}' {gender,select,female{h''er}other{h'im}}."
599
* into "I don''t '{know}' {gender,select,female{h''er}other{h''im}}."
600
* @return the deep-auto-quoted version of the parsed pattern string.
601
* @see MessageFormat.autoQuoteApostrophe()
602
* @stable ICU 4.8
603
*/
604
UnicodeString autoQuoteApostropheDeep() const;
605
606
class Part;
607
608
/**
609
* Returns the number of "parts" created by parsing the pattern string.
610
* Returns 0 if no pattern has been parsed or clear() was called.
611
* @return the number of pattern parts.
612
* @stable ICU 4.8
613
*/
614
int32_t countParts() const {
615
return partsLength;
616
}
617
618
/**
619
* Gets the i-th pattern "part".
620
* @param i The index of the Part data. (0..countParts()-1)
621
* @return the i-th pattern "part".
622
* @stable ICU 4.8
623
*/
624
const Part &getPart(int32_t i) const {
625
return parts[i];
626
}
627
628
/**
629
* Returns the UMessagePatternPartType of the i-th pattern "part".
630
* Convenience method for getPart(i).getType().
631
* @param i The index of the Part data. (0..countParts()-1)
632
* @return The UMessagePatternPartType of the i-th Part.
633
* @stable ICU 4.8
634
*/
635
UMessagePatternPartType getPartType(int32_t i) const {
636
return getPart(i).type;
637
}
638
639
/**
640
* Returns the pattern index of the specified pattern "part".
641
* Convenience method for getPart(partIndex).getIndex().
642
* @param partIndex The index of the Part data. (0..countParts()-1)
643
* @return The pattern index of this Part.
644
* @stable ICU 4.8
645
*/
646
int32_t getPatternIndex(int32_t partIndex) const {
647
return getPart(partIndex).index;
648
}
649
650
/**
651
* Returns the substring of the pattern string indicated by the Part.
652
* Convenience method for getPatternString().substring(part.getIndex(), part.getLimit()).
653
* @param part a part of this MessagePattern.
654
* @return the substring associated with part.
655
* @stable ICU 4.8
656
*/
657
UnicodeString getSubstring(const Part &part) const {
658
return msg.tempSubString(part.index, part.length);
659
}
660
661
/**
662
* Compares the part's substring with the input string s.
663
* @param part a part of this MessagePattern.
664
* @param s a string.
665
* @return TRUE if getSubstring(part).equals(s).
666
* @stable ICU 4.8
667
*/
668
UBool partSubstringMatches(const Part &part, const UnicodeString &s) const {
669
return 0==msg.compare(part.index, part.length, s);
670
}
671
672
/**
673
* Returns the numeric value associated with an ARG_INT or ARG_DOUBLE.
674
* @param part a part of this MessagePattern.
675
* @return the part's numeric value, or UMSGPAT_NO_NUMERIC_VALUE if this is not a numeric part.
676
* @stable ICU 4.8
677
*/
678
double getNumericValue(const Part &part) const;
679
680
/**
681
* Returns the "offset:" value of a PluralFormat argument, or 0 if none is specified.
682
* @param pluralStart the index of the first PluralFormat argument style part. (0..countParts()-1)
683
* @return the "offset:" value.
684
* @stable ICU 4.8
685
*/
686
double getPluralOffset(int32_t pluralStart) const;
687
688
/**
689
* Returns the index of the ARG|MSG_LIMIT part corresponding to the ARG|MSG_START at start.
690
* @param start The index of some Part data (0..countParts()-1);
691
* this Part should be of Type ARG_START or MSG_START.
692
* @return The first i>start where getPart(i).getType()==ARG|MSG_LIMIT at the same nesting level,
693
* or start itself if getPartType(msgStart)!=ARG|MSG_START.
694
* @stable ICU 4.8
695
*/
696
int32_t getLimitPartIndex(int32_t start) const {
697
int32_t limit=getPart(start).limitPartIndex;
698
if(limit<start) {
699
return start;
700
}
701
return limit;
702
}
703
704
/**
705
* A message pattern "part", representing a pattern parsing event.
706
* There is a part for the start and end of a message or argument,
707
* for quoting and escaping of and with ASCII apostrophes,
708
* and for syntax elements of "complex" arguments.
709
* @stable ICU 4.8
710
*/
711
class Part : public UMemory {
712
public:
713
/**
714
* Default constructor, do not use.
715
* @internal
716
*/
717
Part() {}
718
719
/**
720
* Returns the type of this part.
721
* @return the part type.
722
* @stable ICU 4.8
723
*/
724
UMessagePatternPartType getType() const {
725
return type;
726
}
727
728
/**
729
* Returns the pattern string index associated with this Part.
730
* @return this part's pattern string index.
731
* @stable ICU 4.8
732
*/
733
int32_t getIndex() const {
734
return index;
735
}
736
737
/**
738
* Returns the length of the pattern substring associated with this Part.
739
* This is 0 for some parts.
740
* @return this part's pattern substring length.
741
* @stable ICU 4.8
742
*/
743
int32_t getLength() const {
744
return length;
745
}
746
747
/**
748
* Returns the pattern string limit (exclusive-end) index associated with this Part.
749
* Convenience method for getIndex()+getLength().
750
* @return this part's pattern string limit index, same as getIndex()+getLength().
751
* @stable ICU 4.8
752
*/
753
int32_t getLimit() const {
754
return index+length;
755
}
756
757
/**
758
* Returns a value associated with this part.
759
* See the documentation of each part type for details.
760
* @return the part value.
761
* @stable ICU 4.8
762
*/
763
int32_t getValue() const {
764
return value;
765
}
766
767
/**
768
* Returns the argument type if this part is of type ARG_START or ARG_LIMIT,
769
* otherwise UMSGPAT_ARG_TYPE_NONE.
770
* @return the argument type for this part.
771
* @stable ICU 4.8
772
*/
773
UMessagePatternArgType getArgType() const {
774
UMessagePatternPartType msgType=getType();
775
if(msgType ==UMSGPAT_PART_TYPE_ARG_START || msgType ==UMSGPAT_PART_TYPE_ARG_LIMIT) {
776
return (UMessagePatternArgType)value;
777
} else {
778
return UMSGPAT_ARG_TYPE_NONE;
779
}
780
}
781
782
/**
783
* Indicates whether the Part type has a numeric value.
784
* If so, then that numeric value can be retrieved via MessagePattern.getNumericValue().
785
* @param type The Part type to be tested.
786
* @return TRUE if the Part type has a numeric value.
787
* @stable ICU 4.8
788
*/
789
static UBool hasNumericValue(UMessagePatternPartType type) {
790
return type==UMSGPAT_PART_TYPE_ARG_INT || type==UMSGPAT_PART_TYPE_ARG_DOUBLE;
791
}
792
793
/**
794
* @param other another object to compare with.
795
* @return TRUE if this object is equivalent to the other one.
796
* @stable ICU 4.8
797
*/
798
UBool operator==(const Part &other) const;
799
800
/**
801
* @param other another object to compare with.
802
* @return FALSE if this object is equivalent to the other one.
803
* @stable ICU 4.8
804
*/
805
inline UBool operator!=(const Part &other) const {
806
return !operator==(other);
807
}
808
809
/**
810
* @return A hash code for this object.
811
* @stable ICU 4.8
812
*/
813
int32_t hashCode() const {
814
return ((type*37+index)*37+length)*37+value;
815
}
816
817
private:
818
friend class MessagePattern;
819
820
static const int32_t MAX_LENGTH=0xffff;
821
static const int32_t MAX_VALUE=0x7fff;
822
823
// Some fields are not final because they are modified during pattern parsing.
824
// After pattern parsing, the parts are effectively immutable.
825
UMessagePatternPartType type;
826
int32_t index;
827
uint16_t length;
828
int16_t value;
829
int32_t limitPartIndex;
830
};
831
832
private:
833
void preParse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);
834
835
void postParse();
836
837
int32_t parseMessage(int32_t index, int32_t msgStartLength,
838
int32_t nestingLevel, UMessagePatternArgType parentType,
839
UParseError *parseError, UErrorCode &errorCode);
840
841
int32_t parseArg(int32_t index, int32_t argStartLength, int32_t nestingLevel,
842
UParseError *parseError, UErrorCode &errorCode);
843
844
int32_t parseSimpleStyle(int32_t index, UParseError *parseError, UErrorCode &errorCode);
845
846
int32_t parseChoiceStyle(int32_t index, int32_t nestingLevel,
847
UParseError *parseError, UErrorCode &errorCode);
848
849
int32_t parsePluralOrSelectStyle(UMessagePatternArgType argType, int32_t index, int32_t nestingLevel,
850
UParseError *parseError, UErrorCode &errorCode);
851
852
/**
853
* Validates and parses an argument name or argument number string.
854
* This internal method assumes that the input substring is a "pattern identifier".
855
* @return &gt;=0 if the name is a valid number,
856
* ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits,
857
* ARG_NAME_NOT_VALID (-2) if it is neither.
858
* @see #validateArgumentName(String)
859
*/
860
static int32_t parseArgNumber(const UnicodeString &s, int32_t start, int32_t limit);
861
862
int32_t parseArgNumber(int32_t start, int32_t limit) {
863
return parseArgNumber(msg, start, limit);
864
}
865
866
/**
867
* Parses a number from the specified message substring.
868
* @param start start index into the message string
869
* @param limit limit index into the message string, must be start<limit
870
* @param allowInfinity TRUE if U+221E is allowed (for ChoiceFormat)
871
* @param parseError
872
* @param errorCode
873
*/
874
void parseDouble(int32_t start, int32_t limit, UBool allowInfinity,
875
UParseError *parseError, UErrorCode &errorCode);
876
877
// Java has package-private appendReducedApostrophes() here.
878
// In C++, this is declared in the MessageImpl class.
879
880
int32_t skipWhiteSpace(int32_t index);
881
882
int32_t skipIdentifier(int32_t index);
883
884
/**
885
* Skips a sequence of characters that could occur in a double value.
886
* Does not fully parse or validate the value.
887
*/
888
int32_t skipDouble(int32_t index);
889
890
static UBool isArgTypeChar(UChar32 c);
891
892
UBool isChoice(int32_t index);
893
894
UBool isPlural(int32_t index);
895
896
UBool isSelect(int32_t index);
897
898
UBool isOrdinal(int32_t index);
899
900
/**
901
* @return TRUE if we are inside a MessageFormat (sub-)pattern,
902
* as opposed to inside a top-level choice/plural/select pattern.
903
*/
904
UBool inMessageFormatPattern(int32_t nestingLevel);
905
906
/**
907
* @return TRUE if we are in a MessageFormat sub-pattern
908
* of a top-level ChoiceFormat pattern.
909
*/
910
UBool inTopLevelChoiceMessage(int32_t nestingLevel, UMessagePatternArgType parentType);
911
912
void addPart(UMessagePatternPartType type, int32_t index, int32_t length,
913
int32_t value, UErrorCode &errorCode);
914
915
void addLimitPart(int32_t start,
916
UMessagePatternPartType type, int32_t index, int32_t length,
917
int32_t value, UErrorCode &errorCode);
918
919
void addArgDoublePart(double numericValue, int32_t start, int32_t length, UErrorCode &errorCode);
920
921
void setParseError(UParseError *parseError, int32_t index);
922
923
UBool init(UErrorCode &errorCode);
924
UBool copyStorage(const MessagePattern &other, UErrorCode &errorCode);
925
926
UMessagePatternApostropheMode aposMode;
927
UnicodeString msg;
928
// ArrayList<Part> parts=new ArrayList<Part>();
929
MessagePatternPartsList *partsList;
930
Part *parts;
931
int32_t partsLength;
932
// ArrayList<Double> numericValues;
933
MessagePatternDoubleList *numericValuesList;
934
double *numericValues;
935
int32_t numericValuesLength;
936
UBool hasArgNames;
937
UBool hasArgNumbers;
938
UBool needsAutoQuoting;
939
};
940
941
U_NAMESPACE_END
942
943
#endif // !UCONFIG_NO_FORMATTING
944
945
#endif // __MESSAGEPATTERN_H__
946
947