// Source: GitHub repository PojavLauncherTeam/openjdk-multiarch-jdk8u
// Path: blob/aarch64-shenandoah-jdk8u272-b10/jdk/src/share/native/common/unicode/messagepattern.h
/*
*******************************************************************************
*   Copyright (C) 2011-2013, International Business Machines
*   Corporation and others.  All Rights Reserved.
*******************************************************************************
*   file name:  messagepattern.h
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2011mar14
*   created by: Markus W. Scherer
*/

#ifndef __MESSAGEPATTERN_H__
#define __MESSAGEPATTERN_H__

/**
 * \file
 * \brief C++ API: MessagePattern class: Parses and represents ICU MessageFormat patterns.
 */

#include "unicode/utypes.h"

#if !UCONFIG_NO_FORMATTING

#include "unicode/parseerr.h"
#include "unicode/unistr.h"

/**
 * Mode for when an apostrophe starts quoted literal text for MessageFormat output.
 * The default is DOUBLE_OPTIONAL unless overridden via uconfig.h
 * (UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE).
 * <p>
 * A pair of adjacent apostrophes always results in a single apostrophe in the output,
 * even when the pair is between two single, text-quoting apostrophes.
 * <p>
 * The following table shows examples of desired MessageFormat.format() output
 * with the pattern strings that yield that output.
 * <p>
 * <table>
 *   <tr>
 *     <th>Desired output</th>
 *     <th>DOUBLE_OPTIONAL</th>
 *     <th>DOUBLE_REQUIRED</th>
 *   </tr>
 *   <tr>
 *     <td>I see {many}</td>
 *     <td>I see '{many}'</td>
 *     <td>(same)</td>
 *   </tr>
 *   <tr>
 *     <td>I said {'Wow!'}</td>
 *     <td>I said '{''Wow!''}'</td>
 *     <td>(same)</td>
 *   </tr>
 *   <tr>
 *     <td>I don't know</td>
 *     <td>I don't know OR<br> I don''t know</td>
 *     <td>I don''t know</td>
 *   </tr>
 * </table>
 * @stable ICU 4.8
 * @see UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE
 */
enum UMessagePatternApostropheMode {
    /**
     * A literal apostrophe is represented by
     * either a single or a double apostrophe pattern character.
     * Within a MessageFormat pattern, a single apostrophe only starts quoted literal text
     * if it immediately precedes a curly brace {},
     * or a pipe symbol | if inside a choice format,
     * or a pound symbol # if inside a plural format.
     * <p>
     * This is the default behavior starting with ICU 4.8.
     * @stable ICU 4.8
     */
    UMSGPAT_APOS_DOUBLE_OPTIONAL,
    /**
     * A literal apostrophe must be represented by
     * a double apostrophe pattern character.
     * A single apostrophe always starts quoted literal text.
     * <p>
     * This is the behavior of ICU 4.6 and earlier, and of the JDK.
     * @stable ICU 4.8
     */
    UMSGPAT_APOS_DOUBLE_REQUIRED
};
/**
 * @stable ICU 4.8
 */
typedef enum UMessagePatternApostropheMode UMessagePatternApostropheMode;

/**
 * MessagePattern::Part type constants.
 * @stable ICU 4.8
 */
enum UMessagePatternPartType {
    /**
     * Start of a message pattern (main or nested).
     * The length is 0 for the top-level message
     * and for a choice argument sub-message, otherwise 1 for the '{'.
     * The value indicates the nesting level, starting with 0 for the main message.
     * <p>
     * There is always a later MSG_LIMIT part.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_MSG_START,
    /**
     * End of a message pattern (main or nested).
     * The length is 0 for the top-level message and
     * the last sub-message of a choice argument,
     * otherwise 1 for the '}' or (in a choice argument style) the '|'.
     * The value indicates the nesting level, starting with 0 for the main message.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_MSG_LIMIT,
    /**
     * Indicates a substring of the pattern string which is to be skipped when formatting.
     * For example, an apostrophe that begins or ends quoted text
     * would be indicated with such a part.
     * The value is undefined and currently always 0.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_SKIP_SYNTAX,
    /**
     * Indicates that a syntax character needs to be inserted for auto-quoting.
     * The length is 0.
     * The value is the character code of the insertion character. (U+0027=APOSTROPHE)
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_INSERT_CHAR,
    /**
     * Indicates a syntactic (non-escaped) # symbol in a plural variant.
     * When formatting, replace this part's substring with the
     * (value-offset) for the plural argument value.
     * The value is undefined and currently always 0.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_REPLACE_NUMBER,
    /**
     * Start of an argument.
     * The length is 1 for the '{'.
     * The value is the ordinal value of the ArgType. Use getArgType().
     * <p>
     * This part is followed by either an ARG_NUMBER or ARG_NAME,
     * followed by optional argument sub-parts (see UMessagePatternArgType constants)
     * and finally an ARG_LIMIT part.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_START,
    /**
     * End of an argument.
     * The length is 1 for the '}'.
     * The value is the ordinal value of the ArgType. Use getArgType().
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_LIMIT,
    /**
     * The argument number, provided by the value.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_NUMBER,
    /**
     * The argument name.
     * The value is undefined and currently always 0.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_NAME,
    /**
     * The argument type.
     * The value is undefined and currently always 0.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_TYPE,
    /**
     * The argument style text.
     * The value is undefined and currently always 0.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_STYLE,
    /**
     * A selector substring in a "complex" argument style.
     * The value is undefined and currently always 0.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_SELECTOR,
    /**
     * An integer value, for example the offset or an explicit selector value
     * in a PluralFormat style.
     * The part value is the integer value.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_INT,
    /**
     * A numeric value, for example the offset or an explicit selector value
     * in a PluralFormat style.
     * The part value is an index into an internal array of numeric values;
     * use getNumericValue().
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_DOUBLE
};
/**
 * @stable ICU 4.8
 */
typedef enum UMessagePatternPartType UMessagePatternPartType;

/**
 * Argument type constants.
 * Returned by Part.getArgType() for ARG_START and ARG_LIMIT parts.
 *
 * Messages nested inside an argument are each delimited by MSG_START and MSG_LIMIT,
 * with a nesting level one greater than the surrounding message.
 * @stable ICU 4.8
 */
enum UMessagePatternArgType {
    /**
     * The argument has no specified type.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_TYPE_NONE,
    /**
     * The argument has a "simple" type which is provided by the ARG_TYPE part.
     * An ARG_STYLE part might follow that.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_TYPE_SIMPLE,
    /**
     * The argument is a ChoiceFormat with one or more
     * ((ARG_INT | ARG_DOUBLE), ARG_SELECTOR, message) tuples.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_TYPE_CHOICE,
    /**
     * The argument is a cardinal-number PluralFormat with an optional ARG_INT or ARG_DOUBLE offset
     * (e.g., offset:1)
     * and one or more (ARG_SELECTOR [explicit-value] message) tuples.
     * If the selector has an explicit value (e.g., =2), then
     * that value is provided by the ARG_INT or ARG_DOUBLE part preceding the message.
     * Otherwise the message immediately follows the ARG_SELECTOR.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_TYPE_PLURAL,
    /**
     * The argument is a SelectFormat with one or more (ARG_SELECTOR, message) pairs.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_TYPE_SELECT,
    /**
     * The argument is an ordinal-number PluralFormat
     * with the same style parts sequence and semantics as UMSGPAT_ARG_TYPE_PLURAL.
     * @stable ICU 50
     */
    UMSGPAT_ARG_TYPE_SELECTORDINAL
};
/**
 * @stable ICU 4.8
 */
typedef enum UMessagePatternArgType UMessagePatternArgType;

/**
 * \def UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE
 * Returns TRUE if the argument type has a plural style part sequence and semantics,
 * for example UMSGPAT_ARG_TYPE_PLURAL and UMSGPAT_ARG_TYPE_SELECTORDINAL.
 * Note: the macro argument is evaluated twice, so pass an expression without side effects.
 * @stable ICU 50
 */
#define UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) \
    ((argType)==UMSGPAT_ARG_TYPE_PLURAL || (argType)==UMSGPAT_ARG_TYPE_SELECTORDINAL)

enum {
    /**
     * Return value from MessagePattern.validateArgumentName() for when
     * the string is a valid "pattern identifier" but not a number.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_NAME_NOT_NUMBER=-1,

    /**
     * Return value from MessagePattern.validateArgumentName() for when
     * the string is invalid.
     * It might not be a valid "pattern identifier",
     * or it has only ASCII digits but there is a leading zero or the number is too large.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_NAME_NOT_VALID=-2
};

/**
 * Special value that is returned by getNumericValue(Part) when no
 * numeric value is defined for a part.
 * @see MessagePattern.getNumericValue()
 * @stable ICU 4.8
 */
#define UMSGPAT_NO_NUMERIC_VALUE ((double)(-123456789))

U_NAMESPACE_BEGIN

// Forward declarations: this header only declares pointers to these types
// (MessagePattern::numericValuesList and MessagePattern::partsList).
class MessagePatternDoubleList;
class MessagePatternPartsList;

/**
303
* Parses and represents ICU MessageFormat patterns.
304
* Also handles patterns for ChoiceFormat, PluralFormat and SelectFormat.
305
* Used in the implementations of those classes as well as in tools
306
* for message validation, translation and format conversion.
307
* <p>
308
* The parser handles all syntax relevant for identifying message arguments.
309
* This includes "complex" arguments whose style strings contain
310
* nested MessageFormat pattern substrings.
311
* For "simple" arguments (with no nested MessageFormat pattern substrings),
312
* the argument style is not parsed any further.
313
* <p>
314
* The parser handles named and numbered message arguments and allows both in one message.
315
* <p>
316
* Once a pattern has been parsed successfully, iterate through the parsed data
317
* with countParts(), getPart() and related methods.
318
* <p>
319
* The data logically represents a parse tree, but is stored and accessed
320
* as a list of "parts" for fast and simple parsing and to minimize object allocations.
321
* Arguments and nested messages are best handled via recursion.
322
* For every _START "part", MessagePattern.getLimitPartIndex() efficiently returns
323
* the index of the corresponding _LIMIT "part".
324
* <p>
325
* List of "parts":
326
* <pre>
327
* message = MSG_START (SKIP_SYNTAX | INSERT_CHAR | REPLACE_NUMBER | argument)* MSG_LIMIT
328
* argument = noneArg | simpleArg | complexArg
329
* complexArg = choiceArg | pluralArg | selectArg
330
*
331
* noneArg = ARG_START.NONE (ARG_NAME | ARG_NUMBER) ARG_LIMIT.NONE
332
* simpleArg = ARG_START.SIMPLE (ARG_NAME | ARG_NUMBER) ARG_TYPE [ARG_STYLE] ARG_LIMIT.SIMPLE
333
* choiceArg = ARG_START.CHOICE (ARG_NAME | ARG_NUMBER) choiceStyle ARG_LIMIT.CHOICE
334
* pluralArg = ARG_START.PLURAL (ARG_NAME | ARG_NUMBER) pluralStyle ARG_LIMIT.PLURAL
335
* selectArg = ARG_START.SELECT (ARG_NAME | ARG_NUMBER) selectStyle ARG_LIMIT.SELECT
336
*
337
* choiceStyle = ((ARG_INT | ARG_DOUBLE) ARG_SELECTOR message)+
338
* pluralStyle = [ARG_INT | ARG_DOUBLE] (ARG_SELECTOR [ARG_INT | ARG_DOUBLE] message)+
339
* selectStyle = (ARG_SELECTOR message)+
340
* </pre>
341
* <ul>
342
* <li>Literal output text is not represented directly by "parts" but accessed
343
* between parts of a message, from one part's getLimit() to the next part's getIndex().
344
* <li><code>ARG_START.CHOICE</code> stands for an ARG_START Part with ArgType CHOICE.
345
* <li>In the choiceStyle, the ARG_SELECTOR has the '<', the '#' or
346
* the less-than-or-equal-to sign (U+2264).
347
* <li>In the pluralStyle, the first, optional numeric Part has the "offset:" value.
348
* The optional numeric Part between each (ARG_SELECTOR, message) pair
349
* is the value of an explicit-number selector like "=2",
350
* otherwise the selector is a non-numeric identifier.
351
* <li>The REPLACE_NUMBER Part can occur only in an immediate sub-message of the pluralStyle.
352
* </ul>
353
* <p>
354
* This class is not intended for public subclassing.
355
*
356
* @stable ICU 4.8
357
*/
358
class U_COMMON_API MessagePattern : public UObject {
359
public:
360
/**
361
* Constructs an empty MessagePattern with default UMessagePatternApostropheMode.
362
* @param errorCode Standard ICU error code. Its input value must
363
* pass the U_SUCCESS() test, or else the function returns
364
* immediately. Check for U_FAILURE() on output or use with
365
* function chaining. (See User Guide for details.)
366
* @stable ICU 4.8
367
*/
368
MessagePattern(UErrorCode &errorCode);
369
370
/**
371
* Constructs an empty MessagePattern.
372
* @param mode Explicit UMessagePatternApostropheMode.
373
* @param errorCode Standard ICU error code. Its input value must
374
* pass the U_SUCCESS() test, or else the function returns
375
* immediately. Check for U_FAILURE() on output or use with
376
* function chaining. (See User Guide for details.)
377
* @stable ICU 4.8
378
*/
379
MessagePattern(UMessagePatternApostropheMode mode, UErrorCode &errorCode);
380
381
/**
382
* Constructs a MessagePattern with default UMessagePatternApostropheMode and
383
* parses the MessageFormat pattern string.
384
* @param pattern a MessageFormat pattern string
385
* @param parseError Struct to receive information on the position
386
* of an error within the pattern.
387
* Can be NULL.
388
* @param errorCode Standard ICU error code. Its input value must
389
* pass the U_SUCCESS() test, or else the function returns
390
* immediately. Check for U_FAILURE() on output or use with
391
* function chaining. (See User Guide for details.)
392
* TODO: turn @throws into UErrorCode specifics?
393
* @throws IllegalArgumentException for syntax errors in the pattern string
394
* @throws IndexOutOfBoundsException if certain limits are exceeded
395
* (e.g., argument number too high, argument name too long, etc.)
396
* @throws NumberFormatException if a number could not be parsed
397
* @stable ICU 4.8
398
*/
399
MessagePattern(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);
400
401
/**
402
* Copy constructor.
403
* @param other Object to copy.
404
* @stable ICU 4.8
405
*/
406
MessagePattern(const MessagePattern &other);
407
408
/**
409
* Assignment operator.
410
* @param other Object to copy.
411
* @return *this=other
412
* @stable ICU 4.8
413
*/
414
MessagePattern &operator=(const MessagePattern &other);
415
416
/**
417
* Destructor.
418
* @stable ICU 4.8
419
*/
420
virtual ~MessagePattern();
421
422
/**
423
* Parses a MessageFormat pattern string.
424
* @param pattern a MessageFormat pattern string
425
* @param parseError Struct to receive information on the position
426
* of an error within the pattern.
427
* Can be NULL.
428
* @param errorCode Standard ICU error code. Its input value must
429
* pass the U_SUCCESS() test, or else the function returns
430
* immediately. Check for U_FAILURE() on output or use with
431
* function chaining. (See User Guide for details.)
432
* @return *this
433
* @throws IllegalArgumentException for syntax errors in the pattern string
434
* @throws IndexOutOfBoundsException if certain limits are exceeded
435
* (e.g., argument number too high, argument name too long, etc.)
436
* @throws NumberFormatException if a number could not be parsed
437
* @stable ICU 4.8
438
*/
439
MessagePattern &parse(const UnicodeString &pattern,
440
UParseError *parseError, UErrorCode &errorCode);
441
442
/**
443
* Parses a ChoiceFormat pattern string.
444
* @param pattern a ChoiceFormat pattern string
445
* @param parseError Struct to receive information on the position
446
* of an error within the pattern.
447
* Can be NULL.
448
* @param errorCode Standard ICU error code. Its input value must
449
* pass the U_SUCCESS() test, or else the function returns
450
* immediately. Check for U_FAILURE() on output or use with
451
* function chaining. (See User Guide for details.)
452
* @return *this
453
* @throws IllegalArgumentException for syntax errors in the pattern string
454
* @throws IndexOutOfBoundsException if certain limits are exceeded
455
* (e.g., argument number too high, argument name too long, etc.)
456
* @throws NumberFormatException if a number could not be parsed
457
* @stable ICU 4.8
458
*/
459
MessagePattern &parseChoiceStyle(const UnicodeString &pattern,
460
UParseError *parseError, UErrorCode &errorCode);
461
462
/**
463
* Parses a PluralFormat pattern string.
464
* @param pattern a PluralFormat pattern string
465
* @param parseError Struct to receive information on the position
466
* of an error within the pattern.
467
* Can be NULL.
468
* @param errorCode Standard ICU error code. Its input value must
469
* pass the U_SUCCESS() test, or else the function returns
470
* immediately. Check for U_FAILURE() on output or use with
471
* function chaining. (See User Guide for details.)
472
* @return *this
473
* @throws IllegalArgumentException for syntax errors in the pattern string
474
* @throws IndexOutOfBoundsException if certain limits are exceeded
475
* (e.g., argument number too high, argument name too long, etc.)
476
* @throws NumberFormatException if a number could not be parsed
477
* @stable ICU 4.8
478
*/
479
MessagePattern &parsePluralStyle(const UnicodeString &pattern,
480
UParseError *parseError, UErrorCode &errorCode);
481
482
/**
483
* Parses a SelectFormat pattern string.
484
* @param pattern a SelectFormat pattern string
485
* @param parseError Struct to receive information on the position
486
* of an error within the pattern.
487
* Can be NULL.
488
* @param errorCode Standard ICU error code. Its input value must
489
* pass the U_SUCCESS() test, or else the function returns
490
* immediately. Check for U_FAILURE() on output or use with
491
* function chaining. (See User Guide for details.)
492
* @return *this
493
* @throws IllegalArgumentException for syntax errors in the pattern string
494
* @throws IndexOutOfBoundsException if certain limits are exceeded
495
* (e.g., argument number too high, argument name too long, etc.)
496
* @throws NumberFormatException if a number could not be parsed
497
* @stable ICU 4.8
498
*/
499
MessagePattern &parseSelectStyle(const UnicodeString &pattern,
500
UParseError *parseError, UErrorCode &errorCode);
501
502
/**
503
* Clears this MessagePattern.
504
* countParts() will return 0.
505
* @stable ICU 4.8
506
*/
507
void clear();
508
509
/**
510
* Clears this MessagePattern and sets the UMessagePatternApostropheMode.
511
* countParts() will return 0.
512
* @param mode The new UMessagePatternApostropheMode.
513
* @stable ICU 4.8
514
*/
515
void clearPatternAndSetApostropheMode(UMessagePatternApostropheMode mode) {
516
clear();
517
aposMode=mode;
518
}
519
520
/**
521
* @param other another object to compare with.
522
* @return TRUE if this object is equivalent to the other one.
523
* @stable ICU 4.8
524
*/
525
UBool operator==(const MessagePattern &other) const;
526
527
/**
528
* @param other another object to compare with.
529
* @return FALSE if this object is equivalent to the other one.
530
* @stable ICU 4.8
531
*/
532
inline UBool operator!=(const MessagePattern &other) const {
533
return !operator==(other);
534
}
535
536
/**
537
* @return A hash code for this object.
538
* @stable ICU 4.8
539
*/
540
int32_t hashCode() const;
541
542
/**
543
* @return this instance's UMessagePatternApostropheMode.
544
* @stable ICU 4.8
545
*/
546
UMessagePatternApostropheMode getApostropheMode() const {
547
return aposMode;
548
}
549
550
// Java has package-private jdkAposMode() here.
551
// In C++, this is declared in the MessageImpl class.
552
553
/**
554
* @return the parsed pattern string (null if none was parsed).
555
* @stable ICU 4.8
556
*/
557
const UnicodeString &getPatternString() const {
558
return msg;
559
}
560
561
/**
562
* Does the parsed pattern have named arguments like {first_name}?
563
* @return TRUE if the parsed pattern has at least one named argument.
564
* @stable ICU 4.8
565
*/
566
UBool hasNamedArguments() const {
567
return hasArgNames;
568
}
569
570
/**
571
* Does the parsed pattern have numbered arguments like {2}?
572
* @return TRUE if the parsed pattern has at least one numbered argument.
573
* @stable ICU 4.8
574
*/
575
UBool hasNumberedArguments() const {
576
return hasArgNumbers;
577
}
578
579
/**
580
* Validates and parses an argument name or argument number string.
581
* An argument name must be a "pattern identifier", that is, it must contain
582
* no Unicode Pattern_Syntax or Pattern_White_Space characters.
583
* If it only contains ASCII digits, then it must be a small integer with no leading zero.
584
* @param name Input string.
585
* @return &gt;=0 if the name is a valid number,
586
* ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits,
587
* ARG_NAME_NOT_VALID (-2) if it is neither.
588
* @stable ICU 4.8
589
*/
590
static int32_t validateArgumentName(const UnicodeString &name);
591
592
/**
593
* Returns a version of the parsed pattern string where each ASCII apostrophe
594
* is doubled (escaped) if it is not already, and if it is not interpreted as quoting syntax.
595
* <p>
596
* For example, this turns "I don't '{know}' {gender,select,female{h''er}other{h'im}}."
597
* into "I don''t '{know}' {gender,select,female{h''er}other{h''im}}."
598
* @return the deep-auto-quoted version of the parsed pattern string.
599
* @see MessageFormat.autoQuoteApostrophe()
600
* @stable ICU 4.8
601
*/
602
UnicodeString autoQuoteApostropheDeep() const;
603
604
class Part;
605
606
/**
607
* Returns the number of "parts" created by parsing the pattern string.
608
* Returns 0 if no pattern has been parsed or clear() was called.
609
* @return the number of pattern parts.
610
* @stable ICU 4.8
611
*/
612
int32_t countParts() const {
613
return partsLength;
614
}
615
616
/**
617
* Gets the i-th pattern "part".
618
* @param i The index of the Part data. (0..countParts()-1)
619
* @return the i-th pattern "part".
620
* @stable ICU 4.8
621
*/
622
const Part &getPart(int32_t i) const {
623
return parts[i];
624
}
625
626
/**
627
* Returns the UMessagePatternPartType of the i-th pattern "part".
628
* Convenience method for getPart(i).getType().
629
* @param i The index of the Part data. (0..countParts()-1)
630
* @return The UMessagePatternPartType of the i-th Part.
631
* @stable ICU 4.8
632
*/
633
UMessagePatternPartType getPartType(int32_t i) const {
634
return getPart(i).type;
635
}
636
637
/**
638
* Returns the pattern index of the specified pattern "part".
639
* Convenience method for getPart(partIndex).getIndex().
640
* @param partIndex The index of the Part data. (0..countParts()-1)
641
* @return The pattern index of this Part.
642
* @stable ICU 4.8
643
*/
644
int32_t getPatternIndex(int32_t partIndex) const {
645
return getPart(partIndex).index;
646
}
647
648
/**
649
* Returns the substring of the pattern string indicated by the Part.
650
* Convenience method for getPatternString().substring(part.getIndex(), part.getLimit()).
651
* @param part a part of this MessagePattern.
652
* @return the substring associated with part.
653
* @stable ICU 4.8
654
*/
655
UnicodeString getSubstring(const Part &part) const {
656
return msg.tempSubString(part.index, part.length);
657
}
658
659
/**
660
* Compares the part's substring with the input string s.
661
* @param part a part of this MessagePattern.
662
* @param s a string.
663
* @return TRUE if getSubstring(part).equals(s).
664
* @stable ICU 4.8
665
*/
666
UBool partSubstringMatches(const Part &part, const UnicodeString &s) const {
667
return 0==msg.compare(part.index, part.length, s);
668
}
669
670
/**
671
* Returns the numeric value associated with an ARG_INT or ARG_DOUBLE.
672
* @param part a part of this MessagePattern.
673
* @return the part's numeric value, or UMSGPAT_NO_NUMERIC_VALUE if this is not a numeric part.
674
* @stable ICU 4.8
675
*/
676
double getNumericValue(const Part &part) const;
677
678
/**
679
* Returns the "offset:" value of a PluralFormat argument, or 0 if none is specified.
680
* @param pluralStart the index of the first PluralFormat argument style part. (0..countParts()-1)
681
* @return the "offset:" value.
682
* @stable ICU 4.8
683
*/
684
double getPluralOffset(int32_t pluralStart) const;
685
686
/**
687
* Returns the index of the ARG|MSG_LIMIT part corresponding to the ARG|MSG_START at start.
688
* @param start The index of some Part data (0..countParts()-1);
689
* this Part should be of Type ARG_START or MSG_START.
690
* @return The first i>start where getPart(i).getType()==ARG|MSG_LIMIT at the same nesting level,
691
* or start itself if getPartType(msgStart)!=ARG|MSG_START.
692
* @stable ICU 4.8
693
*/
694
int32_t getLimitPartIndex(int32_t start) const {
695
int32_t limit=getPart(start).limitPartIndex;
696
if(limit<start) {
697
return start;
698
}
699
return limit;
700
}
701
702
/**
703
* A message pattern "part", representing a pattern parsing event.
704
* There is a part for the start and end of a message or argument,
705
* for quoting and escaping of and with ASCII apostrophes,
706
* and for syntax elements of "complex" arguments.
707
* @stable ICU 4.8
708
*/
709
class Part : public UMemory {
710
public:
711
/**
712
* Default constructor, do not use.
713
* @internal
714
*/
715
Part() {}
716
717
/**
718
* Returns the type of this part.
719
* @return the part type.
720
* @stable ICU 4.8
721
*/
722
UMessagePatternPartType getType() const {
723
return type;
724
}
725
726
/**
727
* Returns the pattern string index associated with this Part.
728
* @return this part's pattern string index.
729
* @stable ICU 4.8
730
*/
731
int32_t getIndex() const {
732
return index;
733
}
734
735
/**
736
* Returns the length of the pattern substring associated with this Part.
737
* This is 0 for some parts.
738
* @return this part's pattern substring length.
739
* @stable ICU 4.8
740
*/
741
int32_t getLength() const {
742
return length;
743
}
744
745
/**
746
* Returns the pattern string limit (exclusive-end) index associated with this Part.
747
* Convenience method for getIndex()+getLength().
748
* @return this part's pattern string limit index, same as getIndex()+getLength().
749
* @stable ICU 4.8
750
*/
751
int32_t getLimit() const {
752
return index+length;
753
}
754
755
/**
756
* Returns a value associated with this part.
757
* See the documentation of each part type for details.
758
* @return the part value.
759
* @stable ICU 4.8
760
*/
761
int32_t getValue() const {
762
return value;
763
}
764
765
/**
766
* Returns the argument type if this part is of type ARG_START or ARG_LIMIT,
767
* otherwise UMSGPAT_ARG_TYPE_NONE.
768
* @return the argument type for this part.
769
* @stable ICU 4.8
770
*/
771
UMessagePatternArgType getArgType() const {
772
UMessagePatternPartType type=getType();
773
if(type==UMSGPAT_PART_TYPE_ARG_START || type==UMSGPAT_PART_TYPE_ARG_LIMIT) {
774
return (UMessagePatternArgType)value;
775
} else {
776
return UMSGPAT_ARG_TYPE_NONE;
777
}
778
}
779
780
/**
781
* Indicates whether the Part type has a numeric value.
782
* If so, then that numeric value can be retrieved via MessagePattern.getNumericValue().
783
* @param type The Part type to be tested.
784
* @return TRUE if the Part type has a numeric value.
785
* @stable ICU 4.8
786
*/
787
static UBool hasNumericValue(UMessagePatternPartType type) {
788
return type==UMSGPAT_PART_TYPE_ARG_INT || type==UMSGPAT_PART_TYPE_ARG_DOUBLE;
789
}
790
791
/**
792
* @param other another object to compare with.
793
* @return TRUE if this object is equivalent to the other one.
794
* @stable ICU 4.8
795
*/
796
UBool operator==(const Part &other) const;
797
798
/**
799
* @param other another object to compare with.
800
* @return FALSE if this object is equivalent to the other one.
801
* @stable ICU 4.8
802
*/
803
inline UBool operator!=(const Part &other) const {
804
return !operator==(other);
805
}
806
807
/**
808
* @return A hash code for this object.
809
* @stable ICU 4.8
810
*/
811
int32_t hashCode() const {
812
return ((type*37+index)*37+length)*37+value;
813
}
814
815
private:
816
friend class MessagePattern;
817
818
static const int32_t MAX_LENGTH=0xffff;
819
static const int32_t MAX_VALUE=0x7fff;
820
821
// Some fields are not final because they are modified during pattern parsing.
822
// After pattern parsing, the parts are effectively immutable.
823
UMessagePatternPartType type;
824
int32_t index;
825
uint16_t length;
826
int16_t value;
827
int32_t limitPartIndex;
828
};
829
830
private:
831
void preParse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);
832
833
void postParse();
834
835
int32_t parseMessage(int32_t index, int32_t msgStartLength,
836
int32_t nestingLevel, UMessagePatternArgType parentType,
837
UParseError *parseError, UErrorCode &errorCode);
838
839
int32_t parseArg(int32_t index, int32_t argStartLength, int32_t nestingLevel,
840
UParseError *parseError, UErrorCode &errorCode);
841
842
int32_t parseSimpleStyle(int32_t index, UParseError *parseError, UErrorCode &errorCode);
843
844
int32_t parseChoiceStyle(int32_t index, int32_t nestingLevel,
845
UParseError *parseError, UErrorCode &errorCode);
846
847
int32_t parsePluralOrSelectStyle(UMessagePatternArgType argType, int32_t index, int32_t nestingLevel,
848
UParseError *parseError, UErrorCode &errorCode);
849
850
/**
851
* Validates and parses an argument name or argument number string.
852
* This internal method assumes that the input substring is a "pattern identifier".
853
* @return &gt;=0 if the name is a valid number,
854
* ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits,
855
* ARG_NAME_NOT_VALID (-2) if it is neither.
856
* @see #validateArgumentName(String)
857
*/
858
static int32_t parseArgNumber(const UnicodeString &s, int32_t start, int32_t limit);
859
860
int32_t parseArgNumber(int32_t start, int32_t limit) {
861
return parseArgNumber(msg, start, limit);
862
}
863
864
/**
865
* Parses a number from the specified message substring.
866
* @param start start index into the message string
867
* @param limit limit index into the message string, must be start<limit
868
* @param allowInfinity TRUE if U+221E is allowed (for ChoiceFormat)
869
* @param parseError
870
* @param errorCode
871
*/
872
void parseDouble(int32_t start, int32_t limit, UBool allowInfinity,
873
UParseError *parseError, UErrorCode &errorCode);
874
875
// Java has package-private appendReducedApostrophes() here.
876
// In C++, this is declared in the MessageImpl class.
877
878
int32_t skipWhiteSpace(int32_t index);
879
880
int32_t skipIdentifier(int32_t index);
881
882
/**
883
* Skips a sequence of characters that could occur in a double value.
884
* Does not fully parse or validate the value.
885
*/
886
int32_t skipDouble(int32_t index);
887
888
static UBool isArgTypeChar(UChar32 c);
889
890
UBool isChoice(int32_t index);
891
892
UBool isPlural(int32_t index);
893
894
UBool isSelect(int32_t index);
895
896
UBool isOrdinal(int32_t index);
897
898
/**
899
* @return TRUE if we are inside a MessageFormat (sub-)pattern,
900
* as opposed to inside a top-level choice/plural/select pattern.
901
*/
902
UBool inMessageFormatPattern(int32_t nestingLevel);
903
904
/**
905
* @return TRUE if we are in a MessageFormat sub-pattern
906
* of a top-level ChoiceFormat pattern.
907
*/
908
UBool inTopLevelChoiceMessage(int32_t nestingLevel, UMessagePatternArgType parentType);
909
910
void addPart(UMessagePatternPartType type, int32_t index, int32_t length,
911
int32_t value, UErrorCode &errorCode);
912
913
void addLimitPart(int32_t start,
914
UMessagePatternPartType type, int32_t index, int32_t length,
915
int32_t value, UErrorCode &errorCode);
916
917
void addArgDoublePart(double numericValue, int32_t start, int32_t length, UErrorCode &errorCode);
918
919
void setParseError(UParseError *parseError, int32_t index);
920
921
UBool init(UErrorCode &errorCode);
922
UBool copyStorage(const MessagePattern &other, UErrorCode &errorCode);
923
924
UMessagePatternApostropheMode aposMode;
925
UnicodeString msg;
926
// ArrayList<Part> parts=new ArrayList<Part>();
927
MessagePatternPartsList *partsList;
928
Part *parts;
929
int32_t partsLength;
930
// ArrayList<Double> numericValues;
931
MessagePatternDoubleList *numericValuesList;
932
double *numericValues;
933
int32_t numericValuesLength;
934
UBool hasArgNames;
935
UBool hasArgNumbers;
936
UBool needsAutoQuoting;
937
};
938
939
U_NAMESPACE_END

#endif  // !UCONFIG_NO_FORMATTING

#endif  // __MESSAGEPATTERN_H__