Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/openjdk-multiarch-jdk8u
Path: blob/aarch64-shenandoah-jdk8u272-b10/jdk/src/share/classes/sun/tools/java/Scanner.java
38918 views
1
/*
2
* Copyright (c) 1994, 2004, Oracle and/or its affiliates. All rights reserved.
3
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4
*
5
* This code is free software; you can redistribute it and/or modify it
6
* under the terms of the GNU General Public License version 2 only, as
7
* published by the Free Software Foundation. Oracle designates this
8
* particular file as subject to the "Classpath" exception as provided
9
* by Oracle in the LICENSE file that accompanied this code.
10
*
11
* This code is distributed in the hope that it will be useful, but WITHOUT
12
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14
* version 2 for more details (a copy is included in the LICENSE file that
15
* accompanied this code).
16
*
17
* You should have received a copy of the GNU General Public License version
18
* 2 along with this work; if not, write to the Free Software Foundation,
19
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20
*
21
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22
* or visit www.oracle.com if you need additional information or have any
23
* questions.
24
*/
25
26
package sun.tools.java;
27
28
import java.io.IOException;
29
import java.io.InputStream;
30
import java.util.Hashtable;
31
32
/**
33
* A Scanner for Java tokens. Errors are reported
34
* to the environment object.<p>
35
*
36
* The scanner keeps track of the current token,
37
* the value of the current token (if any), and the start
38
* position of the current token.<p>
39
*
40
* The scan() method advances the scanner to the next
41
* token in the input.<p>
42
*
43
* The match() method is used to quickly match opening
44
* brackets (i.e. '(', '{', or '[') with their closing
45
* counterpart. This is useful during error recovery.<p>
46
*
47
* A position consists of: ((linenr << WHEREOFFSETBITS) | offset)
48
* this means that both the line number and the exact offset into
49
* the file are encoded in each position value.<p>
50
*
51
* The compiler treats either "\n", "\r" or "\r\n" as the
52
* end of a line.<p>
53
*
54
* WARNING: The contents of this source file are not part of any
55
* supported API. Code that depends on them does so at its own risk:
56
* they are subject to change or removal without notice.
57
*
58
* @author Arthur van Hoff
59
*/
60
61
public
62
class Scanner implements Constants {
63
/**
64
* The increment for each character.
65
*/
66
public static final long OFFSETINC = 1;
67
68
/**
69
* The increment for each line.
70
*/
71
public static final long LINEINC = 1L << WHEREOFFSETBITS;
72
73
/**
74
* End of input
75
*/
76
public static final int EOF = -1;
77
78
/**
79
* Where errors are reported
80
*/
81
public Environment env;
82
83
/**
84
* Input reader
85
*/
86
protected ScannerInputReader in;
87
88
/**
89
* If true, present all comments as tokens.
90
* Contents are not saved, but positions are recorded accurately,
91
* so the comment can be recovered from the text.
92
* Line terminations are also returned as comment tokens,
93
* and may be distinguished by their start and end positions,
94
* which are equal (meaning, these tokens contain no chars).
95
*/
96
public boolean scanComments = false;
97
98
/**
99
* Current token
100
*/
101
public int token;
102
103
/**
104
* The position of the current token
105
*/
106
public long pos;
107
108
/**
109
* The position of the previous token
110
*/
111
public long prevPos;
112
113
/**
114
* The current character
115
*/
116
protected int ch;
117
118
/*
119
* Token values.
120
*/
121
public char charValue;
122
public int intValue;
123
public long longValue;
124
public float floatValue;
125
public double doubleValue;
126
public String stringValue;
127
public Identifier idValue;
128
public int radix; // Radix, when reading int or long
129
130
/*
131
* A doc comment preceding the most recent token
132
*/
133
public String docComment;
134
135
/*
136
* A growable character buffer.
137
*/
138
private int count;
139
private char buffer[] = new char[1024];
140
private void growBuffer() {
141
char newBuffer[] = new char[buffer.length * 2];
142
System.arraycopy(buffer, 0, newBuffer, 0, buffer.length);
143
buffer = newBuffer;
144
}
145
146
// The following two methods have been hand-inlined in
147
// scanDocComment. If you make changes here, you should
148
// check to see if scanDocComment also needs modification.
149
private void putc(int ch) {
150
if (count == buffer.length) {
151
growBuffer();
152
}
153
buffer[count++] = (char)ch;
154
}
155
156
private String bufferString() {
157
return new String(buffer, 0, count);
158
}
159
160
/**
 * Creates a scanner over the given input stream. The first token is
 * scanned as part of construction (see useInputStream).
 *
 * @param env the environment that receives error reports
 * @param in  the stream to read source text from
 * @throws IOException if reading the stream fails
 */
public Scanner(Environment env, InputStream in) throws IOException {
    this.env = env;
    useInputStream(in);
}
167
168
/**
169
* Setup input from the given input stream,
170
* and scan the first token from it.
171
*/
172
protected void useInputStream(InputStream in) throws IOException {
173
try {
174
this.in = new ScannerInputReader(env, in);
175
} catch (Exception e) {
176
env.setCharacterEncoding(null);
177
this.in = new ScannerInputReader(env, in);
178
}
179
180
ch = this.in.read();
181
prevPos = this.in.pos;
182
183
scan();
184
}
185
186
/**
 * Creates a scanner that has no input attached yet. Subclasses are
 * expected to call useInputStream at the right time.
 *
 * @param env the environment that receives error reports
 */
protected Scanner(Environment env) {
    this.env = env;
}
193
194
/**
195
* Define a keyword.
196
*/
197
private static void defineKeyword(int val) {
198
Identifier.lookup(opNames[val]).setType(val);
199
}
200
201
/**
202
* Initialized keyword and token Hashtables
203
*/
204
static {
205
// Statement keywords
206
defineKeyword(FOR);
207
defineKeyword(IF);
208
defineKeyword(ELSE);
209
defineKeyword(WHILE);
210
defineKeyword(DO);
211
defineKeyword(SWITCH);
212
defineKeyword(CASE);
213
defineKeyword(DEFAULT);
214
defineKeyword(BREAK);
215
defineKeyword(CONTINUE);
216
defineKeyword(RETURN);
217
defineKeyword(TRY);
218
defineKeyword(CATCH);
219
defineKeyword(FINALLY);
220
defineKeyword(THROW);
221
222
// Type defineKeywords
223
defineKeyword(BYTE);
224
defineKeyword(CHAR);
225
defineKeyword(SHORT);
226
defineKeyword(INT);
227
defineKeyword(LONG);
228
defineKeyword(FLOAT);
229
defineKeyword(DOUBLE);
230
defineKeyword(VOID);
231
defineKeyword(BOOLEAN);
232
233
// Expression keywords
234
defineKeyword(INSTANCEOF);
235
defineKeyword(TRUE);
236
defineKeyword(FALSE);
237
defineKeyword(NEW);
238
defineKeyword(THIS);
239
defineKeyword(SUPER);
240
defineKeyword(NULL);
241
242
// Declaration keywords
243
defineKeyword(IMPORT);
244
defineKeyword(CLASS);
245
defineKeyword(EXTENDS);
246
defineKeyword(IMPLEMENTS);
247
defineKeyword(INTERFACE);
248
defineKeyword(PACKAGE);
249
defineKeyword(THROWS);
250
251
// Modifier keywords
252
defineKeyword(PRIVATE);
253
defineKeyword(PUBLIC);
254
defineKeyword(PROTECTED);
255
defineKeyword(STATIC);
256
defineKeyword(TRANSIENT);
257
defineKeyword(SYNCHRONIZED);
258
defineKeyword(NATIVE);
259
defineKeyword(ABSTRACT);
260
defineKeyword(VOLATILE);
261
defineKeyword(FINAL);
262
defineKeyword(STRICTFP);
263
264
// reserved keywords
265
defineKeyword(CONST);
266
defineKeyword(GOTO);
267
}
268
269
/**
270
* Scan a comment. This method should be
271
* called once the initial /, * and the next
272
* character have been read.
273
*/
274
private void skipComment() throws IOException {
275
while (true) {
276
switch (ch) {
277
case EOF:
278
env.error(pos, "eof.in.comment");
279
return;
280
281
case '*':
282
if ((ch = in.read()) == '/') {
283
ch = in.read();
284
return;
285
}
286
break;
287
288
default:
289
ch = in.read();
290
break;
291
}
292
}
293
}
294
295
/**
296
* Scan a doc comment. This method should be called
297
* once the initial /, * and * have been read. It gathers
298
* the content of the comment (witout leading spaces and '*'s)
299
* in the string buffer.
300
*/
301
private String scanDocComment() throws IOException {
302
// Note: this method has been hand-optimized to yield
303
// better performance. This was done after it was noted
304
// that javadoc spent a great deal of its time here.
305
// This should also help the performance of the compiler
306
// as well -- it scans the doc comments to find
307
// @deprecated tags.
308
//
309
// The logic of the method has been completely rewritten
310
// to avoid the use of flags that need to be looked at
311
// for every character read. Members that are accessed
312
// more than once have been stored in local variables.
313
// The methods putc() and bufferString() have been
314
// inlined by hand. Extra cases have been added to
315
// switch statements to trick the compiler into generating
316
// a tableswitch instead of a lookupswitch.
317
//
318
// This implementation aims to preserve the previous
319
// behavior of this method.
320
321
int c;
322
323
// Put `in' in a local variable.
324
final ScannerInputReader in = this.in;
325
326
// We maintain the buffer locally rather than calling putc().
327
char[] buffer = this.buffer;
328
int count = 0;
329
330
// We are called pointing at the second star of the doc
331
// comment:
332
//
333
// Input: /** the rest of the comment ... */
334
// ^
335
//
336
// We rely on this in the code below.
337
338
// Consume any number of stars.
339
while ((c = in.read()) == '*')
340
;
341
342
// Is the comment of the form /**/, /***/, /****/, etc.?
343
if (c == '/') {
344
// Set ch and return
345
ch = in.read();
346
return "";
347
}
348
349
// Skip a newline on the first line of the comment.
350
if (c == '\n') {
351
c = in.read();
352
}
353
354
outerLoop:
355
// The outerLoop processes the doc comment, looping once
356
// for each line. For each line, it first strips off
357
// whitespace, then it consumes any stars, then it
358
// puts the rest of the line into our buffer.
359
while (true) {
360
361
// The wsLoop consumes whitespace from the beginning
362
// of each line.
363
wsLoop:
364
while (true) {
365
switch (c) {
366
case ' ':
367
case '\t':
368
// We could check for other forms of whitespace
369
// as well, but this is left as is for minimum
370
// disturbance of functionality.
371
//
372
// Just skip whitespace.
373
c = in.read();
374
break;
375
376
// We have added extra cases here to trick the
377
// compiler into using a tableswitch instead of
378
// a lookupswitch. They can be removed without
379
// a change in meaning.
380
case 10: case 11: case 12: case 13: case 14: case 15:
381
case 16: case 17: case 18: case 19: case 20: case 21:
382
case 22: case 23: case 24: case 25: case 26: case 27:
383
case 28: case 29: case 30: case 31:
384
default:
385
// We've seen something that isn't whitespace,
386
// jump out.
387
break wsLoop;
388
}
389
} // end wsLoop.
390
391
// Are there stars here? If so, consume them all
392
// and check for the end of comment.
393
if (c == '*') {
394
// Skip all of the stars...
395
do {
396
c = in.read();
397
} while (c == '*');
398
399
// ...then check for the closing slash.
400
if (c == '/') {
401
// We're done with the doc comment.
402
// Set ch and break out.
403
ch = in.read();
404
break outerLoop;
405
}
406
}
407
408
// The textLoop processes the rest of the characters
409
// on the line, adding them to our buffer.
410
textLoop:
411
while (true) {
412
switch (c) {
413
case EOF:
414
// We've seen a premature EOF. Break out
415
// of the loop.
416
env.error(pos, "eof.in.comment");
417
ch = EOF;
418
break outerLoop;
419
420
case '*':
421
// Is this just a star? Or is this the
422
// end of a comment?
423
c = in.read();
424
if (c == '/') {
425
// This is the end of the comment,
426
// set ch and return our buffer.
427
ch = in.read();
428
break outerLoop;
429
}
430
// This is just an ordinary star. Add it to
431
// the buffer.
432
if (count == buffer.length) {
433
growBuffer();
434
buffer = this.buffer;
435
}
436
buffer[count++] = '*';
437
break;
438
439
case '\n':
440
// We've seen a newline. Add it to our
441
// buffer and break out of this loop,
442
// starting fresh on a new line.
443
if (count == buffer.length) {
444
growBuffer();
445
buffer = this.buffer;
446
}
447
buffer[count++] = '\n';
448
c = in.read();
449
break textLoop;
450
451
// Again, the extra cases here are a trick
452
// to get the compiler to generate a tableswitch.
453
case 0: case 1: case 2: case 3: case 4: case 5:
454
case 6: case 7: case 8: case 11: case 12: case 13:
455
case 14: case 15: case 16: case 17: case 18: case 19:
456
case 20: case 21: case 22: case 23: case 24: case 25:
457
case 26: case 27: case 28: case 29: case 30: case 31:
458
case 32: case 33: case 34: case 35: case 36: case 37:
459
case 38: case 39: case 40:
460
default:
461
// Add the character to our buffer.
462
if (count == buffer.length) {
463
growBuffer();
464
buffer = this.buffer;
465
}
466
buffer[count++] = (char)c;
467
c = in.read();
468
break;
469
}
470
} // end textLoop
471
} // end outerLoop
472
473
// We have scanned our doc comment. It is stored in
474
// buffer. The previous implementation of scanDocComment
475
// stripped off all trailing spaces and stars from the comment.
476
// We will do this as well, so as to cause a minimum of
477
// disturbance. Is this what we want?
478
if (count > 0) {
479
int i = count - 1;
480
trailLoop:
481
while (i > -1) {
482
switch (buffer[i]) {
483
case ' ':
484
case '\t':
485
case '*':
486
i--;
487
break;
488
// And again, the extra cases here are a trick
489
// to get the compiler to generate a tableswitch.
490
case 0: case 1: case 2: case 3: case 4: case 5:
491
case 6: case 7: case 8: case 10: case 11: case 12:
492
case 13: case 14: case 15: case 16: case 17: case 18:
493
case 19: case 20: case 21: case 22: case 23: case 24:
494
case 25: case 26: case 27: case 28: case 29: case 30:
495
case 31: case 33: case 34: case 35: case 36: case 37:
496
case 38: case 39: case 40:
497
default:
498
break trailLoop;
499
}
500
}
501
count = i + 1;
502
503
// Return the text of the doc comment.
504
return new String(buffer, 0, count);
505
} else {
506
return "";
507
}
508
}
509
510
/**
511
* Scan a number. The first digit of the number should be the current
512
* character. We may be scanning hex, decimal, or octal at this point
513
*/
514
private void scanNumber() throws IOException {
515
boolean seenNonOctal = false;
516
boolean overflow = false;
517
boolean seenDigit = false; // used to detect invalid hex number 0xL
518
radix = (ch == '0' ? 8 : 10);
519
long value = ch - '0';
520
count = 0;
521
putc(ch); // save character in buffer
522
numberLoop:
523
for (;;) {
524
switch (ch = in.read()) {
525
case '.':
526
if (radix == 16)
527
break numberLoop; // an illegal character
528
scanReal();
529
return;
530
531
case '8': case '9':
532
// We can't yet throw an error if reading an octal. We might
533
// discover we're really reading a real.
534
seenNonOctal = true;
535
case '0': case '1': case '2': case '3':
536
case '4': case '5': case '6': case '7':
537
seenDigit = true;
538
putc(ch);
539
if (radix == 10) {
540
overflow = overflow || (value * 10)/10 != value;
541
value = (value * 10) + (ch - '0');
542
overflow = overflow || (value - 1 < -1);
543
} else if (radix == 8) {
544
overflow = overflow || (value >>> 61) != 0;
545
value = (value << 3) + (ch - '0');
546
} else {
547
overflow = overflow || (value >>> 60) != 0;
548
value = (value << 4) + (ch - '0');
549
}
550
break;
551
552
case 'd': case 'D': case 'e': case 'E': case 'f': case 'F':
553
if (radix != 16) {
554
scanReal();
555
return;
556
}
557
// fall through
558
case 'a': case 'A': case 'b': case 'B': case 'c': case 'C':
559
seenDigit = true;
560
putc(ch);
561
if (radix != 16)
562
break numberLoop; // an illegal character
563
overflow = overflow || (value >>> 60) != 0;
564
value = (value << 4) + 10 +
565
Character.toLowerCase((char)ch) - 'a';
566
break;
567
568
case 'l': case 'L':
569
ch = in.read(); // skip over 'l'
570
longValue = value;
571
token = LONGVAL;
572
break numberLoop;
573
574
case 'x': case 'X':
575
// if the first character is a '0' and this is the second
576
// letter, then read in a hexadecimal number. Otherwise, error.
577
if (count == 1 && radix == 8) {
578
radix = 16;
579
seenDigit = false;
580
break;
581
} else {
582
// we'll get an illegal character error
583
break numberLoop;
584
}
585
586
default:
587
intValue = (int)value;
588
token = INTVAL;
589
break numberLoop;
590
}
591
} // while true
592
593
// We have just finished reading the number. The next thing better
594
// not be a letter or digit.
595
// Note: There will be deprecation warnings against these uses
596
// of Character.isJavaLetterOrDigit and Character.isJavaLetter.
597
// Do not fix them yet; allow the compiler to run on pre-JDK1.1 VMs.
598
if (Character.isJavaLetterOrDigit((char)ch) || ch == '.') {
599
env.error(in.pos, "invalid.number");
600
do { ch = in.read(); }
601
while (Character.isJavaLetterOrDigit((char)ch) || ch == '.');
602
intValue = 0;
603
token = INTVAL;
604
} else if (radix == 8 && seenNonOctal) {
605
// A bogus octal literal.
606
intValue = 0;
607
token = INTVAL;
608
env.error(pos, "invalid.octal.number");
609
} else if (radix == 16 && seenDigit == false) {
610
// A hex literal with no digits, 0xL, for example.
611
intValue = 0;
612
token = INTVAL;
613
env.error(pos, "invalid.hex.number");
614
} else {
615
if (token == INTVAL) {
616
// Check for overflow. Note that base 10 literals
617
// have different rules than base 8 and 16.
618
overflow = overflow ||
619
(value & 0xFFFFFFFF00000000L) != 0 ||
620
(radix == 10 && value > 2147483648L);
621
622
if (overflow) {
623
intValue = 0;
624
625
// Give a specific error message which tells
626
// the user the range.
627
switch (radix) {
628
case 8:
629
env.error(pos, "overflow.int.oct");
630
break;
631
case 10:
632
env.error(pos, "overflow.int.dec");
633
break;
634
case 16:
635
env.error(pos, "overflow.int.hex");
636
break;
637
default:
638
throw new CompilerError("invalid radix");
639
}
640
}
641
} else {
642
if (overflow) {
643
longValue = 0;
644
645
// Give a specific error message which tells
646
// the user the range.
647
switch (radix) {
648
case 8:
649
env.error(pos, "overflow.long.oct");
650
break;
651
case 10:
652
env.error(pos, "overflow.long.dec");
653
break;
654
case 16:
655
env.error(pos, "overflow.long.hex");
656
break;
657
default:
658
throw new CompilerError("invalid radix");
659
}
660
}
661
}
662
}
663
}
664
665
/**
666
* Scan a float. We are either looking at the decimal, or we have already
667
* seen it and put it into the buffer. We haven't seen an exponent.
668
* Scan a float. Should be called with the current character is either
669
* the 'e', 'E' or '.'
670
*/
671
private void scanReal() throws IOException {
672
boolean seenExponent = false;
673
boolean isSingleFloat = false;
674
char lastChar;
675
if (ch == '.') {
676
putc(ch);
677
ch = in.read();
678
}
679
680
numberLoop:
681
for ( ; ; ch = in.read()) {
682
switch (ch) {
683
case '0': case '1': case '2': case '3': case '4':
684
case '5': case '6': case '7': case '8': case '9':
685
putc(ch);
686
break;
687
688
case 'e': case 'E':
689
if (seenExponent)
690
break numberLoop; // we'll get a format error
691
putc(ch);
692
seenExponent = true;
693
break;
694
695
case '+': case '-':
696
lastChar = buffer[count - 1];
697
if (lastChar != 'e' && lastChar != 'E')
698
break numberLoop; // this isn't an error, though!
699
putc(ch);
700
break;
701
702
case 'f': case 'F':
703
ch = in.read(); // skip over 'f'
704
isSingleFloat = true;
705
break numberLoop;
706
707
case 'd': case 'D':
708
ch = in.read(); // skip over 'd'
709
// fall through
710
default:
711
break numberLoop;
712
} // sswitch
713
} // loop
714
715
// we have just finished reading the number. The next thing better
716
// not be a letter or digit.
717
if (Character.isJavaLetterOrDigit((char)ch) || ch == '.') {
718
env.error(in.pos, "invalid.number");
719
do { ch = in.read(); }
720
while (Character.isJavaLetterOrDigit((char)ch) || ch == '.');
721
doubleValue = 0;
722
token = DOUBLEVAL;
723
} else {
724
token = isSingleFloat ? FLOATVAL : DOUBLEVAL;
725
try {
726
lastChar = buffer[count - 1];
727
if (lastChar == 'e' || lastChar == 'E'
728
|| lastChar == '+' || lastChar == '-') {
729
env.error(in.pos -1, "float.format");
730
} else if (isSingleFloat) {
731
String string = bufferString();
732
floatValue = Float.valueOf(string).floatValue();
733
if (Float.isInfinite(floatValue)) {
734
env.error(pos, "overflow.float");
735
} else if (floatValue == 0 && !looksLikeZero(string)) {
736
env.error(pos, "underflow.float");
737
}
738
} else {
739
String string = bufferString();
740
doubleValue = Double.valueOf(string).doubleValue();
741
if (Double.isInfinite(doubleValue)) {
742
env.error(pos, "overflow.double");
743
} else if (doubleValue == 0 && !looksLikeZero(string)) {
744
env.error(pos, "underflow.double");
745
}
746
}
747
} catch (NumberFormatException ee) {
748
env.error(pos, "float.format");
749
doubleValue = 0;
750
floatValue = 0;
751
}
752
}
753
return;
754
}
755
756
// We have a token that parses as a number. Is this token possibly zero?
757
// i.e. does it have a non-zero value in the mantissa?
758
private static boolean looksLikeZero(String token) {
759
int length = token.length();
760
for (int i = 0; i < length; i++) {
761
switch (token.charAt(i)) {
762
case 0: case '.':
763
continue;
764
case '1': case '2': case '3': case '4': case '5':
765
case '6': case '7': case '8': case '9':
766
return false;
767
case 'e': case 'E': case 'f': case 'F':
768
return true;
769
}
770
}
771
return true;
772
}
773
774
/**
775
* Scan an escape character.
776
* @return the character or -1 if it escaped an
777
* end-of-line.
778
*/
779
private int scanEscapeChar() throws IOException {
780
long p = in.pos;
781
782
switch (ch = in.read()) {
783
case '0': case '1': case '2': case '3':
784
case '4': case '5': case '6': case '7': {
785
int n = ch - '0';
786
for (int i = 2 ; i > 0 ; i--) {
787
switch (ch = in.read()) {
788
case '0': case '1': case '2': case '3':
789
case '4': case '5': case '6': case '7':
790
n = (n << 3) + ch - '0';
791
break;
792
793
default:
794
if (n > 0xFF) {
795
env.error(p, "invalid.escape.char");
796
}
797
return n;
798
}
799
}
800
ch = in.read();
801
if (n > 0xFF) {
802
env.error(p, "invalid.escape.char");
803
}
804
return n;
805
}
806
807
case 'r': ch = in.read(); return '\r';
808
case 'n': ch = in.read(); return '\n';
809
case 'f': ch = in.read(); return '\f';
810
case 'b': ch = in.read(); return '\b';
811
case 't': ch = in.read(); return '\t';
812
case '\\': ch = in.read(); return '\\';
813
case '\"': ch = in.read(); return '\"';
814
case '\'': ch = in.read(); return '\'';
815
}
816
817
env.error(p, "invalid.escape.char");
818
ch = in.read();
819
return -1;
820
}
821
822
/**
823
* Scan a string. The current character
824
* should be the opening " of the string.
825
*/
826
private void scanString() throws IOException {
827
token = STRINGVAL;
828
count = 0;
829
ch = in.read();
830
831
// Scan a String
832
while (true) {
833
switch (ch) {
834
case EOF:
835
env.error(pos, "eof.in.string");
836
stringValue = bufferString();
837
return;
838
839
case '\r':
840
case '\n':
841
ch = in.read();
842
env.error(pos, "newline.in.string");
843
stringValue = bufferString();
844
return;
845
846
case '"':
847
ch = in.read();
848
stringValue = bufferString();
849
return;
850
851
case '\\': {
852
int c = scanEscapeChar();
853
if (c >= 0) {
854
putc((char)c);
855
}
856
break;
857
}
858
859
default:
860
putc(ch);
861
ch = in.read();
862
break;
863
}
864
}
865
}
866
867
/**
868
* Scan a character. The current character should be
869
* the opening ' of the character constant.
870
*/
871
private void scanCharacter() throws IOException {
872
token = CHARVAL;
873
874
switch (ch = in.read()) {
875
case '\\':
876
int c = scanEscapeChar();
877
charValue = (char)((c >= 0) ? c : 0);
878
break;
879
880
case '\'':
881
// There are two standard problems this case deals with. One
882
// is the malformed single quote constant (i.e. the programmer
883
// uses ''' instead of '\'') and the other is the empty
884
// character constant (i.e. ''). Just consume any number of
885
// single quotes and emit an error message.
886
charValue = 0;
887
env.error(pos, "invalid.char.constant");
888
ch = in.read();
889
while (ch == '\'') {
890
ch = in.read();
891
}
892
return;
893
894
case '\r':
895
case '\n':
896
charValue = 0;
897
env.error(pos, "invalid.char.constant");
898
return;
899
900
default:
901
charValue = (char)ch;
902
ch = in.read();
903
break;
904
}
905
906
if (ch == '\'') {
907
ch = in.read();
908
} else {
909
env.error(pos, "invalid.char.constant");
910
while (true) {
911
switch (ch) {
912
case '\'':
913
ch = in.read();
914
return;
915
case ';':
916
case '\n':
917
case EOF:
918
return;
919
default:
920
ch = in.read();
921
}
922
}
923
}
924
}
925
926
/**
927
* Scan an Identifier. The current character should
928
* be the first character of the identifier.
929
*/
930
private void scanIdentifier() throws IOException {
931
count = 0;
932
933
while (true) {
934
putc(ch);
935
switch (ch = in.read()) {
936
case 'a': case 'b': case 'c': case 'd': case 'e':
937
case 'f': case 'g': case 'h': case 'i': case 'j':
938
case 'k': case 'l': case 'm': case 'n': case 'o':
939
case 'p': case 'q': case 'r': case 's': case 't':
940
case 'u': case 'v': case 'w': case 'x': case 'y':
941
case 'z':
942
case 'A': case 'B': case 'C': case 'D': case 'E':
943
case 'F': case 'G': case 'H': case 'I': case 'J':
944
case 'K': case 'L': case 'M': case 'N': case 'O':
945
case 'P': case 'Q': case 'R': case 'S': case 'T':
946
case 'U': case 'V': case 'W': case 'X': case 'Y':
947
case 'Z':
948
case '0': case '1': case '2': case '3': case '4':
949
case '5': case '6': case '7': case '8': case '9':
950
case '$': case '_':
951
break;
952
953
default:
954
if (!Character.isJavaLetterOrDigit((char)ch)) {
955
idValue = Identifier.lookup(bufferString());
956
token = idValue.getType();
957
return;
958
}
959
}
960
}
961
}
962
963
/**
964
* The ending position of the current token
965
*/
966
// Note: This should be part of the pos itself.
967
public long getEndPos() {
968
return in.pos;
969
}
970
971
/**
972
* If the current token is IDENT, return the identifier occurrence.
973
* It will be freshly allocated.
974
*/
975
public IdentifierToken getIdToken() {
976
return (token != IDENT) ? null : new IdentifierToken(pos, idValue);
977
}
978
979
/**
980
* Scan the next token.
981
* @return the position of the previous token.
982
*/
983
public long scan() throws IOException {
984
return xscan();
985
}
986
987
protected long xscan() throws IOException {
988
final ScannerInputReader in = this.in;
989
long retPos = pos;
990
prevPos = in.pos;
991
docComment = null;
992
while (true) {
993
pos = in.pos;
994
995
switch (ch) {
996
case EOF:
997
token = EOF;
998
return retPos;
999
1000
case '\n':
1001
if (scanComments) {
1002
ch = ' ';
1003
// Avoid this path the next time around.
1004
// Do not just call in.read; we want to present
1005
// a null token (and also avoid read-ahead).
1006
token = COMMENT;
1007
return retPos;
1008
}
1009
case ' ':
1010
case '\t':
1011
case '\f':
1012
ch = in.read();
1013
break;
1014
1015
case '/':
1016
switch (ch = in.read()) {
1017
case '/':
1018
// Parse a // comment
1019
while (((ch = in.read()) != EOF) && (ch != '\n'));
1020
if (scanComments) {
1021
token = COMMENT;
1022
return retPos;
1023
}
1024
break;
1025
1026
case '*':
1027
ch = in.read();
1028
if (ch == '*') {
1029
docComment = scanDocComment();
1030
} else {
1031
skipComment();
1032
}
1033
if (scanComments) {
1034
return retPos;
1035
}
1036
break;
1037
1038
case '=':
1039
ch = in.read();
1040
token = ASGDIV;
1041
return retPos;
1042
1043
default:
1044
token = DIV;
1045
return retPos;
1046
}
1047
break;
1048
1049
case '"':
1050
scanString();
1051
return retPos;
1052
1053
case '\'':
1054
scanCharacter();
1055
return retPos;
1056
1057
case '0': case '1': case '2': case '3': case '4':
1058
case '5': case '6': case '7': case '8': case '9':
1059
scanNumber();
1060
return retPos;
1061
1062
case '.':
1063
switch (ch = in.read()) {
1064
case '0': case '1': case '2': case '3': case '4':
1065
case '5': case '6': case '7': case '8': case '9':
1066
count = 0;
1067
putc('.');
1068
scanReal();
1069
break;
1070
default:
1071
token = FIELD;
1072
}
1073
return retPos;
1074
1075
case '{':
1076
ch = in.read();
1077
token = LBRACE;
1078
return retPos;
1079
1080
case '}':
1081
ch = in.read();
1082
token = RBRACE;
1083
return retPos;
1084
1085
case '(':
1086
ch = in.read();
1087
token = LPAREN;
1088
return retPos;
1089
1090
case ')':
1091
ch = in.read();
1092
token = RPAREN;
1093
return retPos;
1094
1095
case '[':
1096
ch = in.read();
1097
token = LSQBRACKET;
1098
return retPos;
1099
1100
case ']':
1101
ch = in.read();
1102
token = RSQBRACKET;
1103
return retPos;
1104
1105
case ',':
1106
ch = in.read();
1107
token = COMMA;
1108
return retPos;
1109
1110
case ';':
1111
ch = in.read();
1112
token = SEMICOLON;
1113
return retPos;
1114
1115
case '?':
1116
ch = in.read();
1117
token = QUESTIONMARK;
1118
return retPos;
1119
1120
case '~':
1121
ch = in.read();
1122
token = BITNOT;
1123
return retPos;
1124
1125
case ':':
1126
ch = in.read();
1127
token = COLON;
1128
return retPos;
1129
1130
case '-':
1131
switch (ch = in.read()) {
1132
case '-':
1133
ch = in.read();
1134
token = DEC;
1135
return retPos;
1136
1137
case '=':
1138
ch = in.read();
1139
token = ASGSUB;
1140
return retPos;
1141
}
1142
token = SUB;
1143
return retPos;
1144
1145
case '+':
1146
switch (ch = in.read()) {
1147
case '+':
1148
ch = in.read();
1149
token = INC;
1150
return retPos;
1151
1152
case '=':
1153
ch = in.read();
1154
token = ASGADD;
1155
return retPos;
1156
}
1157
token = ADD;
1158
return retPos;
1159
1160
case '<':
1161
switch (ch = in.read()) {
1162
case '<':
1163
if ((ch = in.read()) == '=') {
1164
ch = in.read();
1165
token = ASGLSHIFT;
1166
return retPos;
1167
}
1168
token = LSHIFT;
1169
return retPos;
1170
1171
case '=':
1172
ch = in.read();
1173
token = LE;
1174
return retPos;
1175
}
1176
token = LT;
1177
return retPos;
1178
1179
case '>':
1180
switch (ch = in.read()) {
1181
case '>':
1182
switch (ch = in.read()) {
1183
case '=':
1184
ch = in.read();
1185
token = ASGRSHIFT;
1186
return retPos;
1187
1188
case '>':
1189
if ((ch = in.read()) == '=') {
1190
ch = in.read();
1191
token = ASGURSHIFT;
1192
return retPos;
1193
}
1194
token = URSHIFT;
1195
return retPos;
1196
}
1197
token = RSHIFT;
1198
return retPos;
1199
1200
case '=':
1201
ch = in.read();
1202
token = GE;
1203
return retPos;
1204
}
1205
token = GT;
1206
return retPos;
1207
1208
case '|':
1209
switch (ch = in.read()) {
1210
case '|':
1211
ch = in.read();
1212
token = OR;
1213
return retPos;
1214
1215
case '=':
1216
ch = in.read();
1217
token = ASGBITOR;
1218
return retPos;
1219
}
1220
token = BITOR;
1221
return retPos;
1222
1223
case '&':
1224
switch (ch = in.read()) {
1225
case '&':
1226
ch = in.read();
1227
token = AND;
1228
return retPos;
1229
1230
case '=':
1231
ch = in.read();
1232
token = ASGBITAND;
1233
return retPos;
1234
}
1235
token = BITAND;
1236
return retPos;
1237
1238
case '=':
1239
if ((ch = in.read()) == '=') {
1240
ch = in.read();
1241
token = EQ;
1242
return retPos;
1243
}
1244
token = ASSIGN;
1245
return retPos;
1246
1247
case '%':
1248
if ((ch = in.read()) == '=') {
1249
ch = in.read();
1250
token = ASGREM;
1251
return retPos;
1252
}
1253
token = REM;
1254
return retPos;
1255
1256
case '^':
1257
if ((ch = in.read()) == '=') {
1258
ch = in.read();
1259
token = ASGBITXOR;
1260
return retPos;
1261
}
1262
token = BITXOR;
1263
return retPos;
1264
1265
case '!':
1266
if ((ch = in.read()) == '=') {
1267
ch = in.read();
1268
token = NE;
1269
return retPos;
1270
}
1271
token = NOT;
1272
return retPos;
1273
1274
case '*':
1275
if ((ch = in.read()) == '=') {
1276
ch = in.read();
1277
token = ASGMUL;
1278
return retPos;
1279
}
1280
token = MUL;
1281
return retPos;
1282
1283
case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1284
case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1285
case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1286
case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1287
case 'y': case 'z':
1288
case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1289
case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1290
case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1291
case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1292
case 'Y': case 'Z':
1293
case '$': case '_':
1294
scanIdentifier();
1295
return retPos;
1296
1297
case '\u001a':
1298
// Our one concession to DOS.
1299
if ((ch = in.read()) == EOF) {
1300
token = EOF;
1301
return retPos;
1302
}
1303
env.error(pos, "funny.char");
1304
ch = in.read();
1305
break;
1306
1307
1308
default:
1309
if (Character.isJavaLetter((char)ch)) {
1310
scanIdentifier();
1311
return retPos;
1312
}
1313
env.error(pos, "funny.char");
1314
ch = in.read();
1315
break;
1316
}
1317
}
1318
}
1319
1320
/**
1321
* Scan to a matching '}', ']' or ')'. The current token must be
1322
* a '{', '[' or '(';
1323
*/
1324
public void match(int open, int close) throws IOException {
1325
int depth = 1;
1326
1327
while (true) {
1328
scan();
1329
if (token == open) {
1330
depth++;
1331
} else if (token == close) {
1332
if (--depth == 0) {
1333
return;
1334
}
1335
} else if (token == EOF) {
1336
env.error(pos, "unbalanced.paren");
1337
return;
1338
}
1339
}
1340
}
1341
}
1342
1343