Path: blob/aarch64-shenandoah-jdk8u272-b10/langtools/src/share/classes/com/sun/tools/javadoc/JavaScriptScanner.java
38899 views
/*1* Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.2* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.3*4* This code is free software; you can redistribute it and/or modify it5* under the terms of the GNU General Public License version 2 only, as6* published by the Free Software Foundation. Oracle designates this7* particular file as subject to the "Classpath" exception as provided8* by Oracle in the LICENSE file that accompanied this code.9*10* This code is distributed in the hope that it will be useful, but WITHOUT11* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or12* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License13* version 2 for more details (a copy is included in the LICENSE file that14* accompanied this code).15*16* You should have received a copy of the GNU General Public License version17* 2 along with this work; if not, write to the Free Software Foundation,18* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.19*20* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA21* or visit www.oracle.com if you need additional information or have any22* questions.23*/2425package com.sun.tools.javadoc;2627import java.util.Arrays;28import java.util.HashMap;29import java.util.HashSet;30import java.util.Locale;31import java.util.Map;32import java.util.Set;3334import com.sun.tools.javadoc.JavaScriptScanner.TagParser.Kind;3536import static com.sun.tools.javac.util.LayoutCharacters.EOI;3738/**39* Parser to detect use of JavaScript in documentation comments.40*/41@Deprecated42public class JavaScriptScanner {43public static interface Reporter {44void report();45}4647static class ParseException extends Exception {48private static final long serialVersionUID = 0;49ParseException(String key) {50super(key);51}52}5354private Reporter reporter;5556/** The input buffer, index of most recent character read,57* index of one past last character in buffer.58*/59protected char[] buf;60protected int bp;61protected int buflen;6263/** The current character.64*/65protected char ch;6667private boolean newline = true;6869Map<String, TagParser> tagParsers;70Set<String> uriAttrs;7172public JavaScriptScanner() {73initTagParsers();74initURIAttrs();75}7677public void parse(String comment, Reporter r) {78reporter = r;79String c = comment;80buf = new char[c.length() + 1];81c.getChars(0, c.length(), buf, 0);82buf[buf.length - 1] = EOI;83buflen = buf.length - 1;84bp = -1;85newline = true;86nextChar();8788blockContent();89blockTags();90}9192private void checkHtmlTag(String tag) {93if (tag.equalsIgnoreCase("script")) {94reporter.report();95}96}9798private void checkHtmlAttr(String name, String value) {99String n = name.toLowerCase(Locale.ENGLISH);100// https://www.w3.org/TR/html52/fullindex.html#attributes-table101// See https://www.w3.org/TR/html52/webappapis.html#events-event-handlers102// An event handler has a name, which always starts with "on" and is followed by103// the name of the event for which it is intended.104if (n.startsWith("on")105|| uriAttrs.contains(n)106&& value != null && value.toLowerCase(Locale.ENGLISH).trim().startsWith("javascript:")) {107reporter.report();108}109}110111void nextChar() {112ch = buf[bp < buflen ? ++bp : buflen];113switch (ch) {114case '\f': case '\n': case '\r':115newline = true;116}117}118119/**120* Read block content, consisting of text, html and inline tags.121* Terminated by the end of input, or the beginning of the next block tag:122* i.e. @ as the first non-whitespace character on a line.123*/124@SuppressWarnings("fallthrough")125protected void blockContent() {126127loop:128while (bp < buflen) {129switch (ch) {130case '\n': case '\r': case '\f':131newline = true;132// fallthrough133134case ' ': case '\t':135nextChar();136break;137138case '&':139entity(null);140break;141142case '<':143html();144break;145146case '>':147newline = false;148nextChar();149break;150151case '{':152inlineTag(null);153break;154155case '@':156if (newline) {157break loop;158}159// fallthrough160161default:162newline = false;163nextChar();164}165}166}167168/**169* Read a series of block tags, including their content.170* Standard tags parse their content appropriately.171* Non-standard tags are represented by {@link UnknownBlockTag}.172*/173protected void blockTags() {174while (ch == '@')175blockTag();176}177178/**179* Read a single block tag, including its content.180* Standard tags parse their content appropriately.181* Non-standard tags are represented by {@link UnknownBlockTag}.182*/183protected void blockTag() {184int p = bp;185try {186nextChar();187if (isIdentifierStart(ch)) {188String name = readTagName();189TagParser tp = tagParsers.get(name);190if (tp == null) {191blockContent();192} else {193switch (tp.getKind()) {194case BLOCK:195tp.parse(p);196return;197case INLINE:198return;199}200}201}202blockContent();203} catch (ParseException e) {204blockContent();205}206}207208protected void inlineTag(Void list) {209newline = false;210nextChar();211if (ch == '@') {212inlineTag();213}214}215216/**217* Read a single inline tag, including its content.218* Standard tags parse their content appropriately.219* Non-standard tags are represented by {@link UnknownBlockTag}.220* Malformed tags may be returned as {@link Erroneous}.221*/222protected void inlineTag() {223int p = bp - 1;224try {225nextChar();226if (isIdentifierStart(ch)) {227String name = readTagName();228TagParser tp = tagParsers.get(name);229230if (tp == null) {231skipWhitespace();232inlineText(WhitespaceRetentionPolicy.REMOVE_ALL);233nextChar();234} else {235skipWhitespace();236if (tp.getKind() == TagParser.Kind.INLINE) {237tp.parse(p);238} else { // handle block tags (ex: @see) in inline content239inlineText(WhitespaceRetentionPolicy.REMOVE_ALL); // skip content240nextChar();241}242}243}244} catch (ParseException e) {245}246}247248private static enum WhitespaceRetentionPolicy {249RETAIN_ALL,250REMOVE_FIRST_SPACE,251REMOVE_ALL252}253254/**255* Read plain text content of an inline tag.256* Matching pairs of { } are skipped; the text is terminated by the first257* unmatched }. It is an error if the beginning of the next tag is detected.258*/259private void inlineText(WhitespaceRetentionPolicy whitespacePolicy) throws ParseException {260switch (whitespacePolicy) {261case REMOVE_ALL:262skipWhitespace();263break;264case REMOVE_FIRST_SPACE:265if (ch == ' ')266nextChar();267break;268case RETAIN_ALL:269default:270// do nothing271break;272273}274int pos = bp;275int depth = 1;276277loop:278while (bp < buflen) {279switch (ch) {280case '\n': case '\r': case '\f':281newline = true;282break;283284case ' ': case '\t':285break;286287case '{':288newline = false;289depth++;290break;291292case '}':293if (--depth == 0) {294return;295}296newline = false;297break;298299case '@':300if (newline)301break loop;302newline = false;303break;304305default:306newline = false;307break;308}309nextChar();310}311throw new ParseException("dc.unterminated.inline.tag");312}313314/**315* Read Java class name, possibly followed by member316* Matching pairs of {@literal < >} are skipped. The text is terminated by the first317* unmatched }. It is an error if the beginning of the next tag is detected.318*/319// TODO: boolean allowMember should be enum FORBID, ALLOW, REQUIRE320// TODO: improve quality of parse to forbid bad constructions.321// TODO: update to use ReferenceParser322@SuppressWarnings("fallthrough")323protected void reference(boolean allowMember) throws ParseException {324int pos = bp;325int depth = 0;326327// scan to find the end of the signature, by looking for the first328// whitespace not enclosed in () or <>, or the end of the tag329loop:330while (bp < buflen) {331switch (ch) {332case '\n': case '\r': case '\f':333newline = true;334// fallthrough335336case ' ': case '\t':337if (depth == 0)338break loop;339break;340341case '(':342case '<':343newline = false;344depth++;345break;346347case ')':348case '>':349newline = false;350--depth;351break;352353case '}':354if (bp == pos)355return;356newline = false;357break loop;358359case '@':360if (newline)361break loop;362// fallthrough363364default:365newline = false;366367}368nextChar();369}370371if (depth != 0)372throw new ParseException("dc.unterminated.signature");373}374375/**376* Read Java identifier377* Matching pairs of { } are skipped; the text is terminated by the first378* unmatched }. It is an error if the beginning of the next tag is detected.379*/380@SuppressWarnings("fallthrough")381protected void identifier() throws ParseException {382skipWhitespace();383int pos = bp;384385if (isJavaIdentifierStart(ch)) {386readJavaIdentifier();387return;388}389390throw new ParseException("dc.identifier.expected");391}392393/**394* Read a quoted string.395* It is an error if the beginning of the next tag is detected.396*/397@SuppressWarnings("fallthrough")398protected void quotedString() {399int pos = bp;400nextChar();401402loop:403while (bp < buflen) {404switch (ch) {405case '\n': case '\r': case '\f':406newline = true;407break;408409case ' ': case '\t':410break;411412case '"':413nextChar();414// trim trailing white-space?415return;416417case '@':418if (newline)419break loop;420421}422nextChar();423}424}425426/**427* Read a term ie. one word.428* It is an error if the beginning of the next tag is detected.429*/430@SuppressWarnings("fallthrough")431protected void inlineWord() {432int pos = bp;433int depth = 0;434loop:435while (bp < buflen) {436switch (ch) {437case '\n':438newline = true;439// fallthrough440441case '\r': case '\f': case ' ': case '\t':442return;443444case '@':445if (newline)446break loop;447448case '{':449depth++;450break;451452case '}':453if (depth == 0 || --depth == 0)454return;455break;456}457newline = false;458nextChar();459}460}461462/**463* Read general text content of an inline tag, including HTML entities and elements.464* Matching pairs of { } are skipped; the text is terminated by the first465* unmatched }. It is an error if the beginning of the next tag is detected.466*/467@SuppressWarnings("fallthrough")468private void inlineContent() {469470skipWhitespace();471int pos = bp;472int depth = 1;473474loop:475while (bp < buflen) {476477switch (ch) {478case '\n': case '\r': case '\f':479newline = true;480// fall through481482case ' ': case '\t':483nextChar();484break;485486case '&':487entity(null);488break;489490case '<':491newline = false;492html();493break;494495case '{':496newline = false;497depth++;498nextChar();499break;500501case '}':502newline = false;503if (--depth == 0) {504nextChar();505return;506}507nextChar();508break;509510case '@':511if (newline)512break loop;513// fallthrough514515default:516nextChar();517break;518}519}520521}522523protected void entity(Void list) {524newline = false;525entity();526}527528/**529* Read an HTML entity.530* {@literal &identifier; } or {@literal &#digits; } or {@literal &#xhex-digits; }531*/532protected void entity() {533nextChar();534String name = null;535if (ch == '#') {536int namep = bp;537nextChar();538if (isDecimalDigit(ch)) {539nextChar();540while (isDecimalDigit(ch))541nextChar();542name = new String(buf, namep, bp - namep);543} else if (ch == 'x' || ch == 'X') {544nextChar();545if (isHexDigit(ch)) {546nextChar();547while (isHexDigit(ch))548nextChar();549name = new String(buf, namep, bp - namep);550}551}552} else if (isIdentifierStart(ch)) {553name = readIdentifier();554}555556if (name != null) {557if (ch != ';')558return;559nextChar();560}561}562563/**564* Read the start or end of an HTML tag, or an HTML comment565* {@literal <identifier attrs> } or {@literal </identifier> }566*/567protected void html() {568int p = bp;569nextChar();570if (isIdentifierStart(ch)) {571String name = readIdentifier();572checkHtmlTag(name);573htmlAttrs();574if (ch == '/') {575nextChar();576}577if (ch == '>') {578nextChar();579return;580}581} else if (ch == '/') {582nextChar();583if (isIdentifierStart(ch)) {584readIdentifier();585skipWhitespace();586if (ch == '>') {587nextChar();588return;589}590}591} else if (ch == '!') {592nextChar();593if (ch == '-') {594nextChar();595if (ch == '-') {596nextChar();597while (bp < buflen) {598int dash = 0;599while (ch == '-') {600dash++;601nextChar();602}603// Strictly speaking, a comment should not contain "--"604// so dash > 2 is an error, dash == 2 implies ch == '>'605// See http://www.w3.org/TR/html-markup/syntax.html#syntax-comments606// for more details.607if (dash >= 2 && ch == '>') {608nextChar();609return;610}611612nextChar();613}614}615}616}617618bp = p + 1;619ch = buf[bp];620}621622/**623* Read a series of HTML attributes, terminated by {@literal > }.624* Each attribute is of the form {@literal identifier[=value] }.625* "value" may be unquoted, single-quoted, or double-quoted.626*/627protected void htmlAttrs() {628skipWhitespace();629630loop:631while (isIdentifierStart(ch)) {632int namePos = bp;633String name = readAttributeName();634skipWhitespace();635StringBuilder value = new StringBuilder();636if (ch == '=') {637nextChar();638skipWhitespace();639if (ch == '\'' || ch == '"') {640char quote = ch;641nextChar();642while (bp < buflen && ch != quote) {643if (newline && ch == '@') {644// No point trying to read more.645// In fact, all attrs get discarded by the caller646// and superseded by a malformed.html node because647// the html tag itself is not terminated correctly.648break loop;649}650value.append(ch);651nextChar();652}653nextChar();654} else {655while (bp < buflen && !isUnquotedAttrValueTerminator(ch)) {656value.append(ch);657nextChar();658}659}660skipWhitespace();661}662checkHtmlAttr(name, value.toString());663}664}665666protected void attrValueChar(Void list) {667switch (ch) {668case '&':669entity(list);670break;671672case '{':673inlineTag(list);674break;675676default:677nextChar();678}679}680681protected boolean isIdentifierStart(char ch) {682return Character.isUnicodeIdentifierStart(ch);683}684685protected String readIdentifier() {686int start = bp;687nextChar();688while (bp < buflen && Character.isUnicodeIdentifierPart(ch))689nextChar();690return new String(buf, start, bp - start);691}692693protected String readAttributeName() {694int start = bp;695nextChar();696while (bp < buflen && (Character.isUnicodeIdentifierPart(ch) || ch == '-'))697nextChar();698return new String(buf, start, bp - start);699}700701protected String readTagName() {702int start = bp;703nextChar();704while (bp < buflen705&& (Character.isUnicodeIdentifierPart(ch) || ch == '.'706|| ch == '-' || ch == ':')) {707nextChar();708}709return new String(buf, start, bp - start);710}711712protected boolean isJavaIdentifierStart(char ch) {713return Character.isJavaIdentifierStart(ch);714}715716protected String readJavaIdentifier() {717int start = bp;718nextChar();719while (bp < buflen && Character.isJavaIdentifierPart(ch))720nextChar();721return new String(buf, start, bp - start);722}723724protected boolean isDecimalDigit(char ch) {725return ('0' <= ch && ch <= '9');726}727728protected boolean isHexDigit(char ch) {729return ('0' <= ch && ch <= '9')730|| ('a' <= ch && ch <= 'f')731|| ('A' <= ch && ch <= 'F');732}733734protected boolean isUnquotedAttrValueTerminator(char ch) {735switch (ch) {736case '\f': case '\n': case '\r': case '\t':737case ' ':738case '"': case '\'': case '`':739case '=': case '<': case '>':740return true;741default:742return false;743}744}745746protected boolean isWhitespace(char ch) {747return Character.isWhitespace(ch);748}749750protected void skipWhitespace() {751while (isWhitespace(ch)) {752nextChar();753}754}755756/**757* @param start position of first character of string758* @param end position of character beyond last character to be included759*/760String newString(int start, int end) {761return new String(buf, start, end - start);762}763764static abstract class TagParser {765enum Kind { INLINE, BLOCK }766767final Kind kind;768final String name;769770771TagParser(Kind k, String tk) {772kind = k;773name = tk;774}775776TagParser(Kind k, String tk, boolean retainWhiteSpace) {777this(k, tk);778}779780Kind getKind() {781return kind;782}783784String getName() {785return name;786}787788abstract void parse(int pos) throws ParseException;789}790791/**792* @see <a href="http://docs.oracle.com/javase/7/docs/technotes/tools/solaris/javadoc.html#javadoctags">Javadoc Tags</a>793*/794@SuppressWarnings("deprecation")795private void initTagParsers() {796TagParser[] parsers = {797// @author name-text798new TagParser(Kind.BLOCK, "author") {799@Override800public void parse(int pos) {801blockContent();802}803},804805// {@code text}806new TagParser(Kind.INLINE, "code", true) {807@Override808public void parse(int pos) throws ParseException {809inlineText(WhitespaceRetentionPolicy.REMOVE_FIRST_SPACE);810nextChar();811}812},813814// @deprecated deprecated-text815new TagParser(Kind.BLOCK, "deprecated") {816@Override817public void parse(int pos) {818blockContent();819}820},821822// {@docRoot}823new TagParser(Kind.INLINE, "docRoot") {824@Override825public void parse(int pos) throws ParseException {826if (ch == '}') {827nextChar();828return;829}830inlineText(WhitespaceRetentionPolicy.REMOVE_ALL); // skip unexpected content831nextChar();832throw new ParseException("dc.unexpected.content");833}834},835836// @exception class-name description837new TagParser(Kind.BLOCK, "exception") {838@Override839public void parse(int pos) throws ParseException {840skipWhitespace();841reference(false);842blockContent();843}844},845846// @hidden hidden-text847new TagParser(Kind.BLOCK, "hidden") {848@Override849public void parse(int pos) {850blockContent();851}852},853854// @index search-term options-description855new TagParser(Kind.INLINE, "index") {856@Override857public void parse(int pos) throws ParseException {858skipWhitespace();859if (ch == '}') {860throw new ParseException("dc.no.content");861}862if (ch == '"') quotedString(); else inlineWord();863skipWhitespace();864if (ch != '}') {865inlineContent();866} else {867nextChar();868}869}870},871872// {@inheritDoc}873new TagParser(Kind.INLINE, "inheritDoc") {874@Override875public void parse(int pos) throws ParseException {876if (ch == '}') {877nextChar();878return;879}880inlineText(WhitespaceRetentionPolicy.REMOVE_ALL); // skip unexpected content881nextChar();882throw new ParseException("dc.unexpected.content");883}884},885886// {@link package.class#member label}887new TagParser(Kind.INLINE, "link") {888@Override889public void parse(int pos) throws ParseException {890reference(true);891inlineContent();892}893},894895// {@linkplain package.class#member label}896new TagParser(Kind.INLINE, "linkplain") {897@Override898public void parse(int pos) throws ParseException {899reference(true);900inlineContent();901}902},903904// {@literal text}905new TagParser(Kind.INLINE, "literal", true) {906@Override907public void parse(int pos) throws ParseException {908inlineText(WhitespaceRetentionPolicy.REMOVE_FIRST_SPACE);909nextChar();910}911},912913// @param parameter-name description914new TagParser(Kind.BLOCK, "param") {915@Override916public void parse(int pos) throws ParseException {917skipWhitespace();918919boolean typaram = false;920if (ch == '<') {921typaram = true;922nextChar();923}924925identifier();926927if (typaram) {928if (ch != '>')929throw new ParseException("dc.gt.expected");930nextChar();931}932933skipWhitespace();934blockContent();935}936},937938// @return description939new TagParser(Kind.BLOCK, "return") {940@Override941public void parse(int pos) {942blockContent();943}944},945946// @see reference | quoted-string | HTML947new TagParser(Kind.BLOCK, "see") {948@Override949public void parse(int pos) throws ParseException {950skipWhitespace();951switch (ch) {952case '"':953quotedString();954skipWhitespace();955if (ch == '@'956|| ch == EOI && bp == buf.length - 1) {957return;958}959break;960961case '<':962blockContent();963return;964965case '@':966if (newline)967throw new ParseException("dc.no.content");968break;969970case EOI:971if (bp == buf.length - 1)972throw new ParseException("dc.no.content");973break;974975default:976if (isJavaIdentifierStart(ch) || ch == '#') {977reference(true);978blockContent();979}980}981throw new ParseException("dc.unexpected.content");982}983},984985// @serialData data-description986new TagParser(Kind.BLOCK, "@serialData") {987@Override988public void parse(int pos) {989blockContent();990}991},992993// @serialField field-name field-type description994new TagParser(Kind.BLOCK, "serialField") {995@Override996public void parse(int pos) throws ParseException {997skipWhitespace();998identifier();999skipWhitespace();1000reference(false);1001if (isWhitespace(ch)) {1002skipWhitespace();1003blockContent();1004}1005}1006},10071008// @serial field-description | include | exclude1009new TagParser(Kind.BLOCK, "serial") {1010@Override1011public void parse(int pos) {1012blockContent();1013}1014},10151016// @since since-text1017new TagParser(Kind.BLOCK, "since") {1018@Override1019public void parse(int pos) {1020blockContent();1021}1022},10231024// @throws class-name description1025new TagParser(Kind.BLOCK, "throws") {1026@Override1027public void parse(int pos) throws ParseException {1028skipWhitespace();1029reference(false);1030blockContent();1031}1032},10331034// {@value package.class#field}1035new TagParser(Kind.INLINE, "value") {1036@Override1037public void parse(int pos) throws ParseException {1038reference(true);1039skipWhitespace();1040if (ch == '}') {1041nextChar();1042return;1043}1044nextChar();1045throw new ParseException("dc.unexpected.content");1046}1047},10481049// @version version-text1050new TagParser(Kind.BLOCK, "version") {1051@Override1052public void parse(int pos) {1053blockContent();1054}1055},1056};10571058tagParsers = new HashMap<>();1059for (TagParser p: parsers)1060tagParsers.put(p.getName(), p);10611062}10631064private void initURIAttrs() {1065uriAttrs = new HashSet<>(Arrays.asList(1066// See https://www.w3.org/TR/html4/sgml/dtd.html1067// https://www.w3.org/TR/html5/1068// These are all the attributes that take a %URI or a valid URL potentially surrounded1069// by spaces1070"action", "cite", "classid", "codebase", "data",1071"datasrc", "for", "href", "longdesc", "profile",1072"src", "usemap"1073));1074}10751076}107710781079