Path: blob/aarch64-shenandoah-jdk8u272-b10/jdk/make/src/classes/build/tools/dtdbuilder/DTDParser.java
32287 views
/*1* Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved.2* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.3*4* This code is free software; you can redistribute it and/or modify it5* under the terms of the GNU General Public License version 2 only, as6* published by the Free Software Foundation. Oracle designates this7* particular file as subject to the "Classpath" exception as provided8* by Oracle in the LICENSE file that accompanied this code.9*10* This code is distributed in the hope that it will be useful, but WITHOUT11* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or12* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License13* version 2 for more details (a copy is included in the LICENSE file that14* accompanied this code).15*16* You should have received a copy of the GNU General Public License version17* 2 along with this work; if not, write to the Free Software Foundation,18* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.19*20* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA21* or visit www.oracle.com if you need additional information or have any22* questions.23*/2425package build.tools.dtdbuilder;2627import javax.swing.text.html.parser.*;28import java.net.URL;29import java.io.IOException;30import java.io.InputStream;31import java.util.Enumeration;32import java.util.Vector;33import java.util.Hashtable;34import java.util.BitSet;35import java.text.MessageFormat;3637import sun.misc.MessageUtils;3839/**40* A parser for DTDs. This parser roughly corresponds to the41* rules specified in "The SGML Handbook" by Charles F. Goldfarb.42* The end result of parsing the stream is a DTD object.43*44*45* @see DTD46* @see DTDInputStream47* @author Arthur van Hoff48*/49final50class DTDParser implements DTDConstants {51DTDBuilder dtd;52DTDInputStream in;53int ch;54char str[] = new char[128];55int strpos = 0;56int nerrors = 0;5758/**59* Report an error.60*/61void error(String err, String arg1, String arg2, String arg3) {62nerrors++;6364String msgParams[] = {arg1, arg2, arg3};6566String str = getSubstProp("dtderr." + err, msgParams);67if (str == null) {68str = err + "[" + arg1 + "," + arg2 + "," + arg3 + "]";69}70System.err.println("line " + in.ln + ", dtd " + dtd + ": " + str);71}72void error(String err, String arg1, String arg2) {73error(err, arg1, arg2, "?");74}75void error(String err, String arg1) {76error(err, arg1, "?", "?");77}78void error(String err) {79error(err, "?", "?", "?");80}8182private String getSubstProp(String propName, String args[]) {83String prop = System.getProperty(propName);8485if (prop == null) {86return null;87}8889return MessageFormat.format(prop, (Object[])args);90}9192/**93* Expect a character.94*/95boolean expect(int c) throws IOException {96if (ch != c) {97char str[] = {(char)c};98error("expected", "'" + new String(str) + "'");99return false;100}101ch = in.read();102return true;103}104105/**106* Add a char to the string buffer.107*/108void addString(int c) {109if (strpos == str.length) {110char newstr[] = new char[str.length * 2];111System.arraycopy(str, 0, newstr, 0, str.length);112str = newstr;113}114str[strpos++] = (char)c;115}116117/**118* Get the string which was accumulated in the buffer.119* Pos is the starting position of the string.120*/121String getString(int pos) {122char newstr[] = new char[strpos - pos];123System.arraycopy(str, pos, newstr, 0, strpos - pos);124strpos = pos;125return new String(newstr);126}127128/**129* Get the chars which were accumulated in the buffer.130* Pos is the starting position of the string.131*/132char[] getChars(int pos) {133char newstr[] = new char[strpos - pos];134System.arraycopy(str, pos, newstr, 0, strpos - pos);135strpos = pos;136return newstr;137}138139/**140* Skip spaces. [5] 297:23141*/142void skipSpace() throws IOException {143while (true) {144switch (ch) {145case '\n':146case ' ':147case '\t':148ch = in.read();149break;150151default:152return;153}154}155}156157/**158* Skip tag spaces (includes comments). [65] 372:1159*/160void skipParameterSpace() throws IOException {161while (true) {162switch (ch) {163case '\n':164case ' ':165case '\t':166ch = in.read();167break;168case '-':169if ((ch = in.read()) != '-') {170in.push(ch);171ch = '-';172return;173}174175in.replace++;176while (true) {177switch (ch = in.read()) {178case '-':179if ((ch = in.read()) == '-') {180ch = in.read();181in.replace--;182skipParameterSpace();183return;184}185break;186187case -1:188error("eof.arg", "comment");189in.replace--;190return;191}192}193default:194return;195}196}197}198199/**200* Parse identifier. Uppercase characters are automatically201* folded to lowercase. Returns falsed if no identifier is found.202*/203@SuppressWarnings("fallthrough")204boolean parseIdentifier(boolean lower) throws IOException {205switch (ch) {206case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':207case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':208case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':209case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':210case 'Y': case 'Z':211if (lower) {212ch = 'a' + (ch - 'A');213}214/* fall through */215216case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':217case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':218case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':219case 's': case 't': case 'u': case 'v': case 'w': case 'x':220case 'y': case 'z':221break;222223default:224return false;225}226227addString(ch);228ch = in.read();229parseNameToken(lower);230return true;231}232233/**234* Parses name token. If <code>lower</code> is true, upper case letters235* are folded to lower case. Returns falsed if no token is found.236*/237@SuppressWarnings("fallthrough")238boolean parseNameToken(boolean lower) throws IOException {239boolean first = true;240241while (true) {242switch (ch) {243case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':244case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':245case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':246case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':247case 'Y': case 'Z':248if (lower) {249ch = 'a' + (ch - 'A');250}251/* fall through */252253case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':254case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':255case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':256case 's': case 't': case 'u': case 'v': case 'w': case 'x':257case 'y': case 'z':258259case '0': case '1': case '2': case '3': case '4':260case '5': case '6': case '7': case '8': case '9':261262case '.': case '-':263addString(ch);264ch = in.read();265first = false;266break;267268default:269return !first;270}271}272}273274/**275* Parse a list of identifiers.276*/277Vector<String> parseIdentifierList(boolean lower) throws IOException {278Vector<String> elems = new Vector<>();279skipSpace();280switch (ch) {281case '(':282ch = in.read();283skipParameterSpace();284while (parseNameToken(lower)) {285elems.addElement(getString(0));286skipParameterSpace();287if (ch == '|') {288ch = in.read();289skipParameterSpace();290}291}292expect(')');293skipParameterSpace();294break;295296default:297if (!parseIdentifier(lower)) {298error("expected", "identifier");299break;300}301elems.addElement(getString(0));302skipParameterSpace();303break;304}305return elems;306}307308/**309* Parse and Entity reference. Should be called when310* a & is encountered. The data is put in the string buffer.311* [59] 350:17312*/313private void parseEntityReference() throws IOException {314int pos = strpos;315316if ((ch = in.read()) == '#') {317int n = 0;318ch = in.read();319if (((ch >= 'a') && (ch <= 'z')) || ((ch >= 'A') && (ch <= 'Z'))) {320addString('#');321} else {322while ((ch >= '0') && (ch <= '9')) {323n = (n * 10) + ch - '0';324ch = in.read();325}326if ((ch == ';') || (ch == '\n')) {327ch = in.read();328}329addString(n);330return;331}332}333334while (true) {335switch (ch) {336case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':337case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':338case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':339case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':340case 'Y': case 'Z':341342case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':343case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':344case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':345case 's': case 't': case 'u': case 'v': case 'w': case 'x':346case 'y': case 'z':347348case '0': case '1': case '2': case '3': case '4':349case '5': case '6': case '7': case '8': case '9':350351case '.': case '-':352addString(ch);353ch = in.read();354break;355356default:357if (strpos == pos) {358addString('&');359return;360}361String nm = getString(pos);362Entity ent = dtd.getEntity(nm);363if (ent == null) {364error("undef.entref" + nm);365return;366}367if ((ch == ';') || (ch == '\n')) {368ch = in.read();369}370char data[] = ent.getData();371for (int i = 0 ; i < data.length ; i++) {372addString(data[i]);373}374return;375}376}377}378379/**380* Parse an entity declaration.381* [101] 394:18382* REMIND: external entity type383*/384private void parseEntityDeclaration() throws IOException {385int type = GENERAL;386387skipSpace();388if (ch == '%') {389ch = in.read();390type = PARAMETER;391skipSpace();392}393if (ch == '#') {394addString('#');395ch = in.read();396}397if (!parseIdentifier(false)) {398error("expected", "identifier");399return;400}401String nm = getString(0);402skipParameterSpace();403if (parseIdentifier(false)) {404String tnm = getString(0);405int t = Entity.name2type(tnm);406if (t == 0) {407error("invalid.arg", "entity type", tnm);408} else {409type |= t;410}411skipParameterSpace();412}413414if ((ch != '"') && (ch != '\'')) {415error("expected", "entity value");416skipParameterSpace();417if (ch == '>') {418ch = in.read();419}420return;421}422423int term = ch;424ch = in.read();425while ((ch != -1) && (ch != term)) {426if (ch == '&') {427parseEntityReference();428} else {429addString(ch & 0xFF);430ch = in.read();431}432}433if (ch == term) {434ch = in.read();435}436if (in.replace == 0) {437char data[] = getChars(0);438dtd.defineEntity(nm, type, data);439} else {440strpos = 0;441}442skipParameterSpace();443expect('>');444}445446/**447* Parse content model.448* [126] 410:1449* REMIND: data tag group450*/451ContentModel parseContentModel() throws IOException {452ContentModel m = null;453454switch (ch) {455case '(':456ch = in.read();457skipParameterSpace();458ContentModel e = parseContentModel();459460if (ch != ')') {461m = new ContentModel(ch, e);462do {463ch = in.read();464skipParameterSpace();465e.next = parseContentModel();466if (e.next.type == m.type) {467e.next = (ContentModel)e.next.content;468}469for (; e.next != null ; e = e.next);470} while (ch == m.type);471} else {472m = new ContentModel(',', e);473}474expect(')');475break;476477case '#':478ch = in.read();479if (parseIdentifier(true)) {480m = new ContentModel('*', new ContentModel(dtd.getElement("#" + getString(0))));481} else {482error("invalid", "content model");483}484break;485486default:487if (parseIdentifier(true)) {488m = new ContentModel(dtd.getElement(getString(0)));489} else {490error("invalid", "content model");491}492break;493}494495switch (ch) {496case '?':497case '*':498case '+':499m = new ContentModel(ch, m);500ch = in.read();501break;502}503skipParameterSpace();504505return m;506}507508/**509* Parse element declaration.510* [116] 405:6511*/512void parseElementDeclaration() throws IOException {513Vector<String> elems = parseIdentifierList(true);514BitSet inclusions = null;515BitSet exclusions = null;516boolean omitStart = false;517boolean omitEnd = false;518519if ((ch == '-') || (ch == 'O')) {520omitStart = ch == 'O';521ch = in.read();522skipParameterSpace();523524if ((ch == '-') || (ch == 'O')) {525omitEnd = ch == 'O';526ch = in.read();527skipParameterSpace();528} else {529expect('-');530}531}532533int type = MODEL;534ContentModel content = null;535if (parseIdentifier(false)) {536String nm = getString(0);537type = Element.name2type(nm);538if (type == 0) {539error("invalid.arg", "content type", nm);540type = EMPTY;541}542skipParameterSpace();543} else {544content = parseContentModel();545}546547if ((type == MODEL) || (type == ANY)) {548if (ch == '-') {549ch = in.read();550Vector<String> v = parseIdentifierList(true);551exclusions = new BitSet();552for (Enumeration<String> e = v.elements() ; e.hasMoreElements() ;) {553exclusions.set(dtd.getElement(e.nextElement()).getIndex());554}555}556if (ch == '+') {557ch = in.read();558Vector<String> v = parseIdentifierList(true);559inclusions = new BitSet();560for (Enumeration<String> e = v.elements() ; e.hasMoreElements() ;) {561inclusions.set(dtd.getElement(e.nextElement()).getIndex());562}563}564}565expect('>');566567if (in.replace == 0) {568for (Enumeration<String> e = elems.elements() ; e.hasMoreElements() ;) {569dtd.defineElement(e.nextElement(), type, omitStart, omitEnd, content, exclusions, inclusions, null);570}571}572}573574/**575* Parse an attribute declared value.576* [145] 422:6577*/578void parseAttributeDeclaredValue(AttributeList atts) throws IOException {579if (ch == '(') {580atts.values = parseIdentifierList(true);581atts.type = NMTOKEN;582return;583}584if (!parseIdentifier(false)) {585error("invalid", "attribute value");586return;587}588atts.type = AttributeList.name2type(getString(0));589skipParameterSpace();590if (atts.type == NOTATION) {591atts.values = parseIdentifierList(true);592}593}594595/**596* Parse an attribute value specification.597* [33] 331:1598*/599@SuppressWarnings("fallthrough")600String parseAttributeValueSpecification() throws IOException {601int delim = -1;602switch (ch) {603case '\'':604case '"':605delim = ch;606ch = in.read();607}608while (true) {609switch (ch) {610case -1:611error("eof.arg", "attribute value");612return getString(0);613614case '&':615parseEntityReference();616break;617618case ' ':619case '\t':620case '\n':621if (delim == -1) {622return getString(0);623}624addString(' ');625ch = in.read();626break;627628case '\'':629case '"':630if (delim == ch) {631ch = in.read();632return getString(0);633}634/* fall through */635636default:637addString(ch & 0xFF);638ch = in.read();639break;640}641}642}643644/**645* Parse an attribute default value.646* [147] 425:1647*/648void parseAttributeDefaultValue(AttributeList atts) throws IOException {649if (ch == '#') {650ch = in.read();651if (!parseIdentifier(true)) {652error("invalid", "attribute value");653return;654}655skipParameterSpace();656atts.modifier = AttributeList.name2type(getString(0));657if (atts.modifier != FIXED) {658return;659}660}661atts.value = parseAttributeValueSpecification();662skipParameterSpace();663}664665/**666* Parse an attribute definition list declaration.667* [141] 420:15668* REMIND: associated notation name669*/670void parseAttlistDeclaration() throws IOException {671Vector<String> elems = parseIdentifierList(true);672AttributeList attlist = null, atts = null;673674while (parseIdentifier(true)) {675if (atts == null) {676attlist = atts = new AttributeList(getString(0));677} else {678atts.next = new AttributeList(getString(0));679atts = atts.next;680}681skipParameterSpace();682parseAttributeDeclaredValue(atts);683parseAttributeDefaultValue(atts);684685if ((atts.modifier == IMPLIED) && (atts.values != null) && (atts.values.size() == 1)) {686atts.value = (String)atts.values.elementAt(0);687}688}689690expect('>');691692if (in.replace == 0) {693for (Enumeration<String> e = elems.elements() ; e.hasMoreElements() ;) {694dtd.defineAttributes(e.nextElement(), attlist);695}696}697}698699/**700* Parse an ignored section until ]]> is encountered.701*/702void parseIgnoredSection() throws IOException {703int depth = 1;704in.replace++;705while (true) {706switch (ch) {707case '<':708if ((ch = in.read()) == '!') {709if ((ch = in.read()) == '[') {710ch = in.read();711depth++;712}713}714break;715case ']':716if ((ch = in.read()) == ']') {717if ((ch = in.read()) == '>') {718ch = in.read();719if (--depth == 0) {720in.replace--;721return;722}723}724}725break;726case -1:727error("eof");728in.replace--;729return;730731default:732ch = in.read();733break;734}735}736}737738/**739* Parse a marked section declaration.740* [93] 391:13741* REMIND: deal with all status keywords742*/743void parseMarkedSectionDeclaration() throws IOException {744ch = in.read();745skipSpace();746if (!parseIdentifier(true)) {747error("expected", "section status keyword");748return;749}750String str = getString(0);751skipSpace();752expect('[');753if ("ignore".equals(str)) {754parseIgnoredSection();755} else {756if (!"include".equals(str)) {757error("invalid.arg", "section status keyword", str);758}759parseSection();760expect(']');761expect(']');762expect('>');763}764}765766/**767* Parse an external identifier768* [73] 379:1769*/770void parseExternalIdentifier() throws IOException {771if (parseIdentifier(false)) {772String id = getString(0);773skipParameterSpace();774775if (id.equals("PUBLIC")) {776if ((ch == '\'') || (ch == '"')) {777parseAttributeValueSpecification();778} else {779error("expected", "public identifier");780}781skipParameterSpace();782} else if (!id.equals("SYSTEM")) {783error("invalid", "external identifier");784}785if ((ch == '\'') || (ch == '"')) {786parseAttributeValueSpecification();787}788skipParameterSpace();789}790}791792/**793* Parse document type declaration.794* [110] 403:1795*/796void parseDocumentTypeDeclaration() throws IOException {797skipParameterSpace();798if (!parseIdentifier(true)) {799error("expected", "identifier");800} else {801skipParameterSpace();802}803strpos = 0;804parseExternalIdentifier();805806if (ch == '[') {807ch = in.read();808parseSection();809expect(']');810skipParameterSpace();811}812expect('>');813}814815/**816* Parse a section of the input upto EOF or ']'.817*/818@SuppressWarnings("fallthrough")819void parseSection() throws IOException {820while (true) {821switch (ch) {822case ']':823return;824825case '<':826switch (ch = in.read()) {827case '!':828switch (ch = in.read()) {829case '[':830parseMarkedSectionDeclaration();831break;832833case '-':834skipParameterSpace();835expect('>');836break;837838default:839if (parseIdentifier(true)) {840String str = getString(0);841842if (str.equals("element")) {843parseElementDeclaration();844845} else if (str.equals("entity")) {846parseEntityDeclaration();847848} else if (str.equals("attlist")) {849parseAttlistDeclaration();850851} else if (str.equals("doctype")) {852parseDocumentTypeDeclaration();853854} else if (str.equals("usemap")) {855error("ignoring", "usemap");856while ((ch != -1) && (ch != '>')) {857ch = in.read();858}859expect('>');860} else if (str.equals("shortref")) {861error("ignoring", "shortref");862while ((ch != -1) && (ch != '>')) {863ch = in.read();864}865expect('>');866} else if (str.equals("notation")) {867error("ignoring", "notation");868while ((ch != -1) && (ch != '>')) {869ch = in.read();870}871expect('>');872} else {873error("markup");874}875} else {876error("markup");877while ((ch != -1) && (ch != '>')) {878ch = in.read();879}880expect('>');881}882}883}884break;885886case -1:887return;888889default:890char str[] = {(char)ch};891error("invalid.arg", "character", "'" + new String(str) + "' / " + ch);892/* fall through */893894case ' ':895case '\t':896case '\n':897ch = in.read();898break;899}900}901}902903/**904* Parse a DTD.905* @return the dtd or null if an error occurred.906*/907DTD parse(InputStream in, DTDBuilder dtd) {908try {909this.dtd = dtd;910this.in = new DTDInputStream(in, dtd);911912long tm = System.currentTimeMillis();913ch = this.in.read();914parseSection();915916if (ch != -1) {917error("premature");918}919920tm = System.currentTimeMillis() - tm;921System.err.println("[Parsed DTD " + dtd + " in " + tm + "ms]");922} catch (IOException e) {923error("ioexception");924} catch (Exception e) {925error("exception", e.getClass().getName(), e.getMessage());926e.printStackTrace();927} catch (ThreadDeath e) {928error("terminated");929}930return (nerrors > 0) ? null : dtd;931}932}933934935