Path: blob/master/sourcetools/com.ibm.jpp.preprocessor/com/ibm/jpp/xml/XMLParser.java
6004 views
/*******************************************************************************
 * Copyright (c) 1999, 2017 IBM Corp. and others
 *
 * This program and the accompanying materials are made available under
 * the terms of the Eclipse Public License 2.0 which accompanies this
 * distribution and is available at https://www.eclipse.org/legal/epl-2.0/
 * or the Apache License, Version 2.0 which accompanies this distribution and
 * is available at https://www.apache.org/licenses/LICENSE-2.0.
 *
 * This Source Code may also be made available under the following
 * Secondary Licenses when the conditions for such availability set
 * forth in the Eclipse Public License, v. 2.0 are satisfied: GNU
 * General Public License, version 2 with the GNU Classpath
 * Exception [1] and GNU General Public License, version 2 with the
 * OpenJDK Assembly Exception [2].
 *
 * [1] https://www.gnu.org/software/classpath/license.html
 * [2] http://openjdk.java.net/legal/assembly-exception.html
 *
 * SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0 OR LicenseRef-GPL-2.0 WITH Assembly-exception
 *******************************************************************************/
package com.ibm.jpp.xml;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;

/**
 * A small hand-written, single-pass XML scanner that reports document,
 * element and character-data events to an {@link IXMLDocumentHandler}.
 * <p>
 * The scanner keeps exactly one character of look-ahead in {@code _fScan};
 * every {@code _scan_*} method documents where it expects the look-ahead to
 * be on entry and where it leaves it on exit.
 * <p>
 * Verification is deliberately cursory: the document must start with an
 * {@code <?xml ... encoding="UTF-8" ?>} header, DOCTYPE declarations are
 * skipped up to the first {@code '>'}, and entity references inside attribute
 * values are not expanded.
 */
public class XMLParser {
    static final boolean THROW_ON_EOF = true;
    static final boolean EOF_ENCOUNTERED = true;
    static final boolean VERBOSE = false;

    // NOTE(review): _fURI is never assigned in this class, so error messages
    // currently begin with "null(line ...". The text is kept unchanged because
    // callers may match on it.
    private String _fURI;
    // Character source; created in parse() by decoding the caller's stream as UTF-8.
    private Reader _fInput;
    // One character of look-ahead.
    private char _fScan;
    private boolean _fSuccess;
    private int _fLine = 1;
    private int _fColumn = 1;
    // Element nesting depth; 0 means we are outside the root element.
    private int _fLevel;
    // Set when the input ended immediately after the '>' closing a markup construct.
    private boolean _fEOF;
    private IXMLDocumentHandler _fDocumentHandler;

    /**
     * Raises a parse error unless {@code condition} holds.
     *
     * @param condition the condition that must be true
     * @param detail    the message to report when it is not
     * @throws XMLException if {@code condition} is false
     */
    void assertCondition(boolean condition, String detail) throws XMLException {
        if (!condition) {
            parseError(detail);
        }
    }

    /**
     * Always throws an {@link XMLException} carrying the current line and
     * column together with {@code detail}.
     *
     * @param detail description of the problem
     * @throws XMLException always
     */
    void parseError(String detail) throws XMLException {
        throw new XMLException(_fURI + "(line " + _fLine + "): Column " + _fColumn + ": " + detail);
    }

    /**
     * Reads the next character into the look-ahead, failing on end of input.
     *
     * @return always {@code true}
     * @throws XMLException on end of input or I/O failure
     */
    boolean scan_char() throws XMLException {
        return scan_char(THROW_ON_EOF);
    }

    /**
     * Reads the next character into the look-ahead and updates the line and
     * column counters.
     *
     * @param throwOnEOF when true, end of input is a parse error; when false
     *                   it is reported through the return value
     * @return {@code true} if a character was read; {@code false} on end of
     *         input (the look-ahead is left unchanged in that case)
     * @throws XMLException on I/O failure, or on end of input when
     *                      {@code throwOnEOF} is set
     */
    boolean scan_char(boolean throwOnEOF) throws XMLException {
        try {
            int next = read();
            if (next == -1) {
                if (!throwOnEOF) {
                    return false;
                }
                parseError("Unexpected EOF");
            }
            _fScan = (char) next;
        } catch (IOException e) {
            throw new XMLException(e);
        }

        if (XMLSpec.isLineDelimiter(_fScan)) {
            _fLine++;
            _fColumn = 1;
        } else {
            _fColumn++;
        }

        if (VERBOSE) {
            System.out.print(String.valueOf(_fScan));
        }
        return true;
    }

    /**
     * Matches {@code expected} against the input starting at the look-ahead.
     * On return the look-ahead is the character following the matched text.
     *
     * @param expected the literal text that must appear
     * @throws XMLException if the input differs or ends early
     */
    void _scan_for_all(String expected) throws XMLException {
        int length = expected.length();
        for (int i = 0; i < length; i++) {
            char expectedChar = expected.charAt(i);
            if (_fScan != expectedChar) {
                parseError("Expected '" + expectedChar + "'");
            }
            scan_char();
        }
    }

    // Eat and discard whitespace; stops on the first non-whitespace character.
    private void _skip_whitespace() throws XMLException {
        while (XMLSpec.isWhitespace(_fScan)) {
            scan_char();
        }
    }

    // Primitive: scan a name. The look-ahead is the first character of the
    // name on entry and the first character after the name on exit.
    private String _scan_name() throws XMLException {
        assertCondition(XMLSpec.isNameStartChar(_fScan), "Expected beginning of a name");
        XMLStringBuffer buffer = new XMLStringBuffer();

        do {
            buffer.append(_fScan);
            scan_char();
        } while (XMLSpec.isNameChar(_fScan));

        return buffer.toString();
    }

    // Primitive: scan an entity or character reference. The look-ahead is the
    // '&' on entry and the terminating ';' on exit (the caller's next scan
    // steps past it).
    private String _scan_escaped_char() throws XMLException {
        assertCondition(_fScan == '&', "Expected beginning of an escaped character");
        scan_char(); // advance past the ampersand

        boolean numeric = (_fScan == '#');
        if (numeric) {
            scan_char(); // advance past the sharp
        }

        XMLStringBuffer buffer = new XMLStringBuffer();
        while (XMLSpec.isNameChar(_fScan)) {
            buffer.append(_fScan);
            scan_char();
        }
        String escape = buffer.toString();

        String equivalent;
        if (numeric) {
            equivalent = _decode_character_reference(escape);
        } else {
            equivalent = XMLSpec.namedEscapeToString(escape);
            assertCondition(equivalent != null, "Unrecognized escape -->" + escape);
        }

        // Without this check the caller would silently swallow whatever
        // character happens to follow an unterminated reference.
        assertCondition(_fScan == ';', "Expected ';' to end escape -->" + escape);
        return equivalent;
    }

    // Decode the digits of a numeric character reference: decimal, or
    // hexadecimal when prefixed with 'x'. A leading zero does NOT mean octal.
    // Malformed or out-of-range values are reported as parse errors.
    private String _decode_character_reference(String digits) throws XMLException {
        int radix = 10;
        String number = digits;
        if (digits.startsWith("x")) {
            radix = 16;
            number = digits.substring(1);
        }

        int codePoint = -1;
        // Require a leading digit so that a sign accepted by parseInt is rejected.
        if (!number.isEmpty() && Character.digit(number.charAt(0), radix) != -1) {
            try {
                codePoint = Integer.parseInt(number, radix);
            } catch (NumberFormatException e) {
                codePoint = -1; // reported below as a parse error
            }
        }

        assertCondition(codePoint >= 0 && Character.isValidCodePoint(codePoint),
                "Invalid character reference -->" + digits);
        // toChars yields a surrogate pair for code points above U+FFFF.
        return new String(Character.toChars(codePoint));
    }

    /**
     * Reads the next character of the UTF-8 decoded input.
     *
     * @return the character as an int, or -1 at end of input
     * @throws IOException if no input has been set or the underlying read fails
     */
    int read() throws IOException {
        if (_fInput == null) {
            throw new IOException("No input to read from");
        }
        return _fInput.read();
    }

    /**
     * Parses an integer with an optional leading {@code '-'}, followed by
     * either a {@code 0x}/{@code 0X}/{@code #} prefix (hexadecimal), a leading
     * {@code 0} (octal), or plain decimal digits.
     * <p>
     * The parser itself no longer uses this method for character references
     * (which are never octal); it is retained for other callers in the package.
     *
     * @param string the text to parse
     * @return the decoded value
     * @throws NumberFormatException if {@code string} is empty, contains an
     *                               invalid digit, or overflows an {@code int}
     */
    static int decode(String string) throws NumberFormatException {
        int length = string.length();
        int i = 0;
        if (length == 0) {
            throw new NumberFormatException();
        }

        char firstDigit = string.charAt(i++);
        boolean negative = firstDigit == '-';
        if (negative) {
            if (length == 1) {
                throw new NumberFormatException(string);
            }
            firstDigit = string.charAt(i++);
        }

        int base = 10;
        if (firstDigit == '0') {
            if (i == length) {
                return 0;
            }

            if ((firstDigit = string.charAt(i++)) == 'x' || firstDigit == 'X') {
                if (i == length) {
                    throw new NumberFormatException(string);
                }
                firstDigit = string.charAt(i++);
                base = 16;
            } else {
                base = 8;
            }
        } else if (firstDigit == '#') {
            if (i == length) {
                throw new NumberFormatException(string);
            }
            firstDigit = string.charAt(i++);
            base = 16;
        }

        int result = Character.digit(firstDigit, base);
        if (result == -1) {
            throw new NumberFormatException(string);
        }

        // Accumulate as a negative number so that Integer.MIN_VALUE is representable.
        result = -result;
        while (i < length) {
            int digit = Character.digit(string.charAt(i++), base);
            if (digit == -1) {
                throw new NumberFormatException(string);
            }

            int next = result * base - digit;
            if (next > result) {
                // The negative accumulator wrapped around: overflow.
                throw new NumberFormatException(string);
            }
            result = next;
        }
        if (!negative) {
            result = -result;
            if (result < 0) {
                throw new NumberFormatException(string);
            }
        }
        return result;
    }

    // Primitive: scan character data up to the next '<' and hand it to the
    // handler (whitespace characters are normalised to a plain space).
    // Returns EOF_ENCOUNTERED when the input ends outside the root element;
    // input ending inside an element is a parse error. Text collected before
    // an end of input is not delivered.
    private boolean _scan_cdata_or_eof() throws XMLException {
        XMLStringBuffer buffer = new XMLStringBuffer();

        // The preceding markup may already have hit the end of the input.
        boolean eof = _fEOF;
        while (!eof && _fScan != '<') {
            if (_fScan == '&') {
                buffer.append(_scan_escaped_char());
            } else if (XMLSpec.isWhitespace(_fScan)) {
                buffer.append(" "); // convert to a regular space
            } else {
                buffer.append(_fScan);
            }
            eof = !scan_char(!THROW_ON_EOF);
        }

        if (eof) {
            if (_fLevel != 0) {
                parseError("Character data ended prematurely");
            }
            return EOF_ENCOUNTERED;
        }

        _fDocumentHandler.xmlCharacters(buffer.toString());
        return !EOF_ENCOUNTERED;
    }

    // Primitive: scan a quoted attribute value. The look-ahead is the opening
    // quote (either '"' or '\'') on entry and the character after the closing
    // quote on exit. Entity references inside the value are not expanded.
    private String _scan_attribute_value() throws XMLException {
        char quote = _fScan;
        assertCondition(quote == '"' || quote == '\'', "Expected quoted attribute value");
        scan_char();

        XMLStringBuffer buffer = new XMLStringBuffer();
        /*[PR 120136] flags="" in jpp_configuration causes problems*/
        // A while loop (not do/while) so that an empty value yields "".
        while (_fScan != quote) {
            buffer.append(_fScan);
            scan_char();
        }
        // Advance past the final quote
        scan_char();

        return buffer.toString();
    }

    // Scan name="value" pairs into a map; a repeated name keeps the last value.
    // Leading whitespace and whitespace after each value are skipped.
    private Map<String, String> _scan_attributes() throws XMLException {
        Map<String, String> attributes = new HashMap<>();

        _skip_whitespace();

        while (XMLSpec.isNameStartChar(_fScan)) {
            String key = _scan_name();
            _scan_for_all("=");
            String val = _scan_attribute_value();
            _skip_whitespace();

            attributes.put(key, val);
        }

        return attributes;
    }

    // Perform very cursory verification of the <?xml ... ?> header: it must be
    // present and declare encoding "UTF-8".
    private void _scan_xml_header() throws XMLException {
        _scan_for_all("<?xml");
        Map<String, String> attributes = _scan_attributes();

        String encoding = attributes.get("encoding");
        assertCondition("UTF-8".equals(encoding), "Unsupported encoding");
        _scan_for_all("?>");

        // Notify the content handler
        _fDocumentHandler.xmlStartDocument();
    }

    // <? already scanned, continue ==> <?xml:stylesheet type="text/xsl"
    // href="excludes.xsl" ?>
    // The instruction is skipped; the look-ahead is left on the closing '>'.
    private void _scan_processing_instruction() throws XMLException {
        scan_char(); // advance past '?'
        _scan_name();
        _skip_whitespace();
        _scan_attributes();

        while (_fScan != '>') {
            scan_char();
        }
    }

    // <!- already scanned, continue
    // The comment is skipped; the look-ahead is left on the '>' of "-->".
    private void _scan_comment() throws XMLException {
        _scan_for_all("-"); // get the second '-'

        XMLStringBuffer comment = new XMLStringBuffer();
        do {
            scan_char();
            comment.append(_fScan);
        } while (!comment.endsWith("-->"));
    }

    // <!D already scanned, continue
    // '<!DOCTYPE' S Name (S ExternalID)? S? ('[' (markupdecl | DeclSep)* ']'
    // S?)? '>'
    // Everything up to the first '>' is skipped; the look-ahead is left on it.
    private void _scan_doctype() throws XMLException {
        _scan_for_all("DOCTYPE");
        _skip_whitespace();
        _scan_name();

        while (_fScan != '>') {
            scan_char();
        }
    }

    // <! already scanned, continue
    private void _scan_doctype_or_comment() throws XMLException {
        scan_char(); // advance past '!'

        switch (_fScan) {
        case 'D':
            _scan_doctype();
            break;
        case '-':
            _scan_comment();
            break;
        default:
            parseError("Unknown <! type -->" + _fScan);
            break;
        }
    }

    // </ already scanned; the look-ahead is left on the closing '>'.
    private void _scan_element_close() throws XMLException {
        scan_char(); // advance past '/'
        String closedTag = _scan_name();
        while (_fScan != '>') {
            scan_char();
        }

        // Notify the content handler
        _fDocumentHandler.xmlEndElement(closedTag);
        _fLevel--;
    }

    // The look-ahead is the first character of the element name on entry and
    // the closing '>' on exit. Empty elements (<name ... />) report both the
    // start and the end of the element.
    private void _scan_element_open() throws XMLException {
        String elementName = _scan_name();
        Map<String, String> attributes = _scan_attributes();

        // Notify the content handler
        _fDocumentHandler.xmlStartElement(elementName, attributes);
        _fLevel++;

        _skip_whitespace();
        if (_fScan == '/') {
            // Special case for empty elements
            _fDocumentHandler.xmlEndElement(elementName);
            _fLevel--;
        }

        while (_fScan != '>') {
            scan_char();
        }
    }

    // Scan one markup construct starting at '<': a processing instruction,
    // a DOCTYPE or comment, a closing tag, or an opening/empty tag.
    private void _scan_element_or_instruction() throws XMLException {
        assertCondition(_fScan == '<', "Expected entity or directive");
        scan_char();

        if (_fScan == '?') {
            _scan_processing_instruction();
        } else if (_fScan == '!') {
            _scan_doctype_or_comment();
        } else if (_fScan == '/') {
            _scan_element_close();
        } else {
            _scan_element_open();
        }

        // Every branch above stops on the closing '>'. Step past it so it is
        // not mistaken for character data. The input may legitimately end
        // right here (e.g. after the root element's closing tag), so do not
        // throw; _scan_cdata_or_eof decides whether that is acceptable.
        if (!scan_char(!THROW_ON_EOF)) {
            _fEOF = true;
        }
    }

    /**
     * Reads ahead until a non-whitespace character or the end of input.
     *
     * @return {@code true} if a non-whitespace character was found (it becomes
     *         the look-ahead); {@code false} if the input ran out first
     * @throws XMLException on I/O failure
     */
    boolean checkForCompletion() throws XMLException {
        while (true) {
            if (!scan_char(!THROW_ON_EOF)) {
                return false; // out of data
            } else if (!XMLSpec.isWhitespace(_fScan)) {
                return true;
            }
        }
    }

    /**
     * Parses the whole document from the current input, notifying the document
     * handler along the way. Scanner state (nesting level, position counters,
     * EOF flag) is reset first so a parser instance can be reused.
     *
     * @throws XMLException on malformed input or I/O failure
     */
    void parseXML() throws XMLException {
        _fLevel = 0;
        _fEOF = false;
        _fLine = 1;
        _fColumn = 1;

        // Fetch the first character
        scan_char();

        // Consume any leading whitespace
        _skip_whitespace();
        _scan_xml_header();

        // Begin scanning body: alternate between markup and character data
        do {
            if (_fScan == '<') {
                _scan_element_or_instruction();
            }
        } while (_scan_cdata_or_eof() != EOF_ENCOUNTERED);

        // Notify the content handler
        _fDocumentHandler.xmlEndDocument();
    }

    /**
     * Parses the XML document supplied by {@code uriStream}, decoding it as
     * UTF-8, and reports its content to {@code handler}. The stream is not
     * closed by this method.
     *
     * @param uriStream the document bytes
     * @param handler   receiver of the parse events
     * @return {@code true} when the document was parsed to the end; failures
     *         are signalled by exception, never by a {@code false} return
     * @throws XMLException on malformed input or I/O failure
     */
    public boolean parse(InputStream uriStream, IXMLDocumentHandler handler) throws XMLException {
        _fSuccess = false;
        _fDocumentHandler = handler;

        // The header is required to declare UTF-8, so decode accordingly
        // rather than treating each byte as a character.
        _fInput = new BufferedReader(new InputStreamReader(uriStream, StandardCharsets.UTF_8));
        parseXML();

        _fSuccess = true;
        return _fSuccess;
    }

}