Path: blob/aarch64-shenandoah-jdk8u272-b10/jdk/src/share/classes/sun/net/www/ParseUtil.java
38830 views
/*
 * Copyright (c) 1998, 2007, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package sun.net.www;

import java.util.BitSet;
import java.io.UnsupportedEncodingException;
import java.io.File;
import java.net.URL;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharacterCodingException;
import sun.nio.cs.ThreadLocalCoders;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;

/**
 * A class that contains useful routines common to sun.net.www
 * @author  Mike McCloskey
 */

public class ParseUtil {
    /**
     * ASCII characters (indexed by code point) that {@link #encodePath}
     * replaces with a {@code %XX} escape.
     */
    static BitSet encodedInPath;

    static {
        encodedInPath = new BitSet(256);

        // Set the bits corresponding to characters that are encoded in the
        // path component of a URI.

        // These characters are reserved in the path segment as described in
        // RFC2396 section 3.3.
        encodedInPath.set('=');
        encodedInPath.set(';');
        encodedInPath.set('?');
        encodedInPath.set('/');

        // These characters are defined as excluded in RFC2396 section 2.4.3
        // and must be escaped if they occur in the data part of a URI.
        encodedInPath.set('#');
        encodedInPath.set(' ');
        encodedInPath.set('<');
        encodedInPath.set('>');
        encodedInPath.set('%');
        encodedInPath.set('"');
        encodedInPath.set('{');
        encodedInPath.set('}');
        encodedInPath.set('|');
        encodedInPath.set('\\');
        encodedInPath.set('^');
        encodedInPath.set('[');
        encodedInPath.set(']');
        encodedInPath.set('`');

        // US ASCII control characters 00-1F and 7F.
        for (int i = 0; i < 32; i++) {
            encodedInPath.set(i);
        }
        encodedInPath.set(127);
    }

    /**
     * Constructs an encoded version of the specified path string suitable
     * for use in the construction of a URL.
     *
     * A path separator is replaced by a forward slash. The string is UTF8
     * encoded. The % escape sequence is used for characters that are above
     * 0x7F or those defined in RFC2396 as reserved or excluded in the path
     * component of a URL.
     *
     * @param path the path to encode, using the platform separator
     * @return the encoded path
     */
    public static String encodePath(String path) {
        return encodePath(path, true);
    }

    /**
     * Encodes a path as described for {@link #encodePath(String)}.
     *
     * @param path the path to encode
     * @param flag indicates whether path uses platform dependent
     *             File.separatorChar or not. True indicates path uses
     *             platform dependent File.separatorChar; false indicates
     *             that {@code '/'} is the separator.
     * @return the encoded path
     */
    public static String encodePath(String path, boolean flag) {
        char[] retCC = new char[path.length() * 2 + 16];
        int    retLen = 0;
        char[] pathCC = path.toCharArray();

        int n = path.length();
        for (int i = 0; i < n; i++) {
            char c = pathCC[i];
            if ((!flag && c == '/') || (flag && c == File.separatorChar)) {
                retCC[retLen++] = '/';
            } else {
                if (c <= 0x007F) {
                    if (c >= 'a' && c <= 'z' ||
                        c >= 'A' && c <= 'Z' ||
                        c >= '0' && c <= '9') {
                        // Fast path: alphanumerics are never escaped.
                        retCC[retLen++] = c;
                    } else if (encodedInPath.get(c)) {
                        retLen = escape(retCC, c, retLen);
                    } else {
                        retCC[retLen++] = c;
                    }
                } else if (c > 0x07FF) {
                    // Three-byte UTF-8 form. Each UTF-16 code unit is encoded
                    // on its own, so a surrogate pair yields two 3-byte
                    // sequences.
                    retLen = escape(retCC, (char)(0xE0 | ((c >> 12) & 0x0F)), retLen);
                    retLen = escape(retCC, (char)(0x80 | ((c >>  6) & 0x3F)), retLen);
                    retLen = escape(retCC, (char)(0x80 | ((c >>  0) & 0x3F)), retLen);
                } else {
                    // Two-byte UTF-8 form.
                    retLen = escape(retCC, (char)(0xC0 | ((c >>  6) & 0x1F)), retLen);
                    retLen = escape(retCC, (char)(0x80 | ((c >>  0) & 0x3F)), retLen);
                }
            }
            // Worst case scenario for character [0x7ff-]: every single
            // character will be encoded into 9 characters, so make sure
            // there is room for that before the next iteration.
            if (retLen + 9 > retCC.length) {
                int newLen = retCC.length * 2 + 16;
                if (newLen < 0) {
                    // int overflow while doubling
                    newLen = Integer.MAX_VALUE;
                }
                char[] buf = new char[newLen];
                System.arraycopy(retCC, 0, buf, 0, retLen);
                retCC = buf;
            }
        }
        return new String(retCC, 0, retLen);
    }

    /**
     * Writes the URL escape sequence ({@code '%'} followed by two hex
     * digits of the low byte of {@code c}) into the specified array.
     *
     * @param cc    the destination array
     * @param c     the value whose low 8 bits are written as hex
     * @param index the position in {@code cc} of the first char to write
     * @return the index just past the three chars written
     */
    private static int escape(char[] cc, char c, int index) {
        cc[index++] = '%';
        cc[index++] = Character.forDigit((c >> 4) & 0xF, 16);
        cc[index++] = Character.forDigit(c & 0xF, 16);
        return index;
    }

    /**
     * Un-escape and return the byte encoded by the escape triplet whose
     * {@code '%'} is at position i in string s. The caller must already
     * have verified (see {@link #isEscaped}) that a complete triplet with
     * two hex digits is present at that position.
     */
    private static byte unescape(String s, int i) {
        return (byte) Integer.parseInt(s.substring(i + 1, i + 3), 16);
    }


    /**
     * Returns a new String constructed from the specified String by replacing
     * the URL escape sequences and UTF8 encoding with the characters they
     * represent.
     *
     * @param s the string to decode
     * @return the decoded string; {@code s} itself when it is empty or
     *         contains no {@code '%'}
     * @throws IllegalArgumentException if {@code s} contains a {@code '%'}
     *         that is not followed by two hexadecimal digits (including a
     *         truncated escape at the end of the string), or if the escaped
     *         bytes are not valid UTF-8
     */
    public static String decode(String s) {
        int n = s.length();
        if ((n == 0) || (s.indexOf('%') < 0))
            return s;

        StringBuilder sb = new StringBuilder(n);
        ByteBuffer bb = ByteBuffer.allocate(n);
        CharBuffer cb = CharBuffer.allocate(n);
        CharsetDecoder dec = ThreadLocalCoders.decoderFor("UTF-8")
            .onMalformedInput(CodingErrorAction.REPORT)
            .onUnmappableCharacter(CodingErrorAction.REPORT);

        char c = s.charAt(0);
        for (int i = 0; i < n;) {
            assert c == s.charAt(i);
            if (c != '%') {
                sb.append(c);
                if (++i >= n)
                    break;
                c = s.charAt(i);
                continue;
            }

            // Collect the bytes of a maximal run of consecutive escapes so
            // that multi-byte UTF-8 sequences are decoded as a unit.
            bb.clear();
            for (;;) {
                // Reject truncated triplets and anything that is not two
                // plain hex digits. Integer.parseInt alone would accept a
                // leading sign such as "%+5".
                if (!isEscaped(s, i)) {
                    throw new IllegalArgumentException(
                        "Malformed escape sequence at index " + i + ": " + s);
                }
                bb.put(unescape(s, i));
                i += 3;
                if (i >= n)
                    break;
                c = s.charAt(i);
                if (c != '%')
                    break;
            }
            bb.flip();
            cb.clear();
            dec.reset();
            CoderResult cr = dec.decode(bb, cb, true);
            if (cr.isError())
                throw new IllegalArgumentException("Error decoding percent encoded characters");
            cr = dec.flush(cb);
            if (cr.isError())
                throw new IllegalArgumentException("Error decoding percent encoded characters");
            sb.append(cb.flip().toString());
        }

        return sb.toString();
    }

    /**
     * Returns a canonical version of the specified string: embedded
     * {@code /../} and {@code /./} segments are collapsed, and a trailing
     * {@code /..} or {@code /.} is resolved.
     *
     * @param file the path to canonicalize
     * @return the canonicalized path
     */
    public String canonizeString(String file) {
        int i = 0;
        int lim = file.length();

        // Remove embedded /../
        while ((i = file.indexOf("/../")) >= 0) {
            if ((lim = file.lastIndexOf('/', i - 1)) >= 0) {
                file = file.substring(0, lim) + file.substring(i + 3);
            } else {
                file = file.substring(i + 3);
            }
        }
        // Remove embedded /./
        while ((i = file.indexOf("/./")) >= 0) {
            file = file.substring(0, i) + file.substring(i + 2);
        }
        // Remove trailing ..
        while (file.endsWith("/..")) {
            // Position of the trailing "/.." itself. Searching from the
            // front with indexOf would match an earlier segment that merely
            // starts with ".." (e.g. "/a/..b/..") and drop too much.
            i = file.length() - 3;
            if ((lim = file.lastIndexOf('/', i - 1)) >= 0) {
                file = file.substring(0, lim + 1);
            } else {
                file = file.substring(0, i);
            }
        }
        // Remove trailing .
        if (file.endsWith("/."))
            file = file.substring(0, file.length() - 1);

        return file;
    }

    /**
     * Builds a {@code file:} URL for the given file from its encoded
     * absolute path. A leading slash is added when missing, and a trailing
     * slash is added when the file is a directory.
     *
     * @param file the file to convert
     * @return the {@code file:} URL with an empty host
     * @throws MalformedURLException if the URL cannot be constructed
     */
    public static URL fileToEncodedURL(File file)
        throws MalformedURLException
    {
        String path = file.getAbsolutePath();
        path = ParseUtil.encodePath(path);
        if (!path.startsWith("/")) {
            path = "/" + path;
        }
        if (!path.endsWith("/") && file.isDirectory()) {
            path = path + "/";
        }
        return new URL("file", "", path);
    }

    /**
     * Converts the given URL to a {@link java.net.URI}, quoting illegal
     * characters without double-escaping existing escape triplets.
     *
     * @param url the URL to convert
     * @return the URI, or {@code null} if the components of {@code url} do
     *         not form a syntactically valid URI
     */
    public static java.net.URI toURI(URL url) {
        String protocol = url.getProtocol();
        String auth = url.getAuthority();
        String path = url.getPath();
        String query = url.getQuery();
        String ref = url.getRef();
        if (path != null && !(path.startsWith("/")))
            path = "/" + path;

        //
        // In java.net.URI class, a port number of -1 implies the default
        // port number. So get it stripped off before creating URI instance.
        //
        if (auth != null && auth.endsWith(":-1"))
            auth = auth.substring(0, auth.length() - 3);

        java.net.URI uri;
        try {
            uri = createURI(protocol, auth, path, query, ref);
        } catch (java.net.URISyntaxException e) {
            // Deliberate best effort: callers receive null for URLs that
            // cannot be expressed as a URI.
            uri = null;
        }
        return uri;
    }

    //
    // createURI() and its auxiliary code are cloned from java.net.URI.
    // Most of the code is just copy and paste, except that quote()
    // has been modified to avoid double-escape.
    //
    // Usually it is unacceptable, but we're forced to do it because
    // otherwise we need to change public API, namely java.net.URI's
    // multi-argument constructors. It turns out that the changes cause
    // incompatibilities so can't be done.
    //
    private static URI createURI(String scheme,
                                 String authority,
                                 String path,
                                 String query,
                                 String fragment) throws URISyntaxException
    {
        String s = toString(scheme, null,
                            authority, null, null, -1,
                            path, query, fragment);
        checkPath(s, scheme, path);
        return new URI(s);
    }

    // Assembles the string form of a URI from its (possibly null)
    // components, quoting each component as needed.
    private static String toString(String scheme,
                                   String opaquePart,
                                   String authority,
                                   String userInfo,
                                   String host,
                                   int port,
                                   String path,
                                   String query,
                                   String fragment)
    {
        StringBuilder sb = new StringBuilder();
        if (scheme != null) {
            sb.append(scheme);
            sb.append(':');
        }
        appendSchemeSpecificPart(sb, opaquePart,
                                 authority, userInfo, host, port,
                                 path, query);
        appendFragment(sb, fragment);
        return sb.toString();
    }

    // Appends either the quoted opaque part or, when opaquePart is null,
    // the authority, path and query.
    private static void appendSchemeSpecificPart(StringBuilder sb,
                                                 String opaquePart,
                                                 String authority,
                                                 String userInfo,
                                                 String host,
                                                 int port,
                                                 String path,
                                                 String query)
    {
        if (opaquePart != null) {
            /* check if SSP begins with an IPv6 address
             * because we must not quote a literal IPv6 address
             */
            if (opaquePart.startsWith("//[")) {
                int end = opaquePart.indexOf("]");
                if (end != -1 && opaquePart.indexOf(":") != -1) {
                    String doquote, dontquote;
                    if (end == opaquePart.length()) {
                        dontquote = opaquePart;
                        doquote = "";
                    } else {
                        dontquote = opaquePart.substring(0, end + 1);
                        doquote = opaquePart.substring(end + 1);
                    }
                    sb.append(dontquote);
                    sb.append(quote(doquote, L_URIC, H_URIC));
                }
            } else {
                sb.append(quote(opaquePart, L_URIC, H_URIC));
            }
        } else {
            appendAuthority(sb, authority, userInfo, host, port);
            if (path != null)
                sb.append(quote(path, L_PATH, H_PATH));
            if (query != null) {
                sb.append('?');
                sb.append(quote(query, L_URIC, H_URIC));
            }
        }
    }

    // Appends "//" plus the authority. A non-null host takes precedence
    // (server-based form, with optional user-info and port); otherwise the
    // raw authority string is quoted and appended.
    private static void appendAuthority(StringBuilder sb,
                                        String authority,
                                        String userInfo,
                                        String host,
                                        int port)
    {
        if (host != null) {
            sb.append("//");
            if (userInfo != null) {
                sb.append(quote(userInfo, L_USERINFO, H_USERINFO));
                sb.append('@');
            }
            // A host containing ':' is taken to be an IPv6 literal and is
            // wrapped in brackets unless it already has them.
            boolean needBrackets = ((host.indexOf(':') >= 0)
                                    && !host.startsWith("[")
                                    && !host.endsWith("]"));
            if (needBrackets) sb.append('[');
            sb.append(host);
            if (needBrackets) sb.append(']');
            if (port != -1) {
                sb.append(':');
                sb.append(port);
            }
        } else if (authority != null) {
            sb.append("//");
            if (authority.startsWith("[")) {
                // Do not quote a literal IPv6 address.
                int end = authority.indexOf("]");
                if (end != -1 && authority.indexOf(":") != -1) {
                    String doquote, dontquote;
                    if (end == authority.length()) {
                        dontquote = authority;
                        doquote = "";
                    } else {
                        dontquote = authority.substring(0, end + 1);
                        doquote = authority.substring(end + 1);
                    }
                    sb.append(dontquote);
                    sb.append(quote(doquote,
                                    L_REG_NAME | L_SERVER,
                                    H_REG_NAME | H_SERVER));
                }
            } else {
                sb.append(quote(authority,
                                L_REG_NAME | L_SERVER,
                                H_REG_NAME | H_SERVER));
            }
        }
    }

    // Appends '#' and the quoted fragment, if a fragment is given.
    private static void appendFragment(StringBuilder sb, String fragment) {
        if (fragment != null) {
            sb.append('#');
            sb.append(quote(fragment, L_URIC, H_URIC));
        }
    }

    // Quote any characters in s that are not permitted
    // by the given mask pair. An existing escape triplet ("%" followed by
    // two hex digits) is left alone so that it is not double-escaped.
    // Returns s itself when nothing needed quoting.
    //
    private static String quote(String s, long lowMask, long highMask) {
        StringBuilder sb = null;    // allocated lazily on the first change
        boolean allowNonASCII = ((lowMask & L_ESCAPED) != 0);
        for (int i = 0; i < s.length(); i++) {
            char c = s.charAt(i);
            if (c < '\u0080') {
                if (!match(c, lowMask, highMask) && !isEscaped(s, i)) {
                    if (sb == null) {
                        sb = new StringBuilder();
                        sb.append(s, 0, i);
                    }
                    appendEscape(sb, (byte)c);
                } else {
                    if (sb != null)
                        sb.append(c);
                }
            } else if (allowNonASCII
                       && (Character.isSpaceChar(c)
                           || Character.isISOControl(c))) {
                if (sb == null) {
                    sb = new StringBuilder();
                    sb.append(s, 0, i);
                }
                appendEncoded(sb, c);
            } else {
                if (sb != null)
                    sb.append(c);
            }
        }
        return (sb == null) ? s : sb.toString();
    }

    //
    // To check if the given string has an escaped triplet
    // at the given position
    //
    private static boolean isEscaped(String s, int pos) {
        if (s == null || (s.length() <= (pos + 2)))
            return false;

        return s.charAt(pos) == '%'
               && match(s.charAt(pos + 1), L_HEX, H_HEX)
               && match(s.charAt(pos + 2), L_HEX, H_HEX);
    }

    // Appends the UTF-8 encoding of c: bytes >= 0x80 as escape triplets,
    // other bytes as plain characters.
    private static void appendEncoded(StringBuilder sb, char c) {
        ByteBuffer bb = null;
        try {
            bb = ThreadLocalCoders.encoderFor("UTF-8")
                .encode(CharBuffer.wrap("" + c));
        } catch (CharacterCodingException x) {
            assert false;
        }
        while (bb.hasRemaining()) {
            int b = bb.get() & 0xff;
            if (b >= 0x80)
                appendEscape(sb, (byte)b);
            else
                sb.append((char)b);
        }
    }

    private final static char[] hexDigits = {
        '0', '1', '2', '3', '4', '5', '6', '7',
        '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
    };

    // Appends '%' followed by the two upper-case hex digits of b.
    private static void appendEscape(StringBuilder sb, byte b) {
        sb.append('%');
        sb.append(hexDigits[(b >> 4) & 0x0f]);
        sb.append(hexDigits[(b >> 0) & 0x0f]);
    }

    // Tell whether the given character is permitted by the given mask pair
    private static boolean match(char c, long lowMask, long highMask) {
        if (c < 64)
            return ((1L << c) & lowMask) != 0;
        if (c < 128)
            return ((1L << (c - 64)) & highMask) != 0;
        return false;
    }

    // If a scheme is given then the path, if given, must be absolute
    //
    private static void checkPath(String s, String scheme, String path)
        throws URISyntaxException
    {
        if (scheme != null) {
            if ((path != null)
                && ((path.length() > 0) && (path.charAt(0) != '/')))
                throw new URISyntaxException(s,
                                             "Relative path in absolute URI");
        }
    }


    // -- Character classes for parsing --

    // Compute a low-order mask for the characters
    // between first and last, inclusive
    private static long lowMask(char first, char last) {
        long m = 0;
        int f = Math.max(Math.min(first, 63), 0);
        int l = Math.max(Math.min(last, 63), 0);
        for (int i = f; i <= l; i++)
            m |= 1L << i;
        return m;
    }

    // Compute the low-order mask for the characters in the given string
    private static long lowMask(String chars) {
        int n = chars.length();
        long m = 0;
        for (int i = 0; i < n; i++) {
            char c = chars.charAt(i);
            if (c < 64)
                m |= (1L << c);
        }
        return m;
    }

    // Compute a high-order mask for the characters
    // between first and last, inclusive
    private static long highMask(char first, char last) {
        long m = 0;
        int f = Math.max(Math.min(first, 127), 64) - 64;
        int l = Math.max(Math.min(last, 127), 64) - 64;
        for (int i = f; i <= l; i++)
            m |= 1L << i;
        return m;
    }

    // Compute the high-order mask for the characters in the given string
    private static long highMask(String chars) {
        int n = chars.length();
        long m = 0;
        for (int i = 0; i < n; i++) {
            char c = chars.charAt(i);
            if ((c >= 64) && (c < 128))
                m |= (1L << (c - 64));
        }
        return m;
    }


    // Character-class masks

    // digit    = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
    //            "8" | "9"
    private static final long L_DIGIT = lowMask('0', '9');
    private static final long H_DIGIT = 0L;

    // hex           =  digit | "A" | "B" | "C" | "D" | "E" | "F" |
    //                          "a" | "b" | "c" | "d" | "e" | "f"
    private static final long L_HEX = L_DIGIT;
    private static final long H_HEX = highMask('A', 'F') | highMask('a', 'f');

    // upalpha  = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" |
    //            "J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" |
    //            "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z"
    private static final long L_UPALPHA = 0L;
    private static final long H_UPALPHA = highMask('A', 'Z');

    // lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" |
    //            "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" |
    //            "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z"
    private static final long L_LOWALPHA = 0L;
    private static final long H_LOWALPHA = highMask('a', 'z');

    // alpha         = lowalpha | upalpha
    private static final long L_ALPHA = L_LOWALPHA | L_UPALPHA;
    private static final long H_ALPHA = H_LOWALPHA | H_UPALPHA;

    // alphanum      = alpha | digit
    private static final long L_ALPHANUM = L_DIGIT | L_ALPHA;
    private static final long H_ALPHANUM = H_DIGIT | H_ALPHA;

    // mark          = "-" | "_" | "." | "!" | "~" | "*" | "'" |
    //                 "(" | ")"
    private static final long L_MARK = lowMask("-_.!~*'()");
    private static final long H_MARK = highMask("-_.!~*'()");

    // unreserved    = alphanum | mark
    private static final long L_UNRESERVED = L_ALPHANUM | L_MARK;
    private static final long H_UNRESERVED = H_ALPHANUM | H_MARK;

    // reserved      = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
    //                 "$" | "," | "[" | "]"
    // Added per RFC2732: "[", "]"
    private static final long L_RESERVED = lowMask(";/?:@&=+$,[]");
    private static final long H_RESERVED = highMask(";/?:@&=+$,[]");

    // The zero'th bit is used to indicate that escape pairs and non-US-ASCII
    // characters are allowed; in this class it is consulted by quote().
    private static final long L_ESCAPED = 1L;
    private static final long H_ESCAPED = 0L;

    // Dash, for use in domainlabel and toplabel
    private static final long L_DASH = lowMask("-");
    private static final long H_DASH = highMask("-");

    // uric          = reserved | unreserved | escaped
    private static final long L_URIC = L_RESERVED | L_UNRESERVED | L_ESCAPED;
    private static final long H_URIC = H_RESERVED | H_UNRESERVED | H_ESCAPED;

    // pchar         = unreserved | escaped |
    //                 ":" | "@" | "&" | "=" | "+" | "$" | ","
    private static final long L_PCHAR
        = L_UNRESERVED | L_ESCAPED | lowMask(":@&=+$,");
    private static final long H_PCHAR
        = H_UNRESERVED | H_ESCAPED | highMask(":@&=+$,");

    // All valid path characters
    private static final long L_PATH = L_PCHAR | lowMask(";/");
    private static final long H_PATH = H_PCHAR | highMask(";/");

    // userinfo      = *( unreserved | escaped |
    //                    ";" | ":" | "&" | "=" | "+" | "$" | "," )
    private static final long L_USERINFO
        = L_UNRESERVED | L_ESCAPED | lowMask(";:&=+$,");
    private static final long H_USERINFO
        = H_UNRESERVED | H_ESCAPED | highMask(";:&=+$,");

    // reg_name      = 1*( unreserved | escaped | "$" | "," |
    //                     ";" | ":" | "@" | "&" | "=" | "+" )
    private static final long L_REG_NAME
        = L_UNRESERVED | L_ESCAPED | lowMask("$,;:@&=+");
    private static final long H_REG_NAME
        = H_UNRESERVED | H_ESCAPED | highMask("$,;:@&=+");

    // All valid characters for server-based authorities
    private static final long L_SERVER
        = L_USERINFO | L_ALPHANUM | L_DASH | lowMask(".:@[]");
    private static final long H_SERVER
        = H_USERINFO | H_ALPHANUM | H_DASH | highMask(".:@[]");
}