// Path: blob/trunk/third_party/closure/goog/url/url.js
// 4058 views
/**
 * @license
 * Copyright The Closure Library Authors.
 * SPDX-License-Identifier: Apache-2.0
 */

/**
 * @fileoverview Class for parsing strings into URLs using browser native
 * resolution.
 *
 * Use `resolveUrl` to resolve a url string with an optional base url string to
 * URL. Will throw an error if the resulting URL would not be valid. This can
 * be used in place of the [URL Web API][1] while providing support in IE and
 * working around various inconsistencies in Edge.
 *
 * Use `resolveRelativeUrl` to resolve any relative URL into an absolute URL for
 * the current location.
 *
 * Use `createUrl` to easily construct a new URL from an existing URL.
 *
 * This package attempts to follow the [WHATWG URL standard][2] where
 * possible, deviating only when there are significant advantages to doing so
 * such as splitting out searchParams from a property to a function call to
 * allow the compiler to remove the relevant polyfill code if unused, or
 * removing functionality that can cause confusion, unexpected
 * results, or unnecessary code size increases to the package. This package
 * also adds checks that are missing in some browsers (e.g. throwing errors when
 * a potential URL doesn't have a protocol or hostname), and generally tries to
 * ensure consistency among browsers while still accurately reporting how a
 * browser will interpret a given URL.
 *
 * Unlike goog.URI, this package is NOT intended to be used with URLs that are
 * "special", and is only guaranteed to return useful results for the schemes
 * listed in the spec (http(s), ws(s), ftp, file, blob). Various browsers
 * (Chrome included) do not correctly parse special URLs and the results will
 * be inaccurate in those cases. If you need to parse URLs using these
 * protocols, prefer to use goog.Uri (or goog.uri.utils) instead.
 * [1]: https://developer.mozilla.org/en-US/docs/Web/API/URL
 * [2]: https://url.spec.whatwg.org/
 */
goog.module('goog.url');
goog.module.declareLegacyNamespace();

const ConstString = goog.require('goog.string.Const');
const Tagname = goog.require('goog.dom.TagName');
const safe = goog.require('goog.dom.safe');
const uncheckedConversions = goog.require('goog.html.uncheckedconversions');
const {assert} = goog.require('goog.asserts');
const {concat: iterableConcat, map: iterableMap} = goog.require('goog.collections.iters');
const {createElement} = goog.require('goog.dom');

// Capture the native URL constructor before users have a chance to clobber it.
/** @type {?typeof URL} */
const NATIVE_URL = goog.global['URL'];

/** @define {boolean} */
const ASSUME_COMPLIANT_URL_API = goog.define(
    'ASSUME_COMPLIANT_URL_API',
    // TODO(user) narrow this down if earlier featureset years allow,
    // if they get defined. FY2020 does NOT include Edge (EdgeHTML), which is
    // good as workarounds are needed for spec compliance and a searchParams
    // polyfill.
    goog.FEATURESET_YEAR >= 2020);

let urlBase = goog.global?.document?.baseURI ||
    // baseURI is not available in IE11 and earlier
    goog.global.location?.href || '';

/**
 * For testing only - this adjusts the base used in `resolveRelativeUrl`.
 * @param {string} base
 * Maybe this should just be visible to allow others using this package to test
 * it?
 * @package
 */
const setUrlBaseForTesting = function(base) {
  urlBase = base;
};

exports.setUrlBaseForTesting = setUrlBaseForTesting;


/**
 * Feature-detection for native URL parsing
 * @type {boolean}
 */
const supportsNativeURLConstructor = {
  // TODO(user) Does this work without JSCompiler?
  /** @return {boolean} */
  valueOf: function() {
    if (ASSUME_COMPLIANT_URL_API) {
      return true;
    }
    try {
      new NATIVE_URL('http://example.com');
      return true;
    } catch (e) {
      return false;
    }
  }
}.valueOf();

/**
 * ReadonlySearchParams partially implements the URLSearchParams interface,
 * excluding all mutability methods and some less-useful methods for reading the
 * underlying data. Exclusions:
 *  - append
 *  - delete
 *  - set
 *  - sort
 *  - values()
 *  - entries()
 *  - forEach(...)
 * @extends {Iterable<!Array<string>>}
 * @record
 */
class ReadonlySearchParams {
  /**
   * @param {string} key The key to retrieve a value for. Must not be
   *     url-encoded.
   * @return {?string} The value. If a key is specified more than once, the
   *     first value is returned (as per the spec). All values will be
   *     url-decoded already.
   */
  get(key) {};

  /**
   * @param {string} key The key to retrieve all values for. Must not be
   *     url-encoded.
   * @return {!Array<string>} The list of values for this key. Will return the
   *     empty array if there are no values for the key. All values will have
   *     been url-decoded already.
   */
  getAll(key) {};

  /**
   * @param {string} key The key to search for. Must not be url-encoded.
   * @return {boolean} True iff this key exists within the search params.
   */
  has(key) {};

  /**
   * @return {string}
   */
  toString() {};
}

exports.ReadonlySearchParams = ReadonlySearchParams;

/**
 * A polyfill implementation of ReadonlySearchParams that is only used in older
 * browsers that don't natively support searchParams. This includes IE and Edge
 * (EdgeHTML).
 * @implements {ReadonlySearchParams}
 */
class SearchParamsImpl {
  /**
   * Parses the search string following the
   * application/x-www-form-urlencoded parsing steps of the URL spec: the
   * string is split on '&', empty sequences are skipped, each sequence is
   * split on its FIRST '=' only, every '+' is treated as a space, and both
   * the name and the value are percent-decoded.
   * @param {string} search The search string from URL resolution. May
   *     optionally begin with '?', and is expected to be URL-encoded.
   */
  constructor(search) {
    /** @private @const {!Map<string, !Array<string>>} */
    this.paramMap_ = new Map();
    if (search[0] === '?') {
      search = search.substring(1);
    }
    // A string pattern passed to replace() only replaces the first match, so
    // a global regex is required to convert every '+' into a space.
    const decode = (s) => decodeURIComponent(s.replace(/\+/g, ' '));
    for (const p of search.split('&')) {
      // Empty sequences (an empty search string, or "a=1&&b=2") carry no
      // parameter and must not produce a spurious '' entry.
      if (!p) {
        continue;
      }
      // Only the first '=' separates the name from the value; any further '='
      // belongs to the value (e.g. "k=a=b" -> ["k", "a=b"]).
      const separatorIndex = p.indexOf('=');
      const hasValue = separatorIndex >= 0;
      const key = decode(hasValue ? p.substring(0, separatorIndex) : p);
      const val = hasValue ? decode(p.substring(separatorIndex + 1)) : '';
      let entries = this.paramMap_.get(key);
      if (entries == null) {
        entries = [];
        this.paramMap_.set(key, entries);
      }
      entries.push(val);
    }
  }

  /**
   * @override
   */
  get(key) {
    const values = this.paramMap_.get(key);
    return values && values.length ? values[0] : null;
  }

  /**
   * @override
   */
  getAll(key) {
    // As per the spec, this returns the "empty sequence" if the key is not
    // found. A copy is returned so callers cannot mutate the internal state.
    return [...(this.paramMap_.get(key) || [])];
  }

  /**
   * @override
   */
  has(key) {
    return this.paramMap_.has(key);
  }

  /**
   * Iterates over [key, value] pairs, yielding one pair per value for keys
   * that were specified more than once.
   * @return {!IteratorIterable<!Array<string>>}
   */
  [Symbol.iterator]() {
    return iterableConcat(...iterableMap(this.paramMap_, (e) => {
      const key = /** @const {string} */ (e[0]);
      const values = /** @const {!Array<string>} */ (e[1]);
      return iterableMap(values, (v) => {
        return [key, v];
      });
    }));
  }

  /**
   * @override
   */
  toString() {
    return iterableSearchParamsToString(this);
  }
}

/**
 * @param {!Iterable<!Array<string>>} iterable The iterable which acts like a
 *     URLSearchParams object (each iteration returns another key and value).
 *     Note that both keys and values must NOT be already URL encoded.
 * @return {string} The serialized SearchParams, with all keys and values
 *     correctly encoded.
 */
const iterableSearchParamsToString = function(iterable) {
  // Form encoding is defined [in the spec][1] but there are some values that
  // are not encoded the right way by encodeURIComponent. Thus, we replace
  // their representation in the resulting encoded string with their actual
  // encoding.
  // [1]: https://url.spec.whatwg.org/#urlencoded-serializing
  const replacements = {
    '!': '%21',
    '(': '%28',
    ')': '%29',
    '%20': '+',
    '\'': '%27',
    '~': '%7E',
  };
  const encode = (s) => {
    return encodeURIComponent(s).replace(/[!()~']|(%20)/g, (c) => {
      return replacements[c];
    });
  };
  return Array
      .from(
          iterable,
          (keyValuePair) =>
              encode(keyValuePair[0]) + '=' + encode(keyValuePair[1]))
      .join('&');
};

/**
 * UrlLike mirrors most of the public readonly interface of the URL object in
 * the URL Web API.
 * Notable exclusions:
 *  - toJson()
 *  - searchParams
 *
 * Instead of using the searchParams property, use `getSearchParams` from this
 * package. This allows for the relevant code to be removed when inspection of
 * search parameters is not needed.
 * @record
 */
class UrlLike {
  constructor() {
    /**
     * @const {string}
     */
    this.href;

    /**
     * @const {string}
     */
    this.protocol;

    /**
     * @const {string}
     */
    this.username;

    /**
     * @const {string}
     */
    this.password;

    /**
     * @const {string}
     */
    this.host;

    /**
     * @const {string}
     */
    this.hostname;

    /**
     * @const {string}
     */
    this.port;

    /**
     * @const {string}
     */
    this.origin;

    /**
     * @const {string}
     */
    this.pathname;

    /**
     * @const {string}
     */
    this.search;

    /**
     * @const {string}
     */
    this.hash;
  }

  /** @return {string} */
  toString() {};
}

exports.UrlLike = UrlLike;

/**
 * This function is equivalent to 'new URL(href)' in newer browsers, and will
 * automatically work around the Security Problems in IE, retrying the parse
 * automatically while extracting the userinfo.
 * @param {string} urlStr
 * @return {!UrlLike} A canonicalized version of the information from the URL.
 *     Will throw if the resulting URL is invalid.
 */
const createAnchorElementInIE = function(urlStr) {
  const aTag = createElement(Tagname.A);

  let protocol;
  try {
    safe.setAnchorHref(
        aTag,
        uncheckedConversions.safeUrlFromStringKnownToSatisfyTypeContract(
            ConstString.from(
                'This url is attached to an Anchor tag that is NEVER attached ' +
                ' to the DOM and is not returned from this function.'),
            urlStr));
    // If the URL is actually invalid, trying to read from it will throw.
    protocol = aTag.protocol;
  } catch (e) {
    // We catch and re-throw an error here as the default error in IE is
    // simply 'Invalid argument.' with no useful information.
    throw new Error(`${urlStr} is not a valid URL.`);
  }
  // The anchor tag will be created and assigned some values, but a URL missing
  // a protocol and/or hostname is not valid in a browser and other browsers URL
  // APIs reject them.
  // '' : IE11.719.18362, IE11.0.9600
  // ':' : IE11.??? (web testing version as of 04/03/2020)
  // last char != ':': hunch...
  if (protocol === '' || protocol === ':' ||
      protocol[protocol.length - 1] != ':') {
    throw new Error(`${urlStr} is not a valid URL.`);
  }
  if (!canonicalPortForProtocols.has(protocol)) {
    throw new Error(`${urlStr} is not a valid URL.`);
  }
  if (!aTag.hostname) {
    throw new Error(`${urlStr} is not a valid URL.`);
  }
  const href = aTag.href;
  const urlLike = {
    href,
    protocol: aTag.protocol,
    username: '',
    password: '',
    // Host, origin, and port assigned below after canonicalization.
    hostname: aTag.hostname,
    pathname: '/' + aTag.pathname,
    search: aTag.search,
    hash: aTag.hash,
    toString: () => href,
  };
  // Canonicalize the port out from the URL if it matches
  const canonicalPort = canonicalPortForProtocols.get(aTag.protocol);
  if (canonicalPort === aTag.port) {
    urlLike.host = urlLike.hostname;
    urlLike.port = '';
    // This does not work for blob and file protocol types - they are far more
    // complicated.
    urlLike.origin = urlLike.protocol + '//' + urlLike.hostname;
  } else {
    urlLike.host = aTag.host;
    urlLike.port = aTag.port;
    urlLike.origin =
        urlLike.protocol + '//' + urlLike.hostname + ':' + urlLike.port;
  }
  return urlLike;
};

/**
 * @param {?string} username
 * @param {?string} password
 * @return {string} The serialized userinfo string
 */
const assembleUserInfo = function(username, password) {
  if (username && password) {
    return username + ':' + password + '@';
  } else if (username) {
    return username + '@';
  } else if (password) {
    return ':' + password + '@';
  } else {
    return '';
  }
};

/**
 * This function wraps 'new URL(href)' in newer browsers adds common checks for
 * parts of the URL spec (e.g. no protocol, no hostname for well-known protocols
 * like HTTP(s) and WS(S)) that some browsers don't adhere to. It also adds
 * origin construction for browsers that don't support it (Edge).
 * @param {string} urlStr
 * @return {!UrlLike}
 */
const urlParseWithCommonChecks = function(urlStr) {
  let res;
  try {
    res = new NATIVE_URL(urlStr);
  } catch (e) {
    throw new Error(`${urlStr} is not a valid URL.`);
  }
  const canonicalPort = canonicalPortForProtocols.get(res.protocol);
  if (!canonicalPort) {
    throw new Error(`${urlStr} is not a valid URL.`);
  }
  if (!res.hostname) {
    throw new Error(`${urlStr} is not a valid URL.`);
  }
  // For some protocols, Edge doesn't know how to construct the origin.
  if (res.origin !== 'null') {
    return res;
  }
  // We can't assign to the native object's origin property (it is ignored), so
  // we make a copy here.
  const href = res.href;
  const urlLike = {
    href,
    protocol: res.protocol,
    username: '',
    password: '',
    host: res.host,
    port: res.port,
    // origin assigned below after canonicalization.
    hostname: res.hostname,
    pathname: res.pathname,
    search: res.search,
    // We don't copy searchParams because Edge doesn't have it anyways.
    hash: res.hash,
    // Mirror the native URL (and the IE anchor-based record), whose string
    // form is the href; a plain object would yield '[object Object]'.
    toString: () => href,
  };
  if (canonicalPort === res.port) {
    // This does not work for blob and file protocol types - they are far more
    // complicated.
    urlLike.origin = res.protocol + '//' + res.hostname;
  } else {
    urlLike.origin = res.protocol + '//' + res.hostname + ':' + res.port;
  }
  return urlLike;
};

/**
 * Resolves the given url string (with the optional base) into a URL object
 * according to the [URL spec][https://url.spec.whatwg.org/]. Will throw an
 * error if the resulting URL is invalid or if the browser can't or won't use
 * that URL for some reason. Relative URLs are considered invalid without a base
 * and will throw an error - please use `resolveRelativeUrl` instead for this
 * use-case.
 *
 * Note that calling resolveUrl with both urlStr and baseStr may have surprising
 * behavior. For example, any invocation with both parameters will never use the
 * hash value from baseStr. Similarly, passing a path segment in urlStr will
 * append (or replace) the path in baseStr, but will ALSO exclude the search and
 * hash portions of baseStr from the resulting URL. See the unit tests
 * (specifically testWithBase* test cases) for examples.
 *
 * Compatibility notes:
 * - both IE (all versions) and Edge (EdgeHTML only) disallow URLs to have user
 *   information in them, and parsing those strings will throw an error.
 * - FireFox disallows URLs with just a password in the userinfo.
 * @param {string} urlStr A potential absolute URL as a string, or a relative
 *     URL if baseStr is provided.
 * @param {string=} baseStr An optional base url as a string, only required if
 *     the url is relative.
 * @return {!UrlLike} An object that describes the various parts of the URL if
 *     valid. Throws an error if invalid. While this object is the native URL
 *     object where possible, users should NOT rely on this property and instead
 *     treat it as a simple record.
 */
const resolveUrl = function(urlStr, baseStr) {
  if (ASSUME_COMPLIANT_URL_API) {
    // Safari throws a TypeError if you call the constructor with a second
    // argument that isn't defined, so we can't pass baseStr all the time.
    return baseStr ? new NATIVE_URL(urlStr, baseStr) : new NATIVE_URL(urlStr);
  }

  // Ideally, this should effectively become
  // if Edge
  // and the else should effectively become
  // if IE

  // TODO(user) Some use of FEATURESET_YEAR near here would help strip
  // down the implementation even more for browsers we are more sure support the
  // URL Web API (including Edge). 2019? Maybe?

  if (supportsNativeURLConstructor) {
    if (!baseStr) {
      return urlParseWithCommonChecks(urlStr);
    }
    // Edge doesn't throw if baseStr is not a valid absolute URL when the
    // urlStr is absolute. This is against the spec, so try and parse this with
    // commonChecks (which will throw if baseStr is not a valid absolute URL).
    const baseUrl = urlParseWithCommonChecks(baseStr);

    // If urlStr is present and absolute, then only those values are used.
    try {
      return urlParseWithCommonChecks(urlStr);
    } catch (e) {
      // urlStr is not absolute. We shall give both pieces to the constructor
      // below and see what it thinks.
    }
    return new NATIVE_URL(urlStr, baseUrl.href);
  } else {
    if (!baseStr) {
      return createAnchorElementInIE(urlStr);
    }
    // It is against the spec to provide a baseStr that is not absolute.
    const baseUrl = createAnchorElementInIE(baseStr);

    // If urlStr is present and absolute, then only those values are used even
    // if baseStr is defined. The spec says we must try and parse baseStr first
    // (and error on it) before we do this though.
    try {
      return createAnchorElementInIE(urlStr);
    } catch (e) {
      // urlStr is not absolute. We shall assemble base pieces + url pieces
      // below.
      // Deliberate fallthrough
    }

    // If the base is present and absolute, check for special characters that
    // help determine what parts of base we use vs the relative parts.
    // This is similar to the [state machine][1] mentioned in the
    // spec except we already know that urlStr is NOT absolute.
    // [1]: https://url.spec.whatwg.org/#relative-state
    const newBaseStr = baseUrl.protocol + '//' +
        assembleUserInfo(baseUrl.username, baseUrl.password) + baseUrl.host;
    let /** string */ href;
    const firstChar = urlStr[0];
    if (firstChar === '/' || firstChar === '\\') {
      href = newBaseStr + urlStr;
    } else if (firstChar === '?') {
      href = newBaseStr + baseUrl.pathname + urlStr;
    } else if (!firstChar || firstChar === '#') {
      href = newBaseStr + baseUrl.pathname + baseUrl.search + urlStr;
    } else {
      // This doesn't start with any of the authority terminating characters,
      // but other browsers treat it implicitly as an extension to the existing
      // path, removing anything after the last '/' and appending urlStr to it.
      const lastPathSeparator = baseUrl.pathname.lastIndexOf('/');
      const path = lastPathSeparator > 0 ?
          baseUrl.pathname.substring(0, lastPathSeparator) :
          '';
      href = newBaseStr + path + '/' + urlStr;
    }
    return createAnchorElementInIE(href);
  }
};

exports.resolveUrl = resolveUrl;

/**
 * Browsers will canonicalize a URL if the scheme has a "canonical" port for it.
 * This maps schemes to their canonical port. These mappings are defined in the
 * [spec][1].
 *
 * [1]: https://url.spec.whatwg.org/#url-miscellaneous
 * @type {!Map<string,string>}
 */
const canonicalPortForProtocols = new Map([
  ['http:', '80'],
  ['https:', '443'],
  ['ws:', '80'],
  ['wss:', '443'],
  ['ftp:', '21'],
]);

/**
 * Returns a URLSearchParams-like object for a given URL object. This is used
 * instead of the native URL object's 'searchParams' property to allow the
 * Closure Compiler to code-strip the polyfill if searchParams are never used.
 * @param {!UrlLike|!URL} url The URL object to derive SearchParams for.
 * @return {!ReadonlySearchParams} The URLSearchParams-like object for the URL.
 * @suppress {strictMissingProperties} url.searchParams on union
 */
const getSearchParams = function(url) {
  if (goog.FEATURESET_YEAR >= 2020 ||
      (supportsNativeURLConstructor && url.searchParams)) {
    return url.searchParams;
  }
  return new SearchParamsImpl(url.search);
};

exports.getSearchParams = getSearchParams;

/**
 * Resolves the given relative URL string without requiring a specific base URL
 * (unlike resolveUrl). Will resolve the relative URL against the current
 * document's BaseURI, and the resulting URL WILL contain properties from
 * this URI.
 * @param {string} relativeURL A string which may be only a relative URL (i.e.
 *     has no protocol, userinfo, hostname, or port).
 * @return {!UrlLike} A URL that is relative to the current document's Base URI
 *     with all the relevant relative parts from the input parameter.
 */
const resolveRelativeUrl = function(relativeURL) {
  return resolveUrl(relativeURL, urlBase);
};

exports.resolveRelativeUrl = resolveRelativeUrl;

/**
 * @record
 */
class UrlPrimitivePartsPartial {
  constructor() {
    /** @const {string|undefined} */
    this.protocol;

    /** @const {string|undefined} */
    this.username;

    /** @const {string|undefined} */
    this.password;

    /** @const {string|undefined} */
    this.hostname;

    /** @const {string|undefined} */
    this.port;

    /** @const {string|undefined} */
    this.pathname;

    /** @const {string|undefined} */
    this.search;

    /** @const {!Iterable<!Array<string>>|undefined} */
    this.searchParams;

    /** @const {string|undefined} */
    this.hash;
  }
}

exports.UrlPrimitivePartsPartial = UrlPrimitivePartsPartial;

/**
 * Creates a new URL object from primitive parts, optionally allowing for some
 * of those parts to be taken from a base URL object. Parts only accepts
 * primitive parts of the URL (e.g will NOT accept origin or host) for
 * simplicity, and only accepts either a search OR searchParams property, not
 * both at the same time. The resulting URL-like string is then parsed by
 * `resolveUrl`, and as such this method will also throw an error if the result
 * is not a valid URL (unlike Object.assign and other similar combinations of
 * object properties).
 *
 * This method does some validation of its inputs, and in general is NOT a good
 * way to clone an existing URL object. For that purpose, prefer to use
 * `resolveUrl(existingURLObject.href)`.
 * @param {!UrlPrimitivePartsPartial}
 *     parts The parts that should be combined together to create a new URL.
 * @param {!UrlLike=} base An optional base whose primitive parts are used if
 *     they are not specified in the parts param. If all required primitive
 *     parts (host, protocol) are specified in the parts param, this can be
 *     omitted.
 * @return {!UrlLike} The resulting URL object if valid. Will throw an error if
 *     the resulting combination of parts and base is invalid.
 */
const createUrl = function(parts, base = undefined) {
  assert(
      !(parts.search && parts.searchParams),
      'Only provide search or searchParams, not both');
  // Alas we cannot use Object.assign as the native URL object will not let its
  // properties be copied over.
  const newParts = {};
  if (base) {
    newParts.protocol = base.protocol;
    newParts.username = base.username;
    newParts.password = base.password;
    newParts.hostname = base.hostname;
    newParts.port = base.port;
    newParts.pathname = base.pathname;
    newParts.search = base.search;
    // Note we don't copy over searchParams here as we won't use it anyways.
    // search should be available instead.
    newParts.hash = base.hash;
  }
  Object.assign(newParts, parts);

  // Check for spec compliance
  if (newParts.port && newParts.port[0] === ':') {
    throw new Error('port should not start with \':\'');
  }
  if (newParts.hash && newParts.hash[0] != '#') {
    newParts.hash = '#' + newParts.hash;
  }
  // Manually assign search/searchParams from parts and clean up newParts so it
  // only specifies a search property.
  // precedence is as follows:
  // parts.search
  // parts.searchParams
  // newParts.search (aka base.search)
  if (parts.search) {
    if (parts.search[0] != '?') {
      newParts.search = '?' + parts.search;
    }
    // newParts.search is already equal to parts.search due to Object.assign
    // above. searchParams will be undefined here as it isn't copied from base.
  } else if (parts.searchParams) {
    newParts.search = '?' + iterableSearchParamsToString(parts.searchParams);
    // Not strictly necessary, but clear searchParams now we have serialized it.
    newParts.searchParams = undefined;
  }

  let sb = '';
  if (newParts.protocol) {
    sb += newParts.protocol + '//';
  }

  const userinfo = assembleUserInfo(newParts.username, newParts.password);
  sb += userinfo;
  sb += newParts.hostname || '';
  if (newParts.port) {
    sb += ':' + newParts.port;
  }
  sb += newParts.pathname || '';
  sb += newParts.search || '';
  sb += newParts.hash || '';
  return resolveUrl(sb);
};

exports.createUrl = createUrl;