Path: blob/master/src/infrastructure/markup/markuprule/PhutilRemarkupHyperlinkRule.php
12241 views
<?php

/**
 * Remarkup rule which recognizes URIs in text and turns them into links.
 *
 * Three written forms are recognized by @{method:apply}:
 *
 *   - `<uri>`: linked exactly as written, without the angle brackets.
 *   - `{uri}`: matched and recorded, but not linked by default.
 *   - a bare `uri`: linked, with trailing punctuation and an unbalanced
 *     closing parenthesis left outside the link.
 *
 * Matching happens in two phases. During @{method:apply}, each URI is swapped
 * for a stored-text token and recorded in the engine's text metadata under
 * `KEY_HYPERLINKS`. During @{method:didMarkupText}, each recorded token is
 * overwritten with its final rendering.
 */
final class PhutilRemarkupHyperlinkRule extends PhutilRemarkupRule {

  /**
   * Text metadata key under which the list of matched hyperlinks is stored.
   */
  const KEY_HYPERLINKS = 'hyperlinks';

  /**
   * Get the priority of this rule.
   *
   * @return float Always `400.0`.
   */
  public function getPriority() {
    return 400.0;
  }

  /**
   * Replace every recognized URI in a block of text with a stored-text token.
   *
   * The three patterns are built once and cached in static locals.
   *
   * @param string $text Text to scan for URIs.
   * @return string Text with matched URIs replaced by tokens.
   */
  public function apply($text) {
    static $angle_pattern;
    static $curly_pattern;
    static $bare_pattern;

    if ($angle_pattern === null) {
      // See T13608. A previous version of this code matched bare URIs
      // starting with "\w{3,}", which can take a very long time to match
      // against long inputs.
      //
      // Use a protocol length limit in all patterns for general sanity,
      // and a negative lookbehind in the bare pattern to avoid explosive
      // complexity during expression evaluation.

      $protocol_fragment = '\w{3,32}';
      $uri_fragment = '[^\s'.PhutilRemarkupBlockStorage::MAGIC_BYTE.']+';

      $angle_pattern = sprintf(
        '(<(%s://%s?)>)',
        $protocol_fragment,
        $uri_fragment);

      $curly_pattern = sprintf(
        '({(%s://%s?)})',
        $protocol_fragment,
        $uri_fragment);

      $bare_pattern = sprintf(
        '((?<!\w)%s://%s)',
        $protocol_fragment,
        $uri_fragment);
    }

    // Hyperlinks with explicit "<>" around them get linked exactly, without
    // the "<>". Angle brackets are basically special and mean "this is a URL
    // with weird characters". This is assumed to be reasonable because they
    // don't appear in most normal text or most normal URLs.
    $text = preg_replace_callback(
      $angle_pattern,
      array($this, 'markupHyperlinkAngle'),
      $text);

    // We match "{uri}", but do not link it by default.
    $text = preg_replace_callback(
      $curly_pattern,
      array($this, 'markupHyperlinkCurly'),
      $text);

    // Anything else we match "ungreedily", which means we'll look for
    // stuff that's probably punctuation or otherwise not part of the URL and
    // not link it. This lets someone write "Quick! Go to
    // http://www.example.com/!". We also apply some paren balancing rules.

    // NOTE: We're explicitly avoiding capturing stored blocks, so text like
    // `http://www.example.com/[[x | y]]` doesn't get aggressively captured.

    $text = preg_replace_callback(
      $bare_pattern,
      array($this, 'markupHyperlinkUngreedy'),
      $text);

    return $text;
  }

  /**
   * Callback for URIs written as `<uri>`.
   *
   * @param array $matches Regular expression match; index 1 is the URI.
   * @return string Replacement text, see @{method:markupHyperlink}.
   */
  public function markupHyperlinkAngle(array $matches) {
    return $this->markupHyperlink('<', $matches);
  }

  /**
   * Callback for URIs written as `{uri}`.
   *
   * @param array $matches Regular expression match; index 1 is the URI.
   * @return string Replacement text, see @{method:markupHyperlink}.
   */
  public function markupHyperlinkCurly(array $matches) {
    return $this->markupHyperlink('{', $matches);
  }

  /**
   * Store a matched URI and record it for later rendering.
   *
   * The URI is stored with the engine and appended, together with its token
   * and mode, to the `KEY_HYPERLINKS` text metadata list.
   *
   * @param string|null $mode How the URI was written: `'<'`, `'{'`, or
   *   `null` for a bare URI.
   * @param array $matches Match array; index 0 is the full matched text and
   *   index 1 is the URI.
   * @return string The storage token, or the unmodified full match if
   *   constructing a `PhutilURI` from the URI throws.
   */
  protected function markupHyperlink($mode, array $matches) {
    $raw_uri = $matches[1];

    // The URI object is constructed only to validate the text; if it is
    // rejected, leave the original text in place.
    try {
      new PhutilURI($raw_uri);
    } catch (Exception $ex) {
      return $matches[0];
    }

    $engine = $this->getEngine();

    $token = $engine->storeText($raw_uri);

    $list_key = self::KEY_HYPERLINKS;
    $link_list = $engine->getTextMetadata($list_key, array());

    $link_list[] = array(
      'token' => $token,
      'uri' => $raw_uri,
      'mode' => $mode,
    );

    $engine->setTextMetadata($list_key, $link_list);

    return $token;
  }

  /**
   * Render a URI as an anchor tag, or as raw `{uri}` text for embeds.
   *
   * The link opens in a new window (`target="_blank"`) unless the engine
   * config `uri.same-window` is truthy.
   *
   * @param string $link URI to render.
   * @param bool $is_embed True if the URI was written as `{uri}`.
   * @return mixed Result of `phutil_tag()`, or a plain string for embeds.
   */
  protected function renderHyperlink($link, $is_embed) {
    // If the URI is "{uri}" and no handler picked it up, we just render it
    // as plain text.
    if ($is_embed) {
      return $this->renderRawLink($link, $is_embed);
    }

    $engine = $this->getEngine();

    $same_window = $engine->getConfig('uri.same-window', false);
    if ($same_window) {
      $target = null;
    } else {
      $target = '_blank';
    }

    return phutil_tag(
      'a',
      array(
        'href' => $link,
        'class' => 'remarkup-link',
        'target' => $target,
        'rel' => 'noreferrer',
      ),
      $link);
  }

  /**
   * Render a URI as unlinked text, restoring the braces for embeds.
   *
   * @param string $link URI to render.
   * @param bool $is_embed True if the URI was written as `{uri}`.
   * @return string `{uri}` for embeds, otherwise the URI itself.
   */
  private function renderRawLink($link, $is_embed) {
    if ($is_embed) {
      return '{'.$link.'}';
    } else {
      return $link;
    }
  }

  /**
   * Callback for bare URIs.
   *
   * Trailing `;,.:!?` characters, and a closing parenthesis with no opening
   * parenthesis in the URI, are split off and emitted after the link rather
   * than as part of it.
   *
   * @param array $matches Regular expression match; index 0 is the URI text.
   * @return mixed Result of `hsprintf()` combining the link token and the
   *   split-off tail, or the unmodified match if constructing a `PhutilURI`
   *   from the trimmed URI throws.
   */
  protected function markupHyperlinkUngreedy($matches) {
    $match = $matches[0];
    $tail = null;
    $trailing = null;
    if (preg_match('/[;,.:!?]+$/', $match, $trailing)) {
      $tail = $trailing[0];
      $match = substr($match, 0, -strlen($tail));
    }

    // If there's a closing paren at the end but no balancing open paren in
    // the URL, don't link the close paren. This is an attempt to gracefully
    // handle the two common paren cases, Wikipedia links and English language
    // parentheticals, e.g.:
    //
    //  http://en.wikipedia.org/wiki/Noun_(disambiguation)
    //  (see also http://www.example.com)
    //
    // We could apply a craftier heuristic here which tries to actually balance
    // the parens, but this is probably sufficient.
    if (preg_match('/\\)$/', $match) && !preg_match('/\\(/', $match)) {
      $tail = ')'.$tail;
      $match = substr($match, 0, -1);
    }

    // Validate here, before delegating, so that a rejected URI returns the
    // complete original text, including the tail we split off above.
    try {
      new PhutilURI($match);
    } catch (Exception $ex) {
      return $matches[0];
    }

    $link = $this->markupHyperlink(null, array(null, $match));

    return hsprintf('%s%s', $link, $tail);
  }

  /**
   * Overwrite the stored token of every recorded hyperlink with its final
   * rendering.
   *
   * Each link recorded under `KEY_HYPERLINKS` is handled by the first case
   * below which applies:
   *
   *   - In "toc" state or text mode, the raw URI is emitted.
   *   - If the protocol has no truthy entry in the `uri.allowed-protocols`
   *     config, the raw URI is emitted.
   *   - If the URI was written as `<uri>`, or the engine is in HTML mail
   *     mode, it is rendered with @{method:renderHyperlink}.
   *   - Otherwise the link is offered to each link engine extension in turn,
   *     and rendered with @{method:renderHyperlink} if none produce a result.
   *
   * @return void
   */
  public function didMarkupText() {
    $engine = $this->getEngine();

    $protocols = $engine->getConfig('uri.allowed-protocols', array());
    $is_toc = $engine->getState('toc');
    $is_text = $engine->isTextMode();
    $is_mail = $engine->isHTMLMailMode();

    $list_key = self::KEY_HYPERLINKS;
    $raw_list = $engine->getTextMetadata($list_key, array());

    $links = array();
    foreach ($raw_list as $key => $link) {
      $token = $link['token'];
      $raw_uri = $link['uri'];
      $mode = $link['mode'];

      $is_embed = ($mode === '{');
      $is_literal = ($mode === '<');

      // If we're rendering in a "Table of Contents" or a plain text mode,
      // we're going to render the raw URI without modifications.
      if ($is_toc || $is_text) {
        $result = $this->renderRawLink($raw_uri, $is_embed);
        $engine->overwriteStoredText($token, $result);
        continue;
      }

      // If this URI doesn't use a whitelisted protocol, don't link it. This
      // is primarily intended to prevent "javascript://" silliness.
      $uri = new PhutilURI($raw_uri);
      $protocol = $uri->getProtocol();
      $valid_protocol = idx($protocols, $protocol);
      if (!$valid_protocol) {
        $result = $this->renderRawLink($raw_uri, $is_embed);
        $engine->overwriteStoredText($token, $result);
        continue;
      }

      // If the URI is written as "<uri>", we'll render it literally even if
      // some handler would otherwise deal with it.
      // If we're rendering for HTML mail, we also render literally.
      if ($is_literal || $is_mail) {
        $result = $this->renderHyperlink($raw_uri, $is_embed);
        $engine->overwriteStoredText($token, $result);
        continue;
      }

      // Otherwise, this link is a valid resource which extensions are allowed
      // to handle.
      $links[$key] = $link;
    }

    if (!$links) {
      return;
    }

    foreach ($links as $key => $link) {
      $links[$key] = new PhutilRemarkupHyperlinkRef($link);
    }

    $extensions = PhutilRemarkupHyperlinkEngineExtension::getAllLinkEngines();
    foreach ($extensions as $extension) {
      // Work on a clone so engine state set here does not leak into the
      // shared extension instance.
      id(clone $extension)
        ->setEngine($engine)
        ->processHyperlinks($links);

      // Any link an extension produced a result for is finished; remove it
      // so later extensions and the fallback below don't touch it.
      foreach ($links as $key => $link) {
        $result = $link->getResult();
        if ($result !== null) {
          $engine->overwriteStoredText($link->getToken(), $result);
          unset($links[$key]);
        }
      }

      if (!$links) {
        break;
      }
    }

    // Render any remaining links in a normal way.
    foreach ($links as $link) {
      $result = $this->renderHyperlink($link->getURI(), $link->isEmbed());
      $engine->overwriteStoredText($link->getToken(), $result);
    }
  }

}