Path: blob/master/src/infrastructure/markup/PhabricatorMarkupEngine.php
12241 views
<?php

/**
 * Manages markup engine selection, configuration, application, caching and
 * pipelining.
 *
 * @{class:PhabricatorMarkupEngine} can be used to render objects which
 * implement @{interface:PhabricatorMarkupInterface} in a batched, cache-aware
 * way. For example, if you have a list of comments written in remarkup (and
 * the objects implement the correct interface) you can render them by first
 * building an engine and adding the fields with @{method:addObject}.
 *
 *   $field  = 'field:body'; // Field you want to render. Each object exposes
 *                           // one or more fields of markup.
 *
 *   $engine = new PhabricatorMarkupEngine();
 *   foreach ($comments as $comment) {
 *     $engine->addObject($comment, $field);
 *   }
 *
 * Now, call @{method:process} to perform the actual cache/rendering
 * step. This is a heavyweight call which does batched data access and
 * transforms the markup into output.
 *
 *   $engine->process();
 *
 * Finally, do something with the results:
 *
 *   $results = array();
 *   foreach ($comments as $comment) {
 *     $results[] = $engine->getOutput($comment, $field);
 *   }
 *
 * If you have a single object to render, you can use the convenience method
 * @{method:renderOneObject}.
 *
 * @task markup Markup Pipeline
 * @task engine Engine Construction
 */
final class PhabricatorMarkupEngine extends Phobject {

  // Queued fields, keyed by markup field key. Each entry holds 'object' and
  // 'field', plus 'output' once the field has been processed.
  private $objects = array();
  private $viewer;
  private $contextObject;

  // Mixed into every markup field key (see getMarkupFieldKey()).
  private $version = 21;

  // Preprocessor cache data for processed fields, keyed by markup field key.
  private $engineCaches = array();
  private $auxiliaryConfig = array();

  // Engines which are currently inside process(), innermost last.
  private static $engineStack = array();


/* -(  Markup Pipeline  )---------------------------------------------------- */


  /**
   * Convenience method for pushing a single object through the markup
   * pipeline.
   *
   * @param PhabricatorMarkupInterface  The object to render.
   * @param string                      The field to render.
   * @param PhabricatorUser             User viewing the markup.
   * @param object                      A context object for policy checks
   * @return string                     Marked up output.
   * @task markup
   */
  public static function renderOneObject(
    PhabricatorMarkupInterface $object,
    $field,
    PhabricatorUser $viewer,
    $context_object = null) {
    return id(new PhabricatorMarkupEngine())
      ->setViewer($viewer)
      ->setContextObject($context_object)
      ->addObject($object, $field)
      ->process()
      ->getOutput($object, $field);
  }


  /**
   * Queue an object for markup generation when @{method:process} is
   * called. You can retrieve the output later with @{method:getOutput}.
   *
   * Queueing a field which is already queued replaces the existing entry,
   * discarding any output it already had.
   *
   * @param PhabricatorMarkupInterface  The object to render.
   * @param string                      The field to render.
   * @return this
   * @task markup
   */
  public function addObject(PhabricatorMarkupInterface $object, $field) {
    $key = $this->getMarkupFieldKey($object, $field);
    $this->objects[$key] = array(
      'object' => $object,
      'field'  => $field,
    );

    return $this;
  }


  /**
   * Process objects queued with @{method:addObject}. You can then retrieve
   * the output with @{method:getOutput}.
   *
   * While processing, this engine is pushed onto a static stack so that
   * @{method:isRenderingEmbeddedContent} can detect nested rendering. The
   * stack is always popped, even if processing throws.
   *
   * @return this
   * @task markup
   */
  public function process() {
    self::$engineStack[] = $this;

    try {
      $result = $this->execute();
    } finally {
      array_pop(self::$engineStack);
    }

    return $result;
  }

  /**
   * Test if markup is currently being rendered from inside another markup
   * render, i.e. more than one engine is inside @{method:process}.
   *
   * @return bool True if at least two engines are currently processing.
   * @task markup
   */
  public static function isRenderingEmbeddedContent() {
    // See T13678. This prevents cycles when rendering embedded content that
    // itself has remarkup fields.
    return (count(self::$engineStack) > 1);
  }

  /**
   * Render every queued field which does not have output yet.
   *
   * @return this
   * @task markup
   */
  private function execute() {
    // Only fields without output need work. Entries are created by
    // addObject() with just 'object' and 'field'; 'output' is written at the
    // end of this method. (This previously tested a 'markup' key which is
    // never written, so every call re-rendered every queued field.)
    $keys = array();
    foreach ($this->objects as $key => $info) {
      if (!isset($info['output'])) {
        $keys[] = $key;
      }
    }

    if (!$keys) {
      return $this;
    }

    $objects = array_select_keys($this->objects, $keys);

    // Build all the markup engines. We need an engine for each field whether
    // we have a cache or not, since we still need to postprocess the cache.
    $engines = array();
    foreach ($objects as $key => $info) {
      $engines[$key] = $info['object']->newMarkupEngine($info['field']);
      $engines[$key]->setConfig('viewer', $this->viewer);
      $engines[$key]->setConfig('contextObject', $this->contextObject);

      foreach ($this->auxiliaryConfig as $aux_key => $aux_value) {
        $engines[$key]->setConfig($aux_key, $aux_value);
      }
    }

    // Load or build the preprocessor caches.
    $blocks = $this->loadPreprocessorCaches($engines, $objects);
    $blocks = mpull($blocks, 'getCacheData');

    // Merge rather than overwrite, so metadata for fields rendered by an
    // earlier process() call remains available to getEngineMetadata(). For
    // keys present on both sides the freshly built data wins.
    $this->engineCaches = $blocks + $this->engineCaches;

    // Finalize the output.
    foreach ($objects as $key => $info) {
      $engine = $engines[$key];
      $field = $info['field'];
      $object = $info['object'];

      $output = $engine->postprocessText($blocks[$key]);
      $output = $object->didMarkupText($field, $output, $engine);
      $this->objects[$key]['output'] = $output;
    }

    return $this;
  }


  /**
   * Get the output of markup processing for a field queued with
   * @{method:addObject}. Before you can call this method, you must call
   * @{method:process}.
   *
   * @param PhabricatorMarkupInterface  The object to retrieve.
   * @param string                      The field to retrieve.
   * @return string                     Processed output.
   * @task markup
   */
  public function getOutput(PhabricatorMarkupInterface $object, $field) {
    $key = $this->getMarkupFieldKey($object, $field);
    $this->requireKeyProcessed($key);

    return $this->objects[$key]['output'];
  }


  /**
   * Retrieve engine metadata for a given field.
   *
   * The field must have been queued and processed first. The value is looked
   * up under the 'metadata' key of the field's preprocessor cache data.
   *
   * @param PhabricatorMarkupInterface  The object to retrieve.
   * @param string                      The field to retrieve.
   * @param string                      The engine metadata field to retrieve.
   * @param wild                        Optional default value.
   * @return wild                       Result of the idx() lookup.
   * @task markup
   */
  public function getEngineMetadata(
    PhabricatorMarkupInterface $object,
    $field,
    $metadata_key,
    $default = null) {

    $key = $this->getMarkupFieldKey($object, $field);
    $this->requireKeyProcessed($key);

    return idx($this->engineCaches[$key]['metadata'], $metadata_key, $default);
  }


  /**
   * Verify that a field key has been queued and processed.
   *
   * @param string Markup field key, from @{method:getMarkupFieldKey}.
   * @return void
   * @throws Exception If the key was never queued with addObject().
   * @throws PhutilInvalidStateException If the key has no output yet.
   * @task markup
   */
  private function requireKeyProcessed($key) {
    if (empty($this->objects[$key])) {
      throw new Exception(
        pht(
          "Call %s before using results (key = '%s').",
          'addObject()',
          $key));
    }

    if (!isset($this->objects[$key]['output'])) {
      throw new PhutilInvalidStateException('process');
    }
  }


  /**
   * Build the key used to identify a field in this engine and in the markup
   * cache.
   *
   * The key combines the object's own field key, this engine's version and a
   * digest of the custom rule versions, so a change to any of them produces
   * a different key.
   *
   * @param PhabricatorMarkupInterface  The object being rendered.
   * @param string                      The field being rendered.
   * @return string                     Markup field key.
   * @task markup
   */
  private function getMarkupFieldKey(
    PhabricatorMarkupInterface $object,
    $field) {

    // The custom rule digest is computed once per process and memoized.
    static $custom;
    if ($custom === null) {
      $custom = array_merge(
        self::loadCustomInlineRules(),
        self::loadCustomBlockRules());

      $custom = mpull($custom, 'getRuleVersion', null);
      ksort($custom);
      $custom = PhabricatorHash::digestForIndex(serialize($custom));
    }

    return $object->getMarkupFieldKey($field).'@'.$this->version.'@'.$custom;
  }


  /**
   * Load preprocessor caches for a set of fields, building (and, where
   * permitted, storing) any which are missing or unusable.
   *
   * @param map<string, wild> Markup engines, keyed by markup field key.
   * @param map<string, map>  Queued entries ('object', 'field'), same keys.
   * @return map<string, PhabricatorMarkupCache> Cache objects, by field key.
   * @task markup
   */
  private function loadPreprocessorCaches(array $engines, array $objects) {
    $blocks = array();

    $use_cache = array();
    foreach ($objects as $key => $info) {
      if ($info['object']->shouldUseMarkupCache($info['field'])) {
        $use_cache[$key] = true;
      }
    }

    if ($use_cache) {
      // A failed cache read is logged and otherwise ignored: we fall through
      // and rebuild everything below.
      try {
        $blocks = id(new PhabricatorMarkupCache())->loadAllWhere(
          'cacheKey IN (%Ls)',
          array_keys($use_cache));
        $blocks = mpull($blocks, null, 'getCacheKey');
      } catch (Exception $ex) {
        phlog($ex);
      }
    }

    $is_readonly = PhabricatorEnv::isReadOnly();

    foreach ($objects as $key => $info) {
      // False check in case MySQL doesn't support unicode characters
      // in the string (T1191), resulting in unserialize returning false.
      if (isset($blocks[$key]) && $blocks[$key]->getCacheData() !== false) {
        // If we already have a preprocessing cache, we don't need to rebuild
        // it.
        continue;
      }

      $text = $info['object']->getMarkupText($info['field']);
      $data = $engines[$key]->preprocessText($text);

      // NOTE: This is just debugging information to help sort out cache issues.
      // If one machine is misconfigured and poisoning caches you can use this
      // field to hunt it down.

      $metadata = array(
        'host' => php_uname('n'),
      );

      $blocks[$key] = id(new PhabricatorMarkupCache())
        ->setCacheKey($key)
        ->setCacheData($data)
        ->setMetadata($metadata);

      if (isset($use_cache[$key]) && !$is_readonly) {
        // This is just filling a cache and always safe, even on a read pathway.
        $unguarded = AphrontWriteGuard::beginScopedUnguardedWrites();
          $blocks[$key]->replace();
        unset($unguarded);
      }
    }

    return $blocks;
  }


  /**
   * Set the viewing user. Used to implement object permissions.
   *
   * @param PhabricatorUser The viewing user.
   * @return this
   * @task markup
   */
  public function setViewer(PhabricatorUser $viewer) {
    $this->viewer = $viewer;
    return $this;
  }

  /**
   * Set the context object. Used to implement object permissions.
   *
   * @param object The object in whose context this remarkup is used.
   * @return this
   * @task markup
   */
  public function setContextObject($object) {
    $this->contextObject = $object;
    return $this;
  }

  /**
   * Set an extra configuration value which is applied to every engine built
   * by @{method:process}.
   *
   * @param string Configuration key.
   * @param wild   Configuration value.
   * @return this
   * @task markup
   */
  public function setAuxiliaryConfig($key, $value) {
    // TODO: This is gross and should be removed. Avoid use.
    $this->auxiliaryConfig[$key] = $value;
    return $this;
  }


/* -(  Engine Construction  )------------------------------------------------ */



  /**
   * Build an engine with the default configuration.
   *
   * @task engine
   */
  public static function newManiphestMarkupEngine() {
    return self::newMarkupEngine(array(
    ));
  }


  /**
   * Build an engine which generates a table of contents from headers.
   *
   * @task engine
   */
  public static function newPhrictionMarkupEngine() {
    return self::newMarkupEngine(array(
      'header.generate-toc' => true,
    ));
  }


  /**
   * Build an engine without macros, which uses full URIs based on the
   * install URI and opens links in the same window.
   *
   * @task engine
   */
  public static function newPhameMarkupEngine() {
    return self::newMarkupEngine(
      array(
        'macros' => false,
        'uri.full' => true,
        'uri.same-window' => true,
        'uri.base' => PhabricatorEnv::getURI('/'),
      ));
  }


  /**
   * Build an engine without macros or embedded YouTube videos.
   *
   * @task engine
   */
  public static function newFeedMarkupEngine() {
    return self::newMarkupEngine(
      array(
        'macros' => false,
        'youtube' => false,
      ));
  }

  /**
   * Build an engine with the default configuration.
   *
   * @task engine
   */
  public static function newCalendarMarkupEngine() {
    return self::newMarkupEngine(array(
    ));
  }


  /**
   * Build an engine for Differential content.
   *
   * @param map<string, wild> Options. Only 'differential.diff' is read;
   *   any other keys are ignored.
   * @task engine
   */
  public static function newDifferentialMarkupEngine(array $options = array()) {
    return self::newMarkupEngine(array(
      'differential.diff' => idx($options, 'differential.diff'),
    ));
  }


  /**
   * Build an engine which generates a table of contents from headers.
   *
   * @param map<string, wild> Currently unused; accepted for compatibility
   *   with existing callers.
   * @task engine
   */
  public static function newDiffusionMarkupEngine(array $options = array()) {
    return self::newMarkupEngine(array(
      'header.generate-toc' => true,
    ));
  }

  /**
   * Get a shared engine for a named ruleset. Engines are built on first use
   * and memoized, so repeated calls with the same ruleset return the same
   * engine object.
   *
   * @param string Ruleset name: one of 'default', 'feed', 'nolinebreaks',
   *   'diffusion-readme', 'diviner' or 'extract'.
   * @throws Exception If the ruleset name is not recognized.
   * @task engine
   */
  public static function getEngine($ruleset = 'default') {
    static $engines = array();
    if (isset($engines[$ruleset])) {
      return $engines[$ruleset];
    }

    $engine = null;
    switch ($ruleset) {
      case 'default':
        $engine = self::newMarkupEngine(array());
        break;
      case 'feed':
        $engine = self::newMarkupEngine(array());
        $engine->setConfig('autoplay.disable', true);
        break;
      case 'nolinebreaks':
        $engine = self::newMarkupEngine(array());
        $engine->setConfig('preserve-linebreaks', false);
        break;
      case 'diffusion-readme':
        $engine = self::newMarkupEngine(array());
        $engine->setConfig('preserve-linebreaks', false);
        $engine->setConfig('header.generate-toc', true);
        break;
      case 'diviner':
        $engine = self::newMarkupEngine(array());
        $engine->setConfig('preserve-linebreaks', false);
  //    $engine->setConfig('diviner.renderer', new DivinerDefaultRenderer());
        $engine->setConfig('header.generate-toc', true);
        break;
      case 'extract':
        // Engine used for reference/edge extraction. Turn off anything which
        // is slow and doesn't change reference extraction.
        $engine = self::newMarkupEngine(array());
        $engine->setConfig('pygments.enabled', false);
        break;
      default:
        throw new Exception(pht('Unknown engine ruleset: %s!', $ruleset));
    }

    $engines[$ruleset] = $engine;
    return $engine;
  }

  /**
   * Get the default value for every option understood by
   * @{method:newMarkupEngine}.
   *
   * @return map<string, wild> Default options.
   * @task engine
   */
  private static function getMarkupEngineDefaultConfiguration() {
    return array(
      'pygments'      => PhabricatorEnv::getEnvConfig('pygments.enabled'),
      'youtube'       => PhabricatorEnv::getEnvConfig(
        'remarkup.enable-embedded-youtube'),
      'differential.diff' => null,
      'header.generate-toc' => false,
      'macros'        => true,
      'uri.allowed-protocols' => PhabricatorEnv::getEnvConfig(
        'uri.allowed-protocols'),
      'uri.full' => false,
      'syntax-highlighter.engine' => PhabricatorEnv::getEnvConfig(
        'syntax-highlighter.engine'),
      'preserve-linebreaks' => true,
    );
  }


  /**
   * Build a new remarkup engine.
   *
   * @param map<string, wild> Options. Missing keys are filled from
   *   @{method:getMarkupEngineDefaultConfiguration}. 'uri.base' and
   *   'uri.same-window' have no default and are only applied when set.
   * @return PhutilRemarkupEngine Configured engine.
   * @task engine
   */
  public static function newMarkupEngine(array $options) {
    // Array union: caller-supplied options win, defaults fill the gaps.
    $options += self::getMarkupEngineDefaultConfiguration();

    $engine = new PhutilRemarkupEngine();

    $engine->setConfig('preserve-linebreaks', $options['preserve-linebreaks']);

    $engine->setConfig('pygments.enabled', $options['pygments']);
    $engine->setConfig(
      'uri.allowed-protocols',
      $options['uri.allowed-protocols']);
    $engine->setConfig('differential.diff', $options['differential.diff']);
    $engine->setConfig('header.generate-toc', $options['header.generate-toc']);
    $engine->setConfig(
      'syntax-highlighter.engine',
      $options['syntax-highlighter.engine']);

    $style_map = id(new PhabricatorDefaultSyntaxStyle())
      ->getRemarkupStyleMap();
    $engine->setConfig('phutil.codeblock.style-map', $style_map);

    $engine->setConfig('uri.full', $options['uri.full']);

    if (isset($options['uri.base'])) {
      $engine->setConfig('uri.base', $options['uri.base']);
    }

    if (isset($options['uri.same-window'])) {
      $engine->setConfig('uri.same-window', $options['uri.same-window']);
    }

    // Inline rules, in the order they are handed to the block rules.
    $rules = array();
    $rules[] = new PhutilRemarkupEscapeRemarkupRule();
    $rules[] = new PhutilRemarkupEvalRule();
    $rules[] = new PhutilRemarkupMonospaceRule();


    $rules[] = new PhutilRemarkupDocumentLinkRule();
    $rules[] = new PhabricatorNavigationRemarkupRule();
    $rules[] = new PhabricatorKeyboardRemarkupRule();
    $rules[] = new PhabricatorConfigRemarkupRule();

    if ($options['youtube']) {
      $rules[] = new PhabricatorYoutubeRemarkupRule();
    }

    $rules[] = new PhabricatorIconRemarkupRule();
    $rules[] = new PhabricatorEmojiRemarkupRule();
    $rules[] = new PhabricatorHandleRemarkupRule();

    $applications = PhabricatorApplication::getAllInstalledApplications();
    foreach ($applications as $application) {
      foreach ($application->getRemarkupRules() as $rule) {
        $rules[] = $rule;
      }
    }

    $rules[] = new PhutilRemarkupHyperlinkRule();

    if ($options['macros']) {
      $rules[] = new PhabricatorImageMacroRemarkupRule();
      $rules[] = new PhabricatorMemeRemarkupRule();
    }

    $rules[] = new PhutilRemarkupBoldRule();
    $rules[] = new PhutilRemarkupItalicRule();
    $rules[] = new PhutilRemarkupDelRule();
    $rules[] = new PhutilRemarkupUnderlineRule();
    $rules[] = new PhutilRemarkupHighlightRule();
    $rules[] = new PhutilRemarkupAnchorRule();

    foreach (self::loadCustomInlineRules() as $rule) {
      $rules[] = clone $rule;
    }

    $blocks = array();
    $blocks[] = new PhutilRemarkupQuotesBlockRule();
    $blocks[] = new PhutilRemarkupReplyBlockRule();
    $blocks[] = new PhutilRemarkupLiteralBlockRule();
    $blocks[] = new PhutilRemarkupHeaderBlockRule();
    $blocks[] = new PhutilRemarkupHorizontalRuleBlockRule();
    $blocks[] = new PhutilRemarkupListBlockRule();
    $blocks[] = new PhutilRemarkupCodeBlockRule();
    $blocks[] = new PhutilRemarkupNoteBlockRule();
    $blocks[] = new PhutilRemarkupTableBlockRule();
    $blocks[] = new PhutilRemarkupSimpleTableBlockRule();
    $blocks[] = new PhutilRemarkupInterpreterBlockRule();
    $blocks[] = new PhutilRemarkupDefaultBlockRule();

    foreach (self::loadCustomBlockRules() as $rule) {
      // Clone custom block rules, matching how custom inline rules are
      // handled above. Each block rule is mutated per engine below via
      // setMarkupRules(), so a rule object reused by several engines would
      // only keep the state of the engine built last.
      // NOTE(review): this assumes the class map query can return the same
      // rule instances on repeated calls -- confirm.
      $blocks[] = clone $rule;
    }

    foreach ($blocks as $block) {
      $block->setMarkupRules($rules);
    }

    $engine->setBlockRules($blocks);

    return $engine;
  }

  /**
   * Collect the PHIDs mentioned in a list of remarkup blocks.
   *
   * Null and empty blocks are skipped.
   *
   * @param PhabricatorUser     Viewer to render the blocks as.
   * @param list<string|null>   Remarkup blocks to examine.
   * @return map                Union of the per-block mention metadata.
   */
  public static function extractPHIDsFromMentions(
    PhabricatorUser $viewer,
    array $content_blocks) {

    $mentions = array();

    $engine = self::newDifferentialMarkupEngine();
    $engine->setConfig('viewer', $viewer);

    foreach ($content_blocks as $content_block) {
      if ($content_block === null) {
        continue;
      }

      if (!strlen($content_block)) {
        continue;
      }

      $engine->markupText($content_block);
      $phids = $engine->getTextMetadata(
        PhabricatorMentionRemarkupRule::KEY_MENTIONED,
        array());
      $mentions += $phids;
    }

    return $mentions;
  }

  /**
   * Collect the PHIDs of files embedded in a list of remarkup blocks.
   *
   * Null and empty blocks are skipped, as in
   * @{method:extractPHIDsFromMentions}.
   *
   * @param PhabricatorUser     Viewer to render the blocks as.
   * @param list<string|null>   Remarkup blocks to examine.
   * @return list<string>       Unique file PHIDs.
   */
  public static function extractFilePHIDsFromEmbeddedFiles(
    PhabricatorUser $viewer,
    array $content_blocks) {
    $files = array();

    $engine = self::newDifferentialMarkupEngine();
    $engine->setConfig('viewer', $viewer);

    foreach ($content_blocks as $content_block) {
      // Don't hand null or empty text to the engine: there is nothing to
      // extract from it.
      if ($content_block === null) {
        continue;
      }

      if (!strlen($content_block)) {
        continue;
      }

      $engine->markupText($content_block);
      $phids = $engine->getTextMetadata(
        PhabricatorEmbedFileRemarkupRule::KEY_ATTACH_INTENT_FILE_PHIDS,
        array());
      foreach ($phids as $phid) {
        // Key by PHID to deduplicate.
        $files[$phid] = $phid;
      }
    }

    return array_values($files);
  }

  /**
   * Summarize a corpus down to roughly its first sentence.
   *
   * Only the text before the first run of newlines is considered. It is cut
   * after the first run of '.', '?' or '!' matched by the pattern below, and
   * the result is truncated to at most 128 glyphs.
   *
   * @param string Corpus to summarize.
   * @return string Summarized corpus.
   */
  public static function summarizeSentence($corpus) {
    $corpus = trim($corpus);
    $blocks = preg_split('/\n+/', $corpus, 2);
    $block = head($blocks);

    // With a limit of 2 and delimiter capture this yields, when a terminator
    // is found: [text before, terminator, remainder].
    $sentences = preg_split(
      '/\b([.?!]+)\B/u',
      $block,
      2,
      PREG_SPLIT_DELIM_CAPTURE);

    if (count($sentences) > 1) {
      $result = $sentences[0].$sentences[1];
    } else {
      $result = head($sentences);
    }

    return id(new PhutilUTF8StringTruncator())
      ->setMaximumGlyphs(128)
      ->truncateString($result);
  }

  /**
   * Produce a corpus summary, in a way that shortens the underlying text
   * without truncating it somewhere awkward.
   *
   * TODO: We could do a better job of this.
   *
   * @param string  Remarkup corpus to summarize.
   * @return string Summarized corpus.
   */
  public static function summarize($corpus) {

    // Major goals here are:
    //  - Don't split in the middle of a character (utf-8).
    //  - Don't split in the middle of, e.g., **bold** text, since
    //    we end up with hanging '**' in the summary.
    //  - Try not to pick an image macro, header, embedded file, etc.
    //  - Hopefully don't return too much text. We don't explicitly limit
    //    this right now.

    $blocks = preg_split("/\n *\n\s*/", $corpus);

    $best = null;
    foreach ($blocks as $block) {
      // This is a test for normal spaces in the block, i.e. a heuristic to
      // distinguish standard paragraphs from things like image macros. It may
      // not work well for non-latin text. We prefer to summarize with a
      // paragraph of normal words over an image macro, if possible.
      $has_space = preg_match('/\w\s\w/', $block);

      // This is a test to find embedded images and headers. We prefer to
      // summarize with a normal paragraph over a header or an embedded object,
      // if possible.
      $has_embed = preg_match('/^[{=]/', $block);

      if ($has_space && !$has_embed) {
        // This seems like a good summary, so return it.
        return $block;
      }

      if (!$best) {
        // This is the first block we found; if everything is garbage just
        // use the first block.
        $best = $block;
      }
    }

    return $best;
  }

  /**
   * Load all custom inline rules, i.e. concrete subclasses of
   * PhabricatorRemarkupCustomInlineRule.
   *
   * @return map Custom inline rule objects.
   */
  private static function loadCustomInlineRules() {
    return id(new PhutilClassMapQuery())
      ->setAncestorClass('PhabricatorRemarkupCustomInlineRule')
      ->execute();
  }

  /**
   * Load all custom block rules, i.e. concrete subclasses of
   * PhabricatorRemarkupCustomBlockRule.
   *
   * @return map Custom block rule objects.
   */
  private static function loadCustomBlockRules() {
    return id(new PhutilClassMapQuery())
      ->setAncestorClass('PhabricatorRemarkupCustomBlockRule')
      ->execute();
  }

  /**
   * Compute a keyed digest of some remarkup content in the context of an
   * object.
   *
   * The digest covers the object's class, its ID (only when the object is a
   * PhabricatorLiskDAO) and the content.
   *
   * @param object  Object the content belongs to.
   * @param string  Remarkup content.
   * @return string Digest of the content.
   */
  public static function digestRemarkupContent($object, $content) {
    $parts = array();
    $parts[] = get_class($object);

    if ($object instanceof PhabricatorLiskDAO) {
      $parts[] = $object->getID();
    }

    $parts[] = $content;

    $message = implode("\n", $parts);

    return PhabricatorHash::digestWithNamedKey($message, 'remarkup');
  }

}