Path: blob/master/src/infrastructure/markup/syntax/highlighter/xhpast/PhutilXHPASTSyntaxHighlighterFuture.php
13473 views
<?php

/**
 * Future proxy which converts the result of a proxied future into
 * syntax-highlighted HTML for PHP source, using XHPAST.
 *
 * The proxied result is handed to
 * @{method:XHPASTTree::newFromDataAndResolvedExecFuture} together with the
 * original source text. If building or walking the tree throws, highlighting
 * falls back to @{class:PhutilLexerSyntaxHighlighter} with the PHP fragment
 * lexer.
 */
final class PhutilXHPASTSyntaxHighlighterFuture extends FutureProxy {

  private $source;
  private $scrub;

  /**
   * @param Future  Future to proxy; its result is later passed to XHPAST.
   * @param string  Source text being highlighted.
   * @param bool    True if "<?php\n" was prepended to the source to force it
   *                to parse as PHP; the prefix is then removed from the
   *                highlighted output.
   */
  public function __construct(Future $proxied, $source, $scrub = false) {
    parent::__construct($proxied);
    $this->source = $source;
    $this->scrub = $scrub;
  }

  /**
   * Convert the proxied future's result into highlighted markup.
   *
   * @param wild  Result of the proxied future.
   * @return wild Highlighted markup, either from the XHPAST pass or from the
   *              fragment lexer fallback.
   */
  protected function didReceiveResult($result) {
    try {
      return $this->applyXHPHighlight($result);
    } catch (Exception $ex) {
      // XHP can't highlight source that isn't syntactically valid. Fall back
      // to the fragment lexer.

      $source = $this->source;
      if ($this->scrub) {
        // Drop the first line (the prepended open tag) before handing the
        // text to the fallback lexer. The pattern is anchored at the start of
        // the string, so at most one line is removed.
        $source = preg_replace('/^.*\n/', '', $source);
      }

      return id(new PhutilLexerSyntaxHighlighter())
        ->setConfig('lexer', new PhutilPHPFragmentLexer())
        ->setConfig('language', 'php')
        ->getHighlightFuture($source)
        ->resolve();
    }
  }

  /**
   * Build highlighted markup from a resolved XHPAST result.
   *
   * @param wild  Resolved result of the proxied future.
   * @return wild Result of @{function:phutil_implode_html} over the
   *              per-token output.
   * @throws Exception  When scrubbing and the token stream does not begin
   *                    with T_OPEN_TAG followed by a T_WHITESPACE token that
   *                    starts with a newline.
   */
  private function applyXHPHighlight($result) {
    // Identifiers which the tokenizer reports as T_STRING but which should be
    // rendered as keywords.
    static $magic = array(
      'true' => true,
      'false' => true,
      'null' => true,
    );

    // We perform two passes here: one using the AST to find symbols we care
    // about -- particularly, class names and function names. These are used
    // in the crossreference stuff to link into Diffusion. After we've done our
    // AST pass, we do a followup pass on the token stream to catch all the
    // simple stuff like strings and comments.

    $tree = XHPASTTree::newFromDataAndResolvedExecFuture(
      $this->source,
      $result);

    $root = $tree->getRootNode();

    $tokens = $root->getTokens();
    $interesting_symbols = $this->findInterestingSymbols($root);

    if ($this->scrub) {
      // If we're scrubbing, we prepended "<?php\n" to the text to force the
      // highlighter to treat it as PHP source. Now, we need to remove that.

      $ok = (count($tokens) >= 2)
        && ($tokens[0]->getTypeName() === 'T_OPEN_TAG')
        && ($tokens[1]->getTypeName() === 'T_WHITESPACE');

      if (!$ok) {
        throw new Exception(
          pht(
            'Expected T_OPEN_TAG, T_WHITESPACE tokens at head of results '.
            'for highlighting parse of PHP snippet.'));
      }

      // Remove the "<?php". The remaining tokens keep their original keys,
      // which is what lets them line up with the symbol map below.
      unset($tokens[0]);

      $value = $tokens[1]->getValue();
      if ((strlen($value) < 1) || ($value[0] !== "\n")) {
        throw new Exception(
          pht(
            'Expected "\\n" at beginning of T_WHITESPACE token at head of '.
            'tokens for highlighting parse of PHP snippet.'));
      }

      // Strip only the single newline we added; keep any other whitespace.
      $value = substr($value, 1);
      $tokens[1]->overwriteValue($value);
    }

    $out = array();
    foreach ($tokens as $key => $token) {
      $value = $token->getValue();
      $class = null;
      $multi = false;
      $attrs = array();

      if (isset($interesting_symbols[$key])) {
        // The AST pass already classified this token.
        $sym = $interesting_symbols[$key];
        $class = $sym[0];
        $attrs['data-symbol-context'] = idx($sym, 'context');
        $attrs['data-symbol-name'] = idx($sym, 'symbol');
      } else {
        switch ($token->getTypeName()) {
          case 'T_WHITESPACE':
            break;
          case 'T_DOC_COMMENT':
            $class = 'dc';
            $multi = true;
            break;
          case 'T_COMMENT':
            $class = 'c';
            $multi = true;
            break;
          case 'T_CONSTANT_ENCAPSED_STRING':
          case 'T_ENCAPSED_AND_WHITESPACE':
          case 'T_INLINE_HTML':
            $class = 's';
            $multi = true;
            break;
          case 'T_VARIABLE':
            $class = 'nv';
            break;
          case 'T_OPEN_TAG':
          case 'T_OPEN_TAG_WITH_ECHO':
          case 'T_CLOSE_TAG':
            $class = 'o';
            break;
          case 'T_LNUMBER':
          case 'T_DNUMBER':
            $class = 'm';
            break;
          case 'T_STRING':
            if (isset($magic[strtolower($value)])) {
              $class = 'k';
            } else {
              $class = 'nx';
            }
            break;
          default:
            $class = 'k';
            break;
        }
      }

      if ($class) {
        $attrs['class'] = $class;
        if ($multi) {
          // If the token may have multiple lines in it, make sure each
          // <span> crosses no more than one line so the lines can be put
          // in a table, etc., later.
          $value = phutil_split_lines($value, $retain_endings = true);
        } else {
          $value = array($value);
        }
        foreach ($value as $val) {
          $out[] = phutil_tag('span', $attrs, $val);
        }
      } else {
        $out[] = $value;
      }
    }

    return phutil_implode_html('', $out);
  }

  /**
   * Walk the AST and collect tokens which name classes, functions, and
   * members.
   *
   * @param XHPASTNode  Root node of the parsed source.
   * @return map<wild, map>  Map from token key to a dictionary whose element
   *                         0 is the CSS class ('nc', 'nf' or 'na'), with a
   *                         'symbol' entry and, for static accesses on a
   *                         non-builtin class name, a 'context' entry.
   */
  private function findInterestingSymbols(XHPASTNode $root) {
    // Class name symbols appear in:
    //    class X extends X implements X, X { ... }
    //    new X();
    //    $x instanceof X
    //    catch (X $x)
    //    function f(X $x)
    //    X::f();
    //    X::$m;
    //    X::CONST;

    // These are PHP builtin tokens which can appear in a classname context.
    // Don't link them since they don't go anywhere useful. PHP matches these
    // keywords case-insensitively, so lookups must lowercase the token first.
    static $builtin_class_tokens = array(
      'self' => true,
      'parent' => true,
      'static' => true,
    );

    // Fortunately XHPAST puts all of these in a special node type so it's
    // easy to find them.
    $result_map = array();
    $class_names = $root->selectDescendantsOfType('n_CLASS_NAME');
    foreach ($class_names as $class_name) {
      foreach ($class_name->getTokens() as $key => $token) {
        if (isset($builtin_class_tokens[strtolower($token->getValue())])) {
          // This is something like "self::method()".
          continue;
        }
        $result_map[$key] = array(
          'nc', // "Name, Class"
          'symbol' => $class_name->getConcreteString(),
        );
      }
    }

    // Function name symbols appear in:
    //    f()

    $function_calls = $root->selectDescendantsOfType('n_FUNCTION_CALL');
    foreach ($function_calls as $call) {
      $callee = $call->getChildByIndex(0);
      if ($callee->getTypeName() === 'n_SYMBOL_NAME') {
        // This is a normal function call, not some $f() shenanigans.
        foreach ($callee->getTokens() as $key => $token) {
          $result_map[$key] = array(
            'nf', // "Name, Function"
            'symbol' => $callee->getConcreteString(),
          );
        }
      }
    }

    // Upon encountering $x->y, link y without context, since $x is unknown.

    $prop_access = $root->selectDescendantsOfType('n_OBJECT_PROPERTY_ACCESS');
    foreach ($prop_access as $access) {
      $right = $access->getChildByIndex(1);
      if ($right->getTypeName() === 'n_INDEX_ACCESS') {
        // otherwise $x->y[0] doesn't get highlighted
        $right = $right->getChildByIndex(0);
      }
      if ($right->getTypeName() === 'n_STRING') {
        foreach ($right->getTokens() as $key => $token) {
          $result_map[$key] = array(
            'na', // "Name, Attribute"
            'symbol' => $right->getConcreteString(),
          );
        }
      }
    }

    // Upon encountering x::y, try to link y with context x.

    $static_access = $root->selectDescendantsOfType('n_CLASS_STATIC_ACCESS');
    foreach ($static_access as $access) {
      $class = $access->getChildByIndex(0);
      $right = $access->getChildByIndex(1);

      if ($class->getTypeName() !== 'n_CLASS_NAME') {
        continue;
      }

      $right_type = $right->getTypeName();
      if ($right_type !== 'n_STRING' && $right_type !== 'n_VARIABLE') {
        continue;
      }

      $classname = head($class->getTokens())->getValue();
      $result = array(
        'na',
        // Static properties are written "X::$m"; the symbol name omits "$".
        'symbol' => ltrim($right->getConcreteString(), '$'),
      );
      if (!isset($builtin_class_tokens[strtolower($classname)])) {
        $result['context'] = $classname;
      }
      foreach ($right->getTokens() as $key => $token) {
        $result_map[$key] = $result;
      }
    }

    return $result_map;
  }

}