Path: blob/master/src/infrastructure/markup/blockrule/PhutilRemarkupListBlockRule.php
12241 views
<?php

/**
 * Remarkup block rule which recognizes list blocks (bulleted, numbered and
 * checkbox lists) and renders them either as nested HTML lists or, when the
 * engine is in text mode, as indented plain text.
 *
 * Items may be nested either by indentation or by repeating the marker
 * character (`-`, `--`, `---`, ...).
 */
final class PhutilRemarkupListBlockRule extends PhutilRemarkupBlockRule {

  /**
   * The maximum sub-list depth you can nest to. Avoids silliness and blowing
   * the stack.
   */
  const MAXIMUM_LIST_NESTING_DEPTH = 12;

  /**
   * Matches the first line of a list. Unlike CONT_BLOCK_PATTERN, a numbered
   * marker must not begin with "0", and the number is captured (group 1) so
   * the list can start counting from it.
   */
  const START_BLOCK_PATTERN = '@^\s*(?:[-*#]+|([1-9][0-9]*)[.)]|\[\D?\])\s+@';

  /**
   * Matches any later line which begins a new list item.
   */
  const CONT_BLOCK_PATTERN = '@^\s*(?:[-*#]+|[0-9]+[.)]|\[\D?\])\s+@';

  /**
   * Matches the leading bullet or number marker (but not a checkbox marker)
   * so it can be stripped from the item text.
   */
  const STRIP_BLOCK_PATTERN = '@^\s*(?:[-*#]+|[0-9]+[.)])\s*@';

  /**
   * This rule must apply before the Code block rule because it needs to
   * win blocks which begin ` - Lorem ipsum`.
   */
  public function getPriority() {
    return 400;
  }

  /**
   * Count how many lines, starting at `$cursor`, belong to a list block.
   *
   * @param array $lines  All input lines.
   * @param int   $cursor Index of the first line to examine.
   * @return int Number of consecutive lines this rule claims, or 0 if the
   *             line at `$cursor` does not start a list.
   */
  public function getMatchingLineCount(array $lines, $cursor) {
    $num_lines = 0;

    $first_line = $cursor;
    $is_one_line = false;
    while (isset($lines[$cursor])) {
      if (!$num_lines) {
        // The very first line must look like the start of a list.
        if (preg_match(self::START_BLOCK_PATTERN, $lines[$cursor])) {
          $num_lines++;
          $cursor++;
          $is_one_line = true;
          continue;
        }
      } else {
        // Any later line which starts a new item extends the block.
        if (preg_match(self::CONT_BLOCK_PATTERN, $lines[$cursor])) {
          $num_lines++;
          $cursor++;
          $is_one_line = false;
          continue;
        }

        // Allow lists to continue across multiple paragraphs, as long as lines
        // are indented or a single empty line separates indented lines.

        $this_empty = !strlen(trim($lines[$cursor]));
        $this_indented = preg_match('/^ /', $lines[$cursor]);

        // A missing next line is treated like an empty, unindented one.
        $next_empty = true;
        $next_indented = false;
        if (isset($lines[$cursor + 1])) {
          $next_empty = !strlen(trim($lines[$cursor + 1]));
          $next_indented = preg_match('/^ /', $lines[$cursor + 1]);
        }

        if ($this_empty || $this_indented) {
          if (($this_indented && !$this_empty) ||
              ($next_indented && !$next_empty)) {
            $num_lines++;
            $cursor++;
            continue;
          }
        }

        // A trailing empty line is consumed as part of the block, but it
        // also ends the block.
        if ($this_empty) {
          $num_lines++;
        }
      }

      break;
    }

    // If this list only has one item in it, and the list marker is "#", and
    // it's not the last line in the input, parse it as a header instead of a
    // list. This produces better behavior for alternate Markdown headers.

    if ($is_one_line) {
      if (($first_line + $num_lines) < count($lines)) {
        if (strncmp($lines[$first_line], '#', 1) === 0) {
          return 0;
        }
      }
    }

    return $num_lines;
  }

  /**
   * Render the raw text of a list block.
   *
   * @param string $text     Raw text of the block, lines separated by "\n".
   * @param mixed  $children Not used by this rule.
   * @return mixed In text mode, a plain string. Otherwise, the result of
   *               phutil_implode_html() over the rendered fragments.
   */
  public function markupText($text, $children) {
    $items = array();
    $lines = explode("\n", $text);

    // We allow users to delimit lists using either differing indentation
    // levels:
    //
    //   - a
    //     - b
    //
    // ...or differing numbers of item-delimiter characters:
    //
    //   - a
    //   -- b
    //
    // If they use the second style but block-indent the whole list, we'll
    // get the depth counts wrong for the first item. To prevent this,
    // un-indent every item by the minimum indentation level for the whole
    // block before we begin parsing.

    $regex = self::START_BLOCK_PATTERN;
    $min_space = PHP_INT_MAX;
    foreach ($lines as $line) {
      $matches = null;
      if (preg_match($regex, $line)) {
        // Once the first item has been seen, later items only need to match
        // the continuation pattern.
        $regex = self::CONT_BLOCK_PATTERN;
        if (preg_match('/^(\s+)/', $line, $matches)) {
          $space = strlen($matches[1]);
        } else {
          $space = 0;
        }
        $min_space = min($min_space, $space);
      }
    }

    $regex = self::START_BLOCK_PATTERN;
    if ($min_space) {
      foreach ($lines as $key => $line) {
        // Only lines which begin an item are un-indented.
        if (preg_match($regex, $line)) {
          $regex = self::CONT_BLOCK_PATTERN;
          $lines[$key] = substr($line, $min_space);
        }
      }
    }


    // The input text may have linewraps in it, like this:
    //
    //   - derp derp derp derp
    //     derp derp derp derp
    //   - blarp blarp blarp blarp
    //
    // Group text lines together into list items, stored in $items. So the
    // result in the above case will be:
    //
    //   array(
    //     array(
    //       "- derp derp derp derp",
    //       "  derp derp derp derp",
    //     ),
    //     array(
    //       "- blarp blarp blarp blarp",
    //     ),
    //   );

    $item = array();
    $starts_at = null;
    $regex = self::START_BLOCK_PATTERN;
    foreach ($lines as $line) {
      $match = null;
      if (preg_match($regex, $line, $match)) {
        // Only START_BLOCK_PATTERN has a capture group, so the starting
        // number can only come from the first line of the list.
        if (!$starts_at && !empty($match[1])) {
          $starts_at = $match[1];
        }
        $regex = self::CONT_BLOCK_PATTERN;
        if ($item) {
          $items[] = $item;
          $item = array();
        }
      }
      $item[] = $line;
    }
    if ($item) {
      $items[] = $item;
    }
    if (!$starts_at) {
      $starts_at = 1;
    }


    // Process each item to normalize the text, remove line wrapping, and
    // determine its depth (indentation level) and style (ordered vs unordered).
    //
    // We preserve consecutive linebreaks and interpret them as paragraph
    // breaks.
    //
    // Given the above example, the processed array will look like:
    //
    //   array(
    //     array(
    //       'text' => 'derp derp derp derp derp derp derp derp',
    //       'depth' => 0,
    //       'style' => '-',
    //     ),
    //     array(
    //       'text' => 'blarp blarp blarp blarp',
    //       'depth' => 0,
    //       'style' => '-',
    //     ),
    //   );

    $has_marks = false;
    foreach ($items as $key => $item) {
      // Trim space around newlines, to strip trailing whitespace and formatting
      // indentation.
      $item = preg_replace('/ *(\n+) */', '\1', implode("\n", $item));

      // Replace single newlines with a space. Preserve multiple newlines as
      // paragraph breaks.
      $item = preg_replace('/(?<!\n)\n(?!\n)/', ' ', $item);

      $item = rtrim($item);

      if (!strlen($item)) {
        unset($items[$key]);
        continue;
      }

      $matches = null;
      if (preg_match('/^\s*([-*#]{2,})/', $item, $matches)) {
        // Alternate-style indents; use number of list item symbols.
        $depth = strlen($matches[1]) - 1;
      } else if (preg_match('/^(\s+)/', $item, $matches)) {
        // Markdown-style indents; use indent depth.
        $depth = strlen($matches[1]);
      } else {
        $depth = 0;
      }

      // Items which begin with "#" or a digit are ordered; everything else
      // is unordered.
      if (preg_match('/^\s*(?:#|[0-9])/', $item)) {
        $style = '#';
      } else {
        $style = '-';
      }

      // Strip leading indicators off the item.
      $text = preg_replace(self::STRIP_BLOCK_PATTERN, '', $item);

      // Look for "[]", "[ ]", "[*]", "[x]", etc., which we render as a
      // checkbox. We don't render [1], [2], etc., as checkboxes, as these
      // are often used as footnotes.
      $mark = null;
      $matches = null;
      if (preg_match('/^\s*\[(\D?)\]\s*/', $text, $matches)) {
        // An empty or blank box is unchecked; any other character checks it.
        if (strlen(trim($matches[1]))) {
          $mark = true;
        } else {
          $mark = false;
        }
        $has_marks = true;
        $text = substr($text, strlen($matches[0]));
      }

      $items[$key] = array(
        'text' => $text,
        'depth' => $depth,
        'style' => $style,
        'mark' => $mark,
      );
    }
    // Re-index, since empty items may have been removed above.
    $items = array_values($items);


    // Users can create a sub-list by indenting any deeper amount than the
    // previous list, so these are both valid:
    //
    //   - a
    //     - b
    //
    //   - a
    //       - b
    //
    // In the former case, we'll have depths (0, 2). In the latter case, depths
    // (0, 4). We don't actually care about how many spaces there are, only
    // how many list indentation levels (that is, we want to map both of
    // those cases to (0, 1), indicating "outermost list" and "first sublist").
    //
    // This is made more complicated because lists at two different indentation
    // levels might be at the same list level:
    //
    //   - a
    //     - b
    //   - c
    //       - d
    //
    // Here, 'b' and 'd' are at the same list level (2) but different indent
    // levels (2, 4).
    //
    // Users can also create "staircases" like this:
    //
    //       - a
    //     - b
    //   # c
    //
    // While this is silly, we'd like to render it as faithfully as possible.
    //
    // In order to do this, we convert the list of nodes into a tree,
    // normalizing indentation levels and inserting dummy nodes as necessary to
    // make the tree well-formed. See additional notes at buildTree().
    //
    // In the case above, the result is a tree like this:
    //
    //   - <null>
    //     - <null>
    //       - a
    //     - b
    //   # c

    $tree = $this->buildTree($items, 0, count($items), 0);


    // We may need to open a list on a <null> node, but they do not have
    // list style information yet. We need to propagate list style information
    // backward through the tree. In the above example, the tree now looks
    // like this:
    //
    //   - <null (style=#)>
    //     - <null (style=-)>
    //       - a
    //     - b
    //   # c

    $this->adjustTreeStyleInformation($tree);

    // Finally, we have enough information to render the tree.

    $out = $this->renderTree($tree, 0, $has_marks, $starts_at);

    if ($this->getEngine()->isTextMode()) {
      $out = implode('', $out);
      $out = rtrim($out, "\n");
      // Strip trailing spaces from every line.
      $out = preg_replace('/ +$/m', '', $out);
      return $out;
    }

    return phutil_implode_html('', $out);
  }

  /**
   * Convert the flat item list in the half-open range [$l, $r) into a tree of
   * nodes. See additional notes in @{method:markupText}.
   *
   * Each returned node is the source item (keys 'text', 'depth', 'style',
   * 'mark') plus 'level' and a nested 'items' list. Dummy nodes with a null
   * 'text', 'style' and 'mark' are inserted when deeper items come before the
   * shallowest item in the range.
   *
   * @param array $items     Flat list of normalized items.
   * @param int   $l         First index to consider (inclusive).
   * @param int   $r         Last index to consider (exclusive).
   * @param int   $cur_level List nesting level of the nodes being built.
   * @return array List of nodes at `$cur_level`.
   */
  private function buildTree(array $items, $l, $r, $cur_level) {
    if ($l == $r) {
      return array();
    }

    if ($cur_level > self::MAXIMUM_LIST_NESTING_DEPTH) {
      // This algorithm is recursive and we don't need you blowing the stack
      // with your oh-so-clever 50,000-item-deep list. Cap indentation levels
      // at a reasonable number and just shove everything deeper up to this
      // level.
      $nodes = array();
      for ($ii = $l; $ii < $r; $ii++) {
        $nodes[] = array(
          'level' => $cur_level,
          'items' => array(),
        ) + $items[$ii];
      }
      return $nodes;
    }

    // Scanning right-to-left with "<=" leaves $min at the leftmost item which
    // has the minimum depth in the range.
    $min = $l;
    for ($ii = $r - 1; $ii >= $l; $ii--) {
      if ($items[$ii]['depth'] <= $items[$min]['depth']) {
        $min = $ii;
      }
    }

    $min_depth = $items[$min]['depth'];

    $nodes = array();
    if ($min != $l) {
      // Items before the shallowest item are deeper than it. Hang them under
      // a dummy node so the tree stays well-formed.
      $nodes[] = array(
        'text' => null,
        'level' => $cur_level,
        'style' => null,
        'mark' => null,
        'items' => $this->buildTree($items, $l, $min, $cur_level + 1),
      );
    }

    // Every item at the minimum depth becomes a node at this level. The items
    // between two such nodes become the children of the earlier one.
    $last = $min;
    for ($ii = $last + 1; $ii < $r; $ii++) {
      if ($items[$ii]['depth'] == $min_depth) {
        $nodes[] = array(
          'level' => $cur_level,
          'items' => $this->buildTree($items, $last + 1, $ii, $cur_level + 1),
        ) + $items[$last];
        $last = $ii;
      }
    }
    $nodes[] = array(
      'level' => $cur_level,
      'items' => $this->buildTree($items, $last + 1, $r, $cur_level + 1),
    ) + $items[$last];

    return $nodes;
  }


  /**
   * Fill in the 'style' of dummy nodes, in place. See additional notes in
   * @{method:markupText}.
   *
   * @param array &$tree List of nodes at one level; modified by reference.
   * @return void
   */
  private function adjustTreeStyleInformation(array &$tree) {
    // The effect here is just to walk backward through the nodes at this level
    // and apply the first style in the list to any empty nodes we inserted
    // before it. As we go, also recurse down the tree.

    // Default used if no node at this level carries a style.
    $style = '-';
    for ($ii = count($tree) - 1; $ii >= 0; $ii--) {
      if ($tree[$ii]['style'] !== null) {
        // This is the earliest node we've seen with style, so set the
        // style to its style.
        $style = $tree[$ii]['style'];
      } else {
        // This node has no style, so apply the current style.
        $tree[$ii]['style'] = $style;
      }
      if ($tree[$ii]['items']) {
        $this->adjustTreeStyleInformation($tree[$ii]['items']);
      }
    }
  }


  /**
   * Render one level of the tree, recursing into sub-lists. See additional
   * notes in @{method:markupText}.
   *
   * The style of the first node decides whether the whole level is rendered
   * as an ordered or unordered list.
   *
   * @param array $tree      Nodes at this level.
   * @param int   $level     Nesting level; used for text-mode indentation.
   * @param bool  $has_marks True if any item in the block has a checkbox.
   * @param mixed $starts_at Number the first item of an ordered list starts
   *                         at. Sub-lists always start at the default of 1.
   * @return array Flat list of output fragments.
   */
  private function renderTree(
    array $tree,
    $level,
    $has_marks,
    $starts_at = 1) {

    $style = idx(head($tree), 'style');
    $is_text_mode = $this->getEngine()->isTextMode();

    // Choose the tag once, so the opening and closing tags always agree and
    // the tag is always defined, even for an unexpected style.
    $tag = ($style == '#') ? 'ol' : 'ul';

    $out = array();

    if (!$is_text_mode) {
      // "start" is only meaningful on ordered lists. A "staircase" list
      // which begins with a deeper numbered item can reach this point with
      // an unordered top level and $starts_at > 1.
      $start_attr = null;
      if ($tag === 'ol' &&
          ctype_digit(phutil_string_cast($starts_at)) &&
          $starts_at > 1) {
        $start_attr = hsprintf(' start="%d"', $starts_at);
      }

      if ($has_marks) {
        $out[] = hsprintf(
          '<%s class="remarkup-list remarkup-list-with-checkmarks"%s>',
          $tag,
          $start_attr);
      } else {
        $out[] = hsprintf(
          '<%s class="remarkup-list"%s>',
          $tag,
          $start_attr);
      }

      $out[] = "\n";
    }

    $number = $starts_at;
    foreach ($tree as $item) {
      if ($is_text_mode) {
        if ($item['text'] === null) {
          // Don't render anything.
        } else {
          $indent = str_repeat(' ', 2 * $level);
          $out[] = $indent;
          if ($item['mark'] !== null) {
            if ($item['mark']) {
              $out[] = '[X] ';
            } else {
              $out[] = '[ ] ';
            }
          } else {
            switch ($style) {
              case '#':
                $out[] = $number.'. ';
                $number++;
                break;
              case '-':
                $out[] = '- ';
                break;
            }
          }

          // Blank-line-separated paragraphs within one item.
          $parts = preg_split('/\n{2,}/', $item['text']);
          foreach ($parts as $key => $part) {
            if ($key != 0) {
              $out[] = "\n\n ".$indent;
            }
            $out[] = $this->applyRules($part);
          }
          $out[] = "\n";
        }
      } else {
        if ($item['text'] === null) {
          // Dummy node: only exists to hold a sub-list.
          $out[] = hsprintf('<li class="remarkup-list-item phantom-item">');
        } else {
          if ($item['mark'] !== null) {
            if ($item['mark']) {
              $out[] = hsprintf(
                '<li class="remarkup-list-item remarkup-checked-item">');
            } else {
              $out[] = hsprintf(
                '<li class="remarkup-list-item remarkup-unchecked-item">');
            }
            $out[] = phutil_tag(
              'input',
              array(
                'type' => 'checkbox',
                'checked' => ($item['mark'] ? 'checked' : null),
                'disabled' => 'disabled',
              ));
            $out[] = ' ';
          } else {
            $out[] = hsprintf('<li class="remarkup-list-item">');
          }

          // Blank-line-separated paragraphs within one item.
          $parts = preg_split('/\n{2,}/', $item['text']);
          foreach ($parts as $key => $part) {
            if ($key != 0) {
              $out[] = array(
                "\n",
                phutil_tag('br'),
                phutil_tag('br'),
                "\n",
              );
            }
            $out[] = $this->applyRules($part);
          }
        }
      }

      if ($item['items']) {
        $subitems = $this->renderTree($item['items'], $level + 1, $has_marks);
        foreach ($subitems as $i) {
          $out[] = $i;
        }
      }
      if (!$is_text_mode) {
        $out[] = hsprintf("</li>\n");
      }
    }

    if (!$is_text_mode) {
      $out[] = hsprintf('</%s>', $tag);
    }

    return $out;
  }

}