<?php

// Source: GitHub repository freebsd/phabricator
// Path: src/infrastructure/markup/blockrule/PhutilRemarkupListBlockRule.php

/**
 * Remarkup block rule which recognizes list blocks (bulleted, numbered and
 * checkbox lists) and renders them either as nested HTML lists or, when the
 * engine is in text mode, as indented plain text.
 */
final class PhutilRemarkupListBlockRule extends PhutilRemarkupBlockRule {

  /**
   * This rule must apply before the Code block rule because it needs to
   * win blocks which begin ` - Lorem ipsum`.
   *
   * @return int Priority of this rule (always 400).
   */
  public function getPriority() {
    return 400;
  }

  /**
   * Count how many consecutive lines, starting at `$cursor`, belong to a list
   * block.
   *
   * The first line must match @{const:START_BLOCK_PATTERN}. Following lines
   * are consumed while they match @{const:CONT_BLOCK_PATTERN}, are indented
   * and non-empty, or are immediately followed by an indented non-empty line.
   * An empty line which ends the block is still counted as part of it.
   *
   * @param list<string> $lines Lines of the document being parsed.
   * @param int $cursor Index of the first line to examine.
   * @return int Number of lines in the list block, or 0 if no list starts at
   *   `$cursor` (or if the block should be treated as a "#" header instead).
   */
  public function getMatchingLineCount(array $lines, $cursor) {
    $num_lines = 0;

    $first_line = $cursor;

    // True only while the sole list-marker line seen so far is the first one.
    $is_one_line = false;
    while (isset($lines[$cursor])) {
      if (!$num_lines) {
        // Nothing matched yet: the first line must open a list.
        if (preg_match(self::START_BLOCK_PATTERN, $lines[$cursor])) {
          $num_lines++;
          $cursor++;
          $is_one_line = true;
          continue;
        }
      } else {
        // Inside a list: another marker line starts a further item.
        if (preg_match(self::CONT_BLOCK_PATTERN, $lines[$cursor])) {
          $num_lines++;
          $cursor++;
          $is_one_line = false;
          continue;
        }

        // Allow lists to continue across multiple paragraphs, as long as lines
        // are indented or a single empty line separates indented lines.

        $this_empty = !strlen(trim($lines[$cursor]));
        $this_indented = preg_match('/^ /', $lines[$cursor]);

        // A missing next line is treated as empty and unindented.
        $next_empty = true;
        $next_indented = false;
        if (isset($lines[$cursor + 1])) {
          $next_empty = !strlen(trim($lines[$cursor + 1]));
          $next_indented = preg_match('/^ /', $lines[$cursor + 1]);
        }

        if ($this_empty || $this_indented) {
          if (($this_indented && !$this_empty) ||
              ($next_indented && !$next_empty)) {
            $num_lines++;
            $cursor++;
            continue;
          }
        }

        // A terminating empty line is consumed as part of this block.
        if ($this_empty) {
          $num_lines++;
        }
      }

      break;
    }

    // If this list only has one item in it, and the list marker is "#", and
    // it's not the last line in the input, parse it as a header instead of a
    // list. This produces better behavior for alternate Markdown headers.

    if ($is_one_line) {
      if (($first_line + $num_lines) < count($lines)) {
        if (strncmp($lines[$first_line], '#', 1) === 0) {
          return 0;
        }
      }
    }

    return $num_lines;
  }

  /**
   * The maximum sub-list depth you can nest to. Avoids silliness and blowing
   * the stack.
   */
  const MAXIMUM_LIST_NESTING_DEPTH = 12;

  /**
   * Matches the first line of a list. A numeric marker must not begin with
   * "0"; its digits are captured in group 1.
   */
  const START_BLOCK_PATTERN = '@^\s*(?:[-*#]+|([1-9][0-9]*)[.)]|\[\D?\])\s+@';

  /**
   * Matches a line which begins a subsequent list item. Unlike the start
   * pattern, any digits are accepted in a numeric marker.
   */
  const CONT_BLOCK_PATTERN = '@^\s*(?:[-*#]+|[0-9]+[.)]|\[\D?\])\s+@';

  /**
   * Matches the leading indentation and bullet/number marker of an item so
   * they can be stripped. Checkbox markers are handled separately.
   */
  const STRIP_BLOCK_PATTERN = '@^\s*(?:[-*#]+|[0-9]+[.)])\s*@';

  /**
   * Render the text of a list block.
   *
   * The text is split into items, each item is normalized and assigned a
   * depth, style and optional checkbox mark, the items are arranged into a
   * tree, and the tree is rendered.
   *
   * @param string $text Raw text of the list block.
   * @param mixed $children Not used by this implementation.
   * @return mixed A string when the engine is in text mode; otherwise the
   *   result of `phutil_implode_html()` over the rendered pieces.
   */
  public function markupText($text, $children) {
    $items = array();
    $lines = explode("\n", $text);

    // We allow users to delimit lists using either differing indentation
    // levels:
    //
    //   - a
    //     - b
    //
    // ...or differing numbers of item-delimiter characters:
    //
    //   - a
    //   -- b
    //
    // If they use the second style but block-indent the whole list, we'll
    // get the depth counts wrong for the first item. To prevent this,
    // un-indent every item by the minimum indentation level for the whole
    // block before we begin parsing.

    $regex = self::START_BLOCK_PATTERN;
    $min_space = PHP_INT_MAX;
    foreach ($lines as $line) {
      $matches = null;
      if (preg_match($regex, $line)) {
        // After the first marker line, later lines use the looser pattern.
        $regex = self::CONT_BLOCK_PATTERN;
        if (preg_match('/^(\s+)/', $line, $matches)) {
          $space = strlen($matches[1]);
        } else {
          $space = 0;
        }
        $min_space = min($min_space, $space);
      }
    }

    $regex = self::START_BLOCK_PATTERN;
    if ($min_space) {
      foreach ($lines as $key => $line) {
        // Only marker lines are un-indented; wrapped continuation lines are
        // left alone and have their indentation trimmed later.
        if (preg_match($regex, $line)) {
          $regex = self::CONT_BLOCK_PATTERN;
          $lines[$key] = substr($line, $min_space);
        }
      }
    }


    // The input text may have linewraps in it, like this:
    //
    //   - derp derp derp derp
    //     derp derp derp derp
    //   - blarp blarp blarp blarp
    //
    // Group text lines together into list items, stored in $items. So the
    // result in the above case will be:
    //
    //   array(
    //     array(
    //       "- derp derp derp derp",
    //       "  derp derp derp derp",
    //     ),
    //     array(
    //       "- blarp blarp blarp blarp",
    //     ),
    //   );

    $item = array();
    $starts_at = null;
    $regex = self::START_BLOCK_PATTERN;
    foreach ($lines as $line) {
      $match = null;
      if (preg_match($regex, $line, $match)) {
        // Remember the number on the first numbered marker we see; only the
        // start pattern captures it, so this comes from the first line.
        if (!$starts_at && !empty($match[1])) {
          $starts_at = $match[1];
        }
        $regex = self::CONT_BLOCK_PATTERN;
        if ($item) {
          $items[] = $item;
          $item = array();
        }
      }
      $item[] = $line;
    }
    if ($item) {
      $items[] = $item;
    }
    if (!$starts_at) {
      $starts_at = 1;
    }


    // Process each item to normalize the text, remove line wrapping, and
    // determine its depth (indentation level) and style (ordered vs unordered).
    //
    // We preserve consecutive linebreaks and interpret them as paragraph
    // breaks.
    //
    // Given the above example, the processed array will look like:
    //
    //   array(
    //     array(
    //       'text' => 'derp derp derp derp derp derp derp derp',
    //       'depth' => 0,
    //       'style' => '-',
    //     ),
    //     array(
    //       'text' => 'blarp blarp blarp blarp',
    //       'depth' => 0,
    //       'style' => '-',
    //     ),
    //   );

    $has_marks = false;
    foreach ($items as $key => $item) {
      // Trim space around newlines, to strip trailing whitespace and formatting
      // indentation.
      $item = preg_replace('/ *(\n+) */', '\1', implode("\n", $item));

      // Replace single newlines with a space. Preserve multiple newlines as
      // paragraph breaks.
      $item = preg_replace('/(?<!\n)\n(?!\n)/', ' ', $item);

      $item = rtrim($item);

      if (!strlen($item)) {
        unset($items[$key]);
        continue;
      }

      $matches = null;
      if (preg_match('/^\s*([-*#]{2,})/', $item, $matches)) {
        // Alternate-style indents; use number of list item symbols.
        $depth = strlen($matches[1]) - 1;
      } else if (preg_match('/^(\s+)/', $item, $matches)) {
        // Markdown-style indents; use indent depth.
        $depth = strlen($matches[1]);
      } else {
        $depth = 0;
      }

      // Items led by "#" or a digit are ordered; everything else is a bullet.
      if (preg_match('/^\s*(?:#|[0-9])/', $item)) {
        $style = '#';
      } else {
        $style = '-';
      }

      // Strip leading indicators off the item.
      $text = preg_replace(self::STRIP_BLOCK_PATTERN, '', $item);

      // Look for "[]", "[ ]", "[*]", "[x]", etc., which we render as a
      // checkbox. We don't render [1], [2], etc., as checkboxes, as these
      // are often used as footnotes.
      $mark = null;
      $matches = null;
      if (preg_match('/^\s*\[(\D?)\]\s*/', $text, $matches)) {
        // Any non-whitespace character inside the brackets means "checked".
        if (strlen(trim($matches[1]))) {
          $mark = true;
        } else {
          $mark = false;
        }
        $has_marks = true;
        $text = substr($text, strlen($matches[0]));
      }

      $items[$key] = array(
        'text' => $text,
        'depth' => $depth,
        'style' => $style,
        'mark' => $mark,
      );
    }

    // Re-index, since empty items may have been removed above.
    $items = array_values($items);


    // Users can create a sub-list by indenting any deeper amount than the
    // previous list, so these are both valid:
    //
    //   - a
    //     - b
    //
    //   - a
    //       - b
    //
    // In the former case, we'll have depths (0, 2). In the latter case, depths
    // (0, 4). We don't actually care about how many spaces there are, only
    // how many list indentation levels (that is, we want to map both of
    // those cases to (0, 1), indicating "outermost list" and "first sublist").
    //
    // This is made more complicated because lists at two different indentation
    // levels might be at the same list level:
    //
    //   - a
    //     - b
    //   - c
    //       - d
    //
    // Here, 'b' and 'd' are at the same list level (2) but different indent
    // levels (2, 4).
    //
    // Users can also create "staircases" like this:
    //
    //       - a
    //     - b
    //   # c
    //
    // While this is silly, we'd like to render it as faithfully as possible.
    //
    // In order to do this, we convert the list of nodes into a tree,
    // normalizing indentation levels and inserting dummy nodes as necessary to
    // make the tree well-formed. See additional notes at buildTree().
    //
    // In the case above, the result is a tree like this:
    //
    //   - <null>
    //     - <null>
    //       - a
    //     - b
    //   # c

    $l = 0;
    $r = count($items);
    $tree = $this->buildTree($items, $l, $r, $cur_level = 0);


    // We may need to open a list on a <null> node, but they do not have
    // list style information yet. We need to propagate list style information
    // backward through the tree. In the above example, the tree now looks
    // like this:
    //
    //   - <null (style=#)>
    //     - <null (style=-)>
    //       - a
    //     - b
    //   # c

    $this->adjustTreeStyleInformation($tree);

    // Finally, we have enough information to render the tree.

    $out = $this->renderTree($tree, 0, $has_marks, $starts_at);

    if ($this->getEngine()->isTextMode()) {
      $out = implode('', $out);
      $out = rtrim($out, "\n");

      // Strip trailing spaces from every line of the text output.
      $out = preg_replace('/ +$/m', '', $out);
      return $out;
    }

    return phutil_implode_html('', $out);
  }

  /**
   * Convert the flat item list in the half-open range [`$l`, `$r`) into a
   * tree of nodes. See additional notes in @{method:markupText}.
   *
   * Each returned node is an item (or a placeholder with null 'text', 'style'
   * and 'mark') extended with a 'level' key and an 'items' key holding its
   * child nodes.
   *
   * @param list<array> $items Items with 'text', 'depth', 'style', 'mark'.
   * @param int $l Index of the first item to include.
   * @param int $r Index one past the last item to include.
   * @param int $cur_level Nesting level assigned to the nodes produced here.
   * @return list<array> Nodes at `$cur_level`.
   */
  private function buildTree(array $items, $l, $r, $cur_level) {
    if ($l == $r) {
      return array();
    }

    if ($cur_level > self::MAXIMUM_LIST_NESTING_DEPTH) {
      // This algorithm is recursive and we don't need you blowing the stack
      // with your oh-so-clever 50,000-item-deep list. Cap indentation levels
      // at a reasonable number and just shove everything deeper up to this
      // level.
      $nodes = array();
      for ($ii = $l; $ii < $r; $ii++) {
        $nodes[] = array(
          'level' => $cur_level,
          'items' => array(),
        ) + $items[$ii];
      }
      return $nodes;
    }

    // Find the shallowest item in the range. Scanning backward with "<="
    // leaves $min on the earliest item which has the minimum depth.
    $min = $l;
    for ($ii = $r - 1; $ii >= $l; $ii--) {
      if ($items[$ii]['depth'] <= $items[$min]['depth']) {
        $min = $ii;
      }
    }

    $min_depth = $items[$min]['depth'];

    $nodes = array();
    if ($min != $l) {
      // Items before the first shallowest item are deeper than it, so they
      // are hung beneath a placeholder node at this level.
      $nodes[] = array(
        'text' => null,
        'level' => $cur_level,
        'style' => null,
        'mark' => null,
        'items' => $this->buildTree($items, $l, $min, $cur_level + 1),
      );
    }

    // Each item at the minimum depth becomes a node at this level, and the
    // items between it and the next such item become its children.
    $last = $min;
    for ($ii = $last + 1; $ii < $r; $ii++) {
      if ($items[$ii]['depth'] == $min_depth) {
        $nodes[] = array(
          'level' => $cur_level,
          'items' => $this->buildTree($items, $last + 1, $ii, $cur_level + 1),
        ) + $items[$last];
        $last = $ii;
      }
    }
    $nodes[] = array(
      'level' => $cur_level,
      'items' => $this->buildTree($items, $last + 1, $r, $cur_level + 1),
    ) + $items[$last];

    return $nodes;
  }


  /**
   * Fill in the 'style' of placeholder nodes, in place. See additional notes
   * in @{method:markupText}.
   *
   * @param list<array> &$tree Nodes produced by @{method:buildTree}; modified
   *   by reference.
   * @return void
   */
  private function adjustTreeStyleInformation(array &$tree) {
    // The effect here is just to walk backward through the nodes at this level
    // and apply the first style in the list to any empty nodes we inserted
    // before it. As we go, also recurse down the tree.

    // Fallback used when no styled node follows a placeholder at this level.
    $style = '-';
    for ($ii = count($tree) - 1; $ii >= 0; $ii--) {
      if ($tree[$ii]['style'] !== null) {
        // This is the earliest node we've seen with style, so set the
        // style to its style.
        $style = $tree[$ii]['style'];
      } else {
        // This node has no style, so apply the current style.
        $tree[$ii]['style'] = $style;
      }
      if ($tree[$ii]['items']) {
        $this->adjustTreeStyleInformation($tree[$ii]['items']);
      }
    }
  }


  /**
   * Render one level of the tree, recursing into child nodes. See additional
   * notes in @{method:markupText}.
   *
   * @param list<array> $tree Nodes to render at this level.
   * @param int $level Nesting level, used for indentation in text mode.
   * @param bool $has_marks Whether any item in the list carries a checkbox;
   *   adds an extra CSS class to the list tags in HTML mode.
   * @param int|string $starts_at First number of an ordered list. Sub-lists
   *   are rendered without this argument and so start at 1.
   * @return list<mixed> Output fragments, in order.
   */
  private function renderTree(
    array $tree,
    $level,
    $has_marks,
    $starts_at = 1) {

    // The style of the first node decides the list type for this level.
    $style = idx(head($tree), 'style');

    $out = array();

    if (!$this->getEngine()->isTextMode()) {
      switch ($style) {
        case '#':
          $tag = 'ol';
          break;
        case '-':
          $tag = 'ul';
          break;
      }

      // Only emit a "start" attribute for an integer start greater than 1.
      $start_attr = null;
      if (ctype_digit(phutil_string_cast($starts_at)) && $starts_at > 1) {
        $start_attr = hsprintf(' start="%d"', $starts_at);
      }

      if ($has_marks) {
        $out[] = hsprintf(
          '<%s class="remarkup-list remarkup-list-with-checkmarks"%s>',
          $tag,
          $start_attr);
      } else {
        $out[] = hsprintf(
          '<%s class="remarkup-list"%s>',
          $tag,
          $start_attr);
      }

      $out[] = "\n";
    }

    $number = $starts_at;
    foreach ($tree as $item) {
      if ($this->getEngine()->isTextMode()) {
        if ($item['text'] === null) {
          // Don't render anything.
        } else {
          $indent = str_repeat(' ', 2 * $level);
          $out[] = $indent;
          if ($item['mark'] !== null) {
            if ($item['mark']) {
              $out[] = '[X] ';
            } else {
              $out[] = '[ ] ';
            }
          } else {
            switch ($style) {
              case '#':
                $out[] = $number.'. ';
                $number++;
                break;
              case '-':
                $out[] = '- ';
                break;
            }
          }

          $parts = preg_split('/\n{2,}/', $item['text']);
          foreach ($parts as $key => $part) {
            if ($key != 0) {
              // NOTE(review): only one space precedes the indent here, while
              // the "- " marker is two characters wide; confirm this is the
              // intended alignment for continuation paragraphs.
              $out[] = "\n\n ".$indent;
            }
            $out[] = $this->applyRules($part);
          }
          $out[] = "\n";
        }
      } else {
        if ($item['text'] === null) {
          $out[] = hsprintf('<li class="remarkup-list-item phantom-item">');
        } else {
          if ($item['mark'] !== null) {
            if ($item['mark'] == true) {
              $out[] = hsprintf(
                '<li class="remarkup-list-item remarkup-checked-item">');
            } else {
              $out[] = hsprintf(
                '<li class="remarkup-list-item remarkup-unchecked-item">');
            }
            $out[] = phutil_tag(
              'input',
              array(
                'type' => 'checkbox',
                'checked' => ($item['mark'] ? 'checked' : null),
                'disabled' => 'disabled',
              ));
            $out[] = ' ';
          } else {
            $out[] = hsprintf('<li class="remarkup-list-item">');
          }

          // Paragraph breaks inside an item become a pair of <br /> tags.
          $parts = preg_split('/\n{2,}/', $item['text']);
          foreach ($parts as $key => $part) {
            if ($key != 0) {
              $out[] = array(
                "\n",
                phutil_tag('br'),
                phutil_tag('br'),
                "\n",
              );
            }
            $out[] = $this->applyRules($part);
          }
        }
      }

      if ($item['items']) {
        $subitems = $this->renderTree($item['items'], $level + 1, $has_marks);
        foreach ($subitems as $i) {
          $out[] = $i;
        }
      }
      if (!$this->getEngine()->isTextMode()) {
        $out[] = hsprintf("</li>\n");
      }
    }

    if (!$this->getEngine()->isTextMode()) {
      switch ($style) {
        case '#':
          $out[] = hsprintf('</ol>');
          break;
        case '-':
          $out[] = hsprintf('</ul>');
          break;
      }
    }

    return $out;
  }

}