Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
freebsd
GitHub Repository: freebsd/phabricator
Path: blob/master/src/applications/files/document/PhabricatorJupyterDocumentEngine.php
12241 views
1
<?php
2
3
final class PhabricatorJupyterDocumentEngine
4
extends PhabricatorDocumentEngine {
5
6
const ENGINEKEY = 'jupyter';
7
8
/**
 * Return the translated "view as" label for this engine.
 *
 * @param PhabricatorDocumentRef $ref Document reference (not consulted).
 * @return string Label text produced by pht().
 */
public function getViewAsLabel(PhabricatorDocumentRef $ref) {
  $label = pht('View as Jupyter Notebook');
  return $label;
}
11
12
/**
 * Return the icon identifier used for this engine.
 *
 * @param PhabricatorDocumentRef $ref Document reference (not consulted).
 * @return string Icon class name.
 */
protected function getDocumentIconIcon(PhabricatorDocumentRef $ref) {
  $icon = 'fa-sun-o';
  return $icon;
}
15
16
/**
 * Return the translated placeholder text for a notebook that is still
 * rendering.
 *
 * @param PhabricatorDocumentRef $ref Document reference (not consulted).
 * @return string Placeholder text produced by pht().
 */
protected function getDocumentRenderingText(PhabricatorDocumentRef $ref) {
  $text = pht('Rendering Jupyter Notebook...');
  return $text;
}
19
20
/**
 * Report whether documents should be rendered asynchronously.
 *
 * @param PhabricatorDocumentRef $ref Document reference (not consulted).
 * @return bool Always true for this engine.
 */
public function shouldRenderAsync(PhabricatorDocumentRef $ref) {
  $render_async = true;
  return $render_async;
}
23
24
/**
 * Score this engine for a document based on its file name.
 *
 * @param PhabricatorDocumentRef $ref Document whose name is inspected.
 * @return int 2000 when the name ends in ".ipynb" (case-insensitive),
 *   otherwise 500.
 */
protected function getContentScore(PhabricatorDocumentRef $ref) {
  $has_notebook_extension = preg_match('/\\.ipynb\z/i', $ref->getName());

  return $has_notebook_extension ? 2000 : 500;
}
33
34
/**
 * Decide whether this engine can render the document.
 *
 * @param PhabricatorDocumentRef $ref Document to test.
 * @return mixed Whatever the reference's isProbablyJSON() check reports.
 */
protected function canRenderDocumentType(PhabricatorDocumentRef $ref) {
  $looks_like_json = $ref->isProbablyJSON();
  return $looks_like_json;
}
37
38
/**
 * Report whether this engine can diff a pair of documents.
 *
 * @param PhabricatorDocumentRef|null $uref One side of the diff (not
 *   consulted).
 * @param PhabricatorDocumentRef|null $vref Other side of the diff (not
 *   consulted).
 * @return bool Always true for this engine.
 */
public function canDiffDocuments(
  PhabricatorDocumentRef $uref = null,
  PhabricatorDocumentRef $vref = null) {

  $can_diff = true;
  return $can_diff;
}
43
44
/**
 * Build the block lists for both sides of a diff.
 *
 * A missing reference contributes an empty block list. If building the
 * blocks throws, the exception is logged and its message is attached to the
 * result instead of propagating.
 *
 * @param PhabricatorDocumentRef|null $uref One side of the diff.
 * @param PhabricatorDocumentRef|null $vref Other side of the diff.
 * @return PhabricatorDocumentEngineBlocks Block lists, or a message on
 *   failure.
 */
public function newEngineBlocks(
  PhabricatorDocumentRef $uref = null,
  PhabricatorDocumentRef $vref = null) {

  $result = new PhabricatorDocumentEngineBlocks();

  try {
    $u_list = $uref ? $this->newDiffBlocks($uref) : array();
    $v_list = $vref ? $this->newDiffBlocks($vref) : array();

    $result->addBlockList($uref, $u_list);
    $result->addBlockList($vref, $v_list);
  } catch (Exception $ex) {
    phlog($ex);
    $result->addMessage($ex->getMessage());
  }

  return $result;
}
72
73
/**
 * Build the old/new views for a pair of blocks being compared.
 *
 * When both blocks have the same cell type, "markdown" blocks get a prose
 * diff and "code/line" blocks get an intraline diff. Every other pairing is
 * delegated to the parent implementation.
 *
 * @param PhabricatorDocumentRef $uref Reference for the "old" side.
 * @param PhabricatorDocumentEngineBlock $ublock Block for the "old" side.
 * @param PhabricatorDocumentRef $vref Reference for the "new" side.
 * @param PhabricatorDocumentEngineBlock $vblock Block for the "new" side.
 * @return mixed A PhabricatorDocumentEngineBlockDiff, or whatever the
 *   parent implementation returns.
 */
public function newBlockDiffViews(
  PhabricatorDocumentRef $uref,
  PhabricatorDocumentEngineBlock $ublock,
  PhabricatorDocumentRef $vref,
  PhabricatorDocumentEngineBlock $vblock) {

  $u_cell = $ublock->getContent();
  $v_cell = $vblock->getContent();

  $u_type = idx($u_cell, 'cell_type');
  $v_type = idx($v_cell, 'cell_type');

  // Blocks of different types have no specialized diff.
  if ($u_type !== $v_type) {
    return parent::newBlockDiffViews($uref, $ublock, $vref, $vblock);
  }

  // NOTE: The type tests below use loose comparison, in this order, so they
  // select exactly the same branch a "switch" statement would.
  if ($u_type == 'markdown') {
    $u_text = $this->readString($u_cell, 'source');
    $v_text = $this->readString($v_cell, 'source');

    $prose_diff = id(new PhutilProseDifferenceEngine())
      ->getDiff($u_text, $v_text);

    // Each side shows the unchanged text plus only its own changes.
    $u_prose = $this->newProseDiffCell($prose_diff, array('=', '-'));
    $v_prose = $this->newProseDiffCell($prose_diff, array('=', '+'));

    $u_row = $this->newJupyterCell(null, $u_prose, null);
    $v_row = $this->newJupyterCell(null, $v_prose, null);
  } else if ($u_type == 'code/line') {
    $u_raw = idx($u_cell, 'raw');
    $v_raw = idx($v_cell, 'raw');
    $u_display = idx($u_cell, 'display');
    $v_display = idx($v_cell, 'display');

    // The segments are computed from the raw text and then applied to the
    // highlighted display markup.
    $intraline = ArcanistDiffUtils::generateIntralineDiff($u_raw, $v_raw);

    $u_segments = array();
    foreach ($intraline[0] as $segment) {
      $u_segments[] = $segment;
    }

    $v_segments = array();
    foreach ($intraline[1] as $segment) {
      $v_segments[] = $segment;
    }

    $u_markup = PhabricatorDifferenceEngine::applyIntralineDiff(
      $u_display,
      $u_segments);

    $v_markup = PhabricatorDifferenceEngine::applyIntralineDiff(
      $v_display,
      $v_segments);

    list($u_label, $u_body) = $this->newCodeLineCell($u_cell, $u_markup);
    list($v_label, $v_body) = $this->newCodeLineCell($v_cell, $v_markup);

    $flush_classes = array(
      'jupyter-cell-flush',
    );

    $u_row = $this->newJupyterCell($u_label, $u_body, $flush_classes);
    $v_row = $this->newJupyterCell($v_label, $v_body, $flush_classes);
  } else {
    return parent::newBlockDiffViews($uref, $ublock, $vref, $vblock);
  }

  $u_view = $this->newCellContainer($u_row);
  $v_view = $this->newCellContainer($v_row);

  return id(new PhabricatorDocumentEngineBlockDiff())
    ->setOldContent($u_view)
    ->addOldClass('old')
    ->setNewContent($v_view)
    ->addNewClass('new');
}
159
160
/**
 * Render a single block as a standalone notebook fragment.
 *
 * @param PhabricatorDocumentRef $ref Document the block belongs to (not
 *   consulted).
 * @param PhabricatorDocumentEngineBlock $block Block whose content is a
 *   cell.
 * @return mixed Markup for the rendered cell inside its container.
 */
public function newBlockContentView(
  PhabricatorDocumentRef $ref,
  PhabricatorDocumentEngineBlock $block) {

  $row = $this->renderJupyterCell(
    $this->getViewer(),
    $block->getContent());

  return $this->newCellContainer($row);
}
171
172
/**
 * Wrap rendered cell rows in the notebook table and the diff container.
 *
 * @param mixed $cell_content Row markup to place inside the table.
 * @return mixed A "div" tag containing a "table" tag containing the rows.
 */
private function newCellContainer($cell_content) {
  return phutil_tag(
    'div',
    array(
      'class' => 'document-engine-jupyter document-engine-diff',
    ),
    phutil_tag(
      'table',
      array(
        'class' => 'jupyter-notebook',
      ),
      $cell_content));
}
189
190
/**
 * Render one side of a prose diff as markdown cell content.
 *
 * Only parts whose type appears in $mask are emitted. Unchanged parts ("=")
 * are emitted as plain text; removed ("-") and added ("+") parts are
 * wrapped in a "bright" span.
 *
 * @param PhutilProseDiff $diff Diff whose parts are rendered.
 * @param array $mask List of part types to include.
 * @return array Pair of (null label, "div" tag holding the parts).
 */
private function newProseDiffCell(PhutilProseDiff $diff, array $mask) {
  $allowed = array_fuse($mask);

  $pieces = array();
  foreach ($diff->getParts() as $part) {
    $kind = $part['type'];
    $body = $part['text'];

    if (!isset($allowed[$kind])) {
      continue;
    }

    if ($kind === '=') {
      $pieces[] = $body;
    } else if ($kind === '-' || $kind === '+') {
      // Both kinds of change are highlighted the same way.
      $pieces[] = phutil_tag(
        'span',
        array(
          'class' => 'bright',
        ),
        $body);
    }
  }

  $markdown = phutil_tag(
    'div',
    array(
      'class' => 'jupyter-cell-markdown',
    ),
    $pieces);

  return array(null, $markdown);
}
235
236
/**
 * Load a document and convert its diffable cells into engine blocks.
 *
 * Block keys are assigned sequentially starting at 1. Each block carries a
 * digest used to compare it against blocks on the other side of the diff.
 *
 * @param PhabricatorDocumentRef $ref Document to load.
 * @return array List of PhabricatorDocumentEngineBlock objects.
 */
private function newDiffBlocks(PhabricatorDocumentRef $ref) {
  $viewer = $this->getViewer();
  $cells = $this->newCells($ref->loadData(), true);

  $blocks = array();
  foreach ($cells as $cell) {
    // Source lines and markdown are hashed by their text alone, so that
    // changes to other cell metadata do not make them look different.
    // Everything else is hashed in full.
    switch (idx($cell, 'cell_type')) {
      case 'code/line':
        $digest_input = $cell['raw'];
        break;
      case 'markdown':
        $digest_input = $this->readString($cell, 'source');
        break;
      default:
        $digest_input = serialize($cell);
        break;
    }

    $digest = PhabricatorHash::digestWithNamedKey(
      $digest_input,
      'document-engine.content-digest');

    // The next key is one more than the number of blocks built so far.
    $blocks[] = id(new PhabricatorDocumentEngineBlock())
      ->setBlockKey(count($blocks) + 1)
      ->setDifferenceHash($digest)
      ->setContent($cell);
  }

  return $blocks;
}
274
275
/**
 * Render an entire notebook document.
 *
 * If the document cannot be parsed into cells, the exception message is
 * returned as a message view instead.
 *
 * @param PhabricatorDocumentRef $ref Document to load and render.
 * @return mixed Notebook markup, or the result of newMessage() on failure.
 */
protected function newDocumentContent(PhabricatorDocumentRef $ref) {
  $viewer = $this->getViewer();
  $raw = $ref->loadData();

  try {
    $cells = $this->newCells($raw, false);
  } catch (Exception $ex) {
    return $this->newMessage($ex->getMessage());
  }

  $table_rows = array();
  foreach ($cells as $cell) {
    $table_rows[] = $this->renderJupyterCell($viewer, $cell);
  }

  return phutil_tag(
    'div',
    array(
      'class' => 'document-engine-jupyter',
    ),
    phutil_tag(
      'table',
      array(
        'class' => 'jupyter-notebook',
      ),
      $table_rows));
}
306
307
/**
 * Parse raw notebook JSON into a list of cells.
 *
 * The document must decode to an array, declare "nbformat" 4, and contain a
 * non-empty "cells" list in which every cell is itself an array.
 *
 * When $for_diff is false, the cells are returned as decoded. When it is
 * true, cells are split into finer-grained pieces:
 *
 *   - "markdown" cells are split on blank lines, one cell per chunk;
 *   - "code" cells become one "code/line" cell per source line (keys
 *     "cell_type", "label", "raw", "display", "head", "last"), followed by
 *     one "code/output" cell per output (keys "cell_type", "output");
 *   - all other cells are passed through unchanged.
 *
 * @param string $content Raw document content.
 * @param bool $for_diff True to split cells for a diff view.
 * @return array List of cell arrays.
 * @throws Exception If the document is not a usable version 4 notebook.
 */
private function newCells($content, $for_diff) {
  try {
    $data = phutil_json_decode($content);
  } catch (PhutilJSONParserException $ex) {
    throw new Exception(
      pht(
        'This is not a valid JSON document and can not be rendered as '.
        'a Jupyter notebook: %s.',
        $ex->getMessage()));
  }

  if (!is_array($data)) {
    throw new Exception(
      pht(
        'This document does not encode a valid JSON object and can not '.
        'be rendered as a Jupyter notebook.'));
  }

  $nbformat = idx($data, 'nbformat');
  if ($nbformat == null || !strlen($nbformat)) {
    throw new Exception(
      pht(
        'This document is missing an "nbformat" field. Jupyter notebooks '.
        'must have this field.'));
  }

  if ($nbformat !== 4) {
    throw new Exception(
      pht(
        'This Jupyter notebook uses an unsupported version of the file '.
        'format (found version %s, expected version 4).',
        $nbformat));
  }

  $cells = idx($data, 'cells');
  if (!is_array($cells)) {
    throw new Exception(
      pht(
        'This Jupyter notebook does not specify a list of "cells".'));
  }

  if (!$cells) {
    throw new Exception(
      pht(
        'This Jupyter notebook does not specify any notebook cells.'));
  }

  // Every consumer of these cells (both in this method and in the cell
  // renderers) requires each cell to be an array. Reject malformed cells
  // here, where callers already handle an Exception, rather than letting a
  // type error escape later.
  foreach ($cells as $cell) {
    if (!is_array($cell)) {
      throw new Exception(
        pht(
          'This Jupyter notebook contains a cell which is not a valid '.
          'JSON object.'));
    }
  }

  if (!$for_diff) {
    return $cells;
  }

  // If we're extracting cells to build a diff view, split code cells into
  // individual lines and individual outputs. We want users to be able to
  // add inline comments to each line and each output block.

  $results = array();
  foreach ($cells as $cell) {
    $cell_type = idx($cell, 'cell_type');
    if ($cell_type === 'markdown') {
      $source = $this->readString($cell, 'source');

      // Attempt to split contiguous blocks of markdown into smaller
      // pieces.

      $chunks = preg_split(
        '/\n\n+/',
        $source);

      foreach ($chunks as $chunk) {
        $result = $cell;
        $result['source'] = array($chunk);
        $results[] = $result;
      }

      continue;
    }

    if ($cell_type !== 'code') {
      $results[] = $cell;
      continue;
    }

    $label = $this->newCellLabel($cell);

    $lines = $this->readStringList($cell, 'source');
    $display_lines = $this->highlightLines($lines);

    $count = count($lines);
    for ($ii = 0; $ii < $count; $ii++) {
      $is_head = ($ii === 0);
      $is_last = ($ii === ($count - 1));

      // Only the first line of a code cell carries the cell label.
      if ($is_head) {
        $line_label = $label;
      } else {
        $line_label = null;
      }

      $results[] = array(
        'cell_type' => 'code/line',
        'label' => $line_label,
        'raw' => $lines[$ii],
        'display' => idx($display_lines, $ii),
        'head' => $is_head,
        'last' => $is_last,
      );
    }

    $output_list = idx($cell, 'outputs');
    if (is_array($output_list)) {
      foreach ($output_list as $output) {
        $results[] = array(
          'cell_type' => 'code/output',
          'output' => $output,
        );
      }
    }
  }

  return $results;
}
429
430
431
/**
 * Render one cell as a notebook table row.
 *
 * @param PhabricatorUser $viewer Viewing user, passed to the content
 *   renderer.
 * @param array $cell Cell to render.
 * @return mixed Row markup built by newJupyterCell().
 */
private function renderJupyterCell(
  PhabricatorUser $viewer,
  array $cell) {

  list($label, $body) = $this->renderJupyterCellContent($viewer, $cell);

  // NOTE: Loose comparison, matching the semantics of a "switch" on the
  // cell type. Only individual code lines get the "flush" class.
  $is_code_line = (idx($cell, 'cell_type') == 'code/line');
  $classes = $is_code_line ? 'jupyter-cell-flush' : null;

  return $this->newJupyterCell($label, $body, $classes);
}
449
450
/**
 * Build a notebook table row with a label cell and a content cell.
 *
 * Callers in this class pass $classes as a string, as null, or as a list of
 * class names. A list is joined with spaces so that each name remains a
 * separate CSS class in the rendered attribute.
 *
 * @param mixed $label Content for the label cell, or null.
 * @param mixed $content Content for the content cell.
 * @param string|array|null $classes CSS class or classes for the content
 *   cell.
 * @return mixed A "tr" tag containing the two "td" cells.
 */
private function newJupyterCell($label, $content, $classes) {
  if (is_array($classes)) {
    $classes = implode(' ', $classes);
  }

  $label_cell = phutil_tag(
    'td',
    array(
      'class' => 'jupyter-label',
    ),
    $label);

  $content_cell = phutil_tag(
    'td',
    array(
      'class' => $classes,
    ),
    $content);

  return phutil_tag(
    'tr',
    array(),
    array(
      $label_cell,
      $content_cell,
    ));
}
473
474
/**
 * Dispatch a cell to the renderer for its type.
 *
 * Cells of an unrecognized type are rendered as formatted JSON.
 *
 * @param PhabricatorUser $viewer Viewing user (not consulted here).
 * @param array $cell Cell to render.
 * @return array Pair of (label, content).
 */
private function renderJupyterCellContent(
  PhabricatorUser $viewer,
  array $cell) {

  switch (idx($cell, 'cell_type')) {
    case 'markdown':
      return $this->newMarkdownCell($cell);
    case 'code':
      return $this->newCodeCell($cell);
    case 'code/line':
      return $this->newCodeLineCell($cell);
    case 'code/output':
      return $this->newCodeOutputCell($cell);
    default:
      $formatter = new PhutilJSON();
      return $this->newRawCell($formatter->encodeFormatted($cell));
  }
}
495
496
/**
 * Build an unlabeled cell that shows raw content in a monospaced block.
 *
 * @param mixed $content Content to display.
 * @return array Pair of (null label, "div" tag wrapping the content).
 */
private function newRawCell($content) {
  $raw_block = phutil_tag(
    'div',
    array(
      'class' => 'jupyter-cell-raw PhabricatorMonospaced',
    ),
    $content);

  return array(null, $raw_block);
}
507
508
/**
 * Build an unlabeled cell for a markdown cell's source.
 *
 * @param array $cell Markdown cell; its "source" is read as a list of
 *   strings.
 * @return array Pair of (null label, "div" tag holding the source lines).
 */
private function newMarkdownCell(array $cell) {
  $source_lines = $this->readStringList($cell, 'source');

  // TODO: This should ideally highlight as Markdown, but the "md"
  // highlighter in Pygments is painfully slow and not terribly useful.
  $rendered_lines = $this->highlightLines($source_lines, 'txt');

  $markdown_block = phutil_tag(
    'div',
    array(
      'class' => 'jupyter-cell-markdown',
    ),
    $rendered_lines);

  return array(null, $markdown_block);
}
525
526
/**
 * Build a whole code cell: highlighted source followed by its outputs.
 *
 * @param array $cell Code cell with "source" and, optionally, an "outputs"
 *   list.
 * @return array Pair of (label, list holding the code block and the
 *   rendered outputs).
 */
private function newCodeCell(array $cell) {
  $label = $this->newCellLabel($cell);

  $source_lines = $this->readStringList($cell, 'source');
  $highlighted = $this->highlightLines($source_lines);

  // A missing or malformed "outputs" value is treated as no outputs.
  $rendered_outputs = array();
  $raw_outputs = idx($cell, 'outputs');
  if (is_array($raw_outputs)) {
    foreach ($raw_outputs as $raw_output) {
      $rendered_outputs[] = $this->newOutput($raw_output);
    }
  }

  $code_block = phutil_tag(
    'div',
    array(
      'class' =>
        'jupyter-cell-code jupyter-cell-code-block '.
        'PhabricatorMonospaced remarkup-code',
    ),
    array(
      $highlighted,
    ));

  return array(
    $label,
    array(
      $code_block,
      $rendered_outputs,
    ),
  );
}
557
558
/**
 * Build the cell for a single line of code.
 *
 * @param array $cell "code/line" cell; its "head", "last", "display" and
 *   "label" keys are read.
 * @param mixed $content Optional replacement for the cell's "display"
 *   markup.
 * @return array Pair of (label, list holding the line's "div" tag).
 */
private function newCodeLineCell(array $cell, $content = null) {
  $class_list = array(
    'PhabricatorMonospaced',
    'remarkup-code',
    'jupyter-cell-code',
    'jupyter-cell-code-line',
  );

  // The first and last lines of a cell get extra marker classes.
  if ($cell['head']) {
    $class_list[] = 'jupyter-cell-code-head';
  }

  if ($cell['last']) {
    $class_list[] = 'jupyter-cell-code-last';
  }

  $body = ($content === null) ? $cell['display'] : $content;

  $line_block = phutil_tag(
    'div',
    array(
      'class' => implode(' ', $class_list),
    ),
    array(
      $body,
    ));

  return array(
    $cell['label'],
    array(
      $line_block,
    ),
  );
}
593
594
/**
 * Build an unlabeled cell for a single code output.
 *
 * @param array $cell "code/output" cell; its "output" key is rendered.
 * @return array Pair of (null label, rendered output).
 */
private function newCodeOutputCell(array $cell) {
  $rendered = $this->newOutput($cell['output']);

  return array(null, $rendered);
}
600
601
/**
 * Render one notebook output entry.
 *
 * For "execute_result" and "display_data" outputs, the first available
 * representation is used, in this order: an image (PNG, JPEG, JPG, GIF),
 * "text/html", "application/javascript", then "text/plain". If none is
 * present the output renders empty. Any other output type renders its
 * "text" field.
 *
 * The parameter is deliberately not type-hinted: outputs come from decoded
 * document JSON, and a malformed entry should produce the placeholder below
 * rather than a type error.
 *
 * @param mixed $output Decoded output entry; expected to be an array.
 * @return mixed A "div" tag for the output, or placeholder text when the
 *   entry is not an array.
 */
private function newOutput($output) {
  if (!is_array($output)) {
    return pht('<Invalid Output>');
  }

  $classes = array(
    'jupyter-output',
    'PhabricatorMonospaced',
  );

  $output_name = idx($output, 'name');
  switch ($output_name) {
    case 'stderr':
      $classes[] = 'jupyter-output-stderr';
      break;
  }

  // Stays null when no supported representation is found, so the tag below
  // never reads an undefined variable.
  $content = null;

  $output_type = idx($output, 'output_type');
  switch ($output_type) {
    case 'execute_result':
    case 'display_data':
      $data = idx($output, 'data');

      $image_formats = array(
        'image/png',
        'image/jpeg',
        'image/jpg',
        'image/gif',
      );

      foreach ($image_formats as $image_format) {
        if (!isset($data[$image_format])) {
          continue;
        }

        $raw_data = $this->readString($data, $image_format);

        $content = phutil_tag(
          'img',
          array(
            'src' => 'data:'.$image_format.';base64,'.$raw_data,
          ));

        // Leave both the loop and the enclosing "switch".
        break 2;
      }

      if (isset($data['text/html'])) {
        $content = $data['text/html'];
        $classes[] = 'jupyter-output-html';
        break;
      }

      if (isset($data['application/javascript'])) {
        $content = $data['application/javascript'];
        $classes[] = 'jupyter-output-html';
        break;
      }

      if (isset($data['text/plain'])) {
        $content = $data['text/plain'];
        break;
      }

      break;
    case 'stream':
    default:
      $content = $this->readString($output, 'text');
      break;
  }

  return phutil_tag(
    'div',
    array(
      'class' => implode(' ', $classes),
    ),
    $content);
}
678
679
/**
 * Build the "In [N]:" label for a cell.
 *
 * @param array $cell Cell whose "execution_count" is read.
 * @return string|null Label text, or null when the count is missing or
 *   falsy.
 */
private function newCellLabel(array $cell) {
  $count = idx($cell, 'execution_count');

  if (!$count) {
    return null;
  }

  return 'In ['.$count.']:';
}
689
690
/**
 * Syntax-highlight a list of source lines and split the result back into
 * lines.
 *
 * When no language is forced, a first line of the form "%%<name>" selects
 * the language. That line is removed before highlighting and put back at
 * the front of the result inside a "language-tag" span. Without such a
 * line the language defaults to "py".
 *
 * @param array $lines Source lines, joined without a separator.
 * @param string|null $force_language Language to use, or null to detect.
 * @return array Highlighted lines.
 */
private function highlightLines(array $lines, $force_language = null) {
  $magic_line = null;
  $language = $force_language;

  if ($force_language === null) {
    $language = 'py';

    $matches = null;
    if (preg_match('/^%%(.*)$/', head($lines), $matches)) {
      $magic_line = array_shift($lines);
      $language = $matches[1];
    }
  }

  $highlighted = PhabricatorSyntaxHighlighter::highlightWithLanguage(
    $language,
    implode('', $lines));
  $result = phutil_split_lines($highlighted);

  if ($magic_line !== null) {
    array_unshift(
      $result,
      phutil_tag(
        'span',
        array(
          'class' => 'language-tag',
        ),
        $magic_line));
  }

  return $result;
}
724
725
/**
 * Report whether this engine should be suggested for a document.
 *
 * @param PhabricatorDocumentRef $ref Document reference (not consulted).
 * @return bool Always true for this engine.
 */
public function shouldSuggestEngine(PhabricatorDocumentRef $ref) {
  $suggest = true;
  return $suggest;
}
728
729
/**
 * Read a value that may be a string or a list of strings, as one string.
 *
 * @param array $src Array to read from.
 * @param string $key Key to read.
 * @return string The list's parts concatenated without a separator; empty
 *   when the key holds neither a string nor an array.
 */
private function readString(array $src, $key) {
  return implode('', $this->readStringList($src, $key));
}
733
734
/**
 * Read a value that may be a string or a list of strings, as a list.
 *
 * @param array $src Array to read from.
 * @param string $key Key to read.
 * @return array The value itself when it is an array, a one-element list
 *   when it is a string, and an empty list otherwise.
 */
private function readStringList(array $src, $key) {
  $value = idx($src, $key);

  if (is_array($value)) {
    return $value;
  }

  if (is_string($value)) {
    return array($value);
  }

  return array();
}
747
748
}
749
750