Path: blob/master/src/applications/files/document/PhabricatorJupyterDocumentEngine.php
12241 views
<?php

/**
 * Document engine which renders JSON documents as Jupyter notebooks.
 *
 * The engine supports two modes:
 *
 *   - a plain document view, where each notebook cell becomes one table row;
 *   - a diff view, where code cells are split into one block per source line
 *     and one block per output, and markdown cells are split into paragraphs,
 *     so each piece can be compared (and rendered) on its own.
 *
 * Internally, every cell renderer returns a two-element list of
 * `(label, content)` which is turned into a `<tr>` by @{method:newJupyterCell}.
 */
final class PhabricatorJupyterDocumentEngine
  extends PhabricatorDocumentEngine {

  const ENGINEKEY = 'jupyter';

  public function getViewAsLabel(PhabricatorDocumentRef $ref) {
    return pht('View as Jupyter Notebook');
  }

  protected function getDocumentIconIcon(PhabricatorDocumentRef $ref) {
    return 'fa-sun-o';
  }

  protected function getDocumentRenderingText(PhabricatorDocumentRef $ref) {
    return pht('Rendering Jupyter Notebook...');
  }

  public function shouldRenderAsync(PhabricatorDocumentRef $ref) {
    return true;
  }

  /**
   * Score this engine for a document: 2000 for names ending in ".ipynb"
   * (case-insensitive), 500 for everything else.
   */
  protected function getContentScore(PhabricatorDocumentRef $ref) {
    $name = $ref->getName();

    // Guard against a ref without a name so preg_match() is never handed
    // null; an unnamed document simply gets the fallback score.
    if ($name !== null && preg_match('/\\.ipynb\z/i', $name)) {
      return 2000;
    }

    return 500;
  }

  protected function canRenderDocumentType(PhabricatorDocumentRef $ref) {
    return $ref->isProbablyJSON();
  }

  public function canDiffDocuments(
    PhabricatorDocumentRef $uref = null,
    PhabricatorDocumentRef $vref = null) {
    return true;
  }

  /**
   * Build the diffable block lists for the old (`$uref`) and new (`$vref`)
   * documents. Either ref may be null, in which case that side contributes
   * an empty block list.
   *
   * Any Exception raised while building blocks is logged and surfaced as a
   * message on the returned block set instead of propagating.
   */
  public function newEngineBlocks(
    PhabricatorDocumentRef $uref = null,
    PhabricatorDocumentRef $vref = null) {

    $blocks = new PhabricatorDocumentEngineBlocks();

    try {
      if ($uref) {
        $u_blocks = $this->newDiffBlocks($uref);
      } else {
        $u_blocks = array();
      }

      if ($vref) {
        $v_blocks = $this->newDiffBlocks($vref);
      } else {
        $v_blocks = array();
      }

      $blocks->addBlockList($uref, $u_blocks);
      $blocks->addBlockList($vref, $v_blocks);
    } catch (Exception $ex) {
      phlog($ex);
      $blocks->addMessage($ex->getMessage());
    }

    return $blocks;
  }

  /**
   * Render a side-by-side diff for a pair of changed blocks.
   *
   * When both blocks hold the same kind of cell, markdown cells get a prose
   * diff and single code lines get an intraline diff. Every other
   * combination is delegated to the parent implementation.
   */
  public function newBlockDiffViews(
    PhabricatorDocumentRef $uref,
    PhabricatorDocumentEngineBlock $ublock,
    PhabricatorDocumentRef $vref,
    PhabricatorDocumentEngineBlock $vblock) {

    $ucell = $ublock->getContent();
    $vcell = $vblock->getContent();

    $utype = idx($ucell, 'cell_type');
    $vtype = idx($vcell, 'cell_type');

    if ($utype === $vtype) {
      switch ($utype) {
        case 'markdown':
          $usource = $this->readString($ucell, 'source');
          $vsource = $this->readString($vcell, 'source');

          $diff = id(new PhutilProseDifferenceEngine())
            ->getDiff($usource, $vsource);

          // The old side shows unchanged and removed text; the new side
          // shows unchanged and added text.
          $u_content = $this->newProseDiffCell($diff, array('=', '-'));
          $v_content = $this->newProseDiffCell($diff, array('=', '+'));

          $u_content = $this->newJupyterCell(null, $u_content, null);
          $v_content = $this->newJupyterCell(null, $v_content, null);

          $u_content = $this->newCellContainer($u_content);
          $v_content = $this->newCellContainer($v_content);

          return id(new PhabricatorDocumentEngineBlockDiff())
            ->setOldContent($u_content)
            ->addOldClass('old')
            ->setNewContent($v_content)
            ->addNewClass('new');
        case 'code/line':
          $usource = idx($ucell, 'raw');
          $vsource = idx($vcell, 'raw');
          $udisplay = idx($ucell, 'display');
          $vdisplay = idx($vcell, 'display');

          // Segments are computed on the raw text, then applied to the
          // already-highlighted display markup.
          $intraline_segments = ArcanistDiffUtils::generateIntralineDiff(
            $usource,
            $vsource);

          $u_segments = array();
          foreach ($intraline_segments[0] as $u_segment) {
            $u_segments[] = $u_segment;
          }

          $v_segments = array();
          foreach ($intraline_segments[1] as $v_segment) {
            $v_segments[] = $v_segment;
          }

          $usource = PhabricatorDifferenceEngine::applyIntralineDiff(
            $udisplay,
            $u_segments);

          $vsource = PhabricatorDifferenceEngine::applyIntralineDiff(
            $vdisplay,
            $v_segments);

          list($u_label, $u_content) = $this->newCodeLineCell(
            $ucell,
            $usource);
          list($v_label, $v_content) = $this->newCodeLineCell(
            $vcell,
            $vsource);

          $classes = array(
            'jupyter-cell-flush',
          );

          $u_content = $this->newJupyterCell($u_label, $u_content, $classes);
          $v_content = $this->newJupyterCell($v_label, $v_content, $classes);

          $u_content = $this->newCellContainer($u_content);
          $v_content = $this->newCellContainer($v_content);

          return id(new PhabricatorDocumentEngineBlockDiff())
            ->setOldContent($u_content)
            ->addOldClass('old')
            ->setNewContent($v_content)
            ->addNewClass('new');
      }
    }

    return parent::newBlockDiffViews($uref, $ublock, $vref, $vblock);
  }

  /**
   * Render a single (unchanged, added, or removed) block for the diff view.
   */
  public function newBlockContentView(
    PhabricatorDocumentRef $ref,
    PhabricatorDocumentEngineBlock $block) {

    $viewer = $this->getViewer();
    $cell = $block->getContent();

    $cell_content = $this->renderJupyterCell($viewer, $cell);

    return $this->newCellContainer($cell_content);
  }

  /**
   * Wrap rendered cell rows in the notebook `<table>` and the outer `<div>`
   * used for diff rendering.
   */
  private function newCellContainer($cell_content) {
    $notebook_table = phutil_tag(
      'table',
      array(
        'class' => 'jupyter-notebook',
      ),
      $cell_content);

    $container = phutil_tag(
      'div',
      array(
        'class' => 'document-engine-jupyter document-engine-diff',
      ),
      $notebook_table);

    return $container;
  }

  /**
   * Render one side of a prose diff as a markdown cell.
   *
   * @param PhutilProseDiff Diff to render.
   * @param list<string> Part types to include: "=" (unchanged), "-"
   *   (removed), "+" (added). Parts of other types are skipped.
   * @return list A `(label, content)` pair; the label is always null.
   */
  private function newProseDiffCell(PhutilProseDiff $diff, array $mask) {
    $mask = array_fuse($mask);

    $result = array();
    foreach ($diff->getParts() as $part) {
      $type = $part['type'];
      $text = $part['text'];

      if (!isset($mask[$type])) {
        continue;
      }

      switch ($type) {
        case '-':
        case '+':
          // Removed and added text are both highlighted the same way; the
          // surrounding "old"/"new" class on the block supplies the color.
          $result[] = phutil_tag(
            'span',
            array(
              'class' => 'bright',
            ),
            $text);
          break;
        case '=':
          $result[] = $text;
          break;
      }
    }

    return array(
      null,
      phutil_tag(
        'div',
        array(
          'class' => 'jupyter-cell-markdown',
        ),
        $result),
    );
  }

  /**
   * Load a document and turn its (diff-granularity) cells into engine blocks.
   *
   * Block keys are sequential integers starting at 1. Exceptions from
   * @{method:newCells} propagate to the caller.
   */
  private function newDiffBlocks(PhabricatorDocumentRef $ref) {
    $content = $ref->loadData();

    $cells = $this->newCells($content, true);

    $idx = 1;
    $blocks = array();
    foreach ($cells as $cell) {
      // When the cell is a source code line, we can hash just the raw
      // input rather than all the cell metadata.

      switch (idx($cell, 'cell_type')) {
        case 'code/line':
          $hash_input = $cell['raw'];
          break;
        case 'markdown':
          $hash_input = $this->readString($cell, 'source');
          break;
        default:
          $hash_input = serialize($cell);
          break;
      }

      $hash = PhabricatorHash::digestWithNamedKey(
        $hash_input,
        'document-engine.content-digest');

      $blocks[] = id(new PhabricatorDocumentEngineBlock())
        ->setBlockKey($idx)
        ->setDifferenceHash($hash)
        ->setContent($cell);

      $idx++;
    }

    return $blocks;
  }

  /**
   * Render the whole notebook for the normal (non-diff) document view.
   *
   * If the document can not be parsed as a notebook, the parse error is
   * rendered as a message instead.
   */
  protected function newDocumentContent(PhabricatorDocumentRef $ref) {
    $viewer = $this->getViewer();
    $content = $ref->loadData();

    try {
      $cells = $this->newCells($content, false);
    } catch (Exception $ex) {
      return $this->newMessage($ex->getMessage());
    }

    $rows = array();
    foreach ($cells as $cell) {
      $rows[] = $this->renderJupyterCell($viewer, $cell);
    }

    $notebook_table = phutil_tag(
      'table',
      array(
        'class' => 'jupyter-notebook',
      ),
      $rows);

    $container = phutil_tag(
      'div',
      array(
        'class' => 'document-engine-jupyter',
      ),
      $notebook_table);

    return $container;
  }

  /**
   * Parse raw document content into a list of notebook cells.
   *
   * @param string Raw document content (expected to be notebook JSON).
   * @param bool If true, split cells into finer pieces for diffing: markdown
   *   cells are split on blank lines, and code cells become one "code/line"
   *   cell per source line followed by one "code/output" cell per output.
   *   If false, the cells are returned as they appear in the document.
   * @return list<map<string, wild>> List of cells.
   * @throws Exception If the content is not valid JSON, is missing the
   *   "nbformat" field, uses a format version other than 4, has no usable
   *   "cells" list, or contains a cell which is not a JSON object.
   */
  private function newCells($content, $for_diff) {
    try {
      $data = phutil_json_decode($content);
    } catch (PhutilJSONParserException $ex) {
      throw new Exception(
        pht(
          'This is not a valid JSON document and can not be rendered as '.
          'a Jupyter notebook: %s.',
          $ex->getMessage()));
    }

    if (!is_array($data)) {
      throw new Exception(
        pht(
          'This document does not encode a valid JSON object and can not '.
          'be rendered as a Jupyter notebook.'));
    }

    $nbformat = idx($data, 'nbformat');
    if ($nbformat == null || !strlen($nbformat)) {
      throw new Exception(
        pht(
          'This document is missing an "nbformat" field. Jupyter notebooks '.
          'must have this field.'));
    }

    if ($nbformat !== 4) {
      throw new Exception(
        pht(
          'This Jupyter notebook uses an unsupported version of the file '.
          'format (found version %s, expected version 4).',
          $nbformat));
    }

    $cells = idx($data, 'cells');
    if (!is_array($cells)) {
      throw new Exception(
        pht(
          'This Jupyter notebook does not specify a list of "cells".'));
    }

    if (!$cells) {
      throw new Exception(
        pht(
          'This Jupyter notebook does not specify any notebook cells.'));
    }

    // Every downstream consumer (idx(), and the "array $cell" renderers)
    // requires each cell to be an array. Reject malformed entries here with
    // a regular Exception, which callers already handle, rather than letting
    // a TypeError escape later.
    foreach ($cells as $cell) {
      if (!is_array($cell)) {
        throw new Exception(
          pht(
            'This Jupyter notebook contains a cell which is not a valid '.
            'JSON object.'));
      }
    }

    if (!$for_diff) {
      return $cells;
    }

    // If we're extracting cells to build a diff view, split code cells into
    // individual lines and individual outputs. We want users to be able to
    // add inline comments to each line and each output block.

    $results = array();
    foreach ($cells as $cell) {
      $cell_type = idx($cell, 'cell_type');
      if ($cell_type === 'markdown') {
        $source = $this->readString($cell, 'source');

        // Attempt to split contiguous blocks of markdown into smaller
        // pieces.

        $chunks = preg_split(
          '/\n\n+/',
          $source);

        foreach ($chunks as $chunk) {
          $result = $cell;
          $result['source'] = array($chunk);
          $results[] = $result;
        }

        continue;
      }

      if ($cell_type !== 'code') {
        $results[] = $cell;
        continue;
      }

      $label = $this->newCellLabel($cell);

      $lines = $this->readStringList($cell, 'source');
      $display_lines = $this->highlightLines($lines);

      $count = count($lines);
      for ($ii = 0; $ii < $count; $ii++) {
        $is_head = ($ii === 0);
        $is_last = ($ii === ($count - 1));

        // Only the first line of a code cell carries the "In [n]:" label.
        if ($is_head) {
          $line_label = $label;
        } else {
          $line_label = null;
        }

        $results[] = array(
          'cell_type' => 'code/line',
          'label' => $line_label,
          'raw' => $lines[$ii],
          'display' => idx($display_lines, $ii),
          'head' => $is_head,
          'last' => $is_last,
        );
      }

      $output_list = idx($cell, 'outputs');
      if (is_array($output_list)) {
        foreach ($output_list as $output) {
          $results[] = array(
            'cell_type' => 'code/output',
            'output' => $output,
          );
        }
      }
    }

    return $results;
  }

  /**
   * Render one cell as a `<tr>`, choosing the content renderer by cell type.
   */
  private function renderJupyterCell(
    PhabricatorUser $viewer,
    array $cell) {

    list($label, $content) = $this->renderJupyterCellContent($viewer, $cell);

    $classes = null;
    switch (idx($cell, 'cell_type')) {
      case 'code/line':
        $classes = 'jupyter-cell-flush';
        break;
    }

    return $this->newJupyterCell(
      $label,
      $content,
      $classes);
  }

  /**
   * Build a notebook table row with a label cell and a content cell.
   *
   * @param wild Label content, or null for no label.
   * @param wild Cell content.
   * @param wild Value for the content cell's "class" attribute, or null.
   */
  private function newJupyterCell($label, $content, $classes) {
    $label_cell = phutil_tag(
      'td',
      array(
        'class' => 'jupyter-label',
      ),
      $label);

    $content_cell = phutil_tag(
      'td',
      array(
        'class' => $classes,
      ),
      $content);

    return phutil_tag(
      'tr',
      array(),
      array(
        $label_cell,
        $content_cell,
      ));
  }

  /**
   * Dispatch on "cell_type" and return the `(label, content)` pair for a
   * cell. Unrecognized cell types are rendered as formatted raw JSON.
   */
  private function renderJupyterCellContent(
    PhabricatorUser $viewer,
    array $cell) {

    $cell_type = idx($cell, 'cell_type');
    switch ($cell_type) {
      case 'markdown':
        return $this->newMarkdownCell($cell);
      case 'code':
        return $this->newCodeCell($cell);
      case 'code/line':
        return $this->newCodeLineCell($cell);
      case 'code/output':
        return $this->newCodeOutputCell($cell);
    }

    $json_content = id(new PhutilJSON())
      ->encodeFormatted($cell);

    return $this->newRawCell($json_content);
  }

  private function newRawCell($content) {
    return array(
      null,
      phutil_tag(
        'div',
        array(
          'class' => 'jupyter-cell-raw PhabricatorMonospaced',
        ),
        $content),
    );
  }

  private function newMarkdownCell(array $cell) {
    $content = $this->readStringList($cell, 'source');

    // TODO: This should ideally highlight as Markdown, but the "md"
    // highlighter in Pygments is painfully slow and not terribly useful.
    $content = $this->highlightLines($content, 'txt');

    return array(
      null,
      phutil_tag(
        'div',
        array(
          'class' => 'jupyter-cell-markdown',
        ),
        $content),
    );
  }

  /**
   * Render a whole code cell (source block followed by all of its outputs)
   * for the non-diff document view.
   */
  private function newCodeCell(array $cell) {
    $label = $this->newCellLabel($cell);

    $content = $this->readStringList($cell, 'source');
    $content = $this->highlightLines($content);

    $outputs = array();
    $output_list = idx($cell, 'outputs');
    if (is_array($output_list)) {
      foreach ($output_list as $output) {
        $outputs[] = $this->newOutput($output);
      }
    }

    return array(
      $label,
      array(
        phutil_tag(
          'div',
          array(
            'class' =>
              'jupyter-cell-code jupyter-cell-code-block '.
              'PhabricatorMonospaced remarkup-code',
          ),
          array(
            $content,
          )),
        $outputs,
      ),
    );
  }

  /**
   * Render a single "code/line" cell produced by @{method:newCells}.
   *
   * @param map<string, wild> Cell with "head", "last", "label" and
   *   "display" keys.
   * @param wild Optional replacement content (for example, markup with
   *   intraline diff highlights). Defaults to the cell's "display" value.
   */
  private function newCodeLineCell(array $cell, $content = null) {
    $classes = array();
    $classes[] = 'PhabricatorMonospaced';
    $classes[] = 'remarkup-code';
    $classes[] = 'jupyter-cell-code';
    $classes[] = 'jupyter-cell-code-line';

    if ($cell['head']) {
      $classes[] = 'jupyter-cell-code-head';
    }

    if ($cell['last']) {
      $classes[] = 'jupyter-cell-code-last';
    }

    $classes = implode(' ', $classes);

    if ($content === null) {
      $content = $cell['display'];
    }

    return array(
      $cell['label'],
      array(
        phutil_tag(
          'div',
          array(
            'class' => $classes,
          ),
          array(
            $content,
          )),
      ),
    );
  }

  private function newCodeOutputCell(array $cell) {
    return array(
      null,
      $this->newOutput($cell['output']),
    );
  }

  /**
   * Render one entry from a code cell's "outputs" list.
   *
   * The parameter is intentionally not type-hinted: the value comes straight
   * from the decoded notebook, and a non-array entry must reach the guard
   * below rather than fail the type check.
   *
   * For "execute_result" and "display_data" outputs, the first available
   * representation is used, in this order: an image (PNG, JPEG, GIF) as a
   * base64 "data:" URI, then "text/html", "application/javascript", and
   * "text/plain" as text. All other output types render the "text" field.
   *
   * @param wild Decoded output entry.
   * @return wild Rendered output, or a placeholder string if the entry is
   *   not an array.
   */
  private function newOutput($output) {
    if (!is_array($output)) {
      return pht('<Invalid Output>');
    }

    $classes = array(
      'jupyter-output',
      'PhabricatorMonospaced',
    );

    $output_name = idx($output, 'name');
    switch ($output_name) {
      case 'stderr':
        $classes[] = 'jupyter-output-stderr';
        break;
    }

    // Stays null (an empty output block) when the output carries no
    // representation we know how to render.
    $content = null;

    $output_type = idx($output, 'output_type');
    switch ($output_type) {
      case 'execute_result':
      case 'display_data':
        $data = idx($output, 'data');
        if (!is_array($data)) {
          break;
        }

        $image_formats = array(
          'image/png',
          'image/jpeg',
          'image/jpg',
          'image/gif',
        );

        foreach ($image_formats as $image_format) {
          if (!isset($data[$image_format])) {
            continue;
          }

          $raw_data = $this->readString($data, $image_format);

          $content = phutil_tag(
            'img',
            array(
              'src' => 'data:'.$image_format.';base64,'.$raw_data,
            ));

          // Leave both the loop and the enclosing switch.
          break 2;
        }

        if (isset($data['text/html'])) {
          $content = $data['text/html'];
          $classes[] = 'jupyter-output-html';
          break;
        }

        if (isset($data['application/javascript'])) {
          $content = $data['application/javascript'];
          $classes[] = 'jupyter-output-html';
          break;
        }

        if (isset($data['text/plain'])) {
          $content = $data['text/plain'];
          break;
        }

        break;
      case 'stream':
      default:
        $content = $this->readString($output, 'text');
        break;
    }

    return phutil_tag(
      'div',
      array(
        'class' => implode(' ', $classes),
      ),
      $content);
  }

  /**
   * Build the "In [n]:" label for a code cell, or null if the cell has no
   * (truthy) "execution_count".
   */
  private function newCellLabel(array $cell) {
    $execution_count = idx($cell, 'execution_count');
    if ($execution_count) {
      $label = 'In ['.$execution_count.']:';
    } else {
      $label = null;
    }

    return $label;
  }

  /**
   * Syntax-highlight a list of source lines and return the result split
   * back into lines.
   *
   * When no language is forced, the language defaults to "py". If the first
   * line starts with "%%", the remainder of that line is used as the language
   * instead, the line is excluded from highlighting, and it is re-inserted
   * at the front of the result wrapped in a "language-tag" span.
   *
   * @param list<string> Source lines.
   * @param string|null Language to highlight as, or null to detect.
   * @return list<wild> Highlighted lines.
   */
  private function highlightLines(array $lines, $force_language = null) {
    if ($force_language === null) {
      $head = head($lines);
      $matches = null;
      if (preg_match('/^%%(.*)$/', $head, $matches)) {
        $restore = array_shift($lines);
        $lang = $matches[1];
      } else {
        $restore = null;
        $lang = 'py';
      }
    } else {
      $restore = null;
      $lang = $force_language;
    }

    $content = PhabricatorSyntaxHighlighter::highlightWithLanguage(
      $lang,
      implode('', $lines));
    $content = phutil_split_lines($content);

    if ($restore !== null) {
      $language_tag = phutil_tag(
        'span',
        array(
          'class' => 'language-tag',
        ),
        $restore);

      array_unshift($content, $language_tag);
    }

    return $content;
  }

  public function shouldSuggestEngine(PhabricatorDocumentRef $ref) {
    return true;
  }

  /**
   * Read a field which may be stored as a string or a list of strings and
   * return it as a single concatenated string.
   */
  private function readString(array $src, $key) {
    $list = $this->readStringList($src, $key);
    return implode('', $list);
  }

  /**
   * Read a field which may be stored as a string or a list of strings and
   * return it as a list: a list is returned as-is, a string becomes a
   * one-element list, and anything else (including a missing key) becomes
   * an empty list.
   */
  private function readStringList(array $src, $key) {
    $list = idx($src, $key);

    if (is_array($list)) {
      return $list;
    }

    if (is_string($list)) {
      return array($list);
    }

    return array();
  }

}