Path: blob/master/src/applications/differential/parser/DifferentialHunkParser.php
12256 views
<?php

/**
 * Parses a list of @{class:DifferentialHunk} objects into aligned "old" and
 * "new" line lists and derives rendering metadata from them (intraline
 * diffs, visibility masks, highlight masks and context diffs).
 *
 * Each entry in the old/new line lists is either `null` (a padding row with
 * no line on that side) or a dictionary with these keys:
 *
 *   - `type`: `'+'`, `'-'`, `'\\'`, or `null` for an unchanged line;
 *   - `text`: the line text with the leading diff marker removed;
 *   - `line`: the line number on that side of the diff.
 *
 * Most getters throw @{class:PhutilInvalidStateException} naming the method
 * which must be called first if the data they return has not been built.
 */
final class DifferentialHunkParser extends Phobject {

  private $oldLines;
  private $newLines;
  private $intraLineDiffs;
  private $depthOnlyLines;
  private $visibleLinesMask;
  private $normalized;

  /**
   * Get a map of lines on which hunks start, other than line 1. This
   * data structure is used to determine when to render "Context not
   * available." in diffs with multiple hunks.
   *
   * @param list<DifferentialHunk> Hunks to examine.
   * @return dict<int, bool> Map of lines where hunks start, other than line 1.
   */
  public function getHunkStartLines(array $hunks) {
    assert_instances_of($hunks, 'DifferentialHunk');

    $map = array();
    foreach ($hunks as $hunk) {
      $line = $hunk->getOldOffset();
      if ($line > 1) {
        $map[$line] = true;
      }
    }

    return $map;
  }

  /**
   * Store the visibility mask built by one of the mask generators.
   *
   * @param dict<int, wild> Mask keyed by row index.
   * @return this
   */
  private function setVisibleLinesMask($mask) {
    $this->visibleLinesMask = $mask;
    return $this;
  }

  /**
   * Get the visibility mask.
   *
   * @return dict<int, wild> Mask keyed by row index.
   * @throws PhutilInvalidStateException If no mask has been generated yet.
   */
  public function getVisibleLinesMask() {
    if ($this->visibleLinesMask === null) {
      throw new PhutilInvalidStateException('generateVisibleLinesMask');
    }
    return $this->visibleLinesMask;
  }

  /**
   * Store the intraline diff segments built by
   * @{method:generateIntraLineDiffs}.
   *
   * @param dict<int, pair> Map of row index to (old segments, new segments).
   * @return this
   */
  private function setIntraLineDiffs($intra_line_diffs) {
    $this->intraLineDiffs = $intra_line_diffs;
    return $this;
  }

  /**
   * Get the intraline diff segments.
   *
   * @return dict<int, pair> Map of row index to (old segments, new segments).
   * @throws PhutilInvalidStateException If they have not been generated yet.
   */
  public function getIntraLineDiffs() {
    if ($this->intraLineDiffs === null) {
      throw new PhutilInvalidStateException('generateIntraLineDiffs');
    }
    return $this->intraLineDiffs;
  }

  /**
   * Replace the list of new-side lines.
   *
   * @param list<dict|null> New-side line rows.
   * @return this
   */
  private function setNewLines($new_lines) {
    $this->newLines = $new_lines;
    return $this;
  }

  /**
   * Get the list of new-side lines.
   *
   * @return list<dict|null> New-side line rows.
   * @throws PhutilInvalidStateException If hunks have not been parsed yet.
   */
  public function getNewLines() {
    if ($this->newLines === null) {
      throw new PhutilInvalidStateException('parseHunksForLineData');
    }
    return $this->newLines;
  }

  /**
   * Replace the list of old-side lines.
   *
   * @param list<dict|null> Old-side line rows.
   * @return this
   */
  private function setOldLines($old_lines) {
    $this->oldLines = $old_lines;
    return $this;
  }

  /**
   * Get the list of old-side lines.
   *
   * @return list<dict|null> Old-side line rows.
   * @throws PhutilInvalidStateException If hunks have not been parsed yet.
   */
  public function getOldLines() {
    if ($this->oldLines === null) {
      throw new PhutilInvalidStateException('parseHunksForLineData');
    }
    return $this->oldLines;
  }

  /**
   * Build a map from old-side line number to change type, skipping padding
   * rows.
   *
   * @return dict<int, string|null> Map of line number to change type.
   */
  public function getOldLineTypeMap() {
    $map = array();
    $old = $this->getOldLines();
    foreach ($old as $o) {
      if (!$o) {
        continue;
      }
      $map[$o['line']] = $o['type'];
    }
    return $map;
  }

  /**
   * Overwrite the change type of every old-side line from a map keyed by
   * line number. Lines missing from the map get a `null` type.
   *
   * Padding rows are left untouched, mirroring
   * @{method:getOldLineTypeMap}. Writing a type into a padding row would
   * turn the `null` placeholder into a partial row with no `text` or `line`.
   *
   * @param dict<int, string|null> Map of line number to change type.
   * @return this
   */
  public function setOldLineTypeMap(array $map) {
    $lines = $this->getOldLines();
    foreach ($lines as $key => $data) {
      if (!$data) {
        continue;
      }
      $lines[$key]['type'] = idx($map, $data['line']);
    }
    $this->oldLines = $lines;
    return $this;
  }

  /**
   * Build a map from new-side line number to change type, skipping padding
   * rows.
   *
   * @return dict<int, string|null> Map of line number to change type.
   */
  public function getNewLineTypeMap() {
    $map = array();
    $new = $this->getNewLines();
    foreach ($new as $n) {
      if (!$n) {
        continue;
      }
      $map[$n['line']] = $n['type'];
    }
    return $map;
  }

  /**
   * Overwrite the change type of every new-side line from a map keyed by
   * line number. Lines missing from the map get a `null` type.
   *
   * Padding rows are left untouched, mirroring
   * @{method:getNewLineTypeMap}. Writing a type into a padding row would
   * turn the `null` placeholder into a partial row with no `text` or `line`.
   *
   * @param dict<int, string|null> Map of line number to change type.
   * @return this
   */
  public function setNewLineTypeMap(array $map) {
    $lines = $this->getNewLines();
    foreach ($lines as $key => $data) {
      if (!$data) {
        continue;
      }
      $lines[$key]['type'] = idx($map, $data['line']);
    }
    $this->newLines = $lines;
    return $this;
  }

  /**
   * Store the map of rows which changed only in indentation depth.
   *
   * @param dict<int, string> Map of row index to `'>'` or `'<'`.
   * @return this
   */
  public function setDepthOnlyLines(array $map) {
    $this->depthOnlyLines = $map;
    return $this;
  }

  /**
   * Get the map of rows which changed only in indentation depth.
   *
   * Unlike the other getters this does not throw when unset; it returns
   * whatever was last stored (`null` if nothing was).
   *
   * @return dict<int, string>|null Map of row index to `'>'` or `'<'`.
   */
  public function getDepthOnlyLines() {
    return $this->depthOnlyLines;
  }

  /**
   * Set whether the hunks were aligned using normalized line content. When
   * set, @{method:reparseHunksForSpecialAttributes} re-marks aligned rows
   * whose text differs as changed.
   *
   * @param bool True if the diff was aligned on normalized content.
   * @return this
   */
  public function setNormalized($normalized) {
    $this->normalized = $normalized;
    return $this;
  }

  /**
   * @return bool|null The value last given to @{method:setNormalized}.
   */
  public function getNormalized() {
    return $this->normalized;
  }

  /**
   * Test whether the parsed hunks describe a deleted file: no rows on the
   * new side and at least one row on the old side.
   *
   * @return bool True if the whole file was deleted.
   */
  public function getIsDeleted() {
    foreach ($this->getNewLines() as $line) {
      if ($line) {
        // At least one new line, so the entire file wasn't deleted.
        return false;
      }
    }

    foreach ($this->getOldLines() as $line) {
      if ($line) {
        // No new lines, at least one old line; the entire file was deleted.
        return true;
      }
    }

    // This is an empty file.
    return false;
  }

  /**
   * Returns true if the hunks change anything, including whitespace.
   */
  public function getHasAnyChanges() {
    return $this->getHasChanges('any');
  }

  /**
   * Test whether any aligned row differs between the old and new sides.
   *
   * The old and new line lists are indexed in parallel, so they are expected
   * to have been padded to equal length first.
   *
   * @param string Either `'any'` (any text difference counts) or `'text'`
   *   (text differences count only if they survive `trim()`).
   * @return bool True if a change was found.
   * @throws Exception If the filter is not recognized.
   */
  private function getHasChanges($filter) {
    if ($filter !== 'any' && $filter !== 'text') {
      throw new Exception(pht("Unknown change filter '%s'.", $filter));
    }

    $old = $this->getOldLines();
    $new = $this->getNewLines();

    $is_any = ($filter === 'any');

    foreach ($old as $key => $o) {
      $n = $new[$key];
      if ($o === null || $n === null) {
        // One side is missing, and it's impossible for both sides to be null,
        // so the other side must have something, and thus the two sides are
        // different and the file has been changed under any type of filter.
        return true;
      }

      if ($o['type'] !== $n['type']) {
        return true;
      }

      if ($o['text'] !== $n['text']) {
        if ($is_any) {
          // The text is different, so there's a change.
          return true;
        } else if (trim($o['text']) !== trim($n['text'])) {
          return true;
        }
      }
    }

    // No changes anywhere in the file.
    return false;
  }


  /**
   * This function takes advantage of the parsing work done in
   * @{method:parseHunksForLineData} and continues the struggle to hammer this
   * data into something we can display to a user.
   *
   * In particular, this function re-parses the hunks to make them equivalent
   * in length for easy rendering, adding `null` as necessary to pad the
   * length.
   *
   * Anyhoo, this function is not particularly well-named but I try.
   *
   * NOTE: this function must be called after
   * @{method:parseHunksForLineData}.
   *
   * @return this
   */
  public function reparseHunksForSpecialAttributes() {
    $rebuild_old = array();
    $rebuild_new = array();

    // Reverse both lists so array_pop() consumes them front-to-back and
    // array_push() can put a row back to be considered again.
    $old_lines = array_reverse($this->getOldLines());
    $new_lines = array_reverse($this->getNewLines());

    while (count($old_lines) || count($new_lines)) {
      $old_line_data = array_pop($old_lines);
      $new_line_data = array_pop($new_lines);

      if ($old_line_data) {
        $o_type = $old_line_data['type'];
      } else {
        $o_type = null;
      }

      if ($new_line_data) {
        $n_type = $new_line_data['type'];
      } else {
        $n_type = null;
      }

      // This line does not exist in the new file.
      if (($o_type != null) && ($n_type == null)) {
        $rebuild_old[] = $old_line_data;
        $rebuild_new[] = null;
        if ($new_line_data) {
          array_push($new_lines, $new_line_data);
        }
        continue;
      }

      // This line does not exist in the old file.
      if (($n_type != null) && ($o_type == null)) {
        $rebuild_old[] = null;
        $rebuild_new[] = $new_line_data;
        if ($old_line_data) {
          array_push($old_lines, $old_line_data);
        }
        continue;
      }

      $rebuild_old[] = $old_line_data;
      $rebuild_new[] = $new_line_data;
    }

    $this->setOldLines($rebuild_old);
    $this->setNewLines($rebuild_new);

    $this->updateChangeTypesForNormalization();

    return $this;
  }

  /**
   * Compute intraline diff segments for every aligned row where both sides
   * are present and their change types differ, and record which of those
   * rows changed only in indentation depth.
   *
   * Results are stored for @{method:getIntraLineDiffs} and
   * @{method:getDepthOnlyLines}.
   *
   * @return this
   */
  public function generateIntraLineDiffs() {
    $old = $this->getOldLines();
    $new = $this->getNewLines();

    $diffs = array();
    $depth_only = array();
    foreach ($old as $key => $o) {
      $n = $new[$key];

      if (!$o || !$n) {
        continue;
      }

      if ($o['type'] != $n['type']) {
        $o_segments = array();
        $n_segments = array();
        $tab_width = 8;

        $o_text = $o['text'];
        $n_text = $n['text'];

        // The lines differ only in their leading whitespace: peel the
        // indentation change off as its own segment before diffing the rest.
        if ($o_text !== $n_text && (ltrim($o_text) === ltrim($n_text))) {
          $o_depth = $this->getIndentDepth($o_text, $tab_width);
          $n_depth = $this->getIndentDepth($n_text, $tab_width);

          if ($o_depth < $n_depth) {
            $segment_type = '>';
            $segment_width = $this->getCharacterCountForVisualWhitespace(
              $n_text,
              ($n_depth - $o_depth),
              $tab_width);
            if ($segment_width) {
              $n_text = substr($n_text, $segment_width);
              $n_segments[] = array(
                $segment_type,
                $segment_width,
              );
            }
          } else if ($o_depth > $n_depth) {
            $segment_type = '<';
            $segment_width = $this->getCharacterCountForVisualWhitespace(
              $o_text,
              ($o_depth - $n_depth),
              $tab_width);
            if ($segment_width) {
              $o_text = substr($o_text, $segment_width);
              $o_segments[] = array(
                $segment_type,
                $segment_width,
              );
            }
          }

          // If there are no remaining changes to this line after we've marked
          // off the indent depth changes, this line was only modified by
          // changing the indent depth. Mark it for later so we can change how
          // it is displayed.
          if ($o_text === $n_text) {
            $depth_only[$key] = $segment_type;
          }
        }

        $intraline_segments = ArcanistDiffUtils::generateIntralineDiff(
          $o_text,
          $n_text);

        foreach ($intraline_segments[0] as $o_segment) {
          $o_segments[] = $o_segment;
        }

        foreach ($intraline_segments[1] as $n_segment) {
          $n_segments[] = $n_segment;
        }

        $diffs[$key] = array(
          $o_segments,
          $n_segments,
        );
      }
    }

    $this->setIntraLineDiffs($diffs);
    $this->setDepthOnlyLines($depth_only);

    return $this;
  }

  /**
   * Build a visibility mask which marks every changed row, plus a number of
   * context rows on either side of it, as visible.
   *
   * NOTE: Unlike @{method:generateVisibleLinesMask}, this returns the mask
   * itself rather than `$this`.
   *
   * @param int Number of context rows to reveal around each changed row.
   * @return dict<int, bool> Map of row index to visibility.
   */
  public function generateVisibleBlocksMask($lines_context) {

    // See T13468. This is similar to "generateVisibleLinesMask()", but
    // attempts to work around a series of bugs which cancel each other
    // out but make a mess of the intermediate steps.

    $old = $this->getOldLines();
    $new = $this->getNewLines();

    $length = max(count($old), count($new));

    $visible_lines = array();
    for ($ii = 0; $ii < $length; $ii++) {
      $old_visible = (isset($old[$ii]) && $old[$ii]['type']);
      $new_visible = (isset($new[$ii]) && $new[$ii]['type']);

      $visible_lines[$ii] = ($old_visible || $new_visible);
    }

    $mask = array();
    $reveal_cursor = -1;
    for ($ii = 0; $ii < $length; $ii++) {

      // If this line isn't visible, it isn't going to reveal anything.
      if (!$visible_lines[$ii]) {

        // If it hasn't been revealed by a nearby line, mark it as masked.
        if (empty($mask[$ii])) {
          $mask[$ii] = false;
        }

        continue;
      }

      // If this line is visible, reveal all the lines nearby.

      // First, compute the minimum and maximum offsets we want to reveal.
      $min_reveal = max($ii - $lines_context, 0);
      $max_reveal = min($ii + $lines_context, $length - 1);

      // Naively, we'd do more work than necessary when revealing context for
      // several adjacent visible lines: we would mark all the overlapping
      // lines as revealed several times.

      // To avoid duplicating work, keep track of the largest line we've
      // revealed to. Since we reveal context by marking every consecutive
      // line, we don't need to touch any line above it.
      $min_reveal = max($min_reveal, $reveal_cursor);

      // Reveal the remaining unrevealed lines.
      for ($jj = $min_reveal; $jj <= $max_reveal; $jj++) {
        $mask[$jj] = true;
      }

      // Move the cursor to the next line which may still need to be revealed.
      $reveal_cursor = $max_reveal + 1;
    }

    $this->setVisibleLinesMask($mask);

    return $mask;
  }

  /**
   * Build the legacy visibility mask: a sparse map with an entry of `1` for
   * each row considered visible.
   *
   * NOTE: See T13468 and @{method:generateVisibleBlocksMask}. This method
   * has known quirks which other code compensates for, so its behavior is
   * deliberately left as-is.
   *
   * @param int Number of context rows to reveal around each changed row.
   * @return this
   */
  public function generateVisibleLinesMask($lines_context) {
    $old = $this->getOldLines();
    $new = $this->getNewLines();
    $max_length = max(count($old), count($new));
    $visible = false;
    $last = 0;
    $mask = array();

    for ($cursor = -$lines_context; $cursor < $max_length; $cursor++) {
      // Look ahead by the context size to see whether a change is coming up.
      $offset = $cursor + $lines_context;
      if ((isset($old[$offset]) && $old[$offset]['type']) ||
          (isset($new[$offset]) && $new[$offset]['type'])) {
        $visible = true;
        $last = $offset;
      } else if ($cursor > $last + $lines_context) {
        $visible = false;
      }
      if ($visible && $cursor > 0) {
        $mask[$cursor] = 1;
      }
    }

    $this->setVisibleLinesMask($mask);

    return $this;
  }

  /**
   * @return list<string> Old-side text, one entry per row; see
   *   @{method:getCorpus}.
   */
  public function getOldCorpus() {
    return $this->getCorpus($this->getOldLines());
  }

  /**
   * @return list<string> New-side text, one entry per row; see
   *   @{method:getCorpus}.
   */
  public function getNewCorpus() {
    return $this->getCorpus($this->getNewLines());
  }

  /**
   * Collect the text of a list of line rows. Padding rows and rows with no
   * text contribute a bare newline; rows of type `'\\'` are omitted
   * entirely.
   *
   * @param list<dict|null> Line rows from one side of the diff.
   * @return list<string> Text of each retained row.
   */
  private function getCorpus(array $lines) {

    $corpus = array();
    foreach ($lines as $l) {
      if ($l === null) {
        $corpus[] = "\n";
        continue;
      }

      if ($l['type'] != '\\') {
        if ($l['text'] === null) {
          // There's no text on this side of the diff, but insert a placeholder
          // newline so the highlighted line numbers match up.
          $corpus[] = "\n";
        } else {
          $corpus[] = $l['text'];
        }
      }
    }
    return $corpus;
  }

  /**
   * Parse hunks into the old-side and new-side line lists. Unchanged lines
   * are appended to both lists, `'+'` lines to the new list only, and `'-'`
   * lines to the old list only. A `'\\'` marker row is filed on the same
   * side(s) as the line before it.
   *
   * @param list<DifferentialHunk> Hunks to parse.
   * @return this
   * @throws Exception If a line starts with an unexpected character.
   */
  public function parseHunksForLineData(array $hunks) {
    assert_instances_of($hunks, 'DifferentialHunk');

    $old_lines = array();
    $new_lines = array();
    foreach ($hunks as $hunk) {
      $lines = $hunk->getSplitLines();

      $line_type_map = array();
      $line_text = array();
      foreach ($lines as $line_index => $line) {
        if (isset($line[0])) {
          $char = $line[0];
          switch ($char) {
            case ' ':
              $line_type_map[$line_index] = null;
              $line_text[$line_index] = substr($line, 1);
              break;
            case "\r":
            case "\n":
              // NOTE: Normally, the first character is a space, plus, minus or
              // backslash, but it may be a newline if it used to be a space and
              // trailing whitespace has been stripped via email transmission or
              // some similar mechanism. In these cases, we essentially pretend
              // the missing space is still there.
              $line_type_map[$line_index] = null;
              $line_text[$line_index] = $line;
              break;
            case '+':
            case '-':
            case '\\':
              $line_type_map[$line_index] = $char;
              $line_text[$line_index] = substr($line, 1);
              break;
            default:
              throw new Exception(
                pht(
                  'Unexpected leading character "%s" at line index %s!',
                  $char,
                  $line_index));
          }
        } else {
          $line_type_map[$line_index] = null;
          $line_text[$line_index] = '';
        }
      }

      $old_line = $hunk->getOldOffset();
      $new_line = $hunk->getNewOffset();

      $num_lines = count($lines);
      for ($cursor = 0; $cursor < $num_lines; $cursor++) {
        $type = $line_type_map[$cursor];
        $data = array(
          'type' => $type,
          'text' => $line_text[$cursor],
          'line' => $new_line,
        );
        if ($type == '\\') {
          // File the marker on the same side as the preceding line. If the
          // marker is the first line of the hunk there is no preceding line,
          // so treat it as unchanged rather than reading offset -1.
          if ($cursor > 0) {
            $type = $line_type_map[$cursor - 1];
          } else {
            $type = null;
          }
          $data['text'] = ltrim($data['text']);
        }
        switch ($type) {
          case '+':
            $new_lines[] = $data;
            ++$new_line;
            break;
          case '-':
            $data['line'] = $old_line;
            $old_lines[] = $data;
            ++$old_line;
            break;
          default:
            $new_lines[] = $data;
            $data['line'] = $old_line;
            $old_lines[] = $data;
            ++$new_line;
            ++$old_line;
            break;
        }
      }
    }

    $this->setOldLines($old_lines);
    $this->setNewLines($new_lines);

    return $this;
  }

  /**
   * Compute which lines to highlight on each side when rendering a diff of
   * two diffs.
   *
   * Each line of `$changeset_hunks` carries two markers: the first character
   * is the change type in the diff-of-diffs and the second is the change
   * type in the original diff.
   *
   * @param list<DifferentialHunk> Hunks of the diff between the two diffs.
   * @param list<DifferentialHunk> Hunks of the old diff.
   * @param list<DifferentialHunk> Hunks of the new diff.
   * @return pair<list<int>, list<int>> Line numbers to highlight on the old
   *   and new sides, respectively.
   * @throws Exception If a line does not begin with "+", "-" or a space.
   */
  public function parseHunksForHighlightMasks(
    array $changeset_hunks,
    array $old_hunks,
    array $new_hunks) {
    assert_instances_of($changeset_hunks, 'DifferentialHunk');
    assert_instances_of($old_hunks, 'DifferentialHunk');
    assert_instances_of($new_hunks, 'DifferentialHunk');

    // Put changes side by side.
    $olds = array();
    $news = array();
    $olds_cursor = -1;
    $news_cursor = -1;
    foreach ($changeset_hunks as $hunk) {
      $n_old = $hunk->getOldOffset();
      $n_new = $hunk->getNewOffset();
      $changes = $hunk->getSplitLines();
      foreach ($changes as $line) {
        $diff_type = $line[0]; // Change type in diff of diffs.
        $is_same = ($diff_type === ' ');
        $is_add = ($diff_type === '+');
        $is_rem = ($diff_type === '-');

        $orig_type = $line[1]; // Change type in the original diff.

        if ($is_same) {
          // Use the same key for lines that are next to each other.
          if ($olds_cursor > $news_cursor) {
            $key = $olds_cursor + 1;
          } else {
            $key = $news_cursor + 1;
          }
          $olds[$key] = null;
          $news[$key] = null;
          $olds_cursor = $key;
          $news_cursor = $key;
        } else if ($is_rem) {
          $olds[] = array($n_old, $orig_type);
          $olds_cursor++;
        } else if ($is_add) {
          $news[] = array($n_new, $orig_type);
          $news_cursor++;
        } else {
          throw new Exception(
            pht(
              'Found unknown intradiff source line, expected a line '.
              'beginning with "+", "-", or " " (space): %s.',
              $line));
        }

        // See T13539. Don't increment the line count if this line was removed,
        // or if the line is a "No newline at end of file" marker.
        $not_a_line = ($orig_type === '-' || $orig_type === '\\');
        if ($not_a_line) {
          continue;
        }

        if ($is_same || $is_rem) {
          $n_old++;
        }

        if ($is_same || $is_add) {
          $n_new++;
        }
      }
    }

    $offsets_old = $this->computeOffsets($old_hunks);
    $offsets_new = $this->computeOffsets($new_hunks);

    // Highlight lines that were added on each side or removed on the other
    // side.
    $highlight_old = array();
    $highlight_new = array();
    $last = max(last_key($olds), last_key($news));
    for ($i = 0; $i <= $last; $i++) {
      if (isset($olds[$i])) {
        list($n, $type) = $olds[$i];
        if ($type == '+' ||
            ($type == ' ' && isset($news[$i]) && $news[$i][1] != ' ')) {
          if (isset($offsets_old[$n])) {
            $highlight_old[] = $offsets_old[$n];
          }
        }
      }
      if (isset($news[$i])) {
        list($n, $type) = $news[$i];
        if ($type == '+' ||
            ($type == ' ' && isset($olds[$i]) && $olds[$i][1] != ' ')) {
          if (isset($offsets_new[$n])) {
            $highlight_new[] = $offsets_new[$n];
          }
        }
      }
    }

    return array($highlight_old, $highlight_new);
  }

  /**
   * Build a unified-diff style excerpt ("@@ -a,b +c,d @@" header followed by
   * the hunk lines) covering a range of lines on one side of the diff, plus
   * some surrounding context.
   *
   * @param list<DifferentialHunk> Hunks to excerpt from.
   * @param bool True to select lines by new-side line number, false to
   *   select by old-side line number.
   * @param int First line number of the range.
   * @param int Length of the range, added to the start to find its end.
   * @param int Number of extra context lines to include on either side.
   * @return string Excerpts for every matching hunk, joined with newlines.
   */
  public function makeContextDiff(
    array $hunks,
    $is_new,
    $line_number,
    $line_length,
    $add_context) {

    assert_instances_of($hunks, 'DifferentialHunk');

    $context = array();

    if ($is_new) {
      $prefix = '+';
    } else {
      $prefix = '-';
    }

    foreach ($hunks as $hunk) {
      if ($is_new) {
        $offset = $hunk->getNewOffset();
        $length = $hunk->getNewLen();
      } else {
        $offset = $hunk->getOldOffset();
        $length = $hunk->getOldLen();
      }
      // Convert the requested range to positions relative to this hunk.
      $start = $line_number - $offset;
      $end = $start + $line_length;
      // We need to go in if $start == $length, because the last line
      // might be a "\No newline at end of file" marker, which we want
      // to show if the additional context is > 0.
      if ($start <= $length && $end >= 0) {
        $start = $start - $add_context;
        $end = $end + $add_context;
        $hunk_content = array();
        // Current position, first emitted position and last emitted position
        // on each side, all relative to the start of the hunk.
        $hunk_pos = array('-' => 0, '+' => 0);
        $hunk_offset = array('-' => null, '+' => null);
        $hunk_last = array('-' => null, '+' => null);
        foreach (explode("\n", $hunk->getChanges()) as $line) {
          $in_common = strncmp($line, ' ', 1) === 0;
          $in_old = strncmp($line, '-', 1) === 0 || $in_common;
          $in_new = strncmp($line, '+', 1) === 0 || $in_common;
          $in_selected = strncmp($line, $prefix, 1) === 0;
          $skip = !$in_selected && !$in_common;
          if ($hunk_pos[$prefix] <= $end) {
            if ($start <= $hunk_pos[$prefix]) {
              if (!$skip || ($hunk_pos[$prefix] != $start &&
                $hunk_pos[$prefix] != $end)) {
                if ($in_old) {
                  if ($hunk_offset['-'] === null) {
                    $hunk_offset['-'] = $hunk_pos['-'];
                  }
                  $hunk_last['-'] = $hunk_pos['-'];
                }
                if ($in_new) {
                  if ($hunk_offset['+'] === null) {
                    $hunk_offset['+'] = $hunk_pos['+'];
                  }
                  $hunk_last['+'] = $hunk_pos['+'];
                }

                $hunk_content[] = $line;
              }
            }
            if ($in_old) { ++$hunk_pos['-']; }
            if ($in_new) { ++$hunk_pos['+']; }
          }
        }
        if ($hunk_offset['-'] !== null || $hunk_offset['+'] !== null) {
          $header = '@@';
          if ($hunk_offset['-'] !== null) {
            $header .= ' -'.($hunk->getOldOffset() + $hunk_offset['-']).
              ','.($hunk_last['-'] - $hunk_offset['-'] + 1);
          }
          if ($hunk_offset['+'] !== null) {
            $header .= ' +'.($hunk->getNewOffset() + $hunk_offset['+']).
              ','.($hunk_last['+'] - $hunk_offset['+'] + 1);
          }
          $header .= ' @@';
          $context[] = $header;
          $context[] = implode("\n", $hunk_content);
        }
      }
    }
    return implode("\n", $context);
  }

  /**
   * Number every new-side line covered by a list of hunks consecutively,
   * starting at 1, and map each such number to the line's actual new-side
   * line number.
   *
   * @param list<DifferentialHunk> Hunks to index.
   * @return dict<int, int> Map of 1-based sequence number to line number.
   */
  private function computeOffsets(array $hunks) {
    assert_instances_of($hunks, 'DifferentialHunk');

    $offsets = array();
    $n = 1;
    foreach ($hunks as $hunk) {
      $new_length = $hunk->getNewLen();
      $new_offset = $hunk->getNewOffset();

      for ($i = 0; $i < $new_length; $i++) {
        $offsets[$n] = $new_offset + $i;
        $n++;
      }
    }

    return $offsets;
  }

  /**
   * Measure the visual width of the leading run of spaces and tabs in a
   * string.
   *
   * @param string Text to measure.
   * @param int Number of columns between tabstops.
   * @return int Visual column reached by the leading whitespace.
   */
  private function getIndentDepth($text, $tab_width) {
    $len = strlen($text);

    $depth = 0;
    for ($ii = 0; $ii < $len; $ii++) {
      $c = $text[$ii];

      // If this is a space, increase the indent depth by 1.
      if ($c == ' ') {
        $depth++;
        continue;
      }

      // If this is a tab, increase the indent depth to the next tabstop.

      // For example, if the tab width is 4, these sequences both lead us to
      // a visual width of 8, i.e. the cursor will be in the 8th column:
      //
      //   <tab><tab>
      //   <space><tab><space><space><space><tab>

      if ($c == "\t") {
        $depth = ($depth + $tab_width);
        $depth = $depth - ($depth % $tab_width);
        continue;
      }

      break;
    }

    return $depth;
  }

  /**
   * Count how many leading whitespace characters of a string fit within a
   * given visual width.
   *
   * @param string Text to examine.
   * @param int Visual width to fill, in columns.
   * @param int Number of columns between tabstops.
   * @return int Number of leading characters which fit.
   */
  private function getCharacterCountForVisualWhitespace(
    $text,
    $depth,
    $tab_width) {

    // Here, we know the visual indent depth of a line has been increased by
    // some amount (for example, 6 characters).

    // We want to find the largest whitespace prefix of the string we can
    // which still fits into that amount of visual space.

    // In most cases, this is very easy. For example, if the string has been
    // indented by two characters and the string begins with two spaces, that's
    // a perfect match.

    // However, if the string has been indented by 7 characters, the tab width
    // is 8, and the string begins with "<space><space><tab>", we can only
    // mark the two spaces as an indent change. These cases are unusual.

    $character_depth = 0;
    $visual_depth = 0;

    $len = strlen($text);
    for ($ii = 0; $ii < $len; $ii++) {
      if ($visual_depth >= $depth) {
        break;
      }

      $c = $text[$ii];

      if ($c == ' ') {
        $character_depth++;
        $visual_depth++;
        continue;
      }

      if ($c == "\t") {
        // Figure out how many visual spaces we have until the next tabstop.
        $tab_visual = ($visual_depth + $tab_width);
        $tab_visual = $tab_visual - ($tab_visual % $tab_width);
        $tab_visual = ($tab_visual - $visual_depth);

        // If this tab would take us over the limit, we're all done.
        $remaining_depth = ($depth - $visual_depth);
        if ($remaining_depth < $tab_visual) {
          break;
        }

        $character_depth++;
        $visual_depth += $tab_visual;
        continue;
      }

      break;
    }

    return $character_depth;
  }

  /**
   * When the diff was aligned on normalized content, re-mark aligned rows
   * which are flagged as unchanged but whose text actually differs, so that
   * the old side becomes `'-'` and the new side becomes `'+'`.
   *
   * Does nothing unless @{method:getNormalized} is truthy.
   *
   * @return void
   */
  private function updateChangeTypesForNormalization() {
    if (!$this->getNormalized()) {
      return;
    }

    // If we've parsed based on a normalized diff alignment, we may currently
    // believe some lines are unchanged when they have actually changed. This
    // happens when:
    //
    //   - a line changes;
    //   - the change is a kind of change we normalize away when aligning the
    //     diff, like an indentation change;
    //   - we normalize the change away to align the diff; and so
    //   - the old and new copies of the line are now aligned in the new
    //     normalized diff.
    //
    // Then we end up with an alignment where the two lines that differ only
    // in some trivial way are aligned. This is great, and exactly what
    // we're trying to accomplish by doing all this alignment stuff in the
    // first place.
    //
    // However, in this case the correctly-aligned lines will be incorrectly
    // marked as unchanged because the diff algorithm was fed normalized copies
    // of the lines, and these copies truly weren't any different.
    //
    // When lines are aligned and marked identical, but they're not actually
    // identical, we now mark them as changed. The rest of the processing will
    // figure out how to render them appropriately.

    $new = $this->getNewLines();
    $old = $this->getOldLines();
    foreach ($old as $key => $o) {
      $n = $new[$key];

      if (!$o || !$n) {
        continue;
      }

      if ($o['type'] === null && $n['type'] === null) {
        if ($o['text'] !== $n['text']) {
          $old[$key]['type'] = '-';
          $new[$key]['type'] = '+';
        }
      }
    }

    $this->setOldLines($old);
    $this->setNewLines($new);
  }


}