Path: blob/master/src/applications/differential/engine/DifferentialChangesetEngine.php
12256 views
<?php

/**
 * Recomputes derived data for a list of changesets: attaches their new file
 * objects, flags generated code, stores an effect hash, and records
 * copied/moved line metadata.
 */
final class DifferentialChangesetEngine extends Phobject {

  private $viewer;

  /**
   * Set the user used when querying for file objects.
   *
   * @param PhabricatorUser $viewer Viewing user.
   * @return $this
   */
  public function setViewer(PhabricatorUser $viewer) {
    $this->viewer = $viewer;
    return $this;
  }

  /**
   * @return PhabricatorUser|null Viewer passed to @{method:setViewer}, or
   *   null if none has been set.
   */
  public function getViewer() {
    return $this->viewer;
  }

  /**
   * Rebuild generated-code attributes, effect hashes and copied-line
   * metadata for a list of changesets.
   *
   * Changesets which reference a file object that cannot be loaded are
   * dropped before any processing happens.
   *
   * @param list<DifferentialChangeset> $changesets Changesets to rebuild.
   * @return void
   */
  public function rebuildChangesets(array $changesets) {
    assert_instances_of($changesets, 'DifferentialChangeset');

    $loaded = $this->loadChangesetFiles($changesets);

    foreach ($loaded as $changeset) {
      $this->detectGeneratedCode($changeset);
      $this->computeHashes($changeset);
    }

    $this->detectCopiedCode($loaded);
  }

  /**
   * Load and attach the new file object for each changeset which has a new
   * file object PHID.
   *
   * @param list<DifferentialChangeset> $changesets Changesets to load
   *   files for.
   * @return array Input changesets (original keys preserved), minus any
   *   whose file object could not be loaded.
   */
  private function loadChangesetFiles(array $changesets) {
    $viewer = $this->getViewer();

    $phids = array();
    foreach ($changesets as $changeset) {
      $phid = $changeset->getNewFileObjectPHID();
      if ($phid === null) {
        continue;
      }
      $phids[] = $phid;
    }

    // Only issue a query if at least one changeset references a file.
    $file_map = array();
    if ($phids) {
      $query = id(new PhabricatorFileQuery())
        ->setViewer($viewer)
        ->withPHIDs($phids);
      $file_map = mpull($query->execute(), null, 'getPHID');
    }

    foreach ($changesets as $key => $changeset) {
      $phid = $changeset->getNewFileObjectPHID();
      if ($phid === null) {
        continue;
      }

      $file = idx($file_map, $phid);
      if ($file) {
        $changeset->attachNewFileObject($file);
      } else {
        // The file was not returned by the query: discard the changeset.
        unset($changesets[$key]);
      }
    }

    return $changesets;
  }


/* -( Generated Code )----------------------------------------------------- */


  /**
   * Set the trusted and/or untrusted "generated" attribute on a changeset
   * when the corresponding check reports generated code.
   *
   * @param DifferentialChangeset $changeset Changeset to examine.
   * @return void
   */
  private function detectGeneratedCode(DifferentialChangeset $changeset) {
    $attribute = DifferentialChangeset::ATTRIBUTE_GENERATED;

    $trusted = $this->isTrustedGeneratedCode($changeset);
    if ($trusted) {
      $changeset->setTrustedChangesetAttribute($attribute, $trusted);
    }

    $untrusted = $this->isUntrustedGeneratedCode($changeset);
    if ($untrusted) {
      $changeset->setUntrustedChangesetAttribute($attribute, $untrusted);
    }
  }

  /**
   * Test whether the changeset filename matches any regular expression in
   * the `differential.generated-paths` configuration.
   *
   * @param DifferentialChangeset $changeset Changeset to examine.
   * @return bool True if any configured pattern matches the filename.
   */
  private function isTrustedGeneratedCode(DifferentialChangeset $changeset) {
    $filename = $changeset->getFilename();
    $patterns = PhabricatorEnv::getEnvConfig('differential.generated-paths');

    foreach ($patterns as $pattern) {
      if (preg_match($pattern, $filename)) {
        return true;
      }
    }

    return false;
  }

  /**
   * Test whether the new file content carries an in-band generated-code
   * marker.
   *
   * @param DifferentialChangeset $changeset Changeset to examine.
   * @return bool True if the changeset has hunks and its new content
   *   contains a recognized marker.
   */
  private function isUntrustedGeneratedCode(DifferentialChangeset $changeset) {
    if (!$changeset->getHunks()) {
      return false;
    }

    $content = $changeset->makeNewFile();

    // The marker literal is assembled from two pieces so the contiguous
    // marker text does not appear in this file.
    if (strpos($content, '@'.'generated') !== false) {
      return true;
    }

    // See PHI1112. This is the official pattern for marking Go code as
    // generated.
    $go_pattern = '(^// Code generated .* DO NOT EDIT\.$)m';

    return (bool)preg_match($go_pattern, $content);
  }


/* -( Content Hashes )----------------------------------------------------- */


  /**
   * Store the effect hash in the changeset metadata, if one is available.
   *
   * @param DifferentialChangeset $changeset Changeset to update.
   * @return void
   */
  private function computeHashes(DifferentialChangeset $changeset) {
    $hash = $this->newEffectHash($changeset);
    if ($hash === null) {
      return;
    }

    $changeset->setChangesetMetadata(
      DifferentialChangeset::METADATA_EFFECT_HASH,
      $hash);
  }

  /**
   * Compute a digest describing the new state of the changeset.
   *
   * Changesets with hunks are hashed from their new file content. Otherwise,
   * if the changeset has a new file object with a content hash, the digest
   * is derived from that content hash.
   *
   * @param DifferentialChangeset $changeset Changeset to hash.
   * @return string|null Digest, or null if there is nothing to hash.
   */
  private function newEffectHash(DifferentialChangeset $changeset) {
    if ($changeset->getHunks()) {
      return PhabricatorHash::digestForIndex($changeset->makeNewFile());
    }

    if (!$changeset->getNewFileObjectPHID()) {
      return null;
    }

    // See T13522. For now, the "contentHash" is not really a content hash
    // for files >4MB. This is okay: we will just always detect them as
    // changed, which is the safer behavior.
    $content_hash = $changeset->getNewFileObject()->getContentHash();
    if ($content_hash === null) {
      return null;
    }

    return PhabricatorHash::digestForIndex(
      sprintf('file-hash:%s', $content_hash));
  }


/* -( Copied Code )-------------------------------------------------------- */


  /**
   * Detect added lines which were copied or moved from old-side lines
   * elsewhere in the same list of changesets, and record them under the
   * `copy:lines` metadata key of each affected changeset.
   *
   * @param array $changesets Changesets to analyze together.
   * @return void
   */
  private function detectCopiedCode(array $changesets) {
    // See PHI944. If the total number of changed lines is excessively large,
    // don't bother with copied code detection. This can take a lot of time
    // and memory and it's not generally of any use for very large changes.
    $line_limit = 65535;

    $changed_lines = 0;
    foreach ($changesets as $changeset) {
      $changed_lines += ($changeset->getAddLines() + $changeset->getDelLines());
    }

    if ($changed_lines > $line_limit) {
      return;
    }

    // Lines shorter than this (after trimming) never start a match.
    $min_width = 30;
    // Matched blocks shorter than this are tracked but not reported.
    $min_lines = 3;

    // Index the old side of every hunk:
    //   $old_index: trimmed text => list of (filename, line) occurrences.
    //   $old_text:  filename => line => trimmed text.
    //   $old_type:  filename => line => line type.
    $old_index = array();
    $old_text = array();
    $old_type = array();
    foreach ($changesets as $changeset) {
      $filename = $changeset->getFilename();
      foreach ($changeset->getHunks() as $hunk) {
        foreach ($hunk->getStructuredOldFile() as $number => $info) {
          $kind = $info['type'];
          if ($kind == '\\') {
            continue;
          }
          $old_type[$filename][$number] = $kind;

          $trimmed = trim($info['text']);
          $old_text[$filename][$number] = $trimmed;

          if (strlen($trimmed) >= $min_width) {
            $old_index[$trimmed][] = array($filename, $number);
          }
        }
      }
    }

    foreach ($changesets as $changeset) {
      $copies = array();

      foreach ($changeset->getHunks() as $hunk) {
        $new_text = array();
        $new_type = array();
        foreach ($hunk->getStructuredNewFile() as $number => $info) {
          $new_type[$number] = $info['type'];
          $new_text[$number] = trim($info['text']);
        }

        $skip = 0;
        foreach ($new_text as $number => $code) {
          if ($skip) {
            // We're skipping lines that we already processed because we
            // extended a block above them downward to include them.
            $skip--;
            continue;
          }

          if ($new_type[$number] !== '+') {
            // This line hasn't been changed in the new file, so don't try
            // to figure out where it came from.
            continue;
          }

          if (empty($old_index[$code])) {
            // No sufficiently long old-side line has this text.
            continue;
          }

          $candidates = $old_index[$code];
          if (count($candidates) > 16) {
            // If there are a large number of identical lines in this diff,
            // don't try to figure out where this block came from: the
            // analysis is O(N^2), since we need to compare every line
            // against every other line. Even if we arrive at a result, it is
            // unlikely to be meaningful. See T5041.
            continue;
          }

          // The first candidate always replaces these placeholders, because
          // every candidate has a length of at least 1.
          $best_length = 0;
          $best_offset = null;
          $best_file = null;
          $best_line = null;

          // Explore all candidates.
          foreach ($candidates as $candidate) {
            list($source_file, $source_line) = $candidate;
            $length = 1;

            // Search backward, then forward, for adjacent matching lines.
            // Stop at a line already attributed to another copy, at either
            // edge of the new or old lines, or at the first mismatch.
            foreach (array(-1, 1) as $step) {
              $offset = $step;
              while (
                !isset($copies[$number + $offset]) &&
                isset($new_text[$number + $offset]) &&
                isset($old_text[$source_file][$source_line + $offset]) &&
                ($old_text[$source_file][$source_line + $offset] ===
                  $new_text[$number + $offset])) {
                $length++;
                $offset += $step;
              }
            }

            if ($length < $best_length) {
              // If we already know of a better source (more matching lines)
              // for this move/copy, stick with that one. We prefer long
              // copies/moves which match a lot of context over short ones.
              continue;
            }

            if ($length == $best_length &&
                idx($old_type[$source_file], $source_line) != '-') {
              // If we already know of an equally good source (same number
              // of matching lines) and this isn't a move, stick with the
              // other one. We prefer moves over copies.
              continue;
            }

            $best_length = $length;
            // After the forward scan, ($offset - 1) is the number of
            // forward matching lines.
            $best_offset = $offset - 1;
            $best_file = $source_file;
            $best_line = $source_line;
          }

          // An empty filename marks a source in the same file.
          $is_same_file = ($best_file == $changeset->getFilename());
          $from_file = ($is_same_file ? '' : $best_file);
          $is_short = ($best_length < $min_lines);

          // Walk the matched block from its first line to its last line.
          for ($back = $best_length - 1; $back >= 0; $back--) {
            $delta = $best_offset - $back;
            $type = idx($old_type[$best_file], $best_line + $delta);

            if ($is_short) {
              // Ignore short blocks. The empty entry still claims the line
              // for later scans, and is filtered out before saving.
              $copies[$number + $delta] = array();
            } else {
              $copies[$number + $delta] = array(
                $from_file,
                $best_line + $delta,
                $type,
              );
            }
          }

          $skip = $best_offset;
        }
      }

      $copies = array_filter($copies);
      if ($copies) {
        $metadata = $changeset->getMetadata();
        $metadata['copy:lines'] = $copies;
        $changeset->setMetadata($metadata);
      }
    }
  }

}