Path: blob/master/src/applications/diviner/workflow/DivinerGenerateWorkflow.php
12256 views
<?php

/**
 * Workflow for the `generate` command: builds the atom and graph caches for
 * one or more Diviner books and hands the result to a publisher.
 */
final class DivinerGenerateWorkflow extends DivinerWorkflow {

  private $atomCache;

  /**
   * Declare the workflow name, synopsis and command line arguments.
   */
  protected function didConstruct() {
    $this
      ->setName('generate')
      ->setSynopsis(pht('Generate documentation.'))
      ->setArguments(
        array(
          array(
            'name' => 'clean',
            'help' => pht('Clear the caches before generating documentation.'),
          ),
          array(
            'name' => 'book',
            'param' => 'path',
            'help' => pht('Path to a Diviner book configuration.'),
          ),
          array(
            'name' => 'publisher',
            'param' => 'class',
            'help' => pht('Specify a subclass of %s.', 'DivinerPublisher'),
            'default' => 'DivinerLivePublisher',
          ),
          array(
            'name' => 'repository',
            'param' => 'identifier',
            'help' => pht('Repository that the documentation belongs to.'),
          ),
        ));
  }

  /**
   * Get the atom cache for the current book, creating it on first use.
   *
   * The cache directory is `<root>/.divinercache/<name>`, where `root` and
   * `name` come from the book configuration.
   *
   * @return DivinerAtomCache
   */
  protected function getAtomCache() {
    if (!$this->atomCache) {
      $book_root = $this->getConfig('root');
      $book_name = $this->getConfig('name');
      $cache_directory = $book_root.'/.divinercache/'.$book_name;
      $this->atomCache = new DivinerAtomCache($cache_directory);
    }
    return $this->atomCache;
  }

  /**
   * Write a message, followed by a newline, using the console's `writeErr()`.
   *
   * @param string $message Message to write.
   */
  protected function log($message) {
    $console = PhutilConsole::getConsole();
    $console->writeErr($message."\n");
  }

  /**
   * Generate documentation for the book given with `--book`, or for every
   * file with the suffix `book` found beneath the current working directory
   * when no book is given.
   *
   * @param PhutilArgumentParser $args Parsed command line arguments.
   * @throws PhutilArgumentUsageException If no book is given and none is
   *   found.
   */
  public function execute(PhutilArgumentParser $args) {
    $book = $args->getArg('book');
    if ($book) {
      $books = array($book);
    } else {
      $cwd = getcwd();
      $this->log(pht('FINDING DOCUMENTATION BOOKS'));

      $books = id(new FileFinder($cwd))
        ->withType('f')
        ->withSuffix('book')
        ->find();

      if (!$books) {
        throw new PhutilArgumentUsageException(
          pht(
            "There are no Diviner '%s' files anywhere beneath the current ".
            "directory. Use '%s' to specify a documentation book to generate.",
            '.book',
            '--book <book>'));
      } else {
        $this->log(pht('Found %s book(s).', phutil_count($books)));
      }
    }

    foreach ($books as $book) {
      $short_name = basename($book);

      $this->log(pht('Generating book "%s"...', $short_name));
      $this->generateBook($book, $args);
      $this->log(pht('Completed generation of "%s".', $short_name)."\n");
    }
  }

  /**
   * Generate a single book: read its configuration, optionally clear the
   * caches, rebuild the atom and graph caches, then publish.
   *
   * @param string $book Path to the book configuration.
   * @param PhutilArgumentParser $args Parsed command line arguments.
   * @throws PhutilArgumentUsageException If the publisher class is not a
   *   concrete subclass of DivinerPublisher, or the repository identifier
   *   does not match a repository.
   */
  private function generateBook($book, PhutilArgumentParser $args) {
    // Each book has its own cache directory, so drop any cache object left
    // over from a previously generated book.
    $this->atomCache = null;

    $this->readBookConfiguration($book);

    if ($args->getArg('clean')) {
      $this->log(pht('CLEARING CACHES'));
      $this->getAtomCache()->delete();
      $this->log(pht('Done.')."\n");
    }

    // The major challenge of documentation generation is one of dependency
    // management. When regenerating documentation, we want to do the smallest
    // amount of work we can, so that regenerating documentation after minor
    // changes is quick.
    //
    // = Atom Cache =
    //
    // In the first stage, we find all the direct changes to source code since
    // the last run. This stage relies on two data structures:
    //
    //   - File Hash Map: `map<file_hash, node_hash>`
    //   - Atom Map: `map<node_hash, true>`
    //
    // First, we hash all the source files in the project to detect any which
    // have changed since the previous run (i.e., their hash is not present in
    // the File Hash Map). If a file's content hash appears in the map, it has
    // not changed, so we don't need to reparse it.
    //
    // We break the contents of each file into "atoms", which represent a unit
    // of source code (like a function, method, class or file). Each atom has a
    // "node hash" based on the content of the atom: if a function definition
    // changes, the node hash of the atom changes too. The primary output of
    // the atom cache is a list of node hashes which exist in the project. This
    // is the Atom Map. The node hash depends only on the definition of the atom
    // and the atomizer implementation. It ends with an "N", for "node".
    //
    // (We need the Atom Map in addition to the File Hash Map because each file
    // may have several atoms in it (e.g., multiple functions, or a class and
    // its methods). The File Hash Map contains an exhaustive list of all atoms
    // with type "file", but not child atoms of those top-level atoms.)
    //
    // = Graph Cache =
    //
    // We now know which atoms exist, and can compare the Atom Map to some
    // existing cache to figure out what has changed. However, this isn't
    // sufficient to figure out which documentation actually needs to be
    // regenerated, because atoms depend on other atoms. For example, if `B
    // extends A` and the definition for `A` changes, we need to regenerate the
    // documentation in `B`. Similarly, if `X` links to `Y` and `Y` changes, we
    // should regenerate `X`. (In both these cases, the documentation for the
    // connected atom may not actually change, but in some cases it will, and
    // the extra work we need to do is generally very small compared to the
    // size of the project.)
    //
    // To figure out which other nodes have changed, we compute a "graph hash"
    // for each node. This hash combines the "node hash" with the node hashes
    // of connected nodes. Our primary output is a list of graph hashes, which
    // a documentation generator can use to easily determine what work needs
    // to be done by comparing the list with a list of cached graph hashes,
    // then generating documentation for new hashes and deleting documentation
    // for missing hashes. The graph hash ends with a "G", for "graph".
    //
    // In this stage, we rely on three data structures:
    //
    //   - Symbol Map: `map<node_hash, symbol_hash>`
    //   - Edge Map: `map<node_hash, list<symbol_hash>>`
    //   - Graph Map: `map<node_hash, graph_hash>`
    //
    // Calculating the graph hash requires several steps, because we need to
    // figure out which nodes an atom is attached to. The atom contains symbolic
    // references to other nodes by name (e.g., `extends SomeClass`) in the form
    // of @{class:DivinerAtomRefs}. We can also build a symbolic reference for
    // any atom from the atom itself. Each @{class:DivinerAtomRef} generates a
    // symbol hash, which ends with an "S", for "symbol".
    //
    // First, we update the symbol map. We remove (and mark dirty) any symbols
    // associated with node hashes which no longer exist (e.g., old/dead nodes).
    // Second, we add (and mark dirty) any symbols associated with new nodes.
    // We also add edges defined by new nodes to the graph.
    //
    // We initialize a list of dirty nodes to the list of new nodes, then find
    // all nodes connected to dirty symbols and add them to the dirty node list.
    // This list now contains every node with a new or changed graph hash.
    //
    // We walk the dirty list and compute the new graph hashes, adding them
    // to the graph hash map. This Graph Map can then be passed to an actual
    // documentation generator, which can compare the graph hashes to a list
    // of already-generated graph hashes and easily assess which documents need
    // to be regenerated and which can be deleted.

    $this->buildAtomCache();
    $this->buildGraphCache();

    $publisher_class = $args->getArg('publisher');
    $symbols = id(new PhutilSymbolLoader())
      ->setName($publisher_class)
      ->setConcreteOnly(true)
      ->setAncestorClass('DivinerPublisher')
      ->selectAndLoadSymbols();

    if (!$symbols) {
      throw new PhutilArgumentUsageException(
        pht(
          "Publisher class '%s' must be a concrete subclass of %s.",
          $publisher_class,
          'DivinerPublisher'));
    }
    $publisher = newv($publisher_class, array());

    $identifier = $args->getArg('repository');
    $repository = null;
    if ($identifier !== null && strlen($identifier)) {
      $repository = id(new PhabricatorRepositoryQuery())
        ->setViewer(PhabricatorUser::getOmnipotentUser())
        ->withIdentifiers(array($identifier))
        ->executeOne();

      if (!$repository) {
        throw new PhutilArgumentUsageException(
          pht(
            'Repository "%s" does not exist.',
            $identifier));
      }

      $publisher->setRepositoryPHID($repository->getPHID());
    }

    $this->publishDocumentation($args->getArg('clean'), $publisher);
  }


/* -(  Atom Cache  )--------------------------------------------------------- */


  /**
   * Bring the atom cache up to date: drop entries for file hashes that no
   * longer exist, atomize files whose hash is not cached, and save the
   * result.
   */
  private function buildAtomCache() {
    $this->log(pht('BUILDING ATOM CACHE'));

    $file_hashes = $this->findFilesInProject();
    $this->log(
      pht(
        'Found %s file(s) in project.',
        phutil_count($file_hashes)));

    $this->deleteDeadAtoms($file_hashes);
    $atomize = $this->getFilesToAtomize($file_hashes);
    $this->log(
      pht(
        'Found %s unatomized, uncached file(s).',
        phutil_count($atomize)));

    $file_atomizers = $this->getAtomizersForFiles($atomize);
    $this->log(
      pht(
        'Found %s file(s) to atomize.',
        phutil_count($file_atomizers)));

    $futures = $this->buildAtomizerFutures($file_atomizers);
    $this->log(
      pht(
        'Atomizing %s file(s).',
        phutil_count($file_atomizers)));

    if ($futures) {
      $this->resolveAtomizerFutures($futures, $file_hashes);
      $this->log(pht('Atomization complete.'));
    } else {
      $this->log(pht('Atom cache is up to date, no files to atomize.'));
    }

    $this->log(pht('Writing atom cache.'));
    $this->getAtomCache()->saveAtoms();
    $this->log(pht('Done.')."\n");
  }

  /**
   * Map each file to the atomizer class which should process it.
   *
   * Files matching any exclude pattern are skipped. Files matching no rule
   * are omitted from the result. If several rules match one file, the last
   * matching rule in the rule list wins.
   *
   * @param list<string> $files File paths to consider.
   * @return map<string, string> Map from file path to atomizer class name.
   * @throws Exception If an exclude pattern or a rule is not a valid regular
   *   expression.
   */
  private function getAtomizersForFiles(array $files) {
    $rules = $this->getRules();
    $exclude = $this->getExclude();
    $atomizers = array();

    foreach ($files as $file) {
      foreach ($exclude as $pattern) {
        $excluded = preg_match($pattern, $file);
        // Validate exclude patterns the same way rules are validated below,
        // so a broken pattern is reported instead of excluding nothing.
        if ($excluded === false) {
          throw new Exception(
            pht(
              "Exclude pattern '%s' is not a valid regular expression.",
              $pattern));
        }
        if ($excluded) {
          continue 2;
        }
      }

      foreach ($rules as $rule => $atomizer) {
        $ok = preg_match($rule, $file);
        if ($ok === false) {
          throw new Exception(
            pht("Rule '%s' is not a valid regular expression.", $rule));
        }
        if ($ok) {
          // Deliberately keep scanning: a later matching rule overrides an
          // earlier one.
          $atomizers[$file] = $atomizer;
        }
      }
    }

    return $atomizers;
  }

  /**
   * Get the map from filename regular expression to atomizer class name,
   * read from the `rules` book configuration with a built-in default.
   *
   * @return map<string, string>
   */
  private function getRules() {
    return $this->getConfig('rules', array(
      '/\\.diviner$/' => 'DivinerArticleAtomizer',
      '/\\.php$/' => 'DivinerPHPAtomizer',
    ));
  }

  /**
   * Get the exclude patterns from the `exclude` book configuration, cast to
   * an array.
   *
   * @return list<string>
   */
  private function getExclude() {
    return (array)$this->getConfig('exclude', array());
  }

  /**
   * Find the files under the book root and compute a file hash for each.
   *
   * @return map<string, string> Map from readable path (relative to the book
   *   root) to file hash. File hashes end with "F".
   */
  private function findFilesInProject() {
    $raw_hashes = id(new FileFinder($this->getConfig('root')))
      ->excludePath('*/.*')
      ->withType('f')
      ->setGenerateChecksums(true)
      ->find();

    $version = $this->getDivinerAtomWorldVersion();

    $file_hashes = array();
    foreach ($raw_hashes as $file => $md5_hash) {
      $rel_file = Filesystem::readablePath($file, $this->getConfig('root'));
      // We want the hash to change if the file moves or Diviner gets updated,
      // not just if the file content changes. Derive a hash from everything
      // we care about.
      $file_hashes[$rel_file] = md5("{$rel_file}\0{$md5_hash}\0{$version}").'F';
    }

    return $file_hashes;
  }

  /**
   * Remove cached file hashes which do not correspond to any current file.
   *
   * @param map<string, string> $file_hashes Map from file path to file hash.
   */
  private function deleteDeadAtoms(array $file_hashes) {
    $atom_cache = $this->getAtomCache();

    $hash_to_file = array_flip($file_hashes);
    foreach ($atom_cache->getFileHashMap() as $hash => $atom) {
      // Test for key presence, not value truthiness: the flipped values are
      // file paths, and a path like "0" is falsy, so `empty()` would treat
      // a live file's hash as dead.
      if (!isset($hash_to_file[$hash])) {
        $atom_cache->deleteFileHash($hash);
      }
    }
  }

  /**
   * Select the files whose file hash is not present in the atom cache.
   *
   * @param map<string, string> $file_hashes Map from file path to file hash.
   * @return list<string> File paths which need to be atomized.
   */
  private function getFilesToAtomize(array $file_hashes) {
    $atom_cache = $this->getAtomCache();

    $atomize = array();
    foreach ($file_hashes as $file => $hash) {
      if (!$atom_cache->fileHashExists($hash)) {
        $atomize[] = $file;
      }
    }

    return $atomize;
  }

  /**
   * Build `diviner atomize` subprocess futures, grouping files by atomizer
   * class and passing at most 32 files to each subprocess.
   *
   * @param map<string, string> $file_atomizers Map from file path to
   *   atomizer class name.
   * @return list<ExecFuture> Futures which have been built.
   */
  private function buildAtomizerFutures(array $file_atomizers) {
    $atomizers = array();
    foreach ($file_atomizers as $file => $atomizer) {
      $atomizers[$atomizer][] = $file;
    }

    $root = dirname(phutil_get_library_root('phabricator'));
    $config_root = $this->getConfig('root');

    $bar = id(new PhutilConsoleProgressBar())
      ->setTotal(count($file_atomizers));

    $futures = array();
    foreach ($atomizers as $class => $files) {
      foreach (array_chunk($files, 32) as $chunk) {
        $future = new ExecFuture(
          '%s atomize --ugly --book %s --atomizer %s -- %Ls',
          $root.'/bin/diviner',
          $this->getBookConfigPath(),
          $class,
          $chunk);
        $future->setCWD($config_root);

        $futures[] = $future;

        $bar->update(count($chunk));
      }
    }

    $bar->done();

    return $futures;
  }

  /**
   * Resolve atomizer futures and add the atoms they emit to the atom cache.
   *
   * An exception raised while handling a future is logged with `phlog()` and
   * the remaining futures are still processed.
   *
   * @param list<Future> $futures Futures from @{method:buildAtomizerFutures}.
   * @param map<string, string> $file_hashes Map from file path to file hash.
   */
  private function resolveAtomizerFutures(array $futures, array $file_hashes) {
    assert_instances_of($futures, 'Future');

    $atom_cache = $this->getAtomCache();
    $bar = id(new PhutilConsoleProgressBar())
      ->setTotal(count($futures));
    // NOTE(review): limit(4) presumably caps how many futures run at once;
    // confirm against FutureIterator.
    $futures = id(new FutureIterator($futures))
      ->limit(4);

    foreach ($futures as $key => $future) {
      try {
        $atoms = $future->resolveJSON();

        foreach ($atoms as $atom) {
          // File atoms also record which file hash produced them.
          if ($atom['type'] == DivinerAtom::TYPE_FILE) {
            $file_hash = $file_hashes[$atom['file']];
            $atom_cache->addFileHash($file_hash, $atom['hash']);
          }
          $atom_cache->addAtom($atom);
        }
      } catch (Exception $e) {
        phlog($e);
      }

      $bar->update(1);
    }
    $bar->done();
  }

  /**
   * Get a global version number, which changes whenever any atom or atomizer
   * implementation changes in a way which is not backward-compatible.
   *
   * The version is an MD5 of the atom serialization version, the configured
   * rules, and the version reported by each atomizer class.
   *
   * @return string
   */
  private function getDivinerAtomWorldVersion() {
    $version = array();
    $version['atom'] = DivinerAtom::getAtomSerializationVersion();
    $version['rules'] = $this->getRules();

    $atomizers = id(new PhutilClassMapQuery())
      ->setAncestorClass('DivinerAtomizer')
      ->execute();

    $atomizer_versions = array();
    foreach ($atomizers as $atomizer) {
      $name = get_class($atomizer);
      $atomizer_versions[$name] = call_user_func(
        array(
          $name,
          'getAtomizerVersion',
        ));
    }

    // Sort by class name so the serialized form does not depend on the order
    // in which the classes were returned.
    ksort($atomizer_versions);
    $version['atomizers'] = $atomizer_versions;

    return md5(serialize($version));
  }


/* -(  Graph Cache  )-------------------------------------------------------- */


  /**
   * Update the symbol, edge and graph maps from the current atom map, then
   * save them.
   */
  private function buildGraphCache() {
    $this->log(pht('BUILDING GRAPH CACHE'));

    $atom_cache = $this->getAtomCache();
    $symbol_map = $atom_cache->getSymbolMap();
    $atoms = $atom_cache->getAtomMap();

    $dirty_symbols = array();
    $dirty_nhashes = array();

    // Node hashes present in the symbol map but absent from the atom map.
    $del_atoms = array_diff_key($symbol_map, $atoms);
    $this->log(
      pht(
        'Found %s obsolete atom(s) in graph.',
        phutil_count($del_atoms)));

    foreach ($del_atoms as $nhash => $shash) {
      $atom_cache->deleteSymbol($nhash);
      $dirty_symbols[$shash] = true;

      $atom_cache->deleteEdges($nhash);
      $atom_cache->deleteGraph($nhash);
    }

    // Node hashes present in the atom map but absent from the symbol map.
    $new_atoms = array_diff_key($atoms, $symbol_map);
    $this->log(
      pht(
        'Found %s new atom(s) in graph.',
        phutil_count($new_atoms)));

    foreach ($new_atoms as $nhash => $ignored) {
      $shash = $this->computeSymbolHash($nhash);
      $atom_cache->addSymbol($nhash, $shash);
      $dirty_symbols[$shash] = true;

      $atom_cache->addEdges($nhash, $this->getEdges($nhash));

      $dirty_nhashes[$nhash] = true;
    }

    $this->log(pht('Propagating changes through the graph.'));

    // Find all the nodes which point at a dirty node, and dirty them. Then
    // find all the nodes which point at those nodes and dirty them, and so
    // on. (This is slightly overkill since we probably don't need to propagate
    // dirtiness across documentation "links" between symbols, but we do want
    // to propagate it across "extends", and we suffer only a little bit of
    // collateral damage by over-dirtying as long as the documentation isn't
    // too well-connected.)

    $symbol_stack = array_keys($dirty_symbols);
    while ($symbol_stack) {
      $symbol_hash = array_pop($symbol_stack);

      foreach ($atom_cache->getEdgesWithDestination($symbol_hash) as $edge) {
        $dirty_nhashes[$edge] = true;
        $src_hash = $this->computeSymbolHash($edge);
        // Only push symbols we have not seen, so the walk terminates.
        if (empty($dirty_symbols[$src_hash])) {
          $dirty_symbols[$src_hash] = true;
          $symbol_stack[] = $src_hash;
        }
      }
    }

    $this->log(
      pht(
        'Found %s affected atoms.',
        phutil_count($dirty_nhashes)));

    foreach ($dirty_nhashes as $nhash => $ignored) {
      $atom_cache->addGraph($nhash, $this->computeGraphHash($nhash));
    }

    $this->log(pht('Writing graph cache.'));

    $atom_cache->saveGraph();
    $atom_cache->saveEdges();
    $atom_cache->saveSymbols();

    $this->log(pht('Done.')."\n");
  }

  /**
   * Compute the symbol hash for an atom from its `ref` dictionary.
   *
   * @param string $node_hash Node hash of the atom.
   * @return string Symbol hash.
   * @throws Exception If the atom cache has no atom with this node hash.
   */
  private function computeSymbolHash($node_hash) {
    $atom_cache = $this->getAtomCache();
    $atom = $atom_cache->getAtom($node_hash);

    if (!$atom) {
      throw new Exception(
        pht("No such atom with node hash '%s'!", $node_hash));
    }

    $ref = DivinerAtomRef::newFromDictionary($atom['ref']);
    return $ref->toHash();
  }

  /**
   * Get the symbol hashes an atom depends on: its own symbol, plus every
   * `extends` and `links` reference which is in the same book as the atom.
   *
   * @param string $node_hash Node hash of the atom.
   * @return list<string> Unique symbol hashes.
   */
  private function getEdges($node_hash) {
    $atom_cache = $this->getAtomCache();
    $atom = $atom_cache->getAtom($node_hash);

    $refs = array();

    // Make the atom depend on its own symbol, so that all atoms with the same
    // symbol are dirtied (e.g., if a codebase defines the function `f()`
    // several times, all of them should be dirtied when one is dirtied).
    $refs[DivinerAtomRef::newFromDictionary($atom)->toHash()] = true;

    foreach (array_merge($atom['extends'], $atom['links']) as $ref_dict) {
      $ref = DivinerAtomRef::newFromDictionary($ref_dict);
      if ($ref->getBook() == $atom['book']) {
        $refs[$ref->toHash()] = true;
      }
    }

    return array_keys($refs);
  }

  /**
   * Compute the graph hash for an atom from its node hash and its sorted
   * list of edges.
   *
   * @param string $node_hash Node hash of the atom.
   * @return string Graph hash, which ends with "G".
   */
  private function computeGraphHash($node_hash) {
    $atom_cache = $this->getAtomCache();
    $atom = $atom_cache->getAtom($node_hash);

    $edges = $this->getEdges($node_hash);
    // Sort so the hash does not depend on the order of the edges.
    sort($edges);

    $inputs = array(
      'atomHash' => $atom['hash'],
      'edges' => $edges,
    );

    return md5(serialize($inputs)).'G';
  }

  /**
   * Configure the publisher and publish the graph hashes in the graph map.
   *
   * @param bool $clean Value of the `clean` argument, passed to the
   *   publisher's `setDropCaches()`.
   * @param DivinerPublisher $publisher Publisher to configure and run.
   */
  private function publishDocumentation($clean, DivinerPublisher $publisher) {
    $atom_cache = $this->getAtomCache();
    $graph_map = $atom_cache->getGraphMap();

    $this->log(pht('PUBLISHING DOCUMENTATION'));

    $publisher
      ->setDropCaches($clean)
      ->setConfig($this->getAllConfig())
      ->setAtomCache($atom_cache)
      ->setRenderer(new DivinerDefaultRenderer())
      ->publishAtoms(array_values($graph_map));

    $this->log(pht('Done.'));
  }

}