Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/phabricator
Path: blob/master/src/applications/diviner/workflow/DivinerGenerateWorkflow.php
12256 views
1
<?php
2
3
/**
 * Command-line workflow ("generate") which builds documentation for one or
 * more Diviner books.
 *
 * For each book, the workflow reads the book configuration, optionally clears
 * the caches, rebuilds the atom cache and the graph cache, and then hands the
 * resulting graph hashes to a publisher.
 */
final class DivinerGenerateWorkflow extends DivinerWorkflow {

  /**
   * Lazily constructed atom cache for the book currently being generated.
   * Reset to `null` at the start of each book in @{method:generateBook}.
   */
  private $atomCache;

  /**
   * Declare the workflow name, synopsis and command-line arguments.
   *
   * @return void
   */
  protected function didConstruct() {
    $this
      ->setName('generate')
      ->setSynopsis(pht('Generate documentation.'))
      ->setArguments(
        array(
          array(
            'name' => 'clean',
            'help' => pht('Clear the caches before generating documentation.'),
          ),
          array(
            'name' => 'book',
            'param' => 'path',
            'help' => pht('Path to a Diviner book configuration.'),
          ),
          array(
            'name' => 'publisher',
            'param' => 'class',
            'help' => pht('Specify a subclass of %s.', 'DivinerPublisher'),
            'default' => 'DivinerLivePublisher',
          ),
          array(
            'name' => 'repository',
            'param' => 'identifier',
            'help' => pht('Repository that the documentation belongs to.'),
          ),
        ));
  }

  /**
   * Get the atom cache for the current book, creating it on first use.
   *
   * The cache directory is `<root>/.divinercache/<name>`, where `root` and
   * `name` are read from the book configuration.
   *
   * @return DivinerAtomCache Cache for the current book.
   */
  protected function getAtomCache() {
    if (!$this->atomCache) {
      $book_root = $this->getConfig('root');
      $book_name = $this->getConfig('name');
      $cache_directory = $book_root.'/.divinercache/'.$book_name;
      $this->atomCache = new DivinerAtomCache($cache_directory);
    }
    return $this->atomCache;
  }

  /**
   * Write a progress message, followed by a newline, to the console's error
   * stream.
   *
   * @param string Message to write.
   * @return void
   */
  protected function log($message) {
    $console = PhutilConsole::getConsole();
    $console->writeErr($message."\n");
  }

  /**
   * Generate documentation for the book given with `--book`, or for every
   * `*book` file found beneath the current working directory when no book is
   * specified.
   *
   * @param PhutilArgumentParser Parsed command-line arguments.
   * @return void
   * @throws PhutilArgumentUsageException If no book was specified and none
   *   can be found beneath the current directory.
   */
  public function execute(PhutilArgumentParser $args) {
    $book = $args->getArg('book');
    if ($book) {
      $books = array($book);
    } else {
      $cwd = getcwd();
      $this->log(pht('FINDING DOCUMENTATION BOOKS'));

      $books = id(new FileFinder($cwd))
        ->withType('f')
        ->withSuffix('book')
        ->find();

      if (!$books) {
        throw new PhutilArgumentUsageException(
          pht(
            "There are no Diviner '%s' files anywhere beneath the current ".
            "directory. Use '%s' to specify a documentation book to generate.",
            '.book',
            '--book <book>'));
      } else {
        $this->log(pht('Found %s book(s).', phutil_count($books)));
      }
    }

    foreach ($books as $book) {
      $short_name = basename($book);

      $this->log(pht('Generating book "%s"...', $short_name));
      $this->generateBook($book, $args);
      $this->log(pht('Completed generation of "%s".', $short_name)."\n");
    }
  }

  /**
   * Generate documentation for a single book: read its configuration,
   * optionally clear caches, rebuild the atom and graph caches, then publish
   * with the selected publisher.
   *
   * @param string Path to the book configuration file.
   * @param PhutilArgumentParser Parsed command-line arguments.
   * @return void
   * @throws PhutilArgumentUsageException If the publisher class is not a
   *   concrete subclass of `DivinerPublisher`, or the named repository does
   *   not exist.
   */
  private function generateBook($book, PhutilArgumentParser $args) {
    // Each book has its own cache directory, so drop any cache object left
    // over from a previously generated book.
    $this->atomCache = null;

    $this->readBookConfiguration($book);

    if ($args->getArg('clean')) {
      $this->log(pht('CLEARING CACHES'));
      $this->getAtomCache()->delete();
      $this->log(pht('Done.')."\n");
    }

    // The major challenge of documentation generation is one of dependency
    // management. When regenerating documentation, we want to do the smallest
    // amount of work we can, so that regenerating documentation after minor
    // changes is quick.
    //
    // = Atom Cache =
    //
    // In the first stage, we find all the direct changes to source code since
    // the last run. This stage relies on two data structures:
    //
    //  - File Hash Map: `map<file_hash, node_hash>`
    //  - Atom Map: `map<node_hash, true>`
    //
    // First, we hash all the source files in the project to detect any which
    // have changed since the previous run (i.e., their hash is not present in
    // the File Hash Map). If a file's content hash appears in the map, it has
    // not changed, so we don't need to reparse it.
    //
    // We break the contents of each file into "atoms", which represent a unit
    // of source code (like a function, method, class or file). Each atom has a
    // "node hash" based on the content of the atom: if a function definition
    // changes, the node hash of the atom changes too. The primary output of
    // the atom cache is a list of node hashes which exist in the project. This
    // is the Atom Map. The node hash depends only on the definition of the atom
    // and the atomizer implementation. It ends with an "N", for "node".
    //
    // (We need the Atom Map in addition to the File Hash Map because each file
    // may have several atoms in it (e.g., multiple functions, or a class and
    // its methods). The File Hash Map contains an exhaustive list of all atoms
    // with type "file", but not child atoms of those top-level atoms.)
    //
    // = Graph Cache =
    //
    // We now know which atoms exist, and can compare the Atom Map to some
    // existing cache to figure out what has changed. However, this isn't
    // sufficient to figure out which documentation actually needs to be
    // regenerated, because atoms depend on other atoms. For example, if `B
    // extends A` and the definition for `A` changes, we need to regenerate the
    // documentation in `B`. Similarly, if `X` links to `Y` and `Y` changes, we
    // should regenerate `X`. (In both these cases, the documentation for the
    // connected atom may not actually change, but in some cases it will, and
    // the extra work we need to do is generally very small compared to the
    // size of the project.)
    //
    // To figure out which other nodes have changed, we compute a "graph hash"
    // for each node. This hash combines the "node hash" with the node hashes
    // of connected nodes. Our primary output is a list of graph hashes, which
    // a documentation generator can use to easily determine what work needs
    // to be done by comparing the list with a list of cached graph hashes,
    // then generating documentation for new hashes and deleting documentation
    // for missing hashes. The graph hash ends with a "G", for "graph".
    //
    // In this stage, we rely on three data structures:
    //
    //  - Symbol Map: `map<node_hash, symbol_hash>`
    //  - Edge Map: `map<node_hash, list<symbol_hash>>`
    //  - Graph Map: `map<node_hash, graph_hash>`
    //
    // Calculating the graph hash requires several steps, because we need to
    // figure out which nodes an atom is attached to. The atom contains symbolic
    // references to other nodes by name (e.g., `extends SomeClass`) in the form
    // of @{class:DivinerAtomRefs}. We can also build a symbolic reference for
    // any atom from the atom itself. Each @{class:DivinerAtomRef} generates a
    // symbol hash, which ends with an "S", for "symbol".
    //
    // First, we update the symbol map. We remove (and mark dirty) any symbols
    // associated with node hashes which no longer exist (e.g., old/dead nodes).
    // Second, we add (and mark dirty) any symbols associated with new nodes.
    // We also add edges defined by new nodes to the graph.
    //
    // We initialize a list of dirty nodes to the list of new nodes, then find
    // all nodes connected to dirty symbols and add them to the dirty node list.
    // This list now contains every node with a new or changed graph hash.
    //
    // We walk the dirty list and compute the new graph hashes, adding them
    // to the graph hash map. This Graph Map can then be passed to an actual
    // documentation generator, which can compare the graph hashes to a list
    // of already-generated graph hashes and easily assess which documents need
    // to be regenerated and which can be deleted.

    $this->buildAtomCache();
    $this->buildGraphCache();

    $publisher_class = $args->getArg('publisher');
    $symbols = id(new PhutilSymbolLoader())
      ->setName($publisher_class)
      ->setConcreteOnly(true)
      ->setAncestorClass('DivinerPublisher')
      ->selectAndLoadSymbols();

    if (!$symbols) {
      throw new PhutilArgumentUsageException(
        pht(
          "Publisher class '%s' must be a concrete subclass of %s.",
          $publisher_class,
          'DivinerPublisher'));
    }
    $publisher = newv($publisher_class, array());

    $identifier = $args->getArg('repository');
    $repository = null;
    if ($identifier !== null && strlen($identifier)) {
      $repository = id(new PhabricatorRepositoryQuery())
        ->setViewer(PhabricatorUser::getOmnipotentUser())
        ->withIdentifiers(array($identifier))
        ->executeOne();

      if (!$repository) {
        throw new PhutilArgumentUsageException(
          pht(
            'Repository "%s" does not exist.',
            $identifier));
      }

      $publisher->setRepositoryPHID($repository->getPHID());
    }

    $this->publishDocumentation($args->getArg('clean'), $publisher);
  }


/* -(  Atom Cache  )--------------------------------------------------------- */


  /**
   * Bring the atom cache up to date: hash the project files, drop cache
   * entries for files which no longer exist, atomize files which are not yet
   * cached, and write the cache.
   *
   * @return void
   */
  private function buildAtomCache() {
    $this->log(pht('BUILDING ATOM CACHE'));

    $file_hashes = $this->findFilesInProject();
    $this->log(
      pht(
        'Found %s file(s) in project.',
        phutil_count($file_hashes)));

    $this->deleteDeadAtoms($file_hashes);
    $atomize = $this->getFilesToAtomize($file_hashes);
    $this->log(
      pht(
        'Found %s unatomized, uncached file(s).',
        phutil_count($atomize)));

    $file_atomizers = $this->getAtomizersForFiles($atomize);
    $this->log(
      pht(
        'Found %s file(s) to atomize.',
        phutil_count($file_atomizers)));

    $futures = $this->buildAtomizerFutures($file_atomizers);
    $this->log(
      pht(
        'Atomizing %s file(s).',
        phutil_count($file_atomizers)));

    if ($futures) {
      $this->resolveAtomizerFutures($futures, $file_hashes);
      $this->log(pht('Atomization complete.'));
    } else {
      $this->log(pht('Atom cache is up to date, no files to atomize.'));
    }

    $this->log(pht('Writing atom cache.'));
    $this->getAtomCache()->saveAtoms();
    $this->log(pht('Done.')."\n");
  }

  /**
   * Select an atomizer class for each file, based on the book's `exclude`
   * patterns and atomizer `rules`.
   *
   * Files matching any exclude pattern are skipped. Files matching no rule
   * are omitted from the result. If a file matches several rules, the last
   * matching rule (in configuration order) wins.
   *
   * @param list<string> Paths of files to consider.
   * @return map<string, string> Map from file path to atomizer class name.
   * @throws Exception If an exclude pattern or a rule is not a valid regular
   *   expression.
   */
  private function getAtomizersForFiles(array $files) {
    $rules = $this->getRules();
    $exclude = $this->getExclude();
    $atomizers = array();

    foreach ($files as $file) {
      foreach ($exclude as $pattern) {
        $ok = preg_match($pattern, $file);
        // preg_match() returns `false` on failure. Treating that as "no
        // match" would silently atomize files the user meant to exclude, so
        // fail loudly, the same way invalid rules do below.
        if ($ok === false) {
          throw new Exception(
            pht(
              "Exclude pattern '%s' is not a valid regular expression.",
              $pattern));
        }
        if ($ok) {
          continue 2;
        }
      }

      foreach ($rules as $rule => $atomizer) {
        $ok = preg_match($rule, $file);
        if ($ok === false) {
          throw new Exception(
            pht("Rule '%s' is not a valid regular expression.", $rule));
        }
        if ($ok) {
          // Deliberately no early exit: every rule is tested, so a later
          // matching rule overrides an earlier one.
          $atomizers[$file] = $atomizer;
        }
      }
    }

    return $atomizers;
  }

  /**
   * Get the map of filename patterns to atomizer class names, from the
   * `rules` book configuration key or the built-in defaults.
   *
   * @return map<string, string> Map from regular expression to atomizer
   *   class name.
   */
  private function getRules() {
    return $this->getConfig('rules', array(
      '/\\.diviner$/' => 'DivinerArticleAtomizer',
      '/\\.php$/' => 'DivinerPHPAtomizer',
    ));
  }

  /**
   * Get the `exclude` book configuration value as an array (a scalar value
   * is wrapped by the array cast; the default is an empty array).
   *
   * @return list<string> Exclude patterns.
   */
  private function getExclude() {
    $exclude = (array)$this->getConfig('exclude', array());
    return $exclude;
  }

  /**
   * Find every file beneath the book root (skipping paths matching `*\/.*`)
   * and compute a file hash for each.
   *
   * @return map<string, string> Map from path relative to the book root to
   *   file hash. Each hash ends with "F".
   */
  private function findFilesInProject() {
    // Read the root once; it is needed both by the finder and for every file.
    $root = $this->getConfig('root');

    $raw_hashes = id(new FileFinder($root))
      ->excludePath('*/.*')
      ->withType('f')
      ->setGenerateChecksums(true)
      ->find();

    $version = $this->getDivinerAtomWorldVersion();

    $file_hashes = array();
    foreach ($raw_hashes as $file => $md5_hash) {
      $rel_file = Filesystem::readablePath($file, $root);
      // We want the hash to change if the file moves or Diviner gets updated,
      // not just if the file content changes. Derive a hash from everything
      // we care about.
      $file_hashes[$rel_file] = md5("{$rel_file}\0{$md5_hash}\0{$version}").'F';
    }

    return $file_hashes;
  }

  /**
   * Remove file hashes from the atom cache which do not correspond to any
   * file currently in the project.
   *
   * @param map<string, string> Map from relative path to file hash, as
   *   produced by @{method:findFilesInProject}.
   * @return void
   */
  private function deleteDeadAtoms(array $file_hashes) {
    $atom_cache = $this->getAtomCache();

    $hash_to_file = array_flip($file_hashes);
    foreach ($atom_cache->getFileHashMap() as $hash => $atom) {
      // Test for key presence, not truthiness: the flipped value is the
      // relative path, and a file named "0" is a live file even though
      // empty("0") is true.
      if (!isset($hash_to_file[$hash])) {
        $atom_cache->deleteFileHash($hash);
      }
    }
  }

  /**
   * Select the files whose hash is not yet present in the atom cache.
   *
   * @param map<string, string> Map from relative path to file hash.
   * @return list<string> Relative paths of files which need to be atomized.
   */
  private function getFilesToAtomize(array $file_hashes) {
    $atom_cache = $this->getAtomCache();

    $atomize = array();
    foreach ($file_hashes as $file => $hash) {
      if (!$atom_cache->fileHashExists($hash)) {
        $atomize[] = $file;
      }
    }

    return $atomize;
  }

  /**
   * Build `bin/diviner atomize` subprocess futures for the given files,
   * grouping files by atomizer class and splitting each group into chunks of
   * at most 32 files.
   *
   * @param map<string, string> Map from file path to atomizer class name.
   * @return list<ExecFuture> Futures; this method does not resolve them.
   */
  private function buildAtomizerFutures(array $file_atomizers) {
    $atomizers = array();
    foreach ($file_atomizers as $file => $atomizer) {
      $atomizers[$atomizer][] = $file;
    }

    $root = dirname(phutil_get_library_root('phabricator'));
    $config_root = $this->getConfig('root');

    $bar = id(new PhutilConsoleProgressBar())
      ->setTotal(count($file_atomizers));

    $futures = array();
    foreach ($atomizers as $class => $files) {
      foreach (array_chunk($files, 32) as $chunk) {
        $future = new ExecFuture(
          '%s atomize --ugly --book %s --atomizer %s -- %Ls',
          $root.'/bin/diviner',
          $this->getBookConfigPath(),
          $class,
          $chunk);
        $future->setCWD($config_root);

        $futures[] = $future;

        $bar->update(count($chunk));
      }
    }

    $bar->done();

    return $futures;
  }

  /**
   * Resolve atomizer futures (through an iterator limited to 4) and add the
   * atoms they emit to the atom cache.
   *
   * An exception raised while handling one future is logged with `phlog()`
   * and processing continues with the next future.
   *
   * @param list<Future> Futures from @{method:buildAtomizerFutures}.
   * @param map<string, string> Map from relative path to file hash, used to
   *   record the file hash of each atom with type "file".
   * @return void
   */
  private function resolveAtomizerFutures(array $futures, array $file_hashes) {
    assert_instances_of($futures, 'Future');

    $atom_cache = $this->getAtomCache();
    $bar = id(new PhutilConsoleProgressBar())
      ->setTotal(count($futures));
    $futures = id(new FutureIterator($futures))
      ->limit(4);

    foreach ($futures as $key => $future) {
      try {
        $atoms = $future->resolveJSON();

        foreach ($atoms as $atom) {
          if ($atom['type'] == DivinerAtom::TYPE_FILE) {
            $file_hash = $file_hashes[$atom['file']];
            $atom_cache->addFileHash($file_hash, $atom['hash']);
          }
          $atom_cache->addAtom($atom);
        }
      } catch (Exception $e) {
        phlog($e);
      }

      $bar->update(1);
    }
    $bar->done();
  }

  /**
   * Get a global version number, which changes whenever any atom or atomizer
   * implementation changes in a way which is not backward-compatible.
   *
   * The version also covers the atomizer rules in effect.
   *
   * @return string MD5 hash of the serialized version information.
   */
  private function getDivinerAtomWorldVersion() {
    $version = array();
    $version['atom'] = DivinerAtom::getAtomSerializationVersion();
    $version['rules'] = $this->getRules();

    $atomizers = id(new PhutilClassMapQuery())
      ->setAncestorClass('DivinerAtomizer')
      ->execute();

    $atomizer_versions = array();
    foreach ($atomizers as $atomizer) {
      $name = get_class($atomizer);
      $atomizer_versions[$name] = call_user_func(
        array(
          $name,
          'getAtomizerVersion',
        ));
    }

    // Sort by class name so the hash does not depend on discovery order.
    ksort($atomizer_versions);
    $version['atomizers'] = $atomizer_versions;

    return md5(serialize($version));
  }


/* -(  Graph Cache  )-------------------------------------------------------- */


  /**
   * Bring the graph cache up to date: remove symbols, edges and graph hashes
   * for atoms which no longer exist, add symbols and edges for new atoms,
   * propagate dirtiness along edges, recompute graph hashes for every dirty
   * node, and write the graph, edge and symbol maps.
   *
   * @return void
   */
  private function buildGraphCache() {
    $this->log(pht('BUILDING GRAPH CACHE'));

    $atom_cache = $this->getAtomCache();
    $symbol_map = $atom_cache->getSymbolMap();
    $atoms = $atom_cache->getAtomMap();

    $dirty_symbols = array();
    $dirty_nhashes = array();

    // Nodes which have a symbol but no atom are obsolete.
    $del_atoms = array_diff_key($symbol_map, $atoms);
    $this->log(
      pht(
        'Found %s obsolete atom(s) in graph.',
        phutil_count($del_atoms)));

    foreach ($del_atoms as $nhash => $shash) {
      $atom_cache->deleteSymbol($nhash);
      $dirty_symbols[$shash] = true;

      $atom_cache->deleteEdges($nhash);
      $atom_cache->deleteGraph($nhash);
    }

    // Nodes which have an atom but no symbol yet are new.
    $new_atoms = array_diff_key($atoms, $symbol_map);
    $this->log(
      pht(
        'Found %s new atom(s) in graph.',
        phutil_count($new_atoms)));

    foreach ($new_atoms as $nhash => $ignored) {
      $shash = $this->computeSymbolHash($nhash);
      $atom_cache->addSymbol($nhash, $shash);
      $dirty_symbols[$shash] = true;

      $atom_cache->addEdges($nhash, $this->getEdges($nhash));

      $dirty_nhashes[$nhash] = true;
    }

    $this->log(pht('Propagating changes through the graph.'));

    // Find all the nodes which point at a dirty node, and dirty them. Then
    // find all the nodes which point at those nodes and dirty them, and so
    // on. (This is slightly overkill since we probably don't need to propagate
    // dirtiness across documentation "links" between symbols, but we do want
    // to propagate it across "extends", and we suffer only a little bit of
    // collateral damage by over-dirtying as long as the documentation isn't
    // too well-connected.)

    $symbol_stack = array_keys($dirty_symbols);
    while ($symbol_stack) {
      $symbol_hash = array_pop($symbol_stack);

      foreach ($atom_cache->getEdgesWithDestination($symbol_hash) as $edge) {
        $dirty_nhashes[$edge] = true;
        $src_hash = $this->computeSymbolHash($edge);
        // Only push symbols not seen before, so the walk terminates even if
        // the graph contains cycles.
        if (empty($dirty_symbols[$src_hash])) {
          $dirty_symbols[$src_hash] = true;
          $symbol_stack[] = $src_hash;
        }
      }
    }

    $this->log(
      pht(
        'Found %s affected atoms.',
        phutil_count($dirty_nhashes)));

    foreach ($dirty_nhashes as $nhash => $ignored) {
      $atom_cache->addGraph($nhash, $this->computeGraphHash($nhash));
    }

    $this->log(pht('Writing graph cache.'));

    $atom_cache->saveGraph();
    $atom_cache->saveEdges();
    $atom_cache->saveSymbols();

    $this->log(pht('Done.')."\n");
  }

  /**
   * Compute the symbol hash for a node, from the `ref` dictionary of its
   * cached atom.
   *
   * @param string Node hash of the atom.
   * @return string Symbol hash.
   * @throws Exception If the atom cache has no atom with this node hash.
   */
  private function computeSymbolHash($node_hash) {
    $atom_cache = $this->getAtomCache();
    $atom = $atom_cache->getAtom($node_hash);

    if (!$atom) {
      throw new Exception(
        pht("No such atom with node hash '%s'!", $node_hash));
    }

    $ref = DivinerAtomRef::newFromDictionary($atom['ref']);
    return $ref->toHash();
  }

  /**
   * Get the symbol hashes a node depends on: its own symbol, plus every
   * `extends` and `links` reference which is in the same book as the atom.
   *
   * @param string Node hash of the atom.
   * @return list<string> Unique symbol hashes.
   */
  private function getEdges($node_hash) {
    $atom_cache = $this->getAtomCache();
    $atom = $atom_cache->getAtom($node_hash);

    $refs = array();

    // Make the atom depend on its own symbol, so that all atoms with the same
    // symbol are dirtied (e.g., if a codebase defines the function `f()`
    // several times, all of them should be dirtied when one is dirtied).
    $refs[DivinerAtomRef::newFromDictionary($atom)->toHash()] = true;

    foreach (array_merge($atom['extends'], $atom['links']) as $ref_dict) {
      $ref = DivinerAtomRef::newFromDictionary($ref_dict);
      if ($ref->getBook() == $atom['book']) {
        $refs[$ref->toHash()] = true;
      }
    }

    return array_keys($refs);
  }

  /**
   * Compute the graph hash for a node from the atom's own hash and the
   * sorted list of its edges.
   *
   * @param string Node hash of the atom.
   * @return string Graph hash, ending with "G".
   */
  private function computeGraphHash($node_hash) {
    $atom_cache = $this->getAtomCache();
    $atom = $atom_cache->getAtom($node_hash);

    $edges = $this->getEdges($node_hash);
    // Sort so the hash does not depend on the order edges were discovered in.
    sort($edges);

    $inputs = array(
      'atomHash' => $atom['hash'],
      'edges' => $edges,
    );

    return md5(serialize($inputs)).'G';
  }

  /**
   * Configure the publisher and publish every graph hash in the graph map.
   *
   * @param bool Value of the `--clean` flag; passed to the publisher's
   *   `setDropCaches()`.
   * @param DivinerPublisher Publisher to publish with.
   * @return void
   */
  private function publishDocumentation($clean, DivinerPublisher $publisher) {
    $atom_cache = $this->getAtomCache();
    $graph_map = $atom_cache->getGraphMap();

    $this->log(pht('PUBLISHING DOCUMENTATION'));

    $publisher
      ->setDropCaches($clean)
      ->setConfig($this->getAllConfig())
      ->setAtomCache($atom_cache)
      ->setRenderer(new DivinerDefaultRenderer())
      ->publishAtoms(array_values($graph_map));

    $this->log(pht('Done.'));
  }

}
589
590