GitHub Repository: labmlai/annotated_deep_learning_paper_implementations
Path: blob/master/utils/papers_list.py
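A small utility script that scans the Python sources under labml_nn for arXiv abstract links and writes docs/papers.json, mapping each referenced paper id to the annotated pages on nn.labml.ai that cite it.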
import json
import re
from pathlib import Path

from labml import logger
from labml.logger import Text

# Root of the labml_nn package, relative to the repo root
HOME = Path('./labml_nn').absolute()
print(HOME)

REGEX = re.compile(r"""
\(
https://arxiv\.org/abs/  # arXiv abstract URL prefix
(?P<id>[0-9\.]+)         # Paper ID
\)
""", re.VERBOSE)

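# For example, in VERBOSE mode the pattern above reduces to
# r"\(https://arxiv\.org/abs/(?P<id>[0-9\.]+)\)", so a markdown link such as
# "[Attention Is All You Need](https://arxiv.org/abs/1706.03762)" would yield
# m.group('id') == '1706.03762'.
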
IGNORE = {
    'neox/model.html',
    'transformers/index.html',
    'transformers/configs.html',
    'optimizers/noam.html',
    'transformers/basic/autoregressive_experiment.html',
    'transformers/xl/relative_mha.html',
    'capsule_networks/mnist.html',
    'transformers/rope/value_pe/index.html',
}

IGNORE_PAPERS = {
    '2002.04745',  # On Layer Normalization in the Transformer Architecture
    '1606.08415',  # Gaussian Error Linear Units (GELUs)
    '1710.10196',  # Progressive Growing of GANs for Improved Quality, Stability, and Variation
    '1904.11486',  # Making Convolutional Networks Shift-Invariant Again
    '1801.04406',  # Which Training Methods for GANs do actually Converge?
    '1812.04948',  # A Style-Based Generator Architecture for Generative Adversarial Networks
    '1705.10528',  # Constrained Policy Optimization
}


def collect(path: Path):
    """Recursively collect arXiv references from the ``.py`` sources under ``path``."""
    if path.is_file():
        html = path.relative_to(HOME)
        if html.suffix not in {'.py'}:
            return []

        # Map each source file to the path of its rendered documentation page
        if html.stem == '__init__':
            html = html.parent / 'index.html'
        else:
            html = html.parent / f'{html.stem}.html'

        if str(html) in IGNORE:
            return []

        with open(str(path), 'r') as f:
            contents = f.read()
        papers = set()
        for m in REGEX.finditer(contents):
            if m.group('id') in IGNORE_PAPERS:
                continue
            papers.add(m.group('id'))

        # Log pages that reference more than one paper
        if len(papers) > 1:
            logger.log([(str(html), Text.key), ': ', str(papers)])
        return [{'url': str(html), 'arxiv_id': p} for p in papers]

    urls = []
    for f in path.iterdir():
        urls += collect(f)

    return urls


def main():
    """Scan the whole package and write ``docs/papers.json`` grouping pages by arXiv id."""
    papers = []
    for f in HOME.iterdir():
        papers += collect(f)

    papers.sort(key=lambda p: p['arxiv_id'])

    # Group page URLs by arXiv id
    by_id = {}
    for p in papers:
        if p['arxiv_id'] not in by_id:
            by_id[p['arxiv_id']] = []
        by_id[p['arxiv_id']].append(f'''https://nn.labml.ai/{p['url']}''')

    logger.log([('Papers', Text.key), ': ', f'{len(by_id) :,}'])

    with open(str(HOME.parent / 'docs' / 'papers.json'), 'w') as f:
        f.write(json.dumps(by_id, indent=1))


if __name__ == '__main__':
    main()
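
# The resulting docs/papers.json maps each arXiv id to the annotated pages that
# reference it, roughly of the following form (illustrative id and URL):
# {
#  "1706.03762": [
#   "https://nn.labml.ai/transformers/mha.html"
#  ]
# }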