CoCalc -- add_galaxy_instance

GitHub Repository: galaxyproject/training-material
Path: blob/main/bin/add_galaxy_instance_badges.py
¹⁶⁷⁷ views
1
#!/usr/bin/env python
2
import argparse
3
import glob
4
from collections import defaultdict
5
import os
6
import re
7
import subprocess
8
import time
9
import yaml
10
DRY_RUN = False
11

12

13
def discover_trainings(topics_dir):
    """Auto-discover all topic metadata files.

    Every entry directly below ``topics_dir`` that contains a
    ``metadata.yaml`` file is treated as a topic; entries without one are
    skipped. For each topic, the ``tutorial.md`` and ``slides.html`` files
    found in ``<topic>/tutorials/*/`` are opened and the ``title`` of their
    first YAML document is recorded.

    Args:
        topics_dir: Path to the directory holding one sub-directory per topic.

    Yields:
        ``(name, training)`` tuples, where ``name`` is the ``name`` key of the
        topic's ``metadata.yaml`` and ``training`` is a dict with the keys
        ``'title'`` (topic title), ``'trainings'`` (tutorial directory name ->
        material title) and ``'count'`` (number of entries in ``'trainings'``).
    """
    for training_dir in glob.glob(os.path.join(topics_dir, '*')):
        metadata_file = os.path.join(training_dir, 'metadata.yaml')
        if not os.path.exists(metadata_file):
            continue

        with open(metadata_file, 'r') as handle:
            training_data = yaml.safe_load(handle)

        training = {
            'title': training_data['title'],
            'trainings': {},
        }

        tutorial_dirs = os.path.join(training_dir, 'tutorials', '*')
        materials = (
            glob.glob(os.path.join(tutorial_dirs, 'tutorial.md')) +
            glob.glob(os.path.join(tutorial_dirs, 'slides.html'))
        )
        for material in materials:
            with open(material, 'r') as handle:
                # Only the first YAML document of the file is used
                # (presumably the front matter -- verify against the
                # material files). It must be consumed while the handle is
                # still open because safe_load_all is lazy.
                material_data = next(yaml.safe_load_all(handle))

            # The tutorial is identified by the directory containing the
            # material. Use os.path instead of splitting on '/' so this also
            # works where the platform path separator is not '/'.
            name = os.path.basename(os.path.dirname(material))
            # When a tutorial has both tutorial.md and slides.html, the
            # slides title is processed last and therefore wins.
            training['trainings'][name] = material_data['title']

        training['count'] = len(training['trainings'])

        yield training_data['name'], training
39

40

41
def safe_name(server, dashes=True):
    """Make human strings 'safe' for usage in paths.

    Args:
        server: The string to sanitise.
        dashes: When true (the default) ``-`` is kept; otherwise it is
            replaced like any other disallowed character.

    Returns:
        ``server`` with every whitespace character and every character
        outside ``A-Z``, ``a-z``, ``0-9``, ``_`` (and ``-`` when ``dashes`` is
        true) replaced by ``_``.
    """
    sanitized = re.sub(r'\s', '_', server)
    if dashes:
        sanitized = re.sub(r'[^A-Za-z0-9_-]', '_', sanitized)
    else:
        sanitized = re.sub(r'[^A-Za-z0-9_]', '_', sanitized)

    # Return the sanitised value, not the untouched input.
    return sanitized
50

51

52
def get_badge_path(label, value, color):
    """Return the expected badge filename for a label/value/color triple.

    The result has the form ``<label>-<value>-<color>.svg``. In ``label`` and
    ``value``, ``@``, space and ``/`` are percent-encoded and every ``-`` is
    doubled; ``color`` is inserted unchanged.
    """
    # Applied in this order to both the label and the value.
    substitutions = (
        ('@', '%40'),
        (' ', '%20'),
        ('-', '--'),
        ('/', '%2F'),
    )

    escaped_parts = []
    for text in (label, value):
        for needle, replacement in substitutions:
            text = text.replace(needle, replacement)
        escaped_parts.append(text)

    safe_label, safe_value = escaped_parts
    return '{0}-{1}-{2}.svg'.format(safe_label, safe_value, color)
57

58

59
def realise_badge(badge, badge_cache_dir):
    """Download the badge to the badge_cache_dir (if needed) and return this real path to the user.

    Args:
        badge: Badge filename, used both as the file name inside the cache
            directory and as the last path component of the shields.io URL.
        badge_cache_dir: Directory in which downloaded badges are stored.

    Returns:
        The path ``badge_cache_dir/badge``. The file is not guaranteed to
        contain a badge: when ``DRY_RUN`` is set nothing is downloaded, and a
        failed download is only reported on stdout.
    """
    cached_path = os.path.join(badge_cache_dir, badge)

    # A zero-byte file is what a failed `wget -O` leaves behind (the output
    # file is created before anything is fetched). Treat it as missing so a
    # later run retries instead of serving an empty badge forever.
    if os.path.exists(cached_path) and os.path.getsize(cached_path) > 0:
        return cached_path

    # Download the missing image
    cmd = [
        'wget', 'https://img.shields.io/badge/%s' % badge,
        '--quiet', '-O', cached_path
    ]
    if DRY_RUN:
        print(' '.join(cmd))
        return cached_path

    try:
        subprocess.check_call(cmd)
    except subprocess.CalledProcessError:
        # Best effort: report the failure and carry on with the other badges.
        print('unable to retrieve badges, please try again later')
    # Be nice to their servers
    time.sleep(1)
    return cached_path
77

78

79
def badge_it(label, value, color, CACHE_DIR, identifier_parts, output_dir):
    """Create (or refresh) a named symlink pointing at a cached badge.

    Args:
        label: Left-hand text of the badge.
        value: Right-hand text of the badge.
        color: Badge colour name.
        CACHE_DIR: Directory holding the downloaded badge files.
        identifier_parts: Sequence of names; all but the last become nested
            directories below ``output_dir`` and the last becomes the file
            name (each passed through ``safe_name``).
        output_dir: Root directory below which the symlink is created.

    Returns:
        The file name (not the full path) of the symlink, ending in ``.svg``.
    """
    # Get a path to a (cached) badge file.
    real_badge_path = realise_badge(get_badge_path(
        label, value, color
    ), CACHE_DIR)

    # Determine the per-instance output directory and ensure it exists.
    # Use the output_dir argument rather than the global `args`, which only
    # exists when the file is run as a script.
    output_filedir = os.path.join(output_dir, *map(safe_name, identifier_parts[0:-1]))
    if not os.path.exists(output_filedir):
        os.makedirs(output_filedir)

    output_filename = safe_name(identifier_parts[-1]) + '.svg'
    output_filepath = os.path.join(output_filedir, output_filename)

    # Link to the cached badge with a path relative to the link's directory.
    # Computing it with relpath avoids assuming the output directory is
    # literally called 'badges'.
    symlink_source = os.path.relpath(real_badge_path, output_filedir)
    if not DRY_RUN:
        # Remove it if it exists, since this is easier than testing for
        # equality. lexists also detects a dangling symlink, which
        # os.path.exists reports as missing and os.symlink then trips over.
        if os.path.lexists(output_filepath):
            os.unlink(output_filepath)

        # Now (re-)create the symlink
        os.symlink(symlink_source, output_filepath)
    else:
        print(' '.join(['ln -s ', symlink_source, output_filepath]))
    return output_filename
108

109

110
# Script entry point: discover the available trainings, read which instances
# support which tutorials, and build one badge symlink per
# (instance, topic, tutorial) plus one summary badge per (instance, topic).
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Build the badge directory for instances to use.')
    parser.add_argument('--public-server-list', help='Url to access the public galaxy server list at',
                        default='https://raw.githubusercontent.com/martenson/public-galaxy-servers/master/servers.csv')
    parser.add_argument('--topics-directory', help='Path to the topics directory', default='./topics/')
    parser.add_argument('--instances', help='File containing the instances and their supported trainings', default='metadata/instances.yaml')

    parser.add_argument('--output', help='Path to the the directory where the badges should be stored. The directory will be created if it does not exist.', default='badges')
    # Kept at module level: badge_it may read the global `args`.
    args = parser.parse_args()

    # Validate training dir argument: it must be an existing directory.
    if not os.path.isdir(args.topics_directory):
        raise Exception("Invalid topics directory")
    all_trainings = dict(discover_trainings(args.topics_directory))

    # Create output directory if not existing.
    if not os.path.exists(args.output):
        os.makedirs(args.output)

    # Also check/create the badge cache directory.
    CACHE_DIR = os.path.join(args.output, 'cache')
    if not os.path.exists(CACHE_DIR):
        os.makedirs(CACHE_DIR)

    # Load the validated list of instances which support trainings
    with open(args.instances, 'r') as handle:
        data = yaml.safe_load(handle)

    # Collect a list of instances seen; everything listed is supported.
    instances = []
    for topic in data:
        for training in data[topic]['tutorials']:
            listed = data[topic]['tutorials'][training]['instances']
            for instance in listed:
                listed[instance]['supported'] = True
                instances.append(instance)
    # All of these instances support at least one training.
    instances = sorted(set(instances))

    # Mark the unsupported ones as such for easier processing later.
    for topic in data:
        for training in data[topic]['tutorials']:
            listed = data[topic]['tutorials'][training]['instances']
            for instance in instances:
                # Not in one of the existing supported ones: create the
                # entry, since there is no existing dict to update.
                if instance not in listed:
                    listed[instance] = {'supported': False}

    # Map of instance -> topic -> file names of the "Supported" badges
    instance_badges = {}

    # Build one badge per (instance, topic, training).
    for topic in data:
        # All trainings, not just those available
        for training in sorted(data[topic]['tutorials']):
            listed = data[topic]['tutorials'][training]['instances']
            for instance in listed:
                topic_badges = instance_badges.setdefault(instance, {}).setdefault(topic, [])

                # If available, green badge
                is_supported = listed[instance]['supported']

                # Only supported trainings are recorded in instance_badges
                # (but the unavailable badge will still be available
                # in case they ever go out of compliance.)
                label = all_trainings[topic]['trainings'][training]
                if is_supported:
                    output_filename = badge_it(
                        label,
                        'Supported', 'green',
                        CACHE_DIR, (instance, topic, training), args.output
                    )
                    topic_badges.append(output_filename)
                else:
                    badge_it(
                        label,
                        'Unsupported', 'lightgrey',
                        CACHE_DIR, (instance, topic, training), args.output
                    )

    # All instances, not just checked
    for instance in sorted(instance_badges):
        total = sum(len(badges) for badges in instance_badges[instance].values())

        # Instances without a single supported training get no summary badge.
        if total == 0:
            continue

        for topic in instance_badges[instance]:
            # Get the number of badges in this topic.
            count = len(instance_badges[instance][topic])
            available = all_trainings[topic]['count']

            # Colour the summary badge by the fraction of supported trainings.
            ratio = float(count) / available
            if ratio > 0.90:
                color = 'green'
            elif ratio > 0.25:
                color = 'orange'
            else:
                color = 'red'

            # '%2f' is a URL-encoded '/', so the value reads "count/available".
            badge_it(
                all_trainings[topic]['title'], '%s%%2f%s' % (count, available), color,
                CACHE_DIR, (instance, topic), args.output
            )
214

215
Product

Resources

Company