# tidecv/datasets.py, from the GitHub repository dbolya/tide
from .data import Data
from . import functions as f

import zipfile
from pathlib import Path
from appdirs import user_data_dir
import urllib.request
from collections import defaultdict
import shutil
import json
import os

def default_name(path:str) -> str:
    """ Returns the file name of a path, without its extension. """
    return os.path.splitext(os.path.basename(path))[0]
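
# e.g. (illustrative only): default_name('annotations/instances_val2017.json') -> 'instances_val2017'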

def get_tide_path():
    """ Returns the local directory used to cache downloaded annotations, creating it if necessary. """
    if 'TIDE_PATH' in os.environ:
        tide_path = os.environ['TIDE_PATH']
    else:
        tide_path = user_data_dir('tidecv', appauthor=False)

    if not os.path.exists(tide_path):
        os.makedirs(tide_path)

    return tide_path
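
# Usage sketch ('/tmp/tide_cache' is a hypothetical override; left unset, the path comes from appdirs):
#
#   os.environ['TIDE_PATH'] = '/tmp/tide_cache'
#   print(get_tide_path())  # -> '/tmp/tide_cache', created if it didn't exist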

def download_annotations(name:str, url:str, force_download:bool=False) -> str:
    """ Downloads and extracts a zip of annotations into the TIDE cache, returning the extraction path. """
    tide_path = get_tide_path()
    candidate_path = os.path.join(tide_path, name)
    finished_file_path = os.path.join(candidate_path, '_finished')
    zip_file_path = os.path.join(candidate_path, '_tmp.zip')

    # Check whether the annotations have already been downloaded.
    # If there isn't a file called _finished, the previous download didn't complete, so try again.
    already_downloaded = os.path.exists(candidate_path) and os.path.exists(finished_file_path)

    if not force_download and already_downloaded:
        return candidate_path
    else:
        print('{} annotations not found. Downloading...'.format(name))

    if os.path.exists(candidate_path):
        shutil.rmtree(candidate_path)
    os.makedirs(candidate_path)

    urllib.request.urlretrieve(url, zip_file_path)
    with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
        zip_ref.extractall(candidate_path)

    os.remove(zip_file_path)
    open(finished_file_path, 'a').close() # Make an empty _finished file to mark that we were successful

    print('Successfully downloaded {} to "{}"'.format(name, candidate_path))
    return candidate_path
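
# Usage sketch (hedged example; this mirrors what COCO() below does automatically, and the URL is
# the public COCO 2017 annotations zip):
#
#   ann_dir = download_annotations(
#       'COCO2017',
#       'http://images.cocodataset.org/annotations/annotations_trainval2017.zip')
#   # ann_dir now holds the extracted files plus an empty '_finished' marker file.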

def COCO(path:str=None, name:str=None, year:int=2017, ann_set:str='val', force_download:bool=False) -> Data:
    """
    Loads ground truth from a COCO-style annotation file.

    If path is not specified, this will download the COCO annotations for the year and ann_set specified.
    Valid years are 2014 and 2017, and valid ann_sets are 'val' and 'train'.
    """
    if path is None:
        path = download_annotations(
            'COCO{}'.format(year),
            'http://images.cocodataset.org/annotations/annotations_trainval{}.zip'.format(year),
            force_download)

        path = os.path.join(path, 'annotations', 'instances_{}{}.json'.format(ann_set, year))

    if name is None: name = default_name(path)

    with open(path, 'r') as json_file:
        cocojson = json.load(json_file)

    images = cocojson['images']
    anns   = cocojson['annotations']
    cats   = cocojson['categories'] if 'categories' in cocojson else None

    # Add everything from the COCO json into our data structure
    data = Data(name, max_dets=100)

    image_lookup = {}

    for image in images:
        image_lookup[image['id']] = image
        data.add_image(image['id'], image['file_name'])

    if cats is not None:
        for cat in cats:
            data.add_class(cat['id'], cat['name'])

    for ann in anns:
        image  = ann['image_id']
        _class = ann['category_id']
        box    = ann['bbox']
        mask   = f.toRLE(ann['segmentation'], image_lookup[image]['width'], image_lookup[image]['height'])

        # Crowd annotations are ignore regions in COCO evaluation; everything else is a normal GT box.
        if ann['iscrowd']:
            data.add_ignore_region(image, _class, box, mask)
        else:
            data.add_ground_truth(image, _class, box, mask)

    return data
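
# Usage sketch (assumes network access for the automatic download; alternatively, pass `path`
# pointing at a local instances_*.json file):
#
#   gt = COCO(year=2017, ann_set='val')       # downloads and caches COCO2017 val annotations
#   gt = COCO(path='instances_val2017.json')  # or load a local annotation file directly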

def COCOResult(path:str, name:str=None) -> Data:
    """ Loads predictions from a COCO-style results file. """
    if name is None: name = default_name(path)

    with open(path, 'r') as json_file:
        dets = json.load(json_file)

    data = Data(name)

    for det in dets:
        image = det['image_id']
        _cls  = det['category_id']
        score = det['score']
        box   = det['bbox']         if 'bbox'         in det else None
        mask  = det['segmentation'] if 'segmentation' in det else None

        data.add_detection(image, _cls, score, box, mask)

    return data
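
# Usage sketch ('my_results.json' is a hypothetical detector output in the standard COCO results
# format: a list of {'image_id', 'category_id', 'score', 'bbox' and/or 'segmentation'} dicts):
#
#   preds = COCOResult('my_results.json', name='my_detector')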

def LVIS(path:str=None, name:str=None, version_str:str='v1', force_download:bool=False) -> Data:
    """
    Loads an LVIS-style dataset.
    The version string is used for downloading the dataset and should be one of the versions of LVIS (e.g., v0.5, v1).

    Note that LVIS evaluation is special, but we can emulate it by adding ignore regions.
    The detector isn't penalized for predicting classes that the LVIS annotators haven't guaranteed
    to be in or out of the image (i.e., classes outside the union of GT-annotated classes and classes
    explicitly marked as not in the image). To emulate this behavior, we add ignore region labels for
    every class not verified to be in the image. This is not as inefficient as it sounds, because
    ignore regions are separated out during mAP calculation and error processing, so adding a bunch
    of them doesn't hurt.

    The LVIS AP numbers are slightly lower than what the LVIS API reports because of these workarounds.
    """
    if path is None:
        path = download_annotations(
            'LVIS{}'.format(version_str),
            'https://s3-us-west-2.amazonaws.com/dl.fbaipublicfiles.com/LVIS/lvis_{}_val.json.zip'.format(version_str),
            force_download)

        path = os.path.join(path, 'lvis_{}_val.json'.format(version_str))

    if name is None: name = default_name(path)

    with open(path, 'r') as json_file:
        lvisjson = json.load(json_file)

    images = lvisjson['images']
    anns   = lvisjson['annotations']
    cats   = lvisjson['categories'] if 'categories' in lvisjson else None

    data = Data(name, max_dets=300)
    image_lookup = {}
    classes_in_img = defaultdict(set)

    for image in images:
        image_lookup[image['id']] = image
        data.add_image(image['id'], image['coco_url']) # LVIS has no image names, only coco urls

        # Negative categories are guaranteed by the annotators to not be in the image,
        # so we should consider them during evaluation.
        for cat_id in image['neg_category_ids']:
            classes_in_img[image['id']].add(cat_id)

    if cats is not None:
        for cat in cats:
            data.add_class(cat['id'], cat['synset'])

    for ann in anns:
        image  = ann['image_id']
        _class = ann['category_id']
        box    = ann['bbox']
        mask   = f.toRLE(ann['segmentation'], image_lookup[image]['width'], image_lookup[image]['height'])

        data.add_ground_truth(image, _class, box, mask)

        # There's an annotation for this class, so we should consider the class for evaluation.
        classes_in_img[image].add(_class)

    all_classes = set(data.classes.keys())

    # LVIS doesn't penalize the detector for detecting classes that the annotators haven't guaranteed
    # to be in or out of the image. Here we simulate that property by adding ignore regions for all
    # such classes.
    for image in images:
        ignored_classes = all_classes.difference(classes_in_img[image['id']])

        # LVIS also doesn't penalize the detector for mistakes made on classes explicitly marked as
        # not exhaustively annotated, which we can emulate the same way, so add those categories to
        # the ignored classes too.
        ignored_classes.update(set(image['not_exhaustive_category_ids']))

        for _cls in ignored_classes:
            data.add_ignore_region(image['id'], _cls)

    return data
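
# Usage sketch (assumes network access; the downloader above fetches the LVIS val annotations):
#
#   gt = LVIS(version_str='v1')  # downloads and caches lvis_v1_val.json if needed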

def LVISResult(path:str, name:str=None) -> Data:
    """ Loads predictions from an LVIS-style results file. Note that this is the same format as a COCO-style results file. """
    return COCOResult(path, name)

def Pascal(path:str=None, name:str=None, year:int=2007, ann_set:str='val', force_download:bool=False) -> Data:
    """
    Loads the Pascal VOC 2007 or 2012 data from a COCO-style json.

    Valid years are 2007 and 2012, and valid ann_sets are 'train' and 'val'.
    """
    if path is None:
        path = download_annotations(
            'Pascal',
            'https://s3.amazonaws.com/images.cocodataset.org/external/external_PASCAL_VOC.zip',
            force_download)

        path = os.path.join(path, 'PASCAL_VOC', 'pascal_{}{}.json'.format(ann_set, year))

    return COCO(path, name)
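
# Usage sketch (assumes network access for the COCO-hosted Pascal VOC jsons):
#
#   gt = Pascal(year=2007, ann_set='val')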

def Cityscapes(path:str, name:str=None):
    """
    Loads the fine Cityscapes annotations as instance segmentation masks, and also generates bounding boxes for them.

    Note that we can't automatically download Cityscapes because it requires registration and an agreement to the ToS.
    You can get Cityscapes here: https://www.cityscapes-dataset.com/

    Path should be to gtFine/<ann_set>. E.g., <path_to_cityscapes>/gtFine/val.
    """
    if name is None: name = default_name(path)
    data = Data(name)

    instance_classes = {
        'person'    : 24,
        'rider'     : 25,
        'car'       : 26,
        'truck'     : 27,
        'train'     : 31,
        'motorcycle': 32,
        'bicycle'   : 33,
        'bus'       : 28,
        'caravan'   : 29,
        'trailer'   : 30,
    }

    ignored_classes = set([29, 30])

    for class_name, class_id in instance_classes.items():
        data.add_class(class_id, class_name)

    for ann in Path(path).glob('*/*.json'):
        with open(ann) as json_file:
            ann_json = json.load(json_file)

        # Note: a string for an image ID is okay
        image_id = os.path.basename(ann).replace('_gtFine_polygons.json', '')
        objs = ann_json['objects']

        data.add_image(image_id, image_id) # The id in this case is just the name of the image

        # Caravan and trailer should be ignored in all evaluation
        for _cls in ignored_classes:
            data.add_ignore_region(image_id, _cls)

        for obj in objs:
            class_label = obj['label']
            is_crowd = False

            # Cityscapes labelers can mark objects without defined boundaries as 'group'. In COCO-land
            # this would be a crowd annotation, so in that case make it an ignore region.
            if class_label.endswith('group'):
                is_crowd = True
                class_label = class_label[:-5] # Remove the 'group' suffix

            # We are only considering instance classes
            if class_label not in instance_classes:
                continue

            class_id = instance_classes[class_label]

            # If the class is not used in evaluation, don't include it
            if class_id in ignored_classes:
                continue

            # Flatten the list of [x, y] points into a single flat coordinate list (every 2 ints is
            # one point), wrapped in a list to match the COCO polygon format.
            poly = [sum(obj['polygon'], [])]
            box  = f.polyToBox(poly)

            if is_crowd:
                data.add_ignore_region(image_id, class_id, box, poly)
            else:
                data.add_ground_truth(image_id, class_id, box, poly)

    return data
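
# Usage sketch ('<path_to_cityscapes>' is a placeholder for wherever the gtFine annotations
# were extracted):
#
#   gt = Cityscapes('<path_to_cityscapes>/gtFine/val')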