/* Source: GitHub repository freebsd/freebsd-src, path contrib/file/src/is_json.c (main branch). */
/*-
 * Copyright (c) 2018 Christos Zoulas
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Parse JSON object serialization format (RFC-7159)
 */

/*
 * Library builds pull in the project headers; a standalone build with
 * TEST defined only needs the C library.
 */
#ifndef TEST
#include "file.h"

#ifndef lint
FILE_RCSID("@(#)$File: is_json.c,v 1.30 2022/09/27 19:12:40 christos Exp $")
#endif

#include "magic.h"
#else
#include <stdio.h>
#include <stddef.h>
#endif
#include <string.h>

/*
 * Debug tracing.
 *
 * With DEBUG defined, DPRINTF(a, b, c) prints the label `a', the byte at
 * the cursor `b' (in hex and as a character) and the text between the
 * start pointer `c' and `b', indented by the nesting level.  The macro
 * reads `lvl' and `ue' from the scope it is expanded in, so it can only
 * be used in functions that have both.  The cursor is dereferenced only
 * while it is below `ue', and the printed length is never negative, so
 * tracing at the end of the buffer does not read out of bounds.
 *
 * Without DEBUG, DPRINTF expands to an empty statement.
 *
 * __file_debugused is attached to parameters that are referenced only by
 * DPRINTF; in non-debug builds it marks them as intentionally unused.
 */
#ifdef DEBUG
#include <stdio.h>
#define DPRINTF(a, b, c) \
	printf("%*s%s [%.2x/%c] %.*s\n", (int)lvl, "", (a), \
	    (b) < ue ? *(b) : 0, (b) < ue ? *(b) : ' ', \
	    (b) > (c) ? (int)((b) - (c)) : 0, (const char *)(c))
#define __file_debugused
#else
#define DPRINTF(a, b, c) do { } while (/*CONSTCOND*/0)
#define __file_debugused __attribute__((__unused__))
#endif

/*
 * Indices into the statistics array (size_t st[JSON_MAX]) filled in by the
 * parser.  json_parse() increments the slot matching the kind of value it
 * has just accepted; json_parse_array() additionally increments
 * JSON_ARRAYN each time it closes an array.  JSON_MAX is the number of
 * slots.
 */
#define JSON_ARRAY	0
#define JSON_CONSTANT	1
#define JSON_NUMBER	2
#define JSON_OBJECT	3
#define JSON_STRING	4
#define JSON_ARRAYN	5
#define JSON_MAX	6

/*
 * if JSON_COUNT != 0:
 *	count all the objects, require that we have the whole data file
 * otherwise:
 *	stop if we find an object or an array
 *
 * The value may be supplied on the compiler command line; it defaults
 * to 0.  A non-zero value also makes file_is_json() append the per-kind
 * counters to its output.
 *
 * NOTE(review): the early return in json_parse() is compiled in under
 * "#if JSON_COUNT" although its comment reads "bail quickly if not
 * counting" -- confirm which of the two is intended.
 */
#ifndef JSON_COUNT
#define JSON_COUNT 0
#endif

/*
 * Forward declaration: the array and object parsers below call back into
 * json_parse() for every nested value.
 */
static int json_parse(const unsigned char **ucp, const unsigned char *ue,
    size_t *st, size_t lvl);

/*
 * Return 1 if `uc' is a space, newline, carriage return or horizontal
 * tab, and 0 for every other byte.
 */
static int
json_isspace(const unsigned char uc)
{
	return uc == ' ' || uc == '\n' || uc == '\r' || uc == '\t';
}

/*
 * Return 1 if `uc' is a decimal digit ('0'..'9'), 0 otherwise.
 * The C standard guarantees that the digit characters are contiguous, so
 * a range check is equivalent to listing them.
 */
static int
json_isdigit(unsigned char uc)
{
	return uc >= '0' && uc <= '9';
}

/*
 * Return 1 if `uc' is a hexadecimal digit (0-9, a-f, A-F), 0 otherwise.
 */
static int
json_isxdigit(unsigned char uc)
{
	if (json_isdigit(uc))
		return 1;

	switch (uc) {
	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
		return 1;
	default:
		return 0;
	}
}

/*
 * Advance `uc' past any run of JSON whitespace without going beyond
 * `ue' (one past the last byte of the buffer).  Returns the new
 * position, which equals `ue' when only whitespace was left.
 */
static const unsigned char *
json_skip_space(const unsigned char *uc, const unsigned char *ue)
{
	while (uc < ue && json_isspace(*uc))
		uc++;
	return uc;
}

125
/*ARGSUSED*/
126
static int
127
json_parse_string(const unsigned char **ucp, const unsigned char *ue,
128
size_t lvl __file_debugused)
129
{
130
const unsigned char *uc = *ucp;
131
size_t i;
132
133
DPRINTF("Parse string: ", uc, *ucp);
134
while (uc < ue) {
135
switch (*uc++) {
136
case '\0':
137
goto out;
138
case '\\':
139
if (uc == ue)
140
goto out;
141
switch (*uc++) {
142
case '\0':
143
goto out;
144
case '"':
145
case '\\':
146
case '/':
147
case 'b':
148
case 'f':
149
case 'n':
150
case 'r':
151
case 't':
152
continue;
153
case 'u':
154
if (ue - uc < 4) {
155
uc = ue;
156
goto out;
157
}
158
for (i = 0; i < 4; i++)
159
if (!json_isxdigit(*uc++))
160
goto out;
161
continue;
162
default:
163
goto out;
164
}
165
case '"':
166
DPRINTF("Good string: ", uc, *ucp);
167
*ucp = uc;
168
return 1;
169
default:
170
continue;
171
}
172
}
173
out:
174
DPRINTF("Bad string: ", uc, *ucp);
175
*ucp = uc;
176
return 0;
177
}
178
179
static int
180
json_parse_array(const unsigned char **ucp, const unsigned char *ue,
181
size_t *st, size_t lvl)
182
{
183
const unsigned char *uc = *ucp;
184
185
DPRINTF("Parse array: ", uc, *ucp);
186
while (uc < ue) {
187
uc = json_skip_space(uc, ue);
188
if (uc == ue)
189
goto out;
190
if (*uc == ']')
191
goto done;
192
if (!json_parse(&uc, ue, st, lvl + 1))
193
goto out;
194
if (uc == ue)
195
goto out;
196
switch (*uc) {
197
case ',':
198
uc++;
199
continue;
200
case ']':
201
done:
202
st[JSON_ARRAYN]++;
203
DPRINTF("Good array: ", uc, *ucp);
204
*ucp = uc + 1;
205
return 1;
206
default:
207
goto out;
208
}
209
}
210
out:
211
DPRINTF("Bad array: ", uc, *ucp);
212
*ucp = uc;
213
return 0;
214
}
215
/*
 * Parse the remainder of an object.  On entry *ucp points just past the
 * opening '{' (the caller has already consumed it).
 *
 * Each member is a string key, optional whitespace, ':' and a value
 * parsed with json_parse(); members are separated by ','.  A '}' is
 * accepted wherever a key could start, so "{}" and a trailing comma
 * before '}' are both tolerated.
 *
 * Returns 1 with *ucp just past the closing '}', or 0 with *ucp at the
 * position where parsing stopped.
 */
static int
json_parse_object(const unsigned char **ucp, const unsigned char *ue,
    size_t *st, size_t lvl)
{
	const unsigned char *uc = *ucp;

	DPRINTF("Parse object: ", uc, *ucp);
	while (uc < ue) {
		uc = json_skip_space(uc, ue);
		if (uc == ue)
			goto out;
		if (*uc == '}') {
			uc++;
			goto done;
		}
		/* A member must start with a quoted key. */
		if (*uc++ != '"') {
			DPRINTF("not string", uc, *ucp);
			goto out;
		}
		DPRINTF("next field", uc, *ucp);
		if (!json_parse_string(&uc, ue, lvl)) {
			DPRINTF("not string", uc, *ucp);
			goto out;
		}
		uc = json_skip_space(uc, ue);
		if (uc == ue)
			goto out;
		if (*uc++ != ':') {
			DPRINTF("not colon", uc, *ucp);
			goto out;
		}
		if (!json_parse(&uc, ue, st, lvl + 1)) {
			DPRINTF("not json", uc, *ucp);
			goto out;
		}
		if (uc == ue)
			goto out;
		switch (*uc++) {
		case ',':
			continue;
		case '}': /* { */
			goto done;
		default:
			DPRINTF("not more", uc, *ucp);
			/*
			 * Step back so the reported stop position is the
			 * offending byte itself.
			 */
			uc--;
			goto out;
		}
	}
out:
	DPRINTF("Bad object: ", uc, *ucp);
	*ucp = uc;
	return 0;
done:
	DPRINTF("Good object: ", uc, *ucp);
	*ucp = uc;
	return 1;
}

272
/*ARGSUSED*/
273
static int
274
json_parse_number(const unsigned char **ucp, const unsigned char *ue,
275
size_t lvl __file_debugused)
276
{
277
const unsigned char *uc = *ucp;
278
int got = 0;
279
280
DPRINTF("Parse number: ", uc, *ucp);
281
if (uc == ue)
282
return 0;
283
if (*uc == '-')
284
uc++;
285
286
for (; uc < ue; uc++) {
287
if (!json_isdigit(*uc))
288
break;
289
got = 1;
290
}
291
if (uc == ue)
292
goto out;
293
if (*uc == '.')
294
uc++;
295
for (; uc < ue; uc++) {
296
if (!json_isdigit(*uc))
297
break;
298
got = 1;
299
}
300
if (uc == ue)
301
goto out;
302
if (got && (*uc == 'e' || *uc == 'E')) {
303
uc++;
304
got = 0;
305
if (uc == ue)
306
goto out;
307
if (*uc == '+' || *uc == '-')
308
uc++;
309
for (; uc < ue; uc++) {
310
if (!json_isdigit(*uc))
311
break;
312
got = 1;
313
}
314
}
315
out:
316
if (!got)
317
DPRINTF("Bad number: ", uc, *ucp);
318
else
319
DPRINTF("Good number: ", uc, *ucp);
320
*ucp = uc;
321
return got;
322
}
323
324
/*ARGSUSED*/
325
static int
326
json_parse_const(const unsigned char **ucp, const unsigned char *ue,
327
const char *str, size_t len, size_t lvl __file_debugused)
328
{
329
const unsigned char *uc = *ucp;
330
331
DPRINTF("Parse const: ", uc, *ucp);
332
*ucp += --len - 1;
333
if (*ucp > ue)
334
*ucp = ue;
335
for (; uc < ue && --len;) {
336
if (*uc++ != *++str) {
337
DPRINTF("Bad const: ", uc, *ucp);
338
return 0;
339
}
340
}
341
DPRINTF("Good const: ", uc, *ucp);
342
return 1;
343
}
344
345
static int
346
json_parse(const unsigned char **ucp, const unsigned char *ue,
347
size_t *st, size_t lvl)
348
{
349
const unsigned char *uc, *ouc;
350
int rv = 0;
351
int t;
352
353
ouc = uc = json_skip_space(*ucp, ue);
354
if (uc == ue)
355
goto out;
356
357
// Avoid recursion
358
if (lvl > 500) {
359
DPRINTF("Too many levels", uc, *ucp);
360
return 0;
361
}
362
#if JSON_COUNT
363
/* bail quickly if not counting */
364
if (lvl > 1 && (st[JSON_OBJECT] || st[JSON_ARRAYN]))
365
return 1;
366
#endif
367
368
DPRINTF("Parse general: ", uc, *ucp);
369
switch (*uc++) {
370
case '"':
371
rv = json_parse_string(&uc, ue, lvl + 1);
372
t = JSON_STRING;
373
break;
374
case '[':
375
rv = json_parse_array(&uc, ue, st, lvl + 1);
376
t = JSON_ARRAY;
377
break;
378
case '{': /* '}' */
379
rv = json_parse_object(&uc, ue, st, lvl + 1);
380
t = JSON_OBJECT;
381
break;
382
case 't':
383
rv = json_parse_const(&uc, ue, "true", sizeof("true"), lvl + 1);
384
t = JSON_CONSTANT;
385
break;
386
case 'f':
387
rv = json_parse_const(&uc, ue, "false", sizeof("false"),
388
lvl + 1);
389
t = JSON_CONSTANT;
390
break;
391
case 'n':
392
rv = json_parse_const(&uc, ue, "null", sizeof("null"), lvl + 1);
393
t = JSON_CONSTANT;
394
break;
395
default:
396
--uc;
397
rv = json_parse_number(&uc, ue, lvl + 1);
398
t = JSON_NUMBER;
399
break;
400
}
401
if (rv)
402
st[t]++;
403
uc = json_skip_space(uc, ue);
404
out:
405
DPRINTF("End general: ", uc, *ucp);
406
*ucp = uc;
407
if (lvl == 0) {
408
if (!rv)
409
return 0;
410
if (uc == ue)
411
return (st[JSON_ARRAYN] || st[JSON_OBJECT]) ? 1 : 0;
412
if (*ouc == *uc && json_parse(&uc, ue, st, 1))
413
return (st[JSON_ARRAYN] || st[JSON_OBJECT]) ? 2 : 0;
414
else
415
return 0;
416
}
417
return rv;
418
}
419
420
#ifndef TEST
421
int
422
file_is_json(struct magic_set *ms, const struct buffer *b)
423
{
424
const unsigned char *uc = CAST(const unsigned char *, b->fbuf);
425
const unsigned char *ue = uc + b->flen;
426
size_t st[JSON_MAX];
427
int mime = ms->flags & MAGIC_MIME;
428
int jt;
429
430
431
if ((ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION)) != 0)
432
return 0;
433
434
memset(st, 0, sizeof(st));
435
436
if ((jt = json_parse(&uc, ue, st, 0)) == 0)
437
return 0;
438
439
if (mime == MAGIC_MIME_ENCODING)
440
return 1;
441
if (mime) {
442
if (file_printf(ms, "application/%s",
443
jt == 1 ? "json" : "x-ndjson") == -1)
444
return -1;
445
return 1;
446
}
447
if (file_printf(ms, "%sJSON text data",
448
jt == 1 ? "" : "New Line Delimited ") == -1)
449
return -1;
450
#if JSON_COUNT
451
#define P(n) st[n], st[n] > 1 ? "s" : ""
452
if (file_printf(ms, " (%" SIZE_T_FORMAT "u object%s, %" SIZE_T_FORMAT
453
"u array%s, %" SIZE_T_FORMAT "u string%s, %" SIZE_T_FORMAT
454
"u constant%s, %" SIZE_T_FORMAT "u number%s, %" SIZE_T_FORMAT
455
"u >1array%s)",
456
P(JSON_OBJECT), P(JSON_ARRAY), P(JSON_STRING), P(JSON_CONSTANT),
457
P(JSON_NUMBER), P(JSON_ARRAYN))
458
== -1)
459
return -1;
460
#endif
461
return 1;
462
}
463
464
#else
465
466
#include <sys/types.h>
467
#include <sys/stat.h>
468
#include <stdio.h>
469
#include <fcntl.h>
470
#include <unistd.h>
471
#include <stdlib.h>
472
#include <stdint.h>
473
#include <err.h>
474
475
int
476
main(int argc, char *argv[])
477
{
478
int fd;
479
struct stat st;
480
unsigned char *p;
481
size_t stats[JSON_MAX];
482
483
if ((fd = open(argv[1], O_RDONLY)) == -1)
484
err(EXIT_FAILURE, "Can't open `%s'", argv[1]);
485
486
if (fstat(fd, &st) == -1)
487
err(EXIT_FAILURE, "Can't stat `%s'", argv[1]);
488
489
if ((p = CAST(char *, malloc(st.st_size))) == NULL)
490
err(EXIT_FAILURE, "Can't allocate %jd bytes",
491
(intmax_t)st.st_size);
492
if (read(fd, p, st.st_size) != st.st_size)
493
err(EXIT_FAILURE, "Can't read %jd bytes",
494
(intmax_t)st.st_size);
495
memset(stats, 0, sizeof(stats));
496
printf("is json %d\n", json_parse((const unsigned char **)&p,
497
p + st.st_size, stats, 0));
498
return 0;
499
}
500
#endif
501
502