Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
alexbevi
GitHub Repository: alexbevi/BizHawk
Path: blob/master/waterbox/libc/functions/stdio/_PDCLIB_scan.c
2 views
/* _PDCLIB_scan( const char *, struct _PDCLIB_status_t * )

   This file is part of the Public Domain C Library (PDCLib).
   Permission is granted to use, modify, and / or redistribute at will.
*/
6
7
#include <stdio.h>
8
#include <stdbool.h>
9
#include <stdlib.h>
10
#include <stdarg.h>
11
#include <stdint.h>
12
#include <ctype.h>
13
#include <string.h>
14
#include <stddef.h>
15
#include <limits.h>
16
17
#ifndef REGTEST
18
19
#include "_PDCLIB_io.h"
20
21
/* Using an integer's bits as flags for both the conversion flags and length
   modifiers.

   Every expansion is wrapped in parentheses so that a flag behaves as a
   single operand wherever it is used. Without them, an expression such as
   "E_char + 1" would expand to "1<<6 + 1", i.e. 1 << 7.
*/
#define E_suppressed ( 1 << 0 )
#define E_char       ( 1 << 6 )
#define E_short      ( 1 << 7 )
#define E_long       ( 1 << 8 )
#define E_llong      ( 1 << 9 )
#define E_intmax     ( 1 << 10 )
#define E_size       ( 1 << 11 )
#define E_ptrdiff    ( 1 << 12 )
#define E_intptr     ( 1 << 13 )
#define E_ldouble    ( 1 << 14 )
#define E_unsigned   ( 1 << 16 )

/* Helper function to get a character from the string or stream, whatever is
38
used for input. When reading from a string, returns EOF on end-of-string
39
so that handling of the return value can be uniform for both streams and
40
strings.
41
*/
42
static int GET( struct _PDCLIB_status_t * status )
43
{
44
int rc = EOF;
45
if ( status->stream != NULL )
46
{
47
rc = getc( status->stream );
48
}
49
else
50
{
51
rc = ( *status->s == '\0' ) ? EOF : (unsigned char)*((status->s)++);
52
}
53
if ( rc != EOF )
54
{
55
++(status->i);
56
++(status->current);
57
}
58
return rc;
59
}
60
61
62
/* Helper function to put a read character back into the string or stream,
63
whatever is used for input.
64
*/
65
static void UNGET( int c, struct _PDCLIB_status_t * status )
66
{
67
if ( status->stream != NULL )
68
{
69
ungetc( c, status->stream ); /* TODO: Error? */
70
}
71
else
72
{
73
--(status->s);
74
}
75
--(status->i);
76
--(status->current);
77
}
78
79
80
/* Helper function to check if a character is part of a given scanset.

   scanlist     - first character of the scanlist
   end_scanlist - one past the last character of the scanlist
   rc           - the input character to look for

   A '-' that follows a literal character and is itself followed by another
   character describes a scangroup ("a-z"). A '-' at the very start or at the
   very end of the scanlist is matched verbatim.

   Returns true if rc is matched by the scanlist, false otherwise.
*/
static bool IN_SCANSET( const char * scanlist, const char * end_scanlist, int rc )
{
    /* Most recent literal character, usable as the lower bound of a
       scangroup; -1 while there is none.
    */
    int previous = -1;
    while ( scanlist != end_scanlist )
    {
        if ( ( *scanlist == '-' ) && ( previous != -1 ) )
        {
            /* possible scangroup ("a-z") */
            if ( ++scanlist == end_scanlist )
            {
                /* '-' at end of scanlist does not describe a scangroup */
                return rc == '-';
            }
            /* The lower bound itself has already been compared verbatim, so
               only the rest of the group up to and including the upper bound
               is tested here.
            */
            if ( ( rc > previous ) && ( rc <= (unsigned char)*scanlist ) )
            {
                return true;
            }
            /* scanlist is left on the upper bound, which the next pass
               handles as an ordinary literal.
            */
            previous = -1;
        }
        else
        {
            /* not a scangroup, check verbatim */
            if ( rc == (unsigned char)*scanlist )
            {
                return true;
            }
            previous = (unsigned char)(*scanlist++);
        }
    }
    return false;
}

const char * _PDCLIB_scan( const char * spec, struct _PDCLIB_status_t * status )
119
{
120
/* generic input character */
121
int rc = EOF;
122
const char * orig_spec = spec;
123
if ( *(++spec) == '%' )
124
{
125
/* %% -> match single '%' */
126
rc = GET( status );
127
switch ( rc )
128
{
129
case EOF:
130
/* input error */
131
if ( status->n == 0 )
132
{
133
status->n = -1;
134
}
135
return NULL;
136
case '%':
137
return ++spec;
138
default:
139
UNGET( rc, status );
140
break;
141
}
142
}
143
/* Initializing status structure */
144
status->flags = 0;
145
status->base = -1;
146
status->current = 0;
147
status->width = 0;
148
status->prec = 0;
149
150
/* '*' suppresses assigning parsed value to variable */
151
if ( *spec == '*' )
152
{
153
status->flags |= E_suppressed;
154
++spec;
155
}
156
157
/* If a width is given, strtol() will return its value. If not given,
158
strtol() will return zero. In both cases, endptr will point to the
159
rest of the conversion specifier - just what we need.
160
*/
161
char const * prev_spec = spec;
162
status->width = (int)strtol( spec, (char**)&spec, 10 );
163
if ( spec == prev_spec )
164
{
165
status->width = UINT_MAX;
166
}
167
168
/* Optional length modifier
169
We step one character ahead in any case, and step back only if we find
170
there has been no length modifier (or step ahead another character if it
171
has been "hh" or "ll").
172
*/
173
switch ( *(spec++) )
174
{
175
case 'h':
176
if ( *spec == 'h' )
177
{
178
/* hh -> char */
179
status->flags |= E_char;
180
++spec;
181
}
182
else
183
{
184
/* h -> short */
185
status->flags |= E_short;
186
}
187
break;
188
case 'l':
189
if ( *spec == 'l' )
190
{
191
/* ll -> long long */
192
status->flags |= E_llong;
193
++spec;
194
}
195
else
196
{
197
/* l -> long */
198
status->flags |= E_long;
199
}
200
break;
201
case 'j':
202
/* j -> intmax_t, which might or might not be long long */
203
status->flags |= E_intmax;
204
break;
205
case 'z':
206
/* z -> size_t, which might or might not be unsigned int */
207
status->flags |= E_size;
208
break;
209
case 't':
210
/* t -> ptrdiff_t, which might or might not be long */
211
status->flags |= E_ptrdiff;
212
break;
213
case 'L':
214
/* L -> long double */
215
status->flags |= E_ldouble;
216
break;
217
default:
218
--spec;
219
break;
220
}
221
222
/* Conversion specifier */
223
224
/* whether valid input had been parsed */
225
bool value_parsed = false;
226
227
switch ( *spec )
228
{
229
case 'd':
230
status->base = 10;
231
break;
232
case 'i':
233
status->base = 0;
234
break;
235
case 'o':
236
status->base = 8;
237
status->flags |= E_unsigned;
238
break;
239
case 'u':
240
status->base = 10;
241
status->flags |= E_unsigned;
242
break;
243
case 'x':
244
status->base = 16;
245
status->flags |= E_unsigned;
246
break;
247
case 'f':
248
case 'F':
249
case 'e':
250
case 'E':
251
case 'g':
252
case 'G':
253
case 'a':
254
case 'A':
255
break;
256
case 'c':
257
{
258
char * c = va_arg( status->arg, char * );
259
/* for %c, default width is one */
260
if ( status->width == UINT_MAX )
261
{
262
status->width = 1;
263
}
264
/* reading until width reached or input exhausted */
265
while ( ( status->current < status->width ) &&
266
( ( rc = GET( status ) ) != EOF ) )
267
{
268
*(c++) = rc;
269
value_parsed = true;
270
}
271
/* width or input exhausted */
272
if ( value_parsed )
273
{
274
++status->n;
275
return ++spec;
276
}
277
else
278
{
279
/* input error, no character read */
280
if ( status->n == 0 )
281
{
282
status->n = -1;
283
}
284
return NULL;
285
}
286
}
287
case 's':
288
{
289
char * c = va_arg( status->arg, char * );
290
while ( ( status->current < status->width ) &&
291
( ( rc = GET( status ) ) != EOF ) )
292
{
293
if ( isspace( rc ) )
294
{
295
UNGET( rc, status );
296
if ( value_parsed )
297
{
298
/* matching sequence terminated by whitespace */
299
*c = '\0';
300
++status->n;
301
return ++spec;
302
}
303
else
304
{
305
/* matching error */
306
return NULL;
307
}
308
}
309
else
310
{
311
/* match */
312
value_parsed = true;
313
*(c++) = rc;
314
}
315
}
316
/* width or input exhausted */
317
if ( value_parsed )
318
{
319
*c = '\0';
320
++status->n;
321
return ++spec;
322
}
323
else
324
{
325
/* input error, no character read */
326
if ( status->n == 0 )
327
{
328
status->n = -1;
329
}
330
return NULL;
331
}
332
}
333
case '[':
334
{
335
const char * endspec = spec;
336
bool negative_scanlist = false;
337
if ( *(++endspec) == '^' )
338
{
339
negative_scanlist = true;
340
++endspec;
341
}
342
spec = endspec;
343
do
344
{
345
// TODO: This can run beyond a malformed format string
346
++endspec;
347
} while ( *endspec != ']' );
348
// read according to scanlist, equiv. to %s above
349
char * c = va_arg( status->arg, char * );
350
while ( ( status->current < status->width ) &&
351
( ( rc = GET( status ) ) != EOF ) )
352
{
353
if ( negative_scanlist )
354
{
355
if ( IN_SCANSET( spec, endspec, rc ) )
356
{
357
UNGET( rc, status );
358
break;
359
}
360
}
361
else
362
{
363
if ( ! IN_SCANSET( spec, endspec, rc ) )
364
{
365
UNGET( rc, status );
366
break;
367
}
368
}
369
value_parsed = true;
370
*(c++) = rc;
371
}
372
if ( value_parsed )
373
{
374
*c = '\0';
375
++status->n;
376
return ++endspec;
377
}
378
else
379
{
380
if ( rc == EOF )
381
{
382
status->n = -1;
383
}
384
return NULL;
385
}
386
}
387
case 'p':
388
status->base = 16;
389
// TODO: Like _PDCLIB_print, E_pointer(?)
390
status->flags |= E_unsigned | E_long;
391
break;
392
case 'n':
393
{
394
int * val = va_arg( status->arg, int * );
395
*val = status->i;
396
return ++spec;
397
}
398
default:
399
/* No conversion specifier. Bad conversion. */
400
return orig_spec;
401
}
402
403
if ( status->base != -1 )
404
{
405
/* integer conversion */
406
uintmax_t value = 0; /* absolute value read */
407
bool prefix_parsed = false;
408
int sign = 0;
409
while ( ( status->current < status->width ) &&
410
( ( rc = GET( status ) ) != EOF ) )
411
{
412
if ( isspace( rc ) )
413
{
414
if ( sign )
415
{
416
/* matching sequence terminated by whitespace */
417
UNGET( rc, status );
418
break;
419
}
420
else
421
{
422
/* leading whitespace not counted against width */
423
status->current--;
424
}
425
}
426
else if ( ! sign )
427
{
428
/* no sign parsed yet */
429
switch ( rc )
430
{
431
case '-':
432
sign = -1;
433
break;
434
case '+':
435
sign = 1;
436
break;
437
default:
438
/* not a sign; put back character */
439
sign = 1;
440
UNGET( rc, status );
441
break;
442
}
443
}
444
else if ( ! prefix_parsed )
445
{
446
/* no prefix (0x... for hex, 0... for octal) parsed yet */
447
prefix_parsed = true;
448
if ( rc != '0' )
449
{
450
/* not a prefix; if base not yet set, set to decimal */
451
if ( status->base == 0 )
452
{
453
status->base = 10;
454
}
455
UNGET( rc, status );
456
}
457
else
458
{
459
/* starts with zero, so it might be a prefix. */
460
/* check what follows next (might be 0x...) */
461
if ( ( status->current < status->width ) &&
462
( ( rc = GET( status ) ) != EOF ) )
463
{
464
if ( tolower( rc ) == 'x' )
465
{
466
/* 0x... would be prefix for hex base... */
467
if ( ( status->base == 0 ) ||
468
( status->base == 16 ) )
469
{
470
status->base = 16;
471
}
472
else
473
{
474
/* ...unless already set to other value */
475
UNGET( rc, status );
476
value_parsed = true;
477
}
478
}
479
else
480
{
481
/* 0... but not 0x.... would be octal prefix */
482
UNGET( rc, status );
483
if ( status->base == 0 )
484
{
485
status->base = 8;
486
}
487
/* in any case we have read a zero */
488
value_parsed = true;
489
}
490
}
491
else
492
{
493
/* failed to read beyond the initial zero */
494
value_parsed = true;
495
break;
496
}
497
}
498
}
499
else
500
{
501
char * digitptr = memchr( _PDCLIB_digits, tolower( rc ), status->base );
502
if ( digitptr == NULL )
503
{
504
/* end of input item */
505
UNGET( rc, status );
506
break;
507
}
508
value *= status->base;
509
value += digitptr - _PDCLIB_digits;
510
value_parsed = true;
511
}
512
}
513
/* width or input exhausted, or non-matching character */
514
if ( ! value_parsed )
515
{
516
/* out of input before anything could be parsed - input error */
517
/* FIXME: if first character does not match, value_parsed is not set - but it is NOT an input error */
518
if ( ( status->n == 0 ) && ( rc == EOF ) )
519
{
520
status->n = -1;
521
}
522
return NULL;
523
}
524
/* convert value to target type and assign to parameter */
525
if ( ! ( status->flags & E_suppressed ) )
526
{
527
switch ( status->flags & ( E_char | E_short | E_long | E_llong |
528
E_intmax | E_size | E_ptrdiff |
529
E_unsigned ) )
530
{
531
case E_char:
532
*( va_arg( status->arg, char * ) ) = (char)( value * sign );
533
break;
534
case E_char | E_unsigned:
535
*( va_arg( status->arg, unsigned char * ) ) = (unsigned char)( value * sign );
536
break;
537
538
case E_short:
539
*( va_arg( status->arg, short * ) ) = (short)( value * sign );
540
break;
541
case E_short | E_unsigned:
542
*( va_arg( status->arg, unsigned short * ) ) = (unsigned short)( value * sign );
543
break;
544
545
case 0:
546
*( va_arg( status->arg, int * ) ) = (int)( value * sign );
547
break;
548
case E_unsigned:
549
*( va_arg( status->arg, unsigned int * ) ) = (unsigned int)( value * sign );
550
break;
551
552
case E_long:
553
*( va_arg( status->arg, long * ) ) = (long)( value * sign );
554
break;
555
case E_long | E_unsigned:
556
*( va_arg( status->arg, unsigned long * ) ) = (unsigned long)( value * sign );
557
break;
558
559
case E_llong:
560
*( va_arg( status->arg, long long * ) ) = (long long)( value * sign );
561
break;
562
case E_llong | E_unsigned:
563
*( va_arg( status->arg, unsigned long long * ) ) = (unsigned long long)( value * sign );
564
break;
565
566
case E_intmax:
567
*( va_arg( status->arg, intmax_t * ) ) = (intmax_t)( value * sign );
568
break;
569
case E_intmax | E_unsigned:
570
*( va_arg( status->arg, uintmax_t * ) ) = (uintmax_t)( value * sign );
571
break;
572
573
case E_size:
574
/* E_size always implies unsigned */
575
*( va_arg( status->arg, size_t * ) ) = (size_t)( value * sign );
576
break;
577
578
case E_ptrdiff:
579
/* E_ptrdiff always implies signed */
580
*( va_arg( status->arg, ptrdiff_t * ) ) = (ptrdiff_t)( value * sign );
581
break;
582
583
default:
584
puts( "UNSUPPORTED SCANF FLAG COMBINATION" );
585
return NULL; /* behaviour unspecified */
586
}
587
++(status->n);
588
}
589
return ++spec;
590
}
591
/* TODO: Floats. */
592
return NULL;
593
}
594
#endif
595
596
#ifdef TEST
597
#define _PDCLIB_FILEID "_PDCLIB/scan.c"
598
#define _PDCLIB_STRINGIO
599
600
#include "_PDCLIB_test.h"
601
602
#ifndef REGTEST
603
static int testscanf( char const * s, char const * format, ... )
604
{
605
struct _PDCLIB_status_t status;
606
status.n = 0;
607
status.i = 0;
608
status.s = (char *)s;
609
status.stream = NULL;
610
va_start( status.arg, format );
611
if ( *(_PDCLIB_scan( format, &status )) != '\0' )
612
{
613
printf( "_PDCLIB_scan() did not return end-of-specifier on '%s'.\n", format );
614
++TEST_RESULTS;
615
}
616
va_end( status.arg );
617
return status.n;
618
}
619
#endif
620
621
#define TEST_CONVERSION_ONLY
622
623
/* Test driver. Unless REGTEST is defined, the test cases are pulled in
   textually from scanf_testcases.h and run inside this function.

   Returns TEST_RESULTS.
*/
int main( void )
{
#ifndef REGTEST
    /* presumably the input buffer used by the included test cases - TODO confirm */
    char source[100];
#include "scanf_testcases.h"
#endif
    return TEST_RESULTS;
}

#endif
633
634