// tools/textconv.c (from the MorsGames/sm64plus GitHub repository)
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdarg.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>

#include "hashtable.h"
#include "utf8.h"

// Number of elements in a true array (not a pointer). The argument is
// parenthesized before indexing so that any array-valued expression works.
#define ARRAY_COUNT(arr) (sizeof(arr) / sizeof((arr)[0]))

// One charmap rule: a short sequence of Unicode code points and the bytes
// that the sequence is converted to.
struct CharmapEntry
{
    uint32_t unicode[3]; // code points to match
    int length;          // length of the unicode array. TODO: use dynamic memory allocation
    int bytesCount;      // number of used slots in `bytes`
    uint8_t bytes[2];    // bytes to convert unicode array to, (e.g. 'A' = 0x0A)
};

// Table of struct CharmapEntry records: created in main(), filled by
// read_charmap() and queried by convert_string().
static struct HashTable *charmap;

// Prints "error: ", the printf-style message and a newline to stderr, then
// terminates the program with exit status 1. This function does not return.
static void fatal_error(const char *msgfmt, ...)
{
    va_list args;

    fputs("error: ", stderr);

    va_start(args, msgfmt);
    vfprintf(stderr, msgfmt, args);
    va_end(args);

    fputc('\n', stderr);

    exit(1);
}

// Prints "<filename>: line <lineNum>: ", the printf-style message and a
// newline to stderr, then terminates the program with exit status 1.
// This function does not return.
static void parse_error(const char *filename, int lineNum, const char *msgfmt, ...)
{
    va_list args;

    fprintf(stderr, "%s: line %i: ", filename, lineNum);

    va_start(args, msgfmt);
    vfprintf(stderr, msgfmt, args);
    va_end(args);

    fputc('\n', stderr);

    exit(1);
}

// Reads the whole file and returns a null-terminated buffer with its contents.
// A filename of "-" means "read from stdin". The buffer is allocated with
// malloc()/realloc(); the caller is responsible for free()ing it.
// Every failure is reported through fatal_error(), which exits the program.
void *read_text_file(const char *filename)
{
    if (strcmp(filename, "-") != 0)
    {
        FILE *file = fopen(filename, "rb");
        uint8_t *buffer;
        long fileSize;
        size_t size;

        if (file == NULL)
            fatal_error("failed to open file '%s' for reading: %s", filename, strerror(errno));

        // get size
        if (fseek(file, 0, SEEK_END) != 0)
            fatal_error("failed to seek in file '%s': %s", filename, strerror(errno));
        fileSize = ftell(file);
        if (fileSize < 0) // ftell reports failure as -1; never use that as a size
            fatal_error("failed to get size of file '%s': %s", filename, strerror(errno));
        size = (size_t)fileSize;

        // allocate buffer
        buffer = malloc(size + 1);
        if (buffer == NULL)
            fatal_error("could not allocate buffer of size %lu", (unsigned long)(size + 1));

        // read file
        if (fseek(file, 0, SEEK_SET) != 0)
            fatal_error("failed to seek in file '%s': %s", filename, strerror(errno));
        // fread() returns 0 when asked for a zero-sized item, so an empty file
        // must skip the read instead of being treated as a read error.
        if (size != 0 && fread(buffer, size, 1, file) != 1)
            fatal_error("error reading from file '%s': %s", filename, strerror(errno));

        // null-terminate the buffer
        buffer[size] = 0;

        fclose(file);

        return buffer;
    }
    else
    {
        size_t size = 0;
        size_t capacity = 1024;
        uint8_t *buffer = malloc(capacity + 1);

        if (buffer == NULL)
            fatal_error("could not allocate buffer of size %lu", (unsigned long)(capacity + 1));

        for (;;)
        {
            size += fread(buffer + size, 1, capacity - size, stdin);
            if (size == capacity)
            {
                // buffer is full: double it (keeping one spare byte for the terminator)
                capacity *= 2;
                buffer = realloc(buffer, capacity + 1);
                if (buffer == NULL)
                    fatal_error("could not allocate buffer of size %lu", (unsigned long)(capacity + 1));
            }
            else if (feof(stdin))
            {
                break;
            }
            else
            {
                // short read that is not end-of-file
                fatal_error("error reading from stdin: %s", strerror(errno));
            }
        }

        // null-terminate the buffer
        buffer[size] = 0;
        return buffer;
    }
}

// Returns a pointer to the first character of `str` that is not whitespace
// (as defined by isspace()); this may be the terminating null byte.
static char *skip_whitespace(char *str)
{
    // <ctype.h> functions require a value representable as unsigned char;
    // a plain (possibly signed) char holding a UTF-8 byte would be negative.
    while (isspace((unsigned char)*str))
        str++;
    return str;
}

// Null-terminates the current line (overwriting its '\n') and returns a
// pointer to the start of the next line. If the string ends before a '\n'
// is found, nothing is modified and a pointer to the terminating null byte
// is returned.
static char *line_split(char *str)
{
    while (*str != '\n')
    {
        if (*str == 0)
            return str; // end of string
        str++;
    }
    *str = 0; // terminate line
    return str + 1;
}

// Parses an integer at the start of `str` (base auto-detected by strtol:
// decimal, 0x hex or leading-0 octal) and stores it in `*num`.
// Returns a pointer to the first character after the number, or NULL if no
// number could be parsed or the value does not fit in an unsigned int
// (negative or too large). `*num` is set to 0 on failure.
static char *parse_number(const char *str, unsigned int *num)
{
    char *endptr;
    long n;

    *num = 0;
    errno = 0; // strtol only sets errno on failure, so clear it first
    n = strtol(str, &endptr, 0);

    if (endptr == str)
        return NULL; // no digits were consumed
    if (errno == ERANGE || n < 0 || (unsigned long)n > UINT_MAX)
        return NULL; // would be silently truncated by the conversion below

    *num = (unsigned int)n;
    return endptr;
}

// Returns nonzero if `c` may appear in an identifier: an alphanumeric
// character (per isalnum()) or an underscore.
static int is_identifier_char(char c)
{
    // cast first: passing a negative char to a <ctype.h> function is undefined
    return isalnum((unsigned char)c) || c == '_';
}

// Maps the character following a backslash to the character that the escape
// sequence stands for (e.g. 'n' -> '\n'). Returns 0 when `c` is not one of
// the supported escapes: a b f n r t v \ ' "
static int get_escape_char(int c)
{
    // A switch needs neither a lookup table rebuilt on every call nor a
    // bounds check on `c`; any other value simply falls through to default.
    switch (c)
    {
    case 'a':  return '\a';
    case 'b':  return '\b';
    case 'f':  return '\f';
    case 'n':  return '\n';
    case 'r':  return '\r';
    case 't':  return '\t';
    case 'v':  return '\v';
    case '\\': return '\\';
    case '\'': return '\'';
    case '"':  return '"';
    default:   return 0;
    }
}

static void read_charmap(const char *filename)
182
{
183
char *filedata = read_text_file(filename);
184
char *line = filedata;
185
int lineNum = 1;
186
187
while (line[0] != 0)
188
{
189
char *nextLine = line_split(line);
190
191
struct CharmapEntry entry;
192
193
line = skip_whitespace(line);
194
if (line[0] != 0 && line[0] != '#') // ignore empty lines and comments
195
{
196
int len = 0;
197
/* Read Character */
198
199
// opening quote
200
if (*line != '\'')
201
parse_error(filename, lineNum, "expected '");
202
line++;
203
204
// perform analysis of charmap entry, we are in the quote
205
while(1)
206
{
207
if(*line == '\'')
208
{
209
line++;
210
break;
211
}
212
else if(len == ARRAY_COUNT(entry.unicode))
213
{
214
// TODO: Use dynamic memory allocation so this is unnecessary.
215
parse_error(filename, lineNum, "string limit exceeded");
216
}
217
else if (*line == '\\')
218
{
219
line++; // advance to get the character being escaped
220
if (*line == '\r')
221
line++;
222
if (*line == '\n')
223
{
224
// Backslash at end of line is ignored
225
continue;
226
}
227
entry.unicode[len] = get_escape_char(*line);
228
if (entry.unicode[len] == 0)
229
parse_error(filename, lineNum, "unknown escape sequence \\%c", *line);
230
line++; // increment again to get past the escape sequence.
231
}
232
else
233
{
234
line = utf8_decode(line, &entry.unicode[len]);
235
if (line == NULL)
236
parse_error(filename, lineNum, "invalid UTF8");
237
}
238
len++;
239
}
240
entry.length = len;
241
242
// equals sign
243
line = skip_whitespace(line);
244
if (*line != '=')
245
parse_error(filename, lineNum, "expected = after character \\%c", *line);
246
line++;
247
248
entry.bytesCount = 0;
249
250
// value
251
while (1)
252
{
253
uint32_t value;
254
255
if (entry.bytesCount >= 2)
256
parse_error(filename, lineNum, "more than 2 values specified");
257
258
line = skip_whitespace(line);
259
260
line = parse_number(line, &value);
261
if (line == NULL)
262
parse_error(filename, lineNum, "expected number after =");
263
if (value > 0xFF)
264
parse_error(filename, lineNum, "0x%X is larger than 1 byte", value);
265
266
entry.bytes[entry.bytesCount] = value;
267
entry.bytesCount++;
268
269
line = skip_whitespace(line);
270
if (*line == 0)
271
break;
272
if (*line != ',')
273
parse_error(filename, lineNum, "junk at end of line");
274
line++;
275
}
276
277
if (hashtable_query(charmap, &entry) != NULL)
278
parse_error(filename, lineNum, "entry for character already exists");
279
hashtable_insert(charmap, &entry);
280
}
281
282
line = nextLine;
283
lineNum++;
284
}
285
286
free(filedata);
287
}
288
289
// Returns the 1-based line number of `pos` within the text beginning at
// `start`, computed by counting the '\n' characters in [start, pos).
static int count_line_num(const char *start, const char *pos)
{
    const char *c;
    int lineNum = 1;

    for (c = start; c < pos; c++)
    {
        if (*c == '\n')
            lineNum++;
    }
    return lineNum;
}

static char *convert_string(char *pos, FILE *fout, const char *inputFileName, char *start, int uncompressed)
303
{
304
int hasString = 0;
305
306
while (1)
307
{
308
pos = skip_whitespace(pos);
309
if (*pos == ')')
310
{
311
if (hasString)
312
break;
313
else
314
parse_error(inputFileName, count_line_num(start, pos), "expected quoted string after '_('");
315
}
316
else if (*pos != '"')
317
parse_error(inputFileName, count_line_num(start, pos), "unexpected character '%c'", *pos);
318
pos++;
319
320
hasString = 1;
321
322
// convert quoted string
323
while (*pos != '"')
324
{
325
struct CharmapEntry input;
326
struct CharmapEntry *last_valid_entry = NULL;
327
struct CharmapEntry *entry;
328
int i, c;
329
int length = 0;
330
char* last_valid_pos = NULL;
331
332
// safely erase the unicode area before use
333
memset(input.unicode, 0, sizeof (input.unicode));
334
input.length = 0;
335
336
// Find a charmap entry of longest length possible starting from this position
337
while (*pos != '"')
338
{
339
if ((uncompressed && length == 1) || length == ARRAY_COUNT(entry->unicode))
340
{
341
// Stop searching after length 3; we only support strings of lengths up
342
// to that right now. Unless uncompressed is set, in which we ignore multi
343
// texts by discarding entries longer than 1.
344
break;
345
}
346
347
if (*pos == 0)
348
parse_error(inputFileName, count_line_num(start, pos), "EOF in string literal");
349
if (*pos == '\\')
350
{
351
pos++;
352
c = get_escape_char(*pos);
353
if (c == 0)
354
parse_error(inputFileName, count_line_num(start, pos), "unknown escape sequence \\%c", *pos);
355
input.unicode[length] = c;
356
pos++;
357
}
358
else
359
{
360
pos = utf8_decode(pos, &input.unicode[length]);
361
if (pos == NULL)
362
parse_error(inputFileName, count_line_num(start, pos), "invalid unicode encountered in file");
363
}
364
length++;
365
input.length = length;
366
367
entry = hashtable_query(charmap, &input);
368
if (entry != NULL)
369
{
370
last_valid_entry = entry;
371
last_valid_pos = pos;
372
}
373
}
374
375
entry = last_valid_entry;
376
pos = last_valid_pos;
377
if (entry == NULL)
378
parse_error(inputFileName, count_line_num(start, pos), "no charmap entry for U+%X", input.unicode[0]);
379
for (i = 0; i < entry->bytesCount; i++)
380
fprintf(fout, "0x%02X,", entry->bytes[i]);
381
}
382
pos++; // skip over closing '"'
383
}
384
pos++; // skip over closing ')'
385
fputs("0xFF", fout);
386
return pos;
387
}
388
389
// Copies the text of `infilename` to `outfilename`, replacing every `_(...)`
// or `__(...)` sequence with the byte list produced by convert_string().
// Comments (`//` and `/* */`) and ordinary string literals are copied through
// untouched. A name of "-" means stdin for input and stdout for output.
// Failures are reported via fatal_error()/parse_error(), which exit the program.
static void convert_file(const char *infilename, const char *outfilename)
{
    char *in = read_text_file(infilename);
    FILE *fout = strcmp(outfilename, "-") != 0 ? fopen(outfilename, "wb") : stdout;

    if (fout == NULL)
        fatal_error("failed to open file '%s' for writing: %s", outfilename, strerror(errno));

    char *start = in; // first input character not yet written to fout
    char *end = in;   // position of the '_' that begins the current _( sequence
    char *pos = in;   // current scan position

    while (1)
    {
        if (*pos == 0) // end of file
            goto eof;

        // check for comment
        if (*pos == '/')
        {
            pos++;
            // skip over // comment
            if (*pos == '/')
            {
                pos++;
                // skip over next newline
                while (*pos != '\n')
                {
                    if (*pos == 0)
                        goto eof;
                    pos++;
                }
                pos++;
            }
            // skip over /* */ comment
            else if (*pos == '*')
            {
                pos++;
                // advance until pos points at the closing "*/" pair; pos[1] is
                // only read when pos[0] is '*', so it never reads past the
                // terminating null byte
                while (!(pos[0] == '*' && pos[1] == '/'))
                {
                    if (*pos == 0)
                        goto eof;
                    pos++;
                }
                pos += 2;
            }
        }
        // skip over normal string literal
        else if (*pos == '"')
        {
            pos++;
            while (*pos != '"')
            {
                if (*pos == 0)
                    goto eof;
                // skip the escaped character too, unless the backslash is the
                // last character of the input
                if (*pos == '\\' && pos[1] != 0)
                    pos++;
                pos++;
            }
            pos++;
        }
        // check for _( sequence
        else if ((*pos == '_') && (pos == in || !is_identifier_char(pos[-1])))
        {
            int uncompressed = 0;
            end = pos;
            pos++;
            if (*pos == '_') // an extra _ signifies uncompressed strings. Enable uncompressed flag
            {
                pos++;
                uncompressed = 1;
            }
            if (*pos == '(')
            {
                pos++;
                // flush everything up to (not including) the '_' marker
                fwrite(start, end - start, 1, fout);
                pos = convert_string(pos, fout, infilename, in, uncompressed);
                start = pos;
            }
        }
        else
        {
            pos++;
        }
    }

eof:
    // flush whatever remains after the last converted sequence
    fwrite(start, pos - start, 1, fout);
    if (strcmp(outfilename, "-") != 0)
    {
        // fclose() flushes buffered output, so a failure here means lost data
        if (fclose(fout) != 0)
            fatal_error("failed to write file '%s': %s", outfilename, strerror(errno));
    }
    free(in);
}

static unsigned int charmap_hash(const void *value)
483
{
484
const struct CharmapEntry* entry = value;
485
unsigned int ret = 0;
486
for (int i = 0; i < entry->length; i++)
487
ret = ret * 17 + entry->unicode[i];
488
return ret;
489
}
490
491
static int charmap_cmp(const void *a, const void *b)
492
{
493
const struct CharmapEntry *ea = a;
494
const struct CharmapEntry *eb = b;
495
if (ea->length != eb->length)
496
return 0;
497
for(int i = 0; i < ea->length; i++)
498
if(ea->unicode[i] != eb->unicode[i])
499
return 0;
500
return 1;
501
}
502
503
// Prints the command-line synopsis to stderr. `execName` is the program name
// to show (main() passes argv[0]).
static void usage(const char *execName)
{
    fprintf(stderr, "Usage: %s CHARMAP INPUT OUTPUT\n", execName);
}

int main(int argc, char **argv)
509
{
510
if (argc != 4)
511
{
512
usage(argv[0]);
513
return 1;
514
}
515
516
charmap = hashtable_new(charmap_hash, charmap_cmp, 256, sizeof(struct CharmapEntry));
517
518
read_charmap(argv[1]);
519
convert_file(argv[2], argv[3]);
520
521
hashtable_free(charmap);
522
523
return 0;
524
}
525
526