Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
wine-mirror
GitHub Repository: wine-mirror/wine
Path: blob/master/tools/wrc/utils.c
4389 views
1
/*
2
* Utility routines
3
*
4
* Copyright 1998 Bertho A. Stultiens
5
*
6
* This library is free software; you can redistribute it and/or
7
* modify it under the terms of the GNU Lesser General Public
8
* License as published by the Free Software Foundation; either
9
* version 2.1 of the License, or (at your option) any later version.
10
*
11
* This library is distributed in the hope that it will be useful,
12
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
* Lesser General Public License for more details.
15
*
16
* You should have received a copy of the GNU Lesser General Public
17
* License along with this library; if not, write to the Free Software
18
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
19
*/
20
21
#include "config.h"

#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "../tools.h"
#include "wrc.h"
#include "winternl.h"
#include "utils.h"
#include "parser.h"
35
36
/* #define WANT_NEAR_INDICATION */
37
38
#ifdef WANT_NEAR_INDICATION
39
/* Replace every non-printable character of str with a space, in place. */
void make_print(char *str)
{
    for (; *str; str++)
    {
        /* The <ctype.h> classifiers require an unsigned char value (or EOF);
         * passing a negative plain char is undefined behaviour. */
        if (!isprint((unsigned char)*str))
            *str = ' ';
    }
}
48
#endif
49
50
static void generic_msg(const char *s, const char *t, const char *n, va_list ap)
51
{
52
fprintf(stderr, "%s:%d:%d: %s: ", input_name ? input_name : "stdin", line_number, char_number, t);
53
vfprintf(stderr, s, ap);
54
#ifdef WANT_NEAR_INDICATION
55
{
56
char *cpy;
57
if(n)
58
{
59
cpy = xstrdup(n);
60
make_print(cpy);
61
fprintf(stderr, " near '%s'", cpy);
62
free(cpy);
63
}
64
}
65
#endif
66
}
67
68
69
int parser_error(const char *s, ...)
70
{
71
va_list ap;
72
va_start(ap, s);
73
generic_msg(s, "Error", parser_text, ap);
74
fputc( '\n', stderr );
75
va_end(ap);
76
exit(1);
77
return 1;
78
}
79
80
int parser_warning(const char *s, ...)
81
{
82
va_list ap;
83
va_start(ap, s);
84
generic_msg(s, "Warning", parser_text, ap);
85
va_end(ap);
86
return 0;
87
}
88
89
/*
 * Print "Error: " plus the formatted message, followed by the description of
 * the errno value that was current when the function was called, then exit
 * with status 2.
 */
void fatal_perror( const char *msg, ... )
{
    /* the stdio calls below are allowed to modify errno, so remember the
     * value the caller wants reported */
    const int saved_errno = errno;
    va_list valist;

    va_start( valist, msg );
    fprintf(stderr, "Error: ");
    vfprintf( stderr, msg, valist );
    va_end( valist );

    errno = saved_errno;
    perror( " " );
    exit(2);
}
99
100
/* Print "Error: " plus the formatted message to stderr and exit with status 2. */
void error(const char *s, ...)
{
    va_list args;

    fputs("Error: ", stderr);
    va_start(args, s);
    vfprintf(stderr, s, args);
    va_end(args);

    exit(2);
}
109
110
/* Print "Warning: " plus the formatted message to stderr. */
void warning(const char *s, ...)
{
    va_list args;

    fputs("Warning: ", stderr);
    va_start(args, s);
    vfprintf(stderr, s, args);
    va_end(args);
}
118
119
void chat(const char *s, ...)
120
{
121
if(debuglevel & DEBUGLEVEL_CHAT)
122
{
123
va_list ap;
124
va_start(ap, s);
125
fprintf(stderr, "FYI: ");
126
vfprintf(stderr, s, ap);
127
va_end(ap);
128
}
129
}
130
131
/*
 * Compare two NUL-terminated char strings, ignoring case for the ASCII
 * letters a-z only.  Returns the difference of the first pair of folded
 * characters that differ, or 0 when the strings are equal.
 */
int compare_striA( const char *str1, const char *str2 )
{
    char a, b;

    do
    {
        a = *str1++;
        b = *str2++;
        /* only the A-Z range is case-insensitive */
        if (a >= 'a' && a <= 'z') a += 'A' - 'a';
        if (b >= 'a' && b <= 'z') b += 'A' - 'a';
    } while (a && a == b);

    return a - b;
}
143
144
/*
 * Compare two NUL-terminated WCHAR strings, ignoring case for the ASCII
 * letters a-z only.  Returns the difference of the first pair of folded
 * characters that differ, or 0 when the strings are equal.
 */
int compare_striW( const WCHAR *str1, const WCHAR *str2 )
{
    WCHAR a, b;

    do
    {
        a = *str1++;
        b = *str2++;
        /* only the A-Z range is case-insensitive */
        if (a >= 'a' && a <= 'z') a += 'A' - 'a';
        if (b >= 'a' && b <= 'z') b += 'A' - 'a';
    } while (a && a == b);

    return a - b;
}
156
157
/*
 * Compare a NUL-terminated char string with a NUL-terminated WCHAR string,
 * ignoring case for the ASCII letters a-z only.  Bytes of str1 are treated
 * as unsigned values.  Returns the difference of the first pair of folded
 * characters that differ, or 0 when the strings are equal.
 */
int compare_striAW( const char *str1, const WCHAR *str2 )
{
    WCHAR a, b;

    do
    {
        a = (unsigned char)*str1++;
        b = *str2++;
        /* only the A-Z range is case-insensitive */
        if (a >= 'a' && a <= 'z') a += 'A' - 'a';
        if (b >= 'a' && b <= 'z') b += 'A' - 'a';
    } while (a && a == b);

    return a - b;
}
169
170
/*
171
*****************************************************************************
172
* Function : compare_name_id
173
* Syntax : int compare_name_id(const name_id_t *n1, const name_id_t *n2)
174
* Input :
175
* Output :
176
* Description :
177
* Remarks :
178
*****************************************************************************
179
*/
180
int compare_name_id(const name_id_t *n1, const name_id_t *n2)
181
{
182
if (n1->type != n2->type) return n1->type == name_ord ? 1 : -1;
183
if (n1->type == name_ord) return n1->name.i_name - n2->name.i_name;
184
185
if (n1->name.s_name->type == str_char)
186
{
187
if (n2->name.s_name->type == str_char)
188
return compare_striA(n1->name.s_name->str.cstr, n2->name.s_name->str.cstr);
189
return compare_striAW(n1->name.s_name->str.cstr, n2->name.s_name->str.wstr);
190
}
191
else
192
{
193
if (n2->name.s_name->type == str_char)
194
return -compare_striAW(n2->name.s_name->str.cstr, n1->name.s_name->str.wstr);
195
return compare_striW(n1->name.s_name->str.wstr, n2->name.s_name->str.wstr);
196
}
197
}
198
199
#ifdef _WIN32
200
201
/* Return non-zero when the system reports codepage id as valid. */
int is_valid_codepage(int id)
{
    const int valid = IsValidCodePage( id );

    return valid;
}
205
206
static WCHAR *codepage_to_unicode( int codepage, const char *src, int srclen, int *dstlen )
207
{
208
WCHAR *dst = xmalloc( (srclen + 1) * sizeof(WCHAR) );
209
DWORD ret = MultiByteToWideChar( codepage, MB_ERR_INVALID_CHARS, src, srclen, dst, srclen );
210
if (!ret) return NULL;
211
dst[ret] = 0;
212
*dstlen = ret;
213
return dst;
214
}
215
216
/*
 * Return the default ANSI codepage of language lang: 1252 for the neutral
 * language (0), -1 when the locale information cannot be retrieved.
 */
int get_language_codepage( language_t lang )
{
    DWORD codepage;
    const int nchars = sizeof(codepage) / sizeof(WCHAR);

    if (!lang) return 1252;

    /* with LOCALE_RETURN_NUMBER the value is stored as a number in the buffer */
    if (GetLocaleInfoW( lang, LOCALE_IDEFAULTANSICODEPAGE | LOCALE_RETURN_NUMBER,
                        (WCHAR *)&codepage, nchars ))
        return codepage;

    return -1;
}
225
226
language_t get_language_from_name( const char *name )
227
{
228
WCHAR nameW[LOCALE_NAME_MAX_LENGTH];
229
230
MultiByteToWideChar( 1252, 0, name, -1, nameW, ARRAY_SIZE(nameW) );
231
return LocaleNameToLCID( nameW, LOCALE_ALLOW_NEUTRAL_NAMES );
232
}
233
234
#else /* _WIN32 */
235
236
/* Conversion tables of one codepage, as set up by init_nls_info(). */
struct nls_info
{
    unsigned short  codepage;      /* codepage identifier (word 1 of the file header) */
    unsigned short  unidef;        /* table value reported for unmappable input (header word 4) */
    unsigned short  trans_unidef;  /* input character that legitimately maps to unidef (header word 6) */
    unsigned short *cp2uni;        /* byte -> WCHAR table, indexed by the unsigned byte value */
    unsigned short *dbcs_offsets;  /* lead byte -> trail table offset (0 = single byte); NULL if not DBCS */
};
244
245
/* Codepage tables loaded so far; get_nls_info() treats the first entry whose
 * codepage is 0 as the end of the used entries. */
static struct nls_info nlsinfo[128];
246
247
static void init_nls_info( struct nls_info *info, unsigned short *ptr )
248
{
249
unsigned short hdr_size = ptr[0];
250
251
info->codepage = ptr[1];
252
info->unidef = ptr[4];
253
info->trans_unidef = ptr[6];
254
ptr += hdr_size;
255
info->cp2uni = ++ptr;
256
ptr += 256;
257
if (*ptr++) ptr += 256; /* glyph table */
258
info->dbcs_offsets = *ptr ? ptr + 1 : NULL;
259
}
260
261
static const struct nls_info *get_nls_info( unsigned int codepage )
262
{
263
unsigned short *data;
264
char *path;
265
unsigned int i;
266
size_t size;
267
268
for (i = 0; i < ARRAY_SIZE(nlsinfo) && nlsinfo[i].codepage; i++)
269
if (nlsinfo[i].codepage == codepage) return &nlsinfo[i];
270
271
assert( i < ARRAY_SIZE(nlsinfo) );
272
273
for (i = 0; nlsdirs[i]; i++)
274
{
275
path = strmake( "%s/c_%03u.nls", nlsdirs[i], codepage );
276
if ((data = read_file( path, &size )))
277
{
278
free( path );
279
init_nls_info( &nlsinfo[i], data );
280
return &nlsinfo[i];
281
}
282
free( path );
283
}
284
return NULL;
285
}
286
287
int is_valid_codepage(int cp)
288
{
289
return cp == CP_UTF8 || get_nls_info( cp );
290
}
291
292
static WCHAR *codepage_to_unicode( int codepage, const char *src, int srclen, int *dstlen )
293
{
294
const struct nls_info *info = get_nls_info( codepage );
295
unsigned int i;
296
WCHAR dbch, *dst = xmalloc( (srclen + 1) * sizeof(WCHAR) );
297
298
if (!info) error( "codepage %u not supported\n", codepage );
299
300
if (info->dbcs_offsets)
301
{
302
for (i = 0; srclen; i++, srclen--, src++)
303
{
304
unsigned short off = info->dbcs_offsets[(unsigned char)*src];
305
if (off)
306
{
307
if (srclen == 1) return NULL;
308
dbch = (src[0] << 8) | (unsigned char)src[1];
309
src++;
310
srclen--;
311
dst[i] = info->dbcs_offsets[off + (unsigned char)*src];
312
if (dst[i] == info->unidef && dbch != info->trans_unidef) return NULL;
313
}
314
else
315
{
316
dst[i] = info->cp2uni[(unsigned char)*src];
317
if (dst[i] == info->unidef && *src != info->trans_unidef) return NULL;
318
}
319
}
320
}
321
else
322
{
323
for (i = 0; i < srclen; i++)
324
{
325
dst[i] = info->cp2uni[(unsigned char)src[i]];
326
if (dst[i] == info->unidef && src[i] != info->trans_unidef) return NULL;
327
}
328
}
329
dst[i] = 0;
330
*dstlen = i;
331
return dst;
332
}
333
334
static const NLS_LOCALE_LCID_INDEX *lcids_index;
335
static const NLS_LOCALE_HEADER *locale_table;
336
static const NLS_LOCALE_LCNAME_INDEX *lcnames_index;
337
static const WCHAR *locale_strings;
338
339
static void load_locale_nls(void)
340
{
341
struct
342
{
343
unsigned int ctypes;
344
unsigned int unknown1;
345
unsigned int unknown2;
346
unsigned int unknown3;
347
unsigned int locales;
348
unsigned int charmaps;
349
unsigned int geoids;
350
unsigned int scripts;
351
} *header;
352
char *path;
353
unsigned int i;
354
size_t size;
355
356
for (i = 0; nlsdirs[i]; i++)
357
{
358
path = strmake( "%s/locale.nls", nlsdirs[i] );
359
header = read_file( path, &size );
360
free( path );
361
if (!header) continue;
362
locale_table = (const NLS_LOCALE_HEADER *)((char *)header + header->locales);
363
lcids_index = (const NLS_LOCALE_LCID_INDEX *)((char *)locale_table + locale_table->lcids_offset);
364
lcnames_index = (const NLS_LOCALE_LCNAME_INDEX *)((char *)locale_table + locale_table->lcnames_offset);
365
locale_strings = (const WCHAR *)((char *)locale_table + locale_table->strings_offset);
366
return;
367
}
368
error( "unable to load locale.nls\n" );
369
}
370
371
/*
 * Compare the char string n1 with the WCHAR string n2, ignoring case for the
 * ASCII letters a-z only.  Returns the difference of the first pair of
 * folded characters that differ, or 0 when the names are equal.
 */
static int compare_locale_names( const char *n1, const WCHAR *n2 )
{
    for (;; n1++, n2++)
    {
        const WCHAR c1 = (unsigned char)*n1;
        const WCHAR c2 = *n2;
        const WCHAR u1 = (c1 >= 'a' && c1 <= 'z') ? c1 - ('a' - 'A') : c1;
        const WCHAR u2 = (c2 >= 'a' && c2 <= 'z') ? c2 - ('a' - 'A') : c2;

        if (u1 != u2 || !u1) return u1 - u2;
    }
}
382
383
static const NLS_LOCALE_LCNAME_INDEX *find_lcname_entry( const char *name )
384
{
385
int min = 0, max = locale_table->nb_lcnames - 1;
386
387
if (!name) return NULL;
388
while (min <= max)
389
{
390
int res, pos = (min + max) / 2;
391
const WCHAR *str = locale_strings + lcnames_index[pos].name;
392
res = compare_locale_names( name, str + 1 );
393
if (res < 0) max = pos - 1;
394
else if (res > 0) min = pos + 1;
395
else return &lcnames_index[pos];
396
}
397
return NULL;
398
}
399
400
/*
 * Binary-search lcids_index for lcid.  Returns the matching entry, or NULL
 * when the id is not in the table.  locale_table must already be loaded.
 */
static const NLS_LOCALE_LCID_INDEX *find_lcid_entry( LCID lcid )
{
    int lo = 0, hi = locale_table->nb_lcids - 1;

    while (lo <= hi)
    {
        const int mid = lo + (hi - lo) / 2;
        const NLS_LOCALE_LCID_INDEX *candidate = &lcids_index[mid];

        if (lcid < candidate->id) hi = mid - 1;
        else if (lcid > candidate->id) lo = mid + 1;
        else return candidate;
    }
    return NULL;
}
413
414
/* Return the locale record with index idx from the loaded locale table. */
static const NLS_LOCALE_DATA *get_locale_data( UINT idx )
{
    const char *base = (const char *)locale_table;
    ULONG offset = locale_table->locales_offset + idx * locale_table->locale_size;

    return (const NLS_LOCALE_DATA *)(base + offset);
}
419
420
/*
 * Return the default ANSI codepage of language lang, loading locale.nls on
 * first use.  The neutral language (0) and default English yield 1252
 * without consulting the table; an unknown language yields -1.
 */
int get_language_codepage( language_t lang )
{
    const NLS_LOCALE_LCID_INDEX *entry;

    if (!lang || lang == MAKELANGID( LANG_ENGLISH, SUBLANG_DEFAULT )) return 1252;

    if (!locale_table) load_locale_nls();

    entry = find_lcid_entry( lang );
    if (!entry) return -1;
    return get_locale_data( entry->idx )->idefaultansicodepage;
}
430
431
language_t get_language_from_name( const char *name )
432
{
433
const NLS_LOCALE_LCNAME_INDEX *entry;
434
435
if (!locale_table) load_locale_nls();
436
if (!(entry = find_lcname_entry( name ))) return 0;
437
return get_locale_data( entry->idx )->unique_lcid;
438
}
439
440
#endif /* _WIN32 */
441
442
static WCHAR *utf8_to_unicode( const char *src, int srclen, int *dstlen )
443
{
444
static const char utf8_length[128] =
445
{
446
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x80-0x8f */
447
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x90-0x9f */
448
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xa0-0xaf */
449
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xb0-0xbf */
450
0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0xc0-0xcf */
451
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0xd0-0xdf */
452
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, /* 0xe0-0xef */
453
3,3,3,3,3,0,0,0,0,0,0,0,0,0,0,0 /* 0xf0-0xff */
454
};
455
static const unsigned char utf8_mask[4] = { 0x7f, 0x1f, 0x0f, 0x07 };
456
457
const char *srcend = src + srclen;
458
int len, res;
459
WCHAR *ret, *dst;
460
461
dst = ret = xmalloc( (srclen + 1) * sizeof(WCHAR) );
462
while (src < srcend)
463
{
464
unsigned char ch = *src++;
465
if (ch < 0x80) /* special fast case for 7-bit ASCII */
466
{
467
*dst++ = ch;
468
continue;
469
}
470
len = utf8_length[ch - 0x80];
471
if (len && src + len <= srcend)
472
{
473
res = ch & utf8_mask[len];
474
switch (len)
475
{
476
case 3:
477
if ((ch = *src ^ 0x80) >= 0x40) break;
478
res = (res << 6) | ch;
479
src++;
480
if (res < 0x10) break;
481
case 2:
482
if ((ch = *src ^ 0x80) >= 0x40) break;
483
res = (res << 6) | ch;
484
if (res >= 0x110000 >> 6) break;
485
src++;
486
if (res < 0x20) break;
487
if (res >= 0xd800 >> 6 && res <= 0xdfff >> 6) break;
488
case 1:
489
if ((ch = *src ^ 0x80) >= 0x40) break;
490
res = (res << 6) | ch;
491
src++;
492
if (res < 0x80) break;
493
if (res <= 0xffff) *dst++ = res;
494
else
495
{
496
res -= 0x10000;
497
*dst++ = 0xd800 | (res >> 10);
498
*dst++ = 0xdc00 | (res & 0x3ff);
499
}
500
continue;
501
}
502
}
503
*dst++ = 0xfffd;
504
}
505
*dst = 0;
506
*dstlen = dst - ret;
507
return ret;
508
}
509
510
/*
 * Convert srclen WCHARs of UTF-16 to a newly allocated, NUL-terminated UTF-8
 * string and store its length in bytes (without the terminator) in *dstlen.
 * Unpaired surrogates are encoded as U+FFFD.
 */
static char *unicode_to_utf8( const WCHAR *src, int srclen, int *dstlen )
{
    /* a single WCHAR needs at most 3 bytes, a surrogate pair 4 */
    char *const out = xmalloc( srclen * 3 + 1 );
    char *p = out;

    while (srclen)
    {
        unsigned int cp = *src;

        if (cp < 0x80)  /* 0x00-0x7f: 1 byte */
        {
            *p++ = cp;
        }
        else if (cp < 0x800)  /* 0x80-0x7ff: 2 bytes */
        {
            *p++ = 0xc0 | (cp >> 6);
            *p++ = 0x80 | (cp & 0x3f);
        }
        else if (cp >= 0xd800 && cp <= 0xdbff && srclen > 1 && src[1] >= 0xdc00 && src[1] <= 0xdfff)
        {
            /* 0x10000-0x10ffff: 4 bytes */
            cp = 0x10000 + ((cp & 0x3ff) << 10) + (src[1] & 0x3ff);
            *p++ = 0xf0 | (cp >> 18);
            *p++ = 0x80 | ((cp >> 12) & 0x3f);
            *p++ = 0x80 | ((cp >> 6) & 0x3f);
            *p++ = 0x80 | (cp & 0x3f);
            /* the low surrogate has been consumed as well */
            src++;
            srclen--;
        }
        else
        {
            if (cp >= 0xd800 && cp <= 0xdfff) cp = 0xfffd;  /* invalid surrogate pair */

            /* 0x800-0xffff: 3 bytes */
            *p++ = 0xe0 | (cp >> 12);
            *p++ = 0x80 | ((cp >> 6) & 0x3f);
            *p++ = 0x80 | (cp & 0x3f);
        }
        src++;
        srclen--;
    }
    *p = 0;
    *dstlen = p - out;
    return out;
}
562
563
string_t *convert_string_unicode( const string_t *str, int codepage )
564
{
565
string_t *ret = xmalloc(sizeof(*ret));
566
567
ret->type = str_unicode;
568
ret->loc = str->loc;
569
570
if (str->type == str_char)
571
{
572
if (!codepage) parser_error( "Current language is Unicode only, cannot convert string" );
573
574
if (codepage == CP_UTF8)
575
ret->str.wstr = utf8_to_unicode( str->str.cstr, str->size, &ret->size );
576
else
577
ret->str.wstr = codepage_to_unicode( codepage, str->str.cstr, str->size, &ret->size );
578
if (!ret->str.wstr) parser_error( "Invalid character in string '%.*s' for codepage %u",
579
str->size, str->str.cstr, codepage );
580
}
581
else
582
{
583
ret->size = str->size;
584
ret->str.wstr = xmalloc(sizeof(WCHAR)*(ret->size+1));
585
memcpy( ret->str.wstr, str->str.wstr, ret->size * sizeof(WCHAR) );
586
ret->str.wstr[ret->size] = 0;
587
}
588
return ret;
589
}
590
591
char *convert_string_utf8( const string_t *str, int codepage )
592
{
593
int len;
594
string_t *wstr = convert_string_unicode( str, codepage );
595
char *ret = unicode_to_utf8( wstr->str.wstr, wstr->size, &len );
596
free_string( wstr );
597
return ret;
598
}
599
600
/* Free a string_t together with the character data it holds. */
void free_string(string_t *str)
{
    void *chars;

    if (str->type == str_unicode) chars = str->str.wstr;
    else chars = str->str.cstr;

    free( chars );
    free( str );
}
606
607
/* check if the string is valid utf8 despite a different codepage being in use */
608
int check_valid_utf8( const string_t *str, int codepage )
609
{
610
int i, count;
611
WCHAR *wstr;
612
613
if (!check_utf8) return 0;
614
if (!codepage) return 0;
615
if (codepage == CP_UTF8) return 0;
616
if (!is_valid_codepage( codepage )) return 0;
617
618
for (i = count = 0; i < str->size; i++)
619
{
620
if ((unsigned char)str->str.cstr[i] >= 0xf5) goto done;
621
if ((unsigned char)str->str.cstr[i] >= 0xc2) { count++; continue; }
622
if ((unsigned char)str->str.cstr[i] >= 0x80) goto done;
623
}
624
if (!count) return 0; /* no 8-bit chars at all */
625
626
wstr = utf8_to_unicode( str->str.cstr, str->size, &count );
627
for (i = 0; i < count; i++) if (wstr[i] == 0xfffd) break;
628
free( wstr );
629
return (i == count);
630
631
done:
632
check_utf8 = 0; /* at least one 8-bit non-utf8 string found, stop checking */
633
return 0;
634
}
635
636
const char *get_nameid_str(const name_id_t *n)
637
{
638
int len;
639
640
if (!n) return "<none>";
641
if (n->type == name_ord) return strmake( "%u", n->name.i_name );
642
if (n->name.s_name->type == str_char) return n->name.s_name->str.cstr;
643
return unicode_to_utf8( n->name.s_name->str.wstr, n->name.s_name->size, &len );
644
}
645
646