CoCalc -- utils.c

GitHub Repository: wine-mirror/wine
Path: blob/master/tools/wrc/utils.c
⁸⁶⁰⁰ views
1
/*
2
 * Utility routines
3
 *
4
 * Copyright 1998 Bertho A. Stultiens
5
 *
6
 * This library is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU Lesser General Public
8
 * License as published by the Free Software Foundation; either
9
 * version 2.1 of the License, or (at your option) any later version.
10
 *
11
 * This library is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
 * Lesser General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU Lesser General Public
17
 * License along with this library; if not, write to the Free Software
18
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
19
 */
20

21
#include "config.h"
22

23
#include <assert.h>
24
#include <stdio.h>
25
#include <stdlib.h>
26
#include <stdarg.h>
27
#include <string.h>
28
#include <ctype.h>
29

30
#include "../tools.h"
31
#include "wrc.h"
32
#include "winternl.h"
33
#include "utils.h"
34
#include "parser.h"
35

36
/* #define WANT_NEAR_INDICATION */
37

38
#ifdef WANT_NEAR_INDICATION
39
/*
 * Replace every non-printable character of the NUL-terminated string
 * 'str' with a space. The string is modified in place.
 */
void make_print(char *str)
{
	while(*str)
	{
		/* The <ctype.h> classifiers are only defined for values that are
		 * representable as unsigned char (or EOF); a plain char holding
		 * an 8-bit character may be negative, so convert it first. */
		if(!isprint((unsigned char)*str))
			*str = ' ';
		str++;
	}
}
48
#endif
49

50
static void generic_msg(const char *s, const char *t, const char *n, va_list ap)
51
{
52
	fprintf(stderr, "%s:%d:%d: %s: ", input_name ? input_name : "stdin", line_number, char_number, t);
53
	vfprintf(stderr, s, ap);
54
#ifdef WANT_NEAR_INDICATION
55
	{
56
		char *cpy;
57
		if(n)
58
		{
59
			cpy = xstrdup(n);
60
			make_print(cpy);
61
			fprintf(stderr, " near '%s'", cpy);
62
			free(cpy);
63
		}
64
	}
65
#endif
66
}
67

68

69
int parser_error(const char *s, ...)
70
{
71
	va_list ap;
72
	va_start(ap, s);
73
	generic_msg(s, "Error", parser_text, ap);
74
        fputc( '\n', stderr );
75
	va_end(ap);
76
	exit(1);
77
	return 1;
78
}
79

80
int parser_warning(const char *s, ...)
81
{
82
	va_list ap;
83
	va_start(ap, s);
84
	generic_msg(s, "Warning", parser_text, ap);
85
	va_end(ap);
86
	return 0;
87
}
88

89
/*
 * Print "Error: " followed by the printf-style message to stderr and
 * terminate the process with exit status 2. The message is written as
 * given; the caller supplies any trailing newline.
 */
void error(const char *s, ...)
{
	va_list args;

	fputs("Error: ", stderr);

	va_start(args, s);
	vfprintf(stderr, s, args);
	va_end(args);

	exit(2);
}
98

99
/*
 * Print "Warning: " followed by the printf-style message to stderr.
 * The message is written as given; the caller supplies any trailing
 * newline.
 */
void warning(const char *s, ...)
{
	va_list args;

	fputs("Warning: ", stderr);

	va_start(args, s);
	vfprintf(stderr, s, args);
	va_end(args);
}
107

108
void chat(const char *s, ...)
109
{
110
	if(debuglevel & DEBUGLEVEL_CHAT)
111
	{
112
		va_list ap;
113
		va_start(ap, s);
114
		fprintf(stderr, "FYI: ");
115
		vfprintf(stderr, s, ap);
116
		va_end(ap);
117
	}
118
}
119

120
/*
 * Compare two NUL-terminated 8-bit strings, ignoring case for the
 * ASCII letters only.
 *
 * Returns 0 when the strings are equal, a negative value when str1 sorts
 * before str2 and a positive value otherwise.
 *
 * Characters are compared as unsigned char values. This makes the
 * ordering of 8-bit characters independent of the signedness of plain
 * char on the build platform, and identical to the ordering used by
 * compare_striAW() for the same byte values.
 */
int compare_striA( const char *str1, const char *str2 )
{
    for (;;)
    {
        unsigned char ch1 = (unsigned char)*str1++;
        unsigned char ch2 = (unsigned char)*str2++;

        /* only the A-Z range is case-insensitive */
        if (ch1 >= 'a' && ch1 <= 'z') ch1 -= 'a' - 'A';
        if (ch2 >= 'a' && ch2 <= 'z') ch2 -= 'a' - 'A';
        if (!ch1 || ch1 != ch2) return ch1 - ch2;
    }
}
132

133
/*
 * Compare two NUL-terminated WCHAR strings, ignoring case for the
 * ASCII letters only.
 *
 * Returns 0 when the strings are equal, otherwise the difference between
 * the first pair of characters that do not match (after case folding).
 */
int compare_striW( const WCHAR *str1, const WCHAR *str2 )
{
    WCHAR ch1, ch2;

    do
    {
        ch1 = *str1++;
        ch2 = *str2++;
        /* fold a-z to A-Z, every other character is compared as is */
        if (ch1 >= 'a' && ch1 <= 'z') ch1 -= 'a' - 'A';
        if (ch2 >= 'a' && ch2 <= 'z') ch2 -= 'a' - 'A';
    } while (ch1 && ch1 == ch2);

    return ch1 - ch2;
}
145

146
/*
 * Compare a NUL-terminated 8-bit string with a NUL-terminated WCHAR
 * string, ignoring case for the ASCII letters only.
 *
 * Each byte of str1 is widened as an unsigned value and compared
 * numerically with the corresponding WCHAR of str2; no codepage
 * conversion takes place.
 *
 * Returns 0 when the strings are equal, otherwise the difference between
 * the first pair of characters that do not match (after case folding).
 */
int compare_striAW( const char *str1, const WCHAR *str2 )
{
    WCHAR ch1, ch2;

    do
    {
        ch1 = (unsigned char)*str1++;
        ch2 = *str2++;
        /* fold a-z to A-Z, every other character is compared as is */
        if (ch1 >= 'a' && ch1 <= 'z') ch1 -= 'a' - 'A';
        if (ch2 >= 'a' && ch2 <= 'z') ch2 -= 'a' - 'A';
    } while (ch1 && ch1 == ch2);

    return ch1 - ch2;
}
158

159
/*
160
 *****************************************************************************
161
 * Function	: compare_name_id
162
 * Syntax	: int compare_name_id(const name_id_t *n1, const name_id_t *n2)
163
 * Input	:
164
 * Output	:
165
 * Description	:
166
 * Remarks	:
167
 *****************************************************************************
168
*/
169
int compare_name_id(const name_id_t *n1, const name_id_t *n2)
170
{
171
    if (n1->type != n2->type) return n1->type == name_ord ? 1 : -1;
172
    if (n1->type == name_ord) return n1->name.i_name - n2->name.i_name;
173

174
    if (n1->name.s_name->type == str_char)
175
    {
176
        if (n2->name.s_name->type == str_char)
177
            return compare_striA(n1->name.s_name->str.cstr, n2->name.s_name->str.cstr);
178
        return compare_striAW(n1->name.s_name->str.cstr, n2->name.s_name->str.wstr);
179
    }
180
    else
181
    {
182
        if (n2->name.s_name->type == str_char)
183
            return -compare_striAW(n2->name.s_name->str.cstr, n1->name.s_name->str.wstr);
184
        return compare_striW(n1->name.s_name->str.wstr, n2->name.s_name->str.wstr);
185
    }
186
}
187

188
#ifdef _WIN32
189

190
/* Return non-zero if the system reports 'id' as a valid codepage
 * (Win32 build: thin wrapper around IsValidCodePage). */
int is_valid_codepage(int id)
{
    int valid = IsValidCodePage( id );

    return valid;
}
194

195
static WCHAR *codepage_to_unicode( int codepage, const char *src, int srclen, int *dstlen )
196
{
197
    WCHAR *dst = xmalloc( (srclen + 1) * sizeof(WCHAR) );
198
    DWORD ret = MultiByteToWideChar( codepage, MB_ERR_INVALID_CHARS, src, srclen, dst, srclen );
199
    if (!ret) return NULL;
200
    dst[ret] = 0;
201
    *dstlen = ret;
202
    return dst;
203
}
204

205
/*
 * Return the default ANSI codepage of language 'lang' (Win32 build).
 *
 * A zero language maps to codepage 1252. Returns -1 if the system
 * cannot provide the locale information.
 */
int get_language_codepage( language_t lang )
{
    DWORD codepage;
    int found;

    if (!lang) return 1252;

    /* LOCALE_RETURN_NUMBER stores the value as a number in the buffer,
     * whose size is given in WCHAR units */
    found = GetLocaleInfoW( lang, LOCALE_IDEFAULTANSICODEPAGE | LOCALE_RETURN_NUMBER,
                            (WCHAR *)&codepage, sizeof(codepage)/sizeof(WCHAR) );
    if (!found) return -1;
    return codepage;
}
214

215
language_t get_language_from_name( const char *name )
216
{
217
    WCHAR nameW[LOCALE_NAME_MAX_LENGTH];
218

219
    MultiByteToWideChar( 1252, 0, name, -1, nameW, ARRAY_SIZE(nameW) );
220
    return LocaleNameToLCID( nameW, LOCALE_ALLOW_NEUTRAL_NAMES );
221
}
222

223
#else  /* _WIN32 */
224

225
/* Description of one loaded codepage file, filled in by init_nls_info() */
struct nls_info
{
    unsigned short  codepage,       /* codepage identifier; 0 marks an unused cache slot */
                    unidef,         /* Unicode value that marks a failed mapping */
                    trans_unidef;   /* source character that legitimately maps to unidef */
    unsigned short *cp2uni,         /* table indexed by a source byte, yields a Unicode value */
                   *dbcs_offsets;   /* table indexed by a lead byte, or NULL if there is none */
};

/* Cache of the codepages loaded so far; used entries come first */
static struct nls_info nlsinfo[128];
235

236
static void init_nls_info( struct nls_info *info, unsigned short *ptr )
237
{
238
    unsigned short hdr_size = ptr[0];
239

240
    info->codepage      = ptr[1];
241
    info->unidef        = ptr[4];
242
    info->trans_unidef  = ptr[6];
243
    ptr += hdr_size;
244
    info->cp2uni = ++ptr;
245
    ptr += 256;
246
    if (*ptr++) ptr += 256;  /* glyph table */
247
    info->dbcs_offsets  = *ptr ? ptr + 1 : NULL;
248
}
249

250
static const struct nls_info *get_nls_info( unsigned int codepage )
251
{
252
    unsigned short *data;
253
    char *path;
254
    unsigned int i;
255
    size_t size;
256

257
    for (i = 0; i < ARRAY_SIZE(nlsinfo) && nlsinfo[i].codepage; i++)
258
        if (nlsinfo[i].codepage == codepage) return &nlsinfo[i];
259

260
    assert( i < ARRAY_SIZE(nlsinfo) );
261

262
    for (i = 0; nlsdirs[i]; i++)
263
    {
264
        path = strmake( "%s/c_%03u.nls", nlsdirs[i], codepage );
265
        if ((data = read_file( path, &size )))
266
        {
267
            free( path );
268
            init_nls_info( &nlsinfo[i], data );
269
            return &nlsinfo[i];
270
        }
271
        free( path );
272
    }
273
    return NULL;
274
}
275

276
int is_valid_codepage(int cp)
277
{
278
    return cp == CP_UTF8 || get_nls_info( cp );
279
}
280

281
static WCHAR *codepage_to_unicode( int codepage, const char *src, int srclen, int *dstlen )
282
{
283
    const struct nls_info *info = get_nls_info( codepage );
284
    unsigned int i;
285
    WCHAR dbch, *dst = xmalloc( (srclen + 1) * sizeof(WCHAR) );
286

287
    if (!info) error( "codepage %u not supported\n", codepage );
288

289
    if (info->dbcs_offsets)
290
    {
291
        for (i = 0; srclen; i++, srclen--, src++)
292
        {
293
            unsigned short off = info->dbcs_offsets[(unsigned char)*src];
294
            if (off)
295
            {
296
                if (srclen == 1) return NULL;
297
                dbch = (src[0] << 8) | (unsigned char)src[1];
298
                src++;
299
                srclen--;
300
                dst[i] = info->dbcs_offsets[off + (unsigned char)*src];
301
                if (dst[i] == info->unidef && dbch != info->trans_unidef) return NULL;
302
            }
303
            else
304
            {
305
                dst[i] = info->cp2uni[(unsigned char)*src];
306
                if (dst[i] == info->unidef && *src != info->trans_unidef) return NULL;
307
            }
308
        }
309
    }
310
    else
311
    {
312
        for (i = 0; i < srclen; i++)
313
        {
314
            dst[i] = info->cp2uni[(unsigned char)src[i]];
315
            if (dst[i] == info->unidef && src[i] != info->trans_unidef) return NULL;
316
        }
317
    }
318
    dst[i] = 0;
319
    *dstlen = i;
320
    return dst;
321
}
322

323
static const NLS_LOCALE_LCID_INDEX *lcids_index;
324
static const NLS_LOCALE_HEADER *locale_table;
325
static const NLS_LOCALE_LCNAME_INDEX *lcnames_index;
326
static const WCHAR *locale_strings;
327

328
static void load_locale_nls(void)
329
{
330
    struct
331
    {
332
        unsigned int ctypes;
333
        unsigned int unknown1;
334
        unsigned int unknown2;
335
        unsigned int unknown3;
336
        unsigned int locales;
337
        unsigned int charmaps;
338
        unsigned int geoids;
339
        unsigned int scripts;
340
    } *header;
341
    char *path;
342
    unsigned int i;
343
    size_t size;
344

345
    for (i = 0; nlsdirs[i]; i++)
346
    {
347
        path = strmake( "%s/locale.nls", nlsdirs[i] );
348
        header = read_file( path, &size );
349
        free( path );
350
        if (!header) continue;
351
        locale_table = (const NLS_LOCALE_HEADER *)((char *)header + header->locales);
352
        lcids_index = (const NLS_LOCALE_LCID_INDEX *)((char *)locale_table + locale_table->lcids_offset);
353
        lcnames_index = (const NLS_LOCALE_LCNAME_INDEX *)((char *)locale_table + locale_table->lcnames_offset);
354
        locale_strings = (const WCHAR *)((char *)locale_table + locale_table->strings_offset);
355
        return;
356
    }
357
    error( "unable to load locale.nls\n" );
358
}
359

360
/*
 * Compare the 8-bit locale name 'n1' with the WCHAR locale name 'n2',
 * ignoring case for the ASCII letters only. Both are NUL-terminated.
 *
 * Returns 0 when the names are equal, otherwise the difference between
 * the first pair of characters that do not match (after case folding).
 */
static int compare_locale_names( const char *n1, const WCHAR *n2 )
{
    WCHAR ch1, ch2;

    do
    {
        ch1 = (unsigned char)*n1++;
        ch2 = *n2++;
        if (ch1 >= 'a' && ch1 <= 'z') ch1 += 'A' - 'a';
        if (ch2 >= 'a' && ch2 <= 'z') ch2 += 'A' - 'a';
    } while (ch1 && ch1 == ch2);

    return ch1 - ch2;
}
371

372
static const NLS_LOCALE_LCNAME_INDEX *find_lcname_entry( const char *name )
373
{
374
    int min = 0, max = locale_table->nb_lcnames - 1;
375

376
    if (!name) return NULL;
377
    while (min <= max)
378
    {
379
        int res, pos = (min + max) / 2;
380
        const WCHAR *str = locale_strings + lcnames_index[pos].name;
381
        res = compare_locale_names( name, str + 1 );
382
        if (res < 0) max = pos - 1;
383
        else if (res > 0) min = pos + 1;
384
        else return &lcnames_index[pos];
385
    }
386
    return NULL;
387
}
388

389
/*
 * Binary search of the locale identifier index for 'lcid'.
 *
 * The locale table must have been loaded. Returns the matching index
 * entry, or NULL if the identifier is not found.
 */
static const NLS_LOCALE_LCID_INDEX *find_lcid_entry( LCID lcid )
{
    int low = 0, high = locale_table->nb_lcids - 1;

    while (low <= high)
    {
        int mid = (low + high) / 2;
        const NLS_LOCALE_LCID_INDEX *entry = &lcids_index[mid];

        if (lcid < entry->id) high = mid - 1;
        else if (lcid > entry->id) low = mid + 1;
        else return entry;
    }
    return NULL;
}
402

403
/*
 * Return the locale record number 'idx' of the loaded locale table.
 * Records have a fixed size and are located from the offset and record
 * size stored in the table header. 'idx' is not range-checked.
 */
static const NLS_LOCALE_DATA *get_locale_data( UINT idx )
{
    const char *base = (const char *)locale_table;
    ULONG offset = locale_table->locales_offset + idx * locale_table->locale_size;

    return (const NLS_LOCALE_DATA *)(base + offset);
}
408

409
/*
 * Return the default ANSI codepage of language 'lang'.
 *
 * A zero language and the default English language map to codepage 1252
 * without loading the locale table. Any other language is looked up in
 * the locale table, which is loaded on first use. Returns -1 if the
 * language is not found there.
 */
int get_language_codepage( language_t lang )
{
    const NLS_LOCALE_LCID_INDEX *entry;

    if (!lang || lang == MAKELANGID( LANG_ENGLISH, SUBLANG_DEFAULT )) return 1252;

    if (!locale_table) load_locale_nls();

    entry = find_lcid_entry( lang );
    if (!entry) return -1;
    return get_locale_data( entry->idx )->idefaultansicodepage;
}
419

420
language_t get_language_from_name( const char *name )
421
{
422
    const NLS_LOCALE_LCNAME_INDEX *entry;
423

424
    if (!locale_table) load_locale_nls();
425
    if (!(entry = find_lcname_entry( name ))) return 0;
426
    return get_locale_data( entry->idx )->unique_lcid;
427
}
428

429
#endif  /* _WIN32 */
430

431
/*
 * Convert 'srclen' bytes of UTF-8 text at 'src' to 16-bit Unicode.
 *
 * Returns a newly allocated, NUL-terminated WCHAR string and stores its
 * length (in WCHARs, terminator excluded) in *dstlen. Characters above
 * 0xffff are written as a surrogate pair. The conversion itself never
 * fails: a byte that does not start a valid sequence, and a sequence
 * that is truncated, overlong, encodes a surrogate or exceeds 0x10ffff,
 * produces the replacement character 0xfffd instead.
 */
static WCHAR *utf8_to_unicode( const char *src, int srclen, int *dstlen )
{
    /* number of continuation bytes that follow a given lead byte,
     * indexed by (lead byte - 0x80); 0 marks a byte that cannot start
     * a sequence */
    static const char utf8_length[128] =
    {
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x80-0x8f */
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x90-0x9f */
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xa0-0xaf */
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xb0-0xbf */
        0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0xc0-0xcf */
        1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0xd0-0xdf */
        2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, /* 0xe0-0xef */
        3,3,3,3,3,0,0,0,0,0,0,0,0,0,0,0  /* 0xf0-0xff */
    };
    /* mask of the payload bits of a lead byte, indexed by the number of
     * continuation bytes */
    static const unsigned char utf8_mask[4] = { 0x7f, 0x1f, 0x0f, 0x07 };

    const char *srcend = src + srclen;
    int len, res;
    WCHAR *ret, *dst;

    /* every loop iteration consumes at least as many source bytes as it
     * writes WCHARs, so srclen WCHARs plus the terminator are enough */
    dst = ret = xmalloc( (srclen + 1) * sizeof(WCHAR) );
    while (src < srcend)
    {
        unsigned char ch = *src++;
        if (ch < 0x80)  /* special fast case for 7-bit ASCII */
        {
            *dst++ = ch;
            continue;
        }
        len = utf8_length[ch - 0x80];
        if (len && src + len <= srcend)
        {
            res = ch & utf8_mask[len];
            /* Each case consumes one continuation byte and then falls
             * through to the next one. A 'break' abandons the sequence:
             * control reaches the 0xfffd store below, and decoding
             * resumes at the first byte that was not consumed. */
            switch (len)
            {
            case 3:
                /* a continuation byte is 10xxxxxx: after flipping the
                 * top bit its value must be below 0x40 */
                if ((ch = *src ^ 0x80) >= 0x40) break;
                res = (res << 6) | ch;
                src++;
                if (res < 0x10) break;  /* overlong: would encode less than 0x10000 */
                /* fall through */
            case 2:
                if ((ch = *src ^ 0x80) >= 0x40) break;
                res = (res << 6) | ch;
                if (res >= 0x110000 >> 6) break;  /* beyond 0x10ffff */
                src++;
                if (res < 0x20) break;  /* overlong: would encode less than 0x800 */
                if (res >= 0xd800 >> 6 && res <= 0xdfff >> 6) break;  /* encoded surrogate */
                /* fall through */
            case 1:
                if ((ch = *src ^ 0x80) >= 0x40) break;
                res = (res << 6) | ch;
                src++;
                if (res < 0x80) break;  /* overlong: would encode a 7-bit character */
                if (res <= 0xffff) *dst++ = res;
                else
                {
                    /* split into a high and a low surrogate */
                    res -= 0x10000;
                    *dst++ = 0xd800 | (res >> 10);
                    *dst++ = 0xdc00 | (res & 0x3ff);
                }
                continue;
            }
        }
        /* invalid lead byte, truncated or malformed sequence */
        *dst++ = 0xfffd;
    }
    *dst = 0;
    *dstlen = dst - ret;
    return ret;
}
498

499
/*
 * Convert 'srclen' WCHARs at 'src' to UTF-8.
 *
 * Returns a newly allocated, NUL-terminated string and stores its length
 * in bytes (terminator excluded) in *dstlen. A valid surrogate pair is
 * combined into one 4-byte sequence; a surrogate that is not part of a
 * valid pair is replaced by the encoding of 0xfffd.
 */
static char *unicode_to_utf8( const WCHAR *src, int srclen, int *dstlen )
{
    /* a WCHAR needs at most 3 bytes, a surrogate pair 4 bytes for 2 WCHARs */
    char *buffer = xmalloc( srclen * 3 + 1 );
    char *out = buffer;

    while (srclen)
    {
        unsigned int cp = *src;

        if (cp >= 0xd800 && cp <= 0xdbff && srclen > 1 && src[1] >= 0xdc00 && src[1] <= 0xdfff)
        {
            /* high surrogate followed by a low one: combine them and
             * consume the low surrogate as well */
            cp = 0x10000 + ((cp & 0x3ff) << 10) + (src[1] & 0x3ff);
            src++;
            srclen--;
        }
        else if (cp >= 0xd800 && cp <= 0xdfff)
        {
            cp = 0xfffd;  /* invalid surrogate pair */
        }

        if (cp < 0x80)  /* 0x00-0x7f: 1 byte */
        {
            *out++ = cp;
        }
        else if (cp < 0x800)  /* 0x80-0x7ff: 2 bytes */
        {
            *out++ = 0xc0 | (cp >> 6);
            *out++ = 0x80 | (cp & 0x3f);
        }
        else if (cp < 0x10000)  /* 0x800-0xffff: 3 bytes */
        {
            *out++ = 0xe0 | (cp >> 12);
            *out++ = 0x80 | ((cp >> 6) & 0x3f);
            *out++ = 0x80 | (cp & 0x3f);
        }
        else  /* 0x10000-0x10ffff: 4 bytes */
        {
            *out++ = 0xf0 | (cp >> 18);
            *out++ = 0x80 | ((cp >> 12) & 0x3f);
            *out++ = 0x80 | ((cp >> 6) & 0x3f);
            *out++ = 0x80 | (cp & 0x3f);
        }
        src++;
        srclen--;
    }
    *out = 0;
    *dstlen = out - buffer;
    return buffer;
}
551

552
string_t *convert_string_unicode( const string_t *str, int codepage )
553
{
554
    string_t *ret = xmalloc(sizeof(*ret));
555

556
    ret->type = str_unicode;
557
    ret->loc = str->loc;
558

559
    if (str->type == str_char)
560
    {
561
        if (!codepage) parser_error( "Current language is Unicode only, cannot convert string" );
562

563
        if (codepage == CP_UTF8)
564
            ret->str.wstr = utf8_to_unicode( str->str.cstr, str->size, &ret->size );
565
        else
566
            ret->str.wstr = codepage_to_unicode( codepage, str->str.cstr, str->size, &ret->size );
567
        if (!ret->str.wstr) parser_error( "Invalid character in string '%.*s' for codepage %u",
568
                                          str->size, str->str.cstr, codepage );
569
    }
570
    else
571
    {
572
        ret->size     = str->size;
573
        ret->str.wstr = xmalloc(sizeof(WCHAR)*(ret->size+1));
574
        memcpy( ret->str.wstr, str->str.wstr, ret->size * sizeof(WCHAR) );
575
        ret->str.wstr[ret->size] = 0;
576
    }
577
    return ret;
578
}
579

580
char *convert_string_utf8( const string_t *str, int codepage )
581
{
582
    int len;
583
    string_t *wstr = convert_string_unicode( str, codepage );
584
    char *ret = unicode_to_utf8( wstr->str.wstr, wstr->size, &len );
585
    free_string( wstr );
586
    return ret;
587
}
588

589
/* Free a string_t together with its character data. 'str' must not be NULL. */
void free_string(string_t *str)
{
    void *chars;

    if (str->type == str_unicode) chars = str->str.wstr;
    else chars = str->str.cstr;

    free( chars );
    free( str );
}
595

596
/* check if the string is valid utf8 despite a different codepage being in use */
597
int check_valid_utf8( const string_t *str, int codepage )
598
{
599
    int i, count;
600
    WCHAR *wstr;
601

602
    if (!check_utf8) return 0;
603
    if (!codepage) return 0;
604
    if (codepage == CP_UTF8) return 0;
605
    if (!is_valid_codepage( codepage )) return 0;
606

607
    for (i = count = 0; i < str->size; i++)
608
    {
609
        if ((unsigned char)str->str.cstr[i] >= 0xf5) goto done;
610
        if ((unsigned char)str->str.cstr[i] >= 0xc2) { count++; continue; }
611
        if ((unsigned char)str->str.cstr[i] >= 0x80) goto done;
612
    }
613
    if (!count) return 0;  /* no 8-bit chars at all */
614

615
    wstr = utf8_to_unicode( str->str.cstr, str->size, &count );
616
    for (i = 0; i < count; i++) if (wstr[i] == 0xfffd) break;
617
    free( wstr );
618
    return (i == count);
619

620
done:
621
    check_utf8 = 0;  /* at least one 8-bit non-utf8 string found, stop checking */
622
    return 0;
623
}
624

625
const char *get_nameid_str(const name_id_t *n)
626
{
627
    int len;
628

629
    if (!n) return "<none>";
630
    if (n->type == name_ord) return strmake( "%u", n->name.i_name );
631
    if (n->name.s_name->type == str_char) return n->name.s_name->str.cstr;
632
    return unicode_to_utf8( n->name.s_name->str.wstr, n->name.s_name->size, &len );
633
}
634

635
Product

Resources

Company