#include "config.h"
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <assert.h>
#include <string.h>
#include "wmc.h"
#include "utils.h"
#include "lang.h"
#include "mcy.tab.h"
/*
 * NUL-terminated WCHAR spellings of the keywords and built-in symbol names.
 * mcy_lex() registers them in the token table on its first call.
 */
static const WCHAR ustr_application[] = { 'A', 'p', 'p', 'l', 'i', 'c', 'a', 't', 'i', 'o', 'n', 0 };
static const WCHAR ustr_codepages[] = { 'C', 'o', 'd', 'e', 'p', 'a', 'g', 'e', 's', 0 };
static const WCHAR ustr_english[] = { 'E', 'n', 'g', 'l', 'i', 's', 'h', 0 };
static const WCHAR ustr_error[] = { 'E', 'r', 'r', 'o', 'r', 0 };
static const WCHAR ustr_facility[] = { 'F', 'a', 'c', 'i', 'l', 'i', 't', 'y', 0 };
static const WCHAR ustr_facilitynames[] = { 'F', 'a', 'c', 'i', 'l', 'i', 't', 'y', 'N', 'a', 'm', 'e', 's', 0 };
static const WCHAR ustr_informational[] = { 'I', 'n', 'f', 'o', 'r', 'm', 'a', 't', 'i', 'o', 'n', 'a', 'l', 0 };
static const WCHAR ustr_language[] = { 'L', 'a', 'n', 'g', 'u', 'a', 'g', 'e', 0};
static const WCHAR ustr_languagenames[] = { 'L', 'a', 'n', 'g', 'u', 'a', 'g', 'e', 'N', 'a', 'm', 'e', 's', 0};
static const WCHAR ustr_messageid[] = { 'M', 'e', 's', 's', 'a', 'g', 'e', 'I', 'd', 0 };
static const WCHAR ustr_messageidtypedef[] = { 'M', 'e', 's', 's', 'a', 'g', 'e', 'I', 'd', 'T', 'y', 'p', 'e', 'd', 'e', 'f', 0 };
static const WCHAR ustr_dxgi[] = { 'D', 'x', 'g', 'i', 0 };
static const WCHAR ustr_null[] = { 'N', 'u', 'l', 'l', 0 };
static const WCHAR ustr_outputbase[] = { 'O', 'u', 't', 'p', 'u', 't', 'B', 'a', 's', 'e', 0 };
static const WCHAR ustr_severity[] = { 'S', 'e', 'v', 'e', 'r', 'i', 't', 'y', 0 };
static const WCHAR ustr_severitynames[] = { 'S', 'e', 'v', 'e', 'r', 'i', 't', 'y', 'N', 'a', 'm', 'e', 's', 0 };
static const WCHAR ustr_success[] = { 'S', 'u', 'c', 'c', 'e', 's', 's', 0 };
static const WCHAR ustr_symbolicname[] = { 'S', 'y', 'm', 'b', 'o', 'l', 'i', 'c', 'N', 'a', 'm', 'e', 0 };
static const WCHAR ustr_system[] = { 'S', 'y', 's', 't', 'e', 'm', 0 };
static const WCHAR ustr_warning[] = { 'W', 'a', 'r', 'n', 'i', 'n', 'g', 0 };
/* Alias passed to add_token() together with the English language entry. */
static const WCHAR ustr_msg00001[] = { 'm', 's', 'g', '0', '0', '0', '0', '1', 0 };
/*
 * Character class bits stored in char_table[].
 *
 * Uses visible in this file:
 *   CH_SHORTNAME - character is accepted by the file-name scan in mcy_lex()
 *   CH_IDENT     - character may start or continue an identifier
 *   CH_NUMBER    - character may continue (but not start) an identifier
 *   CH_PUNCT     - character terminates the file-name scan
 * CH_LONGNAME and CH_INVALID are not tested anywhere in the visible code.
 * NOTE(review): the table also carries bits 0x10 and 0x20, for which no
 * macro is defined here.
 */
#define CH_SHORTNAME 0x01
#define CH_LONGNAME 0x02
#define CH_IDENT 0x04
#define CH_NUMBER 0x08
#define CH_PUNCT 0x40
#define CH_INVALID 0x80
/*
 * Class bits for each of the 256 single-byte character values, indexed by
 * character code. Callers must check isisochar() before indexing.
 * NOTE(review): entries >= 0x80 do not fit a signed char; the bit tests used
 * in this file still work after sign extension, but unsigned char would
 * state the intent more clearly.
 */
static const char char_table[256] = {
/* 0x00 - 0x07 */
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
/* 0x08 - 0x0f */
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
/* 0x10 - 0x17 */
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
/* 0x18 - 0x1f */
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
/* 0x20 - 0x27 */
0x80, 0x03, 0x80, 0x03, 0x03, 0x03, 0x03, 0x03,
/* 0x28 - 0x2f */
0x43, 0x43, 0x10, 0x80, 0x03, 0x03, 0x22, 0x80,
/* 0x30 - 0x37: digits '0'..'7' */
0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b,
/* 0x38 - 0x3f: digits '8', '9', then punctuation */
0x0b, 0x0b, 0xc0, 0x80, 0x80, 0x80, 0x80, 0x10,
/* 0x40 - 0x47 */
0x03, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
/* 0x48 - 0x4f */
0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
/* 0x50 - 0x57 */
0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
/* 0x58 - 0x5f */
0x07, 0x07, 0x07, 0x80, 0x80, 0x80, 0x80, 0x07,
/* 0x60 - 0x67 */
0x03, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
/* 0x68 - 0x6f */
0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
/* 0x70 - 0x77 */
0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
/* 0x78 - 0x7f */
0x07, 0x07, 0x07, 0x03, 0x80, 0x03, 0x03, 0x80,
/* 0x80 - 0x87 */
0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
/* 0x88 - 0x8f */
0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
/* 0x90 - 0x97 */
0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
/* 0x98 - 0x9f */
0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
/* 0xa0 - 0xa7 */
0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
/* 0xa8 - 0xaf */
0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
/* 0xb0 - 0xb7 */
0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
/* 0xb8 - 0xbf */
0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
/* 0xc0 - 0xc7 */
0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
/* 0xc8 - 0xcf */
0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
/* 0xd0 - 0xd7 */
0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
/* 0xd8 - 0xdf */
0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
/* 0xe0 - 0xe7 */
0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
/* 0xe8 - 0xef */
0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
/* 0xf0 - 0xf7 */
0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
/* 0xf8 - 0xff */
0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x80,
};
/*
 * Tell whether ch lies in the single-byte range 0..255, i.e. whether it is
 * safe to use as an index into char_table[] or as a <ctype.h> argument.
 * Returns 1 if so, 0 otherwise (including for EOF and other negatives).
 */
static int isisochar(int ch)
{
    const int high_bits = ch & ~0xff;

    return high_bits == 0;
}
/* Code page currently selected for the lexer; written by set_codepage(). */
static int codepage;

/*
 * Select the code page the lexer uses from now on.
 *
 * cp: code page number to remember.
 */
void set_codepage(int cp)
{
    codepage = cp;
}
/* Number of elements in the lexer's input buffers. */
#define INPUTBUFFER_SIZE 2048

/*
 * Push-back stack used by get_unichar()/unget_unichar().
 * The elements are WCHARs: the values pushed back are full wide characters,
 * so a narrower element type would truncate anything above 0x7f.
 */
static int nungetstack = 0;         /* characters currently pushed back */
static int allocungetstack = 0;     /* allocated capacity, in elements */
static WCHAR *ungetstack = NULL;

/* Decoded input waiting to be handed out by get_unichar(). */
static int ninputbuffer = 0;        /* characters still unread */
static WCHAR inputbuffer[INPUTBUFFER_SIZE];
/*
 * Refill inputbuffer[] with the next chunk of input from yyin, converted to
 * WCHARs, and store the number of converted characters in ninputbuffer.
 *
 * The first call sniffs the encoding from the first 8 bytes of the file:
 *   - a UTF-8 byte order mark selects UTF-8 (the mark itself is dropped);
 *   - a 16-bit byte order mark, or four leading 16-bit units whose high
 *     (or low) bytes are all zero, selects 16-bit input, byte-swapped
 *     where needed;
 *   - otherwise UTF-8 is used when `unicodein` is set, else the current
 *     code page.
 * The decision is kept in function-static state for all later calls.
 *
 * Returns 1 when the buffer was refilled and 0 when no more input is
 * available. A read error on yyin is reported through xyyerror().
 */
static int fill_inputbuffer(void)
{
    static enum input_mode { INPUT_UNKNOWN, INPUT_ASCII, INPUT_UTF8, INPUT_UNICODE } mode;
    static int swapped;
    static const unsigned char utf8_bom[3] = { 0xef, 0xbb, 0xbf };
    WCHAR *wbuf;
    int i, pos = 0, len = 0;
    char buffer[INPUTBUFFER_SIZE];

    if (mode == INPUT_UNKNOWN)
    {
        WCHAR head[4] = { 0 };

        len = fread( buffer, 1, 8, yyin );
        if (len >= 3 && !memcmp( buffer, utf8_bom, 3 ))
        {
            mode = INPUT_UTF8;
            memmove( buffer, buffer + 3, len - 3 );
            len -= 3;
        }
        else if (len == 8)
        {
            /* Copy the sniffed bytes rather than casting buffer to WCHAR *:
             * a char array carries no alignment guarantee and must not be
             * read through a WCHAR lvalue. */
            memcpy( head, buffer, sizeof(head) );
            if (head[0] == 0xfeff || head[0] == 0xfffe)
            {
                mode = INPUT_UNICODE;
                pos = 1;    /* skip the byte order mark */
                swapped = (head[0] == 0xfffe);
            }
            else if (!((head[0] | head[1] | head[2] | head[3]) & 0xff00))
            {
                mode = INPUT_UNICODE;
            }
            else if (!((head[0] | head[1] | head[2] | head[3]) & 0x00ff))
            {
                mode = INPUT_UNICODE;
                swapped = 1;
            }
        }

        if (mode == INPUT_UNICODE)
        {
            /* Keep the units already consumed by the sniffing read. */
            len = 4 - pos;
            memcpy( inputbuffer, head + pos, len * sizeof(WCHAR) );
        }
        else if (mode == INPUT_UNKNOWN) mode = unicodein ? INPUT_UTF8 : INPUT_ASCII;
    }

    switch (mode)
    {
    case INPUT_ASCII:
    case INPUT_UTF8:
        if (!fgets( buffer + len, sizeof(buffer) - len, yyin ))
        {
            /* Nothing more could be read. Bytes consumed while sniffing the
             * encoding are still pending in buffer[0..len), so a file that
             * ends within them must not be dropped: terminate and convert
             * them. A read error is left for the check below. */
            if (!len || ferror(yyin)) break;
            buffer[len] = 0;
        }
        if (mode == INPUT_ASCII)
            wbuf = codepage_to_unicode( codepage, buffer, strlen(buffer), &ninputbuffer );
        else
            wbuf = utf8_to_unicode( buffer, strlen(buffer), &ninputbuffer );
        memcpy( inputbuffer, wbuf, ninputbuffer * sizeof(WCHAR) );
        free( wbuf );
        return 1;
    case INPUT_UNICODE:
        len += fread( inputbuffer + len, sizeof(WCHAR), INPUTBUFFER_SIZE - len, yyin );
        if (!len) break;
        if (swapped) for (i = 0; i < len; i++) inputbuffer[i] = (inputbuffer[i] << 8) | (inputbuffer[i] >> 8);
        ninputbuffer = len;
        return 1;
    case INPUT_UNKNOWN:
        break;
    }
    if (ferror(yyin)) xyyerror( "Fatal: reading input failed\n" );
    return 0;
}
/*
 * Return the next input character and advance char_number.
 *
 * Characters pushed back with unget_unichar() are returned first, most
 * recent first. Otherwise the next character of inputbuffer[] is returned,
 * refilling the buffer when it is exhausted.
 *
 * Returns EOF when fill_inputbuffer() reports no more input.
 */
static int get_unichar(void)
{
    static WCHAR *b = NULL;     /* read position inside inputbuffer[] */

    char_number++;

    if (nungetstack)
        return ungetstack[--nungetstack];

    /* A refill can succeed yet deliver zero characters (e.g. a line whose
     * first byte is NUL). Keep refilling instead of letting the counter go
     * negative, which would make every later read run past the buffer. */
    while (ninputbuffer <= 0)
    {
        if (!fill_inputbuffer())
            return EOF;
        b = inputbuffer;
    }

    ninputbuffer--;
    return *b++;
}
/*
 * Push ch back so that the next get_unichar() returns it again, and undo
 * the char_number increment made when it was read.
 *
 * EOF is never pushed back; the call is then a no-op.
 */
static void unget_unichar(int ch)
{
    if (ch != EOF)
    {
        char_number--;

        /* Grow the push-back stack in steps of 32 entries. */
        if (allocungetstack == nungetstack)
        {
            allocungetstack += 32;
            ungetstack = xrealloc(ungetstack, allocungetstack * sizeof(ungetstack[0]));
        }

        ungetstack[nungetstack] = (WCHAR)ch;
        nungetstack++;
    }
}
/*
 * Growable stack of single-byte characters; scan_number() uses it to
 * collect the digits of a number before converting them.
 */
/* Number of characters currently on the stack. */
static int ncharstack = 0;
/* Allocated capacity of charstack, in characters. */
static int alloccharstack = 0;
/* Storage, (re)allocated on demand by push_char(). */
static char *charstack = NULL;
/* Discard the contents of the character stack; the storage is kept. */
static void empty_char_stack(void)
{
    ncharstack = 0;
}
/*
 * Append ch (truncated to char) to the character stack, enlarging the
 * storage by 32 entries whenever it is full.
 */
static void push_char(int ch)
{
    if (alloccharstack == ncharstack)
    {
        alloccharstack += 32;
        charstack = xrealloc(charstack, alloccharstack * sizeof(charstack[0]));
    }

    charstack[ncharstack] = (char)ch;
    ncharstack++;
}
/*
 * Peek at the most recently pushed character without removing it.
 * Returns its value in the range 0..255, or 0 when the stack is empty.
 */
static int tos_char_stack(void)
{
    if (ncharstack)
    {
        const char top = charstack[ncharstack - 1];

        return (int)(top & 0xff);
    }
    return 0;
}
/*
 * Return the base of the character stack storage. The caller is expected to
 * have pushed a terminating 0 if it wants to treat the result as a string.
 */
static char *get_char_stack(void)
{
    return charstack;
}
/*
 * Growable stack of wide characters; mcy_lex() uses it to assemble the text
 * of the token it is currently scanning.
 */
/* Number of wide characters currently on the stack. */
static int nunicharstack = 0;
/* Allocated capacity of unicharstack, in wide characters. */
static int allocunicharstack = 0;
/* Storage, (re)allocated on demand by push_unichar(). */
static WCHAR *unicharstack = NULL;
/* Discard the contents of the wide-character stack; the storage is kept. */
static void empty_unichar_stack(void)
{
    nunicharstack = 0;
}
/*
 * Append ch (truncated to WCHAR) to the wide-character stack, enlarging the
 * storage by 128 entries whenever it is full.
 */
static void push_unichar(int ch)
{
    if (allocunicharstack == nunicharstack)
    {
        allocunicharstack += 128;
        unicharstack = xrealloc(unicharstack, allocunicharstack * sizeof(unicharstack[0]));
    }

    unicharstack[nunicharstack] = (WCHAR)ch;
    nunicharstack++;
}
/*
 * Compiled out (#if 0): wide-character counterpart of tos_char_stack().
 * It would return the most recently pushed wide character masked to 16 bits,
 * or 0 for an empty stack.
 */
#if 0
static int tos_unichar_stack(void)
{
if(!nunicharstack)
return 0;
else
return (int)(unicharstack[nunicharstack-1] & 0xffff);
}
#endif
/*
 * Return the base of the wide-character stack storage. The caller is
 * expected to have pushed a terminating 0 if it wants a string.
 */
static WCHAR *get_unichar_stack(void)
{
    return unicharstack;
}
/*
 * Scan a numeric literal whose first character, ch, has already been read.
 *
 * Accepted forms: decimal, octal (leading '0') and hexadecimal (leading
 * "0x"/"0X"). The digits are collected on the character stack and converted
 * with strtoul(). The character that ends the number is pushed back.
 *
 * Stores the value in mcy_lval.num and returns tNUMBER. Malformed numbers
 * are reported through xyyerror().
 */
static int scan_number(int ch)
{
    enum
    {
        ST_START,           /* nothing consumed yet */
        ST_LEADING_ZERO,    /* a single '0' seen */
        ST_HEX,             /* after "0x" */
        ST_OCTAL,           /* after '0' and an octal digit */
        ST_DECIMAL          /* after a non-zero first digit */
    };
    int st = ST_START;
    int radix = 10;
    int done = 0;

    empty_char_stack();

    while(!done)
    {
        if(!isisochar(ch))
            xyyerror("Invalid digit\n");

        switch(st)
        {
        case ST_START:
            if(!isdigit(ch))
                internal_error(__FILE__, __LINE__, "Non-digit in first number-scanner state\n");
            else
            {
                push_char(ch);
                st = (ch == '0') ? ST_LEADING_ZERO : ST_DECIMAL;
            }
            break;

        case ST_LEADING_ZERO:
            if(ch == 'x' || ch == 'X')
            {
                push_char(ch);
                st = ST_HEX;
            }
            else if(ch >= '0' && ch <= '7')
            {
                push_char(ch);
                st = ST_OCTAL;
            }
            else if(isalpha(ch) || ch == '_')
                xyyerror("Invalid number digit\n");
            else
            {
                /* A lone "0": no conversion needed. */
                unget_unichar(ch);
                mcy_lval.num = 0;
                return tNUMBER;
            }
            break;

        case ST_HEX:
            if(isxdigit(ch))
                push_char(ch);
            /* The last test rejects "0x" with no hex digit following it. */
            else if(isalpha(ch) || ch == '_' || !isxdigit(tos_char_stack()))
                xyyerror("Invalid hex digit\n");
            else
            {
                radix = 16;
                done = 1;
            }
            break;

        case ST_OCTAL:
            if(ch >= '0' && ch <= '7')
                push_char(ch);
            else if(isalnum(ch) || ch == '_')
                xyyerror("Invalid octal digit\n");
            else
            {
                radix = 8;
                done = 1;
            }
            break;

        case ST_DECIMAL:
            if(isdigit(ch))
                push_char(ch);
            else if(isalnum(ch) || ch == '_')
                xyyerror("Invalid decimal digit\n");
            else
            {
                radix = 10;
                done = 1;
            }
            break;

        default:
            internal_error(__FILE__, __LINE__, "Invalid state in number-scanner\n");
        }

        if(!done)
            ch = get_unichar();
    }

    /* ch ended the number: give it back, then convert what was collected. */
    unget_unichar(ch);
    push_char(0);
    mcy_lval.num = strtoul(get_char_stack(), NULL, radix);
    return tNUMBER;
}
/* Account for a consumed line break: next line, character position 1. */
static void newline(void)
{
    char_number = 1;
    line_number++;
}
/*
 * qsort()/bsearch() comparison callback for struct token entries: orders
 * them by name using unistricmp().
 */
static int unisort(const void *p1, const void *p2)
{
    const struct token *lhs = (const struct token *)p1;
    const struct token *rhs = (const struct token *)p2;

    return unistricmp(lhs->name, rhs->name);
}
/* Table of known tokens; add_token() keeps it sorted with unisort(). */
static struct token *tokentable = NULL;
/* Number of entries in tokentable. */
static int ntokentable = 0;
struct token *lookup_token(const WCHAR *s)
{
struct token tok;
tok.name = s;
return (struct token *)bsearch(&tok, tokentable, ntokentable, sizeof(*tokentable), unisort);
}
void add_token(enum tok_enum type, const WCHAR *name, int tok, int cp, const WCHAR *alias, int fix)
{
ntokentable++;
tokentable = xrealloc(tokentable, ntokentable * sizeof(*tokentable));
tokentable[ntokentable-1].type = type;
tokentable[ntokentable-1].name = name;
tokentable[ntokentable-1].token = tok;
tokentable[ntokentable-1].codepage = cp;
tokentable[ntokentable-1].alias = alias;
tokentable[ntokentable-1].fixed = fix;
qsort(tokentable, ntokentable, sizeof(*tokentable), unisort);
}
/*
 * Hand out the token table.
 *
 * tab: receives the pointer to the first entry (must not be NULL).
 * len: receives the number of entries (must not be NULL).
 */
void get_tokentable(struct token **tab, int *len)
{
    assert(tab != NULL);
    assert(len != NULL);

    *len = ntokentable;
    *tab = tokentable;
}
/*
 * Lexer entry point: return the next token of the message file.
 *
 * The first call registers the built-in keywords, severities, facilities
 * and the English language in the token table and selects the default
 * code page.
 *
 * Scanning is steered by three flags that are defined outside this
 * function:
 *   want_line - return the rest of the current line verbatim as tLINE, or
 *               tMSGEND when the line consists of a single '.'
 *   want_nl   - report the next line break as tNL instead of skipping it
 *   want_file - scan a file name of at most 8 characters and return tFILE
 *
 * Returns a token code, one of the characters ':', '=', '+', '(' or ')',
 * or EOF at end of input. Token values are passed in mcy_lval; strings
 * stored there are allocated with xunistrdup().
 */
int mcy_lex(void)
{
    static const WCHAR ustr_dot1[] = { '.', '\n', 0 };
    static const WCHAR ustr_dot2[] = { '.', '\r', '\n', 0 };
    static int isinit = 0;
    int ch;

    if(!isinit)
    {
        isinit++;
        set_codepage(WMC_DEFAULT_CODEPAGE);
        add_token(tok_keyword, ustr_codepages, tCODEPAGE, 0, NULL, 0);
        add_token(tok_keyword, ustr_facility, tFACILITY, 0, NULL, 1);
        add_token(tok_keyword, ustr_facilitynames, tFACNAMES, 0, NULL, 1);
        add_token(tok_keyword, ustr_language, tLANGUAGE, 0, NULL, 1);
        add_token(tok_keyword, ustr_languagenames, tLANNAMES, 0, NULL, 1);
        add_token(tok_keyword, ustr_messageid, tMSGID, 0, NULL, 1);
        add_token(tok_keyword, ustr_messageidtypedef, tTYPEDEF, 0, NULL, 1);
        add_token(tok_keyword, ustr_outputbase, tBASE, 0, NULL, 1);
        add_token(tok_keyword, ustr_severity, tSEVERITY, 0, NULL, 1);
        add_token(tok_keyword, ustr_severitynames, tSEVNAMES, 0, NULL, 1);
        add_token(tok_keyword, ustr_symbolicname, tSYMNAME, 0, NULL, 1);
        add_token(tok_severity, ustr_error, 0x03, 0, NULL, 0);
        add_token(tok_severity, ustr_warning, 0x02, 0, NULL, 0);
        add_token(tok_severity, ustr_informational, 0x01, 0, NULL, 0);
        add_token(tok_severity, ustr_success, 0x00, 0, NULL, 0);
        add_token(tok_facility, ustr_application, 0xFFF, 0, NULL, 0);
        add_token(tok_facility, ustr_system, 0x0FF, 0, NULL, 0);
        add_token(tok_facility, ustr_dxgi, 0x87a, 0, NULL, 0);
        add_token(tok_facility, ustr_null, 0x000, 0, NULL, 0);
        add_token(tok_language, ustr_english, 0x409, 437, ustr_msg00001, 0);
    }

    empty_unichar_stack();

    while(1)
    {
        if(want_line)
        {
            /* Message text: hand back whole lines, line break included. */
            while((ch = get_unichar()) != '\n')
            {
                if(ch == EOF)
                    xyyerror("Unexpected EOF\n");
                push_unichar(ch);
            }
            newline();
            push_unichar(ch);
            push_unichar(0);
            if(!unistrcmp(ustr_dot1, get_unichar_stack()) || !unistrcmp(ustr_dot2, get_unichar_stack()))
            {
                want_line = 0;
                /* Go back to the default code page after each message. */
                set_codepage(WMC_DEFAULT_CODEPAGE);
                return tMSGEND;
            }
            mcy_lval.str = xunistrdup(get_unichar_stack());
            return tLINE;
        }

        ch = get_unichar();

        if(ch == EOF)
            return EOF;

        if(ch == '\n')
        {
            newline();
            if(want_nl)
            {
                want_nl = 0;
                return tNL;
            }
            continue;
        }

        /* char_table[] and <ctype.h> may only be used for 0..255. */
        if(isisochar(ch))
        {
            if(want_file)
            {
                int n = 0;
                while(n < 8 && isisochar(ch))
                {
                    int t = char_table[ch];
                    if((t & CH_PUNCT) || !(t & CH_SHORTNAME))
                        break;

                    push_unichar(ch);
                    n++;
                    ch = get_unichar();
                }
                unget_unichar(ch);
                push_unichar(0);
                want_file = 0;
                mcy_lval.str = xunistrdup(get_unichar_stack());
                return tFILE;
            }

            if(char_table[ch] & CH_IDENT)
            {
                struct token *tok;
                while(isisochar(ch) && (char_table[ch] & (CH_IDENT|CH_NUMBER)))
                {
                    push_unichar(ch);
                    ch = get_unichar();
                }
                unget_unichar(ch);
                push_unichar(0);
                if(!(tok = lookup_token(get_unichar_stack())))
                {
                    mcy_lval.str = xunistrdup(get_unichar_stack());
                    return tIDENT;
                }
                switch(tok->type)
                {
                case tok_keyword:
                    return tok->token;

                case tok_language:
                    /* Text that follows is in this language's code page. */
                    codepage = tok->codepage;
                    /* fall through */
                case tok_severity:
                case tok_facility:
                    mcy_lval.tok = tok;
                    return tTOKEN;

                default:
                    internal_error(__FILE__, __LINE__, "Invalid token type encountered\n");
                }
            }

            if(isspace(ch))     /* skip white space */
                continue;

            if(isdigit(ch))
                return scan_number(ch);
        }

        switch(ch)
        {
        case ':':
        case '=':
        case '+':
        case '(':
        case ')':
            return ch;
        case ';':
            /* Comment: collect everything up to the end of the line. */
            while(ch != '\n' && ch != EOF)
            {
                push_unichar(ch);
                ch = get_unichar();
            }
            /* A comment on the last line may end at EOF rather than at a
             * line break. Always terminate the text with '\n' (never with
             * EOF truncated to a WCHAR) and only count a line that was
             * really consumed. */
            if(ch == '\n')
                newline();
            push_unichar('\n');
            push_unichar(0);
            mcy_lval.str = xunistrdup(get_unichar_stack());
            return tCOMMENT;
        default:
            xyyerror("Invalid character '%c' (0x%04x)\n", isisochar(ch) && isprint(ch) ? ch : '.', ch);
        }
    }
}