Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
att
GitHub Repository: att/ast
Path: blob/master/src/cmd/msgcc/msgcvt.c
1808 views
1
/***********************************************************************
2
* *
3
* This software is part of the ast package *
4
* Copyright (c) 2000-2011 AT&T Intellectual Property *
5
* and is licensed under the *
6
* Eclipse Public License, Version 1.0 *
7
* by AT&T Intellectual Property *
8
* *
9
* A copy of the License is available at *
10
* http://www.eclipse.org/org/documents/epl-v10.html *
11
* (with md5 checksum b35adb5213ca9657e911e9befb180842) *
12
* *
13
* Information and Software Systems Research *
14
* AT&T Research *
15
* Florham Park NJ *
16
* *
17
* Glenn Fowler <[email protected]> *
18
* *
19
***********************************************************************/
20
#pragma prototyped
21
/*
22
* Glenn Fowler
23
* AT&T Research
24
*/
25
26
static const char usage[] =
27
"[-?\n@(#)$Id: msgcvt (AT&T Research) 2000-05-01 $\n]"
28
USAGE_LICENSE
29
"[+NAME?msgcvt - convert message file to/from html]"
30
"[+DESCRIPTION?\bmsgcvt\b reads a \bgencat\b(1) format file on the standard"
31
" input and converts it to \bhtml\b on the standard output. The input"
32
" file must contain the control statement \b$quote \"\b and use the \""
33
" character to quote message text. The output is in a form suitable for"
34
" automatic translation by web sites like"
35
" \bhttp://babelfish.altavista.com/\b or filters like"
36
" \btranslate\b(1).]"
37
"[h:html?Generate \bhtml\b from \bgencat\b(1) input. This is the default.]"
38
"[m:msg?Generate a \bgencat\b(1) message file from (presumably translated)"
39
" \bhtml\b. Wide characters are UTF-8 encoded.]"
40
"[r:raw?The message file is raw message text, one message per line, with no"
41
" quoting or line numbering.]"
42
"[+SEE ALSO?\bgencat\b(1), \bmsgcc\b(1), \bmsggen\b(1), \btranslate\b(1)]"
43
;
44
45
#include <ast.h>
46
#include <ctype.h>
47
#include <error.h>
48
49
/*
 * conversion flags handed to the Convert_f functions
 */

#define MSG_RAW		(1<<0)	/* set by main() for the -r (raw) option	*/
#define MSG_SPLICE	(1<<1)	/* set by msg2html() when a line ends in \	*/

/*
 * if the char pointer lvalue s points to one white space character or to
 * a literal \n or \t escape then advance s past it and yield non-zero,
 * otherwise leave s alone and yield 0
 *
 * s is evaluated more than once and must be a modifiable lvalue
 * the byte is converted to unsigned char before isspace() so that
 * chars with the high bit set are not passed as negative values
 */

#define SPACE(s)	((isspace((unsigned char)*(s)) && ((s) += 1)) || \
			 (*(s) == '\\' && (*((s) + 1) == 'n' || *((s) + 1) == 't') && ((s) += 2)))
53
54
/* converter signature shared by msg2html() and html2msg(): input stream, output stream, MSG_* flags */
typedef void (*Convert_f)(Sfio_t*, Sfio_t*, int);
55
56
/*
 * HTML named character entity => character code
 */

typedef struct
{
	const char*	name;	/* entity name without the & and ;	*/
	int		code;	/* code that decode() returns for it	*/
} Code_t;

/*
 * entities recognized by decode(), searched linearly by exact
 * (case sensitive) name match
 * each entry is braced explicitly instead of relying on brace elision
 */

static const Code_t	codes[] =
{
	{ "aacute",	225 },
	{ "Aacute",	193 },
	{ "acirc",	226 },
	{ "Acirc",	194 },
	{ "aelig",	230 },
	{ "AElig",	198 },
	{ "agrave",	224 },
	{ "Agrave",	192 },
	{ "amp",	'&' },
	{ "aring",	229 },
	{ "Aring",	197 },
	{ "atilde",	227 },
	{ "Atilde",	195 },
	{ "auml",	228 },
	{ "Auml",	196 },
	{ "ccedil",	231 },
	{ "Ccedil",	199 },
	{ "copy",	169 },
	{ "eacute",	233 },
	{ "Eacute",	201 },
	{ "ecirc",	234 },
	{ "Ecirc",	202 },
	{ "egrave",	232 },
	{ "Egrave",	200 },
	{ "euml",	235 },
	{ "Euml",	203 },
	{ "gt",		'>' },
	{ "iacute",	237 },
	{ "Iacute",	205 },
	{ "icirc",	238 },
	{ "Icirc",	206 },
	{ "igrave",	236 },
	{ "Igrave",	204 },
	{ "iuml",	239 },
	{ "Iuml",	207 },
	{ "lt",		'<' },
	{ "nbsp",	' ' },
	{ "ntilde",	241 },
	{ "Ntilde",	209 },
	{ "oacute",	243 },
	{ "Oacute",	211 },
	{ "ocirc",	244 },
	{ "Ocirc",	212 },
	{ "ograve",	242 },
	{ "Ograve",	210 },
	{ "oslash",	248 },
	{ "Oslash",	216 },
	{ "otilde",	245 },
	{ "Otilde",	213 },
	{ "ouml",	246 },
	{ "Ouml",	214 },
	{ "quot",	'"' },
	{ "reg",	174 },
	{ "szlig",	223 },
	{ "uacute",	250 },
	{ "Uacute",	218 },
	{ "ucirc",	251 },
	{ "Ucirc",	219 },
	{ "ugrave",	249 },
	{ "Ugrave",	217 },
	{ "uuml",	252 },
	{ "Uuml",	220 },
	{ "yuml",	255 },
};
128
129
static int
130
decode(Sfio_t* ip)
131
{
132
register int c;
133
register int i;
134
char name[32];
135
136
if ((c = sfgetc(ip)) == EOF)
137
return '&';
138
name[0] = c;
139
i = 1;
140
if (c != '#' && !isalpha(c))
141
goto bad;
142
while ((c = sfgetc(ip)) != EOF && c != ';')
143
{
144
if (c == '&')
145
i = 0;
146
else
147
{
148
name[i++] = c;
149
if (!isalnum(c) && (i > 1 || c != '#') || i >= (elementsof(name) - 1))
150
goto bad;
151
}
152
}
153
name[i] = 0;
154
if (name[0] == '#')
155
{
156
switch (c = strtol(name + 1, NiL, 10))
157
{
158
case 91:
159
c = '[';
160
break;
161
case 93:
162
c = ']';
163
break;
164
}
165
}
166
else
167
{
168
for (i = 0; i < elementsof(codes); i++)
169
if (streq(codes[i].name, name))
170
{
171
c = codes[i].code;
172
break;
173
}
174
if (i >= elementsof(codes))
175
goto bad;
176
}
177
return c;
178
bad:
179
name[i] = 0;
180
if (c == ';')
181
error(1, "&%s: unknown HTML special character -- & assumed", name);
182
else
183
error(1, "&%s: invalid HTML special character -- & assumed", name);
184
while (i--)
185
sfungetc(ip, name[i]);
186
return '&';
187
}
188
189
static int
190
sfpututf(Sfio_t* op, register int w)
191
{
192
if (!(w & ~0x7F))
193
return sfputc(op, w);
194
else if (!(w & ~0x7FF))
195
sfputc(op, 0xC0 + (w >> 6));
196
else if (!(w & ~0xFFFF))
197
{
198
sfputc(op, 0xE0 + (w >> 12));
199
sfputc(op, 0x80 + (w >> 6 ) & 0x3F);
200
}
201
else
202
return sfputc(op, '?');
203
return sfputc(op, 0x80 + (w & 0x3F));
204
}
205
206
static int
207
sfnext(Sfio_t* ip)
208
{
209
register int c;
210
211
while (isspace(c = sfgetc(ip)));
212
return c;
213
}
214
215
static void
216
html2msg(register Sfio_t* ip, register Sfio_t* op, int flags)
217
{
218
register int c;
219
register int q;
220
221
again:
222
while ((c = sfgetc(ip)) != EOF)
223
if (c == '<')
224
{
225
if ((c = sfnext(ip)) == 'O' &&
226
(c = sfnext(ip)) == 'L' &&
227
isspace(c = sfgetc(ip)) &&
228
(c = sfnext(ip)) == 'S' &&
229
(c = sfnext(ip)) == 'T' &&
230
(c = sfnext(ip)) == 'A' &&
231
(c = sfnext(ip)) == 'R' &&
232
(c = sfnext(ip)) == 'T' &&
233
(c = sfnext(ip)) == '=' &&
234
(c = sfnext(ip)) == '"' &&
235
(c = sfnext(ip)) == '5' &&
236
(c = sfnext(ip)) == '5' &&
237
(c = sfnext(ip)) == '0' &&
238
(c = sfnext(ip)) == '7' &&
239
(c = sfnext(ip)) == '1' &&
240
(c = sfnext(ip)) == '7' &&
241
(c = sfnext(ip)) == '"' &&
242
(c = sfnext(ip)) == '>')
243
break;
244
while (c != EOF && c != '>')
245
c = sfgetc(ip);
246
}
247
if ((c = sfnext(ip)) != EOF)
248
sfungetc(ip, c);
249
q = 0;
250
for (;;)
251
{
252
switch (c = sfgetc(ip))
253
{
254
case EOF:
255
break;
256
case '&':
257
c = decode(ip);
258
sfpututf(op, c);
259
if (isspace(c))
260
{
261
while (isspace(c = sfgetc(ip)));
262
if (c == EOF)
263
break;
264
sfungetc(ip, c);
265
}
266
continue;
267
case '<':
268
switch (c = sfnext(ip))
269
{
270
case '/':
271
if ((c = sfnext(ip)) == 'O' &&
272
(c = sfgetc(ip)) == 'L' &&
273
(c = sfnext(ip)) == '>')
274
{
275
if (q)
276
{
277
sfputc(op, q);
278
q = '"';
279
}
280
goto again;
281
}
282
break;
283
case 'B':
284
if ((c = sfgetc(ip)) == 'R' &&
285
(c = sfnext(ip)) == '>')
286
sfputc(op, ' ');
287
break;
288
case 'L':
289
if ((c = sfgetc(ip)) == 'I' &&
290
(c = sfnext(ip)) == '>' &&
291
isdigit(c = sfnext(ip)))
292
{
293
if (q)
294
sfputc(op, q);
295
else
296
q = '"';
297
sfputc(op, '\n');
298
do
299
{
300
sfputc(op, c);
301
} while (isdigit(c = sfgetc(ip)));
302
if (c == EOF)
303
break;
304
sfputc(op, ' ');
305
sfputc(op, '"');
306
if (isspace(c))
307
c = sfnext(ip);
308
if (c == '<' &&
309
(c = sfnext(ip)) == 'L' &&
310
(c = sfgetc(ip)) == 'I' &&
311
(c = sfnext(ip)) == '>')
312
/* great */;
313
continue;
314
}
315
break;
316
case 'P':
317
if ((c = sfnext(ip)) == '>')
318
sfputc(op, '\n');
319
else if (c == 'C' &&
320
(c = sfgetc(ip)) == 'L' &&
321
(c = sfgetc(ip)) == 'A' &&
322
(c = sfgetc(ip)) == 'S' &&
323
(c = sfgetc(ip)) == 'S' &&
324
(c = sfnext(ip)) == '=' &&
325
(c = sfnext(ip)) == '"')
326
for (;;)
327
{
328
switch (c = sfgetc(ip))
329
{
330
case EOF:
331
case '"':
332
break;
333
case '&':
334
c = decode(ip);
335
sfpututf(op, c);
336
continue;
337
default:
338
sfpututf(op, c);
339
continue;
340
}
341
break;
342
}
343
break;
344
}
345
while (c != EOF && c != '>')
346
c = sfgetc(ip);
347
if (c == EOF || (c = sfgetc(ip)) == EOF)
348
break;
349
sfungetc(ip, c);
350
continue;
351
case '"':
352
if (!flags)
353
sfputc(op, '\\');
354
sfputc(op, c);
355
continue;
356
case '\n':
357
if (flags)
358
{
359
sfputc(op, c);
360
continue;
361
}
362
/*FALLTHROUGH*/
363
case ' ':
364
case '\t':
365
while ((c = sfgetc(ip)) != EOF)
366
if (c == '&')
367
{
368
c = decode(ip);
369
if (!isspace(c))
370
sfputc(op, ' ');
371
sfpututf(op, c);
372
break;
373
}
374
else if (!isspace(c))
375
{
376
if (c == '<')
377
{
378
c = sfgetc(ip);
379
if (c == EOF)
380
break;
381
sfungetc(ip, c);
382
sfungetc(ip, '<');
383
if (c != 'L' && c != '/')
384
sfputc(op, ' ');
385
}
386
else
387
{
388
if (c != EOF)
389
sfungetc(ip, c);
390
sfputc(op, ' ');
391
}
392
break;
393
}
394
continue;
395
case '\r':
396
case '[':
397
case ']':
398
continue;
399
default:
400
sfpututf(op, c);
401
continue;
402
}
403
break;
404
}
405
if (q)
406
sfputc(op, q);
407
sfputc(op, '\n');
408
}
409
410
static void
411
encode(Sfio_t* op, register int c)
412
{
413
if (c == '<')
414
sfprintf(op, "&lt;");
415
else if (c == '>')
416
sfprintf(op, "&gt;");
417
else if (c == '"')
418
sfprintf(op, "&quot;");
419
else if (c == '&')
420
sfprintf(op, "&amp;");
421
else if (c == '[')
422
sfprintf(op, "&#091;");
423
else if (c == ']')
424
sfprintf(op, "&#093;");
425
else
426
sfputc(op, c);
427
}
428
429
static void
430
msg2html(register Sfio_t* ip, register Sfio_t* op, register int flags)
431
{
432
register char* s;
433
register int c;
434
register int q;
435
register int p;
436
437
sfprintf(op, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0//EN\"><HTML><HEAD><!-- text massaged for external translation --></HEAD><BODY>\n");
438
sfprintf(op, "<OL START=\"550717\">\n");
439
p = q = 0;
440
while (s = sfgetr(ip, '\n', 1))
441
{
442
error_info.line++;
443
if (flags)
444
sfprintf(op, "<P>");
445
else
446
{
447
if (*s == '$')
448
{
449
if (p)
450
sfprintf(op, "<P>");
451
else
452
p = 1;
453
sfprintf(op, "<P CLASS=\"", s);
454
while (c = *s++)
455
encode(op, c);
456
sfprintf(op, "\">\n");
457
continue;
458
}
459
p = 0;
460
if (!isdigit(*s))
461
continue;
462
sfprintf(op, "<LI>");
463
while (isdigit(c = *s++))
464
sfputc(op, c);
465
sfprintf(op, "<LI>");
466
while (c && c != '"')
467
c = *s++;
468
if (!c)
469
s--;
470
else if (isspace(*s))
471
{
472
s++;
473
sfprintf(op, "<BR>");
474
}
475
}
476
for (;;)
477
{
478
switch (c = *s++)
479
{
480
case 0:
481
flags &= ~MSG_SPLICE;
482
if (q)
483
{
484
q = 0;
485
sfprintf(op, "\">");
486
}
487
sfputc(op, '\n');
488
break;
489
case '<':
490
sfprintf(op, "&lt;");
491
continue;
492
case '>':
493
sfprintf(op, "&gt;");
494
continue;
495
case '&':
496
sfprintf(op, "&amp;");
497
continue;
498
case '[':
499
sfprintf(op, "&#091;");
500
continue;
501
case ']':
502
sfprintf(op, "&#093;");
503
continue;
504
case '$':
505
if (!q)
506
{
507
q = 1;
508
sfprintf(op, "<P CLASS=\"");
509
}
510
sfputc(op, c);
511
while (isalnum(c = *s++))
512
sfputc(op, c);
513
s--;
514
continue;
515
case '%':
516
if (!q)
517
{
518
q = 1;
519
sfprintf(op, "<P CLASS=\"");
520
}
521
sfputc(op, c);
522
if (*s == '%')
523
sfputc(op, *s++);
524
else
525
do
526
{
527
if (!(c = *s++) || c == '"')
528
{
529
s--;
530
break;
531
}
532
encode(op, c);
533
} while (!isalpha(c) || (!islower(c) || c == 'h' || c == 'l') && isalpha(*s));
534
if (SPACE(s))
535
sfprintf(op, "&nbsp;");
536
continue;
537
case '"':
538
if (!(flags & MSG_RAW))
539
{
540
s = "";
541
continue;
542
}
543
/*FALLTHROUGH*/
544
case '\'':
545
case ':':
546
case '/':
547
case '+':
548
case '@':
549
if (!q)
550
{
551
q = 1;
552
sfprintf(op, "<P CLASS=\"");
553
}
554
/*FALLTHROUGH*/
555
case '.':
556
case ',':
557
sfputc(op, c);
558
if (SPACE(s))
559
sfprintf(op, "&nbsp;");
560
continue;
561
case '\\':
562
if (!(c = *s++))
563
{
564
flags |= MSG_SPLICE;
565
break;
566
}
567
if (c != 'n' && c != 't')
568
{
569
if (!q)
570
{
571
q = 1;
572
sfprintf(op, "<P CLASS=\"");
573
}
574
sfputc(op, '\\');
575
encode(op, c);
576
if (c == 'b')
577
{
578
for (;;)
579
{
580
if (!(c = *s++) || c == '"')
581
{
582
s--;
583
break;
584
}
585
if (c == '?')
586
{
587
if (*s != '?')
588
{
589
s--;
590
break;
591
}
592
sfputc(op, c);
593
sfputc(op, *s++);
594
continue;
595
}
596
if (c == '\\')
597
{
598
if (!*s)
599
break;
600
sfputc(op, c);
601
if (*s == 'a' || *s == 'b' || *s == '0')
602
{
603
sfputc(op, *s++);
604
break;
605
}
606
c = *s++;
607
}
608
encode(op, c);
609
}
610
}
611
else if (isdigit(c) && isdigit(*s))
612
{
613
sfputc(op, *s++);
614
if (isdigit(*s))
615
sfputc(op, *s++);
616
}
617
if (SPACE(s))
618
sfprintf(op, "&nbsp;");
619
continue;
620
}
621
/*FALLTHROUGH*/
622
case ' ':
623
case '\t':
624
while (isspace(*s) || *s == '\\' && (*(s + 1) == 'n' || *(s + 1) == 't') && s++)
625
s++;
626
if (*s == '"')
627
{
628
if (q)
629
{
630
q = 0;
631
sfprintf(op, " \">");
632
}
633
else
634
sfprintf(op, "<BR>");
635
continue;
636
}
637
c = ' ';
638
/*FALLTHROUGH*/
639
default:
640
if (q)
641
{
642
q = 0;
643
sfprintf(op, "\">");
644
}
645
sfputc(op, c);
646
continue;
647
}
648
break;
649
}
650
}
651
sfprintf(op, "</OL>\n");
652
sfprintf(op, "</BODY></HTML>\n");
653
error_info.line = 0;
654
}
655
656
int
657
main(int argc, char** argv)
658
{
659
int flags = 0;
660
Convert_f convert = msg2html;
661
662
NoP(argc);
663
error_info.id = "msgcvt";
664
for (;;)
665
{
666
switch (optget(argv, usage))
667
{
668
case 'h':
669
convert = msg2html;
670
continue;
671
case 'm':
672
convert = html2msg;
673
continue;
674
case 'r':
675
flags |= MSG_RAW;
676
continue;
677
case '?':
678
error(ERROR_USAGE|4, "%s", opt_info.arg);
679
continue;
680
case ':':
681
error(2, "%s", opt_info.arg);
682
continue;
683
}
684
break;
685
}
686
argv += opt_info.index;
687
if (error_info.errors)
688
error(ERROR_USAGE|4, "%s", optusage(NiL));
689
(*convert)(sfstdin, sfstdout, flags);
690
return error_info.errors != 0;
691
}
692
693