Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
att
GitHub Repository: att/ast
Path: blob/master/src/cmd/html/html2rtf.c
1808 views
1
/***********************************************************************
2
* *
3
* This software is part of the ast package *
4
* Copyright (c) 1996-2011 AT&T Intellectual Property *
5
* and is licensed under the *
6
* Eclipse Public License, Version 1.0 *
7
* by AT&T Intellectual Property *
8
* *
9
* A copy of the License is available at *
10
* http://www.eclipse.org/org/documents/epl-v10.html *
11
* (with md5 checksum b35adb5213ca9657e911e9befb180842) *
12
* *
13
* Information and Software Systems Research *
14
* AT&T Research *
15
* Florham Park NJ *
16
* *
17
* Glenn Fowler <[email protected]> *
18
* *
19
***********************************************************************/
20
#pragma prototyped
21
/*
22
* Glenn Fowler
23
* AT&T Research
24
*
25
* html to rtf filter
26
*/
27
28
static const char usage[] =
29
"[-?\n@(#)$Id: html2rtf (AT&T Research) 1999-01-01 $\n]"
30
USAGE_LICENSE
31
"[+NAME?html2rtf - html to rtf filter]"
32
"[+DESCRIPTION?\bhtml2rtf\b converts input \bhtml\b documents to an \bRTF\b"
33
" document on the standard output. \bhtml2rtf\b expects properly nested"
34
" begin/end tags in the input \bhtml\b and warns about imbalance.]"
35
36
"[d:debug?Set the debug trace level to \alevel\a. Higher levels produce"
37
" more output.]#[level]"
38
"[f:font-size?Set the initial font size to \asize\a points.]#[size:=12]"
39
"[p:project-file?Appends MS HELP project information to the help project file"
40
" \afile\a. This file combines individual RTF files into a"
41
" hyper-linked collection. Note that MS expects \afile\a to have a"
42
" \b.hlp\b extension.]:[file]"
43
"[v:verbose?Enable verbose error and warning messages. Some \bhtml\b source"
44
" can't stand the heat.]"
45
46
"\n"
47
"\n[ file ... ]\n"
48
"\n"
49
50
"[+SEE ALSO?\bman\b(1), \bmm\b(1), \bmm2html\b(1), \btroff\b(1),"
51
" \btroff2html\b(1)]"
52
;
53
54
#include "html2rtf.h"
55
56
#include <error.h>
57
58
#define LIST_INDENT 140
59
#define STK_LIST_COMPACT (STK_TAG<<0)
60
61
#define a_close data[0].number
62
#define a_label data[1].string
63
64
#define font_size data[0].number
65
66
#define list_counter data[0].number
67
#define list_hanging data[1].number
68
#define list_indent data[2].number
69
#define list_label data[3].string
70
#define list_type data[4].number
71
72
#define title_cc data[0].number
73
#define title_lastlastc data[1].number
74
#define title_op data[2].io
75
#define title_tc data[3].number
76
77
State_t state;
78
79
/*
80
* return the attribute pointer for name in ap
81
*/
82
83
static Attribute_t*
84
attribute(register Attribute_t* ap, const char* name)
85
{
86
if (ap)
87
for (; ap->name; ap++)
88
if (!strcasecmp(ap->name, name))
89
return ap;
90
return 0;
91
}
92
93
/*
94
* new paragraph with optional hanging indent
95
*/
96
97
static void
98
par(int hanging, const char* tail)
99
{
100
if (hanging && !(state.sp->flags & STK_LIST_COMPACT))
101
sfputr(state.out, "\\line", -1);
102
sfputr(state.out, "\\par\\pard", -1);
103
if (state.center)
104
sfputr(state.out, "\\qc", -1);
105
if (hanging)
106
sfprintf(state.out, "\\fi%d", twips(state.hanging - state.indent));
107
sfprintf(state.out, "\\li%d\\tx%d\\tx20000%s", twips(state.indent), twips(state.indent), tail ? tail : "");
108
state.sep = 1;
109
}
110
111
static void
112
anchor(int ref, register char* s)
113
{
114
register int c;
115
116
if (s)
117
{
118
if (ref)
119
{
120
if (*s != '#')
121
{
122
error(1, "%s: unknown link", s);
123
return;
124
}
125
s++;
126
sfprintf(state.out, "{\\uldb %s}{\\v", s);
127
}
128
else
129
sfputr(state.out, "#{\\footnote", -1);
130
sfprintf(state.out, " %s.", state.prefix);
131
while (c = *s++)
132
sfputc(state.out, isalnum(c) ? c : '.');
133
sfputc(state.out, '}');
134
if (ref)
135
{
136
sfputr(state.out, "{\\*\\comment", -1);
137
state.sep = 1;
138
state.sp->a_close = 1;
139
}
140
}
141
}
142
143
static int
144
start_a(Tag_t* tp, Attribute_t* ap)
145
{
146
Attribute_t* op;
147
148
NoP(tp);
149
state.sp->a_close = 0;
150
if (op = attribute(ap, "HREF"))
151
anchor(1, op->value);
152
if (op = attribute(ap, "NAME"))
153
anchor(0, op->value);
154
return 1;
155
}
156
157
static int
158
end_a(Tag_t* tp, Attribute_t* ap)
159
{
160
NoP(tp);
161
NoP(ap);
162
if (state.sp->a_close)
163
{
164
sfputc(state.out, '}');
165
state.sep = 0;
166
}
167
return 0;
168
}
169
170
static int
171
start_b(Tag_t* tp, Attribute_t* ap)
172
{
173
NoP(tp);
174
NoP(ap);
175
sfputr(state.out, "{\\b", -1);
176
state.sep = 1;
177
return 1;
178
}
179
180
static int
181
start_bq(Tag_t* tp, Attribute_t* ap)
182
{
183
NoP(tp);
184
NoP(ap);
185
state.sp->list_hanging = state.hanging;
186
state.sp->list_indent = state.indent;
187
state.hanging += LIST_INDENT;
188
state.indent = state.hanging;
189
par(0, NiL);
190
sfprintf(state.out, "\\ri%d", state.indent);
191
state.sep = 1;
192
return 1;
193
}
194
195
static int
196
end_bq(Tag_t* tp, Attribute_t* ap)
197
{
198
NoP(tp);
199
NoP(ap);
200
state.hanging = state.sp->list_hanging;
201
state.indent = state.sp->list_indent;
202
sfprintf(state.out, "\\ri0");
203
state.sep = 1;
204
return 1;
205
}
206
207
static int
208
start_body(Tag_t* tp, Attribute_t* ap)
209
{
210
NoP(tp);
211
NoP(ap);
212
return 0;
213
}
214
215
static int
216
start_br(Tag_t* tp, Attribute_t* ap)
217
{
218
NoP(tp);
219
NoP(ap);
220
sfputr(state.out, "\\line", -1);
221
state.sep = 1;
222
return 0;
223
}
224
225
static int
226
start_caption(Tag_t* tp, Attribute_t* ap)
227
{
228
NoP(tp);
229
NoP(ap);
230
return 0;
231
}
232
233
static int
234
start_center(Tag_t* tp, Attribute_t* ap)
235
{
236
NoP(tp);
237
NoP(ap);
238
state.center++;
239
state.pre++;
240
par(0, NiL);
241
return 1;
242
}
243
244
static int
245
end_center(Tag_t* tp, Attribute_t* ap)
246
{
247
NoP(tp);
248
NoP(ap);
249
if (state.center > 0)
250
state.center--;
251
if (state.pre > 0)
252
state.pre--;
253
return 1;
254
}
255
256
static int
257
start_dd(Tag_t* tp, Attribute_t* ap)
258
{
259
NoP(tp);
260
NoP(ap);
261
if (state.sp->flags & STK_HEADING)
262
par(0, NiL);
263
else
264
sfputr(state.out, "\\tab", -1);
265
state.sep = 1;
266
return 1;
267
}
268
269
static int
270
start_dl(Tag_t* tp, Attribute_t* ap)
271
{
272
NoP(tp);
273
if (attribute(ap, "COMPACT"))
274
state.sp->flags |= STK_LIST_COMPACT;
275
state.sp->list_hanging = state.hanging;
276
state.sp->list_indent = state.indent;
277
state.hanging += LIST_INDENT;
278
state.indent = state.hanging + LIST_INDENT * 2;
279
return 1;
280
}
281
282
static int
283
end_LIST(Tag_t* tp, Attribute_t* ap)
284
{
285
NoP(tp);
286
NoP(ap);
287
state.hanging = state.sp->list_hanging;
288
state.indent = state.sp->list_indent;
289
return 1;
290
}
291
292
static int
293
start_dt(Tag_t* tp, Attribute_t* ap)
294
{
295
NoP(tp);
296
NoP(ap);
297
par(1, NiL);
298
return 1;
299
}
300
301
static int
302
start_fn(Tag_t* tp, Attribute_t* ap)
303
{
304
NoP(tp);
305
NoP(ap);
306
sfputr(state.out, "\\~[\\~", -1);
307
return 1;
308
}
309
310
static int
311
end_fn(Tag_t* tp, Attribute_t* ap)
312
{
313
NoP(tp);
314
NoP(ap);
315
sfputr(state.out, "\\~]\\~", -1);
316
return 0;
317
}
318
319
static int
320
start_font(Tag_t* tp, Attribute_t* ap)
321
{
322
char* s;
323
char* e;
324
int n;
325
Attribute_t* op;
326
327
NoP(tp);
328
if ((op = attribute(ap, "SIZE")) && (s = op->value) && (n = strtol(s, &e, 10)) && !*e)
329
{
330
if (*s == '+' || *s == '-')
331
n += state.fontsize;
332
state.sp->font_size = state.fontsize;
333
state.fontsize = n;
334
sfprintf(state.out, "{\\fs%d", twips(n));
335
state.sep = 1;
336
return 1;
337
}
338
return 0;
339
}
340
341
static int
342
end_font(Tag_t* tp, Attribute_t* ap)
343
{
344
NoP(tp);
345
NoP(ap);
346
state.fontsize = state.sp->font_size;
347
sfputc(state.out, '}');
348
state.sep = 0;
349
return 1;
350
}
351
352
static int
353
start_H(Tag_t* tp, Attribute_t* ap)
354
{
355
NoP(tp);
356
NoP(ap);
357
(state.sp - 1)->flags |= STK_HEADING;
358
state.sp->font_size = state.fontsize;
359
state.fontsize += (7 - (tp->name[1] - '0')) * 1;
360
sfprintf(state.out, "{\\b\\fs%d", twips(state.fontsize));
361
state.sep = 1;
362
return 1;
363
}
364
365
static int
366
end_H(Tag_t* tp, Attribute_t* ap)
367
{
368
NoP(tp);
369
NoP(ap);
370
state.fontsize = state.sp->font_size;
371
sfputc(state.out, '}');
372
state.sep = 0;
373
return 1;
374
}
375
376
static int
377
start_head(Tag_t* tp, Attribute_t* ap)
378
{
379
NoP(tp);
380
NoP(ap);
381
return 0;
382
}
383
384
static int
385
end_head(Tag_t* tp, Attribute_t* ap)
386
{
387
NoP(tp);
388
NoP(ap);
389
return 0;
390
}
391
392
static int
393
start_hr(Tag_t* tp, Attribute_t* ap)
394
{
395
NoP(tp);
396
NoP(ap);
397
sfputr(state.out, "{\\brdrt\\brdrsh\\par}", -1);
398
state.sep = 0;
399
return 0;
400
}
401
402
static int
403
start_html(Tag_t* tp, Attribute_t* ap)
404
{
405
char* s;
406
407
NoP(tp);
408
NoP(ap);
409
sfputr(state.out, "{\\rtf1 \\ansi \\deff0", '\n');
410
s = strchr(usage, '\n') + 5;
411
sfprintf(state.out, "{\\*\\comment generator: %-.*s}\n", strchr(usage, '\n') - s, s);
412
sfputr(state.out, "{\\fonttbl", '\n');
413
sfputr(state.out, "{\\f0 \\froman Times New Roman;}", '\n');
414
sfputr(state.out, "{\\f1 \\fmodern Line Printer;}", '\n');
415
sfputr(state.out, "{\\f2 \\froman Symbol;}", '\n');
416
sfputr(state.out, "{\\f3 \\fswiss Ariel;}", '\n');
417
sfputr(state.out, "}", '\n');
418
sfprintf(state.out, "\\fs%d\n", twips(state.fontsize));
419
return 1;
420
}
421
422
static int
423
end_html(Tag_t* tp, Attribute_t* ap)
424
{
425
NoP(tp);
426
NoP(ap);
427
sfputr(state.out, "\n}", '\n');
428
return 1;
429
}
430
431
static int
432
start_i(Tag_t* tp, Attribute_t* ap)
433
{
434
NoP(tp);
435
NoP(ap);
436
sfputr(state.out, "{\\i", -1);
437
state.sep = 1;
438
return 1;
439
}
440
441
static int
442
start_img(Tag_t* tp, Attribute_t* ap)
443
{
444
NoP(tp);
445
NoP(ap);
446
return 0;
447
}
448
449
/*
450
* NOTE: roman() transcribed from GNU groff
451
*/
452
453
static void
454
roman(register int n, int format)
455
{
456
register char* dig;
457
register int i;
458
register int m;
459
460
dig = islower(format) ? "zwmdclxvi" : "ZWMDCLXVI";
461
if (n <= -40000 || n >= 40000)
462
{
463
sfprintf(state.out, "<%d>", n);
464
return;
465
}
466
if (n == 0)
467
{
468
sfputc(state.out, '0');
469
return;
470
}
471
if (n < 0)
472
{
473
n = -n;
474
sfputc(state.out, '-');
475
}
476
while (n >= 10000)
477
{
478
n -= 10000;
479
sfputc(state.out, dig[0]);
480
}
481
for (i = 1000; i > 0; i /= 10, dig += 2)
482
{
483
m = n / i;
484
n -= m * i;
485
switch (m)
486
{
487
case 9:
488
sfputc(state.out, dig[2]);
489
sfputc(state.out, dig[0]);
490
break;
491
case 8:
492
sfputc(state.out, dig[1]);
493
sfputc(state.out, dig[2]);
494
sfputc(state.out, dig[2]);
495
sfputc(state.out, dig[2]);
496
break;
497
case 7:
498
sfputc(state.out, dig[1]);
499
sfputc(state.out, dig[2]);
500
sfputc(state.out, dig[2]);
501
break;
502
case 6:
503
sfputc(state.out, dig[1]);
504
sfputc(state.out, dig[2]);
505
break;
506
case 5:
507
sfputc(state.out, dig[1]);
508
break;
509
case 4:
510
sfputc(state.out, dig[2]);
511
sfputc(state.out, dig[1]);
512
break;
513
case 3:
514
sfputc(state.out, dig[2]);
515
/*FALLTHROUGH*/
516
case 2:
517
sfputc(state.out, dig[2]);
518
/*FALLTHROUGH*/
519
case 1:
520
sfputc(state.out, dig[2]);
521
break;
522
}
523
}
524
}
525
526
static int
527
start_li(Tag_t* tp, Attribute_t* ap)
528
{
529
NoP(tp);
530
NoP(ap);
531
par(1, "{\\b ");
532
switch (state.sp->list_type)
533
{
534
case '1':
535
sfprintf(state.out, "%d.", state.sp->list_counter);
536
break;
537
case 'A':
538
sfprintf(state.out, "%c)", 'A' + state.sp->list_counter);
539
break;
540
case 'a':
541
sfprintf(state.out, "%c)", 'a' + state.sp->list_counter);
542
break;
543
case 'I':
544
case 'i':
545
roman(state.sp->list_counter, state.sp->list_type);
546
sfputc(state.out, ')');
547
break;
548
default:
549
sfputr(state.out, state.sp && state.sp->list_label ? state.sp->list_label : "\\bullet", -1);
550
break;
551
}
552
state.sp->list_counter++;
553
sfputr(state.out, "}\\tab", -1);
554
state.sep = 1;
555
return 1;
556
}
557
558
static int
559
start_meta(Tag_t* tp, Attribute_t* ap)
560
{
561
Attribute_t* op;
562
563
NoP(tp);
564
if ((op = attribute(ap, "NAME")) && op->value)
565
{
566
sfprintf(state.out, "{\\*\\comment %s", op->value);
567
if ((op = attribute(ap, "CONTENT")) && op->value)
568
sfprintf(state.out, ": %s", op->value);
569
sfputr(state.out, "}", '\n');
570
}
571
return 0;
572
}
573
574
static int
575
start_ol(Tag_t* tp, Attribute_t* ap)
576
{
577
char* e;
578
Attribute_t* op;
579
580
NoP(tp);
581
if (attribute(ap, "COMPACT"))
582
state.sp->flags |= STK_LIST_COMPACT;
583
if (!(op = attribute(ap, "START")) || !op->value || (state.sp->list_counter = strtol(op->value, &e, 10)) < 0 || *e)
584
state.sp->list_counter = 1;
585
state.sp->list_type = (op = attribute(ap, "TYPE")) && op->value ? *op->value : '1';
586
state.sp->list_hanging = state.hanging;
587
state.sp->list_indent = state.indent;
588
state.hanging += LIST_INDENT;
589
state.indent = state.hanging + LIST_INDENT;
590
return 1;
591
}
592
593
static int
594
start_p(Tag_t* tp, Attribute_t* ap)
595
{
596
register char* s;
597
Attribute_t* op;
598
599
NoP(tp);
600
par(0, NiL);
601
if ((op = attribute(ap, "ALIGN")) && (s = op->value))
602
{
603
if (!strcasecmp(s, "CENTER"))
604
sfputr(state.out, "\\qc", -1);
605
else if (!strcasecmp(s, "LEFT"))
606
sfputr(state.out, "\\ql", -1);
607
else if (!strcasecmp(s, "RIGHT"))
608
sfputr(state.out, "\\qr", -1);
609
}
610
return 1;
611
}
612
613
static int
614
start_pre(Tag_t* tp, Attribute_t* ap)
615
{
616
NoP(tp);
617
NoP(ap);
618
state.pre++;
619
return 1;
620
}
621
622
static int
623
end_pre(Tag_t* tp, Attribute_t* ap)
624
{
625
NoP(tp);
626
NoP(ap);
627
if (state.pre > 0)
628
state.pre--;
629
return 1;
630
}
631
632
static int
633
start_rendering(register Tag_t* tp, Attribute_t* ap)
634
{
635
register Render_t* rp;
636
register int i;
637
638
if (rp = (Render_t*)tp->data)
639
for (i = 0; i < rp->tags; i++)
640
if ((tp = rp->tag[i]) && tp->start)
641
(*tp->start)(tp, ap);
642
return 1;
643
}
644
645
static int
646
end_rendering(register Tag_t* tp, Attribute_t* ap)
647
{
648
register Render_t* rp;
649
register int i;
650
651
if (rp = (Render_t*)tp->data)
652
for (i = rp->tags - 1; i > 0; i--)
653
if ((tp = rp->tag[i]) && tp->end)
654
(*tp->end)(tp, ap);
655
return 1;
656
}
657
658
static int
659
start_render(register Tag_t* tp, Attribute_t* ap)
660
{
661
register Render_t* rp;
662
register char* s;
663
register char* e;
664
register int n;
665
Attribute_t* op;
666
667
if ((op = attribute(ap, "TAG")) && (s = op->value))
668
{
669
if (tp = (Tag_t*)hashget(state.tags, s))
670
{
671
if (tp->data)
672
free(tp->data);
673
tp->start = 0;
674
tp->end = 0;
675
tp->data = 0;
676
}
677
else if (!(tp = newof(NiL, Tag_t, 1, 0)) || !(tp->name = hashput(state.tags, 0, tp)))
678
error(ERROR_SYSTEM|3, "out of space [tag]");
679
if ((op = attribute(ap, "STYLE")) && (s = op->value))
680
{
681
for (n = 0, e = s; e && (e = strchr(e, ',')); n++, e++);
682
if (!(rp = newof(NiL, Render_t, 1, n * sizeof(Tag_t*))))
683
error(ERROR_SYSTEM|3, "out of space [render]");
684
n = 0;
685
do
686
{
687
if (e = strchr(s, ','))
688
*e++ = 0;
689
if (rp->tag[n] = (Tag_t*)hashget(state.tags, s))
690
n++;
691
} while (s = e);
692
if (!(rp->tags = n))
693
free(rp);
694
else
695
{
696
tp->start = start_rendering;
697
tp->end = end_rendering;
698
tp->data = (void*)rp;
699
}
700
}
701
}
702
return 0;
703
}
704
705
static int
706
start_sub(Tag_t* tp, Attribute_t* ap)
707
{
708
NoP(tp);
709
NoP(ap);
710
return 0;
711
}
712
713
static int
714
end_sub(Tag_t* tp, Attribute_t* ap)
715
{
716
NoP(tp);
717
NoP(ap);
718
return 0;
719
}
720
721
static int
722
start_sup(Tag_t* tp, Attribute_t* ap)
723
{
724
NoP(tp);
725
NoP(ap);
726
return 0;
727
}
728
729
static int
730
end_sup(Tag_t* tp, Attribute_t* ap)
731
{
732
NoP(tp);
733
NoP(ap);
734
return 0;
735
}
736
737
static int
738
start_table(Tag_t* tp, Attribute_t* ap)
739
{
740
NoP(tp);
741
NoP(ap);
742
state.center++;
743
par(0, NiL);
744
return 1;
745
}
746
747
static int
748
end_table(Tag_t* tp, Attribute_t* ap)
749
{
750
NoP(tp);
751
NoP(ap);
752
if (state.center > 0)
753
state.center++;
754
sfputr(state.out, "}", '\n');
755
return 0;
756
}
757
758
static int
759
start_td(Tag_t* tp, Attribute_t* ap)
760
{
761
NoP(tp);
762
NoP(ap);
763
return 0;
764
}
765
766
static int
767
end_td(Tag_t* tp, Attribute_t* ap)
768
{
769
NoP(tp);
770
NoP(ap);
771
return 1;
772
}
773
774
static int
775
start_th(Tag_t* tp, Attribute_t* ap)
776
{
777
register Attribute_t* op;
778
779
NoP(tp);
780
if (!(op = attribute(ap, "ALIGN")) || !op->value || *op->value != 'l' && *op->value != 'L')
781
sfputr(state.out, "\\~\\~\\~\\~\\~\\~\\~\\~\\~\\~\\~\\~", -1);
782
return 0;
783
}
784
785
static int
786
start_title(Tag_t* tp, Attribute_t* ap)
787
{
788
NoP(tp);
789
NoP(ap);
790
state.pre++;
791
return 1;
792
}
793
794
static int
795
end_title(Tag_t* tp, Attribute_t* ap)
796
{
797
NoP(tp);
798
NoP(ap);
799
if (state.pre > 0)
800
state.pre--;
801
return 1;
802
}
803
804
static int
805
start_tr(Tag_t* tp, Attribute_t* ap)
806
{
807
NoP(tp);
808
NoP(ap);
809
return 0;
810
}
811
812
static int
813
end_tr(Tag_t* tp, Attribute_t* ap)
814
{
815
NoP(tp);
816
NoP(ap);
817
return 1;
818
}
819
820
static int
821
start_tt(Tag_t* tp, Attribute_t* ap)
822
{
823
NoP(tp);
824
NoP(ap);
825
sfputr(state.out, "{\\f1", -1);
826
state.sep = 1;
827
return 1;
828
}
829
830
static int
831
start_ul(Tag_t* tp, Attribute_t* ap)
832
{
833
Attribute_t* op;
834
835
NoP(tp);
836
if (attribute(ap, "COMPACT"))
837
state.sp->flags |= STK_LIST_COMPACT;
838
state.sp->list_type = 0;
839
switch ((op = attribute(ap, "TYPE")) && op->value ? *op->value : 0)
840
{
841
case 'c':
842
state.sp->list_label = "\\'b0";
843
break;
844
case 's':
845
state.sp->list_label = "\\'a4";
846
break;
847
default:
848
state.sp->list_label = "\\bullet";
849
break;
850
}
851
state.sp->list_hanging = state.hanging;
852
state.sp->list_indent = state.indent;
853
state.hanging += LIST_INDENT;
854
state.indent = state.hanging + LIST_INDENT;
855
return 1;
856
}
857
858
static int
859
start_var(Tag_t* tp, Attribute_t* ap)
860
{
861
NoP(tp);
862
NoP(ap);
863
sfputr(state.out, "{\\f3\\i", -1);
864
state.sep = 1;
865
return 1;
866
}
867
868
/*
869
* generic tag end
870
*/
871
872
static int
873
end(Tag_t* tp, Attribute_t* ap)
874
{
875
sfputc(state.out, '}');
876
state.sep = 0;
877
return 1;
878
}
879
880
/*
881
* convert html file in to rtf file out
882
*/
883
884
#define COMMENT 1
885
#define PUN 4
886
#define STRING 2
887
888
static void
889
process(char* file, register Sfio_t* ip, register Sfio_t* op)
890
{
891
register int c;
892
register int lastc;
893
register int item;
894
register int cc;
895
register int tc;
896
register char* s;
897
int lastlastc;
898
int quote;
899
int n;
900
Entity_t* ep;
901
Tag_t* tp;
902
Attribute_t attributes[16];
903
Attribute_t* ap;
904
Stack_t* sp;
905
906
error_info.file = file;
907
error_info.line = 1;
908
state.center = 0;
909
state.in = ip;
910
state.out = op;
911
state.pre = 0;
912
state.sp = state.sp_min;
913
ap = 0;
914
item = 0;
915
lastc = 0;
916
cc = tc = 0;
917
for (;;)
918
{
919
switch (c = sfgetc(ip))
920
{
921
case EOF:
922
goto done;
923
case '<':
924
if (!item)
925
{
926
item = c;
927
lastlastc = lastc;
928
quote = 0;
929
ap = attributes;
930
ap->name = 0;
931
ap->value = 0;
932
op = state.tmp;
933
if ((c = sfgetc(ip)) != EOF)
934
{
935
sfungetc(ip, c);
936
if (c == '!')
937
quote |= COMMENT;
938
}
939
continue;
940
}
941
break;
942
case '>':
943
if (item == '<' && !(quote & STRING))
944
{
945
item = 0;
946
if (!(s = sfstruse(op)))
947
error(ERROR_SYSTEM|3, "out of space");
948
op = state.out;
949
if (*s == '!')
950
{
951
if ((cc -= strlen(s)) <= 0)
952
{
953
cc = 0;
954
if ((c = sfgetc(ip)) != EOF)
955
{
956
if (c == '\n')
957
error_info.line++;
958
else
959
sfungetc(ip, c);
960
}
961
}
962
continue;
963
}
964
(ap + 1)->name = 0;
965
for (;;)
966
{
967
ap->name = s + (((unsigned int)ap->name) >> PUN);
968
if (!*ap->name)
969
ap->name = 0;
970
else if (ap->value)
971
{
972
ap->value = s + (((unsigned int)ap->value) >> PUN);
973
if (!*ap->value)
974
ap->value = 0;
975
}
976
if (ap == attributes)
977
break;
978
ap--;
979
}
980
if (c = *s == '/')
981
s++;
982
if (!(tp = (Tag_t*)hashget(state.tags, s)))
983
error(1, "<%s>: unknown tag", s);
984
else if (!c)
985
{
986
if (tp->end)
987
{
988
if (state.sp >= state.sp_max)
989
{
990
c = state.sp - state.sp_min;
991
n = (state.sp_max - state.sp_min + 1) * 2;
992
if (!(state.sp_min = oldof(state.sp_min, Stack_t, n, 0)))
993
error(ERROR_SYSTEM|3, "out of space [tag stack]");
994
state.sp_max = state.sp_min + n - 1;
995
state.sp = state.sp_min + c;
996
}
997
state.sp++;
998
state.sp->tag = tp;
999
state.sp->line = error_info.line;
1000
state.sp->flags = 0;
1001
if (tp->flags & TAG_IGNORE)
1002
{
1003
state.sp->title_cc = cc;
1004
state.sp->title_lastlastc = lastlastc;
1005
state.sp->title_op = op;
1006
state.sp->title_tc = tc;
1007
op = state.nul;
1008
sfstrseek(op, 0, SEEK_SET);
1009
}
1010
}
1011
if (tp->start && !(*tp->start)(tp, ap) && tp->end)
1012
state.sp->flags |= STK_NOEND;
1013
}
1014
else
1015
{
1016
sp = state.sp;
1017
if (state.sp->tag != tp)
1018
{
1019
for (;;)
1020
{
1021
if (sp == state.sp_min)
1022
{
1023
if (!(tp->flags & TAG_UNBALANCED))
1024
error(1, "</%s> has no matching <%s>", tp->name, tp->name);
1025
sp = 0;
1026
break;
1027
}
1028
if (sp->tag == tp)
1029
break;
1030
sp--;
1031
}
1032
if (sp)
1033
{
1034
while (state.sp > sp)
1035
{
1036
if (state.sp->tag->end && !(state.sp->flags & STK_NOEND))
1037
{
1038
if (!(state.sp->tag->flags & TAG_UNBALANCED))
1039
error(1, "<%s> on line %d has no matching </%s>", state.sp->tag->name, state.sp->line, state.sp->tag->name);
1040
(*state.sp->tag->end)(state.sp->tag, NiL);
1041
}
1042
state.sp--;
1043
}
1044
}
1045
}
1046
if (sp)
1047
{
1048
if (tp->end && !(state.sp->flags & STK_NOEND))
1049
(*tp->end)(tp, ap);
1050
if (tp->flags & TAG_IGNORE)
1051
{
1052
cc = state.sp->title_cc;
1053
lastlastc = state.sp->title_lastlastc;
1054
op = state.sp->title_op;
1055
tc = state.sp->title_tc;
1056
}
1057
state.sp--;
1058
}
1059
}
1060
ap = 0;
1061
lastc = lastlastc;
1062
continue;
1063
}
1064
break;
1065
case '=':
1066
if (ap && !ap->value)
1067
{
1068
sfputc(op, 0);
1069
ap->value = (char*)(sfstrtell(op) << PUN);
1070
continue;
1071
}
1072
break;
1073
case '"':
1074
if (ap)
1075
{
1076
quote ^= STRING;
1077
if (!(quote & COMMENT))
1078
continue;
1079
}
1080
break;
1081
case '&':
1082
if (!item)
1083
{
1084
item = c;
1085
op = state.tmp;
1086
continue;
1087
}
1088
break;
1089
case ';':
1090
if (item == '&')
1091
{
1092
item = 0;
1093
if (!(s = sfstruse(op)))
1094
error(ERROR_SYSTEM|3, "out of space");
1095
op = state.out;
1096
if (*s == '#')
1097
{
1098
n = (int)strtol(s + 1, NiL, 10) & 0377;
1099
cc += sfprintf(op, "\\'%02x", n);
1100
tc++;
1101
if (isspace(n))
1102
lastc = ' ';
1103
}
1104
else if (ep = (Entity_t*)hashget(state.entities, s))
1105
{
1106
cc += sfputr(op, ep->value, -1);
1107
tc++;
1108
if (ep->flags & ENT_SPACE)
1109
lastc = ' ';
1110
}
1111
else
1112
{
1113
error(1, "&%s;: unknown entity reference", s);
1114
cc += sfprintf(op, "&%s;", s);
1115
tc++;
1116
}
1117
continue;
1118
}
1119
break;
1120
case '{':
1121
case '}':
1122
case '\\':
1123
sfputc(op, '\\');
1124
cc++;
1125
state.sep = 0;
1126
break;
1127
case '\n':
1128
error_info.line++;
1129
if (state.pre && !item)
1130
{
1131
state.sep = 0;
1132
sfputr(op, "\\line", -1);
1133
cc += 5;
1134
tc = 0;
1135
break;
1136
}
1137
/*FALLTHROUGH*/
1138
case ' ':
1139
case '\t':
1140
case '\v':
1141
if (ap)
1142
{
1143
if (!quote)
1144
{
1145
if (lastc != ' ' && ap < &attributes[elementsof(attributes) - 1])
1146
{
1147
sfputc(op, 0);
1148
ap++;
1149
ap->name = (char*)(sfstrtell(op) << PUN);
1150
ap->value = 0;
1151
lastc = ' ';
1152
}
1153
continue;
1154
}
1155
}
1156
else if (!state.pre)
1157
{
1158
if (lastc == ' ')
1159
continue;
1160
c = ' ';
1161
if (cc >= 72)
1162
{
1163
cc = 0;
1164
sfputc(op, '\n');
1165
}
1166
}
1167
else if (c == ' ')
1168
{
1169
sfputr(op, "\\~", -1);
1170
cc += 2;
1171
tc++;
1172
state.sep = 0;
1173
continue;
1174
}
1175
else if (c == '\t')
1176
{
1177
do
1178
{
1179
sfputr(op, "\\~", -1);
1180
cc += 2;
1181
tc++;
1182
} while (tc % 8);
1183
state.sep = 0;
1184
continue;
1185
}
1186
break;
1187
default:
1188
if (iscntrl(c))
1189
continue;
1190
if (c > 0177)
1191
{
1192
cc += sfprintf(op, "\\'%02x", c & 0377);
1193
tc++;
1194
continue;
1195
}
1196
break;
1197
}
1198
if (state.sep && op == state.out)
1199
{
1200
state.sep = 0;
1201
if (c != ' ')
1202
{
1203
sfputc(op, ' ');
1204
cc++;
1205
tc++;
1206
}
1207
}
1208
lastc = c;
1209
sfputc(op, c);
1210
cc++;
1211
tc++;
1212
}
1213
done:
1214
while (state.sp > state.sp_min)
1215
{
1216
error(1, "<%s> on line %d has no matching </%s>", state.sp->tag->name, state.sp->line, state.sp->tag->name);
1217
state.sp--;
1218
}
1219
error_info.file = 0;
1220
error_info.line = 0;
1221
}
1222
1223
/*
 * hashwalk() callback: return 1 if project file must be updated
 * that is when the entry value v equals the handle h; main() stores
 * state.files as the value of each file it generates and project()
 * passes state.files as h
 */

static int
project_update(const char* s, char* v, void* h)
{
	NoP(s);
	return (char*)h == v;
}
1233
1234
/*
1235
* list project file names
1236
*/
1237
1238
static int
1239
project_list(const char* s, char* v, void* h)
1240
{
1241
NoP(v);
1242
sfputr((Sfio_t*)h, s, '\n');
1243
return 0;
1244
}
1245
1246
/*
1247
* create/update help project file
1248
*/
1249
1250
static void
1251
project(char* file)
1252
{
1253
register char* s;
1254
Sfio_t* fp;
1255
1256
if (state.files)
1257
{
1258
if (fp = sfopen(NiL, file, "r"))
1259
{
1260
while (s = sfgetr(fp, '\n', 1))
1261
{
1262
if (*s == '[' && !strncasecmp(s, "[FILES]", 7))
1263
{
1264
while ((s = sfgetr(fp, '\n', 1)) && *s != '[')
1265
hashput(state.files, s, &state);
1266
if (!s)
1267
break;
1268
}
1269
sfputr(state.tmp, s, '\n');
1270
}
1271
sfclose(fp);
1272
if (!(s = sfstruse(state.tmp)))
1273
error(ERROR_SYSTEM|3, "out of space");
1274
}
1275
else
1276
s = "\
1277
[OPTIONS]\n\
1278
COMPRESS=TRUE\n\
1279
REPORT=ON\n\
1280
TITLE=Manual\n\
1281
";
1282
if (hashwalk(state.files, 0, project_update, state.files))
1283
{
1284
if (!(fp = sfopen(NiL, file, "w")))
1285
error(ERROR_SYSTEM|2, "%s: cannot write", file);
1286
else
1287
{
1288
sfputr(fp, s, -1);
1289
sfputr(fp, "[FILES]", '\n');
1290
hashwalk(state.files, 0, project_list, fp);
1291
sfclose(fp);
1292
}
1293
}
1294
}
1295
}
1296
1297
/*
1298
* html to rtf entity reference map
1299
*/
1300
1301
static const Entity_t entities[] =
1302
{
1303
"AElig", "\\'c6", 0,
1304
"Aacute", "\\'c1", 0,
1305
"Acirc", "\\'c2", 0,
1306
"Agrave", "\\'c0", 0,
1307
"Aring", "\\'c5", 0,
1308
"Atilde", "\\'c3", 0,
1309
"Auml", "\\'c4", 0,
1310
"Ccedil", "\\'c7", 0,
1311
"ETH", "\\'d0", 0,
1312
"Eacute", "\\'c9", 0,
1313
"Ecirc", "\\'ca", 0,
1314
"Egrave", "\\'c8", 0,
1315
"Euml", "\\'cb", 0,
1316
"Iacute", "\\'cd", 0,
1317
"Icirc", "\\'ce", 0,
1318
"Igrave", "\\'cc", 0,
1319
"Iuml", "\\'cf", 0,
1320
"Ntilde", "\\'d1", 0,
1321
"Oacute", "\\'d3", 0,
1322
"Ocirc", "\\'d4", 0,
1323
"Ograve", "\\'d2", 0,
1324
"Oslash", "\\'d8", 0,
1325
"Otilde", "\\'d5", 0,
1326
"Ouml", "\\'d6", 0,
1327
"THORN", "\\'de", 0,
1328
"Uacute", "\\'da", 0,
1329
"Ucirc", "\\'db", 0,
1330
"Ugrave", "\\'d9", 0,
1331
"Uuml", "\\'dc", 0,
1332
"Yacute", "\\'dd", 0,
1333
"aacute", "\\'e1", 0,
1334
"acirc", "\\'e2", 0,
1335
"acute", "\\'b4", 0,
1336
"aelig", "\\'e6", 0,
1337
"agrave", "\\'e0", 0,
1338
"amp", "&", 0,
1339
"aring", "\\'e5", 0,
1340
"atilde", "\\'e3", 0,
1341
"auml", "\\'e4", 0,
1342
"brvbar", "\\'a6", 0,
1343
"ccedil", "\\'e7", 0,
1344
"cedil", "\\'b8", 0,
1345
"cent", "\\'a2", 0,
1346
"copy", "\\'a9", 0,
1347
"curren", "\\'a4", 0,
1348
"deg", "\\'b0", 0,
1349
"divide", "\\'f7", 0,
1350
"eacute", "\\'e9", 0,
1351
"ecirc", "\\'ea", 0,
1352
"egrave", "\\'e8", 0,
1353
"emdash", "\\emdash", 0,
1354
"emspace", "\\emspace", ENT_SPACE,
1355
"endash", "\\endash", 0,
1356
"enspace", "\\enspace", ENT_SPACE,
1357
"eth", "\\'f0", 0,
1358
"euml", "\\'eb", 0,
1359
"frac12", "\\'bd", 0,
1360
"frac14", "\\'bc", 0,
1361
"frac34", "\\'be", 0,
1362
"gt", ">", 0,
1363
"iacute", "\\'ed", 0,
1364
"icirc", "\\'ee", 0,
1365
"iexcl", "\\'a1", 0,
1366
"igrave", "\\'ec", 0,
1367
"iquest", "\\'bf", 0,
1368
"iuml", "\\'ef", 0,
1369
"laquo", "\\'ab", 0,
1370
"lt", "<", 0,
1371
"macr", "\\'af", 0,
1372
"micro", "\\'b5", 0,
1373
"middot", "\\bullet", 0,
1374
"nbsp", "\\~", ENT_SPACE,
1375
"not", "\\'ac", 0,
1376
"ntilde", "\\'f1", 0,
1377
"oacute", "\\'f3", 0,
1378
"ocirc", "\\'f4", 0,
1379
"ograve", "\\'f2", 0,
1380
"ordf", "\\'aa", 0,
1381
"ordm", "\\'ba", 0,
1382
"oslash", "\\'f8", 0,
1383
"otilde", "\\'f5", 0,
1384
"ouml", "\\'f6", 0,
1385
"para", "\\'b6", 0,
1386
"plusmn", "\\'b1", 0,
1387
"pound", "\\'a3", 0,
1388
"quot", "\"", 0,
1389
"raquo", "\\'bb", 0,
1390
"reg", "\\'ae", 0,
1391
"sect", "\\'a7", 0,
1392
"shy", "\\'ad", 0,
1393
"sup1", "\\'b9", 0,
1394
"sup2", "\\'b2", 0,
1395
"sup3", "\\'b3", 0,
1396
"szlig", "\\'df", 0,
1397
"thorn", "\\'fe", 0,
1398
"times", "\\'d7", 0,
1399
"uacute", "\\'fa", 0,
1400
"ucirc", "\\'fb", 0,
1401
"ugrave", "\\'f9", 0,
1402
"uml", "\\'a8", 0,
1403
"uuml", "\\'fc", 0,
1404
"yacute", "\\'fd", 0,
1405
"yen", "\\'a5", 0,
1406
"yuml", "\\'ff", 0,
1407
#if 0
1408
"trademark", "", 0,
1409
#endif
1410
};
1411
1412
/*
1413
* html tag table
1414
*/
1415
1416
static const Tag_t tags[] =
1417
{
1418
"A", start_a, end_a, 0,0,
1419
"ADDRESS", start_i, end, 0,0,
1420
"B", start_b, end, 0,0,
1421
"BLOCKQUOTE", start_bq, end_bq, 0,0,
1422
"BQ", start_bq, end_bq, 0,0,
1423
"BODY", start_body, end, 0,0,
1424
"BR", start_br, 0, 0,0,
1425
"CAPTION", start_caption, end, 0,0,
1426
"CENTER", start_center, end_center, 0,0,
1427
"CITE", start_i, end, 0,0,
1428
"CODE", start_tt, end, 0,0,
1429
"DD", start_dd, 0, 0,0,
1430
"DIR", start_ul, end_LIST, 0,0,
1431
"DL", start_dl, end_LIST, 0,0,
1432
"DT", start_dt, 0, 0,0,
1433
"EM", start_i, end, 0,0,
1434
"FN", start_fn, end_fn, 0,0,
1435
"FONT", start_font, end_font, 0,0,
1436
"H1", start_H, end_H, 0,0,
1437
"H2", start_H, end_H, 0,0,
1438
"H3", start_H, end_H, 0,0,
1439
"H4", start_H, end_H, 0,0,
1440
"H5", start_H, end_H, 0,0,
1441
"H6", start_H, end_H, 0,0,
1442
"HEAD", start_head, end_head, 0,TAG_UNBALANCED,
1443
"HR", start_hr, 0, 0,0,
1444
"HTML", start_html, end_html, 0,0,
1445
"I", start_i, end, 0,0,
1446
"IMG", start_img, 0, 0,0,
1447
"KBD", start_tt, end, 0,0,
1448
"LI", start_li, 0, 0,TAG_UNBALANCED,
1449
"META", start_meta, 0, 0,0,
1450
"MENU", start_ul, end_LIST, 0,0,
1451
"NULL", 0, 0, 0,0,
1452
"OL", start_ol, end_LIST, 0,0,
1453
"P", start_p, 0, 0,TAG_UNBALANCED,
1454
"PRE", start_pre, end_pre, 0,0,
1455
"RENDER", start_render, 0, 0,0,
1456
"SAMP", start_tt, end, 0,0,
1457
"STRONG", start_b, end, 0,0,
1458
"SUB", start_sub, end_sub, 0,0,
1459
"SUP", start_sup, end_sup, 0,0,
1460
"TABLE", start_table, end_table, 0,0,
1461
"TD", start_td, end_td, 0,0,
1462
"TH", start_th, 0, 0,0,
1463
"TITLE", start_title, end_title, 0,TAG_IGNORE,
1464
"TR", start_tr, end_tr, 0,0,
1465
"TT", start_tt, end, 0,0,
1466
"UL", start_ul, end_LIST, 0,0,
1467
"UNKNOWN", 0, 0, 0,0,
1468
"VAR", start_var, end, 0,0,
1469
};
1470
1471
/*
 * case insensitive hash
 * upper case characters are folded to lower case before being
 * mixed into the hash, so names that differ only in case collide
 * as needed by the strcasecmp() tag table
 */

static unsigned int
strcasehash(const char* s)
{
	register const unsigned char*	p;
	register unsigned int		h;
	register unsigned int		c;

	h = 0;
	for (p = (const unsigned char*)s; (c = *p) != 0; p++)
	{
		if (isupper(c))
			c = tolower(c);
		HASHPART(h, c);
	}
	return h;
}
1490
1491
/*
1492
* initialize the global data
1493
*/
1494
1495
static void
1496
init(void)
1497
{
1498
register int i;
1499
1500
if (!state.nul && !(state.nul = sfstropen()))
1501
error(ERROR_SYSTEM|3, "out of space [nul buffer]");
1502
if (!state.tmp && !(state.tmp = sfstropen()))
1503
error(ERROR_SYSTEM|3, "out of space [tmp buffer]");
1504
i = 1024;
1505
if (!(state.sp_min = oldof(NiL, Stack_t, i, 0)))
1506
error(ERROR_SYSTEM|3, "out of space [tag stack]");
1507
state.sp_max = state.sp_min + i - 1;
1508
if (!(state.entities = hashalloc(NiL, HASH_name, "entities", 0)))
1509
error(ERROR_SYSTEM|3, "out of space [entity hash]");
1510
if (!(state.tags = hashalloc(NiL, HASH_compare, strcasecmp, HASH_hash, strcasehash, HASH_name, "tags", 0)))
1511
error(ERROR_SYSTEM|3, "out of space [tag hash]");
1512
if (state.project && !(state.files = hashalloc(state.tags, HASH_set, HASH_ALLOCATE, HASH_name, "files", 0)))
1513
error(ERROR_SYSTEM|3, "out of space [file hash]");
1514
for (i = 0; i < elementsof(entities); i++)
1515
if (!(hashput(state.entities, entities[i].name, &entities[i])))
1516
error(ERROR_SYSTEM|3, "out of space [entity hash put]");
1517
for (i = 0; i < elementsof(tags); i++)
1518
if (!(hashput(state.tags, tags[i].name, &tags[i])))
1519
error(ERROR_SYSTEM|3, "out of space [tag hash put]");
1520
hashset(state.tags, HASH_ALLOCATE);
1521
}
1522
1523
int
1524
main(int argc, char** argv)
1525
{
1526
register int c;
1527
register char* s;
1528
register char* t;
1529
register char* u;
1530
register Sfio_t* ip;
1531
register Sfio_t* op;
1532
1533
NoP(argc);
1534
error_info.id = "html2rtf";
1535
state.fontsize = FONTSIZE;
1536
for (;;)
1537
{
1538
switch (optget(argv, usage))
1539
{
1540
case 'd':
1541
error_info.trace = -opt_info.num;
1542
continue;
1543
case 'f':
1544
state.fontsize = opt_info.num;
1545
continue;
1546
case 'p':
1547
state.project = opt_info.arg;
1548
continue;
1549
case 'v':
1550
state.verbose = 1;
1551
continue;
1552
case '?':
1553
error(ERROR_USAGE|4, "%s", opt_info.arg);
1554
continue;
1555
case ':':
1556
error(2, "%s", opt_info.arg);
1557
continue;
1558
}
1559
break;
1560
}
1561
argv += opt_info.index;
1562
if (error_info.errors)
1563
error(ERROR_USAGE|4, "%s", optusage(NiL));
1564
init();
1565
if (!*argv)
1566
{
1567
if (state.project)
1568
error(ERROR_SYSTEM|3, "%s: input files required when project file specified", state.project);
1569
process(NiL, sfstdin, sfstdout);
1570
}
1571
else while (s = *argv++)
1572
{
1573
if (ip = sfopen(NiL, s, "r"))
1574
{
1575
if (state.project)
1576
{
1577
if (!(t = strrchr(s, '/')))
1578
t = s;
1579
if (u = strrchr(t, '.'))
1580
c = u - t;
1581
else
1582
c = strlen(t);
1583
sfprintf(state.tmp, "%-.*s.rtf", c, t);
1584
if (!(u = sfstruse(state.tmp)))
1585
error(ERROR_SYSTEM|3, "out of space");
1586
if (!(op = sfopen(NiL, u, "w")))
1587
{
1588
error(ERROR_SYSTEM|2, "%s: cannot write", u);
1589
sfclose(ip);
1590
continue;
1591
}
1592
hashput(state.files, u, state.files);
1593
while (c = *t++)
1594
sfputc(state.tmp, isalnum(c) ? c : '.');
1595
if (!(state.prefix = strdup(sfstruse(state.tmp))))
1596
error(ERROR_SYSTEM|3, "out of space");
1597
}
1598
else
1599
{
1600
state.prefix = "HTML2RTF";
1601
op = sfstdout;
1602
}
1603
process(s, ip, op);
1604
sfclose(ip);
1605
if (state.project)
1606
{
1607
sfclose(op);
1608
free(state.prefix);
1609
}
1610
}
1611
else error(ERROR_SYSTEM|2, "%s: cannot read", s);
1612
}
1613
if (state.project)
1614
project(state.project);
1615
exit(error_info.errors != 0);
1616
}
1617
1618