Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Download
7643 views
1
#include "mupdf/pdf.h"
2
3
/*
4
* CMap parser
5
*/
6
7
/*
 * Decode a big-endian byte string into an integer code.
 *
 * buf: bytes to decode; each byte is read as an unsigned char.
 * len: number of bytes to consume; zero or a negative value yields 0.
 *
 * Returns the accumulated value. The accumulation is done in unsigned
 * arithmetic, so a string longer than sizeof(int) bytes just loses its
 * leading bytes instead of invoking signed-overflow undefined behaviour.
 */
static int
pdf_code_from_string(char *buf, int len)
{
	const unsigned char *p = (const unsigned char *)buf;
	unsigned int a = 0;

	/* "> 0" rather than a bare truth test: a negative len must not loop. */
	while (len-- > 0)
		a = (a << 8) | *p++;

	return (int)a;
}
15
16
static void
17
pdf_parse_cmap_name(fz_context *ctx, pdf_cmap *cmap, fz_stream *file, pdf_lexbuf *buf)
18
{
19
pdf_token tok;
20
21
tok = pdf_lex(ctx, file, buf);
22
23
if (tok == PDF_TOK_NAME)
24
fz_strlcpy(cmap->cmap_name, buf->scratch, sizeof(cmap->cmap_name));
25
else
26
fz_warn(ctx, "expected name after CMapName in cmap");
27
}
28
29
static void
30
pdf_parse_wmode(fz_context *ctx, pdf_cmap *cmap, fz_stream *file, pdf_lexbuf *buf)
31
{
32
pdf_token tok;
33
34
tok = pdf_lex(ctx, file, buf);
35
36
if (tok == PDF_TOK_INT)
37
pdf_set_cmap_wmode(ctx, cmap, buf->i);
38
else
39
fz_warn(ctx, "expected integer after WMode in cmap");
40
}
41
42
static void
43
pdf_parse_codespace_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file, pdf_lexbuf *buf)
44
{
45
pdf_token tok;
46
int lo, hi;
47
48
while (1)
49
{
50
tok = pdf_lex(ctx, file, buf);
51
52
if (tok == PDF_TOK_KEYWORD && !strcmp(buf->scratch, "endcodespacerange"))
53
return;
54
55
else if (tok == PDF_TOK_STRING)
56
{
57
lo = pdf_code_from_string(buf->scratch, buf->len);
58
tok = pdf_lex(ctx, file, buf);
59
if (tok == PDF_TOK_STRING)
60
{
61
hi = pdf_code_from_string(buf->scratch, buf->len);
62
pdf_add_codespace(ctx, cmap, lo, hi, buf->len);
63
}
64
else break;
65
}
66
67
else break;
68
}
69
70
fz_throw(ctx, FZ_ERROR_GENERIC, "expected string or endcodespacerange");
71
}
72
73
static void
74
pdf_parse_cid_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file, pdf_lexbuf *buf)
75
{
76
pdf_token tok;
77
int lo, hi, dst;
78
79
while (1)
80
{
81
tok = pdf_lex(ctx, file, buf);
82
83
if (tok == PDF_TOK_KEYWORD && !strcmp(buf->scratch, "endcidrange"))
84
return;
85
86
else if (tok != PDF_TOK_STRING)
87
fz_throw(ctx, FZ_ERROR_GENERIC, "expected string or endcidrange");
88
89
lo = pdf_code_from_string(buf->scratch, buf->len);
90
91
tok = pdf_lex(ctx, file, buf);
92
if (tok != PDF_TOK_STRING)
93
fz_throw(ctx, FZ_ERROR_GENERIC, "expected string");
94
95
hi = pdf_code_from_string(buf->scratch, buf->len);
96
97
tok = pdf_lex(ctx, file, buf);
98
if (tok != PDF_TOK_INT)
99
fz_throw(ctx, FZ_ERROR_GENERIC, "expected integer");
100
101
dst = buf->i;
102
103
pdf_map_range_to_range(ctx, cmap, lo, hi, dst);
104
}
105
}
106
107
static void
108
pdf_parse_cid_char(fz_context *ctx, pdf_cmap *cmap, fz_stream *file, pdf_lexbuf *buf)
109
{
110
pdf_token tok;
111
int src, dst;
112
113
while (1)
114
{
115
tok = pdf_lex(ctx, file, buf);
116
117
if (tok == PDF_TOK_KEYWORD && !strcmp(buf->scratch, "endcidchar"))
118
return;
119
120
else if (tok != PDF_TOK_STRING)
121
fz_throw(ctx, FZ_ERROR_GENERIC, "expected string or endcidchar");
122
123
src = pdf_code_from_string(buf->scratch, buf->len);
124
125
tok = pdf_lex(ctx, file, buf);
126
if (tok != PDF_TOK_INT)
127
fz_throw(ctx, FZ_ERROR_GENERIC, "expected integer");
128
129
dst = buf->i;
130
131
pdf_map_range_to_range(ctx, cmap, src, src, dst);
132
}
133
}
134
135
static void
136
pdf_parse_bf_range_array(fz_context *ctx, pdf_cmap *cmap, fz_stream *file, pdf_lexbuf *buf, int lo, int hi)
137
{
138
pdf_token tok;
139
int dst[256];
140
int i;
141
142
while (1)
143
{
144
tok = pdf_lex(ctx, file, buf);
145
146
if (tok == PDF_TOK_CLOSE_ARRAY)
147
return;
148
149
/* Note: does not handle [ /Name /Name ... ] */
150
else if (tok != PDF_TOK_STRING)
151
fz_throw(ctx, FZ_ERROR_GENERIC, "expected string or ]");
152
153
if (buf->len / 2)
154
{
155
int len = fz_mini(buf->len / 2, nelem(dst));
156
for (i = 0; i < len; i++)
157
dst[i] = pdf_code_from_string(&buf->scratch[i * 2], 2);
158
159
pdf_map_one_to_many(ctx, cmap, lo, dst, buf->len / 2);
160
}
161
162
lo ++;
163
}
164
}
165
166
static void
167
pdf_parse_bf_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file, pdf_lexbuf *buf)
168
{
169
pdf_token tok;
170
int lo, hi, dst;
171
172
while (1)
173
{
174
tok = pdf_lex(ctx, file, buf);
175
176
if (tok == PDF_TOK_KEYWORD && !strcmp(buf->scratch, "endbfrange"))
177
return;
178
179
else if (tok != PDF_TOK_STRING)
180
fz_throw(ctx, FZ_ERROR_GENERIC, "expected string or endbfrange");
181
182
lo = pdf_code_from_string(buf->scratch, buf->len);
183
184
tok = pdf_lex(ctx, file, buf);
185
if (tok != PDF_TOK_STRING)
186
fz_throw(ctx, FZ_ERROR_GENERIC, "expected string");
187
188
hi = pdf_code_from_string(buf->scratch, buf->len);
189
if (lo < 0 || lo > 65535 || hi < 0 || hi > 65535 || lo > hi)
190
{
191
fz_warn(ctx, "bf_range limits out of range in cmap %s", cmap->cmap_name);
192
return;
193
}
194
195
tok = pdf_lex(ctx, file, buf);
196
197
if (tok == PDF_TOK_STRING)
198
{
199
if (buf->len == 2)
200
{
201
dst = pdf_code_from_string(buf->scratch, buf->len);
202
pdf_map_range_to_range(ctx, cmap, lo, hi, dst);
203
}
204
else
205
{
206
int dststr[256];
207
int i;
208
209
if (buf->len / 2)
210
{
211
int len = fz_mini(buf->len / 2, nelem(dststr));
212
for (i = 0; i < len; i++)
213
dststr[i] = pdf_code_from_string(&buf->scratch[i * 2], 2);
214
215
while (lo <= hi)
216
{
217
dststr[i-1] ++;
218
pdf_map_one_to_many(ctx, cmap, lo, dststr, i);
219
lo ++;
220
}
221
}
222
}
223
}
224
225
else if (tok == PDF_TOK_OPEN_ARRAY)
226
{
227
pdf_parse_bf_range_array(ctx, cmap, file, buf, lo, hi);
228
}
229
230
else
231
{
232
fz_throw(ctx, FZ_ERROR_GENERIC, "expected string or array or endbfrange");
233
}
234
}
235
}
236
237
static void
238
pdf_parse_bf_char(fz_context *ctx, pdf_cmap *cmap, fz_stream *file, pdf_lexbuf *buf)
239
{
240
pdf_token tok;
241
int dst[256];
242
int src;
243
int i;
244
245
while (1)
246
{
247
tok = pdf_lex(ctx, file, buf);
248
249
if (tok == PDF_TOK_KEYWORD && !strcmp(buf->scratch, "endbfchar"))
250
return;
251
252
else if (tok != PDF_TOK_STRING)
253
fz_throw(ctx, FZ_ERROR_GENERIC, "expected string or endbfchar");
254
255
src = pdf_code_from_string(buf->scratch, buf->len);
256
257
tok = pdf_lex(ctx, file, buf);
258
/* Note: does not handle /dstName */
259
if (tok != PDF_TOK_STRING)
260
fz_throw(ctx, FZ_ERROR_GENERIC, "expected string");
261
262
if (buf->len / 2)
263
{
264
int len = fz_mini(buf->len / 2, nelem(dst));
265
for (i = 0; i < len; i++)
266
dst[i] = pdf_code_from_string(&buf->scratch[i * 2], 2);
267
pdf_map_one_to_many(ctx, cmap, src, dst, i);
268
}
269
}
270
}
271
272
pdf_cmap *
273
pdf_load_cmap(fz_context *ctx, fz_stream *file)
274
{
275
pdf_cmap *cmap;
276
char key[64];
277
pdf_lexbuf buf;
278
pdf_token tok;
279
280
pdf_lexbuf_init(ctx, &buf, PDF_LEXBUF_SMALL);
281
cmap = pdf_new_cmap(ctx);
282
283
strcpy(key, ".notdef");
284
285
fz_try(ctx)
286
{
287
while (1)
288
{
289
tok = pdf_lex(ctx, file, &buf);
290
291
if (tok == PDF_TOK_EOF)
292
break;
293
294
else if (tok == PDF_TOK_NAME)
295
{
296
if (!strcmp(buf.scratch, "CMapName"))
297
pdf_parse_cmap_name(ctx, cmap, file, &buf);
298
else if (!strcmp(buf.scratch, "WMode"))
299
pdf_parse_wmode(ctx, cmap, file, &buf);
300
else
301
fz_strlcpy(key, buf.scratch, sizeof key);
302
}
303
304
else if (tok == PDF_TOK_KEYWORD)
305
{
306
if (!strcmp(buf.scratch, "endcmap"))
307
break;
308
309
else if (!strcmp(buf.scratch, "usecmap"))
310
fz_strlcpy(cmap->usecmap_name, key, sizeof(cmap->usecmap_name));
311
312
else if (!strcmp(buf.scratch, "begincodespacerange"))
313
pdf_parse_codespace_range(ctx, cmap, file, &buf);
314
315
else if (!strcmp(buf.scratch, "beginbfchar"))
316
pdf_parse_bf_char(ctx, cmap, file, &buf);
317
318
else if (!strcmp(buf.scratch, "begincidchar"))
319
pdf_parse_cid_char(ctx, cmap, file, &buf);
320
321
else if (!strcmp(buf.scratch, "beginbfrange"))
322
pdf_parse_bf_range(ctx, cmap, file, &buf);
323
324
else if (!strcmp(buf.scratch, "begincidrange"))
325
pdf_parse_cid_range(ctx, cmap, file, &buf);
326
}
327
328
/* ignore everything else */
329
}
330
331
pdf_sort_cmap(ctx, cmap);
332
}
333
fz_always(ctx)
334
{
335
pdf_lexbuf_fin(ctx, &buf);
336
}
337
fz_catch(ctx)
338
{
339
pdf_drop_cmap(ctx, cmap);
340
fz_rethrow_message(ctx, "syntaxerror in cmap");
341
}
342
343
return cmap;
344
}
345
346