Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Download
7643 views
1
#include "mupdf/pdf.h"
2
3
/*
 * Allocation, destruction and simple parameter accessors.
 */
6
7
void
8
pdf_drop_cmap_imp(fz_context *ctx, fz_storable *cmap_)
9
{
10
pdf_cmap *cmap = (pdf_cmap *)cmap_;
11
if (cmap->usecmap)
12
pdf_drop_cmap(ctx, cmap->usecmap);
13
fz_free(ctx, cmap->ranges);
14
fz_free(ctx, cmap->xranges);
15
fz_free(ctx, cmap->mranges);
16
fz_free(ctx, cmap);
17
}
18
19
pdf_cmap *
20
pdf_new_cmap(fz_context *ctx)
21
{
22
pdf_cmap *cmap = fz_malloc_struct(ctx, pdf_cmap);
23
FZ_INIT_STORABLE(cmap, 1, pdf_drop_cmap_imp);
24
return cmap;
25
}
26
27
/* Could be a macro for speed */
28
pdf_cmap *
29
pdf_keep_cmap(fz_context *ctx, pdf_cmap *cmap)
30
{
31
return (pdf_cmap *)fz_keep_storable(ctx, &cmap->storable);
32
}
33
34
/* Could be a macro for speed */
35
void
36
pdf_drop_cmap(fz_context *ctx, pdf_cmap *cmap)
37
{
38
fz_drop_storable(ctx, &cmap->storable);
39
}
40
41
void
42
pdf_set_usecmap(fz_context *ctx, pdf_cmap *cmap, pdf_cmap *usecmap)
43
{
44
int i;
45
46
if (cmap->usecmap)
47
pdf_drop_cmap(ctx, cmap->usecmap);
48
cmap->usecmap = pdf_keep_cmap(ctx, usecmap);
49
50
if (cmap->codespace_len == 0)
51
{
52
cmap->codespace_len = usecmap->codespace_len;
53
for (i = 0; i < usecmap->codespace_len; i++)
54
cmap->codespace[i] = usecmap->codespace[i];
55
}
56
}
57
58
int
59
pdf_cmap_wmode(fz_context *ctx, pdf_cmap *cmap)
60
{
61
return cmap->wmode;
62
}
63
64
void
65
pdf_set_cmap_wmode(fz_context *ctx, pdf_cmap *cmap, int wmode)
66
{
67
cmap->wmode = wmode;
68
}
69
70
/*
71
* Add a codespacerange section.
72
* These ranges are used by pdf_decode_cmap to decode
73
* multi-byte encoded strings.
74
*/
75
void
76
pdf_add_codespace(fz_context *ctx, pdf_cmap *cmap, unsigned int low, unsigned int high, int n)
77
{
78
if (cmap->codespace_len + 1 == nelem(cmap->codespace))
79
{
80
fz_warn(ctx, "assert: too many code space ranges");
81
return;
82
}
83
84
cmap->codespace[cmap->codespace_len].n = n;
85
cmap->codespace[cmap->codespace_len].low = low;
86
cmap->codespace[cmap->codespace_len].high = high;
87
cmap->codespace_len ++;
88
}
89
90
/*
91
* Add a range.
92
*/
93
static void
94
add_range(fz_context *ctx, pdf_cmap *cmap, unsigned int low, unsigned int high, unsigned int out)
95
{
96
if (low > high)
97
{
98
fz_warn(ctx, "range limits out of range in cmap %s", cmap->cmap_name);
99
return;
100
}
101
102
if (low <= 0xFFFF && high <= 0xFFFF && out <= 0xFFFF)
103
{
104
if (cmap->rlen + 1 > cmap->rcap)
105
{
106
int new_cap = cmap->rcap ? cmap->rcap * 2 : 256;
107
cmap->ranges = fz_resize_array(ctx, cmap->ranges, new_cap, sizeof *cmap->ranges);
108
cmap->rcap = new_cap;
109
}
110
cmap->ranges[cmap->rlen].low = low;
111
cmap->ranges[cmap->rlen].high = high;
112
cmap->ranges[cmap->rlen].out = out;
113
cmap->rlen++;
114
}
115
else
116
{
117
if (cmap->xlen + 1 > cmap->xcap)
118
{
119
int new_cap = cmap->xcap ? cmap->xcap * 2 : 256;
120
cmap->xranges = fz_resize_array(ctx, cmap->xranges, new_cap, sizeof *cmap->xranges);
121
cmap->xcap = new_cap;
122
}
123
cmap->xranges[cmap->xlen].low = low;
124
cmap->xranges[cmap->xlen].high = high;
125
cmap->xranges[cmap->xlen].out = out;
126
cmap->xlen++;
127
}
128
}
129
130
/*
131
* Add a one-to-many mapping.
132
*/
133
static void
134
add_mrange(fz_context *ctx, pdf_cmap *cmap, unsigned int low, int *out, int len)
135
{
136
int i;
137
if (cmap->mlen + 1 > cmap->mcap)
138
{
139
int new_cap = cmap->mcap ? cmap->mcap * 2 : 256;
140
cmap->mranges = fz_resize_array(ctx, cmap->mranges, new_cap, sizeof *cmap->mranges);
141
cmap->mcap = new_cap;
142
}
143
cmap->mranges[cmap->mlen].low = low;
144
cmap->mranges[cmap->mlen].len = len;
145
for (i = 0; i < len; ++i)
146
cmap->mranges[cmap->mlen].out[i] = out[i];
147
for (; i < PDF_MRANGE_CAP; ++i)
148
cmap->mranges[cmap->mlen].out[i] = 0;
149
cmap->mlen++;
150
}
151
152
/*
153
* Add a range-to-table mapping.
154
*/
155
void
156
pdf_map_range_to_table(fz_context *ctx, pdf_cmap *cmap, unsigned int low, int *table, int len)
157
{
158
int i;
159
for (i = 0; i < len; i++)
160
add_range(ctx, cmap, low + i, low + i, table[i]);
161
}
162
163
/*
164
* Add a range of contiguous one-to-one mappings (ie 1..5 maps to 21..25)
165
*/
166
void
167
pdf_map_range_to_range(fz_context *ctx, pdf_cmap *cmap, unsigned int low, unsigned int high, int out)
168
{
169
add_range(ctx, cmap, low, high, out);
170
}
171
172
/*
173
* Add a single one-to-many mapping.
174
*/
175
void
176
pdf_map_one_to_many(fz_context *ctx, pdf_cmap *cmap, unsigned int low, int *values, int len)
177
{
178
if (len == 1)
179
{
180
add_range(ctx, cmap, low, low, values[0]);
181
return;
182
}
183
184
/* Decode unicode surrogate pairs. */
185
/* Only the *-UCS2 CMaps use one-to-many mappings, so assuming unicode should be safe. */
186
if (len == 2 &&
187
values[0] >= 0xD800 && values[0] <= 0xDBFF &&
188
values[1] >= 0xDC00 && values[1] <= 0xDFFF)
189
{
190
int rune = ((values[0] - 0xD800) << 10) + (values[1] - 0xDC00) + 0x10000;
191
add_range(ctx, cmap, low, low, rune);
192
return;
193
}
194
195
if (len > PDF_MRANGE_CAP)
196
{
197
fz_warn(ctx, "ignoring one to many mapping in cmap %s", cmap->cmap_name);
198
return;
199
}
200
201
add_mrange(ctx, cmap, low, values, len);
202
}
203
204
/*
 * Sort the input ranges.
 * Merge contiguous ranges.
 */
208
209
static int cmprange(const void *va, const void *vb)
210
{
211
unsigned int a = ((const pdf_range*)va)->low;
212
unsigned int b = ((const pdf_range*)vb)->low;
213
return a < b ? -1 : a > b ? 1 : 0;
214
}
215
216
static int cmpxrange(const void *va, const void *vb)
217
{
218
unsigned int a = ((const pdf_xrange*)va)->low;
219
unsigned int b = ((const pdf_xrange*)vb)->low;
220
return a < b ? -1 : a > b ? 1 : 0;
221
}
222
223
static int cmpmrange(const void *va, const void *vb)
224
{
225
unsigned int a = ((const pdf_mrange*)va)->low;
226
unsigned int b = ((const pdf_mrange*)vb)->low;
227
return a < b ? -1 : a > b ? 1 : 0;
228
}
229
230
void
231
pdf_sort_cmap(fz_context *ctx, pdf_cmap *cmap)
232
{
233
pdf_range *a, *b;
234
pdf_xrange *x, *y;
235
236
if (cmap->rlen)
237
{
238
qsort(cmap->ranges, cmap->rlen, sizeof *cmap->ranges, cmprange);
239
a = cmap->ranges;
240
for (b = a + 1; b < cmap->ranges + cmap->rlen; ++b)
241
{
242
if (b->low == a->high + 1 && b->out == a->out + (a->high - a->low) + 1)
243
a->high = b->high;
244
else
245
*(++a) = *b;
246
}
247
cmap->rlen = a - cmap->ranges + 1;
248
}
249
250
if (cmap->xlen)
251
{
252
qsort(cmap->xranges, cmap->xlen, sizeof *cmap->xranges, cmpxrange);
253
x = cmap->xranges;
254
for (y = x + 1; y < cmap->xranges + cmap->xlen; ++y)
255
{
256
if (y->low == x->high + 1 && y->out == x->out + (x->high - x->low) + 1)
257
x->high = y->high;
258
else
259
*(++x) = *y;
260
}
261
cmap->xlen = x - cmap->xranges + 1;
262
}
263
264
if (cmap->mlen)
265
{
266
qsort(cmap->mranges, cmap->mlen, sizeof *cmap->mranges, cmpmrange);
267
}
268
}
269
270
/*
271
* Lookup the mapping of a codepoint.
272
*/
273
int
274
pdf_lookup_cmap(pdf_cmap *cmap, unsigned int cpt)
275
{
276
pdf_range *ranges = cmap->ranges;
277
pdf_xrange *xranges = cmap->xranges;
278
int l, r, m;
279
280
l = 0;
281
r = cmap->rlen - 1;
282
while (l <= r)
283
{
284
m = (l + r) >> 1;
285
if (cpt < ranges[m].low)
286
r = m - 1;
287
else if (cpt > ranges[m].high)
288
l = m + 1;
289
else
290
return cpt - ranges[m].low + ranges[m].out;
291
}
292
293
l = 0;
294
r = cmap->xlen - 1;
295
while (l <= r)
296
{
297
m = (l + r) >> 1;
298
if (cpt < xranges[m].low)
299
r = m - 1;
300
else if (cpt > xranges[m].high)
301
l = m + 1;
302
else
303
return cpt - xranges[m].low + xranges[m].out;
304
}
305
306
if (cmap->usecmap)
307
return pdf_lookup_cmap(cmap->usecmap, cpt);
308
309
return -1;
310
}
311
312
int
313
pdf_lookup_cmap_full(pdf_cmap *cmap, unsigned int cpt, int *out)
314
{
315
pdf_range *ranges = cmap->ranges;
316
pdf_xrange *xranges = cmap->xranges;
317
pdf_mrange *mranges = cmap->mranges;
318
unsigned int i;
319
int l, r, m;
320
321
l = 0;
322
r = cmap->rlen - 1;
323
while (l <= r)
324
{
325
m = (l + r) >> 1;
326
if (cpt < ranges[m].low)
327
r = m - 1;
328
else if (cpt > ranges[m].high)
329
l = m + 1;
330
else
331
{
332
out[0] = cpt - ranges[m].low + ranges[m].out;
333
return 1;
334
}
335
}
336
337
l = 0;
338
r = cmap->xlen - 1;
339
while (l <= r)
340
{
341
m = (l + r) >> 1;
342
if (cpt < xranges[m].low)
343
r = m - 1;
344
else if (cpt > xranges[m].high)
345
l = m + 1;
346
else
347
{
348
out[0] = cpt - xranges[m].low + xranges[m].out;
349
return 1;
350
}
351
}
352
353
l = 0;
354
r = cmap->mlen - 1;
355
while (l <= r)
356
{
357
m = (l + r) >> 1;
358
if (cpt < mranges[m].low)
359
r = m - 1;
360
else if (cpt > mranges[m].low)
361
l = m + 1;
362
else
363
{
364
for (i = 0; i < mranges[m].len; ++i)
365
out[i] = mranges[m].out[i];
366
return mranges[m].len;
367
}
368
}
369
370
if (cmap->usecmap)
371
return pdf_lookup_cmap_full(cmap->usecmap, cpt, out);
372
373
return 0;
374
}
375
376
/*
377
* Use the codespace ranges to extract a codepoint from a
378
* multi-byte encoded string.
379
*/
380
int
381
pdf_decode_cmap(pdf_cmap *cmap, unsigned char *buf, unsigned char *end, unsigned int *cpt)
382
{
383
unsigned int c;
384
int k, n;
385
int len = end - buf;
386
387
if (len > 4)
388
len = 4;
389
390
c = 0;
391
for (n = 0; n < len; n++)
392
{
393
c = (c << 8) | buf[n];
394
for (k = 0; k < cmap->codespace_len; k++)
395
{
396
if (cmap->codespace[k].n == n + 1)
397
{
398
if (c >= cmap->codespace[k].low && c <= cmap->codespace[k].high)
399
{
400
*cpt = c;
401
return n + 1;
402
}
403
}
404
}
405
}
406
407
*cpt = 0;
408
return 1;
409
}
410
411