Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Download
7639 views
1
#include "mupdf/pdf.h"
2
3
/* Load or synthesize ToUnicode map for fonts */
4
5
static void
6
pdf_remap_cmap_range(fz_context *ctx, pdf_cmap *ucs_from_gid,
7
unsigned int cpt, unsigned int gid, unsigned int n, pdf_cmap *ucs_from_cpt)
8
{
9
unsigned int k;
10
int ucsbuf[8];
11
int ucslen;
12
13
for (k = 0; k <= n; ++k)
14
{
15
ucslen = pdf_lookup_cmap_full(ucs_from_cpt, cpt + k, ucsbuf);
16
if (ucslen == 1)
17
pdf_map_range_to_range(ctx, ucs_from_gid, gid + k, gid + k, ucsbuf[0]);
18
else if (ucslen > 1)
19
pdf_map_one_to_many(ctx, ucs_from_gid, gid + k, ucsbuf, ucslen);
20
}
21
}
22
23
static pdf_cmap *
24
pdf_remap_cmap(fz_context *ctx, pdf_cmap *gid_from_cpt, pdf_cmap *ucs_from_cpt)
25
{
26
pdf_cmap *ucs_from_gid;
27
unsigned int a, b, x;
28
int i;
29
30
ucs_from_gid = pdf_new_cmap(ctx);
31
32
if (gid_from_cpt->usecmap)
33
ucs_from_gid->usecmap = pdf_remap_cmap(ctx, gid_from_cpt->usecmap, ucs_from_cpt);
34
35
for (i = 0; i < gid_from_cpt->rlen; ++i)
36
{
37
a = gid_from_cpt->ranges[i].low;
38
b = gid_from_cpt->ranges[i].high;
39
x = gid_from_cpt->ranges[i].out;
40
pdf_remap_cmap_range(ctx, ucs_from_gid, a, x, b - a, ucs_from_cpt);
41
}
42
43
for (i = 0; i < gid_from_cpt->xlen; ++i)
44
{
45
a = gid_from_cpt->xranges[i].low;
46
b = gid_from_cpt->xranges[i].high;
47
x = gid_from_cpt->xranges[i].out;
48
pdf_remap_cmap_range(ctx, ucs_from_gid, a, x, b - a, ucs_from_cpt);
49
}
50
51
/* Font encoding CMaps don't have one-to-many mappings, so we can ignore the mranges. */
52
53
pdf_sort_cmap(ctx, ucs_from_gid);
54
55
return ucs_from_gid;
56
}
57
58
void
59
pdf_load_to_unicode(fz_context *ctx, pdf_document *doc, pdf_font_desc *font,
60
char **strings, char *collection, pdf_obj *cmapstm)
61
{
62
unsigned int cpt;
63
64
if (pdf_is_stream(ctx, doc, pdf_to_num(ctx, cmapstm), pdf_to_gen(ctx, cmapstm)))
65
{
66
pdf_cmap *ucs_from_cpt = pdf_load_embedded_cmap(ctx, doc, cmapstm);
67
font->to_unicode = pdf_remap_cmap(ctx, font->encoding, ucs_from_cpt);
68
pdf_drop_cmap(ctx, ucs_from_cpt);
69
font->size += pdf_cmap_size(ctx, font->to_unicode);
70
}
71
72
else if (collection)
73
{
74
if (!strcmp(collection, "Adobe-CNS1"))
75
font->to_unicode = pdf_load_system_cmap(ctx, "Adobe-CNS1-UCS2");
76
else if (!strcmp(collection, "Adobe-GB1"))
77
font->to_unicode = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2");
78
else if (!strcmp(collection, "Adobe-Japan1"))
79
font->to_unicode = pdf_load_system_cmap(ctx, "Adobe-Japan1-UCS2");
80
else if (!strcmp(collection, "Adobe-Korea1"))
81
font->to_unicode = pdf_load_system_cmap(ctx, "Adobe-Korea1-UCS2");
82
83
return;
84
}
85
86
if (strings)
87
{
88
/* TODO one-to-many mappings */
89
90
font->cid_to_ucs_len = 256;
91
font->cid_to_ucs = fz_malloc_array(ctx, 256, sizeof *font->cid_to_ucs);
92
font->size += 256 * sizeof *font->cid_to_ucs;
93
94
for (cpt = 0; cpt < 256; cpt++)
95
{
96
if (strings[cpt])
97
font->cid_to_ucs[cpt] = pdf_lookup_agl(strings[cpt]);
98
else
99
font->cid_to_ucs[cpt] = '?';
100
}
101
}
102
103
if (!font->to_unicode && !font->cid_to_ucs)
104
{
105
/* TODO: synthesize a ToUnicode if it's a freetype font with
106
* cmap and/or post tables or if it has glyph names. */
107
}
108
}
109
110