Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Download
7639 views
1
#include "mupdf/pdf.h"
2
3
static void
4
pdf_clean_stream_object(fz_context *ctx, pdf_document *doc, pdf_obj *obj, pdf_obj *orig_res, fz_cookie *cookie, int own_res)
5
{
6
pdf_processor *proc_buffer = NULL;
7
pdf_processor *proc_filter = NULL;
8
pdf_obj *res = NULL;
9
pdf_obj *ref = NULL;
10
fz_buffer *buffer;
11
12
if (!obj)
13
return;
14
15
fz_var(res);
16
fz_var(ref);
17
fz_var(proc_buffer);
18
fz_var(proc_filter);
19
20
buffer = fz_new_buffer(ctx, 1024);
21
22
fz_try(ctx)
23
{
24
if (own_res)
25
{
26
pdf_obj *r = pdf_dict_get(ctx, obj, PDF_NAME_Resources);
27
if (r)
28
orig_res = r;
29
}
30
31
res = pdf_new_dict(ctx, doc, 1);
32
33
proc_buffer = pdf_new_buffer_processor(ctx, buffer);
34
proc_filter = pdf_new_filter_processor(ctx, proc_buffer, doc, orig_res, res);
35
36
pdf_process_contents(ctx, proc_filter, doc, orig_res, obj, cookie);
37
38
pdf_update_stream(ctx, doc, obj, buffer, 0);
39
40
if (own_res)
41
{
42
ref = pdf_new_ref(ctx, doc, res);
43
pdf_dict_put(ctx, obj, PDF_NAME_Resources, ref);
44
}
45
}
46
fz_always(ctx)
47
{
48
pdf_drop_processor(ctx, proc_filter);
49
pdf_drop_processor(ctx, proc_buffer);
50
fz_drop_buffer(ctx, buffer);
51
pdf_drop_obj(ctx, res);
52
pdf_drop_obj(ctx, ref);
53
}
54
fz_catch(ctx)
55
{
56
fz_rethrow_message(ctx, "Failed while cleaning xobject");
57
}
58
}
59
60
static void
61
pdf_clean_type3(fz_context *ctx, pdf_document *doc, pdf_obj *obj, pdf_obj *orig_res, fz_cookie *cookie)
62
{
63
pdf_processor *proc_buffer = NULL;
64
pdf_processor *proc_filter = NULL;
65
pdf_obj *res = NULL;
66
pdf_obj *ref = NULL;
67
pdf_obj *charprocs;
68
int i, l;
69
70
fz_var(res);
71
fz_var(ref);
72
fz_var(proc_buffer);
73
fz_var(proc_filter);
74
75
fz_try(ctx)
76
{
77
res = pdf_dict_get(ctx, obj, PDF_NAME_Resources);
78
if (res)
79
orig_res = res;
80
res = NULL;
81
82
res = pdf_new_dict(ctx, doc, 1);
83
84
charprocs = pdf_dict_get(ctx, obj, PDF_NAME_CharProcs);
85
l = pdf_dict_len(ctx, charprocs);
86
87
for (i = 0; i < l; i++)
88
{
89
pdf_obj *val = pdf_dict_get_val(ctx, charprocs, i);
90
fz_buffer *buffer = fz_new_buffer(ctx, 1024);
91
fz_try(ctx)
92
{
93
proc_buffer = pdf_new_buffer_processor(ctx, buffer);
94
proc_filter = pdf_new_filter_processor(ctx, proc_buffer, doc, orig_res, res);
95
96
pdf_process_contents(ctx, proc_filter, doc, orig_res, val, cookie);
97
98
pdf_update_stream(ctx, doc, val, buffer, 0);
99
}
100
fz_always(ctx)
101
{
102
pdf_drop_processor(ctx, proc_filter);
103
pdf_drop_processor(ctx, proc_buffer);
104
fz_drop_buffer(ctx, buffer);
105
}
106
fz_catch(ctx)
107
{
108
fz_rethrow(ctx);
109
}
110
}
111
112
/* ProcSet - no cleaning possible. Inherit this from the old dict. */
113
pdf_dict_put(ctx, res, PDF_NAME_ProcSet, pdf_dict_get(ctx, orig_res, PDF_NAME_ProcSet));
114
115
ref = pdf_new_ref(ctx, doc, res);
116
pdf_dict_put(ctx, obj, PDF_NAME_Resources, ref);
117
}
118
fz_always(ctx)
119
{
120
pdf_drop_obj(ctx, res);
121
pdf_drop_obj(ctx, ref);
122
}
123
fz_catch(ctx)
124
{
125
fz_rethrow_message(ctx, "Failed while cleaning xobject");
126
}
127
}
128
129
/*
	Clean the content stream of a page, together with the streams of the
	resources it uses, and replace the page's resource dictionary with a
	freshly built one.

	page: the page to clean. Its contents are run through a filter
		processor into a single buffer. If the contents were an array,
		a new object is created to hold the result and the page is
		pointed at it; otherwise the existing stream object is reused
		and its /Filter and /DecodeParms entries are removed.

	cookie: passed through to pdf_process_contents.

	proc_fn: optional callback. When non-NULL it is invoked as
		proc_fn(ctx, buffer, res, proc_arg) after the resources have
		been processed and before the buffer is written back as the
		page's content stream.

	proc_arg: opaque argument handed to proc_fn.

	Failures are rethrown with the message "Failed while cleaning page".
*/
void pdf_clean_page_contents(fz_context *ctx, pdf_document *doc, pdf_page *page, fz_cookie *cookie, pdf_page_contents_process_fn *proc_fn, void *proc_arg)
{
	pdf_processor *proc_buffer = NULL;
	pdf_processor *proc_filter = NULL;
	pdf_obj *new_obj = NULL;
	pdf_obj *new_ref = NULL;
	pdf_obj *res = NULL;
	pdf_obj *ref = NULL;
	pdf_obj *obj;
	pdf_obj *contents;
	fz_buffer *buffer;

	fz_var(new_obj);
	fz_var(new_ref);
	fz_var(res);
	fz_var(ref);
	fz_var(proc_buffer);
	fz_var(proc_filter);

	buffer = fz_new_buffer(ctx, 1024);

	fz_try(ctx)
	{
		res = pdf_new_dict(ctx, doc, 1);

		proc_buffer = pdf_new_buffer_processor(ctx, buffer);
		proc_filter = pdf_new_filter_processor(ctx, proc_buffer, doc, page->resources, res);

		pdf_process_contents(ctx, proc_filter, doc, page->resources, page->contents, cookie);

		contents = page->contents;
		if (pdf_is_array(ctx, contents))
		{
			/* create a new object to replace the array */
			new_obj = pdf_new_dict(ctx, doc, 1);
			new_ref = pdf_new_ref(ctx, doc, new_obj);

			/* Point the page dictionary at the replacement too, in
			 * the same way /Resources is updated further down. */
			pdf_dict_put(ctx, page->me, PDF_NAME_Contents, new_ref);

			/* page->contents gets a reference of its own: new_ref
			 * itself is dropped in fz_always. The reference to the
			 * old array is released here. */
			pdf_drop_obj(ctx, page->contents);
			page->contents = pdf_keep_obj(ctx, new_ref);
			contents = new_ref;
		}
		else
		{
			pdf_dict_del(ctx, contents, PDF_NAME_Filter);
			pdf_dict_del(ctx, contents, PDF_NAME_DecodeParms);
		}

		/* Now deal with resources. The spec allows for Type3 fonts and form
		 * XObjects to omit a resource dictionary and look in the parent.
		 * Avoid that by flattening here as part of the cleaning. This could
		 * conceivably cause changes in rendering, but we don't care. */

		/* ExtGState */
		obj = pdf_dict_get(ctx, res, PDF_NAME_ExtGState);
		if (obj)
		{
			int i, l;

			l = pdf_dict_len(ctx, obj);
			for (i = 0; i < l; i++)
			{
				pdf_obj *o = pdf_dict_get(ctx, pdf_dict_get_val(ctx, obj, i), PDF_NAME_SMask);

				if (!o)
					continue;
				o = pdf_dict_get(ctx, o, PDF_NAME_G);
				if (!o)
					continue;

				/* Transparency group XObject */
				pdf_clean_stream_object(ctx, doc, o, page->resources, cookie, 1);
			}
		}

		/* ColorSpace - no cleaning possible */

		/* Pattern */
		obj = pdf_dict_get(ctx, res, PDF_NAME_Pattern);
		if (obj)
		{
			int i, l;

			l = pdf_dict_len(ctx, obj);
			for (i = 0; i < l; i++)
			{
				pdf_obj *pat = pdf_dict_get_val(ctx, obj, i);

				if (!pat)
					continue;
				/* Only patterns with PatternType 1 are cleaned; they
				 * keep their existing /Resources entry (own_res = 0). */
				if (pdf_to_int(ctx, pdf_dict_get(ctx, pat, PDF_NAME_PatternType)) == 1)
					pdf_clean_stream_object(ctx, doc, pat, page->resources, cookie, 0);
			}
		}

		/* Shading - no cleaning possible */

		/* XObject */
		obj = pdf_dict_get(ctx, res, PDF_NAME_XObject);
		if (obj)
		{
			int i, l;

			l = pdf_dict_len(ctx, obj);
			for (i = 0; i < l; i++)
			{
				pdf_obj *xobj = pdf_dict_get_val(ctx, obj, i);

				/* Only XObjects of subtype Form are cleaned. */
				if (!pdf_name_eq(ctx, PDF_NAME_Form, pdf_dict_get(ctx, xobj, PDF_NAME_Subtype)))
					continue;

				pdf_clean_stream_object(ctx, doc, xobj, page->resources, cookie, 1);
			}
		}

		/* Font */
		obj = pdf_dict_get(ctx, res, PDF_NAME_Font);
		if (obj)
		{
			int i, l;

			l = pdf_dict_len(ctx, obj);
			for (i = 0; i < l; i++)
			{
				pdf_obj *o = pdf_dict_get_val(ctx, obj, i);

				if (pdf_name_eq(ctx, PDF_NAME_Type3, pdf_dict_get(ctx, o, PDF_NAME_Subtype)))
				{
					pdf_clean_type3(ctx, doc, o, page->resources, cookie);
				}
			}
		}

		/* ProcSet - no cleaning possible. Inherit this from the old dict. */
		obj = pdf_dict_get(ctx, page->resources, PDF_NAME_ProcSet);
		if (obj)
			pdf_dict_put(ctx, res, PDF_NAME_ProcSet, obj);

		/* Properties - no cleaning possible. */

		if (proc_fn)
			(*proc_fn)(ctx, buffer, res, proc_arg);

		pdf_update_stream(ctx, doc, contents, buffer, 0);

		/* Do everything that can throw before letting go of the old
		 * resources, so that page->resources never ends up pointing
		 * at an object that has already been dropped. */
		ref = pdf_new_ref(ctx, doc, res);
		pdf_dict_put(ctx, page->me, PDF_NAME_Resources, ref);
		pdf_drop_obj(ctx, page->resources);
		page->resources = pdf_keep_obj(ctx, ref);
	}
	fz_always(ctx)
	{
		pdf_drop_processor(ctx, proc_filter);
		pdf_drop_processor(ctx, proc_buffer);
		fz_drop_buffer(ctx, buffer);
		pdf_drop_obj(ctx, new_obj);
		pdf_drop_obj(ctx, new_ref);
		pdf_drop_obj(ctx, res);
		pdf_drop_obj(ctx, ref);
	}
	fz_catch(ctx)
	{
		fz_rethrow_message(ctx, "Failed while cleaning page");
	}
}
289
290