Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Download
7643 views
1
#include "mupdf/pdf.h"
2
3
/* #define DEBUG_LINEARIZATION */
4
/* #define DEBUG_HEAP_SORT */
5
/* #define DEBUG_WRITING */
6
7
typedef struct pdf_write_options_s pdf_write_options;
8
9
/*
	As part of linearization, we need to keep a list of what objects are used
	by what page. We do this by recording the objects used in a given page
	in a page_objects structure. We have a list of these structures (one per
	page) in the page_objects_list structure.

	The page_objects structure maintains a heap in the object array, so
	insertion takes log n time, and we can heapsort and dedupe at the end for
	a total worst case n log n time.

	The magic heap invariant is that:
		entry[n] >= entry[(n+1)*2-1] & entry[n] >= entry[(n+1)*2]
	or equivalently:
		entry[(n-1)>>1] >= entry[n]

	For a discussion of the heap data structure (and heapsort) see Kingston,
	"Algorithms and Data Structures".
*/
27
28
/*
	Per-page record used while linearizing: a few summary fields followed
	by a growable array of object numbers.

	The array is declared with a single element and over-allocated by the
	helpers that create and grow it, so the struct must only live on the heap.
*/
typedef struct {
	int num_shared;		/* printed by the debug dump as "Number of shared objects" */
	int page_object_number;	/* object number recorded for the page object itself */
	int num_objects;	/* printed by the debug dump as "Number of objects" */
	int min_ofs, max_ofs;	/* printed by the debug dump as the page's byte range */
	/* Extensible list of objects used on this page:
	 * 'cap' slots are allocated, the first 'len' are in use. */
	int cap, len;
	int object[1];
} page_objects;
39
40
typedef struct {
41
int cap;
42
int len;
43
page_objects *page[1];
44
} page_objects_list;
45
46
struct pdf_write_options_s
47
{
48
FILE *out;
49
int do_incremental;
50
int do_ascii;
51
int do_expand;
52
int do_garbage;
53
int do_linear;
54
int do_clean;
55
int *use_list;
56
int *ofs_list;
57
int *gen_list;
58
int *renumber_map;
59
int continue_on_error;
60
int *errors;
61
/* The following extras are required for linearization */
62
int *rev_renumber_map;
63
int *rev_gen_list;
64
int start;
65
int first_xref_offset;
66
int main_xref_offset;
67
int first_xref_entry_offset;
68
int file_len;
69
int hints_shared_offset;
70
int hintstream_len;
71
pdf_obj *linear_l;
72
pdf_obj *linear_h0;
73
pdf_obj *linear_h1;
74
pdf_obj *linear_o;
75
pdf_obj *linear_e;
76
pdf_obj *linear_n;
77
pdf_obj *linear_t;
78
pdf_obj *hints_s;
79
pdf_obj *hints_length;
80
int page_count;
81
page_objects_list *page_object_lists;
82
};
83
84
/*
 * Constants for use with use_list.
 *
 * If use_list[num] = 0, then object num is unused.
 * If use_list[num] & PARAMS, then object num is the linearisation params obj.
 * If use_list[num] & HINTS, then object num is the primary hint stream obj.
 * If use_list[num] & CATALOGUE, then object num is used by the catalogue.
 * If use_list[num] & PAGE1, then object num is used by page 1.
 * If use_list[num] & SHARED, then object num is shared between pages.
 * If use_list[num] & PAGE_OBJECT then this must be the first object in a page.
 * If use_list[num] & OTHER_OBJECTS then this should appear in section 9.
 * Otherwise object num is used by page (use_list[num]>>USE_PAGE_SHIFT).
 */
96
/* Flag bits occupy the low byte; the page number lives above USE_PAGE_SHIFT. */
enum
{
	USE_CATALOGUE = 1 << 1,
	USE_PAGE1 = 1 << 2,
	USE_SHARED = 1 << 3,
	USE_PARAMS = 1 << 4,
	USE_HINTS = 1 << 5,
	USE_PAGE_OBJECT = 1 << 6,
	USE_OTHER_OBJECTS = 1 << 7,
	USE_PAGE_SHIFT = 8,
	USE_PAGE_MASK = ~((1 << USE_PAGE_SHIFT) - 1)
};
108
109
/*
110
* page_objects and page_object_list handling functions
111
*/
112
static page_objects_list *
113
page_objects_list_create(fz_context *ctx)
114
{
115
page_objects_list *pol = fz_calloc(ctx, 1, sizeof(*pol));
116
117
pol->cap = 1;
118
pol->len = 0;
119
return pol;
120
}
121
122
static void
123
page_objects_list_destroy(fz_context *ctx, page_objects_list *pol)
124
{
125
int i;
126
127
if (!pol)
128
return;
129
for (i = 0; i < pol->len; i++)
130
{
131
fz_free(ctx, pol->page[i]);
132
}
133
fz_free(ctx, pol);
134
}
135
136
static void
137
page_objects_list_ensure(fz_context *ctx, page_objects_list **pol, int newcap)
138
{
139
int oldcap = (*pol)->cap;
140
if (newcap <= oldcap)
141
return;
142
*pol = fz_resize_array(ctx, *pol, 1, sizeof(page_objects_list) + (newcap-1)*sizeof(page_objects *));
143
memset(&(*pol)->page[oldcap], 0, (newcap-oldcap)*sizeof(page_objects *));
144
(*pol)->cap = newcap;
145
}
146
147
static page_objects *
148
page_objects_create(fz_context *ctx)
149
{
150
int initial_cap = 8;
151
page_objects *po = fz_calloc(ctx, 1, sizeof(*po) + (initial_cap-1) * sizeof(int));
152
153
po->cap = initial_cap;
154
po->len = 0;
155
return po;
156
157
}
158
159
static void
160
page_objects_insert(fz_context *ctx, page_objects **ppo, int i)
161
{
162
page_objects *po;
163
164
/* Make a page_objects if we don't have one */
165
if (*ppo == NULL)
166
*ppo = page_objects_create(ctx);
167
168
po = *ppo;
169
/* page_objects insertion: extend the page_objects by 1, and put us on the end */
170
if (po->len == po->cap)
171
{
172
po = fz_resize_array(ctx, po, 1, sizeof(page_objects) + (po->cap*2 - 1)*sizeof(int));
173
po->cap *= 2;
174
*ppo = po;
175
}
176
po->object[po->len++] = i;
177
}
178
179
static void
180
page_objects_list_insert(fz_context *ctx, pdf_write_options *opts, int page, int object)
181
{
182
page_objects_list_ensure(ctx, &opts->page_object_lists, page+1);
183
if (opts->page_object_lists->len < page+1)
184
opts->page_object_lists->len = page+1;
185
page_objects_insert(ctx, &opts->page_object_lists->page[page], object);
186
}
187
188
static void
189
page_objects_list_set_page_object(fz_context *ctx, pdf_write_options *opts, int page, int object)
190
{
191
page_objects_list_ensure(ctx, &opts->page_object_lists, page+1);
192
opts->page_object_lists->page[page]->page_object_number = object;
193
}
194
195
static void
196
page_objects_sort(fz_context *ctx, page_objects *po)
197
{
198
int i, j;
199
int n = po->len;
200
201
/* Step 1: Make a heap */
202
/* Invariant: Valid heap in [0..i), unsorted elements in [i..n) */
203
for (i = 1; i < n; i++)
204
{
205
/* Now bubble backwards to maintain heap invariant */
206
j = i;
207
while (j != 0)
208
{
209
int tmp;
210
int k = (j-1)>>1;
211
if (po->object[k] >= po->object[j])
212
break;
213
tmp = po->object[k];
214
po->object[k] = po->object[j];
215
po->object[j] = tmp;
216
j = k;
217
}
218
}
219
220
/* Step 2: Heap sort */
221
/* Invariant: valid heap in [0..i), sorted list in [i..n) */
222
/* Initially: i = n */
223
for (i = n-1; i > 0; i--)
224
{
225
/* Swap the maximum (0th) element from the page_objects into its place
226
* in the sorted list (position i). */
227
int tmp = po->object[0];
228
po->object[0] = po->object[i];
229
po->object[i] = tmp;
230
/* Now, the page_objects is invalid because the 0th element is out
231
* of place. Bubble it until the page_objects is valid. */
232
j = 0;
233
while (1)
234
{
235
/* Children are k and k+1 */
236
int k = (j+1)*2-1;
237
/* If both children out of the page_objects, we're done */
238
if (k > i-1)
239
break;
240
/* If both are in the page_objects, pick the larger one */
241
if (k < i-1 && po->object[k] < po->object[k+1])
242
k++;
243
/* If j is bigger than k (i.e. both of it's children),
244
* we're done */
245
if (po->object[j] > po->object[k])
246
break;
247
tmp = po->object[k];
248
po->object[k] = po->object[j];
249
po->object[j] = tmp;
250
j = k;
251
}
252
}
253
}
254
255
static int
256
order_ge(int ui, int uj)
257
{
258
/*
259
For linearization, we need to order the sections as follows:
260
261
Remaining pages (Part 7)
262
Shared objects (Part 8)
263
Objects not associated with any page (Part 9)
264
Any "other" objects
265
(Header)(Part 1)
266
(Linearization params) (Part 2)
267
(1st page Xref/Trailer) (Part 3)
268
Catalogue (and other document level objects) (Part 4)
269
First page (Part 6)
270
(Primary Hint stream) (*) (Part 5)
271
Any free objects
272
273
Note, this is NOT the same order they appear in
274
the final file!
275
276
(*) The PDF reference gives us the option of putting the hint stream
277
after the first page, and we take it, for simplicity.
278
*/
279
280
/* If the 2 objects are in the same section, then page object comes first. */
281
if (((ui ^ uj) & ~USE_PAGE_OBJECT) == 0)
282
return ((ui & USE_PAGE_OBJECT) == 0);
283
/* Put unused objects last */
284
else if (ui == 0)
285
return 1;
286
else if (uj == 0)
287
return 0;
288
/* Put the hint stream before that... */
289
else if (ui & USE_HINTS)
290
return 1;
291
else if (uj & USE_HINTS)
292
return 0;
293
/* Put page 1 before that... */
294
else if (ui & USE_PAGE1)
295
return 1;
296
else if (uj & USE_PAGE1)
297
return 0;
298
/* Put the catalogue before that... */
299
else if (ui & USE_CATALOGUE)
300
return 1;
301
else if (uj & USE_CATALOGUE)
302
return 0;
303
/* Put the linearization params before that... */
304
else if (ui & USE_PARAMS)
305
return 1;
306
else if (uj & USE_PARAMS)
307
return 0;
308
/* Put other objects before that */
309
else if (ui & USE_OTHER_OBJECTS)
310
return 1;
311
else if (uj & USE_OTHER_OBJECTS)
312
return 0;
313
/* Put objects not associated with any page (anything
314
* not touched by the catalogue) before that... */
315
else if (ui == 0)
316
return 1;
317
else if (uj == 0)
318
return 0;
319
/* Put shared objects before that... */
320
else if (ui & USE_SHARED)
321
return 1;
322
else if (uj & USE_SHARED)
323
return 0;
324
/* And otherwise, order by the page number on which
325
* they are used. */
326
return (ui>>USE_PAGE_SHIFT) >= (uj>>USE_PAGE_SHIFT);
327
}
328
329
/*
	Heapsort the index array list[0..n) in place.

	Entries of 'list' are indices into 'val'; two entries a and b are
	compared by calling ge(val[a], val[b]), which must return non-zero
	when the first value sorts at or after the second. 'val' itself is
	never modified. 'ge' is not called when n < 2.
*/
static void
heap_sort(int *list, int n, const int *val, int (*ge)(int, int))
{
	int end, pos;

#ifdef DEBUG_HEAP_SORT
	fprintf(stderr, "Initially:\n");
	for (end = 0; end < n; end++)
	{
		fprintf(stderr, "%d: %d %x\n", end, list[end], val[list[end]]);
	}
#endif
	/* Phase 1: build a heap.
	 * Invariant: list[0..end) is a valid heap, list[end..n) is untouched. */
	for (end = 1; end < n; end++)
	{
		/* Sift the new element up towards the root. */
		for (pos = end; pos != 0; )
		{
			int parent = (pos - 1) >> 1;
			int held;

			if (ge(val[list[parent]], val[list[pos]]))
				break;
			held = list[parent];
			list[parent] = list[pos];
			list[pos] = held;
			pos = parent;
		}
	}
#ifdef DEBUG_HEAP_SORT
	fprintf(stderr, "Valid heap:\n");
	for (end = 0; end < n; end++)
	{
		int child = 2 * end + 1;

		fprintf(stderr, "%d: %d %x ", end, list[end], val[list[end]]);
		if (child < n)
			fputs(ge(val[list[end]], val[list[child]]) ? "OK " : "BAD ", stderr);
		if (child + 1 < n)
			fputs(ge(val[list[end]], val[list[child + 1]]) ? "OK\n" : "BAD\n", stderr);
		else
			fputs("\n", stderr);
	}
#endif

	/* Phase 2: repeatedly move the heap's top element to the end.
	 * Invariant: list[0..end] is a valid heap, list[end+1..n) is sorted. */
	for (end = n - 1; end > 0; end--)
	{
		int held = list[0];

		list[0] = list[end];
		list[end] = held;

		/* The root is now out of place; sift it down within list[0..end). */
		pos = 0;
		for (;;)
		{
			int child = 2 * pos + 1;

			/* No children left inside the heap. */
			if (child >= end)
				break;
			/* Two children: continue with the one that sorts later. */
			if (child + 1 < end && ge(val[list[child + 1]], val[list[child]]))
				child++;
			/* Parent already sorts at or after that child: done. */
			if (ge(val[list[pos]], val[list[child]]))
				break;
			held = list[child];
			list[child] = list[pos];
			list[pos] = held;
			pos = child;
		}
	}
#ifdef DEBUG_HEAP_SORT
	fprintf(stderr, "Sorted:\n");
	for (end = 0; end < n; end++)
	{
		fprintf(stderr, "%d: %d %x ", end, list[end], val[list[end]]);
		if (end + 1 < n)
			fputs(ge(val[list[end + 1]], val[list[end]]) ? "OK" : "BAD", stderr);
		fputs("\n", stderr);
	}
#endif
}
434
435
static void
436
page_objects_dedupe(fz_context *ctx, page_objects *po)
437
{
438
int i, j;
439
int n = po->len-1;
440
441
for (i = 0; i < n; i++)
442
{
443
if (po->object[i] == po->object[i+1])
444
break;
445
}
446
j = i; /* j points to the last valid one */
447
i++; /* i points to the first one we haven't looked at */
448
for (; i < n; i++)
449
{
450
if (po->object[j] != po->object[i])
451
po->object[++j] = po->object[i];
452
}
453
po->len = j+1;
454
}
455
456
static void
457
page_objects_list_sort_and_dedupe(fz_context *ctx, page_objects_list *pol)
458
{
459
int i;
460
int n = pol->len;
461
462
for (i = 0; i < n; i++)
463
{
464
page_objects_sort(ctx, pol->page[i]);
465
page_objects_dedupe(ctx, pol->page[i]);
466
}
467
}
468
469
#ifdef DEBUG_LINEARIZATION
470
static void
471
page_objects_dump(pdf_write_options *opts)
472
{
473
page_objects_list *pol = opts->page_object_lists;
474
int i, j;
475
476
for (i = 0; i < pol->len; i++)
477
{
478
page_objects *p = pol->page[i];
479
fprintf(stderr, "Page %d\n", i+1);
480
for (j = 0; j < p->len; j++)
481
{
482
int o = p->object[j];
483
fprintf(stderr, "\tObject %d: use=%x\n", o, opts->use_list[o]);
484
}
485
fprintf(stderr, "Byte range=%d->%d\n", p->min_ofs, p->max_ofs);
486
fprintf(stderr, "Number of objects=%d, Number of shared objects=%d\n", p->num_objects, p->num_shared);
487
fprintf(stderr, "Page object number=%d\n", p->page_object_number);
488
}
489
}
490
491
static void
492
objects_dump(fz_context *ctx, pdf_document *doc, pdf_write_options *opts)
493
{
494
int i;
495
496
for (i=0; i < pdf_xref_len(ctx, doc); i++)
497
{
498
fprintf(stderr, "Object %d use=%x offset=%d\n", i, opts->use_list[i], opts->ofs_list[i]);
499
}
500
}
501
#endif
502
503
/*
504
* Garbage collect objects not reachable from the trailer.
505
*/
506
507
/* Mark a reference. If it's been marked already, return NULL (as no further
508
* processing is required). If it's not, return the resolved object so
509
* that we can continue our recursive marking. If it's a duff reference
510
* return the fact so that we can remove the reference at source.
511
*/
512
static pdf_obj *markref(fz_context *ctx, pdf_document *doc, pdf_write_options *opts, pdf_obj *obj, int *duff)
513
{
514
int num = pdf_to_num(ctx, obj);
515
int gen = pdf_to_gen(ctx, obj);
516
517
if (num <= 0 || num >= pdf_xref_len(ctx, doc))
518
{
519
*duff = 1;
520
return NULL;
521
}
522
*duff = 0;
523
if (opts->use_list[num])
524
return NULL;
525
526
opts->use_list[num] = 1;
527
528
/* Bake in /Length in stream objects */
529
fz_try(ctx)
530
{
531
if (pdf_is_stream(ctx, doc, num, gen))
532
{
533
pdf_obj *len = pdf_dict_get(ctx, obj, PDF_NAME_Length);
534
if (pdf_is_indirect(ctx, len))
535
{
536
opts->use_list[pdf_to_num(ctx, len)] = 0;
537
len = pdf_resolve_indirect(ctx, len);
538
pdf_dict_put(ctx, obj, PDF_NAME_Length, len);
539
}
540
}
541
}
542
fz_catch(ctx)
543
{
544
fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
545
/* Leave broken */
546
}
547
548
obj = pdf_resolve_indirect(ctx, obj);
549
if (obj == NULL || pdf_is_null(ctx, obj))
550
{
551
*duff = 1;
552
opts->use_list[num] = 0;
553
}
554
555
return obj;
556
}
557
558
/* Recursively mark an object. If any references found are duff, then
559
* replace them with nulls. */
560
static int markobj(fz_context *ctx, pdf_document *doc, pdf_write_options *opts, pdf_obj *obj)
561
{
562
int i;
563
564
if (pdf_is_indirect(ctx, obj))
565
{
566
int duff;
567
obj = markref(ctx, doc, opts, obj, &duff);
568
if (duff)
569
return 1;
570
}
571
572
if (pdf_is_dict(ctx, obj))
573
{
574
int n = pdf_dict_len(ctx, obj);
575
for (i = 0; i < n; i++)
576
if (markobj(ctx, doc, opts, pdf_dict_get_val(ctx, obj, i)))
577
pdf_dict_put_val_drop(ctx, obj, i, pdf_new_null(ctx, doc));
578
}
579
580
else if (pdf_is_array(ctx, obj))
581
{
582
int n = pdf_array_len(ctx, obj);
583
for (i = 0; i < n; i++)
584
if (markobj(ctx, doc, opts, pdf_array_get(ctx, obj, i)))
585
pdf_array_put_drop(ctx, obj, i, pdf_new_null(ctx, doc));
586
}
587
588
return 0;
589
}
590
591
/*
592
* Scan for and remove duplicate objects (slow)
593
*/
594
595
/* Load the raw streams of objects num_a and num_b and report whether they
 * are byte-for-byte identical (1) or not (0). Both buffers are dropped
 * before returning; any error while loading is rethrown. */
static int rawstreamsmatch(fz_context *ctx, pdf_document *doc, int num_a, int num_b)
{
	fz_buffer *buf_a = NULL;
	fz_buffer *buf_b = NULL;
	int match = 0;

	fz_var(buf_a);
	fz_var(buf_b);

	fz_try(ctx)
	{
		unsigned char *bytes_a, *bytes_b;
		int size_a, size_b;

		buf_a = pdf_load_raw_renumbered_stream(ctx, doc, num_a, 0, num_a, 0);
		buf_b = pdf_load_raw_renumbered_stream(ctx, doc, num_b, 0, num_b, 0);
		size_a = fz_buffer_storage(ctx, buf_a, &bytes_a);
		size_b = fz_buffer_storage(ctx, buf_b, &bytes_b);
		if (size_a == size_b && memcmp(bytes_a, bytes_b, size_a) == 0)
			match = 1;
	}
	fz_always(ctx)
	{
		fz_drop_buffer(ctx, buf_a);
		fz_drop_buffer(ctx, buf_b);
	}
	fz_catch(ctx)
	{
		fz_rethrow(ctx);
	}

	return match;
}

/* Find used objects that are identical to an earlier used object, point
 * both renumber_map entries at the lower number and unmark the higher one.
 *
 * Stream objects are only considered when both are streams and do_garbage
 * is at least 4, in which case their raw stream data must match as well. */
static void removeduplicateobjs(fz_context *ctx, pdf_document *doc, pdf_write_options *opts)
{
	int count = pdf_xref_len(ctx, doc);
	int num, other;

	for (num = 1; num < count; num++)
	{
		/* use_list[num] only changes immediately before we leave the
		 * inner loop, so it can be tested once up front. */
		if (!opts->use_list[num])
			continue;

		/* Only compare an object to objects preceding it */
		for (other = 1; other < num; other++)
		{
			pdf_obj *a, *b;
			int skip, keep, streama, streamb;

			if (!opts->use_list[other])
				continue;

			/*
			 * Comparing stream objects data contents would take too long.
			 *
			 * pdf_is_stream calls pdf_cache_object and ensures
			 * that the xref table has the objects loaded.
			 */
			fz_try(ctx)
			{
				streama = pdf_is_stream(ctx, doc, num, 0);
				streamb = pdf_is_stream(ctx, doc, other, 0);
				if (streama && streamb)
					skip = (opts->do_garbage < 4);
				else
					skip = (streama || streamb);
			}
			fz_catch(ctx)
			{
				/* Assume different */
				skip = 1;
			}
			if (skip)
				continue;

			a = pdf_get_xref_entry(ctx, doc, num)->obj;
			b = pdf_get_xref_entry(ctx, doc, other)->obj;

			a = pdf_resolve_indirect(ctx, a);
			b = pdf_resolve_indirect(ctx, b);

			if (pdf_objcmp(ctx, a, b))
				continue;

			/* Dictionaries agree; for streams the data must agree too. */
			if (streama && streamb && !rawstreamsmatch(ctx, doc, num, other))
				continue;

			/* Keep the lowest numbered object */
			keep = fz_mini(num, other);
			opts->renumber_map[num] = keep;
			opts->renumber_map[other] = keep;
			opts->rev_renumber_map[keep] = num; /* Either will do */
			opts->use_list[fz_maxi(num, other)] = 0;

			/* One duplicate was found, do not look for another */
			break;
		}
	}
}
688
689
/*
690
* Renumber objects sequentially so the xref is more compact
691
*
692
* This code assumes that any opts->renumber_map[n] <= n for all n.
693
*/
694
695
/* Rewrite renumber_map in place so that all used objects are packed
 * together at the lowest object numbers, starting from 1.
 *
 * Relies on renumber_map[n] <= n for every n: an entry that points at a
 * lower number can pick up that number's final value because it has
 * already been processed. */
static void compactxref(fz_context *ctx, pdf_document *doc, pdf_write_options *opts)
{
	int *map = opts->renumber_map;
	int count = pdf_xref_len(ctx, doc);
	int next_free = 1;
	int num;

	for (num = 1; num < count; num++)
	{
		int target = map[num];

		if (!opts->use_list[target])
		{
			/* Its target is unused: map it to zero. */
			map[num] = 0;
		}
		else if (target == num)
		{
			/* Used and not redirected: give it the next compact
			 * number and carry its reverse-map entries along. */
			opts->rev_renumber_map[next_free] = opts->rev_renumber_map[num];
			opts->rev_gen_list[next_free] = opts->rev_gen_list[num];
			map[num] = next_free++;
		}
		else
		{
			/* Used and redirected downwards: follow the redirect
			 * to the already-compacted number. */
			map[num] = map[target];
		}
	}
}
731
732
/*
733
* Update indirect objects according to renumbering established when
734
* removing duplicate objects and compacting the xref.
735
*/
736
737
/* Build the replacement for indirect reference 'ref': a new reference to
 * its renumbered target (generation 0), or a null object if the number is
 * out of range or maps to 0. The caller owns the returned object. */
static pdf_obj *renumberedref(fz_context *ctx, pdf_document *doc, pdf_write_options *opts, pdf_obj *ref, int xref_len)
{
	int old = pdf_to_num(ctx, ref);

	if (old >= xref_len || old <= 0 || opts->renumber_map[old] == 0)
		return pdf_new_null(ctx, doc);
	return pdf_new_indirect(ctx, doc, opts->renumber_map[old], 0);
}

/* Walk a dictionary or array recursively and replace every indirect
 * reference in it according to renumber_map. Other kinds of object are
 * left untouched. */
static void renumberobj(fz_context *ctx, pdf_document *doc, pdf_write_options *opts, pdf_obj *obj)
{
	int xref_len = pdf_xref_len(ctx, doc);
	int idx, count;

	if (pdf_is_dict(ctx, obj))
	{
		count = pdf_dict_len(ctx, obj);
		for (idx = 0; idx < count; idx++)
		{
			pdf_obj *key = pdf_dict_get_key(ctx, obj, idx);
			pdf_obj *val = pdf_dict_get_val(ctx, obj, idx);

			if (!pdf_is_indirect(ctx, val))
			{
				renumberobj(ctx, doc, opts, val);
				continue;
			}
			val = renumberedref(ctx, doc, opts, val, xref_len);
			pdf_dict_put(ctx, obj, key, val);
			pdf_drop_obj(ctx, val);
		}
	}
	else if (pdf_is_array(ctx, obj))
	{
		count = pdf_array_len(ctx, obj);
		for (idx = 0; idx < count; idx++)
		{
			pdf_obj *val = pdf_array_get(ctx, obj, idx);

			if (!pdf_is_indirect(ctx, val))
			{
				renumberobj(ctx, doc, opts, val);
				continue;
			}
			val = renumberedref(ctx, doc, opts, val, xref_len);
			pdf_array_put(ctx, obj, idx, val);
			pdf_drop_obj(ctx, val);
		}
	}
}
789
790
/* Apply renumber_map to the whole document: rewrite every reference,
 * move the used xref entries into a new compacted table, install that
 * table, rebuild use_list to match, and finally reset renumber_map to
 * the identity for the old xref range. */
static void renumberobjs(fz_context *ctx, pdf_document *doc, pdf_write_options *opts)
{
	pdf_xref_entry *table = NULL;
	int old_len = pdf_xref_len(ctx, doc);
	int *fresh_use;
	int highest;
	int num;

	fresh_use = fz_calloc(ctx, pdf_xref_len(ctx, doc)+3, sizeof(int));

	fz_var(table);
	fz_try(ctx)
	{
		/* Apply renumber map to indirect references in all objects in xref */
		renumberobj(ctx, doc, opts, pdf_trailer(ctx, doc));
		for (num = 0; num < old_len; num++)
		{
			int to = opts->renumber_map[num];
			pdf_obj *obj;

			/* If object is going to be dropped, don't bother renumbering */
			if (to == 0)
				continue;

			obj = pdf_get_xref_entry(ctx, doc, num)->obj;
			if (!pdf_is_indirect(ctx, obj))
			{
				renumberobj(ctx, doc, opts, obj);
				continue;
			}

			/* The entry is itself a bare reference: replace it. */
			obj = pdf_new_indirect(ctx, doc, to, 0);
			pdf_update_object(ctx, doc, num, obj);
			pdf_drop_obj(ctx, obj);
		}

		/* Create new table for the reordered, compacted xref */
		table = fz_malloc_array(ctx, old_len + 3, sizeof(pdf_xref_entry));
		table[0] = *pdf_get_xref_entry(ctx, doc, 0);

		/* Move used objects into the new compacted xref */
		highest = 0;
		for (num = 1; num < old_len; num++)
		{
			pdf_xref_entry *entry;

			if (!opts->use_list[num])
			{
				/* Unused: release whatever the old entry held. */
				entry = pdf_get_xref_entry(ctx, doc, num);
				pdf_drop_obj(ctx, entry->obj);
				entry->obj = NULL;
				continue;
			}

			if (highest < opts->renumber_map[num])
				highest = opts->renumber_map[num];
			entry = pdf_get_xref_entry(ctx, doc, num);
			table[opts->renumber_map[num]] = *entry;
			if (entry->obj)
			{
				/* The object now belongs to the new table slot. */
				pdf_set_obj_parent(ctx, entry->obj, opts->renumber_map[num]);
				entry->obj = NULL;
			}
			fresh_use[opts->renumber_map[num]] = opts->use_list[num];
		}

		pdf_replace_xref(ctx, doc, table, highest + 1);
		table = NULL;
	}
	fz_catch(ctx)
	{
		fz_free(ctx, table);
		fz_free(ctx, fresh_use);
		fz_rethrow(ctx);
	}

	fz_free(ctx, opts->use_list);
	opts->use_list = fresh_use;

	/* Everything has been moved; the map becomes the identity. */
	for (num = 1; num < old_len; num++)
		opts->renumber_map[num] = num;
}
875
876
/* Translate every object number stored in the per-page lists, including
 * each page's own page object number, through renumber_map. */
static void page_objects_list_renumber(pdf_write_options *opts)
{
	page_objects_list *pol = opts->page_object_lists;
	const int *map = opts->renumber_map;
	int pg;

	for (pg = 0; pg < pol->len; pg++)
	{
		page_objects *po = pol->page[pg];
		int k = 0;

		while (k < po->len)
		{
			po->object[k] = map[po->object[k]];
			k++;
		}
		po->page_object_number = map[po->page_object_number];
	}
}
890
891
static void
892
mark_all(fz_context *ctx, pdf_document *doc, pdf_write_options *opts, pdf_obj *val, int flag, int page)
893
{
894
895
if (pdf_mark_obj(ctx, val))
896
return;
897
898
fz_try(ctx)
899
{
900
if (pdf_is_indirect(ctx, val))
901
{
902
int num = pdf_to_num(ctx, val);
903
if (opts->use_list[num] & USE_PAGE_MASK)
904
/* Already used */
905
opts->use_list[num] |= USE_SHARED;
906
else
907
opts->use_list[num] |= flag;
908
if (page >= 0)
909
page_objects_list_insert(ctx, opts, page, num);
910
}
911
912
if (pdf_is_dict(ctx, val))
913
{
914
int i, n = pdf_dict_len(ctx, val);
915
916
for (i = 0; i < n; i++)
917
{
918
mark_all(ctx, doc, opts, pdf_dict_get_val(ctx, val, i), flag, page);
919
}
920
}
921
else if (pdf_is_array(ctx, val))
922
{
923
int i, n = pdf_array_len(ctx, val);
924
925
for (i = 0; i < n; i++)
926
{
927
mark_all(ctx, doc, opts, pdf_array_get(ctx, val, i), flag, page);
928
}
929
}
930
}
931
fz_always(ctx)
932
{
933
pdf_unmark_obj(ctx, val);
934
}
935
fz_catch(ctx)
936
{
937
fz_rethrow(ctx);
938
}
939
}
940
941
static int
942
mark_pages(fz_context *ctx, pdf_document *doc, pdf_write_options *opts, pdf_obj *val, int pagenum)
943
{
944
945
if (pdf_mark_obj(ctx, val))
946
return pagenum;
947
948
fz_try(ctx)
949
{
950
if (pdf_is_dict(ctx, val))
951
{
952
if (pdf_name_eq(ctx, PDF_NAME_Page, pdf_dict_get(ctx, val, PDF_NAME_Type)))
953
{
954
int num = pdf_to_num(ctx, val);
955
pdf_unmark_obj(ctx, val);
956
mark_all(ctx, doc, opts, val, pagenum == 0 ? USE_PAGE1 : (pagenum<<USE_PAGE_SHIFT), pagenum);
957
page_objects_list_set_page_object(ctx, opts, pagenum, num);
958
pagenum++;
959
opts->use_list[num] |= USE_PAGE_OBJECT;
960
}
961
else
962
{
963
int i, n = pdf_dict_len(ctx, val);
964
965
for (i = 0; i < n; i++)
966
{
967
pdf_obj *key = pdf_dict_get_key(ctx, val, i);
968
pdf_obj *obj = pdf_dict_get_val(ctx, val, i);
969
970
if (pdf_name_eq(ctx, PDF_NAME_Kids, key))
971
pagenum = mark_pages(ctx, doc, opts, obj, pagenum);
972
else
973
mark_all(ctx, doc, opts, obj, USE_CATALOGUE, -1);
974
}
975
976
if (pdf_is_indirect(ctx, val))
977
{
978
int num = pdf_to_num(ctx, val);
979
opts->use_list[num] |= USE_CATALOGUE;
980
}
981
}
982
}
983
else if (pdf_is_array(ctx, val))
984
{
985
int i, n = pdf_array_len(ctx, val);
986
987
for (i = 0; i < n; i++)
988
{
989
pagenum = mark_pages(ctx, doc, opts, pdf_array_get(ctx, val, i), pagenum);
990
}
991
if (pdf_is_indirect(ctx, val))
992
{
993
int num = pdf_to_num(ctx, val);
994
opts->use_list[num] |= USE_CATALOGUE;
995
}
996
}
997
}
998
fz_always(ctx)
999
{
1000
pdf_unmark_obj(ctx, val);
1001
}
1002
fz_catch(ctx)
1003
{
1004
fz_rethrow(ctx);
1005
}
1006
return pagenum;
1007
}
1008
1009
static void
1010
mark_root(fz_context *ctx, pdf_document *doc, pdf_write_options *opts, pdf_obj *dict)
1011
{
1012
int i, n = pdf_dict_len(ctx, dict);
1013
1014
if (pdf_mark_obj(ctx, dict))
1015
return;
1016
1017
fz_try(ctx)
1018
{
1019
if (pdf_is_indirect(ctx, dict))
1020
{
1021
int num = pdf_to_num(ctx, dict);
1022
opts->use_list[num] |= USE_CATALOGUE;
1023
}
1024
1025
for (i = 0; i < n; i++)
1026
{
1027
pdf_obj *key = pdf_dict_get_key(ctx, dict, i);
1028
pdf_obj *val = pdf_dict_get_val(ctx, dict, i);
1029
1030
if (pdf_name_eq(ctx, PDF_NAME_Pages, key))
1031
opts->page_count = mark_pages(ctx, doc, opts, val, 0);
1032
else if (pdf_name_eq(ctx, PDF_NAME_Names, key))
1033
mark_all(ctx, doc, opts, val, USE_OTHER_OBJECTS, -1);
1034
else if (pdf_name_eq(ctx, PDF_NAME_Dests, key))
1035
mark_all(ctx, doc, opts, val, USE_OTHER_OBJECTS, -1);
1036
else if (pdf_name_eq(ctx, PDF_NAME_Outlines, key))
1037
{
1038
int section;
1039
/* Look at PageMode to decide whether to
1040
* USE_OTHER_OBJECTS or USE_PAGE1 here. */
1041
if (pdf_name_eq(ctx, pdf_dict_get(ctx, dict, PDF_NAME_PageMode), PDF_NAME_UseOutlines))
1042
section = USE_PAGE1;
1043
else
1044
section = USE_OTHER_OBJECTS;
1045
mark_all(ctx, doc, opts, val, section, -1);
1046
}
1047
else
1048
mark_all(ctx, doc, opts, val, USE_CATALOGUE, -1);
1049
}
1050
}
1051
fz_always(ctx)
1052
{
1053
pdf_unmark_obj(ctx, dict);
1054
}
1055
fz_catch(ctx)
1056
{
1057
fz_rethrow(ctx);
1058
}
1059
}
1060
1061
static void
1062
mark_trailer(fz_context *ctx, pdf_document *doc, pdf_write_options *opts, pdf_obj *dict)
1063
{
1064
int i, n = pdf_dict_len(ctx, dict);
1065
1066
if (pdf_mark_obj(ctx, dict))
1067
return;
1068
1069
fz_try(ctx)
1070
{
1071
for (i = 0; i < n; i++)
1072
{
1073
pdf_obj *key = pdf_dict_get_key(ctx, dict, i);
1074
pdf_obj *val = pdf_dict_get_val(ctx, dict, i);
1075
1076
if (pdf_name_eq(ctx, PDF_NAME_Root, key))
1077
mark_root(ctx, doc, opts, val);
1078
else
1079
mark_all(ctx, doc, opts, val, USE_CATALOGUE, -1);
1080
}
1081
}
1082
fz_always(ctx)
1083
{
1084
pdf_unmark_obj(ctx, dict);
1085
}
1086
fz_catch(ctx)
1087
{
1088
fz_rethrow(ctx);
1089
}
1090
}
1091
1092
static void
1093
add_linearization_objs(fz_context *ctx, pdf_document *doc, pdf_write_options *opts)
1094
{
1095
pdf_obj *params_obj = NULL;
1096
pdf_obj *params_ref = NULL;
1097
pdf_obj *hint_obj = NULL;
1098
pdf_obj *hint_ref = NULL;
1099
pdf_obj *o = NULL;
1100
int params_num, hint_num;
1101
1102
fz_var(params_obj);
1103
fz_var(params_ref);
1104
fz_var(hint_obj);
1105
fz_var(hint_ref);
1106
fz_var(o);
1107
1108
fz_try(ctx)
1109
{
1110
/* Linearization params */
1111
params_obj = pdf_new_dict(ctx, doc, 10);
1112
params_ref = pdf_new_ref(ctx, doc, params_obj);
1113
params_num = pdf_to_num(ctx, params_ref);
1114
1115
opts->use_list[params_num] = USE_PARAMS;
1116
opts->renumber_map[params_num] = params_num;
1117
opts->rev_renumber_map[params_num] = params_num;
1118
opts->gen_list[params_num] = 0;
1119
opts->rev_gen_list[params_num] = 0;
1120
pdf_dict_put_drop(ctx, params_obj, PDF_NAME_Linearized, pdf_new_real(ctx, doc, 1.0));
1121
opts->linear_l = pdf_new_int(ctx, doc, INT_MIN);
1122
pdf_dict_put(ctx, params_obj, PDF_NAME_L, opts->linear_l);
1123
opts->linear_h0 = pdf_new_int(ctx, doc, INT_MIN);
1124
o = pdf_new_array(ctx, doc, 2);
1125
pdf_array_push(ctx, o, opts->linear_h0);
1126
opts->linear_h1 = pdf_new_int(ctx, doc, INT_MIN);
1127
pdf_array_push(ctx, o, opts->linear_h1);
1128
pdf_dict_put_drop(ctx, params_obj, PDF_NAME_H, o);
1129
o = NULL;
1130
opts->linear_o = pdf_new_int(ctx, doc, INT_MIN);
1131
pdf_dict_put(ctx, params_obj, PDF_NAME_O, opts->linear_o);
1132
opts->linear_e = pdf_new_int(ctx, doc, INT_MIN);
1133
pdf_dict_put(ctx, params_obj, PDF_NAME_E, opts->linear_e);
1134
opts->linear_n = pdf_new_int(ctx, doc, INT_MIN);
1135
pdf_dict_put(ctx, params_obj, PDF_NAME_N, opts->linear_n);
1136
opts->linear_t = pdf_new_int(ctx, doc, INT_MIN);
1137
pdf_dict_put(ctx, params_obj, PDF_NAME_T, opts->linear_t);
1138
1139
/* Primary hint stream */
1140
hint_obj = pdf_new_dict(ctx, doc, 10);
1141
hint_ref = pdf_new_ref(ctx, doc, hint_obj);
1142
hint_num = pdf_to_num(ctx, hint_ref);
1143
1144
opts->use_list[hint_num] = USE_HINTS;
1145
opts->renumber_map[hint_num] = hint_num;
1146
opts->rev_renumber_map[hint_num] = hint_num;
1147
opts->gen_list[hint_num] = 0;
1148
opts->rev_gen_list[hint_num] = 0;
1149
pdf_dict_put_drop(ctx, hint_obj, PDF_NAME_P, pdf_new_int(ctx, doc, 0));
1150
opts->hints_s = pdf_new_int(ctx, doc, INT_MIN);
1151
pdf_dict_put(ctx, hint_obj, PDF_NAME_S, opts->hints_s);
1152
/* FIXME: Do we have thumbnails? Do a T entry */
1153
/* FIXME: Do we have outlines? Do an O entry */
1154
/* FIXME: Do we have article threads? Do an A entry */
1155
/* FIXME: Do we have named destinations? Do a E entry */
1156
/* FIXME: Do we have interactive forms? Do a V entry */
1157
/* FIXME: Do we have document information? Do an I entry */
1158
/* FIXME: Do we have logical structure heirarchy? Do a C entry */
1159
/* FIXME: Do L, Page Label hint table */
1160
pdf_dict_put_drop(ctx, hint_obj, PDF_NAME_Filter, PDF_NAME_FlateDecode);
1161
opts->hints_length = pdf_new_int(ctx, doc, INT_MIN);
1162
pdf_dict_put(ctx, hint_obj, PDF_NAME_Length, opts->hints_length);
1163
pdf_get_xref_entry(ctx, doc, hint_num)->stm_ofs = -1;
1164
}
1165
fz_always(ctx)
1166
{
1167
pdf_drop_obj(ctx, params_obj);
1168
pdf_drop_obj(ctx, params_ref);
1169
pdf_drop_obj(ctx, hint_ref);
1170
pdf_drop_obj(ctx, hint_obj);
1171
pdf_drop_obj(ctx, o);
1172
}
1173
fz_catch(ctx)
1174
{
1175
fz_rethrow(ctx);
1176
}
1177
}
1178
1179
static void
1180
lpr_inherit_res_contents(fz_context *ctx, pdf_obj *res, pdf_obj *dict, pdf_obj *text)
1181
{
1182
pdf_obj *o, *r;
1183
int i, n;
1184
1185
/* If the parent node doesn't have an entry of this type, give up. */
1186
o = pdf_dict_get(ctx, dict, text);
1187
if (!o)
1188
return;
1189
1190
/* If the resources dict we are building doesn't have an entry of this
1191
* type yet, then just copy it (ensuring it's not a reference) */
1192
r = pdf_dict_get(ctx, res, text);
1193
if (r == NULL)
1194
{
1195
o = pdf_resolve_indirect(ctx, o);
1196
if (pdf_is_dict(ctx, o))
1197
o = pdf_copy_dict(ctx, o);
1198
else if (pdf_is_array(ctx, o))
1199
o = pdf_copy_array(ctx, o);
1200
else
1201
o = NULL;
1202
if (o)
1203
pdf_dict_put(ctx, res, text, o);
1204
return;
1205
}
1206
1207
/* Otherwise we need to merge o into r */
1208
if (pdf_is_dict(ctx, o))
1209
{
1210
n = pdf_dict_len(ctx, o);
1211
for (i = 0; i < n; i++)
1212
{
1213
pdf_obj *key = pdf_dict_get_key(ctx, o, i);
1214
pdf_obj *val = pdf_dict_get_val(ctx, o, i);
1215
1216
if (pdf_dict_get(ctx, res, key))
1217
continue;
1218
pdf_dict_put(ctx, res, key, val);
1219
}
1220
}
1221
}
1222
1223
static void
1224
lpr_inherit_res(fz_context *ctx, pdf_obj *node, int depth, pdf_obj *dict)
1225
{
1226
while (1)
1227
{
1228
pdf_obj *o;
1229
1230
node = pdf_dict_get(ctx, node, PDF_NAME_Parent);
1231
depth--;
1232
if (!node || depth < 0)
1233
break;
1234
1235
o = pdf_dict_get(ctx, node, PDF_NAME_Resources);
1236
if (o)
1237
{
1238
lpr_inherit_res_contents(ctx, dict, o, PDF_NAME_ExtGState);
1239
lpr_inherit_res_contents(ctx, dict, o, PDF_NAME_ColorSpace);
1240
lpr_inherit_res_contents(ctx, dict, o, PDF_NAME_Pattern);
1241
lpr_inherit_res_contents(ctx, dict, o, PDF_NAME_Shading);
1242
lpr_inherit_res_contents(ctx, dict, o, PDF_NAME_XObject);
1243
lpr_inherit_res_contents(ctx, dict, o, PDF_NAME_Font);
1244
lpr_inherit_res_contents(ctx, dict, o, PDF_NAME_ProcSet);
1245
lpr_inherit_res_contents(ctx, dict, o, PDF_NAME_Properties);
1246
}
1247
}
1248
}
1249
1250
static pdf_obj *
1251
lpr_inherit(fz_context *ctx, pdf_obj *node, char *text, int depth)
1252
{
1253
do
1254
{
1255
pdf_obj *o = pdf_dict_gets(ctx, node, text);
1256
1257
if (o)
1258
return pdf_resolve_indirect(ctx, o);
1259
node = pdf_dict_get(ctx, node, PDF_NAME_Parent);
1260
depth--;
1261
}
1262
while (depth >= 0 && node);
1263
1264
return NULL;
1265
}
1266
1267
static int
1268
lpr(fz_context *ctx, pdf_document *doc, pdf_obj *node, int depth, int page)
1269
{
1270
pdf_obj *kids;
1271
pdf_obj *o = NULL;
1272
int i, n;
1273
1274
if (pdf_mark_obj(ctx, node))
1275
return page;
1276
1277
fz_var(o);
1278
1279
fz_try(ctx)
1280
{
1281
if (pdf_name_eq(ctx, PDF_NAME_Page, pdf_dict_get(ctx, node, PDF_NAME_Type)))
1282
{
1283
pdf_obj *r; /* r is deliberately not cleaned up */
1284
1285
/* Copy resources down to the child */
1286
o = pdf_keep_obj(ctx, pdf_dict_get(ctx, node, PDF_NAME_Resources));
1287
if (!o)
1288
{
1289
o = pdf_keep_obj(ctx, pdf_new_dict(ctx, doc, 2));
1290
pdf_dict_put(ctx, node, PDF_NAME_Resources, o);
1291
}
1292
lpr_inherit_res(ctx, node, depth, o);
1293
r = lpr_inherit(ctx, node, "MediaBox", depth);
1294
if (r)
1295
pdf_dict_put(ctx, node, PDF_NAME_MediaBox, r);
1296
r = lpr_inherit(ctx, node, "CropBox", depth);
1297
if (r)
1298
pdf_dict_put(ctx, node, PDF_NAME_CropBox, r);
1299
r = lpr_inherit(ctx, node, "BleedBox", depth);
1300
if (r)
1301
pdf_dict_put(ctx, node, PDF_NAME_BleedBox, r);
1302
r = lpr_inherit(ctx, node, "TrimBox", depth);
1303
if (r)
1304
pdf_dict_put(ctx, node, PDF_NAME_TrimBox, r);
1305
r = lpr_inherit(ctx, node, "ArtBox", depth);
1306
if (r)
1307
pdf_dict_put(ctx, node, PDF_NAME_ArtBox, r);
1308
r = lpr_inherit(ctx, node, "Rotate", depth);
1309
if (r)
1310
pdf_dict_put(ctx, node, PDF_NAME_Rotate, r);
1311
page++;
1312
}
1313
else
1314
{
1315
kids = pdf_dict_get(ctx, node, PDF_NAME_Kids);
1316
n = pdf_array_len(ctx, kids);
1317
for(i = 0; i < n; i++)
1318
{
1319
page = lpr(ctx, doc, pdf_array_get(ctx, kids, i), depth+1, page);
1320
}
1321
pdf_dict_del(ctx, node, PDF_NAME_Resources);
1322
pdf_dict_del(ctx, node, PDF_NAME_MediaBox);
1323
pdf_dict_del(ctx, node, PDF_NAME_CropBox);
1324
pdf_dict_del(ctx, node, PDF_NAME_BleedBox);
1325
pdf_dict_del(ctx, node, PDF_NAME_TrimBox);
1326
pdf_dict_del(ctx, node, PDF_NAME_ArtBox);
1327
pdf_dict_del(ctx, node, PDF_NAME_Rotate);
1328
}
1329
}
1330
fz_always(ctx)
1331
{
1332
pdf_drop_obj(ctx, o);
1333
}
1334
fz_catch(ctx)
1335
{
1336
fz_rethrow(ctx);
1337
}
1338
1339
pdf_unmark_obj(ctx, node);
1340
1341
return page;
1342
}
1343
1344
void
1345
pdf_localise_page_resources(fz_context *ctx, pdf_document *doc)
1346
{
1347
if (doc->resources_localised)
1348
return;
1349
1350
lpr(ctx, doc, pdf_dict_getl(ctx, pdf_trailer(ctx, doc), PDF_NAME_Root, PDF_NAME_Pages, NULL), 0, 0);
1351
1352
doc->resources_localised = 1;
1353
}
1354
1355
static void
1356
linearize(fz_context *ctx, pdf_document *doc, pdf_write_options *opts)
1357
{
1358
int i;
1359
int n = pdf_xref_len(ctx, doc) + 2;
1360
int *reorder;
1361
int *rev_renumber_map;
1362
int *rev_gen_list;
1363
1364
opts->page_object_lists = page_objects_list_create(ctx);
1365
1366
/* Ensure that every page has local references of its resources */
1367
/* FIXME: We could 'thin' the resources according to what is actually
1368
* required for each page, but this would require us to run the page
1369
* content streams. */
1370
pdf_localise_page_resources(ctx, doc);
1371
1372
/* Walk the objects for each page, marking which ones are used, where */
1373
memset(opts->use_list, 0, n * sizeof(int));
1374
mark_trailer(ctx, doc, opts, pdf_trailer(ctx, doc));
1375
1376
/* Add new objects required for linearization */
1377
add_linearization_objs(ctx, doc, opts);
1378
1379
#ifdef DEBUG_WRITING
1380
fprintf(stderr, "Usage calculated:\n");
1381
for (i=0; i < pdf_xref_len(ctx, doc); i++)
1382
{
1383
fprintf(stderr, "%d: use=%d\n", i, opts->use_list[i]);
1384
}
1385
#endif
1386
1387
/* Allocate/init the structures used for renumbering the objects */
1388
reorder = fz_calloc(ctx, n, sizeof(int));
1389
rev_renumber_map = fz_calloc(ctx, n, sizeof(int));
1390
rev_gen_list = fz_calloc(ctx, n, sizeof(int));
1391
for (i = 0; i < n; i++)
1392
{
1393
reorder[i] = i;
1394
}
1395
1396
/* Heap sort the reordering */
1397
heap_sort(reorder+1, n-1, opts->use_list, &order_ge);
1398
1399
#ifdef DEBUG_WRITING
1400
fprintf(stderr, "Reordered:\n");
1401
for (i=1; i < pdf_xref_len(ctx, doc); i++)
1402
{
1403
fprintf(stderr, "%d: use=%d\n", i, opts->use_list[reorder[i]]);
1404
}
1405
#endif
1406
1407
/* Find the split point */
1408
for (i = 1; (opts->use_list[reorder[i]] & USE_PARAMS) == 0; i++);
1409
opts->start = i;
1410
1411
/* Roll the reordering into the renumber_map */
1412
for (i = 0; i < n; i++)
1413
{
1414
opts->renumber_map[reorder[i]] = i;
1415
rev_renumber_map[i] = opts->rev_renumber_map[reorder[i]];
1416
rev_gen_list[i] = opts->rev_gen_list[reorder[i]];
1417
}
1418
fz_free(ctx, opts->rev_renumber_map);
1419
fz_free(ctx, opts->rev_gen_list);
1420
opts->rev_renumber_map = rev_renumber_map;
1421
opts->rev_gen_list = rev_gen_list;
1422
fz_free(ctx, reorder);
1423
1424
/* Apply the renumber_map */
1425
page_objects_list_renumber(opts);
1426
renumberobjs(ctx, doc, opts);
1427
1428
page_objects_list_sort_and_dedupe(ctx, opts->page_object_lists);
1429
}
1430
1431
static void
1432
update_linearization_params(fz_context *ctx, pdf_document *doc, pdf_write_options *opts)
1433
{
1434
int offset;
1435
pdf_set_int(ctx, opts->linear_l, opts->file_len);
1436
/* Primary hint stream offset (of object, not stream!) */
1437
pdf_set_int(ctx, opts->linear_h0, opts->ofs_list[pdf_xref_len(ctx, doc)-1]);
1438
/* Primary hint stream length (of object, not stream!) */
1439
offset = (opts->start == 1 ? opts->main_xref_offset : opts->ofs_list[1] + opts->hintstream_len);
1440
pdf_set_int(ctx, opts->linear_h1, offset - opts->ofs_list[pdf_xref_len(ctx, doc)-1]);
1441
/* Object number of first pages page object (the first object of page 0) */
1442
pdf_set_int(ctx, opts->linear_o, opts->page_object_lists->page[0]->object[0]);
1443
/* Offset of end of first page (first page is followed by primary
1444
* hint stream (object n-1) then remaining pages (object 1...). The
1445
* primary hint stream counts as part of the first pages data, I think.
1446
*/
1447
offset = (opts->start == 1 ? opts->main_xref_offset : opts->ofs_list[1] + opts->hintstream_len);
1448
pdf_set_int(ctx, opts->linear_e, offset);
1449
/* Number of pages in document */
1450
pdf_set_int(ctx, opts->linear_n, opts->page_count);
1451
/* Offset of first entry in main xref table */
1452
pdf_set_int(ctx, opts->linear_t, opts->first_xref_entry_offset + opts->hintstream_len);
1453
/* Offset of shared objects hint table in the primary hint stream */
1454
pdf_set_int(ctx, opts->hints_s, opts->hints_shared_offset);
1455
/* Primary hint stream length */
1456
pdf_set_int(ctx, opts->hints_length, opts->hintstream_len);
1457
}
1458
1459
/*
1460
* Make sure we have loaded objects from object streams.
1461
*/
1462
1463
/* Load (and immediately release) every object whose xref entry has type 'o'. */
static void preloadobjstms(fz_context *ctx, pdf_document *doc)
{
	int count = pdf_xref_len(ctx, doc);
	int num;

	for (num = 0; num < count; num++)
	{
		if (pdf_get_xref_entry(ctx, doc, num)->type != 'o')
			continue;

		pdf_drop_obj(ctx, pdf_load_object(ctx, doc, num, 0));
	}
}
1478
1479
/*
1480
* Save streams and objects to the output
1481
*/
1482
1483
/*
 * Returns 1 if c lies outside the range 32..127, except that newline,
 * carriage return and tab are not counted as binary. Returns 0 otherwise.
 */
static inline int isbinary(int c)
{
	switch (c)
	{
	case '\n':
	case '\r':
	case '\t':
		return 0;
	default:
		return !(c >= 32 && c <= 127);
	}
}
1489
1490
/* Returns 1 if any byte in buf is binary according to isbinary, else 0. */
static int isbinarystream(fz_buffer *buf)
{
	int pos = 0;

	while (pos < buf->len)
	{
		if (isbinary(buf->data[pos]))
			return 1;
		pos++;
	}
	return 0;
}
1498
1499
/*
 * Build a new buffer holding the n bytes at p as lowercase hex digits,
 * with a newline after every 32 input bytes, terminated by ">\n".
 */
static fz_buffer *hexbuf(fz_context *ctx, unsigned char *p, int n)
{
	static const char hex[17] = "0123456789abcdef";
	fz_buffer *out;
	int column = 0;

	/* Two digits per byte, one newline per 32 bytes, plus the ">\n" tail. */
	out = fz_new_buffer(ctx, n * 2 + (n / 32) + 2);

	for (; n != 0; n--, p++)
	{
		unsigned char byte = *p;

		out->data[out->len++] = hex[byte >> 4];
		out->data[out->len++] = hex[byte & 15];
		column++;
		if (column == 32)
		{
			out->data[out->len++] = '\n';
			column = 0;
		}
	}

	out->data[out->len++] = '>';
	out->data[out->len++] = '\n';

	return out;
}
1524
1525
/*
 * Prepend ASCIIHexDecode to the Filter entry of the stream dictionary dict.
 *
 * - Filter is a name: it becomes the array [ASCIIHexDecode, name]; a
 *   dictionary DecodeParms becomes [null, parms].
 * - Filter is an array: ASCIIHexDecode is inserted at the front, and null
 *   is inserted at the front of an array DecodeParms.
 * - Otherwise: Filter is set to ASCIIHexDecode.
 *
 * The temporary objects are released in fz_always so that they are not
 * leaked if one of the calls throws.
 */
static void addhexfilter(fz_context *ctx, pdf_document *doc, pdf_obj *dict)
{
	pdf_obj *f, *dp;
	pdf_obj *newf = NULL;
	pdf_obj *newdp = NULL;
	pdf_obj *nullobj = NULL;

	fz_var(newf);
	fz_var(newdp);
	fz_var(nullobj);

	fz_try(ctx)
	{
		nullobj = pdf_new_null(ctx, doc);

		f = pdf_dict_get(ctx, dict, PDF_NAME_Filter);
		dp = pdf_dict_get(ctx, dict, PDF_NAME_DecodeParms);

		if (pdf_is_name(ctx, f))
		{
			newf = pdf_new_array(ctx, doc, 2);
			pdf_array_push(ctx, newf, PDF_NAME_ASCIIHexDecode);
			pdf_array_push(ctx, newf, f);
			f = newf;
			if (pdf_is_dict(ctx, dp))
			{
				/* null holds the slot for the new hex filter */
				newdp = pdf_new_array(ctx, doc, 2);
				pdf_array_push(ctx, newdp, nullobj);
				pdf_array_push(ctx, newdp, dp);
				dp = newdp;
			}
		}
		else if (pdf_is_array(ctx, f))
		{
			pdf_array_insert(ctx, f, PDF_NAME_ASCIIHexDecode, 0);
			if (pdf_is_array(ctx, dp))
				pdf_array_insert(ctx, dp, nullobj, 0);
		}
		else
			f = PDF_NAME_ASCIIHexDecode;

		pdf_dict_put(ctx, dict, PDF_NAME_Filter, f);
		if (dp)
			pdf_dict_put(ctx, dict, PDF_NAME_DecodeParms, dp);
	}
	fz_always(ctx)
	{
		pdf_drop_obj(ctx, nullobj);
		pdf_drop_obj(ctx, newf);
		pdf_drop_obj(ctx, newdp);
	}
	fz_catch(ctx)
	{
		fz_rethrow(ctx);
	}
}
1567
1568
/*
 * Write stream object num/gen to the output using its raw stream data.
 *
 * obj_orig is the stream dictionary; it is copied, so the caller's object
 * is not modified. If opts->do_ascii is set and the data contains binary
 * bytes, the data is hex encoded, an ASCIIHexDecode filter is added and
 * Length is updated to match.
 *
 * The buffer and the temporary objects are released in fz_always so that
 * they are not leaked if loading, copying or printing throws.
 */
static void copystream(fz_context *ctx, pdf_document *doc, pdf_write_options *opts, pdf_obj *obj_orig, int num, int gen)
{
	fz_buffer *buf = NULL;
	pdf_obj *newlen = NULL;
	pdf_obj *obj = NULL;
	int orig_num = opts->rev_renumber_map[num];
	int orig_gen = opts->rev_gen_list[num];

	fz_var(buf);
	fz_var(newlen);
	fz_var(obj);

	fz_try(ctx)
	{
		buf = pdf_load_raw_renumbered_stream(ctx, doc, num, gen, orig_num, orig_gen);

		obj = pdf_copy_dict(ctx, obj_orig);
		if (opts->do_ascii && isbinarystream(buf))
		{
			fz_buffer *tmp = hexbuf(ctx, buf->data, buf->len);
			fz_drop_buffer(ctx, buf);
			buf = tmp;

			addhexfilter(ctx, doc, obj);

			newlen = pdf_new_int(ctx, doc, buf->len);
			pdf_dict_put(ctx, obj, PDF_NAME_Length, newlen);
		}

		fz_fprintf(ctx, opts->out, "%d %d obj\n", num, gen);
		pdf_fprint_obj(ctx, opts->out, obj, opts->do_expand == 0);
		fputs("stream\n", opts->out);
		fwrite(buf->data, 1, buf->len, opts->out);
		fputs("endstream\nendobj\n\n", opts->out);
	}
	fz_always(ctx)
	{
		pdf_drop_obj(ctx, newlen);
		fz_drop_buffer(ctx, buf);
		pdf_drop_obj(ctx, obj);
	}
	fz_catch(ctx)
	{
		fz_rethrow(ctx);
	}
}
1601
1602
/*
 * Write stream object num/gen to the output with its data loaded through
 * pdf_load_renumbered_stream, and with Filter and DecodeParms removed.
 *
 * obj_orig is the stream dictionary; it is copied, so the caller's object
 * is not modified. If opts->continue_on_error is set, a truncated stream
 * is counted in *opts->errors (when that pointer is set). If opts->do_ascii
 * is set and the data contains binary bytes, the data is hex encoded and
 * an ASCIIHexDecode filter is added. Length is always rewritten.
 *
 * The buffer and the temporary objects are released in fz_always so that
 * they are not leaked if loading, copying or printing throws.
 */
static void expandstream(fz_context *ctx, pdf_document *doc, pdf_write_options *opts, pdf_obj *obj_orig, int num, int gen)
{
	fz_buffer *buf = NULL;
	pdf_obj *newlen = NULL;
	pdf_obj *obj = NULL;
	int orig_num = opts->rev_renumber_map[num];
	int orig_gen = opts->rev_gen_list[num];
	int truncated = 0;

	fz_var(buf);
	fz_var(newlen);
	fz_var(obj);

	fz_try(ctx)
	{
		buf = pdf_load_renumbered_stream(ctx, doc, num, gen, orig_num, orig_gen, (opts->continue_on_error ? &truncated : NULL));
		if (truncated && opts->errors)
			(*opts->errors)++;

		obj = pdf_copy_dict(ctx, obj_orig);
		pdf_dict_del(ctx, obj, PDF_NAME_Filter);
		pdf_dict_del(ctx, obj, PDF_NAME_DecodeParms);

		if (opts->do_ascii && isbinarystream(buf))
		{
			fz_buffer *tmp = hexbuf(ctx, buf->data, buf->len);
			fz_drop_buffer(ctx, buf);
			buf = tmp;

			addhexfilter(ctx, doc, obj);
		}

		newlen = pdf_new_int(ctx, doc, buf->len);
		pdf_dict_put(ctx, obj, PDF_NAME_Length, newlen);

		fz_fprintf(ctx, opts->out, "%d %d obj\n", num, gen);
		pdf_fprint_obj(ctx, opts->out, obj, opts->do_expand == 0);
		fputs("stream\n", opts->out);
		fwrite(buf->data, 1, buf->len, opts->out);
		fputs("endstream\nendobj\n\n", opts->out);
	}
	fz_always(ctx)
	{
		pdf_drop_obj(ctx, newlen);
		fz_drop_buffer(ctx, buf);
		pdf_drop_obj(ctx, obj);
	}
	fz_catch(ctx)
	{
		fz_rethrow(ctx);
	}
}
1641
1642
/*
 * Returns 1 if s exactly matches one of the filter names listed below
 * (full names and their abbreviations), 0 otherwise.
 */
static int is_image_filter(char *s)
{
	static const char *const image_filters[] = {
		"CCITTFaxDecode", "CCF",
		"DCTDecode", "DCT",
		"RunLengthDecode", "RL",
		"JBIG2Decode",
		"JPXDecode",
	};
	size_t i;

	for (i = 0; i < sizeof image_filters / sizeof image_filters[0]; i++)
	{
		if (strcmp(s, image_filters[i]) == 0)
			return 1;
	}
	return 0;
}
1652
1653
/*
 * Returns 1 if the filter object o (a name, or an array of names) contains
 * a filter accepted by is_image_filter, 0 otherwise (including for NULL).
 */
static int filter_implies_image(fz_context *ctx, pdf_document *doc, pdf_obj *o)
{
	int idx, count;

	if (o == NULL)
		return 0;
	if (pdf_is_name(ctx, o))
		return is_image_filter(pdf_to_name(ctx, o));
	if (!pdf_is_array(ctx, o))
		return 0;

	count = pdf_array_len(ctx, o);
	for (idx = 0; idx < count; idx++)
	{
		if (is_image_filter(pdf_to_name(ctx, pdf_array_get(ctx, o, idx))))
			return 1;
	}
	return 0;
}
1669
1670
/*
 * Write object num/gen to the output.
 *
 * ObjStm objects (and XRef objects when skip_xrefs is set) are not written;
 * their use_list entry is cleared instead. Streams are either expanded or
 * copied, depending on opts->do_expand and on what the stream dictionary
 * looks like. When opts->continue_on_error is set, an object that fails to
 * load or to write is replaced by a null object and counted in
 * *opts->errors (when that pointer is set).
 */
static void writeobject(fz_context *ctx, pdf_document *doc, pdf_write_options *opts, int num, int gen, int skip_xrefs)
{
	pdf_xref_entry *entry;
	pdf_obj *obj;

	fz_try(ctx)
	{
		obj = pdf_load_object(ctx, doc, num, gen);
	}
	fz_catch(ctx)
	{
		fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
		if (!opts->continue_on_error)
			fz_rethrow(ctx);

		fz_fprintf(ctx, opts->out, "%d %d obj\nnull\nendobj\n", num, gen);
		if (opts->errors)
			(*opts->errors)++;
		fz_warn(ctx, "%s", fz_caught_message(ctx));
		return;
	}

	/* skip ObjStm and XRef objects */
	if (pdf_is_dict(ctx, obj))
	{
		pdf_obj *type = pdf_dict_get(ctx, obj, PDF_NAME_Type);

		if (pdf_name_eq(ctx, type, PDF_NAME_ObjStm) ||
			(skip_xrefs && pdf_name_eq(ctx, type, PDF_NAME_XRef)))
		{
			opts->use_list[num] = 0;
			pdf_drop_obj(ctx, obj);
			return;
		}
	}

	entry = pdf_get_xref_entry(ctx, doc, num);
	if (!pdf_is_stream(ctx, doc, num, gen))
	{
		fz_fprintf(ctx, opts->out, "%d %d obj\n", num, gen);
		pdf_fprint_obj(ctx, opts->out, obj, opts->do_expand == 0);
		fputs("endobj\n\n", opts->out);
	}
	else if (entry->stm_ofs < 0 && entry->stm_buf == NULL)
	{
		fz_fprintf(ctx, opts->out, "%d %d obj\n", num, gen);
		pdf_fprint_obj(ctx, opts->out, obj, opts->do_expand == 0);
		fputs("stream\nendstream\nendobj\n\n", opts->out);
	}
	else
	{
		int dontexpand = 0;

		/* Selective expansion: later checks override earlier ones. */
		if (opts->do_expand != 0 && opts->do_expand != fz_expand_all)
		{
			int keep_images = !(opts->do_expand & fz_expand_images);
			int keep_fonts = !(opts->do_expand & fz_expand_fonts);
			pdf_obj *otype = pdf_dict_get(ctx, obj, PDF_NAME_Type);
			pdf_obj *subtype = pdf_dict_get(ctx, obj, PDF_NAME_Subtype);

			if (pdf_name_eq(ctx, otype, PDF_NAME_XObject) &&
				pdf_name_eq(ctx, subtype, PDF_NAME_Image))
				dontexpand = keep_images;

			if (pdf_name_eq(ctx, otype, PDF_NAME_Font) ||
				pdf_name_eq(ctx, otype, PDF_NAME_FontDescriptor) ||
				pdf_dict_get(ctx, obj, PDF_NAME_Length1) != NULL ||
				pdf_dict_get(ctx, obj, PDF_NAME_Length2) != NULL ||
				pdf_dict_get(ctx, obj, PDF_NAME_Length3) != NULL ||
				pdf_name_eq(ctx, subtype, PDF_NAME_Type1C) ||
				pdf_name_eq(ctx, subtype, PDF_NAME_CIDFontType0C))
				dontexpand = keep_fonts;

			if (filter_implies_image(ctx, doc, pdf_dict_get(ctx, obj, PDF_NAME_Filter)) ||
				(pdf_dict_get(ctx, obj, PDF_NAME_Width) != NULL &&
				 pdf_dict_get(ctx, obj, PDF_NAME_Height) != NULL))
				dontexpand = keep_images;
		}

		fz_try(ctx)
		{
			if (opts->do_expand && !dontexpand && !pdf_is_jpx_image(ctx, obj))
				expandstream(ctx, doc, opts, obj, num, gen);
			else
				copystream(ctx, doc, opts, obj, num, gen);
		}
		fz_catch(ctx)
		{
			fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
			if (!opts->continue_on_error)
			{
				pdf_drop_obj(ctx, obj);
				fz_rethrow(ctx);
			}

			fz_fprintf(ctx, opts->out, "%d %d obj\nnull\nendobj\n", num, gen);
			if (opts->errors)
				(*opts->errors)++;
			fz_warn(ctx, "%s", fz_caught_message(ctx));
		}
	}

	pdf_drop_obj(ctx, obj);
}
1782
1783
/*
 * Write one xref subsection covering objects from..to-1: a "first count"
 * header line followed by one entry per object, flagged 'n' when the
 * object's use_list entry is non-zero and 'f' otherwise.
 */
static void writexrefsubsect(fz_context *ctx, pdf_write_options *opts, int from, int to)
{
	int num = from;

	fz_fprintf(ctx, opts->out, "%d %d\n", from, to - from);
	while (num < to)
	{
		const char *entry_fmt = opts->use_list[num] ?
			"%010Zd %05d n \n" :
			"%010Zd %05d f \n";

		fz_fprintf(ctx, opts->out, entry_fmt, opts->ofs_list[num], opts->gen_list[num]);
		num++;
	}
}
1796
1797
/*
 * Write a classic xref table for objects from..to-1, followed by the
 * trailer dictionary and the startxref marker.
 *
 * from, to:         range of object numbers covered.
 * first:            if set, Info, Root and ID are copied from the document
 *                   trailer into the new trailer (non-incremental only).
 * main_xref_offset: if non-zero, written as Prev (non-incremental only).
 * startxref:        the value written after the startxref keyword.
 *
 * For incremental saves only runs of objects for which
 * pdf_xref_is_incremental is true are written, and the document trailer
 * itself is updated and reused.
 *
 * The trailer is built, printed and released inside the try block so that
 * it is not leaked if any of those steps throws.
 */
static void writexref(fz_context *ctx, pdf_document *doc, pdf_write_options *opts, int from, int to, int first, int main_xref_offset, int startxref)
{
	pdf_obj *trailer = NULL;
	pdf_obj *obj;
	pdf_obj *nobj = NULL;

	fputs("xref\n", opts->out);
	opts->first_xref_entry_offset = ftell(opts->out);

	if (opts->do_incremental)
	{
		int subfrom = from;
		int subto;

		while (subfrom < to)
		{
			/* Skip objects that are not part of the incremental update */
			while (subfrom < to && !pdf_xref_is_incremental(ctx, doc, subfrom))
				subfrom++;

			/* Find the end of the run of updated objects */
			subto = subfrom;
			while (subto < to && pdf_xref_is_incremental(ctx, doc, subto))
				subto++;

			if (subfrom < subto)
				writexrefsubsect(ctx, opts, subfrom, subto);

			subfrom = subto;
		}
	}
	else
	{
		writexrefsubsect(ctx, opts, from, to);
	}

	fputs("\n", opts->out);

	fz_var(trailer);
	fz_var(nobj);

	fz_try(ctx)
	{
		if (opts->do_incremental)
		{
			trailer = pdf_keep_obj(ctx, pdf_trailer(ctx, doc));
			pdf_dict_put_drop(ctx, trailer, PDF_NAME_Size, pdf_new_int(ctx, doc, pdf_xref_len(ctx, doc)));
			pdf_dict_put_drop(ctx, trailer, PDF_NAME_Prev, pdf_new_int(ctx, doc, doc->startxref));
			doc->startxref = startxref;
		}
		else
		{
			trailer = pdf_new_dict(ctx, doc, 5);

			nobj = pdf_new_int(ctx, doc, to);
			pdf_dict_put(ctx, trailer, PDF_NAME_Size, nobj);
			pdf_drop_obj(ctx, nobj);
			nobj = NULL;

			if (first)
			{
				obj = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME_Info);
				if (obj)
					pdf_dict_put(ctx, trailer, PDF_NAME_Info, obj);

				obj = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME_Root);
				if (obj)
					pdf_dict_put(ctx, trailer, PDF_NAME_Root, obj);

				obj = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME_ID);
				if (obj)
					pdf_dict_put(ctx, trailer, PDF_NAME_ID, obj);
			}
			if (main_xref_offset != 0)
			{
				nobj = pdf_new_int(ctx, doc, main_xref_offset);
				pdf_dict_put(ctx, trailer, PDF_NAME_Prev, nobj);
				pdf_drop_obj(ctx, nobj);
				nobj = NULL;
			}
		}

		fputs("trailer\n", opts->out);
		pdf_fprint_obj(ctx, opts->out, trailer, opts->do_expand == 0);
		fputs("\n", opts->out);
	}
	fz_always(ctx)
	{
		pdf_drop_obj(ctx, nobj);
		pdf_drop_obj(ctx, trailer);
	}
	fz_catch(ctx)
	{
		fz_rethrow(ctx);
	}

	fz_fprintf(ctx, opts->out, "startxref\n%d\n%%%%EOF\n", startxref);

	doc->has_xref_streams = 0;
}
1896
1897
/*
 * Append one subsection to an xref stream: the pair (from, to - from) is
 * pushed onto the index array, and for each object six bytes are appended
 * to fzbuf: a used flag, the offset as four bytes (most significant
 * first), and the generation.
 */
static void writexrefstreamsubsect(fz_context *ctx, pdf_document *doc, pdf_write_options *opts, pdf_obj *index, fz_buffer *fzbuf, int from, int to)
{
	int num = from;

	pdf_array_push_drop(ctx, index, pdf_new_int(ctx, doc, from));
	pdf_array_push_drop(ctx, index, pdf_new_int(ctx, doc, to - from));

	while (num < to)
	{
		int in_use = opts->use_list[num] ? 1 : 0;
		int ofs = opts->ofs_list[num];

		fz_write_buffer_byte(ctx, fzbuf, in_use);
		fz_write_buffer_byte(ctx, fzbuf, ofs>>24);
		fz_write_buffer_byte(ctx, fzbuf, ofs>>16);
		fz_write_buffer_byte(ctx, fzbuf, ofs>>8);
		fz_write_buffer_byte(ctx, fzbuf, ofs);
		fz_write_buffer_byte(ctx, fzbuf, opts->gen_list[num]);
		num++;
	}
}
1913
1914
/*
 * Write the cross reference information as an XRef stream object,
 * followed by the startxref marker.
 *
 * A new object is created to hold the stream, so the range written is
 * from..to inclusive. The arguments have the same meaning as for
 * writexref; for incremental saves Encrypt is copied from the document
 * trailer as well when first is set.
 */
static void writexrefstream(fz_context *ctx, pdf_document *doc, pdf_write_options *opts, int from, int to, int first, int main_xref_offset, int startxref)
{
	static const int field_widths[3] = { 1, 4, 1 };
	int num;
	int k;
	pdf_obj *dict = NULL;
	pdf_obj *obj;
	pdf_obj *w = NULL;
	pdf_obj *index;
	fz_buffer *fzbuf = NULL;

	fz_var(dict);
	fz_var(w);
	fz_var(fzbuf);
	fz_try(ctx)
	{
		num = pdf_create_object(ctx, doc);
		dict = pdf_new_dict(ctx, doc, 6);
		pdf_update_object(ctx, doc, num, dict);

		opts->first_xref_entry_offset = ftell(opts->out);

		/* Make room for the xref stream object itself */
		to++;

		if (first)
		{
			pdf_obj *copied_keys[3];

			copied_keys[0] = PDF_NAME_Info;
			copied_keys[1] = PDF_NAME_Root;
			copied_keys[2] = PDF_NAME_ID;

			for (k = 0; k < 3; k++)
			{
				obj = pdf_dict_get(ctx, pdf_trailer(ctx, doc), copied_keys[k]);
				if (obj)
					pdf_dict_put(ctx, dict, copied_keys[k], obj);
			}

			if (opts->do_incremental)
			{
				obj = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME_Encrypt);
				if (obj)
					pdf_dict_put(ctx, dict, PDF_NAME_Encrypt, obj);
			}
		}

		pdf_dict_put_drop(ctx, dict, PDF_NAME_Size, pdf_new_int(ctx, doc, to));

		if (opts->do_incremental)
		{
			pdf_dict_put_drop(ctx, dict, PDF_NAME_Prev, pdf_new_int(ctx, doc, doc->startxref));
			doc->startxref = startxref;
		}
		else if (main_xref_offset != 0)
		{
			pdf_dict_put_drop(ctx, dict, PDF_NAME_Prev, pdf_new_int(ctx, doc, main_xref_offset));
		}

		pdf_dict_put_drop(ctx, dict, PDF_NAME_Type, PDF_NAME_XRef);

		/* Field widths matching what writexrefstreamsubsect emits */
		w = pdf_new_array(ctx, doc, 3);
		pdf_dict_put(ctx, dict, PDF_NAME_W, w);
		for (k = 0; k < 3; k++)
			pdf_array_push_drop(ctx, w, pdf_new_int(ctx, doc, field_widths[k]));

		index = pdf_new_array(ctx, doc, 2);
		pdf_dict_put_drop(ctx, dict, PDF_NAME_Index, index);

		opts->ofs_list[num] = opts->first_xref_entry_offset;

		fzbuf = fz_new_buffer(ctx, 4*(to-from));

		if (!opts->do_incremental)
		{
			writexrefstreamsubsect(ctx, doc, opts, index, fzbuf, from, to);
		}
		else
		{
			int run_start = from;

			while (run_start < to)
			{
				int run_end;

				/* Skip objects that are not part of the incremental update */
				while (run_start < to && !pdf_xref_is_incremental(ctx, doc, run_start))
					run_start++;

				/* Find the end of the run of updated objects */
				run_end = run_start;
				while (run_end < to && pdf_xref_is_incremental(ctx, doc, run_end))
					run_end++;

				if (run_start < run_end)
					writexrefstreamsubsect(ctx, doc, opts, index, fzbuf, run_start, run_end);

				run_start = run_end;
			}
		}

		pdf_update_stream(ctx, doc, dict, fzbuf, 0);

		writeobject(ctx, doc, opts, num, 0, 0);
		fz_fprintf(ctx, opts->out, "startxref\n%Zd\n%%%%EOF\n", startxref);
	}
	fz_always(ctx)
	{
		pdf_drop_obj(ctx, dict);
		pdf_drop_obj(ctx, w);
		fz_drop_buffer(ctx, fzbuf);
	}
	fz_catch(ctx)
	{
		fz_rethrow(ctx);
	}
}
2027
2028
/*
 * Write newlines to file until its position reaches target. The current
 * position must not already be beyond target.
 */
static void
padto(FILE *file, int target)
{
	int pos = ftell(file);

	assert(pos <= target);
	for (; pos < target; pos++)
		fputc('\n', file);
}
2040
2041
static void
2042
dowriteobject(fz_context *ctx, pdf_document *doc, pdf_write_options *opts, int num, int pass)
2043
{
2044
pdf_xref_entry *entry = pdf_get_xref_entry(ctx, doc, num);
2045
if (entry->type == 'f')
2046
opts->gen_list[num] = entry->gen;
2047
if (entry->type == 'n')
2048
opts->gen_list[num] = entry->gen;
2049
if (entry->type == 'o')
2050
opts->gen_list[num] = 0;
2051
2052
/* If we are renumbering, then make sure all generation numbers are
2053
* zero (except object 0 which must be free, and have a gen number of
2054
* 65535). Changing the generation numbers (and indeed object numbers)
2055
* will break encryption - so only do this if we are renumbering
2056
* anyway. */
2057
if (opts->do_garbage >= 2)
2058
opts->gen_list[num] = (num == 0 ? 65535 : 0);
2059
2060
if (opts->do_garbage && !opts->use_list[num])
2061
return;
2062
2063
if (entry->type == 'n' || entry->type == 'o')
2064
{
2065
if (pass > 0)
2066
padto(opts->out, opts->ofs_list[num]);
2067
opts->ofs_list[num] = ftell(opts->out);
2068
if (!opts->do_incremental || pdf_xref_is_incremental(ctx, doc, num))
2069
writeobject(ctx, doc, opts, num, opts->gen_list[num], 1);
2070
}
2071
else
2072
opts->use_list[num] = 0;
2073
}
2074
2075
static void
2076
writeobjects(fz_context *ctx, pdf_document *doc, pdf_write_options *opts, int pass)
2077
{
2078
int num;
2079
int xref_len = pdf_xref_len(ctx, doc);
2080
2081
if (!opts->do_incremental)
2082
{
2083
fprintf(opts->out, "%%PDF-%d.%d\n", doc->version / 10, doc->version % 10);
2084
fputs("%%\316\274\341\277\246\n\n", opts->out);
2085
}
2086
2087
dowriteobject(ctx, doc, opts, opts->start, pass);
2088
2089
if (opts->do_linear)
2090
{
2091
/* Write first xref */
2092
if (pass == 0)
2093
opts->first_xref_offset = ftell(opts->out);
2094
else
2095
padto(opts->out, opts->first_xref_offset);
2096
writexref(ctx, doc, opts, opts->start, pdf_xref_len(ctx, doc), 1, opts->main_xref_offset, 0);
2097
}
2098
2099
for (num = opts->start+1; num < xref_len; num++)
2100
dowriteobject(ctx, doc, opts, num, pass);
2101
if (opts->do_linear && pass == 1)
2102
{
2103
int offset = (opts->start == 1 ? opts->main_xref_offset : opts->ofs_list[1] + opts->hintstream_len);
2104
padto(opts->out, offset);
2105
}
2106
for (num = 1; num < opts->start; num++)
2107
{
2108
if (pass == 1)
2109
opts->ofs_list[num] += opts->hintstream_len;
2110
dowriteobject(ctx, doc, opts, num, pass);
2111
}
2112
}
2113
2114
/*
 * Returns the number of bits needed to represent x, i.e. the smallest i
 * such that (1 << i) > x. Returns 0 for x <= 0.
 *
 * The count is done on an unsigned copy of x, so no shift ever reaches the
 * sign bit of an int (which would be undefined behaviour), and values of
 * 2^30 and above correctly yield 31.
 */
static int
my_log2(int x)
{
	unsigned int v;
	int bits = 0;

	if (x <= 0)
		return 0;

	for (v = (unsigned int)x; v != 0; v >>= 1)
		bits++;

	return bits;
}
2130
2131
static void
2132
make_page_offset_hints(fz_context *ctx, pdf_document *doc, pdf_write_options *opts, fz_buffer *buf)
2133
{
2134
int i, j;
2135
int min_objs_per_page, max_objs_per_page;
2136
int min_page_length, max_page_length;
2137
int objs_per_page_bits;
2138
int min_shared_object, max_shared_object;
2139
int max_shared_object_refs = 0;
2140
int min_shared_length, max_shared_length;
2141
page_objects **pop = &opts->page_object_lists->page[0];
2142
int page_len_bits, shared_object_bits, shared_object_id_bits;
2143
int shared_length_bits;
2144
int xref_len = pdf_xref_len(ctx, doc);
2145
2146
min_shared_object = pdf_xref_len(ctx, doc);
2147
max_shared_object = 1;
2148
min_shared_length = opts->file_len;
2149
max_shared_length = 0;
2150
for (i=1; i < xref_len; i++)
2151
{
2152
int min, max, page;
2153
2154
min = opts->ofs_list[i];
2155
if (i == opts->start-1 || (opts->start == 1 && i == xref_len-1))
2156
max = opts->main_xref_offset;
2157
else if (i == xref_len-1)
2158
max = opts->ofs_list[1];
2159
else
2160
max = opts->ofs_list[i+1];
2161
2162
assert(max > min);
2163
2164
if (opts->use_list[i] & USE_SHARED)
2165
{
2166
page = -1;
2167
if (i < min_shared_object)
2168
min_shared_object = i;
2169
if (i > max_shared_object)
2170
max_shared_object = i;
2171
if (min_shared_length > max - min)
2172
min_shared_length = max - min;
2173
if (max_shared_length < max - min)
2174
max_shared_length = max - min;
2175
}
2176
else if (opts->use_list[i] & (USE_CATALOGUE | USE_HINTS | USE_PARAMS))
2177
page = -1;
2178
else if (opts->use_list[i] & USE_PAGE1)
2179
{
2180
page = 0;
2181
if (min_shared_length > max - min)
2182
min_shared_length = max - min;
2183
if (max_shared_length < max - min)
2184
max_shared_length = max - min;
2185
}
2186
else if (opts->use_list[i] == 0)
2187
page = -1;
2188
else
2189
page = opts->use_list[i]>>USE_PAGE_SHIFT;
2190
2191
if (page >= 0)
2192
{
2193
pop[page]->num_objects++;
2194
if (pop[page]->min_ofs > min)
2195
pop[page]->min_ofs = min;
2196
if (pop[page]->max_ofs < max)
2197
pop[page]->max_ofs = max;
2198
}
2199
}
2200
2201
min_objs_per_page = max_objs_per_page = pop[0]->num_objects;
2202
min_page_length = max_page_length = pop[0]->max_ofs - pop[0]->min_ofs;
2203
for (i=1; i < opts->page_count; i++)
2204
{
2205
int tmp;
2206
if (min_objs_per_page > pop[i]->num_objects)
2207
min_objs_per_page = pop[i]->num_objects;
2208
if (max_objs_per_page < pop[i]->num_objects)
2209
max_objs_per_page = pop[i]->num_objects;
2210
tmp = pop[i]->max_ofs - pop[i]->min_ofs;
2211
if (tmp < min_page_length)
2212
min_page_length = tmp;
2213
if (tmp > max_page_length)
2214
max_page_length = tmp;
2215
}
2216
2217
for (i=0; i < opts->page_count; i++)
2218
{
2219
int count = 0;
2220
page_objects *po = opts->page_object_lists->page[i];
2221
for (j = 0; j < po->len; j++)
2222
{
2223
if (i == 0 && opts->use_list[po->object[j]] & USE_PAGE1)
2224
count++;
2225
else if (i != 0 && opts->use_list[po->object[j]] & USE_SHARED)
2226
count++;
2227
}
2228
po->num_shared = count;
2229
if (i == 0 || count > max_shared_object_refs)
2230
max_shared_object_refs = count;
2231
}
2232
if (min_shared_object > max_shared_object)
2233
min_shared_object = max_shared_object = 0;
2234
2235
/* Table F.3 - Header */
2236
/* Header Item 1: Least number of objects in a page */
2237
fz_write_buffer_bits(ctx, buf, min_objs_per_page, 32);
2238
/* Header Item 2: Location of first pages page object */
2239
fz_write_buffer_bits(ctx, buf, opts->ofs_list[pop[0]->page_object_number], 32);
2240
/* Header Item 3: Number of bits required to represent the difference
2241
* between the greatest and least number of objects in a page. */
2242
objs_per_page_bits = my_log2(max_objs_per_page - min_objs_per_page);
2243
fz_write_buffer_bits(ctx, buf, objs_per_page_bits, 16);
2244
/* Header Item 4: Least length of a page. */
2245
fz_write_buffer_bits(ctx, buf, min_page_length, 32);
2246
/* Header Item 5: Number of bits needed to represent the difference
2247
* between the greatest and least length of a page. */
2248
page_len_bits = my_log2(max_page_length - min_page_length);
2249
fz_write_buffer_bits(ctx, buf, page_len_bits, 16);
2250
/* Header Item 6: Least offset to start of content stream (Acrobat
2251
* sets this to always be 0) */
2252
fz_write_buffer_bits(ctx, buf, 0, 32);
2253
/* Header Item 7: Number of bits needed to represent the difference
2254
* between the greatest and least offset to content stream (Acrobat
2255
* sets this to always be 0) */
2256
fz_write_buffer_bits(ctx, buf, 0, 16);
2257
/* Header Item 8: Least content stream length. (Acrobat
2258
* sets this to always be 0) */
2259
fz_write_buffer_bits(ctx, buf, 0, 32);
2260
/* Header Item 9: Number of bits needed to represent the difference
2261
* between the greatest and least content stream length (Acrobat
2262
* sets this to always be the same as item 5) */
2263
fz_write_buffer_bits(ctx, buf, page_len_bits, 16);
2264
/* Header Item 10: Number of bits needed to represent the greatest
2265
* number of shared object references. */
2266
shared_object_bits = my_log2(max_shared_object_refs);
2267
fz_write_buffer_bits(ctx, buf, shared_object_bits, 16);
2268
/* Header Item 11: Number of bits needed to represent the greatest
2269
* shared object identifier. */
2270
shared_object_id_bits = my_log2(max_shared_object - min_shared_object + pop[0]->num_shared);
2271
fz_write_buffer_bits(ctx, buf, shared_object_id_bits, 16);
2272
/* Header Item 12: Number of bits needed to represent the numerator
2273
* of the fractions. We always send 0. */
2274
fz_write_buffer_bits(ctx, buf, 0, 16);
2275
/* Header Item 13: Number of bits needed to represent the denominator
2276
* of the fractions. We always send 0. */
2277
fz_write_buffer_bits(ctx, buf, 0, 16);
2278
2279
/* Table F.4 - Page offset hint table (per page) */
2280
/* Item 1: A number that, when added to the least number of objects
2281
* on a page, gives the number of objects in the page. */
2282
for (i = 0; i < opts->page_count; i++)
2283
{
2284
fz_write_buffer_bits(ctx, buf, pop[i]->num_objects - min_objs_per_page, objs_per_page_bits);
2285
}
2286
fz_write_buffer_pad(ctx, buf);
2287
/* Item 2: A number that, when added to the least page length, gives
2288
* the length of the page in bytes. */
2289
for (i = 0; i < opts->page_count; i++)
2290
{
2291
fz_write_buffer_bits(ctx, buf, pop[i]->max_ofs - pop[i]->min_ofs - min_page_length, page_len_bits);
2292
}
2293
fz_write_buffer_pad(ctx, buf);
2294
/* Item 3: The number of shared objects referenced from the page. */
2295
for (i = 0; i < opts->page_count; i++)
2296
{
2297
fz_write_buffer_bits(ctx, buf, pop[i]->num_shared, shared_object_bits);
2298
}
2299
fz_write_buffer_pad(ctx, buf);
2300
/* Item 4: Shared object id for each shared object ref in every page.
2301
* Spec says "not for page 1", but acrobat does send page 1's - all
2302
* as zeros. */
2303
for (i = 0; i < opts->page_count; i++)
2304
{
2305
for (j = 0; j < pop[i]->len; j++)
2306
{
2307
int o = pop[i]->object[j];
2308
if (i == 0 && opts->use_list[o] & USE_PAGE1)
2309
fz_write_buffer_bits(ctx, buf, 0 /* o - pop[0]->page_object_number */, shared_object_id_bits);
2310
if (i != 0 && opts->use_list[o] & USE_SHARED)
2311
fz_write_buffer_bits(ctx, buf, o - min_shared_object + pop[0]->num_shared, shared_object_id_bits);
2312
}
2313
}
2314
fz_write_buffer_pad(ctx, buf);
2315
/* Item 5: Numerator of fractional position for each shared object reference. */
2316
/* We always send 0 in 0 bits */
2317
/* Item 6: A number that, when added to the least offset to the start
2318
* of the content stream (F.3 Item 6), gives the offset in bytes of
2319
* start of the pages content stream object relative to the beginning
2320
* of the page. Always 0 in 0 bits. */
2321
/* Item 7: A number that, when added to the least content stream length
2322
* (F.3 Item 8), gives the length of the pages content stream object.
2323
* Always == Item 2 as least content stream length = least page stream
2324
* length.
2325
*/
2326
for (i = 0; i < opts->page_count; i++)
2327
{
2328
fz_write_buffer_bits(ctx, buf, pop[i]->max_ofs - pop[i]->min_ofs - min_page_length, page_len_bits);
2329
}
2330
2331
/* Pad, and then do shared object hint table */
2332
fz_write_buffer_pad(ctx, buf);
2333
opts->hints_shared_offset = buf->len;
2334
2335
/* Table F.5: */
2336
/* Header Item 1: Object number of the first object in the shared
2337
* objects section. */
2338
fz_write_buffer_bits(ctx, buf, min_shared_object, 32);
2339
/* Header Item 2: Location of first object in the shared objects
2340
* section. */
2341
fz_write_buffer_bits(ctx, buf, opts->ofs_list[min_shared_object], 32);
2342
/* Header Item 3: The number of shared object entries for the first
2343
* page. */
2344
fz_write_buffer_bits(ctx, buf, pop[0]->num_shared, 32);
2345
/* Header Item 4: The number of shared object entries for the shared
2346
* objects section + first page. */
2347
fz_write_buffer_bits(ctx, buf, max_shared_object - min_shared_object + pop[0]->num_shared, 32);
2348
/* Header Item 5: The number of bits needed to represent the greatest
2349
* number of objects in a shared object group (Always 0). */
2350
fz_write_buffer_bits(ctx, buf, 0, 16);
2351
/* Header Item 6: The least length of a shared object group in bytes. */
2352
fz_write_buffer_bits(ctx, buf, min_shared_length, 32);
2353
/* Header Item 7: The number of bits required to represent the
2354
* difference between the greatest and least length of a shared object
2355
* group. */
2356
shared_length_bits = my_log2(max_shared_length - min_shared_length);
2357
fz_write_buffer_bits(ctx, buf, shared_length_bits, 16);
2358
2359
/* Table F.6 */
2360
/* Item 1: Shared object group length (page 1 objects) */
2361
for (j = 0; j < pop[0]->len; j++)
2362
{
2363
int o = pop[0]->object[j];
2364
int min, max;
2365
min = opts->ofs_list[o];
2366
if (o == opts->start-1)
2367
max = opts->main_xref_offset;
2368
else if (o < xref_len-1)
2369
max = opts->ofs_list[o+1];
2370
else
2371
max = opts->ofs_list[1];
2372
if (opts->use_list[o] & USE_PAGE1)
2373
fz_write_buffer_bits(ctx, buf, max - min - min_shared_length, shared_length_bits);
2374
}
2375
/* Item 1: Shared object group length (shared objects) */
2376
for (i = min_shared_object; i <= max_shared_object; i++)
2377
{
2378
int min, max;
2379
min = opts->ofs_list[i];
2380
if (i == opts->start-1)
2381
max = opts->main_xref_offset;
2382
else if (i < xref_len-1)
2383
max = opts->ofs_list[i+1];
2384
else
2385
max = opts->ofs_list[1];
2386
fz_write_buffer_bits(ctx, buf, max - min - min_shared_length, shared_length_bits);
2387
}
2388
fz_write_buffer_pad(ctx, buf);
2389
2390
/* Item 2: MD5 presence flags */
2391
for (i = max_shared_object - min_shared_object + pop[0]->num_shared; i > 0; i--)
2392
{
2393
fz_write_buffer_bits(ctx, buf, 0, 1);
2394
}
2395
fz_write_buffer_pad(ctx, buf);
2396
/* Item 3: MD5 sums (not present) */
2397
fz_write_buffer_pad(ctx, buf);
2398
/* Item 4: Number of objects in the group (not present) */
2399
}
2400
2401
static void
2402
make_hint_stream(fz_context *ctx, pdf_document *doc, pdf_write_options *opts)
2403
{
2404
fz_buffer *buf = fz_new_buffer(ctx, 100);
2405
2406
fz_try(ctx)
2407
{
2408
make_page_offset_hints(ctx, doc, opts, buf);
2409
pdf_update_stream(ctx, doc, pdf_load_object(ctx, doc, pdf_xref_len(ctx, doc)-1, 0), buf, 0);
2410
opts->hintstream_len = buf->len;
2411
fz_drop_buffer(ctx, buf);
2412
}
2413
fz_catch(ctx)
2414
{
2415
fz_drop_buffer(ctx, buf);
2416
fz_rethrow(ctx);
2417
}
2418
}
2419
2420
#ifdef DEBUG_WRITING
/*
	Debugging aid: print one line per xref slot to stderr, giving the
	object number, the offset recorded for it in opts->ofs_list and its
	entry in opts->use_list.
*/
static void dump_object_details(fz_context *ctx, pdf_document *doc, pdf_write_options *opts)
{
	int num = 0;

	while (num < pdf_xref_len(ctx, doc))
	{
		fprintf(stderr, "%d@%d: use=%d\n", num, opts->ofs_list[num], opts->use_list[num]);
		num++;
	}
}
#endif
2431
2432
/*
	Reserve room in the ByteRange array of every unsaved signature.

	The ByteRange objects of signatures are first written out with dummy
	values and overwritten later on, so their initial form has to take up
	at least as much file space as the final values will need. Each array
	is therefore extended with INT_MAX placeholders.
*/
static void presize_unsaved_signature_byteranges(fz_context *ctx, pdf_document *doc)
{
	pdf_unsaved_sig *sig;
	int sig_count = 0;
	int pairs;

	if (!doc->unsaved_sigs)
		return;

	for (sig = doc->unsaved_sigs; sig != NULL; sig = sig->next)
		sig_count++;

	/* There is a segment of bytes at the beginning, one at the end and
	 * one between each consecutive pair of signatures, hence one more
	 * (offset, length) pair than there are signatures. */
	pairs = sig_count + 1;

	for (sig = doc->unsaved_sigs; sig != NULL; sig = sig->next)
	{
		pdf_obj *range = pdf_dict_getl(ctx, sig->field, PDF_NAME_V, PDF_NAME_ByteRange, NULL);
		int remaining = pairs;

		while (remaining-- > 0)
		{
			pdf_array_push_drop(ctx, range, pdf_new_int(ctx, doc, INT_MAX));
			pdf_array_push_drop(ctx, range, pdf_new_int(ctx, doc, INT_MAX));
		}
	}
}
2461
2462
/*
	Fill in the real ByteRange values and the digests of all unsaved
	signatures in a file that has just been written.

	filename is reopened for update. For every unsaved signature the
	/ByteRange, /Contents and /Filter keys are located in the text of the
	written object (found via opts->ofs_list), the placeholder ByteRange
	is overwritten in place with the real values padded with spaces, and
	pdf_write_digest is called for each signature. Finally the list of
	unsaved signatures is released.

	Does nothing if the document has no unsaved signatures. Throws if the
	file cannot be reopened; the file handle is closed on every exit path.
*/
static void complete_signatures(fz_context *ctx, pdf_document *doc, pdf_write_options *opts, char *filename)
{
	pdf_unsaved_sig *usig;
	FILE *f;
	char buf[5120];
	int i;
	int flen;
	int last_end;

	if (!doc->unsaved_sigs)
		return;

	f = fopen(filename, "rb+");
	if (!f)
		fz_throw(ctx, FZ_ERROR_GENERIC, "Failed to open %s to complete signatures", filename);

	fz_var(f);

	fz_try(ctx)
	{
		pdf_obj *byte_range;

		fseek(f, 0, SEEK_END);
		flen = ftell(f);

		/* Locate the byte ranges and contents in the saved file */
		for (usig = doc->unsaved_sigs; usig; usig = usig->next)
		{
			char *bstr, *cstr, *fstr;
			size_t nread;
			int pnum = pdf_obj_parent_num(ctx, pdf_dict_getl(ctx, usig->field, PDF_NAME_V, PDF_NAME_ByteRange, NULL));
			fseek(f, opts->ofs_list[pnum], SEEK_SET);
			/* Terminate the text at the number of bytes actually
			 * read, so that the searches below never look at
			 * uninitialised bytes or at leftovers from a previous
			 * signature after a short read. */
			nread = fread(buf, 1, sizeof(buf)-1, f);
			buf[nread] = 0;

			bstr = strstr(buf, "/ByteRange");
			cstr = strstr(buf, "/Contents");
			fstr = strstr(buf, "/Filter");

			/* 10 and 9 are the lengths of "/ByteRange" and
			 * "/Contents"; all positions are absolute file offsets.
			 * NOTE(review): a signature whose keys are not found in
			 * this order keeps whatever positions it already had. */
			if (bstr && cstr && fstr && bstr < cstr && cstr < fstr)
			{
				usig->byte_range_start = bstr - buf + 10 + opts->ofs_list[pnum];
				usig->byte_range_end = cstr - buf + opts->ofs_list[pnum];
				usig->contents_start = cstr - buf + 9 + opts->ofs_list[pnum];
				usig->contents_end = fstr - buf + opts->ofs_list[pnum];
			}
		}

		/* Recreate ByteRange with correct values. Initially store the
		 * recreated object in the first of the unsaved signatures */
		byte_range = pdf_new_array(ctx, doc, 4);
		pdf_dict_putl_drop(ctx, doc->unsaved_sigs->field, byte_range, PDF_NAME_V, PDF_NAME_ByteRange, NULL);

		last_end = 0;
		for (usig = doc->unsaved_sigs; usig; usig = usig->next)
		{
			pdf_array_push_drop(ctx, byte_range, pdf_new_int(ctx, doc, last_end));
			pdf_array_push_drop(ctx, byte_range, pdf_new_int(ctx, doc, usig->contents_start - last_end));
			last_end = usig->contents_end;
		}
		pdf_array_push_drop(ctx, byte_range, pdf_new_int(ctx, doc, last_end));
		pdf_array_push_drop(ctx, byte_range, pdf_new_int(ctx, doc, flen - last_end));

		/* Copy the new ByteRange to the other unsaved signatures */
		for (usig = doc->unsaved_sigs->next; usig; usig = usig->next)
			pdf_dict_putl_drop(ctx, usig->field, pdf_copy_array(ctx, byte_range), PDF_NAME_V, PDF_NAME_ByteRange, NULL);

		/* Write the byte range into buf, padding with spaces. Only pad
		 * when the printed length lies inside the buffer; otherwise
		 * sizeof(buf)-i would wrap around to a huge count. */
		i = pdf_sprint_obj(ctx, buf, sizeof(buf), byte_range, 1);
		if ((size_t)i < sizeof(buf))
			memset(buf+i, ' ', sizeof(buf)-i);

		/* Write the byte range to the file */
		for (usig = doc->unsaved_sigs; usig; usig = usig->next)
		{
			int span = usig->byte_range_end - usig->byte_range_start;

			/* Never write more than buf holds, and skip ranges that
			 * are empty or inverted. */
			if (span > 0 && (size_t)span <= sizeof(buf))
			{
				fseek(f, usig->byte_range_start, SEEK_SET);
				fwrite(buf, 1, span, f);
			}
		}

		/* Close before the digests are written; pdf_write_digest is
		 * given the filename rather than this handle. */
		fclose(f);
		f = NULL;

		/* Write the digests into the file */
		for (usig = doc->unsaved_sigs; usig; usig = usig->next)
			pdf_write_digest(ctx, doc, filename, byte_range, usig->contents_start, usig->contents_end - usig->contents_start, usig->signer);

		/* delete the unsaved_sigs records */
		while ((usig = doc->unsaved_sigs) != NULL)
		{
			doc->unsaved_sigs = usig->next;
			pdf_drop_obj(ctx, usig->field);
			pdf_drop_signer(ctx, usig->signer);
			fz_free(ctx, usig);
		}
	}
	fz_always(ctx)
	{
		/* Still open only if something above threw before the close. */
		if (f)
			fclose(f);
	}
	fz_catch(ctx)
	{
		fz_rethrow(ctx);
	}
}
2550
2551
/*
	Pass the contents of every page of doc through
	pdf_clean_page_contents.

	Each page is loaded, cleaned and dropped again in turn. The page is
	dropped even when cleaning throws, and the error is then rethrown.
*/
static void sanitise(fz_context *ctx, pdf_document *doc)
{
	int n = pdf_count_pages(ctx, doc);
	int i;

	for (i = 0; i < n; i++)
	{
		pdf_page *page = pdf_load_page(ctx, doc, i);

		fz_try(ctx)
		{
			pdf_clean_page_contents(ctx, doc, page, NULL, NULL, NULL);
		}
		fz_always(ctx)
		{
			fz_drop_page(ctx, &page->super);
		}
		fz_catch(ctx)
		{
			fz_rethrow(ctx);
		}
	}
}
2565
2566
/*
	Write doc out to the file called filename.

	fz_opts selects how the file is written; NULL means all options off.
	As used here:
		do_incremental: append the changes to the existing file
			rather than rewriting it. Nothing is written if the xref
			has not been altered. Cannot be combined with do_garbage
			or do_linear.
		do_garbage: 1 or more marks the objects reachable from the
			trailer, 2 or more also compacts and renumbers the xref,
			3 or more also coalesces duplicate objects.
		do_linear: write a linearized file.
		do_clean: sanitise the page content streams first.
		do_ascii, do_expand, continue_on_error, errors: copied into
			the internal options for the object writers.

	Updates to the document are frozen (doc->freeze_updates) for the
	duration of the call and unfrozen again on every exit path. Throws on
	an invalid option combination (before the output file is touched), if
	the output file cannot be opened, or if any stage of writing fails.
	Does nothing if doc is NULL.
*/
void pdf_write_document(fz_context *ctx, pdf_document *doc, char *filename, fz_write_options *fz_opts)
{
	fz_write_options opts_defaults = { 0 };
	pdf_write_options opts = { 0 };

	int lastfree;
	int num;
	int xref_len;

	if (!doc)
		return;

	if (!fz_opts)
		fz_opts = &opts_defaults;

	doc->freeze_updates = 1;

	fz_try(ctx)
	{
		/* Sanitise the operator streams */
		if (fz_opts->do_clean)
			sanitise(ctx, doc);

		pdf_finish_edit(ctx, doc);
		presize_unsaved_signature_byteranges(ctx, doc);
	}
	fz_catch(ctx)
	{
		/* Do not leave the document frozen if preparation fails. */
		doc->freeze_updates = 0;
		fz_rethrow(ctx);
	}

	xref_len = pdf_xref_len(ctx, doc);

	if (fz_opts->do_incremental)
	{
		/* If no changes, nothing to write */
		if (!doc->xref_altered)
		{
			doc->freeze_updates = 0;
			return;
		}

		/* Reject impossible combinations before the file is opened for
		 * appending, so that a bad request never alters the existing
		 * file. */
		if (fz_opts->do_garbage || fz_opts->do_linear)
		{
			doc->freeze_updates = 0;
			if (fz_opts->do_garbage)
				fz_throw(ctx, FZ_ERROR_GENERIC, "Can't do incremental writes with garbage collection");
			fz_throw(ctx, FZ_ERROR_GENERIC, "Can't do incremental writes with linearisation");
		}

		opts.out = fopen(filename, "ab");
		if (opts.out)
		{
			fseek(opts.out, 0, SEEK_END);
			fputs("\n", opts.out);
		}
	}
	else
	{
		opts.out = fopen(filename, "wb");
	}

	if (!opts.out)
	{
		doc->freeze_updates = 0;
		fz_throw(ctx, FZ_ERROR_GENERIC, "cannot open output file '%s'", filename);
	}

	fz_try(ctx)
	{
		opts.do_incremental = fz_opts->do_incremental;
		opts.do_expand = fz_opts->do_expand;
		opts.do_garbage = fz_opts->do_garbage;
		opts.do_ascii = fz_opts->do_ascii;
		opts.do_linear = fz_opts->do_linear;
		opts.do_clean = fz_opts->do_clean;
		opts.start = 0;
		opts.main_xref_offset = INT_MIN;
		/* We deliberately make these arrays long enough to cope with
		 * 1 to n access rather than 0..n-1, and add space for 2 new
		 * extra entries that may be required for linearization. */
		opts.use_list = fz_malloc_array(ctx, pdf_xref_len(ctx, doc) + 3, sizeof(int));
		opts.ofs_list = fz_malloc_array(ctx, pdf_xref_len(ctx, doc) + 3, sizeof(int));
		opts.gen_list = fz_calloc(ctx, pdf_xref_len(ctx, doc) + 3, sizeof(int));
		opts.renumber_map = fz_malloc_array(ctx, pdf_xref_len(ctx, doc) + 3, sizeof(int));
		opts.rev_renumber_map = fz_malloc_array(ctx, pdf_xref_len(ctx, doc) + 3, sizeof(int));
		opts.rev_gen_list = fz_malloc_array(ctx, pdf_xref_len(ctx, doc) + 3, sizeof(int));
		opts.continue_on_error = fz_opts->continue_on_error;
		opts.errors = fz_opts->errors;

		for (num = 0; num < xref_len; num++)
		{
			opts.use_list[num] = 0;
			opts.ofs_list[num] = 0;
			opts.renumber_map[num] = num;
			opts.rev_renumber_map[num] = num;
			opts.rev_gen_list[num] = pdf_get_xref_entry(ctx, doc, num)->gen;
		}

		/* Make sure any objects hidden in compressed streams have been loaded */
		if (!opts.do_incremental)
		{
			pdf_ensure_solid_xref(ctx, doc, xref_len);
			preloadobjstms(ctx, doc);
		}

		/* Sweep & mark objects from the trailer */
		if (opts.do_garbage >= 1 || opts.do_linear)
			(void)markobj(ctx, doc, &opts, pdf_trailer(ctx, doc));
		else
			for (num = 0; num < xref_len; num++)
				opts.use_list[num] = 1;

		/* Coalesce and renumber duplicate objects */
		if (opts.do_garbage >= 3)
			removeduplicateobjs(ctx, doc, &opts);

		/* Compact xref by renumbering and removing unused objects */
		if (opts.do_garbage >= 2 || opts.do_linear)
			compactxref(ctx, doc, &opts);

		/* Make renumbering affect all indirect references and update xref */
		if (opts.do_garbage >= 2 || opts.do_linear)
			renumberobjs(ctx, doc, &opts);

		/* Truncate the xref after compacting and renumbering */
		if ((opts.do_garbage >= 2 || opts.do_linear) && !opts.do_incremental)
			while (xref_len > 0 && !opts.use_list[xref_len-1])
				xref_len--;

		if (opts.do_linear)
			linearize(ctx, doc, &opts);

		writeobjects(ctx, doc, &opts, 0);

#ifdef DEBUG_WRITING
		dump_object_details(ctx, doc, &opts);
#endif

		if (opts.do_incremental)
		{
			for (num = 0; num < xref_len; num++)
			{
				if (!opts.use_list[num] && pdf_xref_is_incremental(ctx, doc, num))
				{
					/* Make unreusable. FIXME: would be better to link to existing free list */
					opts.gen_list[num] = 65535;
					opts.ofs_list[num] = 0;
				}
			}
		}
		else
		{
			/* Construct linked list of free object slots */
			lastfree = 0;
			for (num = 0; num < xref_len; num++)
			{
				if (!opts.use_list[num])
				{
					opts.gen_list[num]++;
					opts.ofs_list[lastfree] = num;
					lastfree = num;
				}
			}
		}

		if (opts.do_linear)
		{
			/* The first pass above established the object offsets.
			 * Now build the hint stream, shift the file length and
			 * main xref offset by its size, and write everything
			 * again from the start of the file. */
			opts.main_xref_offset = ftell(opts.out);
			writexref(ctx, doc, &opts, 0, opts.start, 0, 0, opts.first_xref_offset);
			opts.file_len = ftell(opts.out);

			make_hint_stream(ctx, doc, &opts);
			if (opts.do_ascii)
			{
				opts.hintstream_len *= 2;
				opts.hintstream_len += 1 + ((opts.hintstream_len+63)>>6);
			}
			opts.file_len += opts.hintstream_len;
			opts.main_xref_offset += opts.hintstream_len;
			update_linearization_params(ctx, doc, &opts);
			fseek(opts.out, 0, SEEK_SET);
			writeobjects(ctx, doc, &opts, 1);

			padto(opts.out, opts.main_xref_offset);
			writexref(ctx, doc, &opts, 0, opts.start, 0, 0, opts.first_xref_offset);
		}
		else
		{
			opts.first_xref_offset = ftell(opts.out);
			if (opts.do_incremental && doc->has_xref_streams)
				writexrefstream(ctx, doc, &opts, 0, xref_len, 1, 0, opts.first_xref_offset);
			else
				writexref(ctx, doc, &opts, 0, xref_len, 1, 0, opts.first_xref_offset);
		}

		/* Close the output before the signatures are completed;
		 * complete_signatures reopens the file by name. */
		fclose(opts.out);
		opts.out = NULL;
		complete_signatures(ctx, doc, &opts, filename);

		doc->dirty = 0;
	}
	fz_always(ctx)
	{
#ifdef DEBUG_LINEARIZATION
		page_objects_dump(&opts);
		objects_dump(ctx, doc, &opts);
#endif
		fz_free(ctx, opts.use_list);
		fz_free(ctx, opts.ofs_list);
		fz_free(ctx, opts.gen_list);
		fz_free(ctx, opts.renumber_map);
		fz_free(ctx, opts.rev_renumber_map);
		fz_free(ctx, opts.rev_gen_list);
		pdf_drop_obj(ctx, opts.linear_l);
		pdf_drop_obj(ctx, opts.linear_h0);
		pdf_drop_obj(ctx, opts.linear_h1);
		pdf_drop_obj(ctx, opts.linear_o);
		pdf_drop_obj(ctx, opts.linear_e);
		pdf_drop_obj(ctx, opts.linear_n);
		pdf_drop_obj(ctx, opts.linear_t);
		pdf_drop_obj(ctx, opts.hints_s);
		pdf_drop_obj(ctx, opts.hints_length);
		page_objects_list_destroy(ctx, opts.page_object_lists);
		if (opts.out)
			fclose(opts.out);
		doc->freeze_updates = 0;
	}
	fz_catch(ctx)
	{
		fz_rethrow(ctx);
	}
}
2782
2783
/* Fan-out used by make_page_tree_node: the initial size of each node's Kids
 * array and the number of child slots handed out per node. */
#define KIDS_PER_LEVEL 32
2784
2785
#if 0
2786
2787
// TODO: pdf_rebalance_page_tree(ctx, doc);
2788
2789
static pdf_obj *
2790
make_page_tree_node(fz_context *ctx, pdf_document *doc, int l, int r, pdf_obj *parent_ref, int root)
2791
{
2792
int count_per_kid, spaces;
2793
pdf_obj *a = NULL;
2794
pdf_obj *me = NULL;
2795
pdf_obj *o = NULL;
2796
pdf_obj *me_ref = NULL;
2797
2798
count_per_kid = 1;
2799
while(count_per_kid * KIDS_PER_LEVEL < r-l)
2800
count_per_kid *= KIDS_PER_LEVEL;
2801
2802
fz_var(o);
2803
fz_var(me);
2804
fz_var(a);
2805
fz_var(me_ref);
2806
2807
fz_try(ctx)
2808
{
2809
me = pdf_new_dict(ctx, doc, 2);
2810
pdf_dict_put_drop(ctx, me, PDF_NAME_Type, PDF_NAME_Pages);
2811
pdf_dict_put_drop(ctx, me, PDF_NAME_Count, pdf_new_int(ctx, doc, r-l));
2812
if (!root)
2813
pdf_dict_put(ctx, me, PDF_NAME_Parent, parent_ref);
2814
a = pdf_new_array(ctx, doc, KIDS_PER_LEVEL);
2815
me_ref = pdf_new_ref(ctx, doc, me);
2816
2817
for (spaces = KIDS_PER_LEVEL; l < r; spaces--)
2818
{
2819
if (spaces >= r-l)
2820
{
2821
o = pdf_keep_obj(ctx, doc->page_refs[l++]);
2822
pdf_dict_put(ctx, o, PDF_NAME_Parent, me_ref);
2823
}
2824
else
2825
{
2826
int j = l+count_per_kid;
2827
if (j > r)
2828
j = r;
2829
o = make_page_tree_node(ctx, doc, l, j, me_ref, 0);
2830
l = j;
2831
}
2832
pdf_array_push(ctx, a, o);
2833
pdf_drop_obj(ctx, o);
2834
o = NULL;
2835
}
2836
pdf_dict_put_drop(ctx, me, PDF_NAME_Kids, a);
2837
a = NULL;
2838
}
2839
fz_always(ctx)
2840
{
2841
pdf_drop_obj(ctx, me);
2842
}
2843
fz_catch(ctx)
2844
{
2845
pdf_drop_obj(ctx, a);
2846
pdf_drop_obj(ctx, o);
2847
pdf_drop_obj(ctx, me);
2848
fz_rethrow_message(ctx, "Failed to synthesize new page tree");
2849
}
2850
return me_ref;
2851
}
2852
2853
static void
2854
pdf_rebalance_page_tree(fz_context *ctx, pdf_document *doc)
2855
{
2856
pdf_obj *catalog;
2857
pdf_obj *pages;
2858
2859
if (!doc || !doc->needs_page_tree_rebuild)
2860
return;
2861
2862
catalog = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME_Root);
2863
pages = make_page_tree_node(ctx, doc, 0, doc->page_len, catalog, 1);
2864
pdf_dict_put_drop(ctx, catalog, PDF_NAME_Pages, pages);
2865
2866
doc->needs_page_tree_rebuild = 0;
2867
}
2868
2869
#endif
2870
2871
static void
2872
pdf_rebalance_page_tree(fz_context *ctx, pdf_document *doc)
2873
{
2874
}
2875
2876
/*
	Finish off a round of edits to doc by handing it to
	pdf_rebalance_page_tree. A NULL doc is ignored.
*/
void pdf_finish_edit(fz_context *ctx, pdf_document *doc)
{
	if (doc != NULL)
		pdf_rebalance_page_tree(ctx, doc);
}
2882
2883