Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Download
7643 views
1
#include "mupdf/pdf.h"
2
#include "mupdf/fitz/document.h"
3
4
#undef DEBUG_PROGESSIVE_ADVANCE

/* DEBUGMESS takes one parenthesised argument list. When
 * DEBUG_PROGESSIVE_ADVANCE is defined the list is handed to fz_warn;
 * otherwise the macro expands to an empty statement. */
#ifndef DEBUG_PROGESSIVE_ADVANCE
#define DEBUGMESS(A) do { } while (0)
#else
#define DEBUGMESS(A) do { fz_warn A; } while (0)
#endif
11
12
/* Return 1 when ch is one of the six whitespace bytes recognised here
 * (NUL, TAB, LF, FF, CR, SPACE) and 0 for anything else. */
static inline int iswhite(int ch)
{
	switch (ch)
	{
	case '\000':
	case '\011':
	case '\012':
	case '\014':
	case '\015':
	case '\040':
		return 1;
	default:
		return 0;
	}
}
18
19
/*
20
* xref tables
21
*/
22
23
/* Release everything hanging off doc->xref_sections: cached objects and
 * their stream buffers, every subsection table, both trailers of each
 * section and finally the section array itself. On return the document
 * has no xref sections. */
static void pdf_drop_xref_sections(fz_context *ctx, pdf_document *doc)
{
	int s;

	for (s = 0; s < doc->num_xref_sections; s++)
	{
		pdf_xref *section = &doc->xref_sections[s];
		pdf_xref_subsec *cur, *following;

		for (cur = section->subsec; cur != NULL; cur = following)
		{
			int k;

			/* Grab the link before the node itself is freed. */
			following = cur->next;

			for (k = 0; k < cur->len; k++)
			{
				pdf_xref_entry *slot = &cur->table[k];

				/* The stream buffer is only dropped together
				 * with a cached object. */
				if (slot->obj == NULL)
					continue;
				pdf_drop_obj(ctx, slot->obj);
				fz_drop_buffer(ctx, slot->stm_buf);
			}
			fz_free(ctx, cur->table);
			fz_free(ctx, cur);
		}

		pdf_drop_obj(ctx, section->pre_repair_trailer);
		pdf_drop_obj(ctx, section->trailer);
	}

	fz_free(ctx, doc->xref_sections);
	doc->xref_sections = NULL;
	doc->num_xref_sections = 0;
}
58
59
static void
60
extend_xref_index(fz_context *ctx, pdf_document *doc, int newlen)
61
{
62
int i;
63
64
doc->xref_index = fz_resize_array(ctx, doc->xref_index, newlen, sizeof(int));
65
for (i = doc->max_xref_len; i < newlen; i++)
66
{
67
doc->xref_index[i] = 0;
68
}
69
doc->max_xref_len = newlen;
70
}
71
72
/* This is only ever called when we already have an incremental
73
* xref. This means there will only be 1 subsec, and it will be
74
* a complete subsec. */
75
static void pdf_resize_xref(fz_context *ctx, pdf_document *doc, int newlen)
76
{
77
int i;
78
pdf_xref *xref = &doc->xref_sections[0];
79
pdf_xref_subsec *sub;
80
81
assert(xref != NULL);
82
sub = xref->subsec;
83
assert(sub->next == NULL && sub->start == 0 && sub->len == xref->num_objects);
84
assert(newlen > xref->num_objects);
85
86
sub->table = fz_resize_array(ctx, sub->table, newlen, sizeof(pdf_xref_entry));
87
for (i = xref->num_objects; i < newlen; i++)
88
{
89
sub->table[i].type = 0;
90
sub->table[i].ofs = 0;
91
sub->table[i].gen = 0;
92
sub->table[i].stm_ofs = 0;
93
sub->table[i].stm_buf = NULL;
94
sub->table[i].obj = NULL;
95
}
96
xref->num_objects = newlen;
97
sub->len = newlen;
98
if (doc->max_xref_len < newlen)
99
extend_xref_index(ctx, doc, newlen);
100
}
101
102
/* Append one blank xref section to doc. It becomes the last section,
 * which is the one the "populating" accessors operate on. */
static void pdf_populate_next_xref_level(fz_context *ctx, pdf_document *doc)
{
	int count = doc->num_xref_sections + 1;
	pdf_xref *fresh;

	doc->xref_sections = fz_resize_array(ctx, doc->xref_sections, count, sizeof(pdf_xref));
	doc->num_xref_sections = count;

	fresh = &doc->xref_sections[count - 1];
	fresh->pre_repair_trailer = NULL;
	fresh->trailer = NULL;
	fresh->num_objects = 0;
	fresh->subsec = NULL;
}
114
115
/* Return the trailer stored in xref section 0, i.e. the document's
 * final trailer. No extra reference is taken on the returned object. */
pdf_obj *pdf_trailer(fz_context *ctx, pdf_document *doc)
{
	return doc->xref_sections[0].trailer;
}
122
123
/* Install trailer (with a new reference) as the trailer of the xref
 * section currently being populated, i.e. the last one. If that section
 * already had a trailer it is kept as pre_repair_trailer, replacing and
 * dropping whatever was stored there before. */
void pdf_set_populating_xref_trailer(fz_context *ctx, pdf_document *doc, pdf_obj *trailer)
{
	pdf_xref *last = &doc->xref_sections[doc->num_xref_sections - 1];
	pdf_obj *previous = last->trailer;

	if (previous != NULL)
	{
		pdf_drop_obj(ctx, last->pre_repair_trailer);
		last->pre_repair_trailer = previous;
	}
	last->trailer = pdf_keep_obj(ctx, trailer);
}
134
135
/* Report doc->max_xref_len, the current length of the xref index. */
int pdf_xref_len(fz_context *ctx, pdf_document *doc)
{
	int len = doc->max_xref_len;

	return len;
}
139
140
/* Ensure that the given xref has a single subsection
141
* that covers the entire range. */
142
static void
143
ensure_solid_xref(fz_context *ctx, pdf_document *doc, int num, int which)
144
{
145
pdf_xref *xref = &doc->xref_sections[which];
146
pdf_xref_subsec *sub = xref->subsec;
147
pdf_xref_subsec *new_sub;
148
149
if (num < xref->num_objects)
150
num = xref->num_objects;
151
152
if (sub != NULL && sub->next == NULL && sub->start == 0 && sub->len >= num)
153
return;
154
155
new_sub = fz_malloc_struct(ctx, pdf_xref_subsec);
156
fz_try(ctx)
157
{
158
new_sub->table = fz_calloc(ctx, num, sizeof(pdf_xref_entry));
159
new_sub->start = 0;
160
new_sub->len = num;
161
new_sub->next = NULL;
162
}
163
fz_catch(ctx)
164
{
165
fz_free(ctx, new_sub);
166
fz_rethrow(ctx);
167
}
168
169
/* Move objects over to the new subsection and destroy the old
170
* ones */
171
sub = xref->subsec;
172
while (sub != NULL)
173
{
174
pdf_xref_subsec *next = sub->next;
175
int i;
176
177
for (i = 0; i < sub->len; i++)
178
{
179
new_sub->table[i+sub->start] = sub->table[i];
180
}
181
fz_free(ctx, sub->table);
182
fz_free(ctx, sub);
183
sub = next;
184
}
185
xref->num_objects = num;
186
xref->subsec = new_sub;
187
if (doc->max_xref_len < num)
188
extend_xref_index(ctx, doc, num);
189
}
190
191
/* Used while reading the individual xref sections from a file */
192
pdf_xref_entry *pdf_get_populating_xref_entry(fz_context *ctx, pdf_document *doc, int num)
193
{
194
/* Return an entry within the xref currently being populated */
195
pdf_xref *xref;
196
pdf_xref_subsec *sub;
197
198
if (doc->num_xref_sections == 0)
199
{
200
doc->xref_sections = fz_calloc(ctx, 1, sizeof(pdf_xref));
201
doc->num_xref_sections = 1;
202
}
203
204
/* Prevent accidental heap underflow */
205
if (num < 0)
206
fz_throw(ctx, FZ_ERROR_GENERIC, "object number must not be negative (%d)", num);
207
208
/* Return the pointer to the entry in the last section. */
209
xref = &doc->xref_sections[doc->num_xref_sections-1];
210
211
for (sub = xref->subsec; sub != NULL; sub = sub->next)
212
{
213
if (num >= sub->start && num < sub->start + sub->len)
214
return &sub->table[num-sub->start];
215
}
216
217
/* We've been asked for an object that's not in a subsec. */
218
ensure_solid_xref(ctx, doc, num+1, doc->num_xref_sections-1);
219
xref = &doc->xref_sections[doc->num_xref_sections-1];
220
sub = xref->subsec;
221
222
return &sub->table[num-sub->start];
223
}
224
225
/* Used after loading a document to access entries */
226
/* This will never throw anything, or return NULL if it is
227
* only asked to return objects in range within a 'solid'
228
* xref. */
229
pdf_xref_entry *pdf_get_xref_entry(fz_context *ctx, pdf_document *doc, int i)
230
{
231
pdf_xref *xref;
232
pdf_xref_subsec *sub;
233
int j;
234
235
if (i < 0)
236
fz_throw(ctx, FZ_ERROR_GENERIC, "Negative object number requested");
237
238
if (i <= doc->max_xref_len)
239
j = doc->xref_index[i];
240
else
241
j = 0;
242
243
/* Find the first xref section where the entry is defined. */
244
for (; j < doc->num_xref_sections; j++)
245
{
246
xref = &doc->xref_sections[j];
247
248
if (i < xref->num_objects)
249
{
250
for (sub = xref->subsec; sub != NULL; sub = sub->next)
251
{
252
pdf_xref_entry *entry;
253
254
if (i < sub->start || i >= sub->start + sub->len)
255
continue;
256
257
entry = &sub->table[i - sub->start];
258
if (entry->type)
259
{
260
doc->xref_index[i] = j;
261
return entry;
262
}
263
}
264
}
265
}
266
267
/* Didn't find the entry in any section. Return the entry from
268
* the final section. */
269
doc->xref_index[i] = 0;
270
if (i < xref->num_objects)
271
{
272
xref = &doc->xref_sections[0];
273
for (sub = xref->subsec; sub != NULL; sub = sub->next)
274
{
275
if (i >= sub->start && i < sub->start + sub->len)
276
return &sub->table[i - sub->start];
277
}
278
}
279
280
/* At this point, we solidify the xref. This ensures that we
281
* can return a pointer. This is the only case where this function
282
* might throw an exception, and it will never happen when we are
283
* working within a 'solid' xref. */
284
ensure_solid_xref(ctx, doc, i+1, 0);
285
xref = &doc->xref_sections[0];
286
sub = xref->subsec;
287
return &sub->table[i - sub->start];
288
}
289
290
/*
291
Ensure we have an incremental xref section where we can store
292
updated versions of indirect objects. This is a new xref section
293
consisting of a single xref subsection.
294
*/
295
static void ensure_incremental_xref(fz_context *ctx, pdf_document *doc)
296
{
297
298
if (!doc->xref_altered)
299
{
300
pdf_xref *xref = &doc->xref_sections[0];
301
pdf_xref *pxref;
302
pdf_xref_entry *new_table = fz_calloc(ctx, xref->num_objects, sizeof(pdf_xref_entry));
303
pdf_xref_subsec *sub;
304
pdf_obj *trailer = NULL;
305
int i;
306
307
fz_var(trailer);
308
fz_try(ctx)
309
{
310
sub = fz_malloc_struct(ctx, pdf_xref_subsec);
311
trailer = pdf_copy_dict(ctx, xref->trailer);
312
doc->xref_sections = fz_resize_array(ctx, doc->xref_sections, doc->num_xref_sections + 1, sizeof(pdf_xref));
313
xref = &doc->xref_sections[0];
314
pxref = &doc->xref_sections[1];
315
memmove(pxref, xref, doc->num_xref_sections * sizeof(pdf_xref));
316
/* xref->num_objects is already correct */
317
xref->subsec = sub;
318
xref->trailer = trailer;
319
xref->pre_repair_trailer = NULL;
320
sub->next = NULL;
321
sub->len = xref->num_objects;
322
sub->start = 0;
323
sub->table = new_table;
324
doc->num_xref_sections++;
325
doc->xref_altered = 1;
326
}
327
fz_catch(ctx)
328
{
329
fz_free(ctx, new_table);
330
pdf_drop_obj(ctx, trailer);
331
fz_rethrow(ctx);
332
}
333
334
/* Update the xref_index */
335
for (i = 0; i < doc->max_xref_len; i++)
336
{
337
doc->xref_index[i]++;
338
}
339
}
340
}
341
342
/* Used when altering a document */
343
static pdf_xref_entry *pdf_get_incremental_xref_entry(fz_context *ctx, pdf_document *doc, int i)
344
{
345
pdf_xref *xref;
346
pdf_xref_subsec *sub;
347
348
/* Make a new final xref section if we haven't already */
349
ensure_incremental_xref(ctx, doc);
350
351
xref = &doc->xref_sections[0];
352
if (i >= xref->num_objects)
353
pdf_resize_xref(ctx, doc, i + 1);
354
355
sub = xref->subsec;
356
assert(sub != NULL && sub->next == NULL);
357
assert(i >= sub->start && i < sub->start + sub->len);
358
doc->xref_index[i] = 0;
359
return &sub->table[i - sub->start];
360
}
361
362
/* Return 1 when the document has been altered and object num has a
 * typed entry in xref section 0, else 0. Section 0 is asserted to be a
 * single complete subsection starting at 0. */
int pdf_xref_is_incremental(fz_context *ctx, pdf_document *doc, int num)
{
	pdf_xref *first = &doc->xref_sections[0];
	pdf_xref_subsec *only = first->subsec;

	assert(only != NULL && only->next == NULL && only->len == first->num_objects && only->start == 0);

	if (!doc->xref_altered)
		return 0;
	if (num >= first->num_objects)
		return 0;
	return only->table[num].type != 0;
}
371
372
/* Ensure that the current populating xref has a single subsection
373
* that covers the entire range. */
374
void pdf_ensure_solid_xref(fz_context *ctx, pdf_document *doc, int num)
375
{
376
if (doc->num_xref_sections == 0)
377
pdf_populate_next_xref_level(ctx, doc);
378
379
ensure_solid_xref(ctx, doc, num, doc->num_xref_sections-1);
380
}
381
382
/* Ensure that an object has been cloned into the incremental xref section */
383
void pdf_xref_ensure_incremental_object(fz_context *ctx, pdf_document *doc, int num)
384
{
385
pdf_xref_entry *new_entry, *old_entry;
386
pdf_xref_subsec *sub = NULL;
387
int i;
388
389
/* Make sure we have created an xref section for incremental updates */
390
ensure_incremental_xref(ctx, doc);
391
392
/* Search for the section that contains this object */
393
for (i = doc->xref_index[num]; i < doc->num_xref_sections; i++)
394
{
395
pdf_xref *xref = &doc->xref_sections[i];
396
397
if (num < 0 && num >= xref->num_objects)
398
break;
399
for (sub = xref->subsec; sub != NULL; sub = sub->next)
400
{
401
if (sub->start <= num && num < sub->start + sub->len && sub->table[num - sub->start].type)
402
break;
403
}
404
if (sub != NULL)
405
break;
406
}
407
/* sub == NULL implies we did not find it */
408
409
/* If we don't find it, or it's already in the incremental section, return */
410
if (i == 0 || sub == NULL)
411
return;
412
413
/* Move the object to the incremental section */
414
doc->xref_index[num] = 0;
415
old_entry = &sub->table[num - sub->start];
416
new_entry = pdf_get_incremental_xref_entry(ctx, doc, num);
417
*new_entry = *old_entry;
418
old_entry->obj = NULL;
419
old_entry->stm_buf = NULL;
420
}
421
422
/* Replace all xref sections of doc with a single section whose one
 * subsection uses the caller-supplied table 'entries' of n entries.
 * The current trailer is carried over to the new section and the xref
 * index is replaced by a zeroed one of length n.
 *
 * Everything that can fail is allocated before the document is
 * touched, so on error the document keeps its old sections and index.
 * 'entries' is stored by pointer, not copied. */
void pdf_replace_xref(fz_context *ctx, pdf_document *doc, pdf_xref_entry *entries, int n)
{
	int *xref_index = NULL;
	pdf_xref *xref = NULL;
	pdf_xref_subsec *sub;
	pdf_obj *trailer = pdf_keep_obj(ctx, pdf_trailer(ctx, doc));

	fz_var(xref_index);
	fz_var(xref);
	fz_var(trailer);
	fz_try(ctx)
	{
		/* fz_calloc gives a zeroed index; no further clearing needed. */
		xref_index = fz_calloc(ctx, n, sizeof(int));
		xref = fz_malloc_struct(ctx, pdf_xref);
		sub = fz_malloc_struct(ctx, pdf_xref_subsec);

		/* The new table completely replaces the previous separate sections */
		pdf_drop_xref_sections(ctx, doc);

		sub->table = entries;
		sub->start = 0;
		sub->len = n;
		xref->subsec = sub;
		xref->num_objects = n;
		xref->trailer = trailer;
		trailer = NULL;

		doc->xref_sections = xref;
		doc->num_xref_sections = 1;
		doc->max_xref_len = n;

		/* Swap in the new index last, releasing the old one instead
		 * of overwriting (and leaking) it. */
		fz_free(ctx, doc->xref_index);
		doc->xref_index = xref_index;
	}
	fz_catch(ctx)
	{
		fz_free(ctx, xref_index);
		fz_free(ctx, xref);
		pdf_drop_obj(ctx, trailer);
		fz_rethrow(ctx);
	}
}
459
460
/*
461
* magic version tag and startxref
462
*/
463
464
static void
465
pdf_load_version(fz_context *ctx, pdf_document *doc)
466
{
467
char buf[20];
468
469
fz_seek(ctx, doc->file, 0, SEEK_SET);
470
fz_read_line(ctx, doc->file, buf, sizeof buf);
471
if (memcmp(buf, "%PDF-", 5) != 0)
472
fz_throw(ctx, FZ_ERROR_GENERIC, "cannot recognize version marker");
473
474
doc->version = 10 * (fz_atof(buf+5) + 0.05);
475
}
476
477
static void
478
pdf_read_start_xref(fz_context *ctx, pdf_document *doc)
479
{
480
unsigned char buf[1024];
481
int t, n;
482
int i;
483
484
fz_seek(ctx, doc->file, 0, SEEK_END);
485
486
doc->file_size = fz_tell(ctx, doc->file);
487
488
t = fz_maxi(0, doc->file_size - (int)sizeof buf);
489
fz_seek(ctx, doc->file, t, SEEK_SET);
490
491
n = fz_read(ctx, doc->file, buf, sizeof buf);
492
493
for (i = n - 9; i >= 0; i--)
494
{
495
if (memcmp(buf + i, "startxref", 9) == 0)
496
{
497
i += 9;
498
while (i < n && iswhite(buf[i]))
499
i ++;
500
doc->startxref = 0;
501
while (i < n && buf[i] >= '0' && buf[i] <= '9')
502
doc->startxref = doc->startxref * 10 + (buf[i++] - '0');
503
if (doc->startxref != 0)
504
return;
505
break;
506
}
507
}
508
509
fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find startxref");
510
}
511
512
/*
513
* trailer dictionary
514
*/
515
516
static int
517
pdf_xref_size_from_old_trailer(fz_context *ctx, pdf_document *doc, pdf_lexbuf *buf)
518
{
519
int len;
520
char *s;
521
int t;
522
pdf_token tok;
523
int c;
524
int size;
525
int ofs;
526
pdf_obj *trailer = NULL;
527
528
fz_var(trailer);
529
530
/* Record the current file read offset so that we can reinstate it */
531
ofs = fz_tell(ctx, doc->file);
532
533
fz_read_line(ctx, doc->file, buf->scratch, buf->size);
534
if (strncmp(buf->scratch, "xref", 4) != 0)
535
fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find xref marker");
536
537
while (1)
538
{
539
c = fz_peek_byte(ctx, doc->file);
540
if (!(c >= '0' && c <= '9'))
541
break;
542
543
fz_read_line(ctx, doc->file, buf->scratch, buf->size);
544
s = buf->scratch;
545
fz_strsep(&s, " "); /* ignore ofs */
546
if (!s)
547
fz_throw(ctx, FZ_ERROR_GENERIC, "invalid range marker in xref");
548
len = fz_atoi(fz_strsep(&s, " "));
549
if (len < 0)
550
fz_throw(ctx, FZ_ERROR_GENERIC, "xref range marker must be positive");
551
552
/* broken pdfs where the section is not on a separate line */
553
if (s && *s != '\0')
554
fz_seek(ctx, doc->file, -(2 + (int)strlen(s)), SEEK_CUR);
555
556
t = fz_tell(ctx, doc->file);
557
if (t < 0)
558
fz_throw(ctx, FZ_ERROR_GENERIC, "cannot tell in file");
559
if (len > (INT_MAX - t) / 20)
560
fz_throw(ctx, FZ_ERROR_GENERIC, "xref has too many entries");
561
562
fz_seek(ctx, doc->file, t + 20 * len, SEEK_SET);
563
}
564
565
fz_try(ctx)
566
{
567
tok = pdf_lex(ctx, doc->file, buf);
568
if (tok != PDF_TOK_TRAILER)
569
fz_throw(ctx, FZ_ERROR_GENERIC, "expected trailer marker");
570
571
tok = pdf_lex(ctx, doc->file, buf);
572
if (tok != PDF_TOK_OPEN_DICT)
573
fz_throw(ctx, FZ_ERROR_GENERIC, "expected trailer dictionary");
574
575
trailer = pdf_parse_dict(ctx, doc, doc->file, buf);
576
577
size = pdf_to_int(ctx, pdf_dict_get(ctx, trailer, PDF_NAME_Size));
578
if (!size)
579
fz_throw(ctx, FZ_ERROR_GENERIC, "trailer missing Size entry");
580
}
581
fz_always(ctx)
582
{
583
pdf_drop_obj(ctx, trailer);
584
}
585
fz_catch(ctx)
586
{
587
fz_rethrow_message(ctx, "cannot parse trailer");
588
}
589
590
fz_seek(ctx, doc->file, ofs, SEEK_SET);
591
592
return size;
593
}
594
595
pdf_obj *
596
pdf_new_ref(fz_context *ctx, pdf_document *doc, pdf_obj *obj)
597
{
598
int num = pdf_create_object(ctx, doc);
599
pdf_update_object(ctx, doc, num, obj);
600
return pdf_new_indirect(ctx, doc, num, 0);
601
}
602
603
static pdf_xref_entry *
604
pdf_xref_find_subsection(fz_context *ctx, pdf_document *doc, int ofs, int len)
605
{
606
pdf_xref *xref = &doc->xref_sections[doc->num_xref_sections-1];
607
pdf_xref_subsec *sub;
608
int new_max;
609
610
/* Different cases here. Case 1) We might be asking for a
611
* subsection (or a subset of a subsection) that we already
612
* have - Just return it. Case 2) We might be asking for a
613
* completely new subsection - Create it and return it.
614
* Case 3) We might have an overlapping one - Create a 'solid'
615
* subsection and return that. */
616
617
/* Sanity check */
618
for (sub = xref->subsec; sub != NULL; sub = sub->next)
619
{
620
if (ofs >= sub->start && ofs + len <= sub->start + sub->len)
621
return &sub->table[ofs-sub->start]; /* Case 1 */
622
if (ofs + len > sub->start && ofs <= sub->start + sub->len)
623
break; /* Case 3 */
624
}
625
626
new_max = xref->num_objects;
627
if (new_max < ofs + len)
628
new_max = ofs + len;
629
630
if (sub == NULL)
631
{
632
/* Case 2 */
633
sub = fz_malloc_struct(ctx, pdf_xref_subsec);
634
fz_try(ctx)
635
{
636
sub->table = fz_calloc(ctx, len, sizeof(pdf_xref_entry));
637
sub->start = ofs;
638
sub->len = len;
639
sub->next = xref->subsec;
640
xref->subsec = sub;
641
}
642
fz_catch(ctx)
643
{
644
fz_free(ctx, sub);
645
fz_rethrow(ctx);
646
}
647
xref->num_objects = new_max;
648
if (doc->max_xref_len < new_max)
649
extend_xref_index(ctx, doc, new_max);
650
}
651
else
652
{
653
/* Case 3 */
654
ensure_solid_xref(ctx, doc, new_max, doc->num_xref_sections-1);
655
xref = &doc->xref_sections[doc->num_xref_sections-1];
656
sub = xref->subsec;
657
}
658
return &sub->table[ofs-sub->start];
659
}
660
661
static pdf_obj *
662
pdf_read_old_xref(fz_context *ctx, pdf_document *doc, pdf_lexbuf *buf)
663
{
664
fz_stream *file = doc->file;
665
666
int ofs, len;
667
char *s;
668
int n;
669
pdf_token tok;
670
int i;
671
int c;
672
pdf_obj *trailer;
673
int xref_len = pdf_xref_size_from_old_trailer(ctx, doc, buf);
674
pdf_xref_entry *table;
675
676
fz_read_line(ctx, file, buf->scratch, buf->size);
677
if (strncmp(buf->scratch, "xref", 4) != 0)
678
fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find xref marker");
679
680
while (1)
681
{
682
c = fz_peek_byte(ctx, file);
683
if (!(c >= '0' && c <= '9'))
684
break;
685
686
fz_read_line(ctx, file, buf->scratch, buf->size);
687
s = buf->scratch;
688
ofs = fz_atoi(fz_strsep(&s, " "));
689
len = fz_atoi(fz_strsep(&s, " "));
690
691
/* broken pdfs where the section is not on a separate line */
692
if (s && *s != '\0')
693
{
694
fz_warn(ctx, "broken xref section. proceeding anyway.");
695
fz_seek(ctx, file, -(2 + (int)strlen(s)), SEEK_CUR);
696
}
697
698
if (ofs < 0)
699
fz_throw(ctx, FZ_ERROR_GENERIC, "out of range object num in xref: %d", ofs);
700
701
/* broken pdfs where size in trailer undershoots entries in xref sections */
702
if (ofs + len > xref_len)
703
{
704
fz_warn(ctx, "broken xref section, proceeding anyway.");
705
}
706
707
table = pdf_xref_find_subsection(ctx, doc, ofs, len);
708
709
for (i = ofs; i < ofs + len; i++)
710
{
711
pdf_xref_entry *entry = &table[i-ofs];
712
n = fz_read(ctx, file, (unsigned char *) buf->scratch, 20);
713
if (n != 20)
714
fz_throw(ctx, FZ_ERROR_GENERIC, "unexpected EOF in xref table");
715
if (!entry->type)
716
{
717
s = buf->scratch;
718
719
/* broken pdfs where line start with white space */
720
while (*s != '\0' && iswhite(*s))
721
s++;
722
723
entry->ofs = atoi(s);
724
entry->gen = atoi(s + 11);
725
entry->type = s[17];
726
if (s[17] != 'f' && s[17] != 'n' && s[17] != 'o')
727
fz_throw(ctx, FZ_ERROR_GENERIC, "unexpected xref type: %#x (%d %d R)", s[17], i, entry->gen);
728
}
729
}
730
}
731
732
fz_try(ctx)
733
{
734
tok = pdf_lex(ctx, file, buf);
735
if (tok != PDF_TOK_TRAILER)
736
fz_throw(ctx, FZ_ERROR_GENERIC, "expected trailer marker");
737
738
tok = pdf_lex(ctx, file, buf);
739
if (tok != PDF_TOK_OPEN_DICT)
740
fz_throw(ctx, FZ_ERROR_GENERIC, "expected trailer dictionary");
741
742
trailer = pdf_parse_dict(ctx, doc, file, buf);
743
}
744
fz_catch(ctx)
745
{
746
fz_rethrow_message(ctx, "cannot parse trailer");
747
}
748
return trailer;
749
}
750
751
static void
752
pdf_read_new_xref_section(fz_context *ctx, pdf_document *doc, fz_stream *stm, int i0, int i1, int w0, int w1, int w2)
753
{
754
pdf_xref_entry *table;
755
int i, n;
756
757
if (i0 < 0 || i1 < 0)
758
fz_throw(ctx, FZ_ERROR_GENERIC, "negative xref stream entry index");
759
//if (i0 + i1 > pdf_xref_len(ctx, doc))
760
// fz_throw(ctx, FZ_ERROR_GENERIC, "xref stream has too many entries");
761
762
table = pdf_xref_find_subsection(ctx, doc, i0, i1);
763
for (i = i0; i < i0 + i1; i++)
764
{
765
pdf_xref_entry *entry = &table[i-i0];
766
int a = 0;
767
int b = 0;
768
int c = 0;
769
770
if (fz_is_eof(ctx, stm))
771
fz_throw(ctx, FZ_ERROR_GENERIC, "truncated xref stream");
772
773
for (n = 0; n < w0; n++)
774
a = (a << 8) + fz_read_byte(ctx, stm);
775
for (n = 0; n < w1; n++)
776
b = (b << 8) + fz_read_byte(ctx, stm);
777
for (n = 0; n < w2; n++)
778
c = (c << 8) + fz_read_byte(ctx, stm);
779
780
if (!entry->type)
781
{
782
int t = w0 ? a : 1;
783
entry->type = t == 0 ? 'f' : t == 1 ? 'n' : t == 2 ? 'o' : 0;
784
entry->ofs = w1 ? b : 0;
785
entry->gen = w2 ? c : 0;
786
}
787
}
788
789
doc->has_xref_streams = 1;
790
}
791
792
/* Entered with file locked, remains locked throughout. */
793
static pdf_obj *
794
pdf_read_new_xref(fz_context *ctx, pdf_document *doc, pdf_lexbuf *buf)
795
{
796
fz_stream *stm = NULL;
797
pdf_obj *trailer = NULL;
798
pdf_obj *index = NULL;
799
pdf_obj *obj = NULL;
800
int num, gen, ofs, stm_ofs;
801
int size, w0, w1, w2;
802
int t;
803
804
fz_var(trailer);
805
fz_var(stm);
806
807
fz_try(ctx)
808
{
809
ofs = fz_tell(ctx, doc->file);
810
trailer = pdf_parse_ind_obj(ctx, doc, doc->file, buf, &num, &gen, &stm_ofs, NULL);
811
}
812
fz_catch(ctx)
813
{
814
pdf_drop_obj(ctx, trailer);
815
fz_rethrow_message(ctx, "cannot parse compressed xref stream object");
816
}
817
818
fz_try(ctx)
819
{
820
pdf_xref_entry *entry;
821
822
obj = pdf_dict_get(ctx, trailer, PDF_NAME_Size);
823
if (!obj)
824
fz_throw(ctx, FZ_ERROR_GENERIC, "xref stream missing Size entry (%d %d R)", num, gen);
825
826
size = pdf_to_int(ctx, obj);
827
828
obj = pdf_dict_get(ctx, trailer, PDF_NAME_W);
829
if (!obj)
830
fz_throw(ctx, FZ_ERROR_GENERIC, "xref stream missing W entry (%d %d R)", num, gen);
831
w0 = pdf_to_int(ctx, pdf_array_get(ctx, obj, 0));
832
w1 = pdf_to_int(ctx, pdf_array_get(ctx, obj, 1));
833
w2 = pdf_to_int(ctx, pdf_array_get(ctx, obj, 2));
834
835
if (w0 < 0)
836
fz_warn(ctx, "xref stream objects have corrupt type");
837
if (w1 < 0)
838
fz_warn(ctx, "xref stream objects have corrupt offset");
839
if (w2 < 0)
840
fz_warn(ctx, "xref stream objects have corrupt generation");
841
842
w0 = w0 < 0 ? 0 : w0;
843
w1 = w1 < 0 ? 0 : w1;
844
w2 = w2 < 0 ? 0 : w2;
845
846
index = pdf_dict_get(ctx, trailer, PDF_NAME_Index);
847
848
stm = pdf_open_stream_with_offset(ctx, doc, num, gen, trailer, stm_ofs);
849
850
if (!index)
851
{
852
pdf_read_new_xref_section(ctx, doc, stm, 0, size, w0, w1, w2);
853
}
854
else
855
{
856
int n = pdf_array_len(ctx, index);
857
for (t = 0; t < n; t += 2)
858
{
859
int i0 = pdf_to_int(ctx, pdf_array_get(ctx, index, t + 0));
860
int i1 = pdf_to_int(ctx, pdf_array_get(ctx, index, t + 1));
861
pdf_read_new_xref_section(ctx, doc, stm, i0, i1, w0, w1, w2);
862
}
863
}
864
entry = pdf_get_populating_xref_entry(ctx, doc, num);
865
entry->ofs = ofs;
866
entry->gen = gen;
867
entry->stm_ofs = stm_ofs;
868
pdf_drop_obj(ctx, entry->obj);
869
entry->obj = pdf_keep_obj(ctx, trailer);
870
entry->type = 'n';
871
}
872
fz_always(ctx)
873
{
874
fz_drop_stream(ctx, stm);
875
}
876
fz_catch(ctx)
877
{
878
pdf_drop_obj(ctx, trailer);
879
fz_rethrow(ctx);
880
}
881
882
return trailer;
883
}
884
885
static pdf_obj *
886
pdf_read_xref(fz_context *ctx, pdf_document *doc, int ofs, pdf_lexbuf *buf)
887
{
888
pdf_obj *trailer;
889
int c;
890
891
fz_seek(ctx, doc->file, ofs, SEEK_SET);
892
893
while (iswhite(fz_peek_byte(ctx, doc->file)))
894
fz_read_byte(ctx, doc->file);
895
896
fz_try(ctx)
897
{
898
c = fz_peek_byte(ctx, doc->file);
899
if (c == 'x')
900
trailer = pdf_read_old_xref(ctx, doc, buf);
901
else if (c >= '0' && c <= '9')
902
trailer = pdf_read_new_xref(ctx, doc, buf);
903
else
904
fz_throw(ctx, FZ_ERROR_GENERIC, "cannot recognize xref format");
905
}
906
fz_catch(ctx)
907
{
908
fz_rethrow_message(ctx, "cannot read xref (ofs=%d)", ofs);
909
}
910
return trailer;
911
}
912
913
/* Growable list of xref offsets that have already been visited. */
typedef struct ofs_list_s
{
	int max;	/* number of ints 'list' has room for */
	int len;	/* number of ints currently stored */
	int *list;	/* the stored offsets */
} ofs_list;
921
922
static int
923
read_xref_section(fz_context *ctx, pdf_document *doc, int ofs, pdf_lexbuf *buf, ofs_list *offsets)
924
{
925
pdf_obj *trailer = NULL;
926
int xrefstmofs = 0;
927
int prevofs = 0;
928
929
fz_var(trailer);
930
931
fz_try(ctx)
932
{
933
int i;
934
/* Avoid potential infinite recursion */
935
for (i = 0; i < offsets->len; i ++)
936
{
937
if (offsets->list[i] == ofs)
938
break;
939
}
940
if (i < offsets->len)
941
{
942
fz_warn(ctx, "ignoring xref recursion with offset %d", ofs);
943
break;
944
}
945
if (offsets->len == offsets->max)
946
{
947
offsets->list = fz_resize_array(ctx, offsets->list, offsets->max*2, sizeof(int));
948
offsets->max *= 2;
949
}
950
offsets->list[offsets->len++] = ofs;
951
952
trailer = pdf_read_xref(ctx, doc, ofs, buf);
953
954
pdf_set_populating_xref_trailer(ctx, doc, trailer);
955
956
/* FIXME: do we overwrite free entries properly? */
957
/* FIXME: Does this work properly with progression? */
958
xrefstmofs = pdf_to_int(ctx, pdf_dict_get(ctx, trailer, PDF_NAME_XRefStm));
959
if (xrefstmofs)
960
{
961
if (xrefstmofs < 0)
962
fz_throw(ctx, FZ_ERROR_GENERIC, "negative xref stream offset");
963
964
/*
965
Read the XRefStm stream, but throw away the resulting trailer. We do not
966
follow any Prev tag therein, as specified on Page 108 of the PDF reference
967
1.7
968
*/
969
pdf_drop_obj(ctx, pdf_read_xref(ctx, doc, xrefstmofs, buf));
970
}
971
972
prevofs = pdf_to_int(ctx, pdf_dict_get(ctx, trailer, PDF_NAME_Prev));
973
if (prevofs < 0)
974
fz_throw(ctx, FZ_ERROR_GENERIC, "negative xref stream offset for previous xref stream");
975
}
976
fz_always(ctx)
977
{
978
pdf_drop_obj(ctx, trailer);
979
}
980
fz_catch(ctx)
981
{
982
fz_rethrow_message(ctx, "cannot read xref at offset %d", ofs);
983
}
984
985
return prevofs;
986
}
987
988
static void
989
pdf_read_xref_sections(fz_context *ctx, pdf_document *doc, int ofs, pdf_lexbuf *buf, int read_previous)
990
{
991
ofs_list list;
992
993
list.len = 0;
994
list.max = 10;
995
list.list = fz_malloc_array(ctx, 10, sizeof(int));
996
fz_try(ctx)
997
{
998
while(ofs)
999
{
1000
pdf_populate_next_xref_level(ctx, doc);
1001
ofs = read_xref_section(ctx, doc, ofs, buf, &list);
1002
if (!read_previous)
1003
break;
1004
}
1005
}
1006
fz_always(ctx)
1007
{
1008
fz_free(ctx, list.list);
1009
}
1010
fz_catch(ctx)
1011
{
1012
fz_rethrow(ctx);
1013
}
1014
}
1015
1016
static void
1017
pdf_prime_xref_index(fz_context *ctx, pdf_document *doc)
1018
{
1019
int i, j;
1020
int *idx = doc->xref_index;
1021
1022
for (i = doc->num_xref_sections-1; i >= 0; i--)
1023
{
1024
pdf_xref *xref = &doc->xref_sections[i];
1025
pdf_xref_subsec *subsec = xref->subsec;
1026
while (subsec != NULL)
1027
{
1028
int start = subsec->start;
1029
int end = subsec->start + subsec->len;
1030
for (j = start; j < end; j++)
1031
{
1032
char t = subsec->table[j-start].type;
1033
if (t != 0 && t != 'f')
1034
idx[j] = i;
1035
}
1036
1037
subsec = subsec->next;
1038
}
1039
}
1040
}
1041
1042
/*
1043
* load xref tables from pdf
1044
*
1045
* File locked on entry, throughout and on exit.
1046
*/
1047
1048
static void
1049
pdf_load_xref(fz_context *ctx, pdf_document *doc, pdf_lexbuf *buf)
1050
{
1051
int i;
1052
int xref_len;
1053
pdf_xref_entry *entry;
1054
1055
pdf_read_start_xref(ctx, doc);
1056
1057
pdf_read_xref_sections(ctx, doc, doc->startxref, buf, 1);
1058
1059
if (pdf_xref_len(ctx, doc) == 0)
1060
fz_throw(ctx, FZ_ERROR_GENERIC, "found xref was empty");
1061
1062
pdf_prime_xref_index(ctx, doc);
1063
1064
entry = pdf_get_xref_entry(ctx, doc, 0);
1065
/* broken pdfs where first object is missing */
1066
if (!entry->type)
1067
{
1068
entry->type = 'f';
1069
entry->gen = 65535;
1070
}
1071
/* broken pdfs where first object is not free */
1072
else if (entry->type != 'f')
1073
fz_throw(ctx, FZ_ERROR_GENERIC, "first object in xref is not free");
1074
1075
/* broken pdfs where object offsets are out of range */
1076
xref_len = pdf_xref_len(ctx, doc);
1077
for (i = 0; i < xref_len; i++)
1078
{
1079
pdf_xref_entry *entry = pdf_get_xref_entry(ctx, doc, i);
1080
if (entry->type == 'n')
1081
{
1082
/* Special case code: "0000000000 * n" means free,
1083
* according to some producers (inc Quartz) */
1084
if (entry->ofs == 0)
1085
entry->type = 'f';
1086
else if (entry->ofs <= 0 || entry->ofs >= doc->file_size)
1087
fz_throw(ctx, FZ_ERROR_GENERIC, "object offset out of range: %d (%d 0 R)", entry->ofs, i);
1088
}
1089
if (entry->type == 'o')
1090
if (entry->ofs <= 0 || entry->ofs >= xref_len || pdf_get_xref_entry(ctx, doc, entry->ofs)->type != 'n')
1091
fz_throw(ctx, FZ_ERROR_GENERIC, "invalid reference to an objstm that does not exist: %d (%d 0 R)", entry->ofs, i);
1092
}
1093
}
1094
1095
static void
1096
pdf_load_linear(fz_context *ctx, pdf_document *doc)
1097
{
1098
pdf_obj *dict = NULL;
1099
pdf_obj *hint = NULL;
1100
pdf_obj *o;
1101
int num, gen, stmofs, lin, len;
1102
1103
fz_var(dict);
1104
fz_var(hint);
1105
1106
fz_try(ctx)
1107
{
1108
pdf_xref_entry *entry;
1109
1110
dict = pdf_parse_ind_obj(ctx, doc, doc->file, &doc->lexbuf.base, &num, &gen, &stmofs, NULL);
1111
if (!pdf_is_dict(ctx, dict))
1112
fz_throw(ctx, FZ_ERROR_GENERIC, "Failed to read linearized dictionary");
1113
o = pdf_dict_get(ctx, dict, PDF_NAME_Linearized);
1114
if (o == NULL)
1115
fz_throw(ctx, FZ_ERROR_GENERIC, "Failed to read linearized dictionary");
1116
lin = pdf_to_int(ctx, o);
1117
if (lin != 1)
1118
fz_throw(ctx, FZ_ERROR_GENERIC, "Unexpected version of Linearized tag (%d)", lin);
1119
len = pdf_to_int(ctx, pdf_dict_get(ctx, dict, PDF_NAME_L));
1120
if (len != doc->file_length)
1121
fz_throw(ctx, FZ_ERROR_GENERIC, "File has been updated since linearization");
1122
1123
pdf_read_xref_sections(ctx, doc, fz_tell(ctx, doc->file), &doc->lexbuf.base, 0);
1124
1125
doc->page_count = pdf_to_int(ctx, pdf_dict_get(ctx, dict, PDF_NAME_N));
1126
doc->linear_page_refs = fz_resize_array(ctx, doc->linear_page_refs, doc->page_count, sizeof(pdf_obj *));
1127
memset(doc->linear_page_refs, 0, doc->page_count * sizeof(pdf_obj*));
1128
doc->linear_obj = dict;
1129
doc->linear_pos = fz_tell(ctx, doc->file);
1130
doc->linear_page1_obj_num = pdf_to_int(ctx, pdf_dict_get(ctx, dict, PDF_NAME_O));
1131
doc->linear_page_refs[0] = pdf_new_indirect(ctx, doc, doc->linear_page1_obj_num, 0);
1132
doc->linear_page_num = 0;
1133
hint = pdf_dict_get(ctx, dict, PDF_NAME_H);
1134
doc->hint_object_offset = pdf_to_int(ctx, pdf_array_get(ctx, hint, 0));
1135
doc->hint_object_length = pdf_to_int(ctx, pdf_array_get(ctx, hint, 1));
1136
1137
entry = pdf_get_populating_xref_entry(ctx, doc, 0);
1138
entry->type = 'f';
1139
}
1140
fz_catch(ctx)
1141
{
1142
pdf_drop_obj(ctx, dict);
1143
fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
1144
/* Drop back to non linearized reading mode */
1145
doc->file_reading_linearly = 0;
1146
}
1147
}
1148
1149
void
1150
pdf_ocg_set_config(fz_context *ctx, pdf_document *doc, int config)
1151
{
1152
int i, j, len, len2;
1153
pdf_ocg_descriptor *desc = doc->ocg;
1154
pdf_obj *obj, *cobj;
1155
pdf_obj *name;
1156
1157
obj = pdf_dict_get(ctx, pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME_Root), PDF_NAME_OCProperties);
1158
if (!obj)
1159
{
1160
if (config == 0)
1161
return;
1162
else
1163
fz_throw(ctx, FZ_ERROR_GENERIC, "Unknown OCG config (None known!)");
1164
}
1165
if (config == 0)
1166
{
1167
cobj = pdf_dict_get(ctx, obj, PDF_NAME_D);
1168
if (!cobj)
1169
fz_throw(ctx, FZ_ERROR_GENERIC, "No default OCG config");
1170
}
1171
else
1172
{
1173
cobj = pdf_array_get(ctx, pdf_dict_get(ctx, obj, PDF_NAME_Configs), config);
1174
if (!cobj)
1175
fz_throw(ctx, FZ_ERROR_GENERIC, "Illegal OCG config");
1176
}
1177
1178
pdf_drop_obj(ctx, desc->intent);
1179
desc->intent = pdf_dict_get(ctx, cobj, PDF_NAME_Intent);
1180
if (desc->intent)
1181
pdf_keep_obj(ctx, desc->intent);
1182
1183
len = desc->len;
1184
name = pdf_dict_get(ctx, cobj, PDF_NAME_BaseState);
1185
if (pdf_name_eq(ctx, name, PDF_NAME_Unchanged))
1186
{
1187
/* Do nothing */
1188
}
1189
else if (pdf_name_eq(ctx, name, PDF_NAME_OFF))
1190
{
1191
for (i = 0; i < len; i++)
1192
{
1193
desc->ocgs[i].state = 0;
1194
}
1195
}
1196
else /* Default to ON */
1197
{
1198
for (i = 0; i < len; i++)
1199
{
1200
desc->ocgs[i].state = 1;
1201
}
1202
}
1203
1204
obj = pdf_dict_get(ctx, cobj, PDF_NAME_ON);
1205
len2 = pdf_array_len(ctx, obj);
1206
for (i = 0; i < len2; i++)
1207
{
1208
pdf_obj *o = pdf_array_get(ctx, obj, i);
1209
int n = pdf_to_num(ctx, o);
1210
int g = pdf_to_gen(ctx, o);
1211
for (j=0; j < len; j++)
1212
{
1213
if (desc->ocgs[j].num == n && desc->ocgs[j].gen == g)
1214
{
1215
desc->ocgs[j].state = 1;
1216
break;
1217
}
1218
}
1219
}
1220
1221
obj = pdf_dict_get(ctx, cobj, PDF_NAME_OFF);
1222
len2 = pdf_array_len(ctx, obj);
1223
for (i = 0; i < len2; i++)
1224
{
1225
pdf_obj *o = pdf_array_get(ctx, obj, i);
1226
int n = pdf_to_num(ctx, o);
1227
int g = pdf_to_gen(ctx, o);
1228
for (j=0; j < len; j++)
1229
{
1230
if (desc->ocgs[j].num == n && desc->ocgs[j].gen == g)
1231
{
1232
desc->ocgs[j].state = 0;
1233
break;
1234
}
1235
}
1236
}
1237
1238
/* FIXME: Should make 'num configs' available in the descriptor. */
1239
/* FIXME: Should copy out 'Intent' here into the descriptor, and remove
1240
* csi->intent in favour of that. */
1241
/* FIXME: Should copy 'AS' into the descriptor, and visibility
1242
* decisions should respect it. */
1243
/* FIXME: Make 'Order' available via the descriptor (when we have an
1244
* app that needs it) */
1245
/* FIXME: Make 'ListMode' available via the descriptor (when we have
1246
* an app that needs it) */
1247
/* FIXME: Make 'RBGroups' available via the descriptor (when we have
1248
* an app that needs it) */
1249
/* FIXME: Make 'Locked' available via the descriptor (when we have
1250
* an app that needs it) */
1251
}
1252
1253
static void
1254
pdf_read_ocg(fz_context *ctx, pdf_document *doc)
1255
{
1256
pdf_obj *obj, *ocg;
1257
int len, i;
1258
pdf_ocg_descriptor *desc;
1259
1260
fz_var(desc);
1261
1262
obj = pdf_dict_get(ctx, pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME_Root), PDF_NAME_OCProperties);
1263
if (!obj)
1264
return;
1265
ocg = pdf_dict_get(ctx, obj, PDF_NAME_OCGs);
1266
if (!ocg || !pdf_is_array(ctx, ocg))
1267
/* Not ever supposed to happen, but live with it. */
1268
return;
1269
len = pdf_array_len(ctx, ocg);
1270
fz_try(ctx)
1271
{
1272
desc = fz_calloc(ctx, 1, sizeof(*desc));
1273
desc->len = len;
1274
desc->ocgs = fz_calloc(ctx, len, sizeof(*desc->ocgs));
1275
desc->intent = NULL;
1276
for (i=0; i < len; i++)
1277
{
1278
pdf_obj *o = pdf_array_get(ctx, ocg, i);
1279
desc->ocgs[i].num = pdf_to_num(ctx, o);
1280
desc->ocgs[i].gen = pdf_to_gen(ctx, o);
1281
desc->ocgs[i].state = 1;
1282
}
1283
doc->ocg = desc;
1284
}
1285
fz_catch(ctx)
1286
{
1287
if (desc)
1288
fz_free(ctx, desc->ocgs);
1289
fz_free(ctx, desc);
1290
fz_rethrow(ctx);
1291
}
1292
1293
pdf_ocg_set_config(ctx, doc, 0);
1294
}
1295
1296
static void
1297
pdf_drop_ocg(fz_context *ctx, pdf_ocg_descriptor *desc)
1298
{
1299
if (!desc)
1300
return;
1301
1302
pdf_drop_obj(ctx, desc->intent);
1303
fz_free(ctx, desc->ocgs);
1304
fz_free(ctx, desc);
1305
}
1306
1307
/*
1308
* Initialize and load xref tables.
1309
* If password is not null, try to decrypt.
1310
*/
1311
1312
static void
1313
pdf_init_document(fz_context *ctx, pdf_document *doc)
1314
{
1315
pdf_obj *encrypt, *id;
1316
pdf_obj *dict = NULL;
1317
pdf_obj *obj;
1318
pdf_obj *nobj = NULL;
1319
int i, repaired = 0;
1320
1321
fz_var(dict);
1322
fz_var(nobj);
1323
1324
fz_try(ctx)
1325
{
1326
pdf_load_version(ctx, doc);
1327
1328
doc->file_length = fz_stream_meta(ctx, doc->file, FZ_STREAM_META_LENGTH, 0, NULL);
1329
if (doc->file_length < 0)
1330
doc->file_length = 0;
1331
1332
/* Check to see if we should work in progressive mode */
1333
if (fz_stream_meta(ctx, doc->file, FZ_STREAM_META_PROGRESSIVE, 0, NULL) > 0)
1334
doc->file_reading_linearly = 1;
1335
1336
/* Try to load the linearized file if we are in progressive
1337
* mode. */
1338
if (doc->file_reading_linearly)
1339
pdf_load_linear(ctx, doc);
1340
1341
/* If we aren't in progressive mode (or the linear load failed
1342
* and has set us back to non-progressive mode), load normally.
1343
*/
1344
if (!doc->file_reading_linearly)
1345
pdf_load_xref(ctx, doc, &doc->lexbuf.base);
1346
}
1347
fz_catch(ctx)
1348
{
1349
pdf_drop_xref_sections(ctx, doc);
1350
fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
1351
fz_warn(ctx, "trying to repair broken xref");
1352
repaired = 1;
1353
}
1354
1355
fz_try(ctx)
1356
{
1357
int hasroot, hasinfo;
1358
1359
if (repaired)
1360
{
1361
/* pdf_repair_xref may access xref_index, so reset it properly */
1362
memset(doc->xref_index, 0, sizeof(int) * doc->max_xref_len);
1363
pdf_repair_xref(ctx, doc);
1364
pdf_prime_xref_index(ctx, doc);
1365
}
1366
1367
encrypt = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME_Encrypt);
1368
id = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME_ID);
1369
if (pdf_is_dict(ctx, encrypt))
1370
doc->crypt = pdf_new_crypt(ctx, encrypt, id);
1371
1372
/* Allow lazy clients to read encrypted files with a blank password */
1373
pdf_authenticate_password(ctx, doc, "");
1374
1375
if (repaired)
1376
{
1377
int xref_len = pdf_xref_len(ctx, doc);
1378
pdf_repair_obj_stms(ctx, doc);
1379
1380
hasroot = (pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME_Root) != NULL);
1381
hasinfo = (pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME_Info) != NULL);
1382
1383
for (i = 1; i < xref_len; i++)
1384
{
1385
pdf_xref_entry *entry = pdf_get_xref_entry(ctx, doc, i);
1386
if (entry->type == 0 || entry->type == 'f')
1387
continue;
1388
1389
fz_try(ctx)
1390
{
1391
dict = pdf_load_object(ctx, doc, i, 0);
1392
}
1393
fz_catch(ctx)
1394
{
1395
fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
1396
fz_warn(ctx, "ignoring broken object (%d 0 R)", i);
1397
continue;
1398
}
1399
1400
if (!hasroot)
1401
{
1402
obj = pdf_dict_get(ctx, dict, PDF_NAME_Type);
1403
if (pdf_name_eq(ctx, obj, PDF_NAME_Catalog))
1404
{
1405
nobj = pdf_new_indirect(ctx, doc, i, 0);
1406
pdf_dict_put(ctx, pdf_trailer(ctx, doc), PDF_NAME_Root, nobj);
1407
pdf_drop_obj(ctx, nobj);
1408
nobj = NULL;
1409
}
1410
}
1411
1412
if (!hasinfo)
1413
{
1414
if (pdf_dict_get(ctx, dict, PDF_NAME_Creator) || pdf_dict_get(ctx, dict, PDF_NAME_Producer))
1415
{
1416
nobj = pdf_new_indirect(ctx, doc, i, 0);
1417
pdf_dict_put(ctx, pdf_trailer(ctx, doc), PDF_NAME_Info, nobj);
1418
pdf_drop_obj(ctx, nobj);
1419
nobj = NULL;
1420
}
1421
}
1422
1423
pdf_drop_obj(ctx, dict);
1424
dict = NULL;
1425
}
1426
1427
/* ensure that strings are not used in their repaired, non-decrypted form */
1428
if (doc->crypt)
1429
pdf_clear_xref(ctx, doc);
1430
}
1431
}
1432
fz_catch(ctx)
1433
{
1434
pdf_drop_obj(ctx, dict);
1435
pdf_drop_obj(ctx, nobj);
1436
fz_rethrow_message(ctx, "cannot open document");
1437
}
1438
1439
fz_try(ctx)
1440
{
1441
pdf_read_ocg(ctx, doc);
1442
}
1443
fz_catch(ctx)
1444
{
1445
fz_warn(ctx, "Ignoring Broken Optional Content");
1446
}
1447
1448
fz_try(ctx)
1449
{
1450
char *version_str;
1451
obj = pdf_dict_getl(ctx, pdf_trailer(ctx, doc), PDF_NAME_Root, PDF_NAME_Version, NULL);
1452
version_str = pdf_to_name(ctx, obj);
1453
if (*version_str)
1454
{
1455
int version = 10 * (fz_atof(version_str) + 0.05);
1456
if (version > doc->version)
1457
doc->version = version;
1458
}
1459
}
1460
fz_catch(ctx) { }
1461
}
1462
1463
void
1464
pdf_close_document(fz_context *ctx, pdf_document *doc)
1465
{
1466
pdf_unsaved_sig *usig;
1467
int i;
1468
1469
if (!doc)
1470
return;
1471
1472
/* Type3 glyphs in the glyph cache can contain pdf_obj pointers
1473
* that we are about to destroy. Simplest solution is to bin the
1474
* glyph cache at this point. */
1475
fz_purge_glyph_cache(ctx);
1476
1477
if (doc->js)
1478
doc->drop_js(doc->js);
1479
1480
pdf_drop_xref_sections(ctx, doc);
1481
fz_free(ctx, doc->xref_index);
1482
1483
if (doc->focus_obj)
1484
pdf_drop_obj(ctx, doc->focus_obj);
1485
if (doc->file)
1486
fz_drop_stream(ctx, doc->file);
1487
if (doc->crypt)
1488
pdf_drop_crypt(ctx, doc->crypt);
1489
1490
pdf_drop_obj(ctx, doc->linear_obj);
1491
if (doc->linear_page_refs)
1492
{
1493
for (i=0; i < doc->page_count; i++)
1494
{
1495
pdf_drop_obj(ctx, doc->linear_page_refs[i]);
1496
}
1497
fz_free(ctx, doc->linear_page_refs);
1498
}
1499
fz_free(ctx, doc->hint_page);
1500
fz_free(ctx, doc->hint_shared_ref);
1501
fz_free(ctx, doc->hint_shared);
1502
fz_free(ctx, doc->hint_obj_offsets);
1503
1504
while ((usig = doc->unsaved_sigs) != NULL)
1505
{
1506
doc->unsaved_sigs = usig->next;
1507
pdf_drop_obj(ctx, usig->field);
1508
pdf_drop_signer(ctx, usig->signer);
1509
fz_free(ctx, usig);
1510
}
1511
1512
for (i=0; i < doc->num_type3_fonts; i++)
1513
{
1514
fz_decouple_type3_font(ctx, doc->type3_fonts[i], (void *)doc);
1515
fz_drop_font(ctx, doc->type3_fonts[i]);
1516
}
1517
fz_free(ctx, doc->type3_fonts);
1518
1519
pdf_drop_ocg(ctx, doc->ocg);
1520
1521
fz_empty_store(ctx);
1522
1523
pdf_lexbuf_fin(ctx, &doc->lexbuf.base);
1524
1525
fz_free(ctx, doc);
1526
}
1527
1528
void
1529
pdf_print_xref(fz_context *ctx, pdf_document *doc)
1530
{
1531
int i;
1532
int xref_len = pdf_xref_len(ctx, doc);
1533
printf("xref\n0 %d\n", xref_len);
1534
for (i = 0; i < xref_len; i++)
1535
{
1536
pdf_xref_entry *entry = pdf_get_xref_entry(ctx, doc, i);
1537
printf("%05d: %010d %05d %c (stm_ofs=%d; stm_buf=%p)\n", i,
1538
entry->ofs,
1539
entry->gen,
1540
entry->type ? entry->type : '-',
1541
entry->stm_ofs,
1542
entry->stm_buf);
1543
}
1544
}
1545
1546
/*
1547
* compressed object streams
1548
*/
1549
1550
static pdf_xref_entry *
1551
pdf_load_obj_stm(fz_context *ctx, pdf_document *doc, int num, int gen, pdf_lexbuf *buf, int target)
1552
{
1553
fz_stream *stm = NULL;
1554
pdf_obj *objstm = NULL;
1555
int *numbuf = NULL;
1556
int *ofsbuf = NULL;
1557
1558
pdf_obj *obj;
1559
int first;
1560
int count;
1561
int i;
1562
pdf_token tok;
1563
pdf_xref_entry *ret_entry = NULL;
1564
1565
fz_var(numbuf);
1566
fz_var(ofsbuf);
1567
fz_var(objstm);
1568
fz_var(stm);
1569
1570
fz_try(ctx)
1571
{
1572
objstm = pdf_load_object(ctx, doc, num, gen);
1573
1574
count = pdf_to_int(ctx, pdf_dict_get(ctx, objstm, PDF_NAME_N));
1575
first = pdf_to_int(ctx, pdf_dict_get(ctx, objstm, PDF_NAME_First));
1576
1577
if (count < 0)
1578
fz_throw(ctx, FZ_ERROR_GENERIC, "negative number of objects in object stream");
1579
if (first < 0)
1580
fz_throw(ctx, FZ_ERROR_GENERIC, "first object in object stream resides outside stream");
1581
1582
numbuf = fz_calloc(ctx, count, sizeof(int));
1583
ofsbuf = fz_calloc(ctx, count, sizeof(int));
1584
1585
stm = pdf_open_stream(ctx, doc, num, gen);
1586
for (i = 0; i < count; i++)
1587
{
1588
tok = pdf_lex(ctx, stm, buf);
1589
if (tok != PDF_TOK_INT)
1590
fz_throw(ctx, FZ_ERROR_GENERIC, "corrupt object stream (%d %d R)", num, gen);
1591
numbuf[i] = buf->i;
1592
1593
tok = pdf_lex(ctx, stm, buf);
1594
if (tok != PDF_TOK_INT)
1595
fz_throw(ctx, FZ_ERROR_GENERIC, "corrupt object stream (%d %d R)", num, gen);
1596
ofsbuf[i] = buf->i;
1597
}
1598
1599
fz_seek(ctx, stm, first, SEEK_SET);
1600
1601
for (i = 0; i < count; i++)
1602
{
1603
int xref_len = pdf_xref_len(ctx, doc);
1604
pdf_xref_entry *entry;
1605
fz_seek(ctx, stm, first + ofsbuf[i], SEEK_SET);
1606
1607
obj = pdf_parse_stm_obj(ctx, doc, stm, buf);
1608
1609
if (numbuf[i] <= 0 || numbuf[i] >= xref_len)
1610
{
1611
pdf_drop_obj(ctx, obj);
1612
fz_throw(ctx, FZ_ERROR_GENERIC, "object id (%d 0 R) out of range (0..%d)", numbuf[i], xref_len - 1);
1613
}
1614
1615
entry = pdf_get_xref_entry(ctx, doc, numbuf[i]);
1616
1617
pdf_set_obj_parent(ctx, obj, numbuf[i]);
1618
1619
if (entry->type == 'o' && entry->ofs == num)
1620
{
1621
/* If we already have an entry for this object,
1622
* we'd like to drop it and use the new one -
1623
* but this means that anyone currently holding
1624
* a pointer to the old one will be left with a
1625
* stale pointer. Instead, we drop the new one
1626
* and trust that the old one is correct. */
1627
if (entry->obj)
1628
{
1629
if (pdf_objcmp(ctx, entry->obj, obj))
1630
fz_warn(ctx, "Encountered new definition for object %d - keeping the original one", numbuf[i]);
1631
pdf_drop_obj(ctx, obj);
1632
}
1633
else
1634
entry->obj = obj;
1635
if (numbuf[i] == target)
1636
ret_entry = entry;
1637
}
1638
else
1639
{
1640
pdf_drop_obj(ctx, obj);
1641
}
1642
}
1643
}
1644
fz_always(ctx)
1645
{
1646
fz_drop_stream(ctx, stm);
1647
fz_free(ctx, ofsbuf);
1648
fz_free(ctx, numbuf);
1649
pdf_drop_obj(ctx, objstm);
1650
}
1651
fz_catch(ctx)
1652
{
1653
fz_rethrow_message(ctx, "cannot open object stream (%d %d R)", num, gen);
1654
}
1655
return ret_entry;
1656
}
1657
1658
/*
1659
* object loading
1660
*/
1661
static int
1662
pdf_obj_read(fz_context *ctx, pdf_document *doc, int *offset, int *nump, pdf_obj **page)
1663
{
1664
pdf_lexbuf *buf = &doc->lexbuf.base;
1665
int num, numofs, gen, genofs, stmofs, tmpofs, tok;
1666
int xref_len;
1667
pdf_xref_entry *entry;
1668
int newtmpofs;
1669
1670
numofs = *offset;
1671
fz_seek(ctx, doc->file, numofs, SEEK_SET);
1672
1673
/* We expect to read 'num' here */
1674
tok = pdf_lex(ctx, doc->file, buf);
1675
genofs = fz_tell(ctx, doc->file);
1676
if (tok != PDF_TOK_INT)
1677
{
1678
/* Failed! */
1679
DEBUGMESS((ctx, "skipping unexpected data (tok=%d) at %d", tok, *offset));
1680
*offset = genofs;
1681
return tok == PDF_TOK_EOF;
1682
}
1683
*nump = num = buf->i;
1684
1685
/* We expect to read 'gen' here */
1686
tok = pdf_lex(ctx, doc->file, buf);
1687
tmpofs = fz_tell(ctx, doc->file);
1688
if (tok != PDF_TOK_INT)
1689
{
1690
/* Failed! */
1691
DEBUGMESS((ctx, "skipping unexpected data after \"%d\" (tok=%d) at %d", num, tok, *offset));
1692
*offset = tmpofs;
1693
return tok == PDF_TOK_EOF;
1694
}
1695
gen = buf->i;
1696
1697
/* We expect to read 'obj' here */
1698
do
1699
{
1700
tmpofs = fz_tell(ctx, doc->file);
1701
tok = pdf_lex(ctx, doc->file, buf);
1702
if (tok == PDF_TOK_OBJ)
1703
break;
1704
if (tok != PDF_TOK_INT)
1705
{
1706
DEBUGMESS((ctx, "skipping unexpected data (tok=%d) at %d", tok, tmpofs));
1707
*offset = fz_tell(ctx, doc->file);
1708
return tok == PDF_TOK_EOF;
1709
}
1710
DEBUGMESS((ctx, "skipping unexpected int %d at %d", num, numofs));
1711
*nump = num = gen;
1712
numofs = genofs;
1713
gen = buf->i;
1714
genofs = tmpofs;
1715
}
1716
while (1);
1717
1718
/* Now we read the actual object */
1719
xref_len = pdf_xref_len(ctx, doc);
1720
1721
/* When we are reading a progressive file, we typically see:
1722
* File Header
1723
* obj m (Linearization params)
1724
* xref #1 (refers to objects m-n)
1725
* obj m+1
1726
* ...
1727
* obj n
1728
* obj 1
1729
* ...
1730
* obj n-1
1731
* xref #2
1732
*
1733
* The linearisation params are read elsewhere, hence
1734
* whenever we read an object it should just go into the
1735
* previous xref.
1736
*/
1737
tok = pdf_repair_obj(ctx, doc, buf, &stmofs, NULL, NULL, NULL, page, &newtmpofs);
1738
1739
do /* So we can break out of it */
1740
{
1741
if (num <= 0 || num >= xref_len)
1742
{
1743
fz_warn(ctx, "Not a valid object number (%d %d obj)", num, gen);
1744
break;
1745
}
1746
if (gen != 0)
1747
{
1748
fz_warn(ctx, "Unexpected non zero generation number in linearized file");
1749
}
1750
entry = pdf_get_populating_xref_entry(ctx, doc, num);
1751
if (entry->type != 0)
1752
{
1753
DEBUGMESS((ctx, "Duplicate object found (%d %d obj)", num, gen));
1754
break;
1755
}
1756
if (page && *page)
1757
{
1758
DEBUGMESS((ctx, "Successfully read object %d @ %d - and found page %d!", num, numofs, doc->linear_page_num));
1759
if (!entry->obj)
1760
entry->obj = pdf_keep_obj(ctx, *page);
1761
1762
if (doc->linear_page_refs[doc->linear_page_num] == NULL)
1763
doc->linear_page_refs[doc->linear_page_num] = pdf_new_indirect(ctx, doc, num, gen);
1764
}
1765
else
1766
{
1767
DEBUGMESS((ctx, "Successfully read object %d @ %d", num, numofs));
1768
}
1769
entry->type = 'n';
1770
entry->gen = 0;
1771
entry->ofs = numofs;
1772
entry->stm_ofs = stmofs;
1773
}
1774
while (0);
1775
if (page && *page)
1776
doc->linear_page_num++;
1777
1778
if (tok == PDF_TOK_ENDOBJ)
1779
{
1780
*offset = fz_tell(ctx, doc->file);
1781
}
1782
else
1783
{
1784
*offset = newtmpofs;
1785
}
1786
return 0;
1787
}
1788
1789
static void
1790
pdf_load_hinted_page(fz_context *ctx, pdf_document *doc, int pagenum)
1791
{
1792
1793
if (!doc->hints_loaded || !doc->linear_page_refs)
1794
return;
1795
1796
if (doc->linear_page_refs[pagenum])
1797
return;
1798
1799
fz_try(ctx)
1800
{
1801
int num = doc->hint_page[pagenum].number;
1802
pdf_obj *page = pdf_load_object(ctx, doc, num, 0);
1803
if (pdf_name_eq(ctx, PDF_NAME_Page, pdf_dict_get(ctx, page, PDF_NAME_Type)))
1804
{
1805
/* We have found the page object! */
1806
DEBUGMESS((ctx, "LoadHintedPage pagenum=%d num=%d", pagenum, num));
1807
doc->linear_page_refs[pagenum] = pdf_new_indirect(ctx, doc, num, 0);
1808
}
1809
pdf_drop_obj(ctx, page);
1810
}
1811
fz_catch(ctx)
1812
{
1813
fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
1814
/* Silently swallow the error and proceed as normal */
1815
}
1816
1817
}
1818
1819
static int
1820
read_hinted_object(fz_context *ctx, pdf_document *doc, int num)
1821
{
1822
/* Try to find the object using our hint table. Find the closest
1823
* object <= the one we want that has a hint and read forward from
1824
* there. */
1825
int expected = num;
1826
int curr_pos;
1827
int start, offset;
1828
1829
while (doc->hint_obj_offsets[expected] == 0 && expected > 0)
1830
expected--;
1831
if (expected != num)
1832
DEBUGMESS((ctx, "object %d is unhinted, will search forward from %d", expected, num));
1833
if (expected == 0) /* No hints found, just bale */
1834
return 0;
1835
1836
curr_pos = fz_tell(ctx, doc->file);
1837
offset = doc->hint_obj_offsets[expected];
1838
1839
fz_var(expected);
1840
1841
fz_try(ctx)
1842
{
1843
int found;
1844
1845
/* Try to read forward from there */
1846
do
1847
{
1848
start = offset;
1849
DEBUGMESS((ctx, "Searching for object %d @ %d", expected, offset));
1850
pdf_obj_read(ctx, doc, &offset, &found, 0);
1851
DEBUGMESS((ctx, "Found object %d - next will be @ %d", found, offset));
1852
if (found <= expected)
1853
{
1854
/* We found the right one (or one earlier than
1855
* we expected). Update the hints. */
1856
doc->hint_obj_offsets[expected] = offset;
1857
doc->hint_obj_offsets[found] = start;
1858
doc->hint_obj_offsets[found+1] = offset;
1859
/* Retry with the next one */
1860
expected = found+1;
1861
}
1862
else
1863
{
1864
/* We found one later than we expected. */
1865
doc->hint_obj_offsets[expected] = 0;
1866
doc->hint_obj_offsets[found] = start;
1867
doc->hint_obj_offsets[found+1] = offset;
1868
while (doc->hint_obj_offsets[expected] == 0 && expected > 0)
1869
expected--;
1870
if (expected == 0) /* No hints found, just bale */
1871
return 0;
1872
}
1873
}
1874
while (found != num);
1875
}
1876
fz_always(ctx)
1877
{
1878
fz_seek(ctx, doc->file, curr_pos, SEEK_SET);
1879
}
1880
fz_catch(ctx)
1881
{
1882
fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
1883
/* FIXME: Currently we ignore the hint. Perhaps we should
1884
* drop back to non-hinted operation here. */
1885
doc->hint_obj_offsets[expected] = 0;
1886
fz_rethrow(ctx);
1887
}
1888
return 1;
1889
}
1890
1891
pdf_xref_entry *
1892
pdf_cache_object(fz_context *ctx, pdf_document *doc, int num, int gen)
1893
{
1894
pdf_xref_entry *x;
1895
int rnum, rgen, try_repair;
1896
1897
fz_var(try_repair);
1898
1899
if (num <= 0 || num >= pdf_xref_len(ctx, doc))
1900
fz_throw(ctx, FZ_ERROR_GENERIC, "object out of range (%d %d R); xref size %d", num, gen, pdf_xref_len(ctx, doc));
1901
1902
object_updated:
1903
try_repair = 0;
1904
rnum = num;
1905
1906
x = pdf_get_xref_entry(ctx, doc, num);
1907
1908
if (x->obj != NULL)
1909
return x;
1910
1911
if (x->type == 'f')
1912
{
1913
x->obj = pdf_new_null(ctx, doc);
1914
}
1915
else if (x->type == 'n')
1916
{
1917
fz_seek(ctx, doc->file, x->ofs, SEEK_SET);
1918
1919
fz_try(ctx)
1920
{
1921
x->obj = pdf_parse_ind_obj(ctx, doc, doc->file, &doc->lexbuf.base,
1922
&rnum, &rgen, &x->stm_ofs, &try_repair);
1923
}
1924
fz_catch(ctx)
1925
{
1926
if (!try_repair || fz_caught(ctx) == FZ_ERROR_TRYLATER)
1927
fz_rethrow(ctx);
1928
}
1929
1930
if (!try_repair && rnum != num)
1931
{
1932
pdf_drop_obj(ctx, x->obj);
1933
x->obj = NULL;
1934
try_repair = (doc->repair_attempted == 0);
1935
}
1936
1937
if (try_repair)
1938
{
1939
fz_try(ctx)
1940
{
1941
pdf_repair_xref(ctx, doc);
1942
pdf_prime_xref_index(ctx, doc);
1943
}
1944
fz_catch(ctx)
1945
{
1946
if (rnum == num)
1947
fz_throw(ctx, FZ_ERROR_GENERIC, "cannot parse object (%d %d R)", num, gen);
1948
else
1949
fz_throw(ctx, FZ_ERROR_GENERIC, "found object (%d %d R) instead of (%d %d R)", rnum, rgen, num, gen);
1950
}
1951
goto object_updated;
1952
}
1953
1954
if (doc->crypt)
1955
pdf_crypt_obj(ctx, doc->crypt, x->obj, num, gen);
1956
}
1957
else if (x->type == 'o')
1958
{
1959
if (!x->obj)
1960
{
1961
fz_try(ctx)
1962
{
1963
x = pdf_load_obj_stm(ctx, doc, x->ofs, 0, &doc->lexbuf.base, num);
1964
}
1965
fz_catch(ctx)
1966
{
1967
fz_rethrow_message(ctx, "cannot load object stream containing object (%d %d R)", num, gen);
1968
}
1969
if (x == NULL)
1970
fz_throw(ctx, FZ_ERROR_GENERIC, "cannot load object stream containing object (%d %d R)", num, gen);
1971
if (!x->obj)
1972
fz_throw(ctx, FZ_ERROR_GENERIC, "object (%d %d R) was not found in its object stream", num, gen);
1973
}
1974
}
1975
else if (doc->hint_obj_offsets && read_hinted_object(ctx, doc, num))
1976
{
1977
goto object_updated;
1978
}
1979
else if (doc->file_length && doc->linear_pos < doc->file_length)
1980
{
1981
fz_throw(ctx, FZ_ERROR_TRYLATER, "cannot find object in xref (%d %d R) - not loaded yet?", num, gen);
1982
}
1983
else
1984
{
1985
fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find object in xref (%d %d R)", num, gen);
1986
}
1987
1988
pdf_set_obj_parent(ctx, x->obj, num);
1989
return x;
1990
}
1991
1992
pdf_obj *
1993
pdf_load_object(fz_context *ctx, pdf_document *doc, int num, int gen)
1994
{
1995
pdf_xref_entry *entry;
1996
1997
fz_try(ctx)
1998
{
1999
entry = pdf_cache_object(ctx, doc, num, gen);
2000
}
2001
fz_catch(ctx)
2002
{
2003
fz_rethrow_message(ctx, "cannot load object (%d %d R) into cache", num, gen);
2004
}
2005
2006
assert(entry->obj != NULL);
2007
2008
return pdf_keep_obj(ctx, entry->obj);
2009
}
2010
2011
pdf_obj *
2012
pdf_resolve_indirect(fz_context *ctx, pdf_obj *ref)
2013
{
2014
int sanity = 10;
2015
int num;
2016
int gen;
2017
pdf_xref_entry *entry;
2018
2019
while (pdf_is_indirect(ctx, ref))
2020
{
2021
pdf_document *doc;
2022
2023
if (--sanity == 0)
2024
{
2025
fz_warn(ctx, "too many indirections (possible indirection cycle involving %d %d R)", num, gen);
2026
return NULL;
2027
}
2028
2029
doc = pdf_get_indirect_document(ctx, ref);
2030
if (!doc)
2031
return NULL;
2032
num = pdf_to_num(ctx, ref);
2033
gen = pdf_to_gen(ctx, ref);
2034
2035
if (num <= 0 || gen < 0)
2036
{
2037
fz_warn(ctx, "invalid indirect reference (%d %d R)", num, gen);
2038
return NULL;
2039
}
2040
2041
fz_try(ctx)
2042
{
2043
entry = pdf_cache_object(ctx, doc, num, gen);
2044
}
2045
fz_catch(ctx)
2046
{
2047
fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
2048
fz_warn(ctx, "cannot load object (%d %d R) into cache", num, gen);
2049
return NULL;
2050
}
2051
2052
if (entry->obj == NULL)
2053
return NULL;
2054
ref = entry->obj;
2055
}
2056
2057
return ref;
2058
}
2059
2060
int
2061
pdf_count_objects(fz_context *ctx, pdf_document *doc)
2062
{
2063
return pdf_xref_len(ctx, doc);
2064
}
2065
2066
int
2067
pdf_create_object(fz_context *ctx, pdf_document *doc)
2068
{
2069
/* TODO: reuse free object slots by properly linking free object chains in the ofs field */
2070
pdf_xref_entry *entry;
2071
int num = pdf_xref_len(ctx, doc);
2072
entry = pdf_get_incremental_xref_entry(ctx, doc, num);
2073
entry->type = 'f';
2074
entry->ofs = -1;
2075
entry->gen = 0;
2076
entry->stm_ofs = 0;
2077
entry->stm_buf = NULL;
2078
entry->obj = NULL;
2079
return num;
2080
}
2081
2082
void
2083
pdf_delete_object(fz_context *ctx, pdf_document *doc, int num)
2084
{
2085
pdf_xref_entry *x;
2086
2087
if (num <= 0 || num >= pdf_xref_len(ctx, doc))
2088
{
2089
fz_warn(ctx, "object out of range (%d 0 R); xref size %d", num, pdf_xref_len(ctx, doc));
2090
return;
2091
}
2092
2093
x = pdf_get_incremental_xref_entry(ctx, doc, num);
2094
2095
fz_drop_buffer(ctx, x->stm_buf);
2096
pdf_drop_obj(ctx, x->obj);
2097
2098
x->type = 'f';
2099
x->ofs = 0;
2100
x->gen = 0;
2101
x->stm_ofs = 0;
2102
x->stm_buf = NULL;
2103
x->obj = NULL;
2104
}
2105
2106
void
2107
pdf_update_object(fz_context *ctx, pdf_document *doc, int num, pdf_obj *newobj)
2108
{
2109
pdf_xref_entry *x;
2110
2111
if (num <= 0 || num >= pdf_xref_len(ctx, doc))
2112
{
2113
fz_warn(ctx, "object out of range (%d 0 R); xref size %d", num, pdf_xref_len(ctx, doc));
2114
return;
2115
}
2116
2117
x = pdf_get_incremental_xref_entry(ctx, doc, num);
2118
2119
pdf_drop_obj(ctx, x->obj);
2120
2121
x->type = 'n';
2122
x->ofs = 0;
2123
x->obj = pdf_keep_obj(ctx, newobj);
2124
2125
pdf_set_obj_parent(ctx, newobj, num);
2126
}
2127
2128
void
2129
pdf_update_stream(fz_context *ctx, pdf_document *doc, pdf_obj *obj, fz_buffer *newbuf, int compressed)
2130
{
2131
int num;
2132
pdf_xref_entry *x;
2133
2134
if (pdf_is_indirect(ctx, obj))
2135
num = pdf_to_num(ctx, obj);
2136
else
2137
num = pdf_obj_parent_num(ctx, obj);
2138
if (num <= 0 || num >= pdf_xref_len(ctx, doc))
2139
{
2140
fz_warn(ctx, "object out of range (%d 0 R); xref size %d", num, pdf_xref_len(ctx, doc));
2141
return;
2142
}
2143
2144
x = pdf_get_xref_entry(ctx, doc, num);
2145
2146
fz_drop_buffer(ctx, x->stm_buf);
2147
x->stm_buf = fz_keep_buffer(ctx, newbuf);
2148
2149
pdf_dict_puts_drop(ctx, obj, "Length", pdf_new_int(ctx, doc, newbuf->len));
2150
if (!compressed)
2151
{
2152
pdf_dict_dels(ctx, obj, "Filter");
2153
pdf_dict_dels(ctx, obj, "DecodeParms");
2154
}
2155
}
2156
2157
int
2158
pdf_lookup_metadata(fz_context *ctx, pdf_document *doc, const char *key, char *buf, int size)
2159
{
2160
if (!strcmp(key, "format"))
2161
return fz_snprintf(buf, size, "PDF %d.%d", doc->version/10, doc->version % 10);
2162
2163
if (!strcmp(key, "encryption"))
2164
{
2165
if (doc->crypt)
2166
return fz_snprintf(buf, size, "Standard V%d R%d %d-bit %s",
2167
pdf_crypt_version(ctx, doc),
2168
pdf_crypt_revision(ctx, doc),
2169
pdf_crypt_length(ctx, doc),
2170
pdf_crypt_method(ctx, doc));
2171
else
2172
return fz_strlcpy(buf, "None", size);
2173
}
2174
2175
if (strstr(key, "info:") == key)
2176
{
2177
pdf_obj *info;
2178
char *s;
2179
int n;
2180
2181
info = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME_Info);
2182
if (!info)
2183
return -1;
2184
2185
info = pdf_dict_gets(ctx, info, key + 5);
2186
if (!info)
2187
return -1;
2188
2189
s = pdf_to_utf8(ctx, doc, info);
2190
n = fz_strlcpy(buf, s, size);
2191
fz_free(ctx, s);
2192
return n;
2193
}
2194
2195
return -1;
2196
}
2197
2198
fz_transition *
2199
pdf_page_presentation(fz_context *ctx, pdf_page *page, float *duration)
2200
{
2201
*duration = page->duration;
2202
if (!page->transition_present)
2203
return NULL;
2204
return &page->transition;
2205
}
2206
2207
/*
2208
Initializers for the fz_document interface.
2209
2210
The functions are split across two files to allow calls to a
2211
version of the constructor that does not link in the interpreter.
2212
The interpreter references the built-in font and cmap resources
2213
which are quite big. Not linking those into the mubusy binary
2214
saves roughly 6MB of space.
2215
*/
2216
2217
static pdf_document *
2218
pdf_new_document(fz_context *ctx, fz_stream *file)
2219
{
2220
pdf_document *doc = fz_malloc_struct(ctx, pdf_document);
2221
2222
doc->super.refs = 1;
2223
doc->super.close = (fz_document_close_fn *)pdf_close_document;
2224
doc->super.needs_password = (fz_document_needs_password_fn *)pdf_needs_password;
2225
doc->super.authenticate_password = (fz_document_authenticate_password_fn *)pdf_authenticate_password;
2226
doc->super.has_permission = (fz_document_has_permission_fn *)pdf_has_permission;
2227
doc->super.load_outline = (fz_document_load_outline_fn *)pdf_load_outline;
2228
doc->super.count_pages = (fz_document_count_pages_fn *)pdf_count_pages;
2229
doc->super.load_page = (fz_document_load_page_fn *)pdf_load_page;
2230
doc->super.lookup_metadata = (fz_document_lookup_metadata_fn *)pdf_lookup_metadata;
2231
doc->super.write = (fz_document_write_fn *)pdf_write_document;
2232
doc->update_appearance = pdf_update_appearance;
2233
2234
pdf_lexbuf_init(ctx, &doc->lexbuf.base, PDF_LEXBUF_LARGE);
2235
doc->file = fz_keep_stream(ctx, file);
2236
2237
return doc;
2238
}
2239
2240
pdf_document *
2241
pdf_open_document_with_stream(fz_context *ctx, fz_stream *file)
2242
{
2243
pdf_document *doc = pdf_new_document(ctx, file);
2244
fz_try(ctx)
2245
{
2246
pdf_init_document(ctx, doc);
2247
}
2248
fz_catch(ctx)
2249
{
2250
pdf_close_document(ctx, doc);
2251
fz_rethrow_message(ctx, "cannot load document from stream");
2252
}
2253
return doc;
2254
}
2255
2256
pdf_document *
2257
pdf_open_document(fz_context *ctx, const char *filename)
2258
{
2259
fz_stream *file = NULL;
2260
pdf_document *doc = NULL;
2261
2262
fz_var(file);
2263
fz_var(doc);
2264
2265
fz_try(ctx)
2266
{
2267
file = fz_open_file(ctx, filename);
2268
doc = pdf_new_document(ctx, file);
2269
pdf_init_document(ctx, doc);
2270
}
2271
fz_always(ctx)
2272
{
2273
fz_drop_stream(ctx, file);
2274
}
2275
fz_catch(ctx)
2276
{
2277
pdf_close_document(ctx, doc);
2278
fz_rethrow_message(ctx, "cannot load document '%s'", filename);
2279
}
2280
return doc;
2281
}
2282
2283
static void
2284
pdf_load_hints(fz_context *ctx, pdf_document *doc, int objnum, int gennum)
2285
{
2286
fz_stream *stream = NULL;
2287
pdf_obj *dict;
2288
2289
fz_var(stream);
2290
fz_var(dict);
2291
2292
fz_try(ctx)
2293
{
2294
int i, j, least_num_page_objs, page_obj_num_bits;
2295
int least_page_len, page_len_num_bits, shared_hint_offset;
2296
/* int least_page_offset, page_offset_num_bits; */
2297
/* int least_content_stream_len, content_stream_len_num_bits; */
2298
int num_shared_obj_num_bits, shared_obj_num_bits;
2299
/* int numerator_bits, denominator_bits; */
2300
int shared;
2301
int shared_obj_num, shared_obj_offset, shared_obj_count_page1;
2302
int shared_obj_count_total;
2303
int least_shared_group_len, shared_group_len_num_bits;
2304
int max_object_num = pdf_xref_len(ctx, doc);
2305
2306
stream = pdf_open_stream(ctx, doc, objnum, gennum);
2307
dict = pdf_get_xref_entry(ctx, doc, objnum)->obj;
2308
if (dict == NULL || !pdf_is_dict(ctx, dict))
2309
fz_throw(ctx, FZ_ERROR_GENERIC, "malformed hint object");
2310
2311
shared_hint_offset = pdf_to_int(ctx, pdf_dict_get(ctx, dict, PDF_NAME_S));
2312
2313
/* Malloc the structures (use realloc to cope with the fact we
2314
* may try this several times before enough data is loaded) */
2315
doc->hint_page = fz_resize_array(ctx, doc->hint_page, doc->page_count+1, sizeof(*doc->hint_page));
2316
memset(doc->hint_page, 0, sizeof(*doc->hint_page) * (doc->page_count+1));
2317
doc->hint_obj_offsets = fz_resize_array(ctx, doc->hint_obj_offsets, max_object_num, sizeof(*doc->hint_obj_offsets));
2318
memset(doc->hint_obj_offsets, 0, sizeof(*doc->hint_obj_offsets) * max_object_num);
2319
doc->hint_obj_offsets_max = max_object_num;
2320
2321
/* Read the page object hints table: Header first */
2322
least_num_page_objs = fz_read_bits(ctx, stream, 32);
2323
/* The following is sometimes a lie, but we read this version,
2324
* as other table values are built from it. In
2325
* pdf_reference17.pdf, this points to 2 objects before the
2326
* first pages page object. */
2327
doc->hint_page[0].offset = fz_read_bits(ctx, stream, 32);
2328
if (doc->hint_page[0].offset > doc->hint_object_offset)
2329
doc->hint_page[0].offset += doc->hint_object_length;
2330
page_obj_num_bits = fz_read_bits(ctx, stream, 16);
2331
least_page_len = fz_read_bits(ctx, stream, 32);
2332
page_len_num_bits = fz_read_bits(ctx, stream, 16);
2333
/* least_page_offset = */ (void) fz_read_bits(ctx, stream, 32);
2334
/* page_offset_num_bits = */ (void) fz_read_bits(ctx, stream, 16);
2335
/* least_content_stream_len = */ (void) fz_read_bits(ctx, stream, 32);
2336
/* content_stream_len_num_bits = */ (void) fz_read_bits(ctx, stream, 16);
2337
num_shared_obj_num_bits = fz_read_bits(ctx, stream, 16);
2338
shared_obj_num_bits = fz_read_bits(ctx, stream, 16);
2339
/* numerator_bits = */ (void) fz_read_bits(ctx, stream, 16);
2340
/* denominator_bits = */ (void) fz_read_bits(ctx, stream, 16);
2341
2342
/* Item 1: Page object numbers */
2343
doc->hint_page[0].number = doc->linear_page1_obj_num;
2344
/* We don't care about the number of objects in the first page */
2345
(void)fz_read_bits(ctx, stream, page_obj_num_bits);
2346
j = 1;
2347
for (i = 1; i < doc->page_count; i++)
2348
{
2349
int delta_page_objs = fz_read_bits(ctx, stream, page_obj_num_bits);
2350
2351
doc->hint_page[i].number = j;
2352
j += least_num_page_objs + delta_page_objs;
2353
}
2354
doc->hint_page[i].number = j; /* Not a real page object */
2355
fz_sync_bits(ctx, stream);
2356
/* Item 2: Page lengths */
2357
j = doc->hint_page[0].offset;
2358
for (i = 0; i < doc->page_count; i++)
2359
{
2360
int delta_page_len = fz_read_bits(ctx, stream, page_len_num_bits);
2361
int old = j;
2362
2363
doc->hint_page[i].offset = j;
2364
j += least_page_len + delta_page_len;
2365
if (old <= doc->hint_object_offset && j > doc->hint_object_offset)
2366
j += doc->hint_object_length;
2367
}
2368
doc->hint_page[i].offset = j;
2369
fz_sync_bits(ctx, stream);
2370
/* Item 3: Shared references */
2371
shared = 0;
2372
for (i = 0; i < doc->page_count; i++)
2373
{
2374
int num_shared_objs = fz_read_bits(ctx, stream, num_shared_obj_num_bits);
2375
doc->hint_page[i].index = shared;
2376
shared += num_shared_objs;
2377
}
2378
doc->hint_page[i].index = shared;
2379
doc->hint_shared_ref = fz_resize_array(ctx, doc->hint_shared_ref, shared, sizeof(*doc->hint_shared_ref));
2380
memset(doc->hint_shared_ref, 0, sizeof(*doc->hint_shared_ref) * shared);
2381
fz_sync_bits(ctx, stream);
2382
/* Item 4: Shared references */
2383
for (i = 0; i < shared; i++)
2384
{
2385
int ref = fz_read_bits(ctx, stream, shared_obj_num_bits);
2386
doc->hint_shared_ref[i] = ref;
2387
}
2388
/* Skip items 5,6,7 as we don't use them */
2389
2390
fz_seek(ctx, stream, shared_hint_offset, SEEK_SET);
2391
2392
/* Read the shared object hints table: Header first */
2393
shared_obj_num = fz_read_bits(ctx, stream, 32);
2394
shared_obj_offset = fz_read_bits(ctx, stream, 32);
2395
if (shared_obj_offset > doc->hint_object_offset)
2396
shared_obj_offset += doc->hint_object_length;
2397
shared_obj_count_page1 = fz_read_bits(ctx, stream, 32);
2398
shared_obj_count_total = fz_read_bits(ctx, stream, 32);
2399
shared_obj_num_bits = fz_read_bits(ctx, stream, 16);
2400
least_shared_group_len = fz_read_bits(ctx, stream, 32);
2401
shared_group_len_num_bits = fz_read_bits(ctx, stream, 16);
2402
2403
/* Sanity check the references in Item 4 above to ensure we
2404
* don't access out of range with malicious files. */
2405
for (i = 0; i < shared; i++)
2406
{
2407
if (doc->hint_shared_ref[i] >= shared_obj_count_total)
2408
{
2409
fz_throw(ctx, FZ_ERROR_GENERIC, "malformed hint stream (shared refs)");
2410
}
2411
}
2412
2413
doc->hint_shared = fz_resize_array(ctx, doc->hint_shared, shared_obj_count_total+1, sizeof(*doc->hint_shared));
2414
memset(doc->hint_shared, 0, sizeof(*doc->hint_shared) * (shared_obj_count_total+1));
2415
2416
/* Item 1: Shared references */
2417
j = doc->hint_page[0].offset;
2418
for (i = 0; i < shared_obj_count_page1; i++)
2419
{
2420
int off = fz_read_bits(ctx, stream, shared_group_len_num_bits);
2421
int old = j;
2422
doc->hint_shared[i].offset = j;
2423
j += off + least_shared_group_len;
2424
if (old <= doc->hint_object_offset && j > doc->hint_object_offset)
2425
j += doc->hint_object_length;
2426
}
2427
/* FIXME: We would have problems recreating the length of the
2428
* last page 1 shared reference group. But we'll never need
2429
* to, so ignore it. */
2430
j = shared_obj_offset;
2431
for (; i < shared_obj_count_total; i++)
2432
{
2433
int off = fz_read_bits(ctx, stream, shared_group_len_num_bits);
2434
int old = j;
2435
doc->hint_shared[i].offset = j;
2436
j += off + least_shared_group_len;
2437
if (old <= doc->hint_object_offset && j > doc->hint_object_offset)
2438
j += doc->hint_object_length;
2439
}
2440
doc->hint_shared[i].offset = j;
2441
fz_sync_bits(ctx, stream);
2442
/* Item 2: Signature flags: read these just so we can skip */
2443
for (i = 0; i < shared_obj_count_total; i++)
2444
{
2445
doc->hint_shared[i].number = fz_read_bits(ctx, stream, 1);
2446
}
2447
fz_sync_bits(ctx, stream);
2448
/* Item 3: Signatures: just skip */
2449
for (i = 0; i < shared_obj_count_total; i++)
2450
{
2451
if (doc->hint_shared[i].number)
2452
{
2453
(void) fz_read_bits(ctx, stream, 128);
2454
}
2455
}
2456
fz_sync_bits(ctx, stream);
2457
/* Item 4: Shared object object numbers */
2458
j = doc->linear_page1_obj_num; /* FIXME: This is a lie! */
2459
for (i = 0; i < shared_obj_count_page1; i++)
2460
{
2461
doc->hint_shared[i].number = j;
2462
j += fz_read_bits(ctx, stream, shared_obj_num_bits) + 1;
2463
}
2464
j = shared_obj_num;
2465
for (; i < shared_obj_count_total; i++)
2466
{
2467
doc->hint_shared[i].number = j;
2468
j += fz_read_bits(ctx, stream, shared_obj_num_bits) + 1;
2469
}
2470
doc->hint_shared[i].number = j;
2471
2472
/* Now, actually use the data we have gathered. */
2473
for (i = 0 /*shared_obj_count_page1*/; i < shared_obj_count_total; i++)
2474
{
2475
doc->hint_obj_offsets[doc->hint_shared[i].number] = doc->hint_shared[i].offset;
2476
}
2477
for (i = 0; i < doc->page_count; i++)
2478
{
2479
doc->hint_obj_offsets[doc->hint_page[i].number] = doc->hint_page[i].offset;
2480
}
2481
}
2482
fz_always(ctx)
2483
{
2484
fz_drop_stream(ctx, stream);
2485
}
2486
fz_catch(ctx)
2487
{
2488
fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
2489
/* Don't try to load hints again */
2490
doc->hints_loaded = 1;
2491
/* We won't use the linearized object any more. */
2492
doc->file_reading_linearly = 0;
2493
/* Any other error becomes a TRYLATER */
2494
fz_throw(ctx, FZ_ERROR_TRYLATER, "malformed hints object");
2495
}
2496
doc->hints_loaded = 1;
2497
}
2498
2499
static void
2500
pdf_load_hint_object(fz_context *ctx, pdf_document *doc)
2501
{
2502
pdf_lexbuf *buf = &doc->lexbuf.base;
2503
int curr_pos;
2504
2505
curr_pos = fz_tell(ctx, doc->file);
2506
fz_seek(ctx, doc->file, doc->hint_object_offset, SEEK_SET);
2507
fz_try(ctx)
2508
{
2509
while (1)
2510
{
2511
pdf_obj *page = NULL;
2512
int tmpofs, num, gen, tok;
2513
2514
tok = pdf_lex(ctx, doc->file, buf);
2515
if (tok != PDF_TOK_INT)
2516
break;
2517
num = buf->i;
2518
tok = pdf_lex(ctx, doc->file, buf);
2519
if (tok != PDF_TOK_INT)
2520
break;
2521
gen = buf->i;
2522
tok = pdf_lex(ctx, doc->file, buf);
2523
if (tok != PDF_TOK_OBJ)
2524
break;
2525
(void)pdf_repair_obj(ctx, doc, buf, &tmpofs, NULL, NULL, NULL, &page, &tmpofs);
2526
pdf_load_hints(ctx, doc, num, gen);
2527
}
2528
}
2529
fz_always(ctx)
2530
{
2531
fz_seek(ctx, doc->file, curr_pos, SEEK_SET);
2532
}
2533
fz_catch(ctx)
2534
{
2535
fz_rethrow(ctx);
2536
}
2537
}
2538
2539
/*
 * Try to make page 'pagenum' available while a file is being read
 * progressively, and return doc->linear_page_refs[pagenum].
 *
 * 'pagenum' must be in [0, doc->page_count); otherwise FZ_ERROR_GENERIC is
 * thrown before anything else is attempted.
 *
 * If the whole file has already been consumed (linear_pos == file_length)
 * the stored page reference is returned immediately. Otherwise objects are
 * read from doc->linear_pos until end of file, after which the xref is
 * loaded and the page tree is checked.
 *
 * A FZ_ERROR_TRYLATER raised while reading is swallowed if the requested
 * page reference has already been found; it is rethrown if the page is
 * still missing. All other errors are rethrown. The file position on entry
 * to the read loop is restored on every path.
 */
pdf_obj *pdf_progressive_advance(fz_context *ctx, pdf_document *doc, int pagenum)
{
	pdf_lexbuf *buf = &doc->lexbuf.base;
	int curr_pos;
	pdf_obj *page = NULL;

	/* Validate the page number before it is handed to any helper. */
	if (pagenum < 0 || pagenum >= doc->page_count)
		fz_throw(ctx, FZ_ERROR_GENERIC, "page load out of range (%d of %d)", pagenum, doc->page_count);

	pdf_load_hinted_page(ctx, doc, pagenum);

	if (doc->linear_pos == doc->file_length)
		return doc->linear_page_refs[pagenum];

	/* Only load hints once, and then only after we have got page 0 */
	if (pagenum > 0 && !doc->hints_loaded && doc->hint_object_offset > 0 && doc->linear_pos >= doc->hint_object_offset)
	{
		/* Found hint object */
		pdf_load_hint_object(ctx, doc);
	}

	DEBUGMESS((ctx, "continuing to try to advance from %d", doc->linear_pos));
	curr_pos = fz_tell(ctx, doc->file);

	fz_var(page);

	fz_try(ctx)
	{
		pdf_obj *catalog;
		pdf_obj *pages;
		int eof;

		/* Consume objects until the reader reports end of file. Any
		 * object handed back through 'page' is dropped straight away. */
		do
		{
			int num;
			page = NULL;
			eof = pdf_obj_read(ctx, doc, &doc->linear_pos, &num, &page);
			pdf_drop_obj(ctx, page);
			page = NULL;
		}
		while (!eof);

		/* Everything has been read: switch over to the real xref. */
		doc->linear_pos = doc->file_length;
		pdf_load_xref(ctx, doc, buf);
		catalog = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME_Root);
		pages = pdf_dict_get(ctx, catalog, PDF_NAME_Pages);

		if (!pdf_is_dict(ctx, pages))
			fz_throw(ctx, FZ_ERROR_GENERIC, "missing page tree");
	}
	fz_always(ctx)
	{
		fz_seek(ctx, doc->file, curr_pos, SEEK_SET);
	}
	fz_catch(ctx)
	{
		pdf_drop_obj(ctx, page);
		if (fz_caught(ctx) == FZ_ERROR_TRYLATER)
		{
			if (doc->linear_page_refs[pagenum] == NULL)
			{
				/* Still not got a page */
				fz_rethrow(ctx);
			}
		}
		else
			fz_rethrow(ctx);
	}

	return doc->linear_page_refs[pagenum];
}
2612
2613
/*
 * Downcast a generic document to a PDF document.
 *
 * Returns 'doc' as a pdf_document when it is non-NULL and its close
 * handler is pdf_close_document; returns NULL otherwise.
 */
pdf_document *pdf_specifics(fz_context *ctx, fz_document *doc)
{
	if (doc == NULL)
		return NULL;

	/* The close handler is used as the type tag for PDF documents. */
	if (doc->close != (fz_document_close_fn *)pdf_close_document)
		return NULL;

	return (pdf_document *)doc;
}
2617
2618
/*
 * Create a new, empty PDF document that is not backed by a file.
 *
 * The document gets a trailer with /Size 3 and a /Root catalog, and the
 * catalog gets a /Pages node with /Count 0 and an empty /Kids array.
 *
 * On failure every temporary object is dropped, the partially built
 * document is closed, and the error is rethrown with the message
 * "Failed to create empty document".
 *
 * Returns the new document.
 */
pdf_document *pdf_create_document(fz_context *ctx)
{
	pdf_document *doc;
	pdf_obj *o = NULL;
	pdf_obj *root;
	pdf_obj *pages;
	pdf_obj *trailer = NULL;

	fz_var(o);
	fz_var(trailer);

	doc = pdf_new_document(ctx, NULL);
	fz_try(ctx)
	{
		doc->version = 14;
		doc->file_size = 0;
		doc->startxref = 0;
		doc->num_xref_sections = 0;
		pdf_get_populating_xref_entry(ctx, doc, 0);
		doc->xref_altered = 1;

		trailer = pdf_new_dict(ctx, doc, 2);
		pdf_dict_put_drop(ctx, trailer, PDF_NAME_Size, pdf_new_int(ctx, doc, 3));

		/* Catalog: 'o' holds the local reference so that the catch
		 * block can release it if creating the indirect ref throws. */
		o = root = pdf_new_dict(ctx, doc, 2);
		pdf_dict_put_drop(ctx, trailer, PDF_NAME_Root, pdf_new_ref(ctx, doc, o));
		pdf_drop_obj(ctx, o);
		o = NULL;
		pdf_dict_put_drop(ctx, root, PDF_NAME_Type, PDF_NAME_Catalog);

		/* Page tree root, handled the same way as the catalog. */
		o = pages = pdf_new_dict(ctx, doc, 3);
		pdf_dict_put_drop(ctx, root, PDF_NAME_Pages, pdf_new_ref(ctx, doc, o));
		pdf_drop_obj(ctx, o);
		o = NULL;
		pdf_dict_put_drop(ctx, pages, PDF_NAME_Type, PDF_NAME_Pages);
		pdf_dict_put_drop(ctx, pages, PDF_NAME_Count, pdf_new_int(ctx, doc, 0));
		pdf_dict_put_drop(ctx, pages, PDF_NAME_Kids, pdf_new_array(ctx, doc, 1));

		pdf_set_populating_xref_trailer(ctx, doc, trailer);
		pdf_drop_obj(ctx, trailer);
	}
	fz_catch(ctx)
	{
		pdf_drop_obj(ctx, trailer);
		pdf_drop_obj(ctx, o);
		/* Release the half-built document as well, as the document
		 * open functions do on failure; otherwise it is leaked. */
		pdf_close_document(ctx, doc);
		fz_rethrow_message(ctx, "Failed to create empty document");
	}
	return doc;
}
2663
2664
int
2665
pdf_recognize(fz_context *doc, const char *magic)
2666
{
2667
char *ext = strrchr(magic, '.');
2668
2669
if (ext)
2670
{
2671
if (!fz_strcasecmp(ext, ".pdf"))
2672
return 100;
2673
}
2674
if (!strcmp(magic, "pdf") || !strcmp(magic, "application/pdf"))
2675
return 100;
2676
2677
return 1;
2678
}
2679
2680
fz_document_handler pdf_document_handler =
2681
{
2682
(fz_document_recognize_fn *)&pdf_recognize,
2683
(fz_document_open_fn *)&pdf_open_document,
2684
(fz_document_open_with_stream_fn *)&pdf_open_document_with_stream
2685
};
2686
2687
/*
 * Set PDF_OBJ_FLAG_MARK on every xref entry, in every section and
 * subsection, that currently has an object attached. Entries without an
 * object are left untouched.
 */
void pdf_mark_xref(fz_context *ctx, pdf_document *doc)
{
	int sec;

	for (sec = 0; sec < doc->num_xref_sections; sec++)
	{
		pdf_xref_subsec *sub = doc->xref_sections[sec].subsec;

		while (sub != NULL)
		{
			int idx;

			for (idx = 0; idx < sub->len; idx++)
			{
				pdf_xref_entry *entry = sub->table + idx;

				if (!entry->obj)
					continue;
				entry->flags |= PDF_OBJ_FLAG_MARK;
			}
			sub = sub->next;
		}
	}
}
2709
2710
/*
 * Walk every xref entry and drop the attached object when it is safe to do
 * so: the entry has an object, it has no stream buffer, and the object's
 * reference count is exactly 1. The entry's object pointer is then set to
 * NULL.
 */
void pdf_clear_xref(fz_context *ctx, pdf_document *doc)
{
	int sec;

	for (sec = 0; sec < doc->num_xref_sections; sec++)
	{
		pdf_xref_subsec *sub = doc->xref_sections[sec].subsec;

		while (sub != NULL)
		{
			int idx;

			for (idx = 0; idx < sub->len; idx++)
			{
				pdf_xref_entry *entry = sub->table + idx;

				/* We cannot drop objects if the stream
				 * buffer has been updated */
				if (entry->obj == NULL || entry->stm_buf != NULL)
					continue;

				/* Only drop objects that have a single reference. */
				if (pdf_obj_refs(ctx, entry->obj) != 1)
					continue;

				pdf_drop_obj(ctx, entry->obj);
				entry->obj = NULL;
			}
			sub = sub->next;
		}
	}
}
2738
2739
/*
 * Like a full clear of the xref, but entries flagged with
 * PDF_OBJ_FLAG_MARK are kept. An object is dropped (and the entry's object
 * pointer set to NULL) only when the entry has an object, has no stream
 * buffer, is not marked, and the object's reference count is exactly 1.
 */
void pdf_clear_xref_to_mark(fz_context *ctx, pdf_document *doc)
{
	int sec;

	for (sec = 0; sec < doc->num_xref_sections; sec++)
	{
		pdf_xref_subsec *sub = doc->xref_sections[sec].subsec;

		while (sub != NULL)
		{
			int idx;

			for (idx = 0; idx < sub->len; idx++)
			{
				pdf_xref_entry *entry = sub->table + idx;

				/* We cannot drop objects if the stream buffer has
				 * been updated */
				if (entry->obj == NULL || entry->stm_buf != NULL)
					continue;

				/* Marked entries are preserved. */
				if ((entry->flags & PDF_OBJ_FLAG_MARK) != 0)
					continue;

				/* Only drop objects that have a single reference. */
				if (pdf_obj_refs(ctx, entry->obj) != 1)
					continue;

				pdf_drop_obj(ctx, entry->obj);
				entry->obj = NULL;
			}
			sub = sub->next;
		}
	}
}
2768
2769