CoCalc -- pdf-xref.c

bin / mupdf / mupdf-1.7 / source / pdf / pdf-xref.c
⁷⁶⁴³ views
1
#include "mupdf/pdf.h"
2
#include "mupdf/fitz/document.h"
3

4
/*
 * Debug tracing switch. DEBUG_PROGESSIVE_ADVANCE is explicitly undefined
 * here, so DEBUGMESS(A) expands to an empty statement and its argument is
 * never evaluated. When the symbol is defined instead, DEBUGMESS((args))
 * forwards the parenthesised argument list to fz_warn.
 */
#undef DEBUG_PROGESSIVE_ADVANCE

#if defined(DEBUG_PROGESSIVE_ADVANCE)
#define DEBUGMESS(A) do { fz_warn A; } while (0)
#else
#define DEBUGMESS(A) do { } while (0)
#endif
11

12
/* Return 1 when ch is NUL, TAB, LF, FF, CR or SPACE; 0 for anything else. */
static inline int iswhite(int ch)
{
	switch (ch)
	{
	case '\000': /* NUL */
	case '\011': /* horizontal tab */
	case '\012': /* line feed */
	case '\014': /* form feed */
	case '\015': /* carriage return */
	case '\040': /* space */
		return 1;
	default:
		return 0;
	}
}
18

19
/*
20
 * xref tables
21
 */
22

23
/*
 * Release every xref section owned by the document: the cached objects
 * and stream buffers of each entry, each subsection's table, the
 * subsection records themselves and both trailers of every section.
 * Afterwards the document holds no sections at all.
 */
static void pdf_drop_xref_sections(fz_context *ctx, pdf_document *doc)
{
	int s;

	for (s = 0; s < doc->num_xref_sections; s++)
	{
		pdf_xref *section = &doc->xref_sections[s];
		pdf_xref_subsec *cur;
		pdf_xref_subsec *following;

		for (cur = section->subsec; cur != NULL; cur = following)
		{
			int k;

			/* Remember the successor before this node is freed */
			following = cur->next;

			for (k = 0; k < cur->len; k++)
			{
				pdf_xref_entry *slot = &cur->table[k];

				/* The stream buffer is only released for entries
				 * that also hold a cached object. */
				if (slot->obj == NULL)
					continue;

				pdf_drop_obj(ctx, slot->obj);
				fz_drop_buffer(ctx, slot->stm_buf);
			}

			fz_free(ctx, cur->table);
			fz_free(ctx, cur);
		}

		pdf_drop_obj(ctx, section->pre_repair_trailer);
		pdf_drop_obj(ctx, section->trailer);
	}

	fz_free(ctx, doc->xref_sections);
	doc->xref_sections = NULL;
	doc->num_xref_sections = 0;
}
58

59
static void
60
extend_xref_index(fz_context *ctx, pdf_document *doc, int newlen)
61
{
62
	int i;
63

64
	doc->xref_index = fz_resize_array(ctx, doc->xref_index, newlen, sizeof(int));
65
	for (i = doc->max_xref_len; i < newlen; i++)
66
	{
67
		doc->xref_index[i] = 0;
68
	}
69
	doc->max_xref_len = newlen;
70
}
71

72
/* This is only ever called when we already have an incremental
73
 * xref. This means there will only be 1 subsec, and it will be
74
 * a complete subsec. */
75
static void pdf_resize_xref(fz_context *ctx, pdf_document *doc, int newlen)
76
{
77
	int i;
78
	pdf_xref *xref = &doc->xref_sections[0];
79
	pdf_xref_subsec *sub;
80

81
	assert(xref != NULL);
82
	sub = xref->subsec;
83
	assert(sub->next == NULL && sub->start == 0 && sub->len == xref->num_objects);
84
	assert(newlen > xref->num_objects);
85

86
	sub->table = fz_resize_array(ctx, sub->table, newlen, sizeof(pdf_xref_entry));
87
	for (i = xref->num_objects; i < newlen; i++)
88
	{
89
		sub->table[i].type = 0;
90
		sub->table[i].ofs = 0;
91
		sub->table[i].gen = 0;
92
		sub->table[i].stm_ofs = 0;
93
		sub->table[i].stm_buf = NULL;
94
		sub->table[i].obj = NULL;
95
	}
96
	xref->num_objects = newlen;
97
	sub->len = newlen;
98
	if (doc->max_xref_len < newlen)
99
		extend_xref_index(ctx, doc, newlen);
100
}
101

102
/*
 * Append one empty xref section (no subsections, no objects, no
 * trailers) to the end of doc->xref_sections.
 */
static void pdf_populate_next_xref_level(fz_context *ctx, pdf_document *doc)
{
	int count = doc->num_xref_sections;
	pdf_xref *fresh;

	doc->xref_sections = fz_resize_array(ctx, doc->xref_sections, count + 1, sizeof(pdf_xref));
	doc->num_xref_sections = count + 1;

	fresh = &doc->xref_sections[count];
	fresh->subsec = NULL;
	fresh->num_objects = 0;
	fresh->trailer = NULL;
	fresh->pre_repair_trailer = NULL;
}
114

115
/*
 * Return the document's final trailer, i.e. the trailer stored in the
 * first entry of doc->xref_sections. The reference is borrowed: nothing
 * is kept on behalf of the caller.
 */
pdf_obj *pdf_trailer(fz_context *ctx, pdf_document *doc)
{
	return doc->xref_sections[0].trailer;
}
122

123
/*
 * Install 'trailer' (a new reference is kept) as the trailer of the xref
 * section currently being populated, which is the last one in
 * doc->xref_sections. If that section already had a trailer, it is moved
 * to pre_repair_trailer, whose previous value is dropped.
 */
void pdf_set_populating_xref_trailer(fz_context *ctx, pdf_document *doc, pdf_obj *trailer)
{
	pdf_xref *last = &doc->xref_sections[doc->num_xref_sections - 1];

	if (last->trailer != NULL)
	{
		pdf_drop_obj(ctx, last->pre_repair_trailer);
		last->pre_repair_trailer = last->trailer;
	}

	last->trailer = pdf_keep_obj(ctx, trailer);
}
134

135
/* Return doc->max_xref_len, the length recorded for the xref index. */
int pdf_xref_len(fz_context *ctx, pdf_document *doc)
{
	int len = doc->max_xref_len;

	return len;
}
139

140
/* Ensure that the given xref has a single subsection
141
 * that covers the entire range. */
142
static void
143
ensure_solid_xref(fz_context *ctx, pdf_document *doc, int num, int which)
144
{
145
	pdf_xref *xref = &doc->xref_sections[which];
146
	pdf_xref_subsec *sub = xref->subsec;
147
	pdf_xref_subsec *new_sub;
148

149
	if (num < xref->num_objects)
150
		num = xref->num_objects;
151

152
	if (sub != NULL && sub->next == NULL && sub->start == 0 && sub->len >= num)
153
		return;
154

155
	new_sub = fz_malloc_struct(ctx, pdf_xref_subsec);
156
	fz_try(ctx)
157
	{
158
		new_sub->table = fz_calloc(ctx, num, sizeof(pdf_xref_entry));
159
		new_sub->start = 0;
160
		new_sub->len = num;
161
		new_sub->next = NULL;
162
	}
163
	fz_catch(ctx)
164
	{
165
		fz_free(ctx, new_sub);
166
		fz_rethrow(ctx);
167
	}
168

169
	/* Move objects over to the new subsection and destroy the old
170
	 * ones */
171
	sub = xref->subsec;
172
	while (sub != NULL)
173
	{
174
		pdf_xref_subsec *next = sub->next;
175
		int i;
176

177
		for (i = 0; i < sub->len; i++)
178
		{
179
			new_sub->table[i+sub->start] = sub->table[i];
180
		}
181
		fz_free(ctx, sub->table);
182
		fz_free(ctx, sub);
183
		sub = next;
184
	}
185
	xref->num_objects = num;
186
	xref->subsec = new_sub;
187
	if (doc->max_xref_len < num)
188
		extend_xref_index(ctx, doc, num);
189
}
190

191
/* Used while reading the individual xref sections from a file */
192
pdf_xref_entry *pdf_get_populating_xref_entry(fz_context *ctx, pdf_document *doc, int num)
193
{
194
	/* Return an entry within the xref currently being populated */
195
	pdf_xref *xref;
196
	pdf_xref_subsec *sub;
197

198
	if (doc->num_xref_sections == 0)
199
	{
200
		doc->xref_sections = fz_calloc(ctx, 1, sizeof(pdf_xref));
201
		doc->num_xref_sections = 1;
202
	}
203

204
	/* Prevent accidental heap underflow */
205
	if (num < 0)
206
		fz_throw(ctx, FZ_ERROR_GENERIC, "object number must not be negative (%d)", num);
207

208
	/* Return the pointer to the entry in the last section. */
209
	xref = &doc->xref_sections[doc->num_xref_sections-1];
210

211
	for (sub = xref->subsec; sub != NULL; sub = sub->next)
212
	{
213
		if (num >= sub->start && num < sub->start + sub->len)
214
			return &sub->table[num-sub->start];
215
	}
216

217
	/* We've been asked for an object that's not in a subsec. */
218
	ensure_solid_xref(ctx, doc, num+1, doc->num_xref_sections-1);
219
	xref = &doc->xref_sections[doc->num_xref_sections-1];
220
	sub = xref->subsec;
221

222
	return &sub->table[num-sub->start];
223
}
224

225
/* Used after loading a document to access entries */
226
/* This will never throw anything, or return NULL if it is
227
 * only asked to return objects in range within a 'solid'
228
 * xref. */
229
pdf_xref_entry *pdf_get_xref_entry(fz_context *ctx, pdf_document *doc, int i)
230
{
231
	pdf_xref *xref;
232
	pdf_xref_subsec *sub;
233
	int j;
234

235
	if (i < 0)
236
		fz_throw(ctx, FZ_ERROR_GENERIC, "Negative object number requested");
237

238
	if (i <= doc->max_xref_len)
239
		j = doc->xref_index[i];
240
	else
241
		j = 0;
242

243
	/* Find the first xref section where the entry is defined. */
244
	for (; j < doc->num_xref_sections; j++)
245
	{
246
		xref = &doc->xref_sections[j];
247

248
		if (i < xref->num_objects)
249
		{
250
			for (sub = xref->subsec; sub != NULL; sub = sub->next)
251
			{
252
				pdf_xref_entry *entry;
253

254
				if (i < sub->start || i >= sub->start + sub->len)
255
					continue;
256

257
				entry = &sub->table[i - sub->start];
258
				if (entry->type)
259
				{
260
					doc->xref_index[i] = j;
261
					return entry;
262
				}
263
			}
264
		}
265
	}
266

267
	/* Didn't find the entry in any section. Return the entry from
268
	 * the final section. */
269
	doc->xref_index[i] = 0;
270
	if (i < xref->num_objects)
271
	{
272
		xref = &doc->xref_sections[0];
273
		for (sub = xref->subsec; sub != NULL; sub = sub->next)
274
		{
275
			if (i >= sub->start && i < sub->start + sub->len)
276
				return &sub->table[i - sub->start];
277
		}
278
	}
279

280
	/* At this point, we solidify the xref. This ensures that we
281
	 * can return a pointer. This is the only case where this function
282
	 * might throw an exception, and it will never happen when we are
283
	 * working within a 'solid' xref. */
284
	ensure_solid_xref(ctx, doc, i+1, 0);
285
	xref = &doc->xref_sections[0];
286
	sub = xref->subsec;
287
	return &sub->table[i - sub->start];
288
}
289

290
/*
291
	Ensure we have an incremental xref section where we can store
292
	updated versions of indirect objects. This is a new xref section
293
	consisting of a single xref subsection.
294
*/
295
static void ensure_incremental_xref(fz_context *ctx, pdf_document *doc)
296
{
297

298
	if (!doc->xref_altered)
299
	{
300
		pdf_xref *xref = &doc->xref_sections[0];
301
		pdf_xref *pxref;
302
		pdf_xref_entry *new_table = fz_calloc(ctx, xref->num_objects, sizeof(pdf_xref_entry));
303
		pdf_xref_subsec *sub;
304
		pdf_obj *trailer = NULL;
305
		int i;
306

307
		fz_var(trailer);
308
		fz_try(ctx)
309
		{
310
			sub = fz_malloc_struct(ctx, pdf_xref_subsec);
311
			trailer = pdf_copy_dict(ctx, xref->trailer);
312
			doc->xref_sections = fz_resize_array(ctx, doc->xref_sections, doc->num_xref_sections + 1, sizeof(pdf_xref));
313
			xref = &doc->xref_sections[0];
314
			pxref = &doc->xref_sections[1];
315
			memmove(pxref, xref, doc->num_xref_sections * sizeof(pdf_xref));
316
			/* xref->num_objects is already correct */
317
			xref->subsec = sub;
318
			xref->trailer = trailer;
319
			xref->pre_repair_trailer = NULL;
320
			sub->next = NULL;
321
			sub->len = xref->num_objects;
322
			sub->start = 0;
323
			sub->table = new_table;
324
			doc->num_xref_sections++;
325
			doc->xref_altered = 1;
326
		}
327
		fz_catch(ctx)
328
		{
329
			fz_free(ctx, new_table);
330
			pdf_drop_obj(ctx, trailer);
331
			fz_rethrow(ctx);
332
		}
333

334
		/* Update the xref_index */
335
		for (i = 0; i < doc->max_xref_len; i++)
336
		{
337
			doc->xref_index[i]++;
338
		}
339
	}
340
}
341

342
/* Used when altering a document */
343
static pdf_xref_entry *pdf_get_incremental_xref_entry(fz_context *ctx, pdf_document *doc, int i)
344
{
345
	pdf_xref *xref;
346
	pdf_xref_subsec *sub;
347

348
	/* Make a new final xref section if we haven't already */
349
	ensure_incremental_xref(ctx, doc);
350

351
	xref = &doc->xref_sections[0];
352
	if (i >= xref->num_objects)
353
		pdf_resize_xref(ctx, doc, i + 1);
354

355
	sub = xref->subsec;
356
	assert(sub != NULL && sub->next == NULL);
357
	assert(i >= sub->start && i < sub->start + sub->len);
358
	doc->xref_index[i] = 0;
359
	return &sub->table[i - sub->start];
360
}
361

362
/*
 * Return 1 when the document's xref has been altered and object 'num'
 * has a typed (non-zero type) entry in section 0, otherwise 0.
 * Section 0 is asserted to be a single complete subsection starting at 0.
 */
int pdf_xref_is_incremental(fz_context *ctx, pdf_document *doc, int num)
{
	pdf_xref *first = &doc->xref_sections[0];
	pdf_xref_subsec *only = first->subsec;

	assert(only != NULL && only->next == NULL && only->len == first->num_objects && only->start == 0);

	if (!doc->xref_altered)
		return 0;
	if (num >= first->num_objects)
		return 0;

	return only->table[num].type != 0;
}
371

372
/* Ensure that the current populating xref has a single subsection
373
 * that covers the entire range. */
374
void pdf_ensure_solid_xref(fz_context *ctx, pdf_document *doc, int num)
375
{
376
	if (doc->num_xref_sections == 0)
377
		pdf_populate_next_xref_level(ctx, doc);
378

379
	ensure_solid_xref(ctx, doc, num, doc->num_xref_sections-1);
380
}
381

382
/* Ensure that an object has been cloned into the incremental xref section */
383
void pdf_xref_ensure_incremental_object(fz_context *ctx, pdf_document *doc, int num)
384
{
385
	pdf_xref_entry *new_entry, *old_entry;
386
	pdf_xref_subsec *sub = NULL;
387
	int i;
388

389
	/* Make sure we have created an xref section for incremental updates */
390
	ensure_incremental_xref(ctx, doc);
391

392
	/* Search for the section that contains this object */
393
	for (i = doc->xref_index[num]; i < doc->num_xref_sections; i++)
394
	{
395
		pdf_xref *xref = &doc->xref_sections[i];
396

397
		if (num < 0 && num >= xref->num_objects)
398
			break;
399
		for (sub = xref->subsec; sub != NULL; sub = sub->next)
400
		{
401
			if (sub->start <= num && num < sub->start + sub->len && sub->table[num - sub->start].type)
402
				break;
403
		}
404
		if (sub != NULL)
405
			break;
406
	}
407
	/* sub == NULL implies we did not find it */
408

409
	/* If we don't find it, or it's already in the incremental section, return */
410
	if (i == 0 || sub == NULL)
411
		return;
412

413
	/* Move the object to the incremental section */
414
	doc->xref_index[num] = 0;
415
	old_entry = &sub->table[num - sub->start];
416
	new_entry = pdf_get_incremental_xref_entry(ctx, doc, num);
417
	*new_entry = *old_entry;
418
	old_entry->obj = NULL;
419
	old_entry->stm_buf = NULL;
420
}
421

422
/*
 * Replace all xref sections of the document with one single section
 * whose only subsection is the caller-supplied table 'entries' of 'n'
 * entries. The current trailer is carried over to the new section, and
 * the xref index is replaced by a zero-filled one of length 'n'.
 *
 * All allocations are made before the existing state is touched, and
 * the new index is installed only at the end. If an allocation throws,
 * the document keeps its previous sections, index and max_xref_len, and
 * the temporaries are released before rethrowing.
 */
void pdf_replace_xref(fz_context *ctx, pdf_document *doc, pdf_xref_entry *entries, int n)
{
	pdf_xref *xref = NULL;
	pdf_xref_subsec *sub;
	int *xref_index = NULL;
	pdf_obj *trailer = pdf_keep_obj(ctx, pdf_trailer(ctx, doc));

	fz_var(xref);
	fz_var(xref_index);
	fz_var(trailer);
	fz_try(ctx)
	{
		/* fz_calloc zero-fills, so every slot starts at section 0 */
		xref_index = fz_calloc(ctx, n, sizeof(int));
		xref = fz_malloc_struct(ctx, pdf_xref);
		sub = fz_malloc_struct(ctx, pdf_xref_subsec);

		/* The new table completely replaces the previous separate sections */
		pdf_drop_xref_sections(ctx, doc);

		sub->table = entries;
		sub->start = 0;
		sub->len = n;
		xref->subsec = sub;
		xref->num_objects = n;
		xref->trailer = trailer;
		trailer = NULL;

		doc->xref_sections = xref;
		doc->num_xref_sections = 1;
		doc->max_xref_len = n;

		/* Release the old index instead of overwriting (and leaking) it */
		fz_free(ctx, doc->xref_index);
		doc->xref_index = xref_index;
	}
	fz_catch(ctx)
	{
		fz_free(ctx, xref_index);
		fz_free(ctx, xref);
		pdf_drop_obj(ctx, trailer);
		fz_rethrow(ctx);
	}
}
459

460
/*
461
 * magic version tag and startxref
462
 */
463

464
static void
465
pdf_load_version(fz_context *ctx, pdf_document *doc)
466
{
467
	char buf[20];
468

469
	fz_seek(ctx, doc->file, 0, SEEK_SET);
470
	fz_read_line(ctx, doc->file, buf, sizeof buf);
471
	if (memcmp(buf, "%PDF-", 5) != 0)
472
		fz_throw(ctx, FZ_ERROR_GENERIC, "cannot recognize version marker");
473

474
	doc->version = 10 * (fz_atof(buf+5) + 0.05);
475
}
476

477
static void
478
pdf_read_start_xref(fz_context *ctx, pdf_document *doc)
479
{
480
	unsigned char buf[1024];
481
	int t, n;
482
	int i;
483

484
	fz_seek(ctx, doc->file, 0, SEEK_END);
485

486
	doc->file_size = fz_tell(ctx, doc->file);
487

488
	t = fz_maxi(0, doc->file_size - (int)sizeof buf);
489
	fz_seek(ctx, doc->file, t, SEEK_SET);
490

491
	n = fz_read(ctx, doc->file, buf, sizeof buf);
492

493
	for (i = n - 9; i >= 0; i--)
494
	{
495
		if (memcmp(buf + i, "startxref", 9) == 0)
496
		{
497
			i += 9;
498
			while (i < n && iswhite(buf[i]))
499
				i ++;
500
			doc->startxref = 0;
501
			while (i < n && buf[i] >= '0' && buf[i] <= '9')
502
				doc->startxref = doc->startxref * 10 + (buf[i++] - '0');
503
			if (doc->startxref != 0)
504
				return;
505
			break;
506
		}
507
	}
508

509
	fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find startxref");
510
}
511

512
/*
513
 * trailer dictionary
514
 */
515

516
static int
517
pdf_xref_size_from_old_trailer(fz_context *ctx, pdf_document *doc, pdf_lexbuf *buf)
518
{
519
	int len;
520
	char *s;
521
	int t;
522
	pdf_token tok;
523
	int c;
524
	int size;
525
	int ofs;
526
	pdf_obj *trailer = NULL;
527

528
	fz_var(trailer);
529

530
	/* Record the current file read offset so that we can reinstate it */
531
	ofs = fz_tell(ctx, doc->file);
532

533
	fz_read_line(ctx, doc->file, buf->scratch, buf->size);
534
	if (strncmp(buf->scratch, "xref", 4) != 0)
535
		fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find xref marker");
536

537
	while (1)
538
	{
539
		c = fz_peek_byte(ctx, doc->file);
540
		if (!(c >= '0' && c <= '9'))
541
			break;
542

543
		fz_read_line(ctx, doc->file, buf->scratch, buf->size);
544
		s = buf->scratch;
545
		fz_strsep(&s, " "); /* ignore ofs */
546
		if (!s)
547
			fz_throw(ctx, FZ_ERROR_GENERIC, "invalid range marker in xref");
548
		len = fz_atoi(fz_strsep(&s, " "));
549
		if (len < 0)
550
			fz_throw(ctx, FZ_ERROR_GENERIC, "xref range marker must be positive");
551

552
		/* broken pdfs where the section is not on a separate line */
553
		if (s && *s != '\0')
554
			fz_seek(ctx, doc->file, -(2 + (int)strlen(s)), SEEK_CUR);
555

556
		t = fz_tell(ctx, doc->file);
557
		if (t < 0)
558
			fz_throw(ctx, FZ_ERROR_GENERIC, "cannot tell in file");
559
		if (len > (INT_MAX - t) / 20)
560
			fz_throw(ctx, FZ_ERROR_GENERIC, "xref has too many entries");
561

562
		fz_seek(ctx, doc->file, t + 20 * len, SEEK_SET);
563
	}
564

565
	fz_try(ctx)
566
	{
567
		tok = pdf_lex(ctx, doc->file, buf);
568
		if (tok != PDF_TOK_TRAILER)
569
			fz_throw(ctx, FZ_ERROR_GENERIC, "expected trailer marker");
570

571
		tok = pdf_lex(ctx, doc->file, buf);
572
		if (tok != PDF_TOK_OPEN_DICT)
573
			fz_throw(ctx, FZ_ERROR_GENERIC, "expected trailer dictionary");
574

575
		trailer = pdf_parse_dict(ctx, doc, doc->file, buf);
576

577
		size = pdf_to_int(ctx, pdf_dict_get(ctx, trailer, PDF_NAME_Size));
578
		if (!size)
579
			fz_throw(ctx, FZ_ERROR_GENERIC, "trailer missing Size entry");
580
	}
581
	fz_always(ctx)
582
	{
583
		pdf_drop_obj(ctx, trailer);
584
	}
585
	fz_catch(ctx)
586
	{
587
		fz_rethrow_message(ctx, "cannot parse trailer");
588
	}
589

590
	fz_seek(ctx, doc->file, ofs, SEEK_SET);
591

592
	return size;
593
}
594

595
pdf_obj *
596
pdf_new_ref(fz_context *ctx, pdf_document *doc, pdf_obj *obj)
597
{
598
	int num = pdf_create_object(ctx, doc);
599
	pdf_update_object(ctx, doc, num, obj);
600
	return pdf_new_indirect(ctx, doc, num, 0);
601
}
602

603
static pdf_xref_entry *
604
pdf_xref_find_subsection(fz_context *ctx, pdf_document *doc, int ofs, int len)
605
{
606
	pdf_xref *xref = &doc->xref_sections[doc->num_xref_sections-1];
607
	pdf_xref_subsec *sub;
608
	int new_max;
609

610
	/* Different cases here. Case 1) We might be asking for a
611
	 * subsection (or a subset of a subsection) that we already
612
	 * have -  Just return it. Case 2) We might be asking for a
613
	 * completely new subsection - Create it and return it.
614
	 * Case 3) We might have an overlapping one - Create a 'solid'
615
	 * subsection and return that. */
616

617
	/* Sanity check */
618
	for (sub = xref->subsec; sub != NULL; sub = sub->next)
619
	{
620
		if (ofs >= sub->start && ofs + len <= sub->start + sub->len)
621
			return &sub->table[ofs-sub->start]; /* Case 1 */
622
		if (ofs + len > sub->start && ofs <= sub->start + sub->len)
623
			break; /* Case 3 */
624
	}
625

626
	new_max = xref->num_objects;
627
	if (new_max < ofs + len)
628
		new_max = ofs + len;
629

630
	if (sub == NULL)
631
	{
632
		/* Case 2 */
633
		sub = fz_malloc_struct(ctx, pdf_xref_subsec);
634
		fz_try(ctx)
635
		{
636
			sub->table = fz_calloc(ctx, len, sizeof(pdf_xref_entry));
637
			sub->start = ofs;
638
			sub->len = len;
639
			sub->next = xref->subsec;
640
			xref->subsec = sub;
641
		}
642
		fz_catch(ctx)
643
		{
644
			fz_free(ctx, sub);
645
			fz_rethrow(ctx);
646
		}
647
		xref->num_objects = new_max;
648
		if (doc->max_xref_len < new_max)
649
			extend_xref_index(ctx, doc, new_max);
650
	}
651
	else
652
	{
653
		/* Case 3 */
654
		ensure_solid_xref(ctx, doc, new_max, doc->num_xref_sections-1);
655
		xref = &doc->xref_sections[doc->num_xref_sections-1];
656
		sub = xref->subsec;
657
	}
658
	return &sub->table[ofs-sub->start];
659
}
660

661
static pdf_obj *
662
pdf_read_old_xref(fz_context *ctx, pdf_document *doc, pdf_lexbuf *buf)
663
{
664
	fz_stream *file = doc->file;
665

666
	int ofs, len;
667
	char *s;
668
	int n;
669
	pdf_token tok;
670
	int i;
671
	int c;
672
	pdf_obj *trailer;
673
	int xref_len = pdf_xref_size_from_old_trailer(ctx, doc, buf);
674
	pdf_xref_entry *table;
675

676
	fz_read_line(ctx, file, buf->scratch, buf->size);
677
	if (strncmp(buf->scratch, "xref", 4) != 0)
678
		fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find xref marker");
679

680
	while (1)
681
	{
682
		c = fz_peek_byte(ctx, file);
683
		if (!(c >= '0' && c <= '9'))
684
			break;
685

686
		fz_read_line(ctx, file, buf->scratch, buf->size);
687
		s = buf->scratch;
688
		ofs = fz_atoi(fz_strsep(&s, " "));
689
		len = fz_atoi(fz_strsep(&s, " "));
690

691
		/* broken pdfs where the section is not on a separate line */
692
		if (s && *s != '\0')
693
		{
694
			fz_warn(ctx, "broken xref section. proceeding anyway.");
695
			fz_seek(ctx, file, -(2 + (int)strlen(s)), SEEK_CUR);
696
		}
697

698
		if (ofs < 0)
699
			fz_throw(ctx, FZ_ERROR_GENERIC, "out of range object num in xref: %d", ofs);
700

701
		/* broken pdfs where size in trailer undershoots entries in xref sections */
702
		if (ofs + len > xref_len)
703
		{
704
			fz_warn(ctx, "broken xref section, proceeding anyway.");
705
		}
706

707
		table = pdf_xref_find_subsection(ctx, doc, ofs, len);
708

709
		for (i = ofs; i < ofs + len; i++)
710
		{
711
			pdf_xref_entry *entry = &table[i-ofs];
712
			n = fz_read(ctx, file, (unsigned char *) buf->scratch, 20);
713
			if (n != 20)
714
				fz_throw(ctx, FZ_ERROR_GENERIC, "unexpected EOF in xref table");
715
			if (!entry->type)
716
			{
717
				s = buf->scratch;
718

719
				/* broken pdfs where line start with white space */
720
				while (*s != '\0' && iswhite(*s))
721
					s++;
722

723
				entry->ofs = atoi(s);
724
				entry->gen = atoi(s + 11);
725
				entry->type = s[17];
726
				if (s[17] != 'f' && s[17] != 'n' && s[17] != 'o')
727
					fz_throw(ctx, FZ_ERROR_GENERIC, "unexpected xref type: %#x (%d %d R)", s[17], i, entry->gen);
728
			}
729
		}
730
	}
731

732
	fz_try(ctx)
733
	{
734
		tok = pdf_lex(ctx, file, buf);
735
		if (tok != PDF_TOK_TRAILER)
736
			fz_throw(ctx, FZ_ERROR_GENERIC, "expected trailer marker");
737

738
		tok = pdf_lex(ctx, file, buf);
739
		if (tok != PDF_TOK_OPEN_DICT)
740
			fz_throw(ctx, FZ_ERROR_GENERIC, "expected trailer dictionary");
741

742
		trailer = pdf_parse_dict(ctx, doc, file, buf);
743
	}
744
	fz_catch(ctx)
745
	{
746
		fz_rethrow_message(ctx, "cannot parse trailer");
747
	}
748
	return trailer;
749
}
750

751
static void
752
pdf_read_new_xref_section(fz_context *ctx, pdf_document *doc, fz_stream *stm, int i0, int i1, int w0, int w1, int w2)
753
{
754
	pdf_xref_entry *table;
755
	int i, n;
756

757
	if (i0 < 0 || i1 < 0)
758
		fz_throw(ctx, FZ_ERROR_GENERIC, "negative xref stream entry index");
759
	//if (i0 + i1 > pdf_xref_len(ctx, doc))
760
	//	fz_throw(ctx, FZ_ERROR_GENERIC, "xref stream has too many entries");
761

762
	table = pdf_xref_find_subsection(ctx, doc, i0, i1);
763
	for (i = i0; i < i0 + i1; i++)
764
	{
765
		pdf_xref_entry *entry = &table[i-i0];
766
		int a = 0;
767
		int b = 0;
768
		int c = 0;
769

770
		if (fz_is_eof(ctx, stm))
771
			fz_throw(ctx, FZ_ERROR_GENERIC, "truncated xref stream");
772

773
		for (n = 0; n < w0; n++)
774
			a = (a << 8) + fz_read_byte(ctx, stm);
775
		for (n = 0; n < w1; n++)
776
			b = (b << 8) + fz_read_byte(ctx, stm);
777
		for (n = 0; n < w2; n++)
778
			c = (c << 8) + fz_read_byte(ctx, stm);
779

780
		if (!entry->type)
781
		{
782
			int t = w0 ? a : 1;
783
			entry->type = t == 0 ? 'f' : t == 1 ? 'n' : t == 2 ? 'o' : 0;
784
			entry->ofs = w1 ? b : 0;
785
			entry->gen = w2 ? c : 0;
786
		}
787
	}
788

789
	doc->has_xref_streams = 1;
790
}
791

792
/* Entered with file locked, remains locked throughout. */
793
static pdf_obj *
794
pdf_read_new_xref(fz_context *ctx, pdf_document *doc, pdf_lexbuf *buf)
795
{
796
	fz_stream *stm = NULL;
797
	pdf_obj *trailer = NULL;
798
	pdf_obj *index = NULL;
799
	pdf_obj *obj = NULL;
800
	int num, gen, ofs, stm_ofs;
801
	int size, w0, w1, w2;
802
	int t;
803

804
	fz_var(trailer);
805
	fz_var(stm);
806

807
	fz_try(ctx)
808
	{
809
		ofs = fz_tell(ctx, doc->file);
810
		trailer = pdf_parse_ind_obj(ctx, doc, doc->file, buf, &num, &gen, &stm_ofs, NULL);
811
	}
812
	fz_catch(ctx)
813
	{
814
		pdf_drop_obj(ctx, trailer);
815
		fz_rethrow_message(ctx, "cannot parse compressed xref stream object");
816
	}
817

818
	fz_try(ctx)
819
	{
820
		pdf_xref_entry *entry;
821

822
		obj = pdf_dict_get(ctx, trailer, PDF_NAME_Size);
823
		if (!obj)
824
			fz_throw(ctx, FZ_ERROR_GENERIC, "xref stream missing Size entry (%d %d R)", num, gen);
825

826
		size = pdf_to_int(ctx, obj);
827

828
		obj = pdf_dict_get(ctx, trailer, PDF_NAME_W);
829
		if (!obj)
830
			fz_throw(ctx, FZ_ERROR_GENERIC, "xref stream missing W entry (%d %d R)", num, gen);
831
		w0 = pdf_to_int(ctx, pdf_array_get(ctx, obj, 0));
832
		w1 = pdf_to_int(ctx, pdf_array_get(ctx, obj, 1));
833
		w2 = pdf_to_int(ctx, pdf_array_get(ctx, obj, 2));
834

835
		if (w0 < 0)
836
			fz_warn(ctx, "xref stream objects have corrupt type");
837
		if (w1 < 0)
838
			fz_warn(ctx, "xref stream objects have corrupt offset");
839
		if (w2 < 0)
840
			fz_warn(ctx, "xref stream objects have corrupt generation");
841

842
		w0 = w0 < 0 ? 0 : w0;
843
		w1 = w1 < 0 ? 0 : w1;
844
		w2 = w2 < 0 ? 0 : w2;
845

846
		index = pdf_dict_get(ctx, trailer, PDF_NAME_Index);
847

848
		stm = pdf_open_stream_with_offset(ctx, doc, num, gen, trailer, stm_ofs);
849

850
		if (!index)
851
		{
852
			pdf_read_new_xref_section(ctx, doc, stm, 0, size, w0, w1, w2);
853
		}
854
		else
855
		{
856
			int n = pdf_array_len(ctx, index);
857
			for (t = 0; t < n; t += 2)
858
			{
859
				int i0 = pdf_to_int(ctx, pdf_array_get(ctx, index, t + 0));
860
				int i1 = pdf_to_int(ctx, pdf_array_get(ctx, index, t + 1));
861
				pdf_read_new_xref_section(ctx, doc, stm, i0, i1, w0, w1, w2);
862
			}
863
		}
864
		entry = pdf_get_populating_xref_entry(ctx, doc, num);
865
		entry->ofs = ofs;
866
		entry->gen = gen;
867
		entry->stm_ofs = stm_ofs;
868
		pdf_drop_obj(ctx, entry->obj);
869
		entry->obj = pdf_keep_obj(ctx, trailer);
870
		entry->type = 'n';
871
	}
872
	fz_always(ctx)
873
	{
874
		fz_drop_stream(ctx, stm);
875
	}
876
	fz_catch(ctx)
877
	{
878
		pdf_drop_obj(ctx, trailer);
879
		fz_rethrow(ctx);
880
	}
881

882
	return trailer;
883
}
884

885
static pdf_obj *
886
pdf_read_xref(fz_context *ctx, pdf_document *doc, int ofs, pdf_lexbuf *buf)
887
{
888
	pdf_obj *trailer;
889
	int c;
890

891
	fz_seek(ctx, doc->file, ofs, SEEK_SET);
892

893
	while (iswhite(fz_peek_byte(ctx, doc->file)))
894
		fz_read_byte(ctx, doc->file);
895

896
	fz_try(ctx)
897
	{
898
		c = fz_peek_byte(ctx, doc->file);
899
		if (c == 'x')
900
			trailer = pdf_read_old_xref(ctx, doc, buf);
901
		else if (c >= '0' && c <= '9')
902
			trailer = pdf_read_new_xref(ctx, doc, buf);
903
		else
904
			fz_throw(ctx, FZ_ERROR_GENERIC, "cannot recognize xref format");
905
	}
906
	fz_catch(ctx)
907
	{
908
		fz_rethrow_message(ctx, "cannot read xref (ofs=%d)", ofs);
909
	}
910
	return trailer;
911
}
912

913
/*
 * Growable list of xref offsets that have already been visited, used to
 * detect loops while following chains of xref sections.
 */
typedef struct ofs_list_s
{
	int max;	/* number of slots allocated in 'list' */
	int len;	/* number of slots currently in use */
	int *list;	/* the recorded offsets */
} ofs_list;
921

922
static int
923
read_xref_section(fz_context *ctx, pdf_document *doc, int ofs, pdf_lexbuf *buf, ofs_list *offsets)
924
{
925
	pdf_obj *trailer = NULL;
926
	int xrefstmofs = 0;
927
	int prevofs = 0;
928

929
	fz_var(trailer);
930

931
	fz_try(ctx)
932
	{
933
		int i;
934
		/* Avoid potential infinite recursion */
935
		for (i = 0; i < offsets->len; i ++)
936
		{
937
			if (offsets->list[i] == ofs)
938
				break;
939
		}
940
		if (i < offsets->len)
941
		{
942
			fz_warn(ctx, "ignoring xref recursion with offset %d", ofs);
943
			break;
944
		}
945
		if (offsets->len == offsets->max)
946
		{
947
			offsets->list = fz_resize_array(ctx, offsets->list, offsets->max*2, sizeof(int));
948
			offsets->max *= 2;
949
		}
950
		offsets->list[offsets->len++] = ofs;
951

952
		trailer = pdf_read_xref(ctx, doc, ofs, buf);
953

954
		pdf_set_populating_xref_trailer(ctx, doc, trailer);
955

956
		/* FIXME: do we overwrite free entries properly? */
957
		/* FIXME: Does this work properly with progression? */
958
		xrefstmofs = pdf_to_int(ctx, pdf_dict_get(ctx, trailer, PDF_NAME_XRefStm));
959
		if (xrefstmofs)
960
		{
961
			if (xrefstmofs < 0)
962
				fz_throw(ctx, FZ_ERROR_GENERIC, "negative xref stream offset");
963

964
			/*
965
				Read the XRefStm stream, but throw away the resulting trailer. We do not
966
				follow any Prev tag therein, as specified on Page 108 of the PDF reference
967
				1.7
968
			*/
969
			pdf_drop_obj(ctx, pdf_read_xref(ctx, doc, xrefstmofs, buf));
970
		}
971

972
		prevofs = pdf_to_int(ctx, pdf_dict_get(ctx, trailer, PDF_NAME_Prev));
973
		if (prevofs < 0)
974
			fz_throw(ctx, FZ_ERROR_GENERIC, "negative xref stream offset for previous xref stream");
975
	}
976
	fz_always(ctx)
977
	{
978
		pdf_drop_obj(ctx, trailer);
979
	}
980
	fz_catch(ctx)
981
	{
982
		fz_rethrow_message(ctx, "cannot read xref at offset %d", ofs);
983
	}
984

985
	return prevofs;
986
}
987

988
static void
989
pdf_read_xref_sections(fz_context *ctx, pdf_document *doc, int ofs, pdf_lexbuf *buf, int read_previous)
990
{
991
	ofs_list list;
992

993
	list.len = 0;
994
	list.max = 10;
995
	list.list = fz_malloc_array(ctx, 10, sizeof(int));
996
	fz_try(ctx)
997
	{
998
		while(ofs)
999
		{
1000
			pdf_populate_next_xref_level(ctx, doc);
1001
			ofs = read_xref_section(ctx, doc, ofs, buf, &list);
1002
			if (!read_previous)
1003
				break;
1004
		}
1005
	}
1006
	fz_always(ctx)
1007
	{
1008
		fz_free(ctx, list.list);
1009
	}
1010
	fz_catch(ctx)
1011
	{
1012
		fz_rethrow(ctx);
1013
	}
1014
}
1015

1016
static void
1017
pdf_prime_xref_index(fz_context *ctx, pdf_document *doc)
1018
{
1019
	int i, j;
1020
	int *idx = doc->xref_index;
1021

1022
	for (i = doc->num_xref_sections-1; i >= 0; i--)
1023
	{
1024
		pdf_xref *xref = &doc->xref_sections[i];
1025
		pdf_xref_subsec *subsec = xref->subsec;
1026
		while (subsec != NULL)
1027
		{
1028
			int start = subsec->start;
1029
			int end = subsec->start + subsec->len;
1030
			for (j = start; j < end; j++)
1031
			{
1032
				char t = subsec->table[j-start].type;
1033
				if (t != 0 && t != 'f')
1034
					idx[j] = i;
1035
			}
1036

1037
			subsec = subsec->next;
1038
		}
1039
	}
1040
}
1041

1042
/*
1043
 * load xref tables from pdf
1044
 *
1045
 * File locked on entry, throughout and on exit.
1046
 */
1047

1048
static void
1049
pdf_load_xref(fz_context *ctx, pdf_document *doc, pdf_lexbuf *buf)
1050
{
1051
	int i;
1052
	int xref_len;
1053
	pdf_xref_entry *entry;
1054

1055
	pdf_read_start_xref(ctx, doc);
1056

1057
	pdf_read_xref_sections(ctx, doc, doc->startxref, buf, 1);
1058

1059
	if (pdf_xref_len(ctx, doc) == 0)
1060
		fz_throw(ctx, FZ_ERROR_GENERIC, "found xref was empty");
1061

1062
	pdf_prime_xref_index(ctx, doc);
1063

1064
	entry = pdf_get_xref_entry(ctx, doc, 0);
1065
	/* broken pdfs where first object is missing */
1066
	if (!entry->type)
1067
	{
1068
		entry->type = 'f';
1069
		entry->gen = 65535;
1070
	}
1071
	/* broken pdfs where first object is not free */
1072
	else if (entry->type != 'f')
1073
		fz_throw(ctx, FZ_ERROR_GENERIC, "first object in xref is not free");
1074

1075
	/* broken pdfs where object offsets are out of range */
1076
	xref_len = pdf_xref_len(ctx, doc);
1077
	for (i = 0; i < xref_len; i++)
1078
	{
1079
		pdf_xref_entry *entry = pdf_get_xref_entry(ctx, doc, i);
1080
		if (entry->type == 'n')
1081
		{
1082
			/* Special case code: "0000000000 * n" means free,
1083
			 * according to some producers (inc Quartz) */
1084
			if (entry->ofs == 0)
1085
				entry->type = 'f';
1086
			else if (entry->ofs <= 0 || entry->ofs >= doc->file_size)
1087
				fz_throw(ctx, FZ_ERROR_GENERIC, "object offset out of range: %d (%d 0 R)", entry->ofs, i);
1088
		}
1089
		if (entry->type == 'o')
1090
			if (entry->ofs <= 0 || entry->ofs >= xref_len || pdf_get_xref_entry(ctx, doc, entry->ofs)->type != 'n')
1091
				fz_throw(ctx, FZ_ERROR_GENERIC, "invalid reference to an objstm that does not exist: %d (%d 0 R)", entry->ofs, i);
1092
	}
1093
}
1094

1095
static void
1096
pdf_load_linear(fz_context *ctx, pdf_document *doc)
1097
{
1098
	pdf_obj *dict = NULL;
1099
	pdf_obj *hint = NULL;
1100
	pdf_obj *o;
1101
	int num, gen, stmofs, lin, len;
1102

1103
	fz_var(dict);
1104
	fz_var(hint);
1105

1106
	fz_try(ctx)
1107
	{
1108
		pdf_xref_entry *entry;
1109

1110
		dict = pdf_parse_ind_obj(ctx, doc, doc->file, &doc->lexbuf.base, &num, &gen, &stmofs, NULL);
1111
		if (!pdf_is_dict(ctx, dict))
1112
			fz_throw(ctx, FZ_ERROR_GENERIC, "Failed to read linearized dictionary");
1113
		o = pdf_dict_get(ctx, dict, PDF_NAME_Linearized);
1114
		if (o == NULL)
1115
			fz_throw(ctx, FZ_ERROR_GENERIC, "Failed to read linearized dictionary");
1116
		lin = pdf_to_int(ctx, o);
1117
		if (lin != 1)
1118
			fz_throw(ctx, FZ_ERROR_GENERIC, "Unexpected version of Linearized tag (%d)", lin);
1119
		len = pdf_to_int(ctx, pdf_dict_get(ctx, dict, PDF_NAME_L));
1120
		if (len != doc->file_length)
1121
			fz_throw(ctx, FZ_ERROR_GENERIC, "File has been updated since linearization");
1122

1123
		pdf_read_xref_sections(ctx, doc, fz_tell(ctx, doc->file), &doc->lexbuf.base, 0);
1124

1125
		doc->page_count = pdf_to_int(ctx, pdf_dict_get(ctx, dict, PDF_NAME_N));
1126
		doc->linear_page_refs = fz_resize_array(ctx, doc->linear_page_refs, doc->page_count, sizeof(pdf_obj *));
1127
		memset(doc->linear_page_refs, 0, doc->page_count * sizeof(pdf_obj*));
1128
		doc->linear_obj = dict;
1129
		doc->linear_pos = fz_tell(ctx, doc->file);
1130
		doc->linear_page1_obj_num = pdf_to_int(ctx, pdf_dict_get(ctx, dict, PDF_NAME_O));
1131
		doc->linear_page_refs[0] = pdf_new_indirect(ctx, doc, doc->linear_page1_obj_num, 0);
1132
		doc->linear_page_num = 0;
1133
		hint = pdf_dict_get(ctx, dict, PDF_NAME_H);
1134
		doc->hint_object_offset = pdf_to_int(ctx, pdf_array_get(ctx, hint, 0));
1135
		doc->hint_object_length = pdf_to_int(ctx, pdf_array_get(ctx, hint, 1));
1136

1137
		entry = pdf_get_populating_xref_entry(ctx, doc, 0);
1138
		entry->type = 'f';
1139
	}
1140
	fz_catch(ctx)
1141
	{
1142
		pdf_drop_obj(ctx, dict);
1143
		fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
1144
		/* Drop back to non linearized reading mode */
1145
		doc->file_reading_linearly = 0;
1146
	}
1147
}
1148

1149
void
1150
pdf_ocg_set_config(fz_context *ctx, pdf_document *doc, int config)
1151
{
1152
	int i, j, len, len2;
1153
	pdf_ocg_descriptor *desc = doc->ocg;
1154
	pdf_obj *obj, *cobj;
1155
	pdf_obj *name;
1156

1157
	obj = pdf_dict_get(ctx, pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME_Root), PDF_NAME_OCProperties);
1158
	if (!obj)
1159
	{
1160
		if (config == 0)
1161
			return;
1162
		else
1163
			fz_throw(ctx, FZ_ERROR_GENERIC, "Unknown OCG config (None known!)");
1164
	}
1165
	if (config == 0)
1166
	{
1167
		cobj = pdf_dict_get(ctx, obj, PDF_NAME_D);
1168
		if (!cobj)
1169
			fz_throw(ctx, FZ_ERROR_GENERIC, "No default OCG config");
1170
	}
1171
	else
1172
	{
1173
		cobj = pdf_array_get(ctx, pdf_dict_get(ctx, obj, PDF_NAME_Configs), config);
1174
		if (!cobj)
1175
			fz_throw(ctx, FZ_ERROR_GENERIC, "Illegal OCG config");
1176
	}
1177

1178
	pdf_drop_obj(ctx, desc->intent);
1179
	desc->intent = pdf_dict_get(ctx, cobj, PDF_NAME_Intent);
1180
	if (desc->intent)
1181
		pdf_keep_obj(ctx, desc->intent);
1182

1183
	len = desc->len;
1184
	name = pdf_dict_get(ctx, cobj, PDF_NAME_BaseState);
1185
	if (pdf_name_eq(ctx, name, PDF_NAME_Unchanged))
1186
	{
1187
		/* Do nothing */
1188
	}
1189
	else if (pdf_name_eq(ctx, name, PDF_NAME_OFF))
1190
	{
1191
		for (i = 0; i < len; i++)
1192
		{
1193
			desc->ocgs[i].state = 0;
1194
		}
1195
	}
1196
	else /* Default to ON */
1197
	{
1198
		for (i = 0; i < len; i++)
1199
		{
1200
			desc->ocgs[i].state = 1;
1201
		}
1202
	}
1203

1204
	obj = pdf_dict_get(ctx, cobj, PDF_NAME_ON);
1205
	len2 = pdf_array_len(ctx, obj);
1206
	for (i = 0; i < len2; i++)
1207
	{
1208
		pdf_obj *o = pdf_array_get(ctx, obj, i);
1209
		int n = pdf_to_num(ctx, o);
1210
		int g = pdf_to_gen(ctx, o);
1211
		for (j=0; j < len; j++)
1212
		{
1213
			if (desc->ocgs[j].num == n && desc->ocgs[j].gen == g)
1214
			{
1215
				desc->ocgs[j].state = 1;
1216
				break;
1217
			}
1218
		}
1219
	}
1220

1221
	obj = pdf_dict_get(ctx, cobj, PDF_NAME_OFF);
1222
	len2 = pdf_array_len(ctx, obj);
1223
	for (i = 0; i < len2; i++)
1224
	{
1225
		pdf_obj *o = pdf_array_get(ctx, obj, i);
1226
		int n = pdf_to_num(ctx, o);
1227
		int g = pdf_to_gen(ctx, o);
1228
		for (j=0; j < len; j++)
1229
		{
1230
			if (desc->ocgs[j].num == n && desc->ocgs[j].gen == g)
1231
			{
1232
				desc->ocgs[j].state = 0;
1233
				break;
1234
			}
1235
		}
1236
	}
1237

1238
	/* FIXME: Should make 'num configs' available in the descriptor. */
1239
	/* FIXME: Should copy out 'Intent' here into the descriptor, and remove
1240
	 * csi->intent in favour of that. */
1241
	/* FIXME: Should copy 'AS' into the descriptor, and visibility
1242
	 * decisions should respect it. */
1243
	/* FIXME: Make 'Order' available via the descriptor (when we have an
1244
	 * app that needs it) */
1245
	/* FIXME: Make 'ListMode' available via the descriptor (when we have
1246
	 * an app that needs it) */
1247
	/* FIXME: Make 'RBGroups' available via the descriptor (when we have
1248
	 * an app that needs it) */
1249
	/* FIXME: Make 'Locked' available via the descriptor (when we have
1250
	 * an app that needs it) */
1251
}
1252

1253
static void
1254
pdf_read_ocg(fz_context *ctx, pdf_document *doc)
1255
{
1256
	pdf_obj *obj, *ocg;
1257
	int len, i;
1258
	pdf_ocg_descriptor *desc;
1259

1260
	fz_var(desc);
1261

1262
	obj = pdf_dict_get(ctx, pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME_Root), PDF_NAME_OCProperties);
1263
	if (!obj)
1264
		return;
1265
	ocg = pdf_dict_get(ctx, obj, PDF_NAME_OCGs);
1266
	if (!ocg || !pdf_is_array(ctx, ocg))
1267
		/* Not ever supposed to happen, but live with it. */
1268
		return;
1269
	len = pdf_array_len(ctx, ocg);
1270
	fz_try(ctx)
1271
	{
1272
		desc = fz_calloc(ctx, 1, sizeof(*desc));
1273
		desc->len = len;
1274
		desc->ocgs = fz_calloc(ctx, len, sizeof(*desc->ocgs));
1275
		desc->intent = NULL;
1276
		for (i=0; i < len; i++)
1277
		{
1278
			pdf_obj *o = pdf_array_get(ctx, ocg, i);
1279
			desc->ocgs[i].num = pdf_to_num(ctx, o);
1280
			desc->ocgs[i].gen = pdf_to_gen(ctx, o);
1281
			desc->ocgs[i].state = 1;
1282
		}
1283
		doc->ocg = desc;
1284
	}
1285
	fz_catch(ctx)
1286
	{
1287
		if (desc)
1288
			fz_free(ctx, desc->ocgs);
1289
		fz_free(ctx, desc);
1290
		fz_rethrow(ctx);
1291
	}
1292

1293
	pdf_ocg_set_config(ctx, doc, 0);
1294
}
1295

1296
static void
1297
pdf_drop_ocg(fz_context *ctx, pdf_ocg_descriptor *desc)
1298
{
1299
	if (!desc)
1300
		return;
1301

1302
	pdf_drop_obj(ctx, desc->intent);
1303
	fz_free(ctx, desc->ocgs);
1304
	fz_free(ctx, desc);
1305
}
1306

1307
/*
1308
 * Initialize and load xref tables.
1309
 * If password is not null, try to decrypt.
1310
 */
1311

1312
static void
1313
pdf_init_document(fz_context *ctx, pdf_document *doc)
1314
{
1315
	pdf_obj *encrypt, *id;
1316
	pdf_obj *dict = NULL;
1317
	pdf_obj *obj;
1318
	pdf_obj *nobj = NULL;
1319
	int i, repaired = 0;
1320

1321
	fz_var(dict);
1322
	fz_var(nobj);
1323

1324
	fz_try(ctx)
1325
	{
1326
		pdf_load_version(ctx, doc);
1327

1328
		doc->file_length = fz_stream_meta(ctx, doc->file, FZ_STREAM_META_LENGTH, 0, NULL);
1329
		if (doc->file_length < 0)
1330
			doc->file_length = 0;
1331

1332
		/* Check to see if we should work in progressive mode */
1333
		if (fz_stream_meta(ctx, doc->file, FZ_STREAM_META_PROGRESSIVE, 0, NULL) > 0)
1334
			doc->file_reading_linearly = 1;
1335

1336
		/* Try to load the linearized file if we are in progressive
1337
		 * mode. */
1338
		if (doc->file_reading_linearly)
1339
			pdf_load_linear(ctx, doc);
1340

1341
		/* If we aren't in progressive mode (or the linear load failed
1342
		 * and has set us back to non-progressive mode), load normally.
1343
		 */
1344
		if (!doc->file_reading_linearly)
1345
			pdf_load_xref(ctx, doc, &doc->lexbuf.base);
1346
	}
1347
	fz_catch(ctx)
1348
	{
1349
		pdf_drop_xref_sections(ctx, doc);
1350
		fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
1351
		fz_warn(ctx, "trying to repair broken xref");
1352
		repaired = 1;
1353
	}
1354

1355
	fz_try(ctx)
1356
	{
1357
		int hasroot, hasinfo;
1358

1359
		if (repaired)
1360
		{
1361
			/* pdf_repair_xref may access xref_index, so reset it properly */
1362
			memset(doc->xref_index, 0, sizeof(int) * doc->max_xref_len);
1363
			pdf_repair_xref(ctx, doc);
1364
			pdf_prime_xref_index(ctx, doc);
1365
		}
1366

1367
		encrypt = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME_Encrypt);
1368
		id = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME_ID);
1369
		if (pdf_is_dict(ctx, encrypt))
1370
			doc->crypt = pdf_new_crypt(ctx, encrypt, id);
1371

1372
		/* Allow lazy clients to read encrypted files with a blank password */
1373
		pdf_authenticate_password(ctx, doc, "");
1374

1375
		if (repaired)
1376
		{
1377
			int xref_len = pdf_xref_len(ctx, doc);
1378
			pdf_repair_obj_stms(ctx, doc);
1379

1380
			hasroot = (pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME_Root) != NULL);
1381
			hasinfo = (pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME_Info) != NULL);
1382

1383
			for (i = 1; i < xref_len; i++)
1384
			{
1385
				pdf_xref_entry *entry = pdf_get_xref_entry(ctx, doc, i);
1386
				if (entry->type == 0 || entry->type == 'f')
1387
					continue;
1388

1389
				fz_try(ctx)
1390
				{
1391
					dict = pdf_load_object(ctx, doc, i, 0);
1392
				}
1393
				fz_catch(ctx)
1394
				{
1395
					fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
1396
					fz_warn(ctx, "ignoring broken object (%d 0 R)", i);
1397
					continue;
1398
				}
1399

1400
				if (!hasroot)
1401
				{
1402
					obj = pdf_dict_get(ctx, dict, PDF_NAME_Type);
1403
					if (pdf_name_eq(ctx, obj, PDF_NAME_Catalog))
1404
					{
1405
						nobj = pdf_new_indirect(ctx, doc, i, 0);
1406
						pdf_dict_put(ctx, pdf_trailer(ctx, doc), PDF_NAME_Root, nobj);
1407
						pdf_drop_obj(ctx, nobj);
1408
						nobj = NULL;
1409
					}
1410
				}
1411

1412
				if (!hasinfo)
1413
				{
1414
					if (pdf_dict_get(ctx, dict, PDF_NAME_Creator) || pdf_dict_get(ctx, dict, PDF_NAME_Producer))
1415
					{
1416
						nobj = pdf_new_indirect(ctx, doc, i, 0);
1417
						pdf_dict_put(ctx, pdf_trailer(ctx, doc), PDF_NAME_Info, nobj);
1418
						pdf_drop_obj(ctx, nobj);
1419
						nobj = NULL;
1420
					}
1421
				}
1422

1423
				pdf_drop_obj(ctx, dict);
1424
				dict = NULL;
1425
			}
1426

1427
			/* ensure that strings are not used in their repaired, non-decrypted form */
1428
			if (doc->crypt)
1429
				pdf_clear_xref(ctx, doc);
1430
		}
1431
	}
1432
	fz_catch(ctx)
1433
	{
1434
		pdf_drop_obj(ctx, dict);
1435
		pdf_drop_obj(ctx, nobj);
1436
		fz_rethrow_message(ctx, "cannot open document");
1437
	}
1438

1439
	fz_try(ctx)
1440
	{
1441
		pdf_read_ocg(ctx, doc);
1442
	}
1443
	fz_catch(ctx)
1444
	{
1445
		fz_warn(ctx, "Ignoring Broken Optional Content");
1446
	}
1447

1448
	fz_try(ctx)
1449
	{
1450
		char *version_str;
1451
		obj = pdf_dict_getl(ctx, pdf_trailer(ctx, doc), PDF_NAME_Root, PDF_NAME_Version, NULL);
1452
		version_str = pdf_to_name(ctx, obj);
1453
		if (*version_str)
1454
		{
1455
			int version = 10 * (fz_atof(version_str) + 0.05);
1456
			if (version > doc->version)
1457
				doc->version = version;
1458
		}
1459
	}
1460
	fz_catch(ctx) { }
1461
}
1462

1463
void
1464
pdf_close_document(fz_context *ctx, pdf_document *doc)
1465
{
1466
	pdf_unsaved_sig *usig;
1467
	int i;
1468

1469
	if (!doc)
1470
		return;
1471

1472
	/* Type3 glyphs in the glyph cache can contain pdf_obj pointers
1473
	 * that we are about to destroy. Simplest solution is to bin the
1474
	 * glyph cache at this point. */
1475
	fz_purge_glyph_cache(ctx);
1476

1477
	if (doc->js)
1478
		doc->drop_js(doc->js);
1479

1480
	pdf_drop_xref_sections(ctx, doc);
1481
	fz_free(ctx, doc->xref_index);
1482

1483
	if (doc->focus_obj)
1484
		pdf_drop_obj(ctx, doc->focus_obj);
1485
	if (doc->file)
1486
		fz_drop_stream(ctx, doc->file);
1487
	if (doc->crypt)
1488
		pdf_drop_crypt(ctx, doc->crypt);
1489

1490
	pdf_drop_obj(ctx, doc->linear_obj);
1491
	if (doc->linear_page_refs)
1492
	{
1493
		for (i=0; i < doc->page_count; i++)
1494
		{
1495
			pdf_drop_obj(ctx, doc->linear_page_refs[i]);
1496
		}
1497
		fz_free(ctx, doc->linear_page_refs);
1498
	}
1499
	fz_free(ctx, doc->hint_page);
1500
	fz_free(ctx, doc->hint_shared_ref);
1501
	fz_free(ctx, doc->hint_shared);
1502
	fz_free(ctx, doc->hint_obj_offsets);
1503

1504
	while ((usig = doc->unsaved_sigs) != NULL)
1505
	{
1506
		doc->unsaved_sigs = usig->next;
1507
		pdf_drop_obj(ctx, usig->field);
1508
		pdf_drop_signer(ctx, usig->signer);
1509
		fz_free(ctx, usig);
1510
	}
1511

1512
	for (i=0; i < doc->num_type3_fonts; i++)
1513
	{
1514
		fz_decouple_type3_font(ctx, doc->type3_fonts[i], (void *)doc);
1515
		fz_drop_font(ctx, doc->type3_fonts[i]);
1516
	}
1517
	fz_free(ctx, doc->type3_fonts);
1518

1519
	pdf_drop_ocg(ctx, doc->ocg);
1520

1521
	fz_empty_store(ctx);
1522

1523
	pdf_lexbuf_fin(ctx, &doc->lexbuf.base);
1524

1525
	fz_free(ctx, doc);
1526
}
1527

1528
void
1529
pdf_print_xref(fz_context *ctx, pdf_document *doc)
1530
{
1531
	int i;
1532
	int xref_len = pdf_xref_len(ctx, doc);
1533
	printf("xref\n0 %d\n", xref_len);
1534
	for (i = 0; i < xref_len; i++)
1535
	{
1536
		pdf_xref_entry *entry = pdf_get_xref_entry(ctx, doc, i);
1537
		printf("%05d: %010d %05d %c (stm_ofs=%d; stm_buf=%p)\n", i,
1538
			entry->ofs,
1539
			entry->gen,
1540
			entry->type ? entry->type : '-',
1541
			entry->stm_ofs,
1542
			entry->stm_buf);
1543
	}
1544
}
1545

1546
/*
1547
 * compressed object streams
1548
 */
1549

1550
static pdf_xref_entry *
1551
pdf_load_obj_stm(fz_context *ctx, pdf_document *doc, int num, int gen, pdf_lexbuf *buf, int target)
1552
{
1553
	fz_stream *stm = NULL;
1554
	pdf_obj *objstm = NULL;
1555
	int *numbuf = NULL;
1556
	int *ofsbuf = NULL;
1557

1558
	pdf_obj *obj;
1559
	int first;
1560
	int count;
1561
	int i;
1562
	pdf_token tok;
1563
	pdf_xref_entry *ret_entry = NULL;
1564

1565
	fz_var(numbuf);
1566
	fz_var(ofsbuf);
1567
	fz_var(objstm);
1568
	fz_var(stm);
1569

1570
	fz_try(ctx)
1571
	{
1572
		objstm = pdf_load_object(ctx, doc, num, gen);
1573

1574
		count = pdf_to_int(ctx, pdf_dict_get(ctx, objstm, PDF_NAME_N));
1575
		first = pdf_to_int(ctx, pdf_dict_get(ctx, objstm, PDF_NAME_First));
1576

1577
		if (count < 0)
1578
			fz_throw(ctx, FZ_ERROR_GENERIC, "negative number of objects in object stream");
1579
		if (first < 0)
1580
			fz_throw(ctx, FZ_ERROR_GENERIC, "first object in object stream resides outside stream");
1581

1582
		numbuf = fz_calloc(ctx, count, sizeof(int));
1583
		ofsbuf = fz_calloc(ctx, count, sizeof(int));
1584

1585
		stm = pdf_open_stream(ctx, doc, num, gen);
1586
		for (i = 0; i < count; i++)
1587
		{
1588
			tok = pdf_lex(ctx, stm, buf);
1589
			if (tok != PDF_TOK_INT)
1590
				fz_throw(ctx, FZ_ERROR_GENERIC, "corrupt object stream (%d %d R)", num, gen);
1591
			numbuf[i] = buf->i;
1592

1593
			tok = pdf_lex(ctx, stm, buf);
1594
			if (tok != PDF_TOK_INT)
1595
				fz_throw(ctx, FZ_ERROR_GENERIC, "corrupt object stream (%d %d R)", num, gen);
1596
			ofsbuf[i] = buf->i;
1597
		}
1598

1599
		fz_seek(ctx, stm, first, SEEK_SET);
1600

1601
		for (i = 0; i < count; i++)
1602
		{
1603
			int xref_len = pdf_xref_len(ctx, doc);
1604
			pdf_xref_entry *entry;
1605
			fz_seek(ctx, stm, first + ofsbuf[i], SEEK_SET);
1606

1607
			obj = pdf_parse_stm_obj(ctx, doc, stm, buf);
1608

1609
			if (numbuf[i] <= 0 || numbuf[i] >= xref_len)
1610
			{
1611
				pdf_drop_obj(ctx, obj);
1612
				fz_throw(ctx, FZ_ERROR_GENERIC, "object id (%d 0 R) out of range (0..%d)", numbuf[i], xref_len - 1);
1613
			}
1614

1615
			entry = pdf_get_xref_entry(ctx, doc, numbuf[i]);
1616

1617
			pdf_set_obj_parent(ctx, obj, numbuf[i]);
1618

1619
			if (entry->type == 'o' && entry->ofs == num)
1620
			{
1621
				/* If we already have an entry for this object,
1622
				 * we'd like to drop it and use the new one -
1623
				 * but this means that anyone currently holding
1624
				 * a pointer to the old one will be left with a
1625
				 * stale pointer. Instead, we drop the new one
1626
				 * and trust that the old one is correct. */
1627
				if (entry->obj)
1628
				{
1629
					if (pdf_objcmp(ctx, entry->obj, obj))
1630
						fz_warn(ctx, "Encountered new definition for object %d - keeping the original one", numbuf[i]);
1631
					pdf_drop_obj(ctx, obj);
1632
				}
1633
				else
1634
					entry->obj = obj;
1635
				if (numbuf[i] == target)
1636
					ret_entry = entry;
1637
			}
1638
			else
1639
			{
1640
				pdf_drop_obj(ctx, obj);
1641
			}
1642
		}
1643
	}
1644
	fz_always(ctx)
1645
	{
1646
		fz_drop_stream(ctx, stm);
1647
		fz_free(ctx, ofsbuf);
1648
		fz_free(ctx, numbuf);
1649
		pdf_drop_obj(ctx, objstm);
1650
	}
1651
	fz_catch(ctx)
1652
	{
1653
		fz_rethrow_message(ctx, "cannot open object stream (%d %d R)", num, gen);
1654
	}
1655
	return ret_entry;
1656
}
1657

1658
/*
1659
 * object loading
1660
 */
1661
static int
1662
pdf_obj_read(fz_context *ctx, pdf_document *doc, int *offset, int *nump, pdf_obj **page)
1663
{
1664
	pdf_lexbuf *buf = &doc->lexbuf.base;
1665
	int num, numofs, gen, genofs, stmofs, tmpofs, tok;
1666
	int xref_len;
1667
	pdf_xref_entry *entry;
1668
	int newtmpofs;
1669

1670
	numofs = *offset;
1671
	fz_seek(ctx, doc->file, numofs, SEEK_SET);
1672

1673
	/* We expect to read 'num' here */
1674
	tok = pdf_lex(ctx, doc->file, buf);
1675
	genofs = fz_tell(ctx, doc->file);
1676
	if (tok != PDF_TOK_INT)
1677
	{
1678
		/* Failed! */
1679
		DEBUGMESS((ctx, "skipping unexpected data (tok=%d) at %d", tok, *offset));
1680
		*offset = genofs;
1681
		return tok == PDF_TOK_EOF;
1682
	}
1683
	*nump = num = buf->i;
1684

1685
	/* We expect to read 'gen' here */
1686
	tok = pdf_lex(ctx, doc->file, buf);
1687
	tmpofs = fz_tell(ctx, doc->file);
1688
	if (tok != PDF_TOK_INT)
1689
	{
1690
		/* Failed! */
1691
		DEBUGMESS((ctx, "skipping unexpected data after \"%d\" (tok=%d) at %d", num, tok, *offset));
1692
		*offset = tmpofs;
1693
		return tok == PDF_TOK_EOF;
1694
	}
1695
	gen = buf->i;
1696

1697
	/* We expect to read 'obj' here */
1698
	do
1699
	{
1700
		tmpofs = fz_tell(ctx, doc->file);
1701
		tok = pdf_lex(ctx, doc->file, buf);
1702
		if (tok == PDF_TOK_OBJ)
1703
			break;
1704
		if (tok != PDF_TOK_INT)
1705
		{
1706
			DEBUGMESS((ctx, "skipping unexpected data (tok=%d) at %d", tok, tmpofs));
1707
			*offset = fz_tell(ctx, doc->file);
1708
			return tok == PDF_TOK_EOF;
1709
		}
1710
		DEBUGMESS((ctx, "skipping unexpected int %d at %d", num, numofs));
1711
		*nump = num = gen;
1712
		numofs = genofs;
1713
		gen = buf->i;
1714
		genofs = tmpofs;
1715
	}
1716
	while (1);
1717

1718
	/* Now we read the actual object */
1719
	xref_len = pdf_xref_len(ctx, doc);
1720

1721
	/* When we are reading a progressive file, we typically see:
1722
	 *    File Header
1723
	 *    obj m (Linearization params)
1724
	 *    xref #1 (refers to objects m-n)
1725
	 *    obj m+1
1726
	 *    ...
1727
	 *    obj n
1728
	 *    obj 1
1729
	 *    ...
1730
	 *    obj n-1
1731
	 *    xref #2
1732
	 *
1733
	 * The linearisation params are read elsewhere, hence
1734
	 * whenever we read an object it should just go into the
1735
	 * previous xref.
1736
	 */
1737
	tok = pdf_repair_obj(ctx, doc, buf, &stmofs, NULL, NULL, NULL, page, &newtmpofs);
1738

1739
	do /* So we can break out of it */
1740
	{
1741
		if (num <= 0 || num >= xref_len)
1742
		{
1743
			fz_warn(ctx, "Not a valid object number (%d %d obj)", num, gen);
1744
			break;
1745
		}
1746
		if (gen != 0)
1747
		{
1748
			fz_warn(ctx, "Unexpected non zero generation number in linearized file");
1749
		}
1750
		entry = pdf_get_populating_xref_entry(ctx, doc, num);
1751
		if (entry->type != 0)
1752
		{
1753
			DEBUGMESS((ctx, "Duplicate object found (%d %d obj)", num, gen));
1754
			break;
1755
		}
1756
		if (page && *page)
1757
		{
1758
			DEBUGMESS((ctx, "Successfully read object %d @ %d - and found page %d!", num, numofs, doc->linear_page_num));
1759
			if (!entry->obj)
1760
				entry->obj = pdf_keep_obj(ctx, *page);
1761

1762
			if (doc->linear_page_refs[doc->linear_page_num] == NULL)
1763
				doc->linear_page_refs[doc->linear_page_num] = pdf_new_indirect(ctx, doc, num, gen);
1764
		}
1765
		else
1766
		{
1767
			DEBUGMESS((ctx, "Successfully read object %d @ %d", num, numofs));
1768
		}
1769
		entry->type = 'n';
1770
		entry->gen = 0;
1771
		entry->ofs = numofs;
1772
		entry->stm_ofs = stmofs;
1773
	}
1774
	while (0);
1775
	if (page && *page)
1776
		doc->linear_page_num++;
1777

1778
	if (tok == PDF_TOK_ENDOBJ)
1779
	{
1780
		*offset = fz_tell(ctx, doc->file);
1781
	}
1782
	else
1783
	{
1784
		*offset = newtmpofs;
1785
	}
1786
	return 0;
1787
}
1788

1789
static void
1790
pdf_load_hinted_page(fz_context *ctx, pdf_document *doc, int pagenum)
1791
{
1792

1793
	if (!doc->hints_loaded || !doc->linear_page_refs)
1794
		return;
1795

1796
	if (doc->linear_page_refs[pagenum])
1797
		return;
1798

1799
	fz_try(ctx)
1800
	{
1801
		int num = doc->hint_page[pagenum].number;
1802
		pdf_obj *page = pdf_load_object(ctx, doc, num, 0);
1803
		if (pdf_name_eq(ctx, PDF_NAME_Page, pdf_dict_get(ctx, page, PDF_NAME_Type)))
1804
		{
1805
			/* We have found the page object! */
1806
			DEBUGMESS((ctx, "LoadHintedPage pagenum=%d num=%d", pagenum, num));
1807
			doc->linear_page_refs[pagenum] = pdf_new_indirect(ctx, doc, num, 0);
1808
		}
1809
		pdf_drop_obj(ctx, page);
1810
	}
1811
	fz_catch(ctx)
1812
	{
1813
		fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
1814
		/* Silently swallow the error and proceed as normal */
1815
	}
1816

1817
}
1818

1819
static int
1820
read_hinted_object(fz_context *ctx, pdf_document *doc, int num)
1821
{
1822
	/* Try to find the object using our hint table. Find the closest
1823
	 * object <= the one we want that has a hint and read forward from
1824
	 * there. */
1825
	int expected = num;
1826
	int curr_pos;
1827
	int start, offset;
1828

1829
	while (doc->hint_obj_offsets[expected] == 0 && expected > 0)
1830
		expected--;
1831
	if (expected != num)
1832
		DEBUGMESS((ctx, "object %d is unhinted, will search forward from %d", expected, num));
1833
	if (expected == 0)	/* No hints found, just bale */
1834
		return 0;
1835

1836
	curr_pos = fz_tell(ctx, doc->file);
1837
	offset = doc->hint_obj_offsets[expected];
1838

1839
	fz_var(expected);
1840

1841
	fz_try(ctx)
1842
	{
1843
		int found;
1844

1845
		/* Try to read forward from there */
1846
		do
1847
		{
1848
			start = offset;
1849
			DEBUGMESS((ctx, "Searching for object %d @ %d", expected, offset));
1850
			pdf_obj_read(ctx, doc, &offset, &found, 0);
1851
			DEBUGMESS((ctx, "Found object %d - next will be @ %d", found, offset));
1852
			if (found <= expected)
1853
			{
1854
				/* We found the right one (or one earlier than
1855
				 * we expected). Update the hints. */
1856
				doc->hint_obj_offsets[expected] = offset;
1857
				doc->hint_obj_offsets[found] = start;
1858
				doc->hint_obj_offsets[found+1] = offset;
1859
				/* Retry with the next one */
1860
				expected = found+1;
1861
			}
1862
			else
1863
			{
1864
				/* We found one later than we expected. */
1865
				doc->hint_obj_offsets[expected] = 0;
1866
				doc->hint_obj_offsets[found] = start;
1867
				doc->hint_obj_offsets[found+1] = offset;
1868
				while (doc->hint_obj_offsets[expected] == 0 && expected > 0)
1869
					expected--;
1870
				if (expected == 0)	/* No hints found, just bale */
1871
					return 0;
1872
			}
1873
		}
1874
		while (found != num);
1875
	}
1876
	fz_always(ctx)
1877
	{
1878
		fz_seek(ctx, doc->file, curr_pos, SEEK_SET);
1879
	}
1880
	fz_catch(ctx)
1881
	{
1882
		fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
1883
		/* FIXME: Currently we ignore the hint. Perhaps we should
1884
		 * drop back to non-hinted operation here. */
1885
		doc->hint_obj_offsets[expected] = 0;
1886
		fz_rethrow(ctx);
1887
	}
1888
	return 1;
1889
}
1890

1891
pdf_xref_entry *
1892
pdf_cache_object(fz_context *ctx, pdf_document *doc, int num, int gen)
1893
{
1894
	pdf_xref_entry *x;
1895
	int rnum, rgen, try_repair;
1896

1897
	fz_var(try_repair);
1898

1899
	if (num <= 0 || num >= pdf_xref_len(ctx, doc))
1900
		fz_throw(ctx, FZ_ERROR_GENERIC, "object out of range (%d %d R); xref size %d", num, gen, pdf_xref_len(ctx, doc));
1901

1902
object_updated:
1903
	try_repair = 0;
1904
	rnum = num;
1905

1906
	x = pdf_get_xref_entry(ctx, doc, num);
1907

1908
	if (x->obj != NULL)
1909
		return x;
1910

1911
	if (x->type == 'f')
1912
	{
1913
		x->obj = pdf_new_null(ctx, doc);
1914
	}
1915
	else if (x->type == 'n')
1916
	{
1917
		fz_seek(ctx, doc->file, x->ofs, SEEK_SET);
1918

1919
		fz_try(ctx)
1920
		{
1921
			x->obj = pdf_parse_ind_obj(ctx, doc, doc->file, &doc->lexbuf.base,
1922
					&rnum, &rgen, &x->stm_ofs, &try_repair);
1923
		}
1924
		fz_catch(ctx)
1925
		{
1926
			if (!try_repair || fz_caught(ctx) == FZ_ERROR_TRYLATER)
1927
				fz_rethrow(ctx);
1928
		}
1929

1930
		if (!try_repair && rnum != num)
1931
		{
1932
			pdf_drop_obj(ctx, x->obj);
1933
			x->obj = NULL;
1934
			try_repair = (doc->repair_attempted == 0);
1935
		}
1936

1937
		if (try_repair)
1938
		{
1939
			fz_try(ctx)
1940
			{
1941
				pdf_repair_xref(ctx, doc);
1942
				pdf_prime_xref_index(ctx, doc);
1943
			}
1944
			fz_catch(ctx)
1945
			{
1946
				if (rnum == num)
1947
					fz_throw(ctx, FZ_ERROR_GENERIC, "cannot parse object (%d %d R)", num, gen);
1948
				else
1949
					fz_throw(ctx, FZ_ERROR_GENERIC, "found object (%d %d R) instead of (%d %d R)", rnum, rgen, num, gen);
1950
			}
1951
			goto object_updated;
1952
		}
1953

1954
		if (doc->crypt)
1955
			pdf_crypt_obj(ctx, doc->crypt, x->obj, num, gen);
1956
	}
1957
	else if (x->type == 'o')
1958
	{
1959
		if (!x->obj)
1960
		{
1961
			fz_try(ctx)
1962
			{
1963
				x = pdf_load_obj_stm(ctx, doc, x->ofs, 0, &doc->lexbuf.base, num);
1964
			}
1965
			fz_catch(ctx)
1966
			{
1967
				fz_rethrow_message(ctx, "cannot load object stream containing object (%d %d R)", num, gen);
1968
			}
1969
			if (x == NULL)
1970
				fz_throw(ctx, FZ_ERROR_GENERIC, "cannot load object stream containing object (%d %d R)", num, gen);
1971
			if (!x->obj)
1972
				fz_throw(ctx, FZ_ERROR_GENERIC, "object (%d %d R) was not found in its object stream", num, gen);
1973
		}
1974
	}
1975
	else if (doc->hint_obj_offsets && read_hinted_object(ctx, doc, num))
1976
	{
1977
		goto object_updated;
1978
	}
1979
	else if (doc->file_length && doc->linear_pos < doc->file_length)
1980
	{
1981
		fz_throw(ctx, FZ_ERROR_TRYLATER, "cannot find object in xref (%d %d R) - not loaded yet?", num, gen);
1982
	}
1983
	else
1984
	{
1985
		fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find object in xref (%d %d R)", num, gen);
1986
	}
1987

1988
	pdf_set_obj_parent(ctx, x->obj, num);
1989
	return x;
1990
}
1991

1992
pdf_obj *
1993
pdf_load_object(fz_context *ctx, pdf_document *doc, int num, int gen)
1994
{
1995
	pdf_xref_entry *entry;
1996

1997
	fz_try(ctx)
1998
	{
1999
		entry = pdf_cache_object(ctx, doc, num, gen);
2000
	}
2001
	fz_catch(ctx)
2002
	{
2003
		fz_rethrow_message(ctx, "cannot load object (%d %d R) into cache", num, gen);
2004
	}
2005

2006
	assert(entry->obj != NULL);
2007

2008
	return pdf_keep_obj(ctx, entry->obj);
2009
}
2010

2011
pdf_obj *
2012
pdf_resolve_indirect(fz_context *ctx, pdf_obj *ref)
2013
{
2014
	int sanity = 10;
2015
	int num;
2016
	int gen;
2017
	pdf_xref_entry *entry;
2018

2019
	while (pdf_is_indirect(ctx, ref))
2020
	{
2021
		pdf_document *doc;
2022

2023
		if (--sanity == 0)
2024
		{
2025
			fz_warn(ctx, "too many indirections (possible indirection cycle involving %d %d R)", num, gen);
2026
			return NULL;
2027
		}
2028

2029
		doc = pdf_get_indirect_document(ctx, ref);
2030
		if (!doc)
2031
			return NULL;
2032
		num = pdf_to_num(ctx, ref);
2033
		gen = pdf_to_gen(ctx, ref);
2034

2035
		if (num <= 0 || gen < 0)
2036
		{
2037
			fz_warn(ctx, "invalid indirect reference (%d %d R)", num, gen);
2038
			return NULL;
2039
		}
2040

2041
		fz_try(ctx)
2042
		{
2043
			entry = pdf_cache_object(ctx, doc, num, gen);
2044
		}
2045
		fz_catch(ctx)
2046
		{
2047
			fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
2048
			fz_warn(ctx, "cannot load object (%d %d R) into cache", num, gen);
2049
			return NULL;
2050
		}
2051

2052
		if (entry->obj == NULL)
2053
			return NULL;
2054
		ref = entry->obj;
2055
	}
2056

2057
	return ref;
2058
}
2059

2060
int
2061
pdf_count_objects(fz_context *ctx, pdf_document *doc)
2062
{
2063
	return pdf_xref_len(ctx, doc);
2064
}
2065

2066
int
2067
pdf_create_object(fz_context *ctx, pdf_document *doc)
2068
{
2069
	/* TODO: reuse free object slots by properly linking free object chains in the ofs field */
2070
	pdf_xref_entry *entry;
2071
	int num = pdf_xref_len(ctx, doc);
2072
	entry = pdf_get_incremental_xref_entry(ctx, doc, num);
2073
	entry->type = 'f';
2074
	entry->ofs = -1;
2075
	entry->gen = 0;
2076
	entry->stm_ofs = 0;
2077
	entry->stm_buf = NULL;
2078
	entry->obj = NULL;
2079
	return num;
2080
}
2081

2082
void
2083
pdf_delete_object(fz_context *ctx, pdf_document *doc, int num)
2084
{
2085
	pdf_xref_entry *x;
2086

2087
	if (num <= 0 || num >= pdf_xref_len(ctx, doc))
2088
	{
2089
		fz_warn(ctx, "object out of range (%d 0 R); xref size %d", num, pdf_xref_len(ctx, doc));
2090
		return;
2091
	}
2092

2093
	x = pdf_get_incremental_xref_entry(ctx, doc, num);
2094

2095
	fz_drop_buffer(ctx, x->stm_buf);
2096
	pdf_drop_obj(ctx, x->obj);
2097

2098
	x->type = 'f';
2099
	x->ofs = 0;
2100
	x->gen = 0;
2101
	x->stm_ofs = 0;
2102
	x->stm_buf = NULL;
2103
	x->obj = NULL;
2104
}
2105

2106
void
2107
pdf_update_object(fz_context *ctx, pdf_document *doc, int num, pdf_obj *newobj)
2108
{
2109
	pdf_xref_entry *x;
2110

2111
	if (num <= 0 || num >= pdf_xref_len(ctx, doc))
2112
	{
2113
		fz_warn(ctx, "object out of range (%d 0 R); xref size %d", num, pdf_xref_len(ctx, doc));
2114
		return;
2115
	}
2116

2117
	x = pdf_get_incremental_xref_entry(ctx, doc, num);
2118

2119
	pdf_drop_obj(ctx, x->obj);
2120

2121
	x->type = 'n';
2122
	x->ofs = 0;
2123
	x->obj = pdf_keep_obj(ctx, newobj);
2124

2125
	pdf_set_obj_parent(ctx, newobj, num);
2126
}
2127

2128
void
2129
pdf_update_stream(fz_context *ctx, pdf_document *doc, pdf_obj *obj, fz_buffer *newbuf, int compressed)
2130
{
2131
	int num;
2132
	pdf_xref_entry *x;
2133

2134
	if (pdf_is_indirect(ctx, obj))
2135
		num = pdf_to_num(ctx, obj);
2136
	else
2137
		num = pdf_obj_parent_num(ctx, obj);
2138
	if (num <= 0 || num >= pdf_xref_len(ctx, doc))
2139
	{
2140
		fz_warn(ctx, "object out of range (%d 0 R); xref size %d", num, pdf_xref_len(ctx, doc));
2141
		return;
2142
	}
2143

2144
	x = pdf_get_xref_entry(ctx, doc, num);
2145

2146
	fz_drop_buffer(ctx, x->stm_buf);
2147
	x->stm_buf = fz_keep_buffer(ctx, newbuf);
2148

2149
	pdf_dict_puts_drop(ctx, obj, "Length", pdf_new_int(ctx, doc, newbuf->len));
2150
	if (!compressed)
2151
	{
2152
		pdf_dict_dels(ctx, obj, "Filter");
2153
		pdf_dict_dels(ctx, obj, "DecodeParms");
2154
	}
2155
}
2156

2157
int
2158
pdf_lookup_metadata(fz_context *ctx, pdf_document *doc, const char *key, char *buf, int size)
2159
{
2160
	if (!strcmp(key, "format"))
2161
		return fz_snprintf(buf, size, "PDF %d.%d", doc->version/10, doc->version % 10);
2162

2163
	if (!strcmp(key, "encryption"))
2164
	{
2165
		if (doc->crypt)
2166
			return fz_snprintf(buf, size, "Standard V%d R%d %d-bit %s",
2167
					pdf_crypt_version(ctx, doc),
2168
					pdf_crypt_revision(ctx, doc),
2169
					pdf_crypt_length(ctx, doc),
2170
					pdf_crypt_method(ctx, doc));
2171
		else
2172
			return fz_strlcpy(buf, "None", size);
2173
	}
2174

2175
	if (strstr(key, "info:") == key)
2176
	{
2177
		pdf_obj *info;
2178
		char *s;
2179
		int n;
2180

2181
		info = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME_Info);
2182
		if (!info)
2183
			return -1;
2184

2185
		info = pdf_dict_gets(ctx, info, key + 5);
2186
		if (!info)
2187
			return -1;
2188

2189
		s = pdf_to_utf8(ctx, doc, info);
2190
		n = fz_strlcpy(buf, s, size);
2191
		fz_free(ctx, s);
2192
		return n;
2193
	}
2194

2195
	return -1;
2196
}
2197

2198
fz_transition *
2199
pdf_page_presentation(fz_context *ctx, pdf_page *page, float *duration)
2200
{
2201
	*duration = page->duration;
2202
	if (!page->transition_present)
2203
		return NULL;
2204
	return &page->transition;
2205
}
2206

2207
/*
2208
	Initializers for the fz_document interface.
2209

2210
	The functions are split across two files to allow calls to a
2211
	version of the constructor that does not link in the interpreter.
2212
	The interpreter references the built-in font and cmap resources
2213
	which are quite big. Not linking those into the mubusy binary
2214
	saves roughly 6MB of space.
2215
*/
2216

2217
static pdf_document *
2218
pdf_new_document(fz_context *ctx, fz_stream *file)
2219
{
2220
	pdf_document *doc = fz_malloc_struct(ctx, pdf_document);
2221

2222
	doc->super.refs = 1;
2223
	doc->super.close = (fz_document_close_fn *)pdf_close_document;
2224
	doc->super.needs_password = (fz_document_needs_password_fn *)pdf_needs_password;
2225
	doc->super.authenticate_password = (fz_document_authenticate_password_fn *)pdf_authenticate_password;
2226
	doc->super.has_permission = (fz_document_has_permission_fn *)pdf_has_permission;
2227
	doc->super.load_outline = (fz_document_load_outline_fn *)pdf_load_outline;
2228
	doc->super.count_pages = (fz_document_count_pages_fn *)pdf_count_pages;
2229
	doc->super.load_page = (fz_document_load_page_fn *)pdf_load_page;
2230
	doc->super.lookup_metadata = (fz_document_lookup_metadata_fn *)pdf_lookup_metadata;
2231
	doc->super.write = (fz_document_write_fn *)pdf_write_document;
2232
	doc->update_appearance = pdf_update_appearance;
2233

2234
	pdf_lexbuf_init(ctx, &doc->lexbuf.base, PDF_LEXBUF_LARGE);
2235
	doc->file = fz_keep_stream(ctx, file);
2236

2237
	return doc;
2238
}
2239

2240
pdf_document *
2241
pdf_open_document_with_stream(fz_context *ctx, fz_stream *file)
2242
{
2243
	pdf_document *doc = pdf_new_document(ctx, file);
2244
	fz_try(ctx)
2245
	{
2246
		pdf_init_document(ctx, doc);
2247
	}
2248
	fz_catch(ctx)
2249
	{
2250
		pdf_close_document(ctx, doc);
2251
		fz_rethrow_message(ctx, "cannot load document from stream");
2252
	}
2253
	return doc;
2254
}
2255

2256
pdf_document *
2257
pdf_open_document(fz_context *ctx, const char *filename)
2258
{
2259
	fz_stream *file = NULL;
2260
	pdf_document *doc = NULL;
2261

2262
	fz_var(file);
2263
	fz_var(doc);
2264

2265
	fz_try(ctx)
2266
	{
2267
		file = fz_open_file(ctx, filename);
2268
		doc = pdf_new_document(ctx, file);
2269
		pdf_init_document(ctx, doc);
2270
	}
2271
	fz_always(ctx)
2272
	{
2273
		fz_drop_stream(ctx, file);
2274
	}
2275
	fz_catch(ctx)
2276
	{
2277
		pdf_close_document(ctx, doc);
2278
		fz_rethrow_message(ctx, "cannot load document '%s'", filename);
2279
	}
2280
	return doc;
2281
}
2282

2283
static void
2284
pdf_load_hints(fz_context *ctx, pdf_document *doc, int objnum, int gennum)
2285
{
2286
	fz_stream *stream = NULL;
2287
	pdf_obj *dict;
2288

2289
	fz_var(stream);
2290
	fz_var(dict);
2291

2292
	fz_try(ctx)
2293
	{
2294
		int i, j, least_num_page_objs, page_obj_num_bits;
2295
		int least_page_len, page_len_num_bits, shared_hint_offset;
2296
		/* int least_page_offset, page_offset_num_bits; */
2297
		/* int least_content_stream_len, content_stream_len_num_bits; */
2298
		int num_shared_obj_num_bits, shared_obj_num_bits;
2299
		/* int numerator_bits, denominator_bits; */
2300
		int shared;
2301
		int shared_obj_num, shared_obj_offset, shared_obj_count_page1;
2302
		int shared_obj_count_total;
2303
		int least_shared_group_len, shared_group_len_num_bits;
2304
		int max_object_num = pdf_xref_len(ctx, doc);
2305

2306
		stream = pdf_open_stream(ctx, doc, objnum, gennum);
2307
		dict = pdf_get_xref_entry(ctx, doc, objnum)->obj;
2308
		if (dict == NULL || !pdf_is_dict(ctx, dict))
2309
			fz_throw(ctx, FZ_ERROR_GENERIC, "malformed hint object");
2310

2311
		shared_hint_offset = pdf_to_int(ctx, pdf_dict_get(ctx, dict, PDF_NAME_S));
2312

2313
		/* Malloc the structures (use realloc to cope with the fact we
2314
		 * may try this several times before enough data is loaded) */
2315
		doc->hint_page = fz_resize_array(ctx, doc->hint_page, doc->page_count+1, sizeof(*doc->hint_page));
2316
		memset(doc->hint_page, 0, sizeof(*doc->hint_page) * (doc->page_count+1));
2317
		doc->hint_obj_offsets = fz_resize_array(ctx, doc->hint_obj_offsets, max_object_num, sizeof(*doc->hint_obj_offsets));
2318
		memset(doc->hint_obj_offsets, 0, sizeof(*doc->hint_obj_offsets) * max_object_num);
2319
		doc->hint_obj_offsets_max = max_object_num;
2320

2321
		/* Read the page object hints table: Header first */
2322
		least_num_page_objs = fz_read_bits(ctx, stream, 32);
2323
		/* The following is sometimes a lie, but we read this version,
2324
		 * as other table values are built from it. In
2325
		 * pdf_reference17.pdf, this points to 2 objects before the
2326
		 * first pages page object. */
2327
		doc->hint_page[0].offset = fz_read_bits(ctx, stream, 32);
2328
		if (doc->hint_page[0].offset > doc->hint_object_offset)
2329
			doc->hint_page[0].offset += doc->hint_object_length;
2330
		page_obj_num_bits = fz_read_bits(ctx, stream, 16);
2331
		least_page_len = fz_read_bits(ctx, stream, 32);
2332
		page_len_num_bits = fz_read_bits(ctx, stream, 16);
2333
		/* least_page_offset = */ (void) fz_read_bits(ctx, stream, 32);
2334
		/* page_offset_num_bits = */ (void) fz_read_bits(ctx, stream, 16);
2335
		/* least_content_stream_len = */ (void) fz_read_bits(ctx, stream, 32);
2336
		/* content_stream_len_num_bits = */ (void) fz_read_bits(ctx, stream, 16);
2337
		num_shared_obj_num_bits = fz_read_bits(ctx, stream, 16);
2338
		shared_obj_num_bits = fz_read_bits(ctx, stream, 16);
2339
		/* numerator_bits = */ (void) fz_read_bits(ctx, stream, 16);
2340
		/* denominator_bits = */ (void) fz_read_bits(ctx, stream, 16);
2341

2342
		/* Item 1: Page object numbers */
2343
		doc->hint_page[0].number = doc->linear_page1_obj_num;
2344
		/* We don't care about the number of objects in the first page */
2345
		(void)fz_read_bits(ctx, stream, page_obj_num_bits);
2346
		j = 1;
2347
		for (i = 1; i < doc->page_count; i++)
2348
		{
2349
			int delta_page_objs = fz_read_bits(ctx, stream, page_obj_num_bits);
2350

2351
			doc->hint_page[i].number = j;
2352
			j += least_num_page_objs + delta_page_objs;
2353
		}
2354
		doc->hint_page[i].number = j; /* Not a real page object */
2355
		fz_sync_bits(ctx, stream);
2356
		/* Item 2: Page lengths */
2357
		j = doc->hint_page[0].offset;
2358
		for (i = 0; i < doc->page_count; i++)
2359
		{
2360
			int delta_page_len = fz_read_bits(ctx, stream, page_len_num_bits);
2361
			int old = j;
2362

2363
			doc->hint_page[i].offset = j;
2364
			j += least_page_len + delta_page_len;
2365
			if (old <= doc->hint_object_offset && j > doc->hint_object_offset)
2366
				j += doc->hint_object_length;
2367
		}
2368
		doc->hint_page[i].offset = j;
2369
		fz_sync_bits(ctx, stream);
2370
		/* Item 3: Shared references */
2371
		shared = 0;
2372
		for (i = 0; i < doc->page_count; i++)
2373
		{
2374
			int num_shared_objs = fz_read_bits(ctx, stream, num_shared_obj_num_bits);
2375
			doc->hint_page[i].index = shared;
2376
			shared += num_shared_objs;
2377
		}
2378
		doc->hint_page[i].index = shared;
2379
		doc->hint_shared_ref = fz_resize_array(ctx, doc->hint_shared_ref, shared, sizeof(*doc->hint_shared_ref));
2380
		memset(doc->hint_shared_ref, 0, sizeof(*doc->hint_shared_ref) * shared);
2381
		fz_sync_bits(ctx, stream);
2382
		/* Item 4: Shared references */
2383
		for (i = 0; i < shared; i++)
2384
		{
2385
			int ref = fz_read_bits(ctx, stream, shared_obj_num_bits);
2386
			doc->hint_shared_ref[i] = ref;
2387
		}
2388
		/* Skip items 5,6,7 as we don't use them */
2389

2390
		fz_seek(ctx, stream, shared_hint_offset, SEEK_SET);
2391

2392
		/* Read the shared object hints table: Header first */
2393
		shared_obj_num = fz_read_bits(ctx, stream, 32);
2394
		shared_obj_offset = fz_read_bits(ctx, stream, 32);
2395
		if (shared_obj_offset > doc->hint_object_offset)
2396
			shared_obj_offset += doc->hint_object_length;
2397
		shared_obj_count_page1 = fz_read_bits(ctx, stream, 32);
2398
		shared_obj_count_total = fz_read_bits(ctx, stream, 32);
2399
		shared_obj_num_bits = fz_read_bits(ctx, stream, 16);
2400
		least_shared_group_len = fz_read_bits(ctx, stream, 32);
2401
		shared_group_len_num_bits = fz_read_bits(ctx, stream, 16);
2402

2403
		/* Sanity check the references in Item 4 above to ensure we
2404
		 * don't access out of range with malicious files. */
2405
		for (i = 0; i < shared; i++)
2406
		{
2407
			if (doc->hint_shared_ref[i] >= shared_obj_count_total)
2408
			{
2409
				fz_throw(ctx, FZ_ERROR_GENERIC, "malformed hint stream (shared refs)");
2410
			}
2411
		}
2412

2413
		doc->hint_shared = fz_resize_array(ctx, doc->hint_shared, shared_obj_count_total+1, sizeof(*doc->hint_shared));
2414
		memset(doc->hint_shared, 0, sizeof(*doc->hint_shared) * (shared_obj_count_total+1));
2415

2416
		/* Item 1: Shared references */
2417
		j = doc->hint_page[0].offset;
2418
		for (i = 0; i < shared_obj_count_page1; i++)
2419
		{
2420
			int off = fz_read_bits(ctx, stream, shared_group_len_num_bits);
2421
			int old = j;
2422
			doc->hint_shared[i].offset = j;
2423
			j += off + least_shared_group_len;
2424
			if (old <= doc->hint_object_offset && j > doc->hint_object_offset)
2425
				j += doc->hint_object_length;
2426
		}
2427
		/* FIXME: We would have problems recreating the length of the
2428
		 * last page 1 shared reference group. But we'll never need
2429
		 * to, so ignore it. */
2430
		j = shared_obj_offset;
2431
		for (; i < shared_obj_count_total; i++)
2432
		{
2433
			int off = fz_read_bits(ctx, stream, shared_group_len_num_bits);
2434
			int old = j;
2435
			doc->hint_shared[i].offset = j;
2436
			j += off + least_shared_group_len;
2437
			if (old <= doc->hint_object_offset && j > doc->hint_object_offset)
2438
				j += doc->hint_object_length;
2439
		}
2440
		doc->hint_shared[i].offset = j;
2441
		fz_sync_bits(ctx, stream);
2442
		/* Item 2: Signature flags: read these just so we can skip */
2443
		for (i = 0; i < shared_obj_count_total; i++)
2444
		{
2445
			doc->hint_shared[i].number = fz_read_bits(ctx, stream, 1);
2446
		}
2447
		fz_sync_bits(ctx, stream);
2448
		/* Item 3: Signatures: just skip */
2449
		for (i = 0; i < shared_obj_count_total; i++)
2450
		{
2451
			if (doc->hint_shared[i].number)
2452
			{
2453
				(void) fz_read_bits(ctx, stream, 128);
2454
			}
2455
		}
2456
		fz_sync_bits(ctx, stream);
2457
		/* Item 4: Shared object object numbers */
2458
		j = doc->linear_page1_obj_num; /* FIXME: This is a lie! */
2459
		for (i = 0; i < shared_obj_count_page1; i++)
2460
		{
2461
			doc->hint_shared[i].number = j;
2462
			j += fz_read_bits(ctx, stream, shared_obj_num_bits) + 1;
2463
		}
2464
		j = shared_obj_num;
2465
		for (; i < shared_obj_count_total; i++)
2466
		{
2467
			doc->hint_shared[i].number = j;
2468
			j += fz_read_bits(ctx, stream, shared_obj_num_bits) + 1;
2469
		}
2470
		doc->hint_shared[i].number = j;
2471

2472
		/* Now, actually use the data we have gathered. */
2473
		for (i = 0 /*shared_obj_count_page1*/; i < shared_obj_count_total; i++)
2474
		{
2475
			doc->hint_obj_offsets[doc->hint_shared[i].number] = doc->hint_shared[i].offset;
2476
		}
2477
		for (i = 0; i < doc->page_count; i++)
2478
		{
2479
			doc->hint_obj_offsets[doc->hint_page[i].number] = doc->hint_page[i].offset;
2480
		}
2481
	}
2482
	fz_always(ctx)
2483
	{
2484
		fz_drop_stream(ctx, stream);
2485
	}
2486
	fz_catch(ctx)
2487
	{
2488
		fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
2489
		/* Don't try to load hints again */
2490
		doc->hints_loaded = 1;
2491
		/* We won't use the linearized object any more. */
2492
		doc->file_reading_linearly = 0;
2493
		/* Any other error becomes a TRYLATER */
2494
		fz_throw(ctx, FZ_ERROR_TRYLATER, "malformed hints object");
2495
	}
2496
	doc->hints_loaded = 1;
2497
}
2498

2499
static void
2500
pdf_load_hint_object(fz_context *ctx, pdf_document *doc)
2501
{
2502
	pdf_lexbuf *buf = &doc->lexbuf.base;
2503
	int curr_pos;
2504

2505
	curr_pos = fz_tell(ctx, doc->file);
2506
	fz_seek(ctx, doc->file, doc->hint_object_offset, SEEK_SET);
2507
	fz_try(ctx)
2508
	{
2509
		while (1)
2510
		{
2511
			pdf_obj *page = NULL;
2512
			int tmpofs, num, gen, tok;
2513

2514
			tok = pdf_lex(ctx, doc->file, buf);
2515
			if (tok != PDF_TOK_INT)
2516
				break;
2517
			num = buf->i;
2518
			tok = pdf_lex(ctx, doc->file, buf);
2519
			if (tok != PDF_TOK_INT)
2520
				break;
2521
			gen = buf->i;
2522
			tok = pdf_lex(ctx, doc->file, buf);
2523
			if (tok != PDF_TOK_OBJ)
2524
				break;
2525
			(void)pdf_repair_obj(ctx, doc, buf, &tmpofs, NULL, NULL, NULL, &page, &tmpofs);
2526
			pdf_load_hints(ctx, doc, num, gen);
2527
		}
2528
	}
2529
	fz_always(ctx)
2530
	{
2531
		fz_seek(ctx, doc->file, curr_pos, SEEK_SET);
2532
	}
2533
	fz_catch(ctx)
2534
	{
2535
		fz_rethrow(ctx);
2536
	}
2537
}
2538

2539
/*
	Try to make progress reading a linearized file so that the page
	object for pagenum becomes available.

	Throws FZ_ERROR_GENERIC if pagenum is outside [0, doc->page_count).
	If the whole file has already been consumed (doc->linear_pos equals
	doc->file_length) the stored page reference is returned directly.
	Otherwise objects are read from doc->linear_pos until end of file,
	after which the xref is loaded and the page tree checked.

	A FZ_ERROR_TRYLATER raised while reading is swallowed if a reference
	for pagenum has been recorded by then; every other error, or a
	TRYLATER with no page reference yet, is rethrown. The file position
	in effect before reading started is restored in all cases.

	Returns doc->linear_page_refs[pagenum].
*/
pdf_obj *pdf_progressive_advance(fz_context *ctx, pdf_document *doc, int pagenum)
{
	pdf_lexbuf *buf = &doc->lexbuf.base;
	int curr_pos;
	/* Initialised so the drop in the catch block is always well defined. */
	pdf_obj *page = NULL;

	/* Validate pagenum before it is handed to any helper, so nothing
	 * downstream sees an out-of-range page index. */
	if (pagenum < 0 || pagenum >= doc->page_count)
		fz_throw(ctx, FZ_ERROR_GENERIC, "page load out of range (%d of %d)", pagenum, doc->page_count);

	pdf_load_hinted_page(ctx, doc, pagenum);

	if (doc->linear_pos == doc->file_length)
		return doc->linear_page_refs[pagenum];

	/* Only load hints once, and then only after we have got page 0 */
	if (pagenum > 0 && !doc->hints_loaded && doc->hint_object_offset > 0 && doc->linear_pos >= doc->hint_object_offset)
	{
		/* Found hint object */
		pdf_load_hint_object(ctx, doc);
	}

	DEBUGMESS((ctx, "continuing to try to advance from %d", doc->linear_pos));
	curr_pos = fz_tell(ctx, doc->file);

	fz_var(page);

	fz_try(ctx)
	{
		int eof;
		do
		{
			int num;
			page = NULL;
			eof = pdf_obj_read(ctx, doc, &doc->linear_pos, &num, &page);
			pdf_drop_obj(ctx, page);
			page = NULL;
		}
		while (!eof);

		{
			pdf_obj *catalog;
			pdf_obj *pages;
			/* Everything has been read: switch to the real xref. */
			doc->linear_pos = doc->file_length;
			pdf_load_xref(ctx, doc, buf);
			catalog = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME_Root);
			pages = pdf_dict_get(ctx, catalog, PDF_NAME_Pages);

			if (!pdf_is_dict(ctx, pages))
				fz_throw(ctx, FZ_ERROR_GENERIC, "missing page tree");
			/* NOTE(review): this break is not inside any visible loop;
			 * presumably it leaves the fz_try block - confirm against
			 * the fz_try macro definition. */
			break;
		}
	}
	fz_always(ctx)
	{
		fz_seek(ctx, doc->file, curr_pos, SEEK_SET);
	}
	fz_catch(ctx)
	{
		pdf_drop_obj(ctx, page);
		if (fz_caught(ctx) == FZ_ERROR_TRYLATER)
		{
			if (doc->linear_page_refs[pagenum] == NULL)
			{
				/* Still not got a page */
				fz_rethrow(ctx);
			}
		}
		else
			fz_rethrow(ctx);
	}

	return doc->linear_page_refs[pagenum];
}
2612

2613
/*
	Downcast a generic fz_document to a pdf_document.

	The document is recognised as a PDF by its close callback being
	pdf_close_document. Returns NULL for a NULL document or one with a
	different close callback.
*/
pdf_document *pdf_specifics(fz_context *ctx, fz_document *doc)
{
	if (!doc)
		return NULL;

	if (doc->close != (fz_document_close_fn *)pdf_close_document)
		return NULL;

	return (pdf_document *)doc;
}
2617

2618
/*
	Create a new, empty PDF document that is not backed by a file.

	The document is given version 14, a trailer with Size 3 and a Root
	reference, a Catalog dictionary, and a Pages dictionary with Count 0
	and a Kids array.

	On failure every object created so far and the document itself are
	released, and the error is rethrown with the message
	"Failed to create empty document".
*/
pdf_document *pdf_create_document(fz_context *ctx)
{
	pdf_document *doc;
	pdf_obj *o = NULL;
	pdf_obj *root;
	pdf_obj *pages;
	pdf_obj *trailer = NULL;

	fz_var(o);
	fz_var(trailer);

	doc = pdf_new_document(ctx, NULL);
	fz_try(ctx)
	{
		doc->version = 14;
		doc->file_size = 0;
		doc->startxref = 0;
		doc->num_xref_sections = 0;
		pdf_get_populating_xref_entry(ctx, doc, 0);
		doc->xref_altered = 1;
		trailer = pdf_new_dict(ctx, doc, 2);
		pdf_dict_put_drop(ctx, trailer, PDF_NAME_Size, pdf_new_int(ctx, doc, 3));
		/* 'o' tracks the object that still needs dropping if we throw;
		 * 'root'/'pages' are plain aliases used after the drop. */
		o = root = pdf_new_dict(ctx, doc, 2);
		pdf_dict_put_drop(ctx, trailer, PDF_NAME_Root, pdf_new_ref(ctx, doc, o));
		pdf_drop_obj(ctx, o);
		o = NULL;
		pdf_dict_put_drop(ctx, root, PDF_NAME_Type, PDF_NAME_Catalog);
		o = pages = pdf_new_dict(ctx, doc, 3);
		pdf_dict_put_drop(ctx, root, PDF_NAME_Pages, pdf_new_ref(ctx, doc, o));
		pdf_drop_obj(ctx, o);
		o = NULL;
		pdf_dict_put_drop(ctx, pages, PDF_NAME_Type, PDF_NAME_Pages);
		pdf_dict_put_drop(ctx, pages, PDF_NAME_Count, pdf_new_int(ctx, doc, 0));
		pdf_dict_put_drop(ctx, pages, PDF_NAME_Kids, pdf_new_array(ctx, doc, 1));
		pdf_set_populating_xref_trailer(ctx, doc, trailer);
		pdf_drop_obj(ctx, trailer);
		/* Nothing below can throw, but clear it so the handle is never stale. */
		trailer = NULL;
	}
	fz_catch(ctx)
	{
		pdf_drop_obj(ctx, trailer);
		pdf_drop_obj(ctx, o);
		/* Release the half-built document too, as the open functions
		 * do on failure; otherwise it would leak. */
		pdf_close_document(ctx, doc);
		fz_rethrow_message(ctx, "Failed to create empty document");
	}
	return doc;
}
2663

2664
int
2665
pdf_recognize(fz_context *doc, const char *magic)
2666
{
2667
	char *ext = strrchr(magic, '.');
2668

2669
	if (ext)
2670
	{
2671
		if (!fz_strcasecmp(ext, ".pdf"))
2672
			return 100;
2673
	}
2674
	if (!strcmp(magic, "pdf") || !strcmp(magic, "application/pdf"))
2675
		return 100;
2676

2677
	return 1;
2678
}
2679

2680
fz_document_handler pdf_document_handler =
2681
{
2682
	(fz_document_recognize_fn *)&pdf_recognize,
2683
	(fz_document_open_fn *)&pdf_open_document,
2684
	(fz_document_open_with_stream_fn *)&pdf_open_document_with_stream
2685
};
2686

2687
/*
	Set PDF_OBJ_FLAG_MARK on every xref entry, in every section and
	subsection, that currently has an object attached.
*/
void pdf_mark_xref(fz_context *ctx, pdf_document *doc)
{
	int sec;

	for (sec = 0; sec < doc->num_xref_sections; sec++)
	{
		pdf_xref_subsec *sub = doc->xref_sections[sec].subsec;

		while (sub != NULL)
		{
			int i;

			for (i = 0; i < sub->len; i++)
			{
				if (sub->table[i].obj)
					sub->table[i].flags |= PDF_OBJ_FLAG_MARK;
			}
			sub = sub->next;
		}
	}
}
2709

2710
/*
	Drop every object held by the xref whose only reference is the one
	held by its xref entry, leaving the entry without an object.

	Entries with a stream buffer attached are left alone.
*/
void pdf_clear_xref(fz_context *ctx, pdf_document *doc)
{
	int sec;

	for (sec = 0; sec < doc->num_xref_sections; sec++)
	{
		pdf_xref_subsec *sub = doc->xref_sections[sec].subsec;

		while (sub != NULL)
		{
			int i;

			for (i = 0; i < sub->len; i++)
			{
				pdf_xref_entry *entry = &sub->table[i];

				/* We cannot drop objects if the stream
				 * buffer has been updated */
				if (entry->obj == NULL || entry->stm_buf != NULL)
					continue;

				/* Somebody else still holds a reference. */
				if (pdf_obj_refs(ctx, entry->obj) != 1)
					continue;

				pdf_drop_obj(ctx, entry->obj);
				entry->obj = NULL;
			}
			sub = sub->next;
		}
	}
}
2738

2739
/*
	Like pdf_clear_xref, but spares entries carrying PDF_OBJ_FLAG_MARK:
	an object is dropped only if its entry is unmarked, has no stream
	buffer attached, and the entry holds the sole reference.
*/
void pdf_clear_xref_to_mark(fz_context *ctx, pdf_document *doc)
{
	int sec;

	for (sec = 0; sec < doc->num_xref_sections; sec++)
	{
		pdf_xref_subsec *sub = doc->xref_sections[sec].subsec;

		while (sub != NULL)
		{
			int i;

			for (i = 0; i < sub->len; i++)
			{
				pdf_xref_entry *entry = &sub->table[i];

				/* We cannot drop objects if the stream buffer has
				 * been updated */
				if (entry->obj == NULL || entry->stm_buf != NULL)
					continue;

				/* Marked entries are kept. */
				if ((entry->flags & PDF_OBJ_FLAG_MARK) != 0)
					continue;

				/* Somebody else still holds a reference. */
				if (pdf_obj_refs(ctx, entry->obj) != 1)
					continue;

				pdf_drop_obj(ctx, entry->obj);
				entry->obj = NULL;
			}
			sub = sub->next;
		}
	}
}
2768

2769
Product

Resources

Company