Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Download
7639 views
1
#include "mupdf/html.h"
2
3
/* Indices into epub_document.page_margin: top, right, bottom, left. */
enum
{
	T,
	R,
	B,
	L
};

/* Forward declarations of the handler's private types. */
typedef struct epub_document_s epub_document;
typedef struct epub_chapter_s epub_chapter;
typedef struct epub_page_s epub_page;
8
9
struct epub_document_s
10
{
11
fz_document super;
12
fz_archive *zip;
13
fz_html_font_set *set;
14
float page_w, page_h, em;
15
float page_margin[4];
16
int count;
17
epub_chapter *spine;
18
};
19
20
struct epub_chapter_s
21
{
22
int start;
23
fz_html *box;
24
epub_chapter *next;
25
};
26
27
struct epub_page_s
28
{
29
fz_page super;
30
epub_document *doc;
31
int number;
32
};
33
34
static void
35
epub_layout(fz_context *ctx, fz_document *doc_, float w, float h, float em)
36
{
37
epub_document *doc = (epub_document*)doc_;
38
epub_chapter *ch;
39
40
doc->page_margin[T] = em;
41
doc->page_margin[B] = em;
42
doc->page_margin[L] = 0;
43
doc->page_margin[R] = 0;
44
45
doc->page_w = w - doc->page_margin[L] - doc->page_margin[R];
46
doc->page_h = h - doc->page_margin[T] - doc->page_margin[B];
47
doc->em = em;
48
49
printf("epub: laying out chapters.\n");
50
for (ch = doc->spine; ch; ch = ch->next)
51
fz_layout_html(ctx, ch->box, doc->page_w, doc->page_h, doc->em);
52
printf("epub: done.\n");
53
}
54
55
static int
56
epub_count_pages(fz_context *ctx, fz_document *doc_)
57
{
58
epub_document *doc = (epub_document*)doc_;
59
epub_chapter *ch;
60
int count = 0;
61
for (ch = doc->spine; ch; ch = ch->next)
62
count += ceilf(ch->box->h / doc->page_h);
63
return count;
64
}
65
66
static void
67
epub_drop_page_imp(fz_context *ctx, fz_page *page_)
68
{
69
}
70
71
static fz_rect *
72
epub_bound_page(fz_context *ctx, fz_page *page_, fz_rect *bbox)
73
{
74
epub_page *page = (epub_page*)page_;
75
epub_document *doc = page->doc;
76
bbox->x0 = 0;
77
bbox->y0 = 0;
78
bbox->x1 = doc->page_w + doc->page_margin[L] + doc->page_margin[R];
79
bbox->y1 = doc->page_h + doc->page_margin[T] + doc->page_margin[B];
80
return bbox;
81
}
82
83
static void
84
epub_run_page(fz_context *ctx, fz_page *page_, fz_device *dev, const fz_matrix *ctm, fz_cookie *cookie)
85
{
86
epub_page *page = (epub_page*)page_;
87
epub_document *doc = page->doc;
88
epub_chapter *ch;
89
fz_matrix local_ctm = *ctm;
90
int n = page->number;
91
int count = 0;
92
93
fz_pre_translate(&local_ctm, doc->page_margin[L], doc->page_margin[T]);
94
95
for (ch = doc->spine; ch; ch = ch->next)
96
{
97
int cn = ceilf(ch->box->h / doc->page_h);
98
if (n < count + cn)
99
{
100
fz_draw_html(ctx, ch->box, (n-count) * doc->page_h, (n-count+1) * doc->page_h, dev, &local_ctm);
101
break;
102
}
103
count += cn;
104
}
105
}
106
107
static fz_page *
108
epub_load_page(fz_context *ctx, fz_document *doc_, int number)
109
{
110
epub_document *doc = (epub_document*)doc_;
111
epub_page *page = fz_new_page(ctx, sizeof *page);
112
page->super.bound_page = epub_bound_page;
113
page->super.run_page_contents = epub_run_page;
114
page->super.drop_page_imp = epub_drop_page_imp;
115
page->doc = doc;
116
page->number = number;
117
return (fz_page*)page;
118
}
119
120
static void
121
epub_close_document(fz_context *ctx, fz_document *doc_)
122
{
123
epub_document *doc = (epub_document*)doc_;
124
epub_chapter *ch, *next;
125
ch = doc->spine;
126
while (ch)
127
{
128
next = ch->next;
129
fz_drop_html(ctx, ch->box);
130
fz_free(ctx, ch);
131
ch = next;
132
}
133
fz_drop_archive(ctx, doc->zip);
134
fz_drop_html_font_set(ctx, doc->set);
135
fz_free(ctx, doc);
136
}
137
138
static const char *
139
rel_path_from_idref(fz_xml *manifest, const char *idref)
140
{
141
fz_xml *item;
142
if (!idref)
143
return NULL;
144
item = fz_xml_find_down(manifest, "item");
145
while (item)
146
{
147
const char *id = fz_xml_att(item, "id");
148
if (id && !strcmp(id, idref))
149
return fz_xml_att(item, "href");
150
item = fz_xml_find_next(item, "item");
151
}
152
return NULL;
153
}
154
155
static const char *
156
path_from_idref(char *path, fz_xml *manifest, const char *base_uri, const char *idref, int n)
157
{
158
const char *rel_path = rel_path_from_idref(manifest, idref);
159
if (!rel_path)
160
{
161
path[0] = 0;
162
return NULL;
163
}
164
fz_strlcpy(path, base_uri, n);
165
fz_strlcat(path, "/", n);
166
fz_strlcat(path, rel_path, n);
167
return fz_cleanname(path);
168
}
169
170
static epub_chapter *
171
epub_parse_chapter(fz_context *ctx, epub_document *doc, const char *path)
172
{
173
fz_archive *zip = doc->zip;
174
fz_buffer *buf;
175
epub_chapter *ch;
176
char base_uri[2048];
177
178
fz_dirname(base_uri, path, sizeof base_uri);
179
180
buf = fz_read_archive_entry(ctx, zip, path);
181
fz_write_buffer_byte(ctx, buf, 0);
182
183
ch = fz_malloc_struct(ctx, epub_chapter);
184
ch->box = fz_parse_html(ctx, doc->set, zip, base_uri, buf, NULL);
185
ch->next = NULL;
186
187
fz_drop_buffer(ctx, buf);
188
189
return ch;
190
}
191
192
static void
193
epub_parse_header(fz_context *ctx, epub_document *doc)
194
{
195
fz_archive *zip = doc->zip;
196
fz_buffer *buf;
197
fz_xml *container_xml, *content_opf;
198
fz_xml *container, *rootfiles, *rootfile;
199
fz_xml *package, *manifest, *spine, *itemref;
200
char base_uri[2048];
201
const char *full_path;
202
const char *version;
203
char ncx[2048], s[2048];
204
epub_chapter *head, *tail;
205
206
/* parse META-INF/container.xml to find OPF */
207
208
buf = fz_read_archive_entry(ctx, zip, "META-INF/container.xml");
209
fz_write_buffer_byte(ctx, buf, 0);
210
container_xml = fz_parse_xml(ctx, buf->data, buf->len, 0);
211
fz_drop_buffer(ctx, buf);
212
213
container = fz_xml_find(container_xml, "container");
214
rootfiles = fz_xml_find_down(container, "rootfiles");
215
rootfile = fz_xml_find_down(rootfiles, "rootfile");
216
full_path = fz_xml_att(rootfile, "full-path");
217
if (!full_path)
218
fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find root file in EPUB");
219
220
printf("epub: found root: %s\n", full_path);
221
222
fz_dirname(base_uri, full_path, sizeof base_uri);
223
224
/* parse OPF to find NCX and spine */
225
226
buf = fz_read_archive_entry(ctx, zip, full_path);
227
fz_write_buffer_byte(ctx, buf, 0);
228
content_opf = fz_parse_xml(ctx, buf->data, buf->len, 0);
229
fz_drop_buffer(ctx, buf);
230
231
package = fz_xml_find(content_opf, "package");
232
version = fz_xml_att(package, "version");
233
if (!version || strcmp(version, "2.0"))
234
fz_warn(ctx, "unknown epub version: %s", version ? version : "<none>");
235
236
manifest = fz_xml_find_down(package, "manifest");
237
spine = fz_xml_find_down(package, "spine");
238
239
if (path_from_idref(ncx, manifest, base_uri, fz_xml_att(spine, "toc"), sizeof ncx))
240
{
241
/* TODO: parse NCX to create fz_outline */
242
printf("epub: found outline: %s\n", ncx);
243
}
244
245
head = tail = NULL;
246
itemref = fz_xml_find_down(spine, "itemref");
247
while (itemref)
248
{
249
if (path_from_idref(s, manifest, base_uri, fz_xml_att(itemref, "idref"), sizeof s))
250
{
251
printf("epub: found spine %s\n", s);
252
if (!head)
253
head = tail = epub_parse_chapter(ctx, doc, s);
254
else
255
tail = tail->next = epub_parse_chapter(ctx, doc, s);
256
}
257
itemref = fz_xml_find_next(itemref, "itemref");
258
}
259
260
doc->spine = head;
261
262
printf("epub: done.\n");
263
264
fz_drop_xml(ctx, container_xml);
265
fz_drop_xml(ctx, content_opf);
266
}
267
268
static fz_document *
269
epub_init(fz_context *ctx, fz_archive *zip)
270
{
271
epub_document *doc;
272
273
doc = fz_malloc_struct(ctx, epub_document);
274
doc->zip = zip;
275
doc->set = fz_new_html_font_set(ctx);
276
277
doc->super.close = epub_close_document;
278
doc->super.layout = epub_layout;
279
doc->super.count_pages = epub_count_pages;
280
doc->super.load_page = epub_load_page;
281
282
fz_try(ctx)
283
{
284
epub_parse_header(ctx, doc);
285
}
286
fz_catch(ctx)
287
{
288
epub_close_document(ctx, (fz_document*)doc);
289
fz_rethrow(ctx);
290
}
291
292
return (fz_document*)doc;
293
}
294
295
static fz_document *
296
epub_open_document_with_stream(fz_context *ctx, fz_stream *file)
297
{
298
return epub_init(ctx, fz_open_archive_with_stream(ctx, file));
299
}
300
301
static fz_document *
302
epub_open_document(fz_context *ctx, const char *filename)
303
{
304
if (strstr(filename, "META-INF/container.xml") || strstr(filename, "META-INF\\container.xml"))
305
{
306
char dirname[2048], *p;
307
fz_strlcpy(dirname, filename, sizeof dirname);
308
p = strstr(dirname, "META-INF");
309
*p = 0;
310
if (!dirname[0])
311
fz_strlcpy(dirname, ".", sizeof dirname);
312
return epub_init(ctx, fz_open_directory(ctx, dirname));
313
}
314
315
return epub_init(ctx, fz_open_archive(ctx, filename));
316
}
317
318
static int
319
epub_recognize(fz_context *doc, const char *magic)
320
{
321
char *ext = strrchr(magic, '.');
322
if (ext)
323
if (!fz_strcasecmp(ext, ".epub"))
324
return 100;
325
if (strstr(magic, "META-INF/container.xml") || strstr(magic, "META-INF\\container.xml"))
326
return 200;
327
if (!strcmp(magic, "application/epub+zip"))
328
return 100;
329
return 0;
330
}
331
332
fz_document_handler epub_document_handler =
333
{
334
&epub_recognize,
335
&epub_open_document,
336
&epub_open_document_with_stream
337
};
338
339