Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
quarto-dev
GitHub Repository: quarto-dev/quarto-cli
Path: blob/main/src/format/pdf/format-pdf.ts
6451 views
1
/*
2
* format-pdf.ts
3
*
4
* Copyright (C) 2020-2022 Posit Software, PBC
5
*/
6
7
import { basename, extname, join } from "../../deno_ral/path.ts";
8
9
import { mergeConfigs } from "../../core/config.ts";
10
import { texSafeFilename } from "../../core/tex.ts";
11
12
import {
13
kBibliography,
14
kCapBottom,
15
kCapLoc,
16
kCapTop,
17
kCitationLocation,
18
kCiteMethod,
19
kClassOption,
20
kDefaultImageExtension,
21
kDocumentClass,
22
kEcho,
23
kFigCapLoc,
24
kFigDpi,
25
kFigFormat,
26
kFigHeight,
27
kFigWidth,
28
kHeaderIncludes,
29
kKeepTex,
30
kLang,
31
kNumberSections,
32
kPaperSize,
33
kPdfEngine,
34
kPdfStandard,
35
kPdfStandardApplied,
36
kReferenceLocation,
37
kShiftHeadingLevelBy,
38
kTblCapLoc,
39
kTopLevelDivision,
40
kWarning,
41
} from "../../config/constants.ts";
42
import { warning } from "../../deno_ral/log.ts";
43
import { asArray } from "../../core/array.ts";
44
import { Format, FormatExtras, PandocFlags } from "../../config/types.ts";
45
46
import { createFormat } from "../formats-shared.ts";
47
48
import { RenderedFile, RenderServices } from "../../command/render/types.ts";
49
import { ProjectConfig, ProjectContext } from "../../project/types.ts";
50
import { BookExtension } from "../../project/types/book/book-shared.ts";
51
52
import { readLines } from "io/read-lines";
53
import { TempContext } from "../../core/temp.ts";
54
import { isLatexPdfEngine, pdfEngine } from "../../config/pdf.ts";
55
import { formatResourcePath } from "../../core/resources.ts";
56
import { kTemplatePartials } from "../../command/render/template.ts";
57
import { copyTo } from "../../core/copy.ts";
58
import { kCodeAnnotations } from "../html/format-html-shared.ts";
59
import { safeModeFromFile } from "../../deno_ral/fs.ts";
60
import { hasLevelOneHeadings as hasL1Headings } from "../../core/lib/markdown-analysis/level-one-headings.ts";
61
62
// Builds the `pdf` format: the shared PDF configuration merged with the
// book extension used when a book is rendered to a single PDF.
export function pdfFormat(): Format {
  const baseFormat = createPdfFormat("PDF");
  const bookSupport = {
    extensions: {
      book: pdfBookExtension,
    },
  };
  return mergeConfigs(baseFormat, bookSupport);
}
72
73
// Builds the `beamer` format on top of the shared PDF configuration.
export function beamerFormat(): Format {
  // Beamer is created with automatic heading shifting and the KOMA
  // defaults both switched off.
  const pdfBase = createPdfFormat("Beamer", false, false);

  // Slide-oriented defaults layered over the PDF base.
  const beamerDefaults = {
    execute: {
      [kFigWidth]: 10,
      [kFigHeight]: 7,
      [kEcho]: false,
      [kWarning]: false,
    },
    classoption: ["notheorems"],
  };

  return createFormat("Beamer", "pdf", pdfBase, beamerDefaults);
}
89
90
// Builds a format that stops at LaTeX (`.tex`) output instead of compiling
// a PDF. It reuses the shared PDF configuration under the given display name.
export function latexFormat(displayName: string): Format {
  const latexBookExtension = {
    onSingleFilePreRender: (
      format: Format,
      _config?: ProjectConfig,
    ) => {
      // The .tex file is the final product here (no PDF is built from it),
      // so the supporting files have to be kept around.
      format.render[kKeepTex] = true;
      return format;
    },
    formatOutputDirectory: () => "book-latex",
  };

  const latexBase = mergeConfigs(
    createPdfFormat(displayName),
    {
      extensions: {
        book: latexBookExtension,
      },
    },
  );

  return createFormat(displayName, "tex", latexBase);
}
118
119
// Creates the base format definition shared by the PDF, Beamer and LaTeX
// formats.
//
// displayName:       display name of the format; the value "Beamer" selects
//                    the beamer template resources, anything else the pdf ones
// autoShiftHeadings: when true, headings may be shifted by -1 (see the
//                    conditions inside formatExtras)
// koma:              when true, KOMA-Script defaults are supplied unless the
//                    document uses custom templates or a non-KOMA class
function createPdfFormat(
  displayName: string,
  autoShiftHeadings = true,
  koma = true,
): Format {
  return createFormat(
    displayName,
    "pdf",
    {
      execute: {
        [kFigWidth]: 5.5,
        [kFigHeight]: 3.5,
        [kFigFormat]: "pdf",
        [kFigDpi]: 300,
      },
      pandoc: {
        [kPdfEngine]: "lualatex",
        standalone: true,
        variables: {
          graphics: true,
          tables: true,
        },
        [kDefaultImageExtension]: "pdf",
      },
      metadata: {
        ["block-headings"]: true,
      },
      formatExtras: async (
        _input: string,
        markdown: string,
        flags: PandocFlags,
        format: Format,
        _libDir: string,
        services: RenderServices,
      ) => {
        const extras: FormatExtras = {};

        // only apply extras if this is latex (as opposed to context)
        const engine = pdfEngine(format.pandoc, format.render, flags);
        if (!isLatexPdfEngine(engine)) {
          return extras;
        }

        // Post processor for dealing with latex output
        extras.postprocessors = [
          pdfLatexPostProcessor(flags, format, services.temp),
        ];

        // user may have overridden koma, check for that here
        const documentclass = format.metadata[kDocumentClass] as
          | string
          | undefined;

        const usingCustomTemplates = format.pandoc.template !== undefined ||
          format.metadata[kTemplatePartials] !== undefined;

        // Decide per invocation: the `koma` parameter is captured by this
        // closure, so writing to it would make one document's opt-out stick
        // for every later call on the same format object.
        let useKoma = koma;
        const komaClasses = [
          "scrbook",
          "scrreprt",
          "scrreport",
          "scrartcl",
          "scrarticle",
        ];
        if (
          usingCustomTemplates ||
          (documentclass && !komaClasses.includes(documentclass))
        ) {
          useKoma = false;
        }

        // default to KOMA article class. we do this here rather than
        // above so that projectExtras can override us
        if (useKoma) {
          // determine caption options
          const captionOptions: string[] = [];
          const tblCaploc = tblCapLocation(format);
          captionOptions.push(
            tblCaploc === kCapTop ? "tableheading" : "tablesignature",
          );
          if (figCapLocation(format) === kCapTop) {
            captionOptions.push("figureheading");
          }

          // establish default class options
          const defaultClassOptions = ["DIV=11"];
          if (format.metadata[kLang] !== "de") {
            defaultClassOptions.push("numbers=noendperiod");
          }

          // determine class options (filter by options already set by the user)
          const userClassOptions = format.metadata[kClassOption] as
            | string[]
            | undefined;
          const classOptions = defaultClassOptions.filter((option) => {
            if (Array.isArray(userClassOptions)) {
              // a default is dropped when the user set the same `name=` option
              const name = option.split("=")[0];
              return !userClassOptions.some((userOption) =>
                String(userOption).startsWith(name + "=")
              );
            } else {
              return true;
            }
          });

          const headerIncludes = [];
          headerIncludes.push(
            "\\KOMAoption{captions}{" + captionOptions.join(",") + "}",
          );

          extras.metadata = {
            [kDocumentClass]: "scrartcl",
            [kClassOption]: classOptions,
            [kPaperSize]: "letter",
            [kHeaderIncludes]: headerIncludes,
          };
        }

        // Provide a custom template for this format
        // Partials can be the one from Quarto division
        const partialNamesQuarto: string[] = [
          "babel-lang",
          "before-bib",
          "biblio",
          "biblio-config",
          "citations",
          "doc-class",
          "graphics",
          "after-body",
          "before-body",
          "pandoc",
          "tables",
          "tightlist",
          "before-title",
          "title",
          "toc",
        ];
        // or the one from Pandoc division (since Pandoc 3.6.3)
        const partialNamesPandoc: string[] = [
          "after-header-includes",
          "common",
          "document-metadata",
          "font-settings",
          "fonts",
          "hypersetup",
          "passoptions",
        ];

        // Quarto partials are resolved as `.tex` resources, Pandoc partials
        // as `.latex` resources, both under `pandoc/` for the given format.
        const createTemplateContext = function (
          to: string,
          partialNamesQuarto: string[],
          partialNamesPandoc: string[],
        ) {
          return {
            template: formatResourcePath(to, "pandoc/template.tex"),
            partials: [
              ...partialNamesQuarto.map((name) => {
                return formatResourcePath(to, `pandoc/${name}.tex`);
              }),
              ...partialNamesPandoc.map((name) => {
                return formatResourcePath(to, `pandoc/${name}.latex`);
              }),
            ],
          };
        };
        // Beamer doesn't use document-metadata partial (its template doesn't include it)
        const beamerPartialNamesPandoc = partialNamesPandoc.filter(
          (name) => name !== "document-metadata",
        );
        extras.templateContext = createTemplateContext(
          displayName === "Beamer" ? "beamer" : "pdf",
          partialNamesQuarto,
          displayName === "Beamer"
            ? beamerPartialNamesPandoc
            : partialNamesPandoc,
        );

        // Don't shift the headings if we see any H1s (we can't shift up any longer)
        const hasLevelOneHeadings = await hasL1Headings(markdown);

        // pdfs with no other heading level oriented options get their heading level shifted by -1
        if (
          !hasLevelOneHeadings &&
          autoShiftHeadings &&
          (flags?.[kNumberSections] === true ||
            format.pandoc[kNumberSections] === true) &&
          flags?.[kTopLevelDivision] === undefined &&
          format.pandoc?.[kTopLevelDivision] === undefined &&
          flags?.[kShiftHeadingLevelBy] === undefined &&
          format.pandoc?.[kShiftHeadingLevelBy] === undefined
        ) {
          extras.pandoc = {
            [kShiftHeadingLevelBy]: -1,
          };
        }

        // pdfs with document class scrbook get number sections turned on
        // https://github.com/quarto-dev/quarto-cli/issues/2369
        extras.pandoc = extras.pandoc || {};
        if (
          documentclass === "scrbook" &&
          format.pandoc[kNumberSections] !== false &&
          flags[kNumberSections] !== false
        ) {
          extras.pandoc[kNumberSections] = true;
        }

        // Handle pdf-standard option for PDF/A, PDF/UA, PDF/X conformance
        const pdfStandard = asArray(
          format.render?.[kPdfStandard] ?? format.metadata?.[kPdfStandard],
        );
        if (pdfStandard.length > 0) {
          const { version, standards, needsTagging } =
            normalizePdfStandardForLatex(pdfStandard);
          // Set pdfstandard as a map if there are standards or a version
          if (standards.length > 0 || version) {
            extras.pandoc.variables = extras.pandoc.variables || {};
            const pdfstandardMap: Record<string, unknown> = {};
            if (standards.length > 0) {
              pdfstandardMap.standards = standards;
            }
            if (version) {
              pdfstandardMap.version = version;
            }
            if (needsTagging) {
              pdfstandardMap.tagging = true;
            }
            extras.pandoc.variables["pdfstandard"] = pdfstandardMap;
          }
          // Store applied standards in metadata for verapdf validation
          // (only standards that LaTeX actually supports, not the original list)
          if (standards.length > 0) {
            extras.metadata = extras.metadata || {};
            extras.metadata[kPdfStandardApplied] = standards;
          }
        }

        return extras;
      },
    },
  );
}
362
363
// Book extension for PDF output: the book is produced as a single
// self-contained file.
const pdfBookExtension: BookExtension = {
  selfContainedOutput: true,
  onSingleFilePostRender: (
    project: ProjectContext,
    renderedFile: RenderedFile,
  ) => {
    // Nothing to do unless the intermediate .tex file is being kept
    if (!renderedFile.format.render[kKeepTex]) {
      return;
    }

    // Rename the kept `index.tex` so it matches the final output file,
    // using a tex-friendly version of that file's stem.
    const finalOutputFile = renderedFile.file!;
    const outputStem = basename(finalOutputFile, extname(finalOutputFile));
    const texOutputFile = texSafeFilename(outputStem) + ".tex";
    Deno.renameSync(
      join(project.dir, "index.tex"),
      join(project.dir, texOutputFile),
    );
  },
};
383
// A line processor receives one line of LaTeX and returns the (possibly
// rewritten) line, or undefined to drop ("eat") the line.
type LineProcessor = (line: string) => string | undefined;

// Creates the post processor that rewrites the generated LaTeX file in two
// passes. Which line processors run depends on the format's citation,
// caption, reference-location and code-annotation settings.
function pdfLatexPostProcessor(
  flags: PandocFlags,
  format: Format,
  temp: TempContext,
) {
  return async (output: string) => {
    const citeMethod = format.pandoc[kCiteMethod];
    const usesBiblatex = citeMethod === "biblatex";
    const usesNatbib = citeMethod === "natbib";

    // Processors that always run, in this order
    const pass1Processors: LineProcessor[] = [
      sidecaptionLineProcessor(),
      calloutFloatHoldLineProcessor(),
      tableColumnMarginLineProcessor(),
      guidsProcessor(),
    ];

    // Bibliography placement for the refs div
    if (usesBiblatex) {
      pass1Processors.push(bibLatexBibligraphyRefsDivProcessor());
    } else if (usesNatbib) {
      pass1Processors.push(
        natbibBibligraphyRefsDivProcessor(
          format.metadata[kBibliography] as string[] | undefined,
        ),
      );
    }

    // Filled during pass 1 (citeproc only) and consumed during pass 2
    const renderedCites: Record<string, string[]> = {};
    if (format.metadata[kCitationLocation] === "margin") {
      // Based upon the cite method, post process the file to
      // process unresolved citations
      if (usesBiblatex) {
        pass1Processors.push(suppressBibLatexBibliographyLineProcessor());
        pass1Processors.push(bibLatexCiteLineProcessor());
      } else if (usesNatbib) {
        pass1Processors.push(suppressNatbibBibliographyLineProcessor());
        pass1Processors.push(natbibCiteLineProcessor());
      } else {
        // With the pandoc default citeproc the citations are emitted as raw
        // latex (no tool in the pdf chain generates the bibliography), so
        // more work is needed: first read the rendered bibliography,
        // indexing the entries and removing it from the latex, then run a
        // second pass that uses that index to replace cites with the
        // rendered versions.
        pass1Processors.push(
          indexAndSuppressPandocBibliography(renderedCites),
          cleanReferencesChapter(),
        );
      }
    }

    // Move longtable captions below if requested
    if (tblCapLocation(format) === kCapBottom) {
      pass1Processors.push(longtableBottomCaptionProcessor());
    }

    // If enabled, replace footnotes with side notes
    if (marginRefs(flags, format)) {
      pass1Processors.push(sideNoteLineProcessor());
    }
    pass1Processors.push(captionFootnoteLineProcessor());

    // Code annotations are processed unless explicitly disabled
    const codeAnnotations = format.metadata[kCodeAnnotations];
    if (codeAnnotations !== false && codeAnnotations !== "none") {
      pass1Processors.push(codeAnnotationPostProcessor());
      pass1Processors.push(codeListAnnotationPostProcessor());
    }

    pass1Processors.push(tableSidenoteProcessor());

    // This is pass 1
    await processLines(output, pass1Processors, temp);

    // This is pass 2; these need to happen after the first pass
    const pass2Processors: LineProcessor[] = [
      longTableSidenoteProcessor(),
    ];
    if (Object.keys(renderedCites).length > 0) {
      pass2Processors.push(placePandocBibliographyEntries(renderedCites));
    }
    await processLines(output, pass2Processors, temp);
  };
}
469
470
// Table caption location: the table-specific setting wins, then the general
// caption location, and finally the default of top.
function tblCapLocation(format: Format) {
  const { metadata } = format;
  return metadata[kTblCapLoc] || metadata[kCapLoc] || kCapTop;
}
473
474
// Figure caption location: the figure-specific setting wins, then the
// general caption location, and finally the default of bottom.
function figCapLocation(format: Format) {
  const { metadata } = format;
  return metadata[kFigCapLoc] || metadata[kCapLoc] || kCapBottom;
}
477
478
// True when references are placed in the margin, either through the
// format's pandoc options or through the command line flags.
function marginRefs(flags: PandocFlags, format: Format) {
  if (format.pandoc[kReferenceLocation] === "margin") {
    return true;
  }
  return flags[kReferenceLocation] === "margin";
}
482
483
// Runs every line of `inputFile` through the given line processors (in
// order) and then replaces `inputFile` with the processed result.
//
// A processor returning undefined "eats" the line: later processors do not
// see it and nothing is written for it.
async function processLines(
  inputFile: string,
  lineProcessors: LineProcessor[],
  temp: TempContext,
) {
  // Processed lines are written to this temp file
  const outputFile = temp.createFile({ suffix: ".tex" });
  const file = await Deno.open(inputFile);
  // Carry the input file's permissions over to the replacement
  const mode = safeModeFromFile(inputFile);
  try {
    for await (const rawLine of readLines(file)) {
      let current: string | undefined = rawLine;
      for (const processor of lineProcessors) {
        if (current === undefined) {
          // the line was eaten, no further processor gets to see it
          break;
        }
        current = processor(current);
      }

      if (current === undefined) {
        continue;
      }
      Deno.writeTextFileSync(outputFile, current + "\n", {
        append: true,
        mode,
      });
    }
  } finally {
    file.close();

    // Always overwrite the input file, even with an incompletely processed
    // file, which should make debugging an error easier
    copyTo(outputFile, inputFile);
  }
}
521
522
// Marker comments that open and close a side caption region in the LaTeX
const kBeginScanRegex = /^%quartopost-sidecaption-206BE349/;
const kEndScanRegex = /^%\/quartopost-sidecaption-206BE349/;

// Replaces the opening and closing side caption marker lines with
// `kbeginLongTablesideCap` / `kEndLongTableSideCap`. Every other line is
// passed through unchanged.
const sidecaptionLineProcessor = () => {
  let insideRegion = false;
  return (line: string): string | undefined => {
    if (!insideRegion && kBeginScanRegex.test(line)) {
      insideRegion = true;
      return kbeginLongTablesideCap;
    }
    if (insideRegion && kEndScanRegex.test(line)) {
      insideRegion = false;
      return kEndLongTableSideCap;
    }
    return line;
  };
};
547
548
// Reads the first command encountered as a balanced command
// (e.g. \caption{...} or \footnote{...}) and returns
// the complete command
//
// This expects the latex string to start with the command.
//
// A backslash escapes the character that follows it, so `\{` and `\}`
// are literal braces and do not take part in the balancing (and `\\`
// does not escape a brace that comes after it).
//
// If the braces never balance, the whole input is returned.
const readBalancedCommand = (latex: string) => {
  let braceCount = 0;
  let entered = false;
  let end = latex.length;
  for (let i = 0; i < latex.length; i++) {
    const char = latex.charAt(i);
    if (char === "\\") {
      // skip the escaped character together with its backslash
      i++;
      continue;
    }

    if (char === "{") {
      braceCount++;
      entered = true;
    } else if (char === "}") {
      braceCount--;
    }

    // the command is complete once its first group has been closed
    if (entered && braceCount === 0) {
      end = i + 1;
      break;
    }
  }
  return latex.slice(0, end);
};
573
574
// Relocates footnotes found inside the `\caption{}` of a latex element.
//
// Each `\footnote{...}` in the caption is replaced by `\footnotemark{}`
// and re-emitted as `\footnotetext{...}` after the element. The caption
// also receives an optional (short) argument holding the caption text
// without any footnotes.
//
// Elements without a caption, or whose caption has no footnotes, are
// returned unchanged.
const processElementCaptionFootnotes = (latexFigure: string) => {
  const footnoteMark = "\\footnote{";
  const captionMark = "\\caption{";

  // No caption means just let it go
  const captionIndex = latexFigure.indexOf(captionMark);
  if (captionIndex === -1) {
    return latexFigure;
  }

  // Split the element into: before caption / caption command / after caption
  const beforeCaption = latexFigure.substring(0, captionIndex);
  const fromCaption = latexFigure.slice(captionIndex);
  const captionLatex = readBalancedCommand(fromCaption);
  const afterCaption = fromCaption.slice(captionLatex.length);

  // The caption body, without the leading `\caption{` and the final `}`
  let remaining = captionLatex.slice(
    captionMark.length,
    captionLatex.length - 1,
  );

  // No footnotes in the caption, just leave it alone
  let notePos = remaining.indexOf(footnoteMark);
  if (notePos === -1) {
    return latexFigure;
  }

  // Caption text without footnotes / caption text with footnote marks
  let plainCaption = "";
  let markedCaption = "";
  // The footnote bodies found along the way
  const footNotes: string[] = [];
  while (notePos > -1) {
    const lead = remaining.substring(0, notePos);
    plainCaption += lead;
    markedCaption += lead + "\\footnotemark{}";

    const noteLatex = readBalancedCommand(remaining.slice(notePos));
    footNotes.push(
      noteLatex.slice(footnoteMark.length, noteLatex.length - 1),
    );

    remaining = remaining.slice(notePos + noteLatex.length);
    notePos = remaining.indexOf(footnoteMark);
  }
  // Whatever follows the last footnote belongs to both captions
  plainCaption += remaining;
  markedCaption += remaining;

  const pieces: string[] = [
    beforeCaption,
    `\\caption[${plainCaption}]{${markedCaption}}`,
    afterCaption,
    "\n",
  ];

  // With several marks in one caption the footnote counter is rewound
  // first and then advanced by one between consecutive texts.
  if (footNotes.length > 1) {
    pieces.push(`\\addtocounter{footnote}{-${footNotes.length - 1}}`);
  }
  footNotes.forEach((note, idx) => {
    pieces.push(`\\footnotetext{${note}}`);
    if (idx < footNotes.length - 1) {
      pieces.push(`\\addtocounter{footnote}{1}`);
    }
  });

  return pieces.join("");
};
673
674
// Matches a longtable column spec of the form
// `...p{(\columnwidth - N\tabcolsep)...`. Group 1 is everything before the
// `p{`, group 2 is everything from `N\tabcolsep)` to the end of the line.
const kMatchLongTableSize = /^(.*)p{\(\\columnwidth - (\d+\\tabcolsep\).*$)/;

// Lines that open / close a longtable environment
const kStartLongTable = /^\\begin{longtable}/;
const kEndLongTable = /^\\end{longtable}/;
678
679
// Expands uuid placeholders in the LaTeX.
//
// A definition looks like:
//
//   %quarto-define-uuid: <uuid>
//   ...definition lines...
//   %quarto-end-define-uuid
//
// The definition lines are removed from the output, joined without a
// separator, and trimmed. Every later occurrence of <uuid> is replaced
// by that text. Only definitions that have already been read are applied.
const guidsProcessor = () => {
  let state: "looking-for-definition-start" | "looking-for-definition-end" =
    "looking-for-definition-start";
  const guidDefinitions: [string, string][] = [];
  let guidBeingProcessed: string | undefined;
  let guidContents: string[] = [];
  return (line: string): string | undefined => {
    switch (state) {
      case "looking-for-definition-start": {
        if (line.startsWith("%quarto-define-uuid: ")) {
          state = "looking-for-definition-end";
          line = line.replace(/^%quarto-define-uuid:\s*/, "");
          guidBeingProcessed = line.trim();
          return undefined;
        }
        for (const [key, value] of guidDefinitions) {
          // Substitute literally. A string replacement passed to
          // replace/replaceAll would interpret `$$`, `$&`, ... in the
          // value, which corrupts LaTeX such as `$$...$$`.
          line = line.split(key).join(value);
        }
        return line;
      }
      case "looking-for-definition-end": {
        if (line === "%quarto-end-define-uuid") {
          state = "looking-for-definition-start";
          if (guidBeingProcessed === undefined) {
            throw new Error("guidBeingProcessed is undefined");
          }
          guidDefinitions.push([
            guidBeingProcessed,
            guidContents.join("").trim(),
          ]);
          guidContents = [];
          guidBeingProcessed = undefined;
          return undefined;
        } else {
          // still inside the definition: collect and drop the line
          guidContents.push(line);
          return undefined;
        }
      }
    }
  };
};
720
721
// Widens longtable columns that sit between the
// `% quarto-tables-in-margin-AB1927C9:begin` / `:end` marker lines so that
// their width also includes `\marginparwidth + \marginparsep`. The marker
// lines themselves are removed from the output.
const tableColumnMarginLineProcessor = () => {
  let state: "looking-for-boundaries" | "looking-for-tables" | "processing" =
    "looking-for-boundaries";
  return (line: string): string | undefined => {
    // Outside of a marked region: only watch for the begin marker
    if (state === "looking-for-boundaries") {
      if (line === "% quarto-tables-in-margin-AB1927C9:begin") {
        state = "looking-for-tables";
        return undefined;
      }
      return line;
    }

    // Inside a marked region: watch for a longtable or the end marker
    if (state === "looking-for-tables") {
      if (line.match(kStartLongTable)) {
        state = "processing";
        return line;
      }
      if (line === "% quarto-tables-in-margin-AB1927C9:end") {
        state = "looking-for-boundaries";
        return undefined;
      }
      return line;
    }

    // Inside a longtable within a marked region
    if (line.match(kEndLongTable)) {
      state = "looking-for-tables";
      return line;
    }
    const sizeMatch = line.match(kMatchLongTableSize);
    if (!sizeMatch) {
      return line;
    }
    return `${
      sizeMatch[1]
    }p{(\\marginparwidth + \\marginparsep + \\columnwidth - ${
      sizeMatch[2]
    }`;
  };
};
766
767
// Buffers each figure environment (from a line starting with
// `\begin{figure}` to the matching `\end{figure}` line) and emits it as a
// single string after its caption footnotes have been relocated.
const captionFootnoteLineProcessor = () => {
  // Lines of the figure currently being buffered (undefined when outside)
  let figureLines: string[] | undefined;
  return (line: string): string | undefined => {
    if (figureLines === undefined) {
      if (/^\\begin{figure}.*$/.test(line)) {
        figureLines = [line];
        return undefined;
      }
      return line;
    }

    figureLines.push(line);
    if (!/^\\end{figure}%*$/.test(line)) {
      // still inside the figure, hold the line back
      return undefined;
    }

    // The figure is complete: clear the buffer and process the whole figure
    const figure = figureLines.join("\n");
    figureLines = undefined;
    return processElementCaptionFootnotes(figure);
  };
};
797
798
// Creates a function that moves sidenotes out of a table-like environment.
//
// Every `\sidenote{\footnotesize ...}` in the given latex is replaced by
// `\sidenotemark{}`, and a `\sidenotetext{...}` line per note is emitted
// right after the first `endMarker` that follows the last note.
//
// - Braces nested inside a note are balanced, and a backslash escapes the
//   character that follows it, so `\textbf{x}` or `\}` do not end a note.
// - A note that is never closed is left in the text untouched.
// - If `endMarker` is not found, the note texts are appended at the end.
const processSideNotes = (endMarker: string) => {
  // Index of the `}` closing the note whose body starts at `from`,
  // or -1 when the note is never closed
  const findNoteEnd = (latex: string, from: number) => {
    let depth = 0;
    for (let i = from; i < latex.length; i++) {
      const ch = latex[i];
      if (ch === "\\") {
        // skip the escaped character
        i++;
      } else if (ch === "{") {
        depth++;
      } else if (ch === "}") {
        if (depth === 0) {
          return i;
        }
        depth--;
      }
    }
    return -1;
  };

  return (latexLongTable: string) => {
    const sideNoteMarker = "\\sidenote{\\footnotesize ";
    let strProcessing = latexLongTable;
    const strOutput: string[] = [];
    const sidenotes: string[] = [];

    let sidenotePos = strProcessing.indexOf(sideNoteMarker);
    while (sidenotePos > -1) {
      const noteStart = sidenotePos + sideNoteMarker.length;
      const noteEnd = findNoteEnd(strProcessing, noteStart);
      if (noteEnd === -1) {
        // Unterminated note: stop here and keep the remaining text as is
        // (looping again would never make progress)
        break;
      }

      strOutput.push(strProcessing.substring(0, sidenotePos));
      strOutput.push("\\sidenotemark{}");
      sidenotes.push(strProcessing.substring(noteStart, noteEnd));
      strProcessing = strProcessing.substring(noteEnd + 1);
      sidenotePos = strProcessing.indexOf(sideNoteMarker);
    }

    // Ensure that we inject sidenotes after the end of the environment
    const endPos = strProcessing.indexOf(endMarker);
    const splitAt = endPos > -1
      ? endPos + endMarker.length
      : strProcessing.length;

    strOutput.push(strProcessing.substring(0, splitAt));
    for (const note of sidenotes) {
      strOutput.push(`\\sidenotetext{${note}}\n`);
    }
    const suffix = strProcessing.substring(splitAt);
    if (suffix) {
      strOutput.push(suffix);
    }

    return strOutput.join("");
  };
};

// Sidenote relocation for longtable and table environments
const processLongTableSidenotes = processSideNotes("\\end{longtable}");
const processTableSidenotes = processSideNotes("\\end{table}");
862
863
// Builds a line processor factory that buffers a block of lines and hands
// the whole block to `callback`.
//
// While outside a block, lines pass through unchanged. A line matching
// `beginRegex` starts a block. From then on lines are held back (undefined
// is returned) until a later line matches `endRegex`, at which point the
// buffered lines are joined with "\n" and the result of `callback` is
// returned in their place.
const sideNoteProcessor = (
  beginRegex: RegExp,
  endRegex: RegExp,
  callback: (str: string) => string,
) => {
  return () => {
    // Lines of the block being buffered (undefined when outside a block)
    let buffered: string[] | undefined;
    return (line: string): string | undefined => {
      if (buffered === undefined) {
        if (!line.match(beginRegex)) {
          return line;
        }
        buffered = [line];
        return undefined;
      }

      buffered.push(line);
      if (!line.match(endRegex)) {
        return undefined;
      }

      // The block is complete: clear the buffer and process it as a whole
      const block = buffered.join("\n");
      buffered = undefined;
      return callback(block);
    };
  };
};
899
// Buffers each longtable environment and relocates the sidenotes inside it
const longTableSidenoteProcessor = sideNoteProcessor(
  /^\\begin{longtable}.*$/,
  /^\\end{longtable}%*$/,
  processLongTableSidenotes,
);

// Buffers each table environment and relocates the sidenotes inside it
const tableSidenoteProcessor = sideNoteProcessor(
  /^\\begin{table}.*$/,
  /^\\end{table}%*$/,
  processTableSidenotes,
);
910
911
// Inside a tcolorbox environment, adds the `[H]` placement to bare
// `\begin{figure}` and `\begin{codelisting}` lines. All other lines, and
// everything outside of a tcolorbox, pass through unchanged.
const calloutFloatHoldLineProcessor = () => {
  let insideCallout = false;
  return (line: string): string | undefined => {
    if (!insideCallout) {
      // the opening line itself is passed through untouched
      insideCallout = /^\\begin{tcolorbox}/.test(line);
      return line;
    }

    if (/^\\end{tcolorbox}/.test(line)) {
      insideCallout = false;
      return line;
    }
    if (/^\\begin{figure}$/.test(line)) {
      return "\\begin{figure}[H]";
    }
    if (/^\\begin{codelisting}$/.test(line)) {
      return "\\begin{codelisting}[H]";
    }
    return line;
  };
};
937
938
// Placeholder line marking where the bibliography should be printed
// (compared as an exact string, despite the name)
const kQuartoBibPlaceholderRegex = "%bib-loc-124C8010";

// biblatex: prints the bibliography (without heading) at the first
// placeholder line. Once that has happened, further placeholders and any
// plain `\printbibliography` line are removed.
const bibLatexBibligraphyRefsDivProcessor = () => {
  let placed = false;
  return (line: string): string | undefined => {
    if (line === kQuartoBibPlaceholderRegex) {
      if (placed) {
        // already seen a refs div, just ignore this one
        return undefined;
      }
      placed = true;
      return "\\printbibliography[heading=none]";
    }
    if (placed && /^\\printbibliography$/.test(line)) {
      return undefined;
    }
    return line;
  };
};
957
958
// natbib: emits the bibliography (with an empty `\bibsection`) at the first
// placeholder line. Once that has happened, further placeholders and any
// `\bibliography{...}` line are removed. Without `bibs`, placeholder lines
// are simply removed.
//
// bibs: the bibliography files. Callers pass metadata through an unchecked
// cast, so a single string is tolerated at runtime and treated as a
// one-element list.
const natbibBibligraphyRefsDivProcessor = (bibs?: string[]) => {
  // concat() appends a lone string and flattens an array, so both shapes
  // end up as a string[]
  const bibList = bibs ? ([] as string[]).concat(bibs) : undefined;
  let hasRefsDiv = false;
  return (line: string): string | undefined => {
    if (line === kQuartoBibPlaceholderRegex) {
      if (bibList && !hasRefsDiv) {
        hasRefsDiv = true;
        return `\\renewcommand{\\bibsection}{}\n\\bibliography{${
          bibList.join(",")
        }}`;
      } else {
        // already seen a refs div (or nothing to print), ignore this one
        return undefined;
      }
    } else if (hasRefsDiv && line.match(/^\s*\\bibliography{.*}$/)) {
      return undefined;
    } else {
      return line;
    }
  };
};
978
979
// Blanks out the biblatex `\printbibliography` command: a line consisting
// of exactly that command is replaced by an empty line.
const suppressBibLatexBibliographyLineProcessor = () => {
  const printBibliography = /^\\printbibliography$/;
  return (line: string): string | undefined =>
    printBibliography.test(line) ? "" : line;
};
988
989
// Replaces the natbib bibliography declaration with a version that is
// typeset into a saved box, so that it will not be printed in the PDF.
const suppressNatbibBibliographyLineProcessor = () => {
  const bibliographyCommand = /^\s*\\bibliography{(.*)}$/;
  return (line: string): string | undefined => {
    const found = bibliographyCommand.exec(line);
    if (found === null) {
      return line;
    }
    const bib = found[1];
    return "\\newsavebox\\mytempbib\n" +
      `\\savebox\\mytempbib{\\parbox{\\textwidth}{\\bibliography{${bib}}}}`;
  };
};
999
1000
// Citation placeholder of the form {?quarto-cite:(id)}
const kQuartoCiteRegex = /{\?quarto-cite:(.*?)}/g;

// biblatex: turns every citation placeholder on a line into `\fullcite{id}`
const bibLatexCiteLineProcessor = () => {
  const toFullCite = (_placeholder: string, citeKey: string) =>
    `\\fullcite{${citeKey}}`;
  return (line: string): string | undefined =>
    line.replace(kQuartoCiteRegex, toFullCite);
};
1009
1010
// natbib: turns every citation placeholder on a line into `\bibentry{id}`
const natbibCiteLineProcessor = () => {
  const toBibEntry = (_placeholder: string, citeKey: string) =>
    `\\bibentry{${citeKey}}`;
  return (line: string): string | undefined =>
    line.replace(kQuartoCiteRegex, toBibEntry);
};
1017
1018
// Turns every `\footnote{` on a line into a footnote-sized `\sidenote{`
const sideNoteLineProcessor = () => {
  const sideNoteOpening = "\\sidenote{\\footnotesize ";
  return (line: string): string | undefined => {
    return line.replace(/\\footnote{/g, sideNoteOpening);
  };
};
1023
1024
// Moves the caption of a longtable towards the bottom of the table.
//
// Inside a longtable the caption line(s) are removed and remembered. The
// remembered caption is re-emitted in front of `\endlastfoot` (preceded by
// `\tabularnewline`) if that line occurs, otherwise in front of
// `\end{longtable}`. A caption still pending at `\end{document}` is emitted
// in front of that line.
const longtableBottomCaptionProcessor = () => {
  let insideLongtable = false;
  // true while the lines of a multi-line caption are being collected
  let collectingCaption = false;
  let pendingCaption: string | undefined;

  return (line: string): string | undefined => {
    // Flush a caption that is still pending at the end of the document
    if (pendingCaption && /^\\end{document}/.test(line)) {
      return `${pendingCaption}\n${line}`;
    }

    if (!insideLongtable) {
      insideLongtable = /^\\begin{longtable}/.test(line);
      return line;
    }

    // Continuation of a multi-line caption; it ends with \tabularnewline
    if (collectingCaption) {
      pendingCaption = `${pendingCaption}\n${line}`;
      collectingCaption = !/\\tabularnewline$/.test(line);
      return undefined;
    }

    // Caption that is complete on a single line
    if (
      /^\\caption.*?\\tabularnewline$/.test(line) ||
      /^\\caption{.*}\\\\$/.test(line)
    ) {
      pendingCaption = line;
      return undefined;
    }

    // First line of a multi-line caption
    if (/^\\caption.*?/.test(line)) {
      pendingCaption = line;
      collectingCaption = true;
      return undefined;
    }

    // Place the caption in front of \endlastfoot
    if (pendingCaption && /^\\endlastfoot/.test(line)) {
      const relocated = `\\tabularnewline\n${pendingCaption}\n${line}`;
      pendingCaption = undefined;
      return relocated;
    }

    // End of the table: place a caption that has not been placed yet
    if (/^\\end{longtable}$/.test(line)) {
      insideLongtable = false;
      if (pendingCaption) {
        const relocated = pendingCaption + "\n" + line;
        pendingCaption = undefined;
        return relocated;
      }
    }

    return line;
  };
};
1070
1071
// A (possibly starred) chapter heading whose label starts with `references`.
// Group 1 is the chapter name.
const kChapterRefNameRegex = /^\\chapter\*?{(.*?)}\\label{references.*?}$/;

// Removes the references chapter heading, along with the
// `\addcontentsline{toc}{chapter}{<name>}` and `\markboth{<name>}{<name>}`
// lines that carry the same chapter name.
//
// The follow-up lines are found with plain substring checks, so chapter
// names containing regular expression metacharacters (parentheses, dots,
// backslashes, ...) are handled like any other name.
const cleanReferencesChapter = () => {
  let tocEntry: string | undefined;
  let markEntry: string | undefined;

  return (line: string): string | undefined => {
    const chapterRefMatch = line.match(kChapterRefNameRegex);
    if (chapterRefMatch) {
      const refChapterName = chapterRefMatch[1];
      tocEntry = `\\addcontentsline{toc}{chapter}{${refChapterName}}`;
      markEntry = `\\markboth{${refChapterName}}{${refChapterName}}`;
      // Eat this line
      return undefined;
    }
    if (tocEntry !== undefined && line.includes(tocEntry)) {
      // Eat this line
      return undefined;
    }
    if (markEntry !== undefined && line.includes(markEntry)) {
      // Eat this line
      return undefined;
    }
    return line;
  };
};
1099
1100
/**
 * Creates a stateful line filter that removes the bibliography block from
 * the output while recording each entry's lines in `renderedCites`.
 *
 * The block starts at a line matching `\phantomsection\label{refs}`
 * (optionally prefixed with `\protect`) and ends at `\end{CSLReferences}`.
 * Inside the block, a `\bibitem[\citeproctext]{ref-<key>}` line starts a new
 * entry under `<key>`, non-empty lines are appended to the current entry,
 * and an empty line closes the current entry.
 *
 * The current entry key is also cleared at the start and end of a block so
 * that lines of a later block can never be appended to a stale entry.
 *
 * @param renderedCites Map from cite key to that entry's lines; mutated in
 *   place as entries are read.
 * @returns A function that returns `undefined` for every line belonging to
 *   the bibliography block, or the unchanged line otherwise.
 */
const indexAndSuppressPandocBibliography = (
  renderedCites: Record<string, string[]>,
) => {
  const bibliographyStart = /^(\\protect)?\\phantomsection\\label{refs}$/;
  const bibliographyEnd = /^\\end{CSLReferences}$/;
  const bibItem = /\\bibitem\[\\citeproctext\]{ref\-(.*?)}/;

  let readingBibliography = false;
  let currentCiteKey: string | undefined = undefined;

  return (line: string): string | undefined => {
    if (!readingBibliography) {
      if (bibliographyStart.test(line)) {
        readingBibliography = true;
        // Never carry an entry key over from a previous bibliography
        currentCiteKey = undefined;
        return undefined;
      }
      return line;
    }

    if (bibliographyEnd.test(line)) {
      readingBibliography = false;
      currentCiteKey = undefined;
      return undefined;
    }

    const matches = line.match(bibItem);
    if (matches && matches[1]) {
      // Start of a new entry
      currentCiteKey = matches[1];
      renderedCites[currentCiteKey] = [line];
    } else if (line.length === 0) {
      // A blank line terminates the current entry
      currentCiteKey = undefined;
    } else if (currentCiteKey) {
      renderedCites[currentCiteKey].push(line);
    }

    // Everything inside the bibliography is suppressed
    return undefined;
  };
};
1135
1136
// Line patterns used to track whether the current line is inside a figure
// and, within it, inside a `\sidecaption{`.
const kInSideCaptionRegex = /^\\sidecaption{/;
const kBeginFigureRegex = /^\\begin{figure}\[.*?\]$/;
const kEndFigureRegex = /^\\end{figure}\%?$/;

/**
 * Creates a stateful line filter that substitutes matches of
 * `kQuartoCiteRegex` (defined elsewhere in this file) with the rendered
 * bibliography entry recorded for that cite key.
 *
 * Outside a side caption, the full entry is inlined wrapped in a
 * `CSLReferences` environment. Inside a `\sidecaption{` within a figure, only
 * the lines after the first are inlined; the first line of each entry is
 * held back and emitted in a single `CSLReferences` block appended to the
 * first line processed after the state returns to "scanning".
 *
 * @param renderedCites Map from cite key to the lines of its rendered entry.
 * @returns A function mapping one input line to its replacement text.
 */
const placePandocBibliographyEntries = (
  renderedCites: Record<string, string[]>,
) => {
  type BiblioEntryState = "scanning" | "in-figure" | "in-sidecaption";

  let state: BiblioEntryState = "scanning";
  let deferredEntries: string[] = [];

  // Surround entry lines with the CSLReferences environment markers
  const wrapInCslReferences = (entries: string[]): string[] => [
    "\n\\begin{CSLReferences}{2}{0}",
    ...entries,
    "\\end{CSLReferences}\n",
  ];

  return (line: string): string | undefined => {
    // Advance the figure / side caption state machine for this line
    if (state === "scanning") {
      if (kBeginFigureRegex.test(line)) {
        state = "in-figure";
      }
    } else if (state === "in-figure") {
      if (kInSideCaptionRegex.test(line)) {
        state = "in-sidecaption";
      } else if (kEndFigureRegex.test(line)) {
        state = "scanning";
      }
    } else if (kEndFigureRegex.test(line)) {
      // in-sidecaption: only the end of the figure leaves this state
      state = "scanning";
    }

    // Flush entries held back while inside a side caption. The line itself
    // is passed through without cite substitution in this case.
    if (state === "scanning" && deferredEntries.length > 0) {
      const flushed = [line, ...wrapInCslReferences(deferredEntries)].join(
        "\n",
      );
      deferredEntries = [];
      return flushed;
    }

    return line.replaceAll(kQuartoCiteRegex, (_match, citeKey) => {
      const entryLines = renderedCites[citeKey];
      if (!entryLines) {
        // No rendered entry is known for this key: emit the bare key
        return citeKey;
      }

      if (state === "in-sidecaption" && entryLines.length > 0) {
        // Hold back the first line; inline the rest after a leading newline
        deferredEntries.push(entryLines[0]);
        return ["", ...entryLines.slice(1)].join("\n");
      }

      return wrapInCslReferences(entryLines).join("\n");
    });
  };
};
1205
1206
// Highlighted code: a `\CommentTok{...}` that ends the line and carries an
// annotation marker written as `\textless{}N\textgreater{}`.
const kCodeAnnotationRegex =
  /(.*)\\CommentTok\{(.*?)[^\s]+? +\\textless\{\}(\d+)\\textgreater\{\}.*\}$/gm;
// Plain code: a trailing `% (N)` marker.
const kCodePlainAnnotationRegex = /(.*)% \((\d+)\)$/g;

/**
 * Creates a stateful line filter that rewrites code annotation markers.
 *
 * In highlighted code the marker becomes a right-aligned
 * `\circled{N}`; any other text in the comment is kept in a rebuilt
 * `\CommentTok`. In plain code the marker becomes `(N)`, with the preceding
 * text padded so the line is 75 characters wide when it fits.
 *
 * When a marker repeats the most recently emitted annotation number it is
 * dropped and only the preceding text is kept. The remembered number is
 * cleared on each `\begin{Shaded}` line.
 *
 * @returns A function mapping one input line to its rewritten form.
 */
const codeAnnotationPostProcessor = () => {
  const plainLineWidth = 75;
  let previousAnnotation: string | undefined;

  // Records `annotationNumber` and reports whether it differs from the
  // previously recorded one.
  const isNewAnnotation = (annotationNumber: string): boolean => {
    if (annotationNumber === previousAnnotation) {
      return false;
    }
    previousAnnotation = annotationNumber;
    return true;
  };

  const rewriteHighlighted = (
    _match: string,
    prefix: string,
    comment: string,
    annotationNumber: string,
  ): string => {
    if (!isNewAnnotation(annotationNumber)) {
      return `${prefix}`;
    }
    // Keep any remaining comment text in its own comment token
    const lead = comment.length > 0
      ? `${prefix}\\CommentTok{${comment}}`
      : prefix;
    return `${lead}\\hspace*{\\fill}\\NormalTok{\\circled{${annotationNumber}}}`;
  };

  const rewritePlain = (
    _match: string,
    prefix: string,
    annotationNumber: string,
  ): string => {
    if (!isNewAnnotation(annotationNumber)) {
      return `${prefix}`;
    }
    const marker = `(${annotationNumber})`;
    // Pad the code so the marker ends at the target width when it fits
    return prefix.padEnd(plainLineWidth - marker.length, " ") + marker;
  };

  return (line: string): string | undefined => {
    if (line === "\\begin{Shaded}") {
      previousAnnotation = undefined;
    }

    // Highlighted code first, then plain code, sharing the same state
    return line
      .replace(kCodeAnnotationRegex, rewriteHighlighted)
      .replace(kCodePlainAnnotationRegex, rewritePlain);
  };
};
1258
1259
// Placeholder marking an annotation number in a list item. Only the marker
// itself is matched so that every occurrence on a line is found.
const kListAnnotationRegex = /5CB6E08D-list-annote-(\d+)/g;

/**
 * Creates a line filter that replaces each `5CB6E08D-list-annote-N`
 * placeholder with `\circled{N}`, leaving the rest of the line untouched.
 *
 * All placeholders on a line are replaced, not only the last one.
 *
 * @returns A function mapping one input line to its rewritten form.
 */
const codeListAnnotationPostProcessor = () => {
  return (line: string): string | undefined => {
    return line.replace(
      kListAnnotationRegex,
      (_match, annotationNumber: string) => `\\circled{${annotationNumber}}`,
    );
  };
};
1270
1271
// LaTeX snippet that opens a brace group and, between `\makeatletter` and
// `\makeatother`, redefines `\LT@makecaption` so the caption is typeset in a
// `\footnotesize` `\parbox` of width `\marginparwidth`, shifted right by
// `\textwidth` plus `\marginparsep`.
// NOTE(review): presumably emitted immediately before a longtable whose
// caption should sit in the margin - confirm against the caller.
// The group opened by the leading `{` is left open; `kEndLongTableSideCap`
// supplies the matching `}`.
const kbeginLongTablesideCap = `{
\\makeatletter
\\def\\LT@makecaption#1#2#3{%
\\noalign{\\smash{\\hbox{\\kern\\textwidth\\rlap{\\kern\\marginparsep
\\parbox[t]{\\marginparwidth}{%
\\footnotesize{%
\\vspace{(1.1\\baselineskip)}
#1{#2: }\\ignorespaces #3}}}}}}%
}
\\makeatother`;

// Closing brace that matches the group opened by `kbeginLongTablesideCap`.
const kEndLongTableSideCap = "}";
1283
1284
// LaTeX-supported PDF standards (from latex3/latex2e DocumentMetadata)
// See: https://github.com/latex3/latex2e - documentmetadata-support.dtx
const kLatexSupportedStandards = new Set([
  // PDF/A standards (note: a-1a is NOT supported, only a-1b)
  "a-1b",
  "a-2a",
  "a-2b",
  "a-2u",
  "a-3a",
  "a-3b",
  "a-3u",
  "a-4",
  "a-4e",
  "a-4f",
  // PDF/X standards
  "x-4",
  "x-4p",
  "x-5g",
  "x-5n",
  "x-5pg",
  "x-6",
  "x-6n",
  "x-6p",
  // PDF/UA standards (only ua-2 is supported by LaTeX)
  "ua-2",
]);

// Standards that require PDF tagging (document structure)
// - PDF/A level "a" variants require tagged structure per PDF/A spec
// - PDF/UA standards require tagging for universal accessibility
//   (LaTeX does NOT automatically enable tagging for UA standards)
const kTaggingRequiredStandards = new Set([
  "a-2a",
  "a-3a",
  "ua-1",
  "ua-2",
]);

// Explicit PDF versions that are recognized: 1.4 through 1.7, and 2.0
const kVersionPattern = /^(1\.[4-7]|2\.0)$/;

// PDF version required by each standard (maximum version limits)
// LaTeX defaults to PDF 2.0 with \DocumentMetadata, but some standards
// have maximum version requirements that are incompatible with 2.0
// Note: a-1a is intentionally omitted as LaTeX doesn't support it
const kStandardRequiredVersion: Record<string, string> = {
  // PDF/A-1 requires exactly PDF 1.4 (only a-1b supported by LaTeX)
  "a-1b": "1.4",
  // PDF/A-2 and PDF/A-3 have maximum version of 1.7
  "a-2a": "1.7",
  "a-2b": "1.7",
  "a-2u": "1.7",
  "a-3a": "1.7",
  "a-3b": "1.7",
  "a-3u": "1.7",
  // PDF/A-4, PDF/UA-1, PDF/UA-2 all work with PDF 2.0 (the default)
};

/**
 * Splits a user-supplied list of PDF standards and versions into the pieces
 * needed for LaTeX.
 *
 * Each entry is lowercased and stripped of a leading `pdf`, `pdf/` or `pdf-`.
 * Entries matching `kVersionPattern` are treated as an explicit PDF version;
 * entries in `kLatexSupportedStandards` are collected as standards; any other
 * string or number produces a warning and is ignored. Entries that are
 * neither strings nor numbers are skipped silently.
 *
 * An explicit version always wins over a version inferred from a standard,
 * regardless of the order in which they are listed. When several explicit
 * versions are given the first is used; when no explicit version is given,
 * the version required by the first standard that has one is used.
 *
 * @param standards Raw entries; numbers are accepted because YAML may parse
 *   a version such as `2.0` as the number `2`.
 * @returns The PDF version (if any), the supported standards in input order,
 *   and whether any of them requires tagging.
 */
function normalizePdfStandardForLatex(
  standards: unknown[],
): { version?: string; standards: string[]; needsTagging: boolean } {
  let explicitVersion: string | undefined;
  let inferredVersion: string | undefined;
  const result: string[] = [];
  let needsTagging = false;

  for (const s of standards) {
    // Convert to string - YAML may parse versions like 2.0 as integer 2
    let str: string;
    if (typeof s === "number") {
      // Handle YAML numeric parsing: integer 2 -> "2.0", float 1.4 -> "1.4"
      str = Number.isInteger(s) ? `${s}.0` : String(s);
    } else if (typeof s === "string") {
      str = s;
    } else {
      continue;
    }
    // Normalize: lowercase, remove any "pdf" prefix
    const normalized = str.toLowerCase().replace(/^pdf[/-]?/, "");

    if (kVersionPattern.test(normalized)) {
      // Use first explicit version (ignore subsequent ones)
      if (!explicitVersion) {
        explicitVersion = normalized;
      }
    } else if (kLatexSupportedStandards.has(normalized)) {
      // LaTeX is case-insensitive, pass through lowercase
      result.push(normalized);
      // Check if this standard requires tagging
      if (kTaggingRequiredStandards.has(normalized)) {
        needsTagging = true;
      }
      // Remember the version required by the first standard that has one;
      // it is only used when no explicit version is given anywhere
      if (!inferredVersion && kStandardRequiredVersion[normalized]) {
        inferredVersion = kStandardRequiredVersion[normalized];
      }
    } else {
      warning(
        `PDF standard '${s}' is not supported by LaTeX and will be ignored`,
      );
    }
  }

  return {
    version: explicitVersion ?? inferredVersion,
    standards: result,
    needsTagging,
  };
}
1387
1388