CoCalc -- format-pdf.ts

GitHub Repository: quarto-dev/quarto-cli
Path: blob/main/src/format/pdf/format-pdf.ts
⁶⁴⁵¹ views
1
/*
2
 * format-pdf.ts
3
 *
4
 * Copyright (C) 2020-2022 Posit Software, PBC
5
 */
6

7
import { basename, extname, join } from "../../deno_ral/path.ts";
8

9
import { mergeConfigs } from "../../core/config.ts";
10
import { texSafeFilename } from "../../core/tex.ts";
11

12
import {
13
  kBibliography,
14
  kCapBottom,
15
  kCapLoc,
16
  kCapTop,
17
  kCitationLocation,
18
  kCiteMethod,
19
  kClassOption,
20
  kDefaultImageExtension,
21
  kDocumentClass,
22
  kEcho,
23
  kFigCapLoc,
24
  kFigDpi,
25
  kFigFormat,
26
  kFigHeight,
27
  kFigWidth,
28
  kHeaderIncludes,
29
  kKeepTex,
30
  kLang,
31
  kNumberSections,
32
  kPaperSize,
33
  kPdfEngine,
34
  kPdfStandard,
35
  kPdfStandardApplied,
36
  kReferenceLocation,
37
  kShiftHeadingLevelBy,
38
  kTblCapLoc,
39
  kTopLevelDivision,
40
  kWarning,
41
} from "../../config/constants.ts";
42
import { warning } from "../../deno_ral/log.ts";
43
import { asArray } from "../../core/array.ts";
44
import { Format, FormatExtras, PandocFlags } from "../../config/types.ts";
45

46
import { createFormat } from "../formats-shared.ts";
47

48
import { RenderedFile, RenderServices } from "../../command/render/types.ts";
49
import { ProjectConfig, ProjectContext } from "../../project/types.ts";
50
import { BookExtension } from "../../project/types/book/book-shared.ts";
51

52
import { readLines } from "io/read-lines";
53
import { TempContext } from "../../core/temp.ts";
54
import { isLatexPdfEngine, pdfEngine } from "../../config/pdf.ts";
55
import { formatResourcePath } from "../../core/resources.ts";
56
import { kTemplatePartials } from "../../command/render/template.ts";
57
import { copyTo } from "../../core/copy.ts";
58
import { kCodeAnnotations } from "../html/format-html-shared.ts";
59
import { safeModeFromFile } from "../../deno_ral/fs.ts";
60
import { hasLevelOneHeadings as hasL1Headings } from "../../core/lib/markdown-analysis/level-one-headings.ts";
61

62
/**
 * Creates the "PDF" format: the shared PDF format definition merged with
 * the book extension used when rendering book projects.
 */
export function pdfFormat(): Format {
  const baseFormat = createPdfFormat("PDF");
  const bookSupport = {
    extensions: {
      book: pdfBookExtension,
    },
  };
  return mergeConfigs(baseFormat, bookSupport);
}
72

73
/**
 * Creates the "Beamer" format. It is built on the shared PDF format with
 * automatic heading shifting and KOMA defaults both switched off, and
 * layers beamer-specific execution defaults on top.
 */
export function beamerFormat(): Format {
  // Shared PDF definition: autoShiftHeadings = false, koma = false
  const sharedPdf = createPdfFormat("Beamer", false, false);

  const beamerDefaults = {
    execute: {
      [kFigWidth]: 10,
      [kFigHeight]: 7,
      [kEcho]: false,
      [kWarning]: false,
    },
    classoption: ["notheorems"],
  };

  return createFormat("Beamer", "pdf", sharedPdf, beamerDefaults);
}
89

90
/**
 * Creates a LaTeX (".tex") output format based on the shared PDF format.
 *
 * @param displayName Name forwarded to `createFormat` / `createPdfFormat`.
 */
export function latexFormat(displayName: string): Format {
  const latexBookExtension = {
    // When targeting LaTeX output, keep the supporting files around
    // (they are not being built into a PDF)
    onSingleFilePreRender: (format: Format, _config?: ProjectConfig) => {
      format.render[kKeepTex] = true;
      return format;
    },
    formatOutputDirectory: () => "book-latex",
  };

  const latexConfig = mergeConfigs(
    createPdfFormat(displayName),
    {
      extensions: {
        book: latexBookExtension,
      },
    },
  );

  return createFormat(displayName, "tex", latexConfig);
}
118

119
/**
 * Builds the format definition shared by the PDF, Beamer and LaTeX formats.
 *
 * @param displayName Name forwarded to `createFormat`. The exact value
 *   "Beamer" additionally selects the beamer template resources.
 * @param autoShiftHeadings When true, headings are shifted by -1 for
 *   numbered documents that contain no level-one headings and set no other
 *   heading-level option.
 * @param koma When true, KOMA-Script defaults (document class, class
 *   options, caption options, paper size) are contributed as extras unless
 *   the document uses custom templates or a non-KOMA document class.
 * @returns The format produced by `createFormat`.
 */
function createPdfFormat(
  displayName: string,
  autoShiftHeadings = true,
  koma = true,
): Format {
  return createFormat(
    displayName,
    "pdf",
    {
      execute: {
        [kFigWidth]: 5.5,
        [kFigHeight]: 3.5,
        [kFigFormat]: "pdf",
        [kFigDpi]: 300,
      },
      pandoc: {
        [kPdfEngine]: "lualatex",
        standalone: true,
        variables: {
          graphics: true,
          tables: true,
        },
        [kDefaultImageExtension]: "pdf",
      },
      metadata: {
        ["block-headings"]: true,
      },
      formatExtras: async (
        _input: string,
        markdown: string,
        flags: PandocFlags,
        format: Format,
        _libDir: string,
        services: RenderServices,
      ) => {
        const extras: FormatExtras = {};

        // only apply extras if this is latex (as opposed to context)
        const engine = pdfEngine(format.pandoc, format.render, flags);
        if (!isLatexPdfEngine(engine)) {
          return extras;
        }

        // Post processed for dealing with latex output
        extras.postprocessors = [
          pdfLatexPostProcessor(flags, format, services.temp),
        ];

        // user may have overridden koma, check for that here
        const documentclass = format.metadata[kDocumentClass] as
          | string
          | undefined;

        const usingCustomTemplates = format.pandoc.template !== undefined ||
          format.metadata[kTemplatePartials] !== undefined;

        // Document classes for which the KOMA defaults below still apply
        const komaDocumentClasses = [
          "scrbook",
          "scrreprt",
          "scrreport",
          "scrartcl",
          "scrarticle",
        ];

        // Decide per invocation whether KOMA defaults apply. This is a local
        // so that one call disabling KOMA does not affect later calls that
        // share this closure.
        const useKoma = koma &&
          !usingCustomTemplates &&
          !(documentclass && !komaDocumentClasses.includes(documentclass));

        // default to KOMA article class. we do this here rather than
        // above so that projectExtras can override us
        if (useKoma) {
          // determine caption options
          const captionOptions: string[] = [];
          const tblCaploc = tblCapLocation(format);
          captionOptions.push(
            tblCaploc === kCapTop ? "tableheading" : "tablesignature",
          );
          if (figCapLocation(format) === kCapTop) {
            captionOptions.push("figureheading");
          }

          // establish default class options
          const defaultClassOptions = ["DIV=11"];
          if (format.metadata[kLang] !== "de") {
            defaultClassOptions.push("numbers=noendperiod");
          }

          // determine class options (filter by options already set by the user)
          const userClassOptions = format.metadata[kClassOption] as
            | string[]
            | undefined;
          const classOptions = defaultClassOptions.filter((option) => {
            if (Array.isArray(userClassOptions)) {
              const name = option.split("=")[0];
              return !userClassOptions.some((userOption) =>
                String(userOption).startsWith(name + "=")
              );
            } else {
              return true;
            }
          });

          const headerIncludes: string[] = [];
          headerIncludes.push(
            "\\KOMAoption{captions}{" + captionOptions.join(",") + "}",
          );

          extras.metadata = {
            [kDocumentClass]: "scrartcl",
            [kClassOption]: classOptions,
            [kPaperSize]: "letter",
            [kHeaderIncludes]: headerIncludes,
          };
        }

        // Provide a custom template for this format
        // Partials can be the one from Quarto division
        const partialNamesQuarto: string[] = [
          "babel-lang",
          "before-bib",
          "biblio",
          "biblio-config",
          "citations",
          "doc-class",
          "graphics",
          "after-body",
          "before-body",
          "pandoc",
          "tables",
          "tightlist",
          "before-title",
          "title",
          "toc",
        ];
        // or the one from Pandoc division (since Pandoc 3.6.3)
        const partialNamesPandoc: string[] = [
          "after-header-includes",
          "common",
          "document-metadata",
          "font-settings",
          "fonts",
          "hypersetup",
          "passoptions",
        ];

        // Resolves the template and partial paths for the given resource
        // directory; Quarto partials use ".tex", Pandoc partials ".latex"
        const createTemplateContext = function (
          to: string,
          partialNamesQuarto: string[],
          partialNamesPandoc: string[],
        ) {
          return {
            template: formatResourcePath(to, "pandoc/template.tex"),
            partials: [
              ...partialNamesQuarto.map((name) => {
                return formatResourcePath(to, `pandoc/${name}.tex`);
              }),
              ...partialNamesPandoc.map((name) => {
                return formatResourcePath(to, `pandoc/${name}.latex`);
              }),
            ],
          };
        };
        // Beamer doesn't use document-metadata partial (its template doesn't include it)
        const beamerPartialNamesPandoc = partialNamesPandoc.filter(
          (name) => name !== "document-metadata",
        );
        extras.templateContext = createTemplateContext(
          displayName === "Beamer" ? "beamer" : "pdf",
          partialNamesQuarto,
          displayName === "Beamer"
            ? beamerPartialNamesPandoc
            : partialNamesPandoc,
        );

        // Don't shift the headings if we see any H1s (we can't shift up any longer)
        const hasLevelOneHeadings = await hasL1Headings(markdown);

        // pdfs with no other heading level oriented options get their heading level shifted by -1
        if (
          !hasLevelOneHeadings &&
          autoShiftHeadings &&
          (flags?.[kNumberSections] === true ||
            format.pandoc[kNumberSections] === true) &&
          flags?.[kTopLevelDivision] === undefined &&
          format.pandoc?.[kTopLevelDivision] === undefined &&
          flags?.[kShiftHeadingLevelBy] === undefined &&
          format.pandoc?.[kShiftHeadingLevelBy] === undefined
        ) {
          extras.pandoc = {
            [kShiftHeadingLevelBy]: -1,
          };
        }

        // pdfs with document class scrbook get number sections turned on
        // https://github.com/quarto-dev/quarto-cli/issues/2369
        extras.pandoc = extras.pandoc || {};
        if (
          documentclass === "scrbook" &&
          format.pandoc[kNumberSections] !== false &&
          flags[kNumberSections] !== false
        ) {
          extras.pandoc[kNumberSections] = true;
        }

        // Handle pdf-standard option for PDF/A, PDF/UA, PDF/X conformance
        const pdfStandard = asArray(
          format.render?.[kPdfStandard] ?? format.metadata?.[kPdfStandard],
        );
        if (pdfStandard.length > 0) {
          const { version, standards, needsTagging } =
            normalizePdfStandardForLatex(pdfStandard);
          // Set pdfstandard as a map if there are standards or a version
          if (standards.length > 0 || version) {
            extras.pandoc.variables = extras.pandoc.variables || {};
            const pdfstandardMap: Record<string, unknown> = {};
            if (standards.length > 0) {
              pdfstandardMap.standards = standards;
            }
            if (version) {
              pdfstandardMap.version = version;
            }
            if (needsTagging) {
              pdfstandardMap.tagging = true;
            }
            extras.pandoc.variables["pdfstandard"] = pdfstandardMap;
          }
          // Store applied standards in metadata for verapdf validation
          // (only standards that LaTeX actually supports, not the original list)
          if (standards.length > 0) {
            extras.metadata = extras.metadata || {};
            extras.metadata[kPdfStandardApplied] = standards;
          }
        }

        return extras;
      },
    },
  );
}
362

363
/**
 * Book extension for PDF output. After the single-file render it renames
 * the kept intermediate `index.tex` so that it matches the final output.
 */
const pdfBookExtension: BookExtension = {
  selfContainedOutput: true,
  onSingleFilePostRender: (
    project: ProjectContext,
    renderedFile: RenderedFile,
  ) => {
    // Nothing to rename unless keep-tex was requested
    if (!renderedFile.format.render[kKeepTex]) {
      return;
    }

    // Derive a tex-friendly file name from the final output file's stem
    const outputPath = renderedFile.file!;
    const outputStem = basename(outputPath, extname(outputPath));
    const texFileName = texSafeFilename(outputStem) + ".tex";

    const from = join(project.dir, "index.tex");
    const to = join(project.dir, texFileName);
    Deno.renameSync(from, to);
  },
};
383
/**
 * A transformation applied to one line of text. It returns the line to
 * pass on, or `undefined` when the processor has consumed ('eaten') the
 * line and nothing should be emitted for it.
 */
type LineProcessor = (line: string) => string | undefined;
384

385
/**
 * Creates the post processor that rewrites the generated LaTeX file in
 * place. Processing happens in two passes over the file: the first pass
 * applies the line processors selected from the format options, the second
 * pass applies the processors that depend on first-pass results.
 *
 * @param flags Pandoc flags, consulted for the reference location.
 * @param format Format whose pandoc options and metadata select processors.
 * @param temp Temp context used to create the intermediate files.
 * @returns An async function taking the path of the LaTeX file to rewrite.
 */
function pdfLatexPostProcessor(
  flags: PandocFlags,
  format: Format,
  temp: TempContext,
) {
  return async (output: string) => {
    const citeMethod = format.pandoc[kCiteMethod];

    const lineProcessors: LineProcessor[] = [
      sidecaptionLineProcessor(),
      calloutFloatHoldLineProcessor(),
      tableColumnMarginLineProcessor(),
      guidsProcessor(),
    ];

    if (citeMethod === "biblatex") {
      lineProcessors.push(bibLatexBibligraphyRefsDivProcessor());
    } else if (citeMethod === "natbib") {
      // The natbib processor joins the bibliography list, so wrap a single
      // string value in an array rather than passing it through as-is
      const bibliography = format.metadata[kBibliography] as
        | string
        | string[]
        | undefined;
      const bibs = typeof bibliography === "string"
        ? [bibliography]
        : bibliography;
      lineProcessors.push(natbibBibligraphyRefsDivProcessor(bibs));
    }

    const marginCites = format.metadata[kCitationLocation] === "margin";
    const renderedCites = {};
    if (marginCites) {
      // Based upon the cite method, post process the file to
      // process unresolved citations
      if (citeMethod === "biblatex") {
        lineProcessors.push(suppressBibLatexBibliographyLineProcessor());
        lineProcessors.push(bibLatexCiteLineProcessor());
      } else if (citeMethod === "natbib") {
        lineProcessors.push(suppressNatbibBibliographyLineProcessor());
        lineProcessors.push(natbibCiteLineProcessor());
      } else {
        // If this is using the pandoc default citeproc, we need to
        // do a more complex processing, since it is generating raw latex
        // for the citations (not running a tool in the pdf chain to
        // generate the bibliography). As a result, we first read the
        // rendered bibliography, indexing the entries and removing it
        // from the latex, then we run a second pass where we use that index
        // to replace cites with the rendered versions.
        lineProcessors.push(
          indexAndSuppressPandocBibliography(renderedCites),
          cleanReferencesChapter(),
        );
      }
    }

    // Move longtable captions below if requested
    if (tblCapLocation(format) === kCapBottom) {
      lineProcessors.push(longtableBottomCaptionProcessor());
    }

    // If enabled, switch to sidenote footnotes
    if (marginRefs(flags, format)) {
      // Replace notes with side notes
      lineProcessors.push(sideNoteLineProcessor());
    }
    lineProcessors.push(captionFootnoteLineProcessor());

    // Code annotations are processed unless explicitly disabled
    if (
      format.metadata[kCodeAnnotations] as boolean !== false &&
      format.metadata[kCodeAnnotations] as string !== "none"
    ) {
      lineProcessors.push(codeAnnotationPostProcessor());
      lineProcessors.push(codeListAnnotationPostProcessor());
    }

    lineProcessors.push(tableSidenoteProcessor());

    // This is pass 1
    await processLines(output, lineProcessors, temp);

    // This is pass 2; we need these to happen after the first pass
    const pass2Processors: LineProcessor[] = [
      longTableSidenoteProcessor(),
    ];
    // renderedCites is only populated by the first pass (citeproc + margin cites)
    if (Object.keys(renderedCites).length > 0) {
      pass2Processors.push(placePandocBibliographyEntries(renderedCites));
    }
    await processLines(output, pass2Processors, temp);
  };
}
469

470
/**
 * Resolves the table caption location: the table-specific setting first,
 * then the general caption setting, otherwise the top position.
 */
function tblCapLocation(format: Format) {
  const {
    [kTblCapLoc]: tableSetting,
    [kCapLoc]: generalSetting,
  } = format.metadata;
  return tableSetting || generalSetting || kCapTop;
}
473

474
/**
 * Resolves the figure caption location: the figure-specific setting first,
 * then the general caption setting, otherwise the bottom position.
 */
function figCapLocation(format: Format) {
  const {
    [kFigCapLoc]: figureSetting,
    [kCapLoc]: generalSetting,
  } = format.metadata;
  return figureSetting || generalSetting || kCapBottom;
}
477

478
/**
 * Reports whether references are placed in the margin, either through the
 * format's pandoc options or through the command line flags.
 */
function marginRefs(flags: PandocFlags, format: Format) {
  const fromFormat = format.pandoc[kReferenceLocation];
  const fromFlags = flags[kReferenceLocation];
  return !(fromFormat !== "margin" && fromFlags !== "margin");
}
482

483
/**
 * Runs every line of `inputFile` through the given processors (in order)
 * and then replaces `inputFile` with the processed result.
 *
 * @param inputFile Path of the file to rewrite.
 * @param lineProcessors Processors applied to each line; once one returns
 *   `undefined` the line is dropped and later processors do not see it.
 * @param temp Temp context that provides the intermediate file.
 */
async function processLines(
  inputFile: string,
  lineProcessors: LineProcessor[],
  temp: TempContext,
) {
  // Intermediate file that receives the processed lines
  const processedFile = temp.createFile({ suffix: ".tex" });
  const reader = await Deno.open(inputFile);
  // Keep the input file's permissions since it gets replaced below
  const mode = safeModeFromFile(inputFile);

  // Feeds a line through the processor chain, stopping once it is eaten
  const runProcessors = (line: string): string | undefined => {
    let current: string | undefined = line;
    for (const processor of lineProcessors) {
      if (current === undefined) {
        break;
      }
      current = processor(current);
    }
    return current;
  };

  try {
    for await (const rawLine of readLines(reader)) {
      const result = runProcessors(rawLine);
      if (result === undefined) {
        // a processor has 'eaten' this line
        continue;
      }
      Deno.writeTextFileSync(processedFile, result + "\n", {
        append: true,
        mode,
      });
    }
  } finally {
    reader.close();

    // The input file is overwritten even when processing failed part way,
    // leaving the partially processed output behind for debugging
    copyTo(processedFile, inputFile);
  }
}
521

522
// Marker comments that open and close a side-caption region
const kBeginScanRegex = /^%quartopost-sidecaption-206BE349/;
const kEndScanRegex = /^%\/quartopost-sidecaption-206BE349/;

/**
 * Creates a line processor that swaps the side-caption marker comments for
 * the long table side-caption begin / end snippets. All other lines pass
 * through unchanged.
 */
const sidecaptionLineProcessor = () => {
  // true while between a begin marker and its end marker
  let insideRegion = false;
  return (line: string): string | undefined => {
    // Only the marker that would toggle the current state is relevant
    const toggleMarker = insideRegion ? kEndScanRegex : kBeginScanRegex;
    if (!line.match(toggleMarker)) {
      return line;
    }
    insideRegion = !insideRegion;
    return insideRegion ? kbeginLongTablesideCap : kEndLongTableSideCap;
  };
};
547

548
/**
 * Reads the first command encountered as a balanced command
 * (e.g. \caption{...} or \footnote{...}) and returns the complete command,
 * i.e. everything up to and including the brace that closes the first
 * brace group.
 *
 * This expects the latex string to start with the command. Characters
 * escaped with a backslash (such as `\{` and `\}`) are copied through but
 * are not counted as grouping braces.
 *
 * @param latex LaTeX text starting with the command to read.
 * @returns The command text; the whole input when no balanced group ends.
 */
const readBalancedCommand = (latex: string) => {
  let braceCount = 0;
  let entered = false;
  let end = latex.length;
  for (let i = 0; i < latex.length; i++) {
    const char = latex.charAt(i);
    if (char === "\\") {
      // Skip the escaped character so \{ and \} don't affect the balance
      i++;
      continue;
    }

    if (char === "{") {
      braceCount++;
      entered = true;
    } else if (char === "}") {
      braceCount--;
    }

    // Stop right after the brace that closes the first group
    if (entered && braceCount === 0) {
      end = i + 1;
      break;
    }
  }
  return latex.slice(0, end);
};
573

574
/**
 * Relocates footnotes found in the first `\caption{}` of a latex element.
 *
 * Each `\footnote{...}` inside the caption is replaced by a
 * `\footnotemark{}`, a footnote-free short caption is supplied as the
 * optional caption argument, and the footnote texts are emitted below the
 * element as `\footnotetext{...}` (with counter adjustments when there is
 * more than one). Input without a caption, or with a caption that has no
 * footnotes, is returned unchanged.
 */
const processElementCaptionFootnotes = (latexFigure: string) => {
  const kCaptionOpen = "\\caption{";
  const kFootnoteOpen = "\\footnote{";

  // No caption means just let it go
  const captionStart = latexFigure.indexOf(kCaptionOpen);
  if (captionStart === -1) {
    return latexFigure;
  }

  // Isolate the caption command and its inner text
  const fromCaption = latexFigure.slice(captionStart);
  const captionCommand = readBalancedCommand(fromCaption);
  let pending = captionCommand.slice(
    kCaptionOpen.length,
    captionCommand.length - 1,
  );

  // No footnotes in the caption, just leave it alone
  let notePos = pending.indexOf(kFootnoteOpen);
  if (notePos === -1) {
    return latexFigure;
  }

  // plainCaption has the footnotes removed, markedCaption has footnote
  // marks in their place, notes collects the footnote texts in order
  let plainCaption = "";
  let markedCaption = "";
  const notes: string[] = [];
  do {
    const lead = pending.substring(0, notePos);
    plainCaption += lead;
    markedCaption += lead;
    pending = pending.slice(notePos);

    const noteCommand = readBalancedCommand(pending);
    markedCaption += "\\footnotemark{}";
    notes.push(
      noteCommand.slice(kFootnoteOpen.length, noteCommand.length - 1),
    );

    pending = pending.slice(noteCommand.length);
    notePos = pending.indexOf(kFootnoteOpen);
  } while (notePos > -1);

  // Whatever follows the last footnote belongs to both captions
  plainCaption += pending;
  markedCaption += pending;

  const pieces: string[] = [
    // the element up to the caption
    latexFigure.substring(0, captionStart),
    // the rewritten caption
    `\\caption[${plainCaption}]{${markedCaption}}`,
    // the rest of the element
    fromCaption.slice(captionCommand.length),
    "\n",
  ];

  // With several notes, rewind the counter so each text lines up with its mark
  const lastNote = notes.length - 1;
  if (lastNote > 0) {
    pieces.push(`\\addtocounter{footnote}{-${lastNote}}`);
  }
  notes.forEach((note, index) => {
    pieces.push(`\\footnotetext{${note}}`);
    if (index < lastNote) {
      pieces.push(`\\addtocounter{footnote}{1}`);
    }
  });

  return pieces.join("");
};
673

674
// Matches a longtable column spec of the form `p{(\columnwidth - N\tabcolsep...`.
// Group 1 is the text before the `p{`, group 2 is everything from the
// `N\tabcolsep)` part through the end of the line.
const kMatchLongTableSize = /^(.*)p{\(\\columnwidth - (\d+\\tabcolsep\).*$)/;

// Lines that open / close a longtable environment
const kStartLongTable = /^\\begin{longtable}/;
const kEndLongTable = /^\\end{longtable}/;
678

679
/**
 * Creates a line processor that collects uuid definitions and expands them.
 *
 * A definition starts at a `%quarto-define-uuid: <id>` line and ends at a
 * `%quarto-end-define-uuid` line; the lines in between (joined without a
 * separator and trimmed) become the replacement text. Definition lines are
 * removed from the output. In every other line, each previously defined id
 * is replaced by its text.
 */
const guidsProcessor = () => {
  const definitions: [string, string][] = [];
  // true while reading the body of a definition
  let collecting = false;
  let currentGuid: string | undefined;
  let currentBody: string[] = [];

  return (line: string): string | undefined => {
    if (collecting) {
      if (line !== "%quarto-end-define-uuid") {
        // still inside the definition body
        currentBody.push(line);
        return undefined;
      }

      // definition complete: record it and reset the collection state
      collecting = false;
      if (currentGuid === undefined) {
        throw new Error("guidBeingProcessed is undefined");
      }
      definitions.push([currentGuid, currentBody.join("").trim()]);
      currentBody = [];
      currentGuid = undefined;
      return undefined;
    }

    if (line.startsWith("%quarto-define-uuid: ")) {
      collecting = true;
      currentGuid = line.replace(/^%quarto-define-uuid:\s*/, "").trim();
      return undefined;
    }

    // Expand every known definition, in the order they were defined
    return definitions.reduce(
      (text, [guid, body]) => text.replaceAll(guid, body),
      line,
    );
  };
};
720

721
/**
 * Creates a line processor that widens longtable columns found between the
 * `% quarto-tables-in-margin-AB1927C9:begin` / `:end` marker lines: column
 * widths based on `\columnwidth` are extended by `\marginparwidth` and
 * `\marginparsep`. The marker lines themselves are removed.
 */
const tableColumnMarginLineProcessor = () => {
  type Phase = "looking-for-boundaries" | "looking-for-tables" | "processing";
  let phase: Phase = "looking-for-boundaries";

  return (line: string): string | undefined => {
    // Outside of a marked region: wait for the begin marker
    if (phase === "looking-for-boundaries") {
      if (line !== "% quarto-tables-in-margin-AB1927C9:begin") {
        return line;
      }
      phase = "looking-for-tables";
      return undefined;
    }

    // Inside a marked region: wait for a longtable or the end marker
    if (phase === "looking-for-tables") {
      if (line.match(kStartLongTable)) {
        phase = "processing";
        return line;
      }
      if (line === "% quarto-tables-in-margin-AB1927C9:end") {
        phase = "looking-for-boundaries";
        return undefined;
      }
      return line;
    }

    // Inside a longtable: rewrite column widths until the table ends
    if (line.match(kEndLongTable)) {
      phase = "looking-for-tables";
      return line;
    }
    const sizeMatch = line.match(kMatchLongTableSize);
    if (!sizeMatch) {
      return line;
    }
    return `${
      sizeMatch[1]
    }p{(\\marginparwidth + \\marginparsep + \\columnwidth - ${sizeMatch[2]}`;
  };
};
766

767
/**
 * Creates a line processor that buffers every figure environment (from a
 * line starting with `\begin{figure}` through the `\end{figure}` line) and
 * emits it as one string after relocating its caption footnotes. Lines
 * outside of figures pass through unchanged.
 */
const captionFootnoteLineProcessor = () => {
  // Lines of the figure being captured; undefined while scanning
  let figureLines: string[] | undefined;

  return (line: string): string | undefined => {
    if (figureLines === undefined) {
      if (!line.match(/^\\begin{figure}.*$/)) {
        return line;
      }
      // start capturing; the line is held back until the figure ends
      figureLines = [line];
      return undefined;
    }

    figureLines.push(line);
    if (!line.match(/^\\end{figure}%*$/)) {
      return undefined;
    }

    // Whole figure read: clear the capture state, then process the
    // caption and relocate its footnotes
    const figure = figureLines.join("\n");
    figureLines = undefined;
    return processElementCaptionFootnotes(figure);
  };
};
797

798
/**
 * Creates a function that moves sidenotes out of a latex element.
 *
 * Each `\sidenote{\footnotesize ...}` in the input is replaced by a
 * `\sidenotemark{}` and its text is emitted as `\sidenotetext{...}` right
 * after the first occurrence of `endMarker` that follows the last
 * relocated sidenote (or at the end of the input when the marker is
 * missing).
 *
 * Sidenote text may contain nested brace groups and backslash-escaped
 * characters. A sidenote whose closing brace cannot be found is left in
 * the text untouched, and scanning stops there.
 *
 * @param endMarker Text that ends the element (e.g. `\end{longtable}`).
 * @returns A function mapping the element's latex to the rewritten latex.
 */
const processSideNotes = (endMarker: string) => {
  const sideNoteMarker = "\\sidenote{\\footnotesize ";

  // Returns the index of the brace closing the (already opened) sidenote
  // group, or -1 when the group never closes
  const findClosingBrace = (text: string): number => {
    let depth = 1;
    for (let i = 0; i < text.length; i++) {
      const ch = text[i];
      if (ch === "\\") {
        // skip the escaped character (e.g. \} or \\)
        i++;
      } else if (ch === "{") {
        depth++;
      } else if (ch === "}") {
        depth--;
        if (depth === 0) {
          return i;
        }
      }
    }
    return -1;
  };

  return (latexLongTable: string) => {
    let strProcessing = latexLongTable;
    const strOutput: string[] = [];
    const sidenotes: string[] = [];

    let sidenotePos = strProcessing.indexOf(sideNoteMarker);
    while (sidenotePos > -1) {
      const remainingStr = strProcessing.substring(
        sidenotePos + sideNoteMarker.length,
      );
      const sideNoteEnd = findClosingBrace(remainingStr);
      if (sideNoteEnd === -1) {
        // Unterminated sidenote: keep the rest as-is and stop scanning
        // (continuing would never make progress)
        break;
      }

      strOutput.push(strProcessing.substring(0, sidenotePos));
      strOutput.push("\\sidenotemark{}");
      sidenotes.push(remainingStr.substring(0, sideNoteEnd));
      strProcessing = remainingStr.substring(sideNoteEnd + 1);
      sidenotePos = strProcessing.indexOf(sideNoteMarker);
    }

    // Ensure that we inject sidenotes after the end marker; without an end
    // marker they go after all of the remaining text
    const endPos = strProcessing.indexOf(endMarker);
    const splitAt = endPos > -1
      ? endPos + endMarker.length
      : strProcessing.length;

    strOutput.push(strProcessing.substring(0, splitAt));
    for (const note of sidenotes) {
      strOutput.push(`\\sidenotetext{${note}}\n`);
    }
    const suffix = strProcessing.substring(splitAt);
    if (suffix) {
      strOutput.push(suffix);
    }

    return strOutput.join("");
  };
};
859

860
// Sidenote relocation for longtable environments
const processLongTableSidenotes = processSideNotes("\\end{longtable}");

// Sidenote relocation for table environments
const processTableSidenotes = processSideNotes("\\end{table}");
862

863
/**
 * Builds a factory for line processors that buffer a multi-line region and
 * hand it to `callback` as a single newline-joined string.
 *
 * @param beginRegex A line matching this starts a region.
 * @param endRegex A later line matching this ends the region.
 * @param callback Receives the region text; its result is emitted in place
 *   of the region's last line (earlier region lines are withheld).
 * @returns A factory creating an independent processor per call.
 */
const sideNoteProcessor = (
  beginRegex: RegExp,
  endRegex: RegExp,
  callback: (str: string) => string,
) => {
  return () => {
    // Lines of the region being captured; undefined while scanning
    let regionLines: string[] | undefined;

    return (line: string): string | undefined => {
      if (regionLines === undefined) {
        if (line.match(beginRegex) === null) {
          return line;
        }
        regionLines = [line];
        return undefined;
      }

      regionLines.push(line);
      if (line.match(endRegex) === null) {
        return undefined;
      }

      // Region complete: reset the capture state before invoking the callback
      const region = regionLines.join("\n");
      regionLines = undefined;
      return callback(region);
    };
  };
};
899
// Captures each longtable environment and relocates its sidenotes
const longTableSidenoteProcessor = sideNoteProcessor(
  /^\\begin{longtable}.*$/,
  /^\\end{longtable}%*$/,
  processLongTableSidenotes,
);

// Captures each table environment and relocates its sidenotes
const tableSidenoteProcessor = sideNoteProcessor(
  /^\\begin{table}.*$/,
  /^\\end{table}%*$/,
  processTableSidenotes,
);
910

911
// Creates a line processor that appends the `[H]` option to floats nested
// in a callout box. Between a line starting with `\begin{tcolorbox}` and the
// next line starting with `\end{tcolorbox}`, a line that is exactly
// `\begin{figure}` or `\begin{codelisting}` is rewritten with `[H]`.
// All other lines are returned unchanged.
const calloutFloatHoldLineProcessor = () => {
  let insideCallout = false;

  return (line: string): string | undefined => {
    if (!insideCallout) {
      // The opening line itself is emitted as-is
      insideCallout = /^\\begin{tcolorbox}/.test(line);
      return line;
    }

    if (/^\\end{tcolorbox}/.test(line)) {
      insideCallout = false;
      return line;
    }
    if (/^\\begin{figure}$/.test(line)) {
      return "\\begin{figure}[H]";
    }
    if (/^\\begin{codelisting}$/.test(line)) {
      return "\\begin{codelisting}[H]";
    }
    return line;
  };
};
937

938
// Marker line standing in for the bibliography location. Despite the name
// this is a plain string that is compared by equality, not a RegExp.
const kQuartoBibPlaceholderRegex = "%bib-loc-124C8010";

// biblatex: creates a line processor that turns the first placeholder line
// into `\printbibliography[heading=none]`, drops any further placeholder
// lines, and, once a placeholder has been replaced, drops bare
// `\printbibliography` lines.
const bibLatexBibligraphyRefsDivProcessor = () => {
  let placeholderReplaced = false;

  return (line: string): string | undefined => {
    if (line === kQuartoBibPlaceholderRegex) {
      if (placeholderReplaced) {
        // already seen a refs div, just ignore this one
        return undefined;
      }
      placeholderReplaced = true;
      return "\\printbibliography[heading=none]";
    }

    const isRedundantBibliography = placeholderReplaced &&
      /^\\printbibliography$/.test(line);
    return isRedundantBibliography ? undefined : line;
  };
};
957

958
// natbib: creates a line processor that replaces the first bibliography
// placeholder line with a heading-less `\bibliography{...}` declaration for
// `bibs` (joined with commas). Placeholder lines are dropped when `bibs` is
// not provided or when one has already been replaced. After a replacement,
// later `\bibliography{...}` lines are dropped as well.
const natbibBibligraphyRefsDivProcessor = (bibs?: string[]) => {
  let placeholderReplaced = false;

  return (line: string): string | undefined => {
    if (line === kQuartoBibPlaceholderRegex) {
      if (!bibs || placeholderReplaced) {
        // nothing to emit, or already seen a refs div: ignore this one
        return undefined;
      }
      placeholderReplaced = true;
      const bibList = bibs.join(",");
      return `\\renewcommand{\\bibsection}{}\n\\bibliography{${bibList}}`;
    }

    if (placeholderReplaced && /^\s*\\bibliography{.*}$/.test(line)) {
      return undefined;
    }
    return line;
  };
};
978

979
// Removes the biblatex \printbibliography command: a line that is exactly
// `\printbibliography` becomes an empty line, anything else passes through.
const suppressBibLatexBibliographyLineProcessor = () => {
  const printBibliographyLine = /^\\printbibliography$/;
  return (line: string): string | undefined =>
    printBibliographyLine.test(line) ? "" : line;
};
988

989
// Replaces the natbib bibliography declaration with a version
// that will not be printed in the PDF: the `\bibliography{...}` command is
// wrapped in a save box (`\mytempbib`) that is declared but never used here.
const suppressNatbibBibliographyLineProcessor = () => {
  return (line: string): string | undefined => {
    // The pattern is anchored at both ends, so a match covers the whole line
    const declaration = line.match(/^\s*\\bibliography{(.*)}$/);
    if (declaration === null) {
      return line;
    }
    const bib = declaration[1];
    return [
      "\\newsavebox\\mytempbib",
      `\\savebox\\mytempbib{\\parbox{\\textwidth}{\\bibliography{${bib}}}}`,
    ].join("\n");
  };
};
999

1000
// {?quarto-cite:(id)}
const kQuartoCiteRegex = /{\?quarto-cite:(.*?)}/g;

// biblatex: creates a line processor that rewrites every
// `{?quarto-cite:<key>}` placeholder on a line as `\fullcite{<key>}`.
const bibLatexCiteLineProcessor = () => {
  const toFullCite = (_placeholder: string, citeKey: string): string =>
    "\\fullcite{" + citeKey + "}";

  // kQuartoCiteRegex carries the `g` flag, so every placeholder is replaced
  return (line: string): string | undefined =>
    line.replace(kQuartoCiteRegex, toFullCite);
};
1009

1010
// natbib: creates a line processor that rewrites every
// `{?quarto-cite:<key>}` placeholder on a line as `\bibentry{<key>}`.
const natbibCiteLineProcessor = () => {
  const toBibEntry = (_placeholder: string, citeKey: string): string =>
    "\\bibentry{" + citeKey + "}";

  // kQuartoCiteRegex carries the `g` flag, so every placeholder is replaced
  return (line: string): string | undefined =>
    line.replace(kQuartoCiteRegex, toBibEntry);
};
1017

1018
// Creates a line processor that rewrites every `\footnote{` opener on a line
// to `\sidenote{\footnotesize ` (note the trailing space).
const sideNoteLineProcessor = () => {
  const footnoteOpener = /\\footnote{/g;
  const sidenoteOpener = "\\sidenote{\\footnotesize ";
  return (line: string): string | undefined =>
    line.replace(footnoteOpener, sidenoteOpener);
};
1023

1024
// Creates a line processor that moves a longtable's caption toward the
// bottom of the table.
//
// Inside a `longtable` the `\caption` (one line, or several lines up to a
// line ending in `\tabularnewline`) is withheld. It is re-emitted
//   - before `\endlastfoot` (preceded by a `\tabularnewline` line), or
//   - before `\end{longtable}` when no `\endlastfoot` consumed it, or
//   - before `\end{document}` if a caption is still pending by then.
const longtableBottomCaptionProcessor = () => {
  let insideLongtable = false;
  let readingCaption = false;
  let pendingCaption: string | undefined;

  return (line: string): string | undefined => {
    // Last resort: never lose a withheld caption
    if (pendingCaption && /^\\end{document}/.test(line)) {
      return `${pendingCaption}\n${line}`;
    }

    if (!insideLongtable) {
      insideLongtable = /^\\begin{longtable}/.test(line);
      return line;
    }

    if (readingCaption) {
      // Continuation of a multi-line caption
      pendingCaption = `${pendingCaption}\n${line}`;
      readingCaption = !/\\tabularnewline$/.test(line);
      return undefined;
    }

    const isSingleLineCaption = /^\\caption.*?\\tabularnewline$/.test(line) ||
      /^\\caption{.*}\\\\$/.test(line);
    if (isSingleLineCaption) {
      pendingCaption = line;
      return undefined;
    }

    if (/^\\caption.*?/.test(line)) {
      // Caption starts here and continues on following lines
      pendingCaption = line;
      readingCaption = true;
      return undefined;
    }

    if (pendingCaption && /^\\endlastfoot/.test(line)) {
      const withCaption = `\\tabularnewline\n${pendingCaption}\n${line}`;
      pendingCaption = undefined;
      return withCaption;
    }

    if (/^\\end{longtable}$/.test(line)) {
      insideLongtable = false;
      if (pendingCaption) {
        const withCaption = pendingCaption + "\n" + line;
        pendingCaption = undefined;
        return withCaption;
      }
    }

    return line;
  };
};
1070

1071
// Matches the heading line of the references chapter and captures its title
const kChapterRefNameRegex = /^\\chapter\*?{(.*?)}\\label{references.*?}$/;

// Creates a line processor that removes the references chapter heading
// together with the `\addcontentsline{toc}{chapter}{<title>}` and
// `\markboth{<title>}{<title>}` lines that follow it.
//
// The title is matched as literal text. It is arbitrary LaTeX and may
// contain characters such as `(`, `+`, `[` or `\`; interpolating it into a
// regular expression would either miss the lines or throw when building
// the pattern.
const cleanReferencesChapter = () => {
  let tocEntry: string | undefined;
  let markBoth: string | undefined;

  return (line: string): string | undefined => {
    const chapterRefMatch = line.match(kChapterRefNameRegex);
    if (chapterRefMatch) {
      const refChapterName = chapterRefMatch[1];
      tocEntry = `\\addcontentsline{toc}{chapter}{${refChapterName}}`;
      markBoth = `\\markboth{${refChapterName}}{${refChapterName}}`;
      // Eat this line
      return undefined;
    }

    if (tocEntry !== undefined && line.includes(tocEntry)) {
      // Eat this line
      return undefined;
    }
    if (markBoth !== undefined && line.includes(markBoth)) {
      // Eat this line
      return undefined;
    }
    return line;
  };
};
1099

1100
// Creates a line processor that strips the Pandoc-generated bibliography
// from the output while indexing its entries into `renderedCites`.
//
// The bibliography starts at a `\phantomsection\label{refs}` line
// (optionally prefixed with `\protect`) and ends at `\end{CSLReferences}`.
// Every line in that range, including both delimiters, is dropped. Within
// it, each `\bibitem[\citeproctext]{ref-<key>}` line starts the entry for
// `<key>`; following non-empty lines are appended to that entry and an
// empty line closes it.
const indexAndSuppressPandocBibliography = (
  renderedCites: Record<string, string[]>,
) => {
  let insideBibliography = false;
  let activeKey: string | undefined = undefined;

  return (line: string): string | undefined => {
    if (!insideBibliography) {
      if (/^(\\protect)?\\phantomsection\\label{refs}$/.test(line)) {
        insideBibliography = true;
        return undefined;
      }
      return line;
    }

    if (/^\\end{CSLReferences}$/.test(line)) {
      insideBibliography = false;
      return undefined;
    }

    const bibItem = line.match(/\\bibitem\[\\citeproctext\]{ref\-(.*?)}/);
    if (bibItem && bibItem[1]) {
      // A new entry begins; the \bibitem line is its first line
      activeKey = bibItem[1];
      renderedCites[activeKey] = [line];
    } else if (line.length === 0) {
      activeKey = undefined;
    } else if (activeKey) {
      renderedCites[activeKey].push(line);
    }

    // Everything inside the bibliography is suppressed
    return undefined;
  };
};
1135

1136
const kInSideCaptionRegex = /^\\sidecaption{/;
const kBeginFigureRegex = /^\\begin{figure}\[.*?\]$/;
const kEndFigureRegex = /^\\end{figure}\%?$/;

// Creates a line processor that expands `{?quarto-cite:<key>}` placeholders
// using the entries in `renderedCites`.
//
// - Outside a side caption, a placeholder with a known key becomes its own
//   `CSLReferences` environment holding the entry's lines.
// - Inside a `\sidecaption{` of a figure, only the entry's lines after the
//   first are inlined. The first line is held back and emitted in a
//   `CSLReferences` environment right after the line that closes the figure.
// - A placeholder with an unknown key is replaced by the bare key.
const placePandocBibliographyEntries = (
  renderedCites: Record<string, string[]>,
) => {
  let biblioEntryState: "scanning" | "in-figure" | "in-sidecaption" =
    "scanning";
  // First lines of entries cited inside a side caption, awaiting the figure end
  let deferredEntryLines: string[] = [];

  const expandCite = (_placeholder: string, citeKey: string): string => {
    const citeLines = renderedCites[citeKey];
    if (!citeLines) {
      return citeKey;
    }
    if (biblioEntryState === "in-sidecaption" && citeLines.length > 0) {
      deferredEntryLines.push(citeLines[0]);
      return ["", ...citeLines.slice(1)].join("\n");
    }
    return [
      "\n\\begin{CSLReferences}{2}{0}",
      ...citeLines,
      "\\end{CSLReferences}\n",
    ].join("\n");
  };

  return (line: string): string | undefined => {
    // Advance the figure / side-caption state machine for this line
    if (biblioEntryState === "scanning") {
      if (line.match(kBeginFigureRegex)) {
        biblioEntryState = "in-figure";
      }
    } else if (biblioEntryState === "in-figure") {
      if (line.match(kInSideCaptionRegex)) {
        biblioEntryState = "in-sidecaption";
      } else if (line.match(kEndFigureRegex)) {
        biblioEntryState = "scanning";
      }
    } else if (line.match(kEndFigureRegex)) {
      // in-sidecaption: only the end of the figure leaves this state
      biblioEntryState = "scanning";
    }

    // Back outside a figure: flush anything held back by a side caption
    if (biblioEntryState === "scanning" && deferredEntryLines.length > 0) {
      const flushed = [
        line,
        "\n\\begin{CSLReferences}{2}{0}",
        ...deferredEntryLines,
        "\\end{CSLReferences}\n",
      ].join("\n");
      deferredEntryLines = [];
      return flushed;
    }

    // kQuartoCiteRegex carries the `g` flag, so every placeholder is expanded
    return line.replace(kQuartoCiteRegex, expandCite);
  };
};
1205

1206
const kCodeAnnotationRegex =
  /(.*)\\CommentTok\{(.*?)[^\s]+? +\\textless\{\}(\d+)\\textgreater\{\}.*\}$/gm;
const kCodePlainAnnotationRegex = /(.*)% \((\d+)\)$/g;

// Creates a line processor that turns code-annotation markers at the end of
// a code line into visible annotation numbers.
//
// - Highlighted code: a trailing `\CommentTok{... <N>}` marker is replaced
//   by a right-aligned `\circled{N}`; comment text preceding the marker is
//   kept in its own `\CommentTok{...}`.
// - Plain code: a trailing `% (N)` marker is replaced by `(N)`, padded with
//   spaces towards column 75.
//
// When the same number appears again, only the marker is removed and no
// number is printed. The remembered number is cleared at each
// `\begin{Shaded}` line.
const codeAnnotationPostProcessor = () => {
  let previousNumber: string | undefined;

  // Reports whether `annotationNumber` differs from the one last seen,
  // remembering it when it does
  const isFirstUse = (annotationNumber: string): boolean => {
    if (annotationNumber === previousNumber) {
      return false;
    }
    previousNumber = annotationNumber;
    return true;
  };

  const rewriteHighlighted = (
    _match: string,
    prefix: string,
    comment: string,
    annotationNumber: string,
  ): string => {
    if (!isFirstUse(annotationNumber)) {
      return prefix;
    }
    // There is something else inside the comment so the comment has to be
    // recreated without the annotation
    const kept = comment.length > 0
      ? `${prefix}\\CommentTok{${comment}}`
      : prefix;
    return `${kept}\\hspace*{\\fill}\\NormalTok{\\circled{${annotationNumber}}}`;
  };

  const rewritePlain = (
    _match: string,
    prefix: string,
    annotationNumber: string,
  ): string => {
    if (!isFirstUse(annotationNumber)) {
      return prefix;
    }
    const marker = `(${annotationNumber})`;
    const padWidth = Math.max(0, 75 - prefix.length - marker.length);
    return prefix + " ".repeat(padWidth) + marker;
  };

  return (line: string): string | undefined => {
    if (line === "\\begin{Shaded}") {
      previousNumber = undefined;
    }
    // Both patterns carry the `g` flag; colorized code is handled first
    return line
      .replace(kCodeAnnotationRegex, rewriteHighlighted)
      .replace(kCodePlainAnnotationRegex, rewritePlain);
  };
};
1258

1259
// Marker emitted for an annotation number in an annotation list; the
// captured group is the number. The pattern matches the marker alone so
// that, together with the `g` flag, every marker on a line is found (a
// greedy leading `(.*)` group would consume all but the last one).
const kListAnnotationRegex = /5CB6E08D-list-annote-(\d+)/g;

// Creates a line processor that replaces each
// `5CB6E08D-list-annote-<N>` marker on a line with `\circled{<N>}`,
// leaving the surrounding text untouched.
const codeListAnnotationPostProcessor = () => {
  return (line: string): string | undefined => {
    return line.replace(
      kListAnnotationRegex,
      (_match: string, annotationNumber: string) =>
        `\\circled{${annotationNumber}}`,
    );
  };
};
1270

1271
// LaTeX emitted before a longtable whose caption is placed in the margin:
// opens a group and redefines `\LT@makecaption` inside it. The group is
// closed by `kEndLongTableSideCap`.
const kbeginLongTablesideCap = [
  "{",
  "\\makeatletter",
  "\\def\\LT@makecaption#1#2#3{%",
  "  \\noalign{\\smash{\\hbox{\\kern\\textwidth\\rlap{\\kern\\marginparsep",
  "  \\parbox[t]{\\marginparwidth}{%",
  "    \\footnotesize{%",
  "      \\vspace{(1.1\\baselineskip)}",
  "    #1{#2: }\\ignorespaces #3}}}}}}%",
  "    }",
  "\\makeatother",
].join("\n");

// Closes the group opened by `kbeginLongTablesideCap`
const kEndLongTableSideCap = "}";
1283

1284
// LaTeX-supported PDF standards (from latex3/latex2e DocumentMetadata)
// See: https://github.com/latex3/latex2e - documentmetadata-support.dtx
const kLatexSupportedStandards = new Set([
  // PDF/A standards (note: a-1a is NOT supported, only a-1b)
  "a-1b",
  "a-2a",
  "a-2b",
  "a-2u",
  "a-3a",
  "a-3b",
  "a-3u",
  "a-4",
  "a-4e",
  "a-4f",
  // PDF/X standards
  "x-4",
  "x-4p",
  "x-5g",
  "x-5n",
  "x-5pg",
  "x-6",
  "x-6n",
  "x-6p",
  // PDF/UA standards (only ua-2 is supported by LaTeX)
  "ua-2",
]);

// Standards that require PDF tagging (document structure)
// - PDF/A level "a" variants require tagged structure per PDF/A spec
// - PDF/UA standards require tagging for universal accessibility
//   (LaTeX does NOT automatically enable tagging for UA standards)
// Note: within normalizePdfStandardForLatex this set is only consulted for
// entries of kLatexSupportedStandards, so "ua-1" is never reached there.
const kTaggingRequiredStandards = new Set([
  "a-2a",
  "a-3a",
  "ua-1",
  "ua-2",
]);

// Explicit PDF versions that are accepted: 1.4 - 1.7 and 2.0
const kVersionPattern = /^(1\.[4-7]|2\.0)$/;

// PDF version required by each standard (maximum version limits)
// LaTeX defaults to PDF 2.0 with \DocumentMetadata, but some standards
// have maximum version requirements that are incompatible with 2.0
// Note: a-1a is intentionally omitted as LaTeX doesn't support it
const kStandardRequiredVersion: Record<string, string> = {
  // PDF/A-1 requires exactly PDF 1.4 (only a-1b supported by LaTeX)
  "a-1b": "1.4",
  // PDF/A-2 and PDF/A-3 have maximum version of 1.7
  "a-2a": "1.7",
  "a-2b": "1.7",
  "a-2u": "1.7",
  "a-3a": "1.7",
  "a-3b": "1.7",
  "a-3u": "1.7",
  // PDF/A-4, PDF/UA-1, PDF/UA-2 all work with PDF 2.0 (the default)
};

/**
 * Splits a user-provided list of PDF standards into what LaTeX needs.
 *
 * Each entry is normalized (numbers become version strings, text is
 * lowercased and a leading `pdf`, `pdf/` or `pdf-` is removed) and then
 * classified:
 * - a PDF version (`1.4`-`1.7`, `2.0`): the first one listed is used
 * - a standard in `kLatexSupportedStandards`: collected in `standards`
 * - any other string or number: reported with a warning and ignored
 * Entries that are neither strings nor numbers are skipped silently.
 *
 * @param standards Raw entries as read from the document configuration
 * @returns `version` - the first explicit version if any was given, no
 *   matter where it appears in the list; otherwise the version required by
 *   the first listed standard that has one; otherwise `undefined`.
 *   `standards` - supported standards, lowercased, in input order.
 *   `needsTagging` - whether any collected standard is in
 *   `kTaggingRequiredStandards`.
 */
function normalizePdfStandardForLatex(
  standards: unknown[],
): { version?: string; standards: string[]; needsTagging: boolean } {
  // Tracked separately so that a version inferred from a standard can never
  // shadow an explicit version that is listed after that standard
  let explicitVersion: string | undefined;
  let inferredVersion: string | undefined;
  const result: string[] = [];
  let needsTagging = false;

  for (const s of standards) {
    // Convert to string - YAML may parse versions like 2.0 as integer 2
    let str: string;
    if (typeof s === "number") {
      // Handle YAML numeric parsing: integer 2 -> "2.0", float 1.4 -> "1.4"
      str = Number.isInteger(s) ? `${s}.0` : String(s);
    } else if (typeof s === "string") {
      str = s;
    } else {
      continue;
    }
    // Normalize: lowercase, remove any "pdf" prefix
    const normalized = str.toLowerCase().replace(/^pdf[/-]?/, "");

    if (kVersionPattern.test(normalized)) {
      // Use first explicit version (ignore subsequent ones)
      if (explicitVersion === undefined) {
        explicitVersion = normalized;
      }
    } else if (kLatexSupportedStandards.has(normalized)) {
      // LaTeX is case-insensitive, pass through lowercase
      result.push(normalized);
      // Check if this standard requires tagging
      if (kTaggingRequiredStandards.has(normalized)) {
        needsTagging = true;
      }
      // Remember the version required by the first standard that has one
      if (inferredVersion === undefined) {
        inferredVersion = kStandardRequiredVersion[normalized];
      }
    } else {
      warning(
        `PDF standard '${s}' is not supported by LaTeX and will be ignored`,
      );
    }
  }

  return {
    version: explicitVersion ?? inferredVersion,
    standards: result,
    needsTagging,
  };
}
1387

1388
Product

Resources

Company