Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
quarto-dev
GitHub Repository: quarto-dev/quarto-cli
Path: blob/main/tests/verify-pdf-metadata.ts
6446 views
1
/*
 * verify-pdf-metadata.ts
 *
 * PDF metadata verification using pdfjs-dist.
 * Extracts and verifies PDF document metadata (title, author, keywords, etc.).
 *
 * Copyright (C) 2020-2025 Posit Software, PBC
 */
9
10
import { assert } from "testing/asserts";
11
import { ExecuteOutput, Verify } from "./test.ts";
12
13
// ============================================================================
// Type Definitions
// ============================================================================
16
17
/**
 * Expectations for a PDF's document metadata.
 *
 * Every property is optional; a property that is omitted is not checked.
 * Plain strings are matched as case-insensitive substrings, regular
 * expressions are tested against the field value, and a `Date` is matched by
 * its UTC calendar day.
 */
export interface PdfMetadataAssertion {
  /** Expected `Title` entry. */
  title?: string | RegExp;

  /** Expected `Author` entry. */
  author?: string | RegExp;

  /** Expected `Subject` entry. */
  subject?: string | RegExp;

  /**
   * Expected `Keywords` entry. When given as an array, every listed keyword
   * must occur (case-insensitively) in the field value.
   */
  keywords?: string | RegExp | string[];

  /** Expected `Creator` entry. */
  creator?: string | RegExp;

  /** Expected `Producer` entry. */
  producer?: string | RegExp;

  /** Expected `CreationDate` entry. */
  creationDate?: string | RegExp | Date;

  /** Expected `ModDate` entry. */
  modDate?: string | RegExp | Date;
}
31
32
// ============================================================================
// Helper Functions
// ============================================================================
35
36
/**
 * Compare a single metadata value against its expectation.
 *
 * Matching rules, by type of `expected`:
 * - `undefined`: the field is not checked.
 * - `RegExp`: the pattern must match the value.
 * - `Date`: the value must contain the expected UTC calendar day, written
 *   either as `YYYY-MM-DD` or in the compact `YYYYMMDD` form used by raw PDF
 *   date strings such as `D:20240115103000Z`.
 * - `string[]`: every entry must occur in the value (case-insensitive).
 * - `string`: must occur in the value (case-insensitive).
 *
 * A missing value (`undefined` / `null`) is compared as the empty string.
 *
 * @param actual - Value read from the PDF, if any
 * @param expected - Expectation to check the value against
 * @param fieldName - Field label used as the prefix of the error message
 * @returns `null` when the value matches (or nothing was expected), otherwise
 *   a human-readable description of the mismatch
 */
function matchValue(
  actual: string | undefined | null,
  expected: string | RegExp | string[] | Date | undefined,
  fieldName: string,
): string | null {
  if (expected === undefined) return null;

  const actualStr = actual ?? "";

  if (expected instanceof RegExp) {
    // Global/sticky patterns remember `lastIndex` between calls; reset it so
    // a reused pattern always searches from the start of the value.
    expected.lastIndex = 0;
    if (!expected.test(actualStr)) {
      return `${fieldName}: expected to match ${expected}, got "${actualStr}"`;
    }
  } else if (expected instanceof Date) {
    // Only the calendar day (in UTC) is compared, not the time of day.
    const expectedStr = expected.toISOString().slice(0, 10); // YYYY-MM-DD
    // Raw PDF dates look like "D:YYYYMMDDHHmmSS...", i.e. without dashes.
    const compactStr = expectedStr.replace(/-/g, ""); // YYYYMMDD
    if (!actualStr.includes(expectedStr) && !actualStr.includes(compactStr)) {
      return `${fieldName}: expected to contain date ${expectedStr}, got "${actualStr}"`;
    }
  } else if (Array.isArray(expected)) {
    // For arrays (keywords), every expected value must be present.
    const haystack = actualStr.toLowerCase();
    for (const keyword of expected) {
      if (!haystack.includes(keyword.toLowerCase())) {
        return `${fieldName}: expected to contain "${keyword}", got "${actualStr}"`;
      }
    }
  } else {
    // Plain string: case-insensitive "contains".
    if (!actualStr.toLowerCase().includes(expected.toLowerCase())) {
      return `${fieldName}: expected to contain "${expected}", got "${actualStr}"`;
    }
  }

  return null;
}
74
75
// ============================================================================
// Main Predicate
// ============================================================================
78
79
/**
 * Build a verifier that checks a PDF file's metadata against expected values.
 *
 * The PDF is read from disk and parsed with pdfjs-dist; each field present in
 * `assertions` is compared with the corresponding entry of the document's
 * `info` metadata. All mismatches are collected and reported together in a
 * single assertion failure.
 *
 * @param file - Path to the PDF file
 * @param assertions - Metadata fields to verify; omitted fields are skipped
 * @returns Verify object for the test framework
 */
export const ensurePdfMetadata = (
  file: string,
  assertions: PdfMetadataAssertion,
): Verify => {
  return {
    name: `Inspecting ${file} for PDF metadata`,
    verify: async (_output: ExecuteOutput[]) => {
      // Load PDF with pdfjs-dist
      // deno-lint-ignore no-explicit-any
      const pdfjsLib = await import("pdfjs-dist") as any;
      const buffer = await Deno.readFile(file);
      const loadingTask = pdfjsLib.getDocument({ data: buffer });

      let errors: string[];
      try {
        const doc = await loadingTask.promise;
        const { info } = await doc.getMetadata();

        // Check every specified field and keep only the mismatches.
        errors = [
          matchValue(info?.Title, assertions.title, "title"),
          matchValue(info?.Author, assertions.author, "author"),
          matchValue(info?.Subject, assertions.subject, "subject"),
          matchValue(info?.Keywords, assertions.keywords, "keywords"),
          matchValue(info?.Creator, assertions.creator, "creator"),
          matchValue(info?.Producer, assertions.producer, "producer"),
          matchValue(info?.CreationDate, assertions.creationDate, "creationDate"),
          matchValue(info?.ModDate, assertions.modDate, "modDate"),
        ].filter((error): error is string => Boolean(error));
      } finally {
        // Release the document and its worker even when loading or metadata
        // extraction fails, so repeated verifications do not leak resources.
        await loadingTask.destroy();
      }

      // Report all mismatches at once.
      if (errors.length > 0) {
        assert(
          false,
          `PDF metadata assertions failed in ${file}:\n${errors.map((e, i) => ` ${i + 1}. ${e}`).join("\n")}`,
        );
      }
    },
  };
};
133
134