CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutSign UpSign In
sagemathinc

Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place.

GitHub Repository: sagemathinc/cocalc
Path: blob/master/src/packages/sync-fs/lib/util.ts
Views: 687
1
import { dynamicImport } from "tsimportlib";
2
import { readdir, rm } from "fs/promises";
3
import { dirname, join } from "path";
4
import { exists } from "@cocalc/backend/misc/async-utils-node";
5
import { createEncoderStream } from "lz4";
6
import { Readable } from "stream";
7
import { createWriteStream } from "fs";
8
9
import getLogger from "@cocalc/backend/logger";
10
// Debug-level log function for this module, obtained from the logger named "sync-fs:util".
const log = getLogger("sync-fs:util").debug;
11
12
/**
 * Run a subprocess through the `execa` package.
 *
 * The package is loaded lazily on each call via `dynamicImport` rather than
 * with a static import (presumably because execa ships as an ES module --
 * confirm against the build setup).
 *
 * @param cmd - program to run
 * @param args - arguments for the program; only the last 15 are logged
 * @param options - passed through unchanged to execa
 * @returns whatever the underlying execa call resolves to
 */
export async function execa(cmd, args, options?) {
  // Log just the tail of the argument list so huge path lists don't flood the log.
  log("execa", cmd, "...", args?.slice(-15)?.join(" "), options);
  const execaModule = (await dynamicImport(
    "execa",
    module,
  )) as typeof import("execa");
  const result = await execaModule.execa(cmd, args, options);
  return result;
}
20
21
// IMPORTANT: top level hidden subdirectories in path are always ignored, e.g.,
// if path is /home/user, then /home/user/.foo is ignored, but /home/user/bar/.foo
// is not ignored.
/**
 * Produce a null-separated metadata listing of everything below the
 * non-hidden, non-excluded top level entries of `path`, by running `find`.
 *
 * Each record has the form `<path>\0<mtime> <atime> <blocks> <size> <mode>\0\0`
 * (see the -printf format below), with paths relative to `path`.
 *
 * @param path - directory to scan
 * @param exclude - top level names and find `-path` patterns to leave out
 * @returns the raw stdout of find, or "" if `path` does not exist or has no
 *          top level entries to scan
 */
export async function metadataFile({
  path,
  exclude,
}: {
  path: string;
  exclude: string[];
}): Promise<string> {
  log("metadataFile", path, exclude);
  if (!(await exists(path))) {
    return "";
  }
  // Recursively get enough metadata about all non-hidden top level path trees
  // (this is VASTLY more efficient
  // than "find . ...", especially on cocalc with its fuse mounted .snapshots!)
  // Notes about the find output option to printf:
  // - We use null characters as separators because they are the ONLY character
  //   that isn't allowed in a filename (besides '/')! Filenames can have newlines
  //   in them!
  //   BUT -- we are assuming filenames can be encoded as utf8; if not, sync will
  //   obviously not work.
  // - The find output contains more than just what is needed for mtimeDirTree; it contains
  //   everything needed by websocketfs for doing stat, i.e., this output is used
  //   for the metadataFile functionality of websocketfs.
  // - Just a little fact -- output from find is NOT sorted in any guaranteed way.
  // Y2K alert -- note the %.10T below truncates times to integers, and will I guess fail a few hundred years from now.
  const topPaths = (await readdir(path)).filter(
    (p) => !p.startsWith(".") && !exclude.includes(p),
  );
  if (topPaths.length == 0) {
    // With no starting points, find falls back to scanning ".", which would
    // walk (and report) exactly the hidden top level trees we promise to ignore.
    return "";
  }
  // NOTE(review): a top level entry whose name begins with "-" would be parsed
  // by find as an expression rather than a starting point -- confirm whether
  // such names can occur here.
  const { stdout } = await execa(
    "find",
    [
      ...topPaths,
      // This '-not -readable -prune -o ' excludes directories that we cannot read, since there is no possible
      // way to sync them, and a user (not root) might not be able to fix this. See
      // https://stackoverflow.com/questions/762348/how-can-i-exclude-all-permission-denied-messages-from-find/25234419#25234419
      "-not",
      "-readable",
      "-prune",
      "-o",
      ...findExclude(exclude),
      "-printf",
      "%p\\0%.10T@ %.10A@ %b %s %M\\0\\0",
    ],
    {
      cwd: path,
    },
  );
  return stdout;
}
72
73
// Compute the map from paths to their integral mtime for the entire directory tree
// NOTE: this could also be done with the walkdir library, but using find
// is several times faster in general.  This is *the* bottleneck, and the
// subprocess IO isn't much, so calling find as a subprocess is the right
// solution!  This is not a hack at all.
// IMPORTANT: top level hidden subdirectories in path are always ignored
/**
 * @param path - directory whose tree is scanned
 * @param exclude - top level names and find `-path` patterns to leave out
 * @param metadataFile - optional precomputed listing in the record format
 *        `<path>\0<mtime>[ ...]\0\0` (e.g., the output of metadataFile());
 *        when given, find is not run
 * @returns map from "./<relative path>" to the integer part of its mtime;
 *          {} if `path` does not exist
 */
export async function mtimeDirTree({
  path,
  exclude,
  metadataFile,
}: {
  path: string;
  exclude: string[];
  metadataFile?: string;
}): Promise<{ [path: string]: number }> {
  log("mtimeDirTree", path, exclude);
  if (!(await exists(path))) {
    return {};
  }
  // If the string metadataFile is passed in (as output from metadataFile), then we use that
  // If it isn't, then we compute just what is needed here.
  if (metadataFile == null) {
    const topPaths = (await readdir(path)).filter(
      (p) => !p.startsWith(".") && !exclude.includes(p),
    );
    if (topPaths.length == 0) {
      // With no starting points, find would fall back to scanning "." --
      // including hidden top level trees -- only for every resulting record
      // to be discarded below. Skip the subprocess entirely.
      return {};
    }
    const { stdout } = await execa(
      "find",
      [
        ...topPaths,
        "-not", // '-not -readable -prune -o' skips unreadable directories, which cannot be synced
        "-readable",
        "-prune",
        "-o",
        ...findExclude(exclude),
        "-printf",
        "%p\\0%T@\\0\\0",
      ],
      {
        cwd: path,
      },
    );
    metadataFile = stdout;
  }
  const mtimes: { [path: string]: number } = {};
  for (const record of metadataFile.split("\0\0")) {
    if (!record) continue; // trailing blank record at the end of the listing
    const [filePath, meta] = record.split("\0");
    if (filePath.startsWith(".")) {
      // never include top level hidden paths, if they are there for some reason.
      continue;
    }
    if (meta == null) {
      // malformed record with no metadata field; skip it rather than crash.
      continue;
    }
    // NOTE -- GNU tar discards fractional part of timestamp, thus rounding down,
    // so this is right, since we will use tar for sending files.
    mtimes["./" + filePath] = parseInt(meta.split(" ")[0], 10);
  }
  return mtimes;
}
127
128
/**
 * Build the find(1) arguments that filter out each excluded path together
 * with everything beneath it.
 *
 * For every entry `p` this contributes `-not -path p -not -path p/*`.
 */
function findExclude(exclude: string[]): string[] {
  // We run "find *", not "find .", so no need to exclude hidden files here.
  // Also, doing it here instead of with "find *" is massively slower in general!
  return exclude.flatMap((excluded) => [
    "-not",
    "-path",
    excluded,
    "-not",
    "-path",
    `${excluded}/*`,
  ]);
}
142
143
/**
 * Recursively delete each of the given paths, resolved relative to `rel`.
 *
 * Failures for individual paths are logged as warnings and do not stop the
 * remaining removals. The caller's `paths` array is left untouched.
 *
 * @param paths - paths to delete, relative to `rel`
 * @param rel - base directory; required despite being optional in the signature
 * @throws Error if `rel` is not provided
 */
export async function remove(paths: string[], rel?: string) {
  if (!rel) {
    throw Error("rel must be defined");
  }
  // Sort a copy (Array.prototype.sort works in place and would otherwise
  // reorder the caller's array).
  // TODO/guess -- by sorting we remove files in directory, then containing directory (?).
  const ordered = [...paths].sort().reverse();
  for (const path of ordered) {
    try {
      await rm(join(rel, path), { recursive: true });
    } catch (err) {
      log(`WARNING: issue removing '${path}' -- ${err}`);
    }
  }
}
156
157
/**
 * Write `contents` to the file at `path`, lz4-compressed.
 *
 * Resolves once the output file stream has finished; rejects if the encoder
 * or the file stream reports an error.
 *
 * @param path - destination file
 * @param contents - text to compress and write
 */
export async function writeFileLz4(
  path: string,
  contents: string,
): Promise<void> {
  // We use a stream instead of blocking in process for compression
  // because this code is likely to run in the project's daemon,
  // and blocking here would block interactive functionality such
  // as terminals.

  // Create readable stream from the input.
  const input = new Readable({
    read() {
      this.push(contents);
      this.push(null);
    },
  });
  // lz4 compression encoder
  const encoder = createEncoderStream();
  const output = createWriteStream(path);
  // start writing
  input.pipe(encoder).pipe(output);
  // wait until done
  await new Promise<void>((resolve, reject) => {
    // pipe() does not close the destination when an upstream stream fails,
    // so tear the whole chain down explicitly; otherwise the file stream
    // (and its descriptor) would stay open after we reject.
    const fail = (err: Error) => {
      input.destroy();
      encoder.destroy();
      output.destroy();
      reject(err);
    };
    encoder.on("error", fail);
    output.on("error", fail);
    output.on("finish", () => resolve());
  });
}
183
184
/**
 * Given an array paths of relative paths (relative to my HOME directory),
 * the function parseCommonPrefixes outputs an array of objects
 *
 *     {prefix:string; paths:string[]}
 *
 * where prefix is a common path prefix of all the paths, and paths is what
 * is after that prefix.  Thus if the output is x, then
 *
 *     join(x[0].prefix, x[0].paths[0]), join(x[0].prefix, x[0].paths[1]), ...,
 *     join(x[x.length-1].prefix, x[x.length-1].paths[0]), ...
 *
 * is exactly the original input string[] paths (in sorted order).
 *
 * The prefix of a path is its dirname. Only paths that are adjacent after
 * sorting are grouped, so the same prefix can appear in more than one group.
 * The input array is not modified.
 */
export function parseCommonPrefixes(
  paths: string[],
): { prefix: string; paths: string[] }[] {
  // sort (a copy of) the paths to group common prefixes
  const sortedPaths = paths.slice().sort();
  const result: { prefix: string; paths: string[] }[] = [];

  let i = 0;
  while (i < sortedPaths.length) {
    const commonPrefix = dirname(sortedPaths[i]);
    let j = i + 1;

    // extend the group over the run of paths with the same dirname
    while (j < sortedPaths.length && dirname(sortedPaths[j]) == commonPrefix) {
      j++;
    }

    // slice the paths with the same prefix and make them relative to it
    const subPaths = sortedPaths
      .slice(i, j)
      .map((p) => relativeToPrefix(commonPrefix, p));

    result.push({ prefix: commonPrefix, paths: subPaths });

    i = j;
  }

  return result;
}

// Express p (whose dirname is prefix) as a "./"-style path relative to prefix.
function relativeToPrefix(prefix: string, p: string): string {
  if (p.startsWith(prefix + "/")) {
    // usual case: p is literally "<prefix>/<rest>", so keep the "/<rest>" part
    return "." + p.slice(prefix.length);
  }
  // Otherwise dirname is not a textual "<prefix>/" prefix of p: either the
  // prefix itself ends in "/" (e.g., "/foo" with dirname "/"), or p is a bare
  // name whose dirname is "." (e.g., "foo").  Blindly slicing prefix.length
  // characters would eat part of the name in both cases.
  const rest =
    prefix.endsWith("/") && p.startsWith(prefix) ? p.slice(prefix.length) : p;
  return rest ? "./" + rest : ".";
}
233
234