CoCalc -- util.ts

Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place.

GitHub Repository: sagemathinc/cocalc
Path: blob/master/src/packages/sync-fs/lib/util.ts
Views: ⁶⁸⁷
1
import { dynamicImport } from "tsimportlib";
2
import { readdir, rm } from "fs/promises";
3
import { dirname, join } from "path";
4
import { exists } from "@cocalc/backend/misc/async-utils-node";
5
import { createEncoderStream } from "lz4";
6
import { Readable } from "stream";
7
import { createWriteStream } from "fs";
8

9
import getLogger from "@cocalc/backend/logger";
10
// `log` is the `debug` method of the logger registered under the name
// "sync-fs:util"; every function in this file reports through it.
const log = getLogger("sync-fs:util").debug;
11

12
/**
 * Run a subprocess via the `execa` package and return its result.
 *
 * The package is not imported at the top of the file; it is loaded on each
 * call through `dynamicImport("execa", module)`.
 * NOTE(review): presumably because `execa` is published as ESM only -- confirm.
 *
 * @param cmd - program to run
 * @param args - arguments passed to the program
 * @param options - forwarded unchanged to `execa`
 * @returns whatever the underlying `execa` call resolves to
 */
export async function execa(cmd, args, options?) {
  // Only the last 15 arguments are logged, so huge argument lists do not
  // flood the log.
  const argsTail = args?.slice(-15)?.join(" ");
  log("execa", cmd, "...", argsTail, options);

  const execaModule = (await dynamicImport(
    "execa",
    module,
  )) as typeof import("execa");

  const result = await execaModule.execa(cmd, args, options);
  return result;
}
20

21
/**
 * Produce the raw metadata listing of the directory tree under `path`, as
 * printed by one `find` subprocess.
 *
 * IMPORTANT: top level hidden subdirectories in path are always ignored, e.g.,
 * if path is /home/user, then /home/user/.foo is ignored, but /home/user/bar/.foo
 * is not ignored.
 *
 * @param path - directory to scan; `find` runs with this as its cwd, so the
 *   printed paths are relative to it.
 * @param exclude - top level names to skip entirely; each entry is also handed
 *   to `find` as a `-path` pattern (see findExclude).
 * @returns the stdout of `find`: one record per file of the form
 *   `<path>\0<mtime> <atime> <blocks> <size> <mode>\0\0`.  Returns "" when
 *   `path` does not exist or has no non-hidden, non-excluded top level entry.
 */
export async function metadataFile({
  path,
  exclude,
}: {
  path: string;
  exclude: string[];
}): Promise<string> {
  log("metadataFile", path, exclude);
  if (!(await exists(path))) {
    return "";
  }
  // Recursively get enough metadata about all non-hidden top level path trees
  // (this is VASTLY more efficient
  // than "find . ...", especially on cocalc with its fuse mounted .snapshots!)
  // Notes about the find output option to printf:
  // - We use null characters as separators because they are the ONLY character
  //   that isn't allowed in a filename (besides '/')! Filenames can have newlines
  //   in them!
  //   BUT -- we are assuming filenames can be encoded as utf8; if not, sync will
  //   obviously not work.
  // - The find output contains more than just what is needed for mtimeDirTree; it contains
  //   everything needed by websocketfs for doing stat, i.e., this output is used
  //   for the metadataFile functionality of websocketfs.
  // - Just a little fact -- output from find is NOT sorted in any guaranteed way.
  // Y2K alert -- note the %.10T below truncates times to integers, and will I guess fail a few hundred years from now.
  const topPaths = (await readdir(path)).filter(
    (p) => !p.startsWith(".") && !exclude.includes(p),
  );
  if (topPaths.length === 0) {
    // With no starting points, find falls back to ".", which would walk the
    // whole tree *including* the hidden top level directories that must be
    // ignored.  There is nothing to report, so do not run find at all.
    return "";
  }
  // NOTE(review): a top level name that starts with "-" would be parsed by
  // find as part of the expression instead of as a starting point -- TODO
  // confirm whether such names can occur here.
  const { stdout } = await execa(
    "find",
    [
      ...topPaths,
      // This '-not -readable -prune -o ' excludes directories that we cannot read, since there is no possible
      // way to sync them, and a user (not root) might not be able to fix this.  See
      // https://stackoverflow.com/questions/762348/how-can-i-exclude-all-permission-denied-messages-from-find/25234419#25234419
      "-not",
      "-readable",
      "-prune",
      "-o",
      ...findExclude(exclude),
      "-printf",
      // path, NUL, then mtime, atime, block count, size and symbolic mode
      // separated by spaces, then two NULs to end the record.
      "%p\\0%.10T@ %.10A@ %b %s %M\\0\\0",
    ],
    {
      cwd: path,
    },
  );
  return stdout;
}
72

73
/**
 * Compute the map from paths to their integral mtime for the entire directory tree.
 *
 * NOTE: this could also be done with the walkdir library, but using find
 * is several times faster in general. This is *the* bottleneck, and the
 * subprocess IO isn't much, so calling find as a subprocess is the right
 * solution!  This is not a hack at all.
 *
 * IMPORTANT: top level hidden subdirectories in path are always ignored.
 *
 * @param path - directory to scan; `find` runs with this as its cwd.
 * @param exclude - top level names to skip; also passed to `find` as `-path`
 *   patterns (see findExclude).
 * @param metadataFile - optional listing in the format produced by the
 *   metadataFile function.  When given, it is parsed instead of running `find`.
 * @returns an object whose keys are "./" + relative path and whose values are
 *   the mtime with the fractional part dropped.  Empty when `path` does not
 *   exist or there is nothing to scan.
 */
export async function mtimeDirTree({
  path,
  exclude,
  metadataFile,
}: {
  path: string;
  exclude: string[];
  metadataFile?: string;
}): Promise<{ [path: string]: number }> {
  log("mtimeDirTree", path, exclude);
  if (!(await exists(path))) {
    return {};
  }
  // If the string metadataFile is passed in (as output from metadataFile), then we use that
  // If it isn't, then we compute just what is needed here.
  if (metadataFile == null) {
    const topPaths = (await readdir(path)).filter(
      (p) => !p.startsWith(".") && !exclude.includes(p),
    );
    if (topPaths.length === 0) {
      // With no starting points, find falls back to "." and would walk every
      // hidden top level tree, only for all of those records to be discarded
      // below.  Skip the subprocess entirely.
      return {};
    }
    const { stdout } = await execa(
      "find",
      [
        ...topPaths,
        "-not", // '-not -readable -prune -o' - see comment in metadataFile
        "-readable",
        "-prune",
        "-o",
        ...findExclude(exclude),
        "-printf",
        "%p\\0%T@\\0\\0",
      ],
      {
        cwd: path,
      },
    );
    metadataFile = stdout;
  }

  const mtimes: { [path: string]: number } = {};
  // Records are terminated by two NULs; inside a record a single NUL
  // separates the path from the space separated metadata fields.
  for (const record of metadataFile.split("\0\0")) {
    if (!record) continue; // trailing blank record at the end of the listing
    const [name, meta] = record.split("\0");
    if (name.startsWith(".")) {
      // never include top level hidden paths, if they are there for some reason.
      continue;
    }
    if (meta == null) {
      // No NUL separator in the record, so there is no mtime to read.
      log("mtimeDirTree: skipping malformed record", record);
      continue;
    }
    // NOTE -- GNU tar discards fractional part of timestamp, thus rounding down,
    // so this is right, since we will use tar for sending files.
    mtimes["./" + name] = parseInt(meta.split(" ")[0], 10);
  }
  return mtimes;
}
127

128
/**
 * Build the `find` expression arguments that filter out excluded paths.
 *
 * Each entry of `exclude` contributes two tests, in order:
 * `-not -path <entry>` (the entry itself) and `-not -path <entry>/*`
 * (everything beneath it).
 *
 * @param exclude - paths to filter out
 * @returns a flat argument list, six strings per entry; empty for no entries
 */
function findExclude(exclude: string[]): string[] {
  // We run "find *", not "find .", so no need to exclude hidden files here.
  // Also, doing it here instead of with "find *" is massively slower in general!
  return exclude.flatMap((excluded) => [
    "-not",
    "-path",
    excluded,
    "-not",
    "-path",
    `${excluded}/*`,
  ]);
}
142

143
/**
 * Recursively delete each of `paths`, resolved against the directory `rel`.
 *
 * Removal is best effort: a failure on one path is logged as a warning and
 * the remaining paths are still processed.
 *
 * @param paths - paths to delete, relative to `rel`; the array is not modified
 * @param rel - base directory the paths are joined onto; required
 * @throws Error if `rel` is missing or empty
 */
export async function remove(paths: string[], rel?: string): Promise<void> {
  if (!rel) {
    throw Error("rel must be defined");
  }
  // Sort a *copy* so the caller's array keeps its order.  A directory is a
  // string prefix of everything inside it and therefore sorts before its
  // contents; reversing processes the contents first, then the directory.
  const ordered = [...paths].sort().reverse();
  for (const path of ordered) {
    try {
      await rm(join(rel, path), { recursive: true });
    } catch (err) {
      log(`WARNING: issue removing '${path}' -- ${err}`);
    }
  }
}
156

157
/**
 * Write `contents` to the file at `path`, lz4 compressed.
 *
 * We use a stream instead of blocking in process for compression
 * because this code is likely to run in the project's daemon,
 * and blocking here would block interactive functionality such
 * as terminals.
 *
 * @param path - destination file
 * @param contents - text to compress and write
 * @returns a promise that resolves once the file stream has finished, and
 *   rejects with the first error raised by the encoder or the file stream
 */
export async function writeFileLz4(
  path: string,
  contents: string,
): Promise<void> {
  // Create readable stream from the input: it emits the whole string as one
  // chunk and then signals end-of-stream.
  const input = new Readable({
    read() {
      this.push(contents);
      this.push(null);
    },
  });
  // lz4 compression encoder
  const encoder = createEncoderStream();
  const output = createWriteStream(path);

  await new Promise<void>((resolve, reject) => {
    // pipe() neither forwards errors nor closes the destination when a stream
    // upstream of it fails, so tear the chain down explicitly; otherwise the
    // file stream would stay open after a failed write.
    const fail = (err: Error) => {
      input.destroy();
      output.destroy();
      reject(err);
    };
    encoder.on("error", fail);
    output.on("error", fail);
    output.on("finish", () => resolve());
    // start writing only once all listeners are attached
    input.pipe(encoder).pipe(output);
  });
}
183

184
/**
 * Given an array paths of relative paths (relative to my HOME directory),
 * the function parseCommonPrefixes outputs an array of objects
 *
 *     {prefix:string; paths:string[]}
 *
 * where prefix is a common path prefix of all the paths, and paths is what
 * is after that prefix, written as "./<rest>".  Thus if the output is x, then
 *
 *     join(x[0].prefix, x[0].paths[0]), join(x[0].prefix, x[0].paths[1]), ...,
 *     join(x[x.length-1].prefix, x[x.length-1].paths[0]), ...
 *
 * are the original input paths, in sorted order.
 *
 * The prefix of a path is its dirname.  Only paths that are adjacent after
 * sorting are grouped together, so the same prefix can appear in more than
 * one output object.  The input array is not modified.
 *
 * @param paths - paths to group
 * @returns the groups, in sorted order of their paths
 */
export function parseCommonPrefixes(
  paths: string[],
): { prefix: string; paths: string[] }[] {
  // sort a copy of the paths to bring common prefixes next to each other
  const sortedPaths = paths.slice().sort();
  const result: { prefix: string; paths: string[] }[] = [];

  let i = 0;
  while (i < sortedPaths.length) {
    const commonPrefix = dirname(sortedPaths[i]);

    // extend the group while the following paths share the same prefix
    let j = i + 1;
    while (j < sortedPaths.length && dirname(sortedPaths[j]) === commonPrefix) {
      j++;
    }

    const subPaths = sortedPaths
      .slice(i, j)
      .map((p) => stripCommonPrefix(commonPrefix, p));
    result.push({ prefix: commonPrefix, paths: subPaths });

    i = j;
  }

  return result;
}

// Rewrite `path` as "./<rest>" such that join(prefix, result) gives back
// `path`, where `prefix` is dirname(path).
function stripCommonPrefix(prefix: string, path: string): string {
  if (path.startsWith(prefix + "/")) {
    // Usual case: the prefix is literally followed by "/", so keep that
    // slash and put "." in front of it.
    return "." + path.slice(prefix.length);
  }
  if (prefix.endsWith("/") && path.startsWith(prefix)) {
    // dirname("/foo") is "/": the separator is part of the prefix itself.
    return "./" + path.slice(prefix.length);
  }
  // dirname("foo") is "." even though "foo" does not start with "./";
  // nothing may be sliced off here.
  return "./" + path;
}
233

234
Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place.

Product

Resources

Company

Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more, all in one place.

Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place.