Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place.
Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place.
Path: blob/master/src/packages/sync-fs/lib/util.ts
Views: 687
import { dynamicImport } from "tsimportlib";
import { readdir, rm } from "fs/promises";
import { dirname, join } from "path";
import { exists } from "@cocalc/backend/misc/async-utils-node";
import { createEncoderStream } from "lz4";
import { Readable } from "stream";
import { createWriteStream } from "fs";

import getLogger from "@cocalc/backend/logger";
const log = getLogger("sync-fs:util").debug;

// Run a subprocess via the "execa" package.
// The command and (at most) its last 15 arguments are logged first, so very
// long argument lists do not flood the log. The package is loaded through
// dynamicImport at call time (presumably because execa is ESM-only -- confirm).
// Returns whatever execa's own execa(cmd, args, options) resolves to.
export async function execa(cmd, args, options?) {
  log("execa", cmd, "...", args?.slice(-15)?.join(" "), options);
  const { execa: execa0 } = (await dynamicImport(
    "execa",
    module,
  )) as typeof import("execa");
  return await execa0(cmd, args, options);
}

// Produce the raw "find -printf" metadata listing for the tree below path.
//
// Each record in the returned string has the form
//     <path>\0<mtime> <atime> <blocks> <size> <mode>\0\0
// with paths relative to `path`. Returns "" if path does not exist or if it
// has no non-hidden, non-excluded top level entries.
//
// IMPORTANT: top level hidden subdirectories in path are always ignored, e.g.,
// if path is /home/user, then /home/user/.foo is ignored, but /home/user/bar/.foo
// is not ignored.
export async function metadataFile({
  path,
  exclude,
}: {
  path: string;
  exclude: string[];
}): Promise<string> {
  log("metadataFile", path, exclude);
  if (!(await exists(path))) {
    return "";
  }
  // Recursively get enough metadata about all non-hidden top level path trees
  // (this is VASTLY more efficient
  // than "find . ...", especially on cocalc with its fuse mounted .snapshots!)
  // Notes about the find output option to printf:
  // - We use null characters as separators because they are the ONLY character
  //   that isn't allowed in a filename (besides '/')! Filenames can have newlines
  //   in them!
  //   BUT -- we are assuming filenames can be encoded as utf8; if not, sync will
  //   obviously not work.
  // - The find output contains more than just what is needed for mtimeDirTree; it contains
  //   everything needed by websocketfs for doing stat, i.e., this output is used
  //   for the metadataFile functionality of websocketfs.
  // - Just a little fact -- output from find is NOT sorted in any guaranteed way.
  // Y2K alert -- note the %.10T below truncates times to integers, and will I guess fail a few hundred years from now.
  const topPaths = (await readdir(path)).filter(
    (p) => !p.startsWith(".") && !exclude.includes(p),
  );
  if (topPaths.length === 0) {
    // With no starting point GNU find falls back to ".", which would emit "."
    // and every hidden top level tree -- exactly what must be ignored here.
    return "";
  }
  // NOTE(review): a top level entry whose name starts with "-" is handed to
  // find as if it were an option/expression; confirm whether that can occur.
  const { stdout } = await execa(
    "find",
    topPaths.concat([
      // This '-not -readable -prune -o ' excludes directories that we cannot read, since there is no possible
      // way to sync them, and a user (not root) might not be able to fix this. See
      // https://stackoverflow.com/questions/762348/how-can-i-exclude-all-permission-denied-messages-from-find/25234419#25234419
      "-not",
      "-readable",
      "-prune",
      "-o",
      ...findExclude(exclude),
      "-printf",
      "%p\\0%.10T@ %.10A@ %b %s %M\\0\\0",
    ]),
    {
      cwd: path,
    },
  );
  return stdout;
}

// Compute the map from paths to their integral mtime for the entire directory tree
// NOTE: this could also be done with the walkdir library, but using find
// is several times faster in general. This is *the* bottleneck, and the
// subprocess IO isn't much, so calling find as a subprocess is the right
// solution!
// This is not a hack at all.
// IMPORTANT: top level hidden subdirectories in path are always ignored
//
// Parameters:
//   - path: directory to scan; if it does not exist the result is {}.
//   - exclude: top level names to skip; each entry is also turned into
//     "-not -path" filters for find (see findExclude).
//   - metadataFile: optional string of "\0\0"-terminated records of the form
//     "<path>\0<mtime>...". When given, it is parsed instead of running find.
// Returns a map from "./<relative path>" to the integer part of the mtime.
export async function mtimeDirTree({
  path,
  exclude,
  metadataFile,
}: {
  path: string;
  exclude: string[];
  metadataFile?: string;
}): Promise<{ [path: string]: number }> {
  log("mtimeDirTree", path, exclude);
  if (!(await exists(path))) {
    return {};
  }
  // If the string metadataFile is passed in (as output from metadataFile), then we use that
  // If it isn't, then we compute just what is needed here.
  if (metadataFile == null) {
    const topPaths = (await readdir(path)).filter(
      (p) => !p.startsWith(".") && !exclude.includes(p),
    );
    if (topPaths.length === 0) {
      // With no starting point GNU find falls back to "." and would walk every
      // hidden top level tree, only for all of its output to be skipped below.
      return {};
    }
    // NOTE(review): a top level entry whose name starts with "-" is handed to
    // find as if it were an option/expression; confirm whether that can occur.
    const args = topPaths.concat([
      "-not", // '-not -readable -prune -o' skips unreadable directories, which cannot be synced anyway
      "-readable",
      "-prune",
      "-o",
      ...findExclude(exclude),
      "-printf",
      "%p\\0%T@\\0\\0",
    ]);
    const { stdout } = await execa("find", args, {
      cwd: path,
    });
    metadataFile = stdout;
  }
  const mtimes: { [path: string]: number } = {};
  for (const record of metadataFile.split("\0\0")) {
    if (!record) continue; // trailing blank record at the end of the output
    const [relPath, meta] = record.split("\0");
    if (!relPath || meta == null) {
      // malformed record (no path or no metadata field) -- nothing usable
      continue;
    }
    if (relPath.startsWith(".")) {
      // never include top level hidden paths, if they are there for some reason.
      continue;
    }
    // NOTE -- GNU tar discards fractional part of timestamp, thus rounding down,
    // so this is right, since we will use tar for sending files.
    // parseInt stops at the first non-digit, so it reads exactly the leading
    // integer of the mtime whether it is followed by "." or by " ".
    const mtime = parseInt(meta, 10);
    if (Number.isNaN(mtime)) continue;
    mtimes["./" + relPath] = mtime;
  }
  return mtimes;
}

// Build the find(1) expression that filters out each excluded path and
// everything below it: for every entry two "-not -path" tests are emitted,
// one for the entry itself and one for "<entry>/*".
function findExclude(exclude: string[]): string[] {
  // We run "find *", not "find .", so no need to exclude hidden files here.
  // Also, doing it here instead of with "find *" is massively slower in general!
  return exclude.flatMap((path) => [
    "-not",
    "-path",
    path,
    "-not",
    "-path",
    `${path}/*`,
  ]);
}

// Recursively delete each of the given paths, resolved relative to rel.
// Throws if rel is not given. A failure to remove an individual path is
// logged as a warning and does not stop the remaining removals.
// The caller's array is left untouched.
export async function remove(paths: string[], rel?: string) {
  if (!rel) {
    throw new Error("rel must be defined");
  }
  // TODO/guess -- by sorting we remove files in directory, then containing directory (?).
  // Sort a copy so the caller's array is not reordered as a side effect.
  const ordered = [...paths].sort().reverse();
  for (const path of ordered) {
    try {
      await rm(join(rel, path), { recursive: true });
    } catch (err) {
      log(`WARNING: issue removing '${path}' -- ${err}`);
    }
  }
}

// Write contents to the file at path, lz4-compressed.
// Resolves once the output file stream has finished; rejects if the encoder
// or the file stream emits an error.
export async function writeFileLz4(
  path: string,
  contents: string,
): Promise<void> {
  // We use a stream instead of blocking in process for compression
  // because this code is likely to run in the project's daemon,
  // and blocking here would block interactive functionality such
  // as terminals.

  // Create readable stream from the input: one chunk, then end-of-stream.
  const input = new Readable({
    read() {
      this.push(contents);
      this.push(null);
    },
  });
  // lz4 compression encoder
  const encoder = createEncoderStream();
  const output = createWriteStream(path);
  // start writing
  input.pipe(encoder).pipe(output);
  // wait until done
  await new Promise<void>((resolve, reject) => {
    encoder.on("error", reject);
    output.on("finish", () => resolve());
    output.on("error", reject);
  });
}

/*
Given an array paths of relative paths (relative to my HOME directory),
the function parseCommonPrefixes outputs an array of objects

    {prefix:string; paths:string[]}

where prefix is a common path prefix of all the paths, and paths is what
is after that prefix.  Thus if the output is x, then

    join(x[0].prefix, x[0].paths[0]), join(x[0].prefix, x[0].paths[1]), ..., join(x[x.length-1].prefix, x[x.length-1].paths[0]), ...

is exactly the original input string[] paths (in sorted order).
*/

// Express p relative to prefix, where prefix is dirname(p), as "./<rest>".
function relativeToPrefix(p: string, prefix: string): string {
  if (p.startsWith(prefix)) {
    const rest = p.slice(prefix.length);
    if (rest.startsWith("/")) {
      // usual case: "a/b" with prefix "a" -> "./b"
      return "." + rest;
    }
    if (prefix.endsWith("/")) {
      // prefix is the root: "/foo" with prefix "/" -> "./foo"
      return "./" + rest;
    }
  }
  // dirname() made up the prefix "." for a bare name such as "foo" (or a
  // hidden one such as ".foo"), so there is nothing to strip from p.
  return "./" + p;
}

export function parseCommonPrefixes(
  paths: string[],
): { prefix: string; paths: string[] }[] {
  // Walk the sorted path list and collect runs of consecutive paths having the
  // same dirname into one {prefix, paths} object each. The rest of each path
  // is what follows the prefix, written relative to it with a leading "./".
  const result: { prefix: string; paths: string[] }[] = [];
  let current: { prefix: string; paths: string[] } | undefined;

  // sort a copy of the paths to group common prefixes
  for (const p of [...paths].sort()) {
    const prefix = dirname(p);
    if (current === undefined || current.prefix !== prefix) {
      // a new run of paths sharing this prefix starts here
      current = { prefix, paths: [] };
      result.push(current);
    }
    current.paths.push(relativeToPrefix(p, prefix));
  }

  return result;
}