Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place.
Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place.
Path: blob/master/src/packages/sync/listings/index.ts
Views: 687
/*
 * This file is part of CoCalc: Copyright © 2020 Sagemath, Inc.
 * License: MS-RSL – see LICENSE.md for details
 */

import { delay } from "awaiting";
import { once } from "@cocalc/util/async-utils";
import { SyncTable, SyncTableState } from "@cocalc/sync/table";
import type { TypedMap } from "@cocalc/util/types/typed-map";
import {
  close,
  merge,
  path_split,
  startswith,
  field_cmp,
  seconds_ago,
} from "@cocalc/util/misc";
import type { Listing } from "@cocalc/util/db-schema/listings";
import {
  WATCH_TIMEOUT_MS,
  MAX_FILES_PER_PATH,
} from "@cocalc/util/db-schema/listings";
import type { EventEmitter } from "events";
import { DirectoryListingEntry } from "@cocalc/util/types";

// Update directory listing only when file changes stop for at least this long.
// This is important since we don't want to fire off dozens of changes per second,
// e.g., if a logfile is being updated.
const WATCH_DEBOUNCE_MS = parseInt(
  process.env.COCALC_FS_WATCH_DEBOUNCE_MS ?? "500",
);

// See https://github.com/sagemathinc/cocalc/issues/4623
// for one reason to put a slight delay in; basically,
// a change could be to delete and then create a file quickly,
// and that confuses our file deletion detection.  A light delay
// is OK given our application.  No approach like this can
// ever be perfect, of course.
const DELAY_ON_CHANGE_MS = 50;

// Watch directories for which some client has shown interest recently:
const INTEREST_THRESH_SECONDS = WATCH_TIMEOUT_MS / 1000;

// Maximum number of paths to keep in listings tables for this project.
// Periodically, info about older paths beyond this number will be purged
// from the database.  NOTE that synctable.delete is "barely" implemented,
// so there may be some issues with this working.
import { MAX_PATHS } from "@cocalc/util/db-schema/listings";

export type ImmutableListing = TypedMap<Listing>;

export interface Watcher extends EventEmitter {
  close();
}

interface Options {
  table: SyncTable;
  project_id: string;
  compute_server_id: number;
  getListing;
  createWatcher;
  onDeletePath;
  existsSync;
  getLogger;
}

/**
 * Keeps the "listings" synctable of one project / compute server up to date:
 * it watches directories that clients recently expressed interest in,
 * recomputes their listings when they change on disk, and records which
 * files were deleted.
 */
class ListingsTable {
  private readonly table?: SyncTable; // will be removed by close()
  private project_id: string;
  private compute_server_id: number;
  private watchers: { [path: string]: Watcher } = {};
  private getListing: (
    path,
    hidden?: boolean,
  ) => Promise<DirectoryListingEntry[]>;
  private createWatcher: (path: string, debounceMs: number) => Watcher;
  private onDeletePath: (path: string) => Promise<void>;
  private existsSync: (path: string) => boolean;
  private log: (...args) => void;

  constructor(opts: Options) {
    this.log = opts.getLogger("sync:listings").debug;
    this.log("constructor");
    this.project_id = opts.project_id;
    this.compute_server_id = opts.compute_server_id ?? 0;
    this.table = opts.table;
    this.getListing = opts.getListing;
    this.createWatcher = opts.createWatcher;
    this.onDeletePath = opts.onDeletePath;
    this.existsSync = opts.existsSync;
    // setupWatchers is async and nobody awaits it, so a failure must be
    // caught here or it becomes an unhandled promise rejection.
    // NOTE(review): close() removes fields from this object (see `table`
    // above), so `log` may be gone by the time this runs -- hence the `?.`.
    this.setupWatchers().catch((err) => {
      this.log?.("WARNING, error setting up watchers -- ", err);
    });
  }

  /**
   * Stop all directory watchers, detach from the table and release
   * everything this object holds.
   */
  close = () => {
    this.log("close");
    // Detach the listener registered in setupWatchers; otherwise the table
    // keeps this closed instance alive and keeps invoking its handler.
    this.table?.removeListener("change", this.handleChangeEvent);
    for (const path in this.watchers) {
      this.stopWatching(path);
    }
    close(this);
  };

  // Start watching any paths that have recent interest (so this is not
  // in response to a *change* after starting).
  private setupWatchers = async () => {
    if (this.table == null) return; // closed
    if (this.table.get_state() == ("init" as SyncTableState)) {
      await once(this.table, "state");
    }
    if (this.table == null) return; // closed while waiting
    if (this.table.get_state() != ("connected" as SyncTableState)) {
      return; // game over
    }
    this.table.get()?.forEach((val) => {
      const path = val.get("path");
      if (path == null) return;
      // Skip paths that already have a watcher.  (This check used to be
      // inverted, which skipped every path that was NOT being watched, so
      // no watcher was ever started here.)
      if (this.watchers[path] != null) return; // already watching -- shouldn't happen
      const interest = val.get("interest");
      if (interest != null && interest > seconds_ago(INTEREST_THRESH_SECONDS)) {
        try {
          this.startWatching(path);
        } catch (err) {
          // Same policy as ensureWatching: one path failing must not stop
          // us from handling the others or from subscribing to changes.
          this.log("failed to start watching", err);
        }
      }
    });
    this.table.on("change", this.handleChangeEvent);

    this.removeStaleWatchers();
  };

  // Stop watching paths nobody is interested in anymore, trim old paths
  // from the table, and then reschedule itself for as long as the table
  // stays connected.
  private removeStaleWatchers = async () => {
    if (this.table == null) return; // closed
    if (this.table.get_state() == ("connected" as SyncTableState)) {
      this.table.get()?.forEach((val) => {
        const path = val.get("path");
        if (path == null) return;
        if (this.watchers[path] == null) return;
        const interest = val.get("interest");
        if (
          interest == null ||
          interest <= seconds_ago(INTEREST_THRESH_SECONDS)
        ) {
          this.stopWatching(path);
        }
      });
    }

    // Now get rid of any old paths that are no longer relevant
    // to reduce wasted database space, memory, and bandwidth for
    // client browsers that are using this project.
    try {
      await this.trimOldPaths();
    } catch (err) {
      this.log("WARNING, error trimming old paths -- ", err);
    }

    if (this.table == null) return; // closed
    if (this.table.get_state() == ("connected" as SyncTableState)) {
      await delay(1000 * INTEREST_THRESH_SECONDS);
      if (this.table == null) return; // closed
      if (this.table.get_state() != ("connected" as SyncTableState)) return;
      this.removeStaleWatchers();
    }
  };

  private isReady = (): boolean => {
    return !!this.table?.is_ready();
  };

  // Returns the table, or throws Error("table not ready") if it is closed
  // or not ready.
  private getTable = (): SyncTable => {
    if (!this.isReady() || this.table == null) {
      throw Error("table not ready");
    }
    return this.table;
  };

  /**
   * Write (shallow-merge) the given listing fields into the table row for
   * this project / compute server and save.  Rejects if the table is not
   * ready or the save fails.
   */
  set = async (obj: Listing) => {
    this.getTable().set(
      merge(
        {
          project_id: this.project_id,
          compute_server_id: this.compute_server_id,
        },
        obj,
      ),
      "shallow",
    );
    await this.getTable().save();
  };

  // Fire-and-forget variant of set() for callers that must not (or cannot)
  // wait for the save.  A failure is logged instead of surfacing as an
  // unhandled promise rejection.
  private setInBackground = (obj: Listing): void => {
    this.set(obj).catch((err) => {
      this.log?.("WARNING, failed to save listing -- ", err);
    });
  };

  /**
   * Get the table entry for the given path, or undefined if there is none.
   * Throws if the table is not ready.
   */
  get = (path: string): ImmutableListing | undefined => {
    path = canonicalPath(path);
    const x = this.getTable().get(
      JSON.stringify([this.project_id, path, this.compute_server_id]),
    );
    if (x == null) return x;
    return x as unknown as ImmutableListing;
    // NOTE: That we have to use JSON.stringify above is an ugly shortcoming
    // of the get method in @cocalc/sync/table/synctable.ts
    // that could probably be relatively easily fixed.
  };

  // Each key is a JSON-encoded array whose second entry is the path
  // (the same encoding that get() uses above).
  private handleChangeEvent = (keys: string[]) => {
    this.log("handleChangeEvent", JSON.stringify(keys));
    for (const key of keys) {
      this.handleChange(JSON.parse(key)[1]);
    }
  };

  private handleChange = (path: string): void => {
    this.log("handleChange", path);
    const cur = this.get(path);
    if (cur == null) return;
    let interest: undefined | Date = cur.get("interest");
    if (interest == null) return;
    if (interest >= seconds_ago(INTEREST_THRESH_SECONDS)) {
      // Ensure any possible client clock skew "issue" has no trivial bad impact.
      const time = new Date();
      if (interest > time) {
        interest = time;
        this.setInBackground({ path, interest });
      }
      // Make sure we watch this path for updates, since there is genuine current interest.
      this.ensureWatching(path);
    }
  };

  private ensureWatching = async (path: string): Promise<void> => {
    path = canonicalPath(path);
    if (this.watchers[path] != null) {
      // We are already watching this path
      if (this.get(path)?.get("error")) {
        this.log("ensureWatching -- removing old watcher due to error", path);
        this.stopWatching(path);
      } else {
        return;
      }
    }

    // Fire off computing of directory listing for this path,
    // and start watching for changes.
    try {
      await this.computeListing(path);
    } catch (err) {
      this.log(
        "ensureWatching -- failed to compute listing so not starting watching",
        err,
      );
      return;
    }
    try {
      this.startWatching(path);
    } catch (err) {
      this.log("failed to start watching", err);
    }
  };

  // Compute the current listing of path and store it in the table, along
  // with the names that disappeared since the previously stored listing.
  // If getListing fails, the error is recorded in the table and rethrown.
  private computeListing = async (path: string): Promise<void> => {
    path = canonicalPath(path);
    const time = new Date();
    let listing;
    try {
      listing = await this.getListing(path, true);
      if (!this.isReady()) return;
    } catch (err) {
      if (!this.isReady()) return;
      this.setInBackground({ path, time, error: `${err}` });
      throw err;
    }
    let missing: number | undefined = undefined;

    const y = this.get(path);
    const previous_listing = y?.get("listing")?.toJS() as any;
    let deleted: any = y?.get("deleted")?.toJS() as any;
    if (previous_listing != null) {
      // Check to see to what extent change in the listing is due to files
      // being deleted.  Note that in case of a directory with a large
      // number of files we only know about recent files (since we don't
      // store the full listing), so deleting a non-recent file won't get
      // detected here -- which is fine, since deletion tracking is important
      // mainly for recent files.
      const cur = new Set();
      for (const x of listing) {
        cur.add(x.name);
      }
      for (const x of previous_listing) {
        if (!cur.has(x.name)) {
          // x.name is suddenly gone... so deleted
          if (deleted == null) {
            deleted = [x.name];
          } else {
            if (deleted.indexOf(x.name) == -1) {
              deleted.push(x.name);
            }
          }
        }
      }
    }

    // Shrink listing length
    if (listing.length > MAX_FILES_PER_PATH) {
      listing.sort(field_cmp("mtime"));
      listing.reverse();
      missing = listing.length - MAX_FILES_PER_PATH;
      listing = listing.slice(0, MAX_FILES_PER_PATH);
    }
    // We want to clear the error, but just clearing it in synctable doesn't
    // clear to database, so if there is an error, we set it to "" which does
    // save fine to the database. (TODO: this is just a workaround.)
    const error = y?.get("error") != null ? "" : undefined;

    this.setInBackground({ path, listing, time, missing, deleted, error });
  };

  // Create a watcher for path (no-op if one exists) that recomputes the
  // listing shortly after each change.  Throws if HOME is not defined.
  private startWatching = (path: string): void => {
    path = canonicalPath(path);
    if (this.watchers[path] != null) return;
    if (process.env.HOME == null) {
      throw Error("HOME env variable must be defined");
    }
    this.watchers[path] = this.createWatcher(path, WATCH_DEBOUNCE_MS);
    this.watchers[path].on("change", async () => {
      try {
        await delay(DELAY_ON_CHANGE_MS);
        if (!this.isReady()) return;
        await this.computeListing(path);
      } catch (err) {
        this.log(`computeListing("${path}") error: "${err}"`);
      }
    });
  };

  private stopWatching = (path: string): void => {
    path = canonicalPath(path);
    const w = this.watchers[path];
    if (w == null) return;
    delete this.watchers[path];
    w.close();
  };

  private trimOldPaths = async (): Promise<void> => {
    this.log("trimOldPaths");
    if (!this.isReady()) return;
    const table = this.getTable();
    let num_to_remove = table.size() - MAX_PATHS;
    this.log("trimOldPaths", num_to_remove);
    if (num_to_remove <= 0) {
      // definitely nothing to do
      return;
    }

    // Check to see if we can trim some paths.  We sort the paths
    // by "interest" timestamp, and eliminate the oldest ones that are
    // not *currently* being watched.
    const paths: { path: string; interest: Date }[] = [];
    table.get()?.forEach((val) => {
      const path = val.get("path");
      if (this.watchers[path] != null) {
        num_to_remove -= 1;
        // paths we are watching are not eligible to be removed.
        return;
      }
      const interest = val.get("interest", new Date(0));
      paths.push({ path, interest });
    });
    this.log("trimOldPaths", JSON.stringify(paths));
    this.log("trimOldPaths", num_to_remove);

    if (num_to_remove <= 0) return;
    paths.sort(field_cmp("interest"));
    // Now remove the first num_to_remove paths.
    for (let i = 0; i < num_to_remove; i++) {
      this.log("trimOldPaths -- removing", paths[i].path);
      await this.removePath(paths[i].path);
    }
  };

  private removePath = async (path: string): Promise<void> => {
    if (!this.isReady()) return;
    this.log("removePath", path);
    // Identify the row by the same three fields that get() and set() use.
    // compute_server_id used to be omitted here.
    // NOTE(review): confirm against synctable.delete's key handling.
    await this.getTable().delete({
      project_id: this.project_id,
      path,
      compute_server_id: this.compute_server_id,
    });
  };

  // Given a "filename", add it to deleted if there is already a record
  // for the containing path in the database.  (TODO: we may change this
  // to create the record if it doesn't exist.)
  setDeleted = async (filename: string): Promise<void> => {
    this.log("setDeleted:", filename);
    if (!this.isReady()) {
      // setDeleted is a convenience, so dropping it in case of a project
      // with no network is OK.
      this.log(`setDeleted: skipping since not ready`);
      return;
    }
    if (filename[0] == "/") {
      // absolute path
      if (process.env.HOME == null || !startswith(filename, process.env.HOME)) {
        // can't do anything with this.
        return;
      }
      filename = filename.slice(process.env.HOME.length + 1);
    }
    const { head, tail } = path_split(filename);
    const x = this.get(head);
    if (x != null) {
      // TODO/edge case: if x is null we *could* create the path here...
      let deleted: any = x.get("deleted");
      if (deleted == null) {
        deleted = [tail];
      } else {
        if (deleted.indexOf(tail) != -1) return;
        deleted = deleted.toJS();
        deleted.push(tail);
      }
      this.log(`setDeleted: recording "${deleted}" in "${head}"`);
      await this.set({ path: head, deleted });
      if (!this.isReady()) return;
    }

    await this.onDeletePath(filename);
  };

  // Returns true if definitely known to be deleted.
  // Returns false if definitely known to not be deleted
  // Returns null if we don't know for sure, e.g., not in listing table or listings not ready.
  isDeleted = (filename: string): boolean | null => {
    if (!this.isReady()) {
      // in case that listings are not available, return null -- we don't know.
      return null;
    }
    const { head, tail } = path_split(filename);
    if (head != "" && this.isDeleted(head)) {
      // recursively check if filename is contained in a
      // directory tree that got deleted.
      return true;
    }
    const x = this.get(head);
    if (x == null) {
      // we don't know.
      return null;
    }
    const deleted = x.get("deleted");
    if (deleted == null) {
      // we don't know
      return null;
    }
    // table is available and has deleted info for the directory -- let's see:
    if (deleted.indexOf(tail) != -1) {
      // it was explicitly deleted at some point.
      // It *might* still be deleted.  Check on disk now
      // via a synchronous check.
      if (this.existsSync(filename)) {
        // it now exists -- return false but also update the table since
        // path is no longer deleted.  This method is synchronous, so the
        // update is saved in the background.
        this.setInBackground({
          path: head,
          deleted: deleted.toJS().filter((x) => x != tail),
        });
        return false;
      } else {
        // definitely explicitly deleted and not back on disk for some reason,
        return true;
      }
    }
    return false;
  };
}

// One ListingsTable per compute server id.
const listingsTable: { [compute_server_id: number]: ListingsTable } = {};

/**
 * Create the ListingsTable for opts.compute_server_id (default 0),
 * closing any table previously registered under that id.
 */
export function registerListingsTable(opts: Options): void {
  const { compute_server_id = 0 } = opts;
  if (listingsTable[compute_server_id] != null) {
    // There was one sitting around wasting space so clean it up
    // before making a new one.
    listingsTable[compute_server_id].close();
  }
  listingsTable[compute_server_id] = new ListingsTable(opts);
}

/**
 * Return the ListingsTable registered for the given compute server id,
 * or undefined if none has been registered.
 */
export function getListingsTable(
  compute_server_id: number = 0,
): ListingsTable | undefined {
  return listingsTable[compute_server_id];
}

// this does a tiny amount to make paths more canonical.
function canonicalPath(path: string): string {
  if (path == "." || path == "~") {
    return "";
  }
  return path;
}