Path: blob/master/src/packages/project/conat/open-files.ts
5843 views
/*
Handle opening files in a project to save/load from disk and also enable compute capabilities.

DEVELOPMENT:

0. From the browser with the project opened, terminate the open-files api service:


    await cc.client.conat_client.projectApi(cc.current()).system.terminate({service:'open-files'})



Set env variables as in a project (see  api/index.ts ), then in nodejs:

DEBUG_CONSOLE=yes DEBUG=cocalc:debug:project:conat:* node

    x = await require("@cocalc/project/conat/open-files").init(); Object.keys(x)


[ 'openFiles', 'openDocs', 'formatter', 'terminate', 'computeServers', 'cc' ]

> x.openFiles.getAll();

> Object.keys(x.openDocs)

> s = x.openDocs['z4.tasks']
// now you can directly work with the syncdoc for a given file,
// but from the perspective of the project, not the browser!
//
//

OR:

echo "require('@cocalc/project/conat/open-files').init(); require('@cocalc/project/bug-counter').init()" | node

COMPUTE SERVER:

To simulate a compute server, do exactly as above, but also set the environment
variable COMPUTE_SERVER_ID to the *global* (not project specific) id of the compute
server:

   COMPUTE_SERVER_ID=84 node

In this case, you also don't need to use the terminate command if the compute
server isn't actually running.  To terminate a compute server open files service though:

    (TODO)


EDITOR ACTIONS:

Stop the open-files server and define x as above in a terminal.  You can
then get the actions or store in a nodejs terminal for a particular document
as follows:

project_id = '00847397-d6a8-4cb0-96a8-6ef64ac3e6cf'; path = '2025-03-21-100921.ipynb';
redux = require("@cocalc/jupyter/redux/app").redux; a = redux.getEditorActions(project_id, path); s = redux.getEditorStore(project_id, path); 0;


IN A LIVE RUNNING PROJECT IN KUCALC:

Ssh in to the project itself.  You can't use a terminal because that very terminal will be broken by
doing this!  Then:

/cocalc/github/src/packages/project$ . /cocalc/nvm/nvm.sh
/cocalc/github/src/packages/project$ COCALC_PROJECT_ID=... COCALC_SECRET_TOKEN="/secrets/secret-token/token" CONAT_SERVER=hub-conat node  # not sure about CONAT_SERVER
Welcome to Node.js v20.19.0.
Type ".help" for more information.
> x = await require("@cocalc/project/conat/open-files").init(); Object.keys(x)
[ 'openFiles', 'openDocs', 'formatter', 'terminate', 'computeServers' ]
>


*/

import {
  openFiles as createOpenFiles,
  type OpenFiles,
  type OpenFileEntry,
} from "@cocalc/project/conat/sync";
import { CONAT_OPEN_FILE_TOUCH_INTERVAL } from "@cocalc/util/conat";
import { compute_server_id, project_id } from "@cocalc/project/data";
import type { SyncDoc } from "@cocalc/sync/editor/generic/sync-doc";
import { getClient } from "@cocalc/project/client";
import { SyncString } from "@cocalc/sync/editor/string/sync";
import { SyncDB } from "@cocalc/sync/editor/db/sync";
import getLogger from "@cocalc/backend/logger";
import { reuseInFlight } from "@cocalc/util/reuse-in-flight";
import { delay } from "awaiting";
import { initJupyterRedux, removeJupyterRedux } from "@cocalc/jupyter/kernel";
import { filename_extension, original_path } from "@cocalc/util/misc";
import { createFormatterService } from "./formatter";
import { type ConatService } from "@cocalc/conat/service/service";
import { exists } from "@cocalc/backend/misc/async-utils-node";
import { map as awaitMap } from "awaiting";
import { unlink } from "fs/promises";
import { join } from "path";
import {
  computeServerManager,
  ComputeServerManager,
} from "@cocalc/conat/compute/manager";
import { JUPYTER_SYNCDB_EXTENSIONS } from "@cocalc/util/jupyter/names";
import { connectToConat } from "@cocalc/project/conat/connection";

// ensure conat connection stuff is initialized
import "@cocalc/project/conat/env";
import { chdir } from "node:process";

const logger = getLogger("project:conat:open-files");

/**
 * Debugging aid: every 15 seconds log the number of open docs, the stats
 * reported by `openFiles.debugStats()` and the process RSS in MiB. The timer
 * is cleared when `openFiles` emits "closed".
 *
 * Not called by default (see the commented-out call in `init`), hence the
 * ts-ignore to silence the unused-function diagnostic.
 */
// @ts-ignore
function startOpenFilesStatsLoop(openFiles: OpenFiles) {
  const intervalMs = 15000;
  logger.debug("open-files stats enabled", { intervalMs });
  const interval = setInterval(() => {
    logger.debug("open-files stats", {
      intervalMs,
      openDocs: Object.keys(openDocs).length,
      stats: openFiles.debugStats(),
      rssMiB: Math.round(process.memoryUsage().rss / (1024 * 1024)),
    });
  }, intervalMs);
  openFiles.on("closed", () => clearInterval(interval));
}

// we check all files we are currently managing this frequently to
// see if they exist on the filesystem:
const FILE_DELETION_CHECK_INTERVAL = 5000;

// once we determine that a file does not exist for some reason, we
// wait this long and check *again* just to be sure.  If it is still missing,
// then we close the file in memory and set the file as deleted in the
// shared openfile state.
const FILE_DELETION_GRACE_PERIOD = 2000;

// We NEVER check a file for deletion for this long after first opening it.
// This is VERY important, since some documents, e.g., jupyter notebooks,
// can take a while to get created on disk the first time.
const FILE_DELETION_INITIAL_DELAY = 15000;

// Module state.  All of these are (re)set by init() and cleared by terminate().
let openFiles: OpenFiles | null = null;
let formatter: any = null;
// documents currently open in this process, keyed by the path used in the
// open files table (NOT the absolute filesystem path)
const openDocs: { [path: string]: SyncDoc | ConatService } = {};
let computeServers: ComputeServerManager | null = null;
// Date.now() at which openDoc was last entered for a given path
const openTimes: { [path: string]: number } = {};

/**
 * Return the sync document currently open for `path`, if any.
 *
 * @param path key into `openDocs`
 * @returns the document when it is a SyncString or SyncDB instance;
 *   undefined when nothing is open at that path or the entry is some
 *   other kind of object.
 */
export function getSyncDoc(path: string): SyncDoc | undefined {
  const doc = openDocs[path];
  if (doc instanceof SyncString || doc instanceof SyncDB) {
    return doc;
  }
  return undefined;
}

/**
 * Start the open-files service: create the open files table and the compute
 * server manager, process every entry already in the table, start the
 * background loops and the formatter service.
 *
 * Changes the process working directory to $HOME when HOME is set.
 *
 * @returns handles to the created objects, which is useful for development
 *   (see the notes at the top of this file).
 */
export async function init() {
  logger.debug("init");

  if (process.env.HOME) {
    chdir(process.env.HOME);
  }

  openFiles = await createOpenFiles();
  // Use this to debug potential memory leaks
  // https://github.com/sagemathinc/cocalc/issues/8702
  // startOpenFilesStatsLoop(openFiles);

  computeServers = computeServerManager({ project_id });
  await computeServers.waitUntilReady();
  computeServers.on("change", async ({ path, id }) => {
    if (openFiles == null) {
      return;
    }
    const entry = openFiles?.get(path);
    if (entry != null) {
      await handleChange({ ...entry, id });
    } else {
      await closeDoc(path);
    }
  });

  // initialize
  for (const entry of openFiles.getAll()) {
    handleChange(entry);
  }

  // start loop to watch for and close files that aren't touched frequently:
  closeIgnoredFilesLoop();

  // periodically update timestamp on backend for files we have open
  touchOpenFilesLoop();
  // watch if any file that is currently opened on this host gets deleted,
  // and if so, mark it as such, and set it to closed.
  watchForFileDeletionLoop();

  // handle changes
  openFiles.on("change", (entry) => {
    // we ONLY actually try to open the file here if there
    // is a doctype set.  When it is first being created,
    // the doctype won't be the first field set, and we don't
    // want to launch this until it is set.
    if (entry.doctype) {
      handleChange(entry);
    }
  });

  formatter = await createFormatterService({ openSyncDocs: openDocs });

  // useful for development
  return {
    openFiles,
    openDocs,
    formatter,
    terminate,
    computeServers,
    cc: connectToConat(),
  };
}

/**
 * Stop the open-files service: request a close of every open document (the
 * closes are started but not awaited), then close and clear the open files
 * table, the formatter service and the compute server manager.
 */
export function terminate() {
  logger.debug("terminating open-files service");
  for (const path in openDocs) {
    closeDoc(path);
  }
  openFiles?.close();
  openFiles = null;

  formatter?.close();
  formatter = null;

  computeServers?.close();
  computeServers = null;
}

/**
 * Timestamp (ms since epoch) that is 2.5 touch intervals in the past.
 * Entries whose `time` is at or after this are opened by handleChange;
 * entries at or before it are closed by closeIgnoredFilesLoop.
 */
function getCutoff(): number {
  return Date.now() - 2.5 * CONAT_OPEN_FILE_TOUCH_INTERVAL;
}

/**
 * Id of the compute server recorded for `path` by the compute server
 * manager, or 0 when there is no manager or no value for the path.
 */
function computeServerId(path: string): number {
  return computeServers?.get(path) ?? 0;
}

/**
 * React to a change of the open-files entry for `path` (or of the compute
 * server assigned to it): close the document if it does not belong on this
 * host or is deleted, and open it if it was touched recently and is not
 * yet open here.
 *
 * Never throws; any error is traced and logged.
 *
 * @param id compute server id for the path; looked up via computeServerId
 *   when not given.
 */
async function handleChange({
  path,
  time,
  deleted,
  backend,
  doctype,
  id,
}: OpenFileEntry & { id?: number }) {
  try {
    if (id == null) {
      id = computeServerId(path);
    }
    logger.debug("handleChange", { path, time, deleted, backend, doctype, id });
    const syncDoc = openDocs[path];
    const isOpenHere = syncDoc != null;

    if (id != compute_server_id) {
      if (backend?.id == compute_server_id) {
        // we are definitely not the backend right now.
        openFiles?.setNotBackend(path, compute_server_id);
      }
      // only thing we should do is close it if it is open.
      if (isOpenHere) {
        await closeDoc(path);
      }
      return;
    }

    if (deleted?.deleted) {
      if (await exists(path)) {
        // it's back
        openFiles?.setNotDeleted(path);
      } else {
        if (isOpenHere) {
          await closeDoc(path);
        }
        return;
      }
    }

    if (time != null && time >= getCutoff()) {
      if (!isOpenHere) {
        logger.debug("handleChange: opening", { path });
        // users actively care about this file being opened HERE, but it isn't
        await openDoc(path);
      }
      return;
    }
  } catch (err) {
    console.trace(err);
    logger.debug(`handleChange: WARNING - error opening ${path} -- ${err}`);
  }
}

/**
 * Whether a document at `path` may be closed automatically when nobody has
 * touched it recently.  Returns false for paths ending in
 * "." + JUPYTER_SYNCDB_EXTENSIONS, ".sagews" or ".term"; true otherwise.
 */
function supportAutoclose(path: string): boolean {
  // this feels way too "hard coded"; alternatively, maybe we make the kernel or whatever
  // actually update the interest?  or something else...
  if (
    path.endsWith("." + JUPYTER_SYNCDB_EXTENSIONS) ||
    path.endsWith(".sagews") ||
    path.endsWith(".term")
  ) {
    return false;
  }
  return true;
}

/**
 * Background loop: once per touch interval, while the open files table is
 * connected, request a close of every open document whose entry time is at
 * or before the cutoff and which supports autoclose.
 */
async function closeIgnoredFilesLoop() {
  while (openFiles?.state == "connected") {
    await delay(CONAT_OPEN_FILE_TOUCH_INTERVAL);
    if (openFiles?.state != "connected") {
      return;
    }
    const paths = Object.keys(openDocs);
    if (paths.length == 0) {
      logger.debug("closeIgnoredFiles: no paths currently open");
      continue;
    }
    logger.debug(
      "closeIgnoredFiles: checking",
      paths.length,
      "currently open paths...",
    );
    const cutoff = getCutoff();
    for (const entry of openFiles.getAll()) {
      if (
        entry != null &&
        entry.time != null &&
        openDocs[entry.path] != null &&
        entry.time <= cutoff &&
        supportAutoclose(entry.path)
      ) {
        logger.debug("closeIgnoredFiles: closing due to inactivity", entry);
        closeDoc(entry.path);
      }
    }
  }
}

/**
 * Background loop: once per touch interval, while the open files table is
 * connected, call setBackend with this host's compute server id for every
 * path in openDocs.
 */
async function touchOpenFilesLoop() {
  while (openFiles?.state == "connected" && openDocs != null) {
    for (const path in openDocs) {
      openFiles.setBackend(path, compute_server_id);
    }
    await delay(CONAT_OPEN_FILE_TOUCH_INTERVAL);
  }
}

/**
 * Check whether the file backing the open document at `path` has vanished
 * from disk and, if so, mark it deleted in the open files table and close
 * it.
 *
 * Does nothing when: the service is not running, the path was opened within
 * FILE_DELETION_INITIAL_DELAY, the path is assigned to a different compute
 * server, the path is a ".term" file, there is no table entry, or HOME is
 * not set.
 *
 * @param path key into the open files table / openDocs
 */
async function checkForFileDeletion(path: string) {
  if (openFiles == null) {
    return;
  }
  if (Date.now() - (openTimes[path] ?? 0) <= FILE_DELETION_INITIAL_DELAY) {
    return;
  }
  const id = computeServerId(path);
  if (id != compute_server_id) {
    // not our concern
    return;
  }

  if (path.endsWith(".term")) {
    // term files are exempt -- we don't save data in them and often
    // don't actually make the hidden ones for each frame in the
    // filesystem at all.
    return;
  }
  const entry = openFiles.get(path);
  if (entry == null) {
    return;
  }
  if (entry.deleted?.deleted) {
    // already set as deleted -- shouldn't still be opened
    await closeDoc(entry.path);
  } else {
    if (!process.env.HOME) {
      // too dangerous
      return;
    }
    const fullPath = join(process.env.HOME, entry.path);
    // if file doesn't exist and still doesn't exist in a while,
    // mark deleted, which also causes a close.
    if (await exists(fullPath)) {
      return;
    }
    // still doesn't exist?
    // We must give things a reasonable amount of time, e.g., otherwise
    // creating a file (e.g., jupyter notebook) might take too long and
    // we randomly think it is deleted before we even make it!
    await delay(FILE_DELETION_GRACE_PERIOD);
    if (await exists(fullPath)) {
      return;
    }
    // still doesn't exist
    if (openFiles != null) {
      logger.debug("checkForFileDeletion: marking as deleted -- ", entry);
      openFiles.setDeleted(entry.path);
      // openDocs is keyed by the table path, not the absolute filesystem
      // path, so the document must be closed via entry.path.
      await closeDoc(entry.path);
      // closing a file may cause it to try to save to disk the last version,
      // so we delete it if that happens.
      // TODO: add an option to close everywhere to not do this, and/or make
      // it not save on close if the file doesn't exist.
      try {
        if (await exists(fullPath)) {
          await unlink(fullPath);
        }
      } catch {}
    }
  }
}

/**
 * Background loop: every FILE_DELETION_CHECK_INTERVAL ms, while the open
 * files table is connected, run checkForFileDeletion over all currently
 * open paths via awaitMap with a limit argument of 20.
 */
async function watchForFileDeletionLoop() {
  while (openFiles != null && openFiles.state == "connected") {
    await delay(FILE_DELETION_CHECK_INTERVAL);
    if (openFiles?.state != "connected") {
      return;
    }
    const paths = Object.keys(openDocs);
    if (paths.length == 0) {
      // logger.debug("watchForFileDeletionLoop: no paths currently open");
      continue;
    }
    //     logger.debug(
    //       "watchForFileDeletionLoop: checking",
    //       paths.length,
    //       "currently open paths to see if any were deleted",
    //     );
    await awaitMap(paths, 20, checkForFileDeletion);
  }
}

/**
 * Close the document open at `path`, if any, and remove it from openDocs
 * and openTimes.  An error thrown by the document's close() is logged and
 * recorded with openFiles.setError rather than propagated.  Afterwards, if
 * nothing is open at `path`, this host is unset as its backend.
 *
 * NOTE(review): wrapped in reuseInFlight, which presumably shares a single
 * in-flight call between concurrent callers with the same argument --
 * confirm against @cocalc/util/reuse-in-flight.
 */
const closeDoc = reuseInFlight(async (path: string) => {
  logger.debug("close", { path });
  try {
    const doc = openDocs[path];
    if (doc == null) {
      return;
    }
    // remove from the maps before awaiting, so the doc is no longer
    // considered open while close() is in progress
    delete openDocs[path];
    delete openTimes[path];
    try {
      await doc.close();
    } catch (err) {
      logger.debug(`WARNING -- issue closing doc -- ${err}`);
      openFiles?.setError(path, err);
    }
  } finally {
    if (openDocs[path] == null) {
      openFiles?.setNotBackend(path, compute_server_id);
    }
  }
});

/**
 * Open the document at `path` in this process, unless it is already open,
 * is a ".term" file, or has no doctype in the open files table.
 *
 * Creates a SyncString when the doctype's type is "string" and a SyncDB
 * otherwise, registers it in openDocs, and for the Jupyter syncdb extension
 * also initializes the Jupyter backend.  Finally, if a document is
 * registered at `path`, this host is set as its backend.
 *
 * NOTE(review): wrapped in reuseInFlight, which presumably shares a single
 * in-flight call between concurrent callers with the same argument --
 * confirm against @cocalc/util/reuse-in-flight.
 */
const openDoc = reuseInFlight(async (path: string) => {
  logger.debug("openDoc", { path });
  try {
    const doc = openDocs[path];
    if (doc != null) {
      return;
    }
    openTimes[path] = Date.now();

    if (path.endsWith(".term")) {
      // terminals are handled directly by the project api -- also since
      // doctype probably not set for them, they won't end up here.
      // (this could change though, e.g., we might use doctype to
      // set the terminal command).
      return;
    }

    const client = getClient();
    let doctype: any = openFiles?.get(path)?.doctype;
    logger.debug("openDoc: open files table knows ", openFiles?.get(path), {
      path,
    });
    if (doctype == null) {
      logger.debug("openDoc: doctype must be set but isn't, so bailing", {
        path,
      });
      // actually bail: without a doctype we cannot tell which kind of
      // document to construct, and reading doctype.type below would throw.
      return;
    } else {
      logger.debug("openDoc: got doctype from openFiles table", {
        path,
        doctype,
      });
    }

    let syncdoc;
    if (doctype.type == "string") {
      syncdoc = new SyncString({
        ...doctype.opts,
        project_id,
        path,
        client,
      });
    } else {
      syncdoc = new SyncDB({
        ...doctype.opts,
        project_id,
        path,
        client,
      });
    }
    openDocs[path] = syncdoc;

    syncdoc.on("error", (err) => {
      closeDoc(path);
      openFiles?.setError(path, err);
      logger.debug(`syncdoc error -- ${err}`, path);
    });

    // Extra backend support in some cases, e.g., Jupyter, Sage, etc.
    const ext = filename_extension(path);
    switch (ext) {
      case JUPYTER_SYNCDB_EXTENSIONS: {
        logger.debug("initializing Jupyter backend for ", path);
        await initJupyterRedux(syncdoc, client);
        const path1 = original_path(syncdoc.get_path());
        syncdoc.on("closed", async () => {
          logger.debug("removing Jupyter backend for ", path1);
          await removeJupyterRedux(path1, project_id);
        });
        break;
      }
    }
  } finally {
    if (openDocs[path] != null) {
      openFiles?.setBackend(path, compute_server_id);
    }
  }
});