Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
sagemathinc
GitHub Repository: sagemathinc/cocalc
Path: blob/master/src/packages/project/conat/open-files.ts
5843 views
1
/*
2
Handle opening files in a project to save/load from disk and also enable compute capabilities.
3
4
DEVELOPMENT:
5
6
0. From the browser with the project opened, terminate the open-files api service:
7
8
9
await cc.client.conat_client.projectApi(cc.current()).system.terminate({service:'open-files'})
10
11
12
13
Set env variables as in a project (see api/index.ts ), then in nodejs:
14
15
DEBUG_CONSOLE=yes DEBUG=cocalc:debug:project:conat:* node
16
17
x = await require("@cocalc/project/conat/open-files").init(); Object.keys(x)
18
19
20
[ 'openFiles', 'openDocs', 'formatter', 'terminate', 'computeServers', 'cc' ]
21
22
> x.openFiles.getAll();
23
24
> Object.keys(x.openDocs)
25
26
> s = x.openDocs['z4.tasks']
27
// now you can directly work with the syncdoc for a given file,
28
// but from the perspective of the project, not the browser!
29
//
30
//
31
32
OR:
33
34
echo "require('@cocalc/project/conat/open-files').init(); require('@cocalc/project/bug-counter').init()" | node
35
36
COMPUTE SERVER:
37
38
To simulate a compute server, do exactly as above, but also set the environment
39
variable COMPUTE_SERVER_ID to the *global* (not project specific) id of the compute
40
server:
41
42
COMPUTE_SERVER_ID=84 node
43
44
In this case, you also don't need to use the terminate command if the compute
45
server isn't actually running. To terminate a compute server open files service though:
46
47
(TODO)
48
49
50
EDITOR ACTIONS:
51
52
Stop the open-files server and define x as above in a terminal. You can
53
then get the actions or store in a nodejs terminal for a particular document
54
as follows:
55
56
project_id = '00847397-d6a8-4cb0-96a8-6ef64ac3e6cf'; path = '2025-03-21-100921.ipynb';
57
redux = require("@cocalc/jupyter/redux/app").redux; a = redux.getEditorActions(project_id, path); s = redux.getEditorStore(project_id, path); 0;
58
59
60
IN A LIVE RUNNING PROJECT IN KUCALC:
61
62
Ssh in to the project itself. You can't use a terminal because that very terminal will be broken by
63
doing this! Then:
64
65
/cocalc/github/src/packages/project$ . /cocalc/nvm/nvm.sh
66
/cocalc/github/src/packages/project$ COCALC_PROJECT_ID=... COCALC_SECRET_TOKEN="/secrets/secret-token/token" CONAT_SERVER=hub-conat node # not sure about CONAT_SERVER
67
Welcome to Node.js v20.19.0.
68
Type ".help" for more information.
69
> x = await require("@cocalc/project/conat/open-files").init(); Object.keys(x)
70
[ 'openFiles', 'openDocs', 'formatter', 'terminate', 'computeServers' ]
71
>
72
73
74
*/
75
76
import {
77
openFiles as createOpenFiles,
78
type OpenFiles,
79
type OpenFileEntry,
80
} from "@cocalc/project/conat/sync";
81
import { CONAT_OPEN_FILE_TOUCH_INTERVAL } from "@cocalc/util/conat";
82
import { compute_server_id, project_id } from "@cocalc/project/data";
83
import type { SyncDoc } from "@cocalc/sync/editor/generic/sync-doc";
84
import { getClient } from "@cocalc/project/client";
85
import { SyncString } from "@cocalc/sync/editor/string/sync";
86
import { SyncDB } from "@cocalc/sync/editor/db/sync";
87
import getLogger from "@cocalc/backend/logger";
88
import { reuseInFlight } from "@cocalc/util/reuse-in-flight";
89
import { delay } from "awaiting";
90
import { initJupyterRedux, removeJupyterRedux } from "@cocalc/jupyter/kernel";
91
import { filename_extension, original_path } from "@cocalc/util/misc";
92
import { createFormatterService } from "./formatter";
93
import { type ConatService } from "@cocalc/conat/service/service";
94
import { exists } from "@cocalc/backend/misc/async-utils-node";
95
import { map as awaitMap } from "awaiting";
96
import { unlink } from "fs/promises";
97
import { join } from "path";
98
import {
99
computeServerManager,
100
ComputeServerManager,
101
} from "@cocalc/conat/compute/manager";
102
import { JUPYTER_SYNCDB_EXTENSIONS } from "@cocalc/util/jupyter/names";
103
import { connectToConat } from "@cocalc/project/conat/connection";
104
105
// ensure conat connection stuff is initialized
106
import "@cocalc/project/conat/env";
107
import { chdir } from "node:process";
108
109
const logger = getLogger("project:conat:open-files");
110
111
/*
Debugging aid for tracking down memory leaks: every 15 seconds log the
number of documents open in this process, the stats reported by
openFiles.debugStats(), and the resident set size in MiB.  The timer is
cleared when openFiles emits "closed".

The ts-ignore directive directly above the function must stay adjacent to it,
since nothing in this file calls the function unless the call in init() is
uncommented.
*/
// @ts-ignore
function startOpenFilesStatsLoop(openFiles: OpenFiles) {
  const intervalMs = 15000;
  logger.debug("open-files stats enabled", { intervalMs });

  const logStats = () => {
    const openDocCount = Object.keys(openDocs).length;
    const stats = openFiles.debugStats();
    const rssMiB = Math.round(process.memoryUsage().rss / (1024 * 1024));
    logger.debug("open-files stats", {
      intervalMs,
      openDocs: openDocCount,
      stats,
      rssMiB,
    });
  };

  const timer = setInterval(logStats, intervalMs);
  openFiles.on("closed", () => {
    clearInterval(timer);
  });
}
125
126
// How often (in ms) every file currently managed here is checked for
// existence on the filesystem.
const FILE_DELETION_CHECK_INTERVAL = 5_000;

// When a file is found to be missing, wait this long (in ms) and look
// *again* before believing it.  Only if it is still missing is the file
// closed in memory and flagged as deleted in the shared openfile state.
const FILE_DELETION_GRACE_PERIOD = 2_000;

// A file is NEVER checked for deletion until this many ms have passed since
// it was first opened.  This is VERY important: some documents, e.g., jupyter
// notebooks, can take a while to appear on disk the first time.
const FILE_DELETION_INITIAL_DELAY = 15_000;

// Shared table of open files; null until init() runs and after terminate().
let openFiles: OpenFiles | null = null;

// Formatter service created by init(); null when not running.
let formatter: any = null;

// Everything opened in this process, keyed by path.
const openDocs: Record<string, SyncDoc | ConatService> = {};

// Compute server manager for this project; null when not running.
let computeServers: ComputeServerManager | null = null;

// Timestamp (ms since epoch) at which each path was most recently opened here.
const openTimes: Record<string, number> = {};
146
147
/**
 * Look up the document opened here for `path`.
 *
 * @returns the entry of openDocs for `path` when it is a SyncString or a
 *   SyncDB instance; undefined when nothing is open for that path or the
 *   entry is some other kind of object.
 */
export function getSyncDoc(path: string): SyncDoc | undefined {
  const candidate = openDocs[path];
  return candidate instanceof SyncString || candidate instanceof SyncDB
    ? candidate
    : undefined;
}
154
155
/**
 * Start the open-files service for this process.
 *
 * In order: change the working directory to $HOME (when set), create the
 * shared open-files table, create the compute server manager and wait for it
 * to be ready, process every entry already in the table, start the three
 * background loops, subscribe to table changes, and create the formatter
 * service.
 *
 * @returns handles to the pieces created here (plus a conat connection);
 *   useful for development in a node REPL.
 */
export async function init() {
  logger.debug("init");

  const home = process.env.HOME;
  if (home) {
    chdir(home);
  }

  openFiles = await createOpenFiles();
  // Use this to debug potential memory leaks
  // https://github.com/sagemathinc/cocalc/issues/8702
  // startOpenFilesStatsLoop(openFiles);

  computeServers = computeServerManager({ project_id });
  await computeServers.waitUntilReady();
  computeServers.on("change", async ({ path, id }) => {
    if (openFiles == null) {
      // service was terminated
      return;
    }
    const entry = openFiles.get(path);
    if (entry == null) {
      // nothing known about this path in the table, so just make sure
      // it is not open here.
      await closeDoc(path);
      return;
    }
    await handleChange({ ...entry, id });
  });

  // initialize: deliberately not awaited, so the entries are handled
  // concurrently while the rest of the setup proceeds.
  for (const entry of openFiles.getAll()) {
    void handleChange(entry);
  }

  // background loop: close files that aren't touched frequently
  void closeIgnoredFilesLoop();

  // background loop: periodically update timestamp on backend for files
  // we have open
  void touchOpenFilesLoop();

  // background loop: notice when a file opened on this host gets deleted,
  // mark it as such, and set it to closed.
  void watchForFileDeletionLoop();

  // handle changes
  openFiles.on("change", (entry) => {
    // We ONLY try to open the file if a doctype is set.  While an entry is
    // first being created the doctype won't be the first field set, and
    // nothing should be launched until it is.
    if (!entry.doctype) {
      return;
    }
    void handleChange(entry);
  });

  formatter = await createFormatterService({ openSyncDocs: openDocs });

  // useful for development
  const cc = connectToConat();
  return {
    openFiles,
    openDocs,
    formatter,
    terminate,
    computeServers,
    cc,
  };
}
218
219
/**
 * Shut the service down: start closing every open document (the closes are
 * not awaited), then close and forget the open-files table, the formatter
 * service and the compute server manager.
 */
export function terminate() {
  logger.debug("terminating open-files service");

  for (const path in openDocs) {
    void closeDoc(path);
  }

  if (openFiles != null) {
    openFiles.close();
  }
  openFiles = null;

  if (formatter != null) {
    formatter.close();
  }
  formatter = null;

  if (computeServers != null) {
    computeServers.close();
  }
  computeServers = null;
}
233
234
/**
 * Timestamp (ms since epoch) that lies 2.5 touch intervals in the past.
 * Callers compare an entry's `time` against it to decide whether anybody
 * still cares about the file.
 */
function getCutoff(): number {
  const now = Date.now();
  const maxAgeMs = 2.5 * CONAT_OPEN_FILE_TOUCH_INTERVAL;
  return now - maxAgeMs;
}
237
238
/**
 * Id that the compute server manager reports for `path`, or 0 when the
 * manager is not running or has no value for the path.
 */
function computeServerId(path: string): number {
  const assigned = computeServers?.get(path);
  return assigned == null ? 0 : assigned;
}
241
242
/**
 * React to a new or changed entry of the open-files table.
 *
 * - If the path is assigned to a different compute server id than ours,
 *   clear our backend claim (when the entry names us) and close the doc if
 *   it is open here.
 * - If the entry is flagged deleted and the file really is missing, close
 *   the doc if it is open here; if the file exists again, clear the flag
 *   and carry on.
 * - If the entry was touched more recently than getCutoff() and the doc is
 *   not open here, open it.
 *
 * Never throws: any error is traced and logged.
 *
 * @param id - optional override of the compute server id for the path; when
 *   omitted the id is looked up via computeServerId(path).
 */
async function handleChange({
  path,
  time,
  deleted,
  backend,
  doctype,
  id,
}: OpenFileEntry & { id?: number }) {
  try {
    const serverId = id ?? computeServerId(path);
    logger.debug("handleChange", {
      path,
      time,
      deleted,
      backend,
      doctype,
      id: serverId,
    });
    const isOpenHere = openDocs[path] != null;

    if (serverId != compute_server_id) {
      if (backend?.id == compute_server_id) {
        // we are definitely not the backend right now.
        openFiles?.setNotBackend(path, compute_server_id);
      }
      // only thing we should do is close it if it is open.
      if (isOpenHere) {
        await closeDoc(path);
      }
      return;
    }

    if (deleted?.deleted) {
      const isBack = await exists(path);
      if (!isBack) {
        if (isOpenHere) {
          await closeDoc(path);
        }
        return;
      }
      // it's back
      openFiles?.setNotDeleted(path);
    }

    const recentlyTouched = time != null && time >= getCutoff();
    if (recentlyTouched && !isOpenHere) {
      logger.debug("handleChange: opening", { path });
      // users actively care about this file being opened HERE, but it isn't
      await openDoc(path);
    }
  } catch (err) {
    console.trace(err);
    logger.debug(`handleChange: WARNING - error opening ${path} -- ${err}`);
  }
}
295
296
/**
 * Whether a document at `path` may be closed automatically due to
 * inactivity.  Jupyter syncdb files, .sagews worksheets and .term terminals
 * are excluded.
 */
function supportAutoclose(path: string): boolean {
  // This feels way too "hard coded"; alternatively, maybe the kernel (or
  // whatever) should actually update the interest? or something else...
  const neverAutoclosed = [
    "." + JUPYTER_SYNCDB_EXTENSIONS,
    ".sagews",
    ".term",
  ];
  return !neverAutoclosed.some((suffix) => path.endsWith(suffix));
}
308
309
/**
 * Background loop, running for as long as the open-files table is
 * connected.  Once per touch interval it closes every document that is open
 * here, supports autoclose, and whose table entry has not been touched
 * since getCutoff().
 */
async function closeIgnoredFilesLoop() {
  while (openFiles?.state == "connected") {
    await delay(CONAT_OPEN_FILE_TOUCH_INTERVAL);
    if (openFiles?.state != "connected") {
      // state changed while we were sleeping
      return;
    }

    const numOpen = Object.keys(openDocs).length;
    if (numOpen == 0) {
      logger.debug("closeIgnoredFiles: no paths currently open");
      continue;
    }
    logger.debug(
      "closeIgnoredFiles: checking",
      numOpen,
      "currently open paths...",
    );

    const cutoff = getCutoff();
    for (const entry of openFiles.getAll()) {
      if (entry == null || entry.time == null) {
        continue;
      }
      if (openDocs[entry.path] == null) {
        // not open here
        continue;
      }
      if (entry.time > cutoff) {
        // touched recently enough
        continue;
      }
      if (!supportAutoclose(entry.path)) {
        continue;
      }
      logger.debug("closeIgnoredFiles: closing due to inactivity", entry);
      // not awaited, so closes proceed in the background
      void closeDoc(entry.path);
    }
  }
}
340
341
/**
 * Background loop, running for as long as the open-files table is
 * connected.  Once per touch interval it calls setBackend with our compute
 * server id for every path open here.
 */
async function touchOpenFilesLoop() {
  for (;;) {
    if (openFiles?.state != "connected" || openDocs == null) {
      return;
    }
    for (const path in openDocs) {
      openFiles.setBackend(path, compute_server_id);
    }
    await delay(CONAT_OPEN_FILE_TOUCH_INTERVAL);
  }
}
349
350
/**
 * Check whether the file behind an open document has vanished from disk
 * and, if so, flag it as deleted in the open-files table and close it here.
 *
 * Nothing happens when:
 *  - the service is not running,
 *  - the document was opened less than FILE_DELETION_INITIAL_DELAY ms ago,
 *  - the path is assigned to a different compute server id than ours,
 *  - the path is a .term file,
 *  - the table has no entry for the path, or
 *  - $HOME is not set (the absolute path could not be computed safely).
 *
 * @param path - path of the document, as used for the keys of openDocs and
 *   in the open-files table (joined with $HOME to get the absolute path).
 */
async function checkForFileDeletion(path: string) {
  if (openFiles == null) {
    return;
  }
  if (Date.now() - (openTimes[path] ?? 0) <= FILE_DELETION_INITIAL_DELAY) {
    // too soon after opening; the file may not have been created yet
    return;
  }
  if (computeServerId(path) != compute_server_id) {
    // not our concern
    return;
  }
  if (path.endsWith(".term")) {
    // term files are exempt -- we don't save data in them and often
    // don't actually make the hidden ones for each frame in the
    // filesystem at all.
    return;
  }

  const entry = openFiles.get(path);
  if (entry == null) {
    return;
  }
  if (entry.deleted?.deleted) {
    // already set as deleted -- shouldn't still be opened
    await closeDoc(entry.path);
    return;
  }

  const home = process.env.HOME;
  if (!home) {
    // too dangerous
    return;
  }
  const fullPath = join(home, entry.path);

  // If the file doesn't exist now and still doesn't exist after the grace
  // period, mark it deleted.  The grace period matters: creating a file
  // (e.g., a jupyter notebook) can take a while, and we must not decide it
  // was deleted before it was even made.
  if (await exists(fullPath)) {
    return;
  }
  await delay(FILE_DELETION_GRACE_PERIOD);
  if (await exists(fullPath)) {
    return;
  }

  // still doesn't exist
  if (openFiles == null) {
    // service was terminated while we were waiting
    return;
  }
  logger.debug("checkForFileDeletion: marking as deleted -- ", entry);
  openFiles.setDeleted(entry.path);
  // openDocs is keyed by the same path the table uses, NOT by the absolute
  // path, so the document must be closed via entry.path.  (Passing fullPath
  // finds no open document and clears the backend flag for the wrong key.)
  await closeDoc(entry.path);

  // Closing a file may cause it to save its last version to disk, so we
  // delete the file again if that happened.
  // TODO: add an option to close everywhere to not do this, and/or make
  // it not save on close if the file doesn't exist.
  try {
    if (await exists(fullPath)) {
      await unlink(fullPath);
    }
  } catch (err) {
    // best effort only -- never fail the deletion check because of this
    logger.debug(
      `checkForFileDeletion: WARNING -- unable to remove ${fullPath} -- ${err}`,
    );
  }
}
412
413
/**
 * Background loop, running for as long as the open-files table is
 * connected.  Every FILE_DELETION_CHECK_INTERVAL ms it runs
 * checkForFileDeletion on each path open here, passing 20 as the limit
 * argument to awaitMap.
 */
async function watchForFileDeletionLoop() {
  while (openFiles?.state == "connected") {
    await delay(FILE_DELETION_CHECK_INTERVAL);
    if (openFiles?.state != "connected") {
      // state changed while we were sleeping
      return;
    }
    const paths = Object.keys(openDocs);
    if (paths.length > 0) {
      // (intentionally no logging here: this runs every few seconds)
      await awaitMap(paths, 20, checkForFileDeletion);
    }
  }
}
432
433
/**
 * Close the document open here for `path`, if there is one.
 *
 * The entries for the path are removed from openDocs and openTimes *before*
 * the document's close() is awaited.  A failure of close() is logged and
 * recorded in the open-files table rather than thrown.  Afterwards, if
 * nothing is open for the path, the table is told that we are not its
 * backend.
 *
 * Wrapped in reuseInFlight.
 */
const closeDoc = reuseInFlight(async (path: string) => {
  logger.debug("close", { path });
  try {
    const target = openDocs[path];
    if (target != null) {
      delete openDocs[path];
      delete openTimes[path];
      try {
        await target.close();
      } catch (err) {
        logger.debug(`WARNING -- issue closing doc -- ${err}`);
        openFiles?.setError(path, err);
      }
    }
  } finally {
    const stillOpen = openDocs[path] != null;
    if (!stillOpen) {
      openFiles?.setNotBackend(path, compute_server_id);
    }
  }
});
454
455
/**
 * Open the document at `path` in this process, unless it is already open.
 *
 * The doctype recorded for the path in the open-files table decides what is
 * created: type "string" gives a SyncString, anything else a SyncDB.  If no
 * doctype is recorded yet, nothing is opened and the call simply returns.
 * .term paths are never opened here.
 *
 * On success the document is stored in openDocs, an "error" handler is
 * attached that closes it and records the error in the table, Jupyter
 * syncdb files additionally get their Jupyter backend initialized, and the
 * table is told that we are the backend for the path.
 *
 * Wrapped in reuseInFlight.
 */
const openDoc = reuseInFlight(async (path: string) => {
  logger.debug("openDoc", { path });
  try {
    if (openDocs[path] != null) {
      // already open
      return;
    }
    openTimes[path] = Date.now();

    if (path.endsWith(".term")) {
      // terminals are handled directly by the project api -- also since
      // doctype probably not set for them, they won't end up here.
      // (this could change though, e.g., we might use doctype to
      // set the terminal command).
      return;
    }

    const client = getClient();
    const entry = openFiles?.get(path);
    const doctype: any = entry?.doctype;
    logger.debug("openDoc: open files table knows ", entry, {
      path,
    });
    if (doctype == null) {
      logger.debug("openDoc: doctype must be set but isn't, so bailing", {
        path,
      });
      // Actually bail.  Without this return the code below dereferences
      // doctype and throws a TypeError instead of bailing cleanly.
      return;
    }
    logger.debug("openDoc: got doctype from openFiles table", {
      path,
      doctype,
    });

    const opts = { ...doctype.opts, project_id, path, client };
    const syncdoc =
      doctype.type == "string" ? new SyncString(opts) : new SyncDB(opts);
    openDocs[path] = syncdoc;

    syncdoc.on("error", (err) => {
      closeDoc(path);
      openFiles?.setError(path, err);
      logger.debug(`syncdoc error -- ${err}`, path);
    });

    // Extra backend support in some cases, e.g., Jupyter, Sage, etc.
    if (filename_extension(path) === JUPYTER_SYNCDB_EXTENSIONS) {
      logger.debug("initializing Jupyter backend for ", path);
      await initJupyterRedux(syncdoc, client);
      const path1 = original_path(syncdoc.get_path());
      syncdoc.on("closed", async () => {
        logger.debug("removing Jupyter backend for ", path1);
        await removeJupyterRedux(path1, project_id);
      });
    }
  } finally {
    // claim the backend role only if the document really is open here
    if (openDocs[path] != null) {
      openFiles?.setBackend(path, compute_server_id);
    }
  }
});
531
532