Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place.
Path: blob/master/src/packages/sync/editor/generic/sync-doc.ts
Views: 687
/*1* This file is part of CoCalc: Copyright © 2020 Sagemath, Inc.2* License: MS-RSL – see LICENSE.md for details3*/45/*6SyncDoc -- the core class for editing with a synchronized document.78This code supports both string-doc and db-doc, for editing both9strings and small database tables efficiently, with history,10undo, save to disk, etc.1112This code is run *both* in browser clients and under node.js13in projects, and behaves slightly differently in each case.1415EVENTS:1617- before-change: fired before merging in changes from upstream18- ... TODO19*/2021/* OFFLINE_THRESH_S - If the client becomes disconnected from22the backend for more than this long then---on reconnect---do23extra work to ensure that all snapshots are up to date (in24case snapshots were made when we were offline), and mark the25sent field of patches that weren't saved. I.e., we rebase26all offline changes. */27const OFFLINE_THRESH_S = 5 * 60; // 5 minutes.2829/* How often the local hub will autosave this file to disk if30it has it open and there are unsaved changes. This is very31important since it ensures that a user that edits a file but32doesn't click "Save" and closes their browser (right after33their edits have gone to the database), still has their34file saved to disk soon. This is important, e.g., for homework35getting collected and not missing the last few changes. It turns36out this is what people expect.37Set to 0 to disable. (But don't do that.) */38const FILE_SERVER_AUTOSAVE_S = 45;39// const FILE_SERVER_AUTOSAVE_S = 5;4041// How big of files we allow users to open using syncstrings.42const MAX_FILE_SIZE_MB = 8;4344// How frequently to check if file is or is not read only.45// The filesystem watcher is NOT sufficient for this, because46// it is NOT triggered on permissions changes. Thus we must47// poll for read only status periodically, unfortunately.48const READ_ONLY_CHECK_INTERVAL_MS = 7500;4950// This parameter determines throttling when broadcasting cursor position51// updates. 
Make this larger to reduce bandwidth at the expense of making52// cursors less responsive.53const CURSOR_THROTTLE_MS = 750;5455// Ignore file changes for this long after save to disk.56const RECENT_SAVE_TO_DISK_MS = 2000;5758import {59COMPUTE_THRESH_MS,60COMPUTER_SERVER_CURSOR_TYPE,61decodeUUIDtoNum,62SYNCDB_PARAMS as COMPUTE_SERVE_MANAGER_SYNCDB_PARAMS,63} from "@cocalc/util/compute/manager";6465type XPatch = any;6667import { reuseInFlight } from "@cocalc/util/reuse-in-flight";68import { SyncTable } from "@cocalc/sync/table/synctable";69import {70callback2,71cancel_scheduled,72once,73retry_until_success,74reuse_in_flight_methods,75} from "@cocalc/util/async-utils";76import { wait } from "@cocalc/util/async-wait";77import {78auxFileToOriginal,79ISO_to_Date,80assertDefined,81close,82cmp_Date,83endswith,84filename_extension,85hash_string,86is_date,87keys,88minutes_ago,89uuid,90} from "@cocalc/util/misc";91import * as schema from "@cocalc/util/schema";92import { delay } from "awaiting";93import { EventEmitter } from "events";94import { Map, fromJS } from "immutable";95import { debounce, throttle } from "lodash";96import { Evaluator } from "./evaluator";97import { HistoryEntry, HistoryExportOptions, export_history } from "./export";98import { IpywidgetsState } from "./ipywidgets-state";99import { SortedPatchList } from "./sorted-patch-list";100import {101Client,102CompressedPatch,103DocType,104Document,105FileWatcher,106Patch,107} from "./types";108import { patch_cmp } from "./util";109110export type State = "init" | "ready" | "closed";111export type DataServer = "project" | "database";112113export interface SyncOpts0 {114project_id: string;115path: string;116client: Client;117patch_interval?: number;118119// file_use_interval defaults to 60000.120// Specify 0 to disable.121file_use_interval?: number;122123string_id?: string;124cursors?: boolean;125change_throttle?: number;126127// persistent backend session in project, so only close128// backend when explicitly 
requested:129persistent?: boolean;130131// If true, entire sync-doc is assumed ephemeral, in the132// sense that no edit history gets saved via patches to133// the database. The one syncstring record for coordinating134// users does get created in the database.135ephemeral?: boolean;136137// which data/changefeed server to use138data_server?: DataServer;139}140141export interface SyncOpts extends SyncOpts0 {142from_str: (str: string) => Document;143doctype: DocType;144}145146export interface UndoState {147my_times: Date[];148pointer: number;149without: Date[];150final?: CompressedPatch;151}152153export class SyncDoc extends EventEmitter {154public readonly project_id: string; // project_id that contains the doc155public readonly path: string; // path of the file corresponding to the doc156private string_id: string;157private my_user_id: number;158159// This id is used for equality test and caching.160private id: string = uuid();161162private client: Client;163private _from_str: (str: string) => Document; // creates a doc from a string.164165// Throttling of incoming upstream patches from project to client.166private patch_interval: number = 250;167168// This is what's actually output by setInterval -- it's169// not an amount of time.170private fileserver_autosave_timer: number = 0;171172private read_only_timer: number = 0;173174// throttling of change events -- e.g., is useful for course175// editor where we have hundreds of changes and the UI gets176// overloaded unless we throttle and group them.177private change_throttle: number = 0;178179// file_use_interval throttle: default is 60s for everything180private file_use_interval: number;181private throttled_file_use?: Function;182183private cursors: boolean = false; // if true, also provide cursor tracking functionality184private cursor_map: Map<string, any> = Map();185private cursor_last_time: Date = new Date(0);186187// doctype: object describing document constructor188// (used by project to open file)189private 
doctype: DocType;190191private state: State = "init";192193private syncstring_table: SyncTable;194private patches_table: SyncTable;195private cursors_table: SyncTable;196197public evaluator?: Evaluator;198199public ipywidgets_state?: IpywidgetsState;200201private patch_list?: SortedPatchList;202203private last: Document;204private doc: Document;205private before_change?: Document;206207private last_user_change: Date = minutes_ago(60);208private last_save_to_disk_time: Date = new Date(0);209210private last_snapshot: Date | undefined;211private snapshot_interval: number;212213private users: string[];214215private settings: Map<string, any> = Map();216217private syncstring_save_state: string = "";218private load_full_history_done: boolean = false;219220// patches that this client made during this editing session.221private my_patches: { [time: string]: XPatch } = {};222223private watch_path?: string;224private file_watcher?: FileWatcher;225226private handle_patch_update_queue_running: boolean;227private patch_update_queue: string[] = [];228229private undo_state: UndoState | undefined;230231private save_patch_prev: Date | undefined;232233private save_to_disk_start_ctime: number | undefined;234private save_to_disk_end_ctime: number | undefined;235236private persistent: boolean = false;237public readonly data_server: DataServer = "project";238239private last_has_unsaved_changes?: boolean = undefined;240241private ephemeral: boolean = false;242243private sync_is_disabled: boolean = false;244private delay_sync_timer: any;245246// static because we want exactly one across all docs!247private static computeServerManagerDoc?: SyncDoc;248249constructor(opts: SyncOpts) {250super();251if (opts.string_id === undefined) {252this.string_id = schema.client_db.sha1(opts.project_id, opts.path);253} else {254this.string_id = opts.string_id;255}256257for (const field of 
[258"project_id",259"path",260"client",261"patch_interval",262"file_use_interval",263"change_throttle",264"cursors",265"doctype",266"from_patch_str",267"persistent",268"data_server",269"ephemeral",270]) {271if (opts[field] != undefined) {272this[field] = opts[field];273}274}275if (this.ephemeral) {276// So the doctype written to the database reflects the277// ephemeral state. Here ephemeral determines whether278// or not patches are written to the database by the279// project.280this.doctype.opts = { ...this.doctype.opts, ephemeral: true };281}282if (this.cursors) {283// similarly to ephemeral, but for cursors. We track them284// on the backend since they can also be very useful, e.g.,285// with jupyter they are used for connecting remote compute,286// and **should** also be used for broadcasting load and other287// status information (TODO).288this.doctype.opts = { ...this.doctype.opts, cursors: true };289}290this._from_str = opts.from_str;291292// Initialize to time when we create the syncstring, so we don't293// see our own cursor when we refresh the browser (before we move294// to update this).295this.cursor_last_time = this.client?.server_time();296297reuse_in_flight_methods(this, [298"save",299"save_to_disk",300"load_from_disk",301"handle_patch_update_queue",302]);303304if (this.change_throttle) {305this.emit_change = throttle(this.emit_change, this.change_throttle);306}307308this.setMaxListeners(100);309310this.init();311}312313/*314Initialize everything.315This should be called *exactly* once by the constructor,316and no other time. It tries to set everything up. If317the browser isn't connected to the network, it'll wait318until it is (however long, etc.). 
If this fails, it closes319this SyncDoc.320*/321private async init(): Promise<void> {322this.assert_not_closed("init");323const log = this.dbg("init");324325log("initializing all tables...");326try {327//const t0 = new Date();328await this.init_all();329//console.log( // TODO remove at some point.330// `time to open file ${this.path}: ${Date.now() - t0.valueOf()}`331//);332} catch (err) {333if (this.state == "closed") {334return;335}336log(`WARNING -- error initializing ${err}`);337// completely normal that this could happen on frontend - it just means338// that we closed the file before finished opening it...339if (this.state != ("closed" as State)) {340log(341"Error -- NOT caused by closing during the init_all, so we report it.",342);343this.emit("error", err);344}345await this.close();346return;347}348349// Success -- everything perfectly initialized with no issues.350this.set_state("ready");351this.init_watch();352this.emit_change(); // from nothing to something.353}354355// True if this client is responsible for managing356// the state of this document with respect to357// the file system. By default, the project is responsible,358// but it could be something else (e.g., a compute server!). It's359// important that whatever algorithm determines this, it is360// a function of state that is eventually consistent.361// IMPORTANT: whether or not we are the file server can362// change over time, so if you call isFileServer and363// set something up (e.g., autosave or a watcher), based364// on the result, you need to clear it when the state365// changes. See the function handleComputeServerManagerChange.366private isFileServer = reuseInFlight(async () => {367if (this.client.is_browser()) {368// browser is never the file server (yet), and doesn't need to do369// anything related to watching for changes in state.370// Someday via webassembly or browsers making users files availabl,371// etc., we will have this. 
Not today.372return false;373}374const computeServerManagerDoc = this.getComputeServerManagerDoc();375const log = this.dbg("isFileServer");376if (computeServerManagerDoc == null) {377log("not using compute server manager for this doc");378return this.client.is_project();379}380381const state = computeServerManagerDoc.get_state();382log("compute server manager doc state: ", state);383if (state == "closed") {384log("compute server manager is closed");385// something really messed up386return this.client.is_project();387}388if (state != "ready") {389try {390log(391"waiting for compute server manager doc to be ready; current state=",392state,393);394await once(computeServerManagerDoc, "ready", 15000);395log("compute server manager is ready");396} catch (err) {397log(398"WARNING -- failed to initialize computeServerManagerDoc -- err=",399err,400);401return this.client.is_project();402}403}404405// id of who the user *wants* to be the file server.406const path = this.getFileServerPath();407const fileServerId =408computeServerManagerDoc.get_one({ path })?.get("id") ?? 
0;409if (this.client.is_project()) {410log(411"we are project, so we are fileserver if fileServerId=0 and it is ",412fileServerId,413);414return fileServerId == 0;415}416// at this point we have to be a compute server417const computeServerId = decodeUUIDtoNum(this.client.client_id());418// this is usually true -- but might not be if we are switching419// directly from one compute server to another.420log("we are compute server and ", { fileServerId, computeServerId });421return fileServerId == computeServerId;422});423424private getFileServerPath = () => {425if (this.path?.endsWith(".sage-jupyter2")) {426// treating jupyter as a weird special case here.427return auxFileToOriginal(this.path);428}429return this.path;430};431432private getComputeServerManagerDoc = () => {433if (this.path == COMPUTE_SERVE_MANAGER_SYNCDB_PARAMS.path) {434// don't want to recursively explode!435return null;436}437if (SyncDoc.computeServerManagerDoc == null) {438if (this.client.is_project()) {439// @ts-ignore: TODO!440SyncDoc.computeServerManagerDoc = this.client.syncdoc({441path: COMPUTE_SERVE_MANAGER_SYNCDB_PARAMS.path,442});443} else {444// @ts-ignore: TODO!445SyncDoc.computeServerManagerDoc = this.client.sync_client.sync_db({446project_id: this.project_id,447...COMPUTE_SERVE_MANAGER_SYNCDB_PARAMS,448});449}450if (451SyncDoc.computeServerManagerDoc != null &&452!this.client.is_browser()453) {454// start watching for state changes455SyncDoc.computeServerManagerDoc.on(456"change",457this.handleComputeServerManagerChange,458);459}460}461return SyncDoc.computeServerManagerDoc;462};463464private handleComputeServerManagerChange = async (keys) => {465if (SyncDoc.computeServerManagerDoc == null) {466return;467}468let relevant = false;469for (const key of keys ?? 
[]) {470if (key.get("path") == this.path) {471relevant = true;472break;473}474}475if (!relevant) {476return;477}478const path = this.getFileServerPath();479const fileServerId =480SyncDoc.computeServerManagerDoc.get_one({ path })?.get("id") ?? 0;481const ourId = this.client.is_project()482? 0483: decodeUUIDtoNum(this.client.client_id());484// we are considering ourself the file server already if we have485// either a watcher or autosave on.486const thinkWeAreFileServer =487this.file_watcher != null || this.fileserver_autosave_timer;488const weAreFileServer = fileServerId == ourId;489if (thinkWeAreFileServer != weAreFileServer) {490// life has changed! Let's adapt.491if (thinkWeAreFileServer) {492// we were acting as the file server, but now we are not.493await this.save_to_disk_filesystem_owner();494// Stop doing things we are no longer supposed to do.495clearInterval(this.fileserver_autosave_timer as any);496this.fileserver_autosave_timer = 0;497// stop watching filesystem498await this.update_watch_path();499} else {500// load our state from the disk501await this.load_from_disk();502// we were not acting as the file server, but now we need. Let's503// step up to the plate.504// start watching filesystem505await this.update_watch_path(this.path);506// enable autosave507await this.init_file_autosave();508}509}510};511512// Return id of ACTIVE remote compute server, if one is connected and pinging, or 0513// if none is connected. This is used by Jupyter to determine who514// should evaluate code.515// We always take the smallest id of the remote516// compute servers, in case there is more than one, so exactly one of them517// takes control. 
Always returns 0 if cursors are not enabled for this518// document, since the cursors table is used to coordinate the compute519// server.520getComputeServerId = (): number => {521if (!this.cursors) {522return 0;523}524// This info is in the "cursors" table instead of the document itself525// to avoid wasting space in the database longterm. Basically a remote526// Jupyter client that can provide compute announces this by reporting it's527// cursor to look a certain way.528const cursors = this.get_cursors({529maxAge: COMPUTE_THRESH_MS,530// don't exclude self since getComputeServerId called from the compute531// server also to know if it is the chosen one.532excludeSelf: "never",533});534const dbg = this.dbg("getComputeServerId");535dbg("num cursors = ", cursors.size);536let minId = Infinity;537// NOTE: similar code is in frontend/jupyter/cursor-manager.ts538for (const [client_id, cursor] of cursors) {539if (cursor.getIn(["locs", 0, "type"]) == COMPUTER_SERVER_CURSOR_TYPE) {540try {541minId = Math.min(minId, decodeUUIDtoNum(client_id));542} catch (err) {543// this should never happen unless a client were being malicious.544dbg(545"WARNING -- client_id should encode server id, but is",546client_id,547);548}549}550}551552return isFinite(minId) ? minId : 0;553};554555registerAsComputeServer = () => {556this.setCursorLocsNoThrottle([{ type: COMPUTER_SERVER_CURSOR_TYPE }]);557};558559/* Set this user's cursors to the given locs. 
*/560setCursorLocsNoThrottle = async (561// locs is 'any' and not any[] because of a codemirror syntax highlighting bug!562locs: any,563side_effect: boolean = false,564) => {565if (this.state != "ready") {566return;567}568if (this.cursors_table == null) {569if (!this.cursors) {570throw Error("cursors are not enabled");571}572// table not initialized yet573return;574}575const x: {576string_id: string;577user_id: number;578locs: any[];579time?: Date;580} = {581string_id: this.string_id,582user_id: this.my_user_id,583locs,584};585const now = this.client.server_time();586if (!side_effect || (x.time ?? now) >= now) {587// the now comparison above is in case the cursor time588// is in the future (due to clock issues) -- always fix that.589x.time = now;590}591if (x.time != null) {592// will actually always be non-null due to above593this.cursor_last_time = x.time;594}595this.cursors_table.set(x, "none");596await this.cursors_table.save();597};598599set_cursor_locs = throttle(this.setCursorLocsNoThrottle, CURSOR_THROTTLE_MS, {600leading: true,601trailing: true,602});603604private init_file_use_interval(): void {605if (this.file_use_interval == null) {606this.file_use_interval = 60 * 1000;607}608609if (!this.file_use_interval || !this.client.is_browser()) {610// file_use_interval has to be nonzero, and we only do611// this for browser user.612return;613}614615const file_use = async () => {616await delay(100); // wait a little so my_patches and gets updated.617// We ONLY count this and record that the file was618// edited if there was an actual change record in the619// patches log, by this user, since last time.620let user_is_active: boolean = false;621for (const tm in this.my_patches) {622if (new Date(parseInt(tm)) > this.last_user_change) {623user_is_active = true;624break;625}626}627if (!user_is_active) {628return;629}630this.last_user_change = new Date();631this.client.mark_file({632project_id: this.project_id,633path: this.path,634action: "edit",635ttl: 
this.file_use_interval,636});637};638this.throttled_file_use = throttle(file_use, this.file_use_interval, {639leading: true,640});641642this.on("user-change", this.throttled_file_use as any);643}644645private set_state(state: State): void {646this.state = state;647this.emit(state);648}649650public get_state = (): State => {651return this.state;652};653654public get_project_id = (): string => {655return this.project_id;656};657658public get_path = (): string => {659return this.path;660};661662public get_string_id = (): string => {663return this.string_id;664};665666public get_my_user_id = (): number => {667return this.my_user_id != null ? this.my_user_id : 0;668};669670// This gets used by clients that are connected to a backend671// with state in the project (e.g., jupyter). Basically this672// is a special websocket channel just for this syncdoc, which673// uses the cursors table.674public sendMessageToProject = async (data) => {675const send = this.patches_table?.sendMessageToProject;676if (send == null || this.patches_table.channel == null) {677throw Error("sending messages to project not available");678}679if (!this.patches_table.channel.is_connected()) {680await once(this.patches_table.channel, "connected");681}682send(data);683};684685private assert_not_closed(desc: string): void {686if (this.state === "closed") {687//console.trace();688throw Error(`must not be closed -- ${desc}`);689}690}691692public set_doc = (doc: Document, exit_undo_mode: boolean = true): void => {693if (doc.is_equal(this.doc)) {694// no change.695return;696}697if (exit_undo_mode) this.undo_state = undefined;698// console.log(`sync-doc.set_doc("${doc.to_str()}")`);699this.doc = doc;700701// debounced, so don't immediately alert, in case there are many702// more sets comming in the same loop:703this.emit_change_debounced();704};705706// Convenience function to avoid having to do707// get_doc and set_doc constantly.708public set = (x: any): void => 
{709this.set_doc(this.doc.set(x));710};711712public delete = (x?: any): void => {713this.set_doc(this.doc.delete(x));714};715716public get = (x?: any): any => {717return this.doc.get(x);718};719720public get_one(x?: any): any {721return this.doc.get_one(x);722}723724// Return underlying document, or undefined if document725// hasn't been set yet.726public get_doc = (): Document => {727if (this.doc == null) {728throw Error("doc must be set");729}730return this.doc;731};732733// Set this doc from its string representation.734public from_str = (value: string): void => {735// console.log(`sync-doc.from_str("${value}")`);736this.doc = this._from_str(value);737};738739// Return string representation of this doc,740// or exception if not yet ready.741public to_str = (): string => {742if (this.doc == null) {743throw Error("doc must be set");744}745return this.doc.to_str();746};747748public count = (): number => {749return this.doc.count();750};751752// Version of the document at a given point in time; if no753// time specified, gives the version right now.754// If not fully initialized, will throw exception.755public version = (time?: Date): Document => {756this.assert_table_is_ready("patches");757assertDefined(this.patch_list);758return this.patch_list.value(time);759};760761/* Compute version of document if the patches at the given times762were simply not included. This is a building block that is763used for implementing undo functionality for client editors. 
*/764public version_without = (times: Date[]): Document => {765this.assert_table_is_ready("patches");766assertDefined(this.patch_list);767return this.patch_list.value(undefined, undefined, times);768};769770// Revert document to what it was at the given point in time.771// There doesn't have to be a patch at exactly that point in772// time -- if there isn't it just uses the patch before that773// point in time.774public revert = (time: Date): void => {775this.set_doc(this.version(time));776};777778/* Undo/redo public api.779Calling this.undo and this.redo returns the version of780the document after the undo or redo operation, and records781a commit changing to that.782The first time calling this.undo switches into undo783state in which additional784calls to undo/redo move up and down the stack of changes made785by this user during this session.786787Call this.exit_undo_mode() to exit undo/redo mode.788789Undo and redo *only* impact changes made by this user during790this session. Other users edits are unaffected, and work by791this same user working from another browser tab or session is792also unaffected.793794Finally, undo of a past patch by definition means "the state795of the document" if that patch was not applied. 
The impact796of undo is NOT that the patch is removed from the patch history.797Instead, it records a new patch that is what would have happened798had we replayed history with the patches being undone not there.799800Doing any set_doc explicitly exits undo mode automatically.801*/802public undo = (): Document => {803const prev = this._undo();804this.set_doc(prev, false);805this.commit();806return prev;807};808809public redo = (): Document => {810const next = this._redo();811this.set_doc(next, false);812this.commit();813return next;814};815816private _undo(): Document {817this.assert_is_ready("_undo");818let state = this.undo_state;819if (state == null) {820// not in undo mode821state = this.undo_state = this.init_undo_state();822}823if (state.pointer === state.my_times.length) {824// pointing at live state (e.g., happens on entering undo mode)825const value: Document = this.version(); // last saved version826const live: Document = this.doc;827if (!live.is_equal(value)) {828// User had unsaved changes, so last undo is to revert to version without those.829state.final = value.make_patch(live); // live redo if needed830state.pointer -= 1; // most recent timestamp831return value;832} else {833// User had no unsaved changes, so last undo is version without last saved change.834const tm = state.my_times[state.pointer - 1];835state.pointer -= 2;836if (tm != null) {837state.without.push(tm);838return this.version_without(state.without);839} else {840// no undo information during this session841return value;842}843}844} else {845// pointing at particular timestamp in the past846if (state.pointer >= 0) {847// there is still more to undo848state.without.push(state.my_times[state.pointer]);849state.pointer -= 1;850}851return this.version_without(state.without);852}853}854855private _redo(): Document {856this.assert_is_ready("_redo");857const state = this.undo_state;858if (state == null) {859// nothing to do but return latest live version860return this.get_doc();861}862if 
(state.pointer === state.my_times.length) {863// pointing at live state -- nothing to do864return this.get_doc();865} else if (state.pointer === state.my_times.length - 1) {866// one back from live state, so apply unsaved patch to live version867const value = this.version();868if (value == null) {869// see remark in undo -- do nothing870return this.get_doc();871}872state.pointer += 1;873return value.apply_patch(state.final);874} else {875// at least two back from live state876state.without.pop();877state.pointer += 1;878if (state.final == null && state.pointer === state.my_times.length - 1) {879// special case when there wasn't any live change880state.pointer += 1;881}882return this.version_without(state.without);883}884}885886public in_undo_mode = (): boolean => {887return this.undo_state != null;888};889890public exit_undo_mode = (): void => {891this.undo_state = undefined;892};893894private init_undo_state(): UndoState {895if (this.undo_state != null) {896return this.undo_state;897}898const my_times = keys(this.my_patches).map((x) => new Date(parseInt(x)));899my_times.sort(cmp_Date);900return (this.undo_state = {901my_times,902pointer: my_times.length,903without: [],904});905}906907private save_to_disk_autosave = async (): Promise<void> => {908if (this.state !== "ready") {909return;910}911const dbg = this.dbg("save_to_disk_autosave");912dbg();913try {914await this.save_to_disk();915} catch (err) {916dbg(`failed -- ${err}`);917}918};919920/* Make it so the local hub project will automatically save921the file to disk periodically. 
*/922private async init_file_autosave() {923// Do not autosave sagews until we resolve924// https://github.com/sagemathinc/cocalc/issues/974925// Similarly, do not autosave ipynb because of926// https://github.com/sagemathinc/cocalc/issues/5216927if (928!FILE_SERVER_AUTOSAVE_S ||929!(await this.isFileServer()) ||930this.fileserver_autosave_timer ||931endswith(this.path, ".sagews") ||932endswith(this.path, ".ipynb.sage-jupyter2")933) {934return;935}936937// Explicit cast due to node vs browser typings.938this.fileserver_autosave_timer = <any>(939setInterval(this.save_to_disk_autosave, FILE_SERVER_AUTOSAVE_S * 1000)940);941}942943// account_id of the user who made the edit at944// the given point in time.945public account_id = (time: Date): string => {946this.assert_is_ready("account_id");947return this.users[this.user_id(time)];948};949950/* Approximate time when patch with given timestamp was951actually sent to the server; returns undefined if time952sent is approximately the timestamp time. Only defined953when there is a significant difference, due to editing954when offline! 
*/955public time_sent = (time: Date): Date | undefined => {956this.assert_table_is_ready("patches");957assertDefined(this.patch_list);958return this.patch_list.time_sent(time);959};960961// Integer index of user who made the edit at given962// point in time.963public user_id = (time: Date): number => {964this.assert_table_is_ready("patches");965assertDefined(this.patch_list);966return this.patch_list.user_id(time);967};968969private syncstring_table_get_one(): Map<string, any> {970if (this.syncstring_table == null) {971throw Error("syncstring_table must be defined");972}973const t = this.syncstring_table.get_one();974if (t == null) {975// project has not initialized it yet.976return Map();977}978return t;979}980981/* The project calls set_initialized once it has checked for982the file on disk; this way the frontend knows that the983syncstring has been initialized in the database, and also984if there was an error doing the check.985*/986private async set_initialized(987error: string,988read_only: boolean,989size: number,990): Promise<void> {991this.assert_table_is_ready("syncstring");992this.dbg("set_initialized")({ error, read_only, size });993const init = { time: this.client.server_time(), size, error };994await this.set_syncstring_table({995init,996read_only,997last_active: this.client.server_time(),998});999}10001001/* List of timestamps of the versions of this string in the sync1002table that we opened to start editing (so starts with what was1003the most recent snapshot when we started). The list of timestamps1004is sorted from oldest to newest. 
*/
  public versions = (): Date[] => {
    this.assert_table_is_ready("patches");
    const v: Date[] = [];
    const s: Map<string, any> | undefined = this.patches_table.get();
    if (s == null) {
      // shouldn't happen due to assert_table_is_ready above.
      throw Error("patches_table must be initialized");
    }
    // collect the timestamp of every patch row (map used purely for iteration)
    s.map((x, _) => {
      v.push(x.get("time"));
    });
    v.sort(cmp_Date);
    return v;
  };

  /* List of all known timestamps of versions of this string, including
     possibly much older versions than returned by this.versions(), in
     case the full history has been loaded. The list of timestamps
     is sorted from oldest to newest. */
  public all_versions = (): Date[] => {
    this.assert_table_is_ready("patches");
    assertDefined(this.patch_list);
    return this.patch_list.versions();
  };

  // Timestamp of the most recent version, or the epoch (new Date(0))
  // if there are no versions at all.
  public last_changed = (): Date => {
    const v = this.versions();
    if (v.length > 0) {
      return v[v.length - 1];
    } else {
      return new Date(0);
    }
  };

  // If any of the underlying synctables closes, close this whole syncdoc.
  private init_table_close_handlers(): void {
    for (const x of ["syncstring", "patches", "cursors"]) {
      const t = this[x + "_table"];
      if (t != null) {
        t.on("close", () => this.close());
      }
    }
  }

  // Close synchronized editing of this string; this stops listening
  // for changes and stops broadcasting changes.
  public close = reuseInFlight(async () => {
    if (this.state == "closed") {
      return;
    }
    const dbg = this.dbg("close");
    dbg("close");
    if (this.client.is_browser() && this.state == "ready") {
      try {
        await this.save_to_disk();
      } catch (err) {
        // has to be non-fatal since we are closing the document,
        // and of course we need to clean up everything else.
        // Do nothing here.
      }
    }
    SyncDoc.computeServerManagerDoc?.removeListener(
      "change",
      this.handleComputeServerManagerChange,
    );
    //
    // SYNC STUFF
    //

    // WARNING: that 'closed' is emitted at the beginning of the
    // close function (before anything async) for the project is
    // assumed in src/packages/project/sync/sync-doc.ts, because
    // that ensures that the moment close is called we block trying
    // to create the syncdoc again until closing is finished.
    // (This set_state call emits "closed"):
    this.set_state("closed");

    this.emit("close");

    // must be after the emits above, so clients know
    // what happened and can respond.
    this.removeAllListeners();

    if (this.throttled_file_use != null) {
      // Cancel any pending file_use calls.
      cancel_scheduled(this.throttled_file_use);
      (this.throttled_file_use as any).cancel();
    }

    if (this.emit_change != null) {
      // Cancel any pending change emit calls.
      cancel_scheduled(this.emit_change);
    }

    if (this.fileserver_autosave_timer) {
      clearInterval(this.fileserver_autosave_timer as any);
      this.fileserver_autosave_timer = 0;
    }

    if (this.read_only_timer) {
      clearInterval(this.read_only_timer as any);
      this.read_only_timer = 0;
    }

    this.patch_update_queue = [];

    // Stop watching for file changes.  It's important to
    // do this *before* all the await's below, since
    // this syncdoc can't do anything in response to a
    // a file change in its current state.
    this.update_watch_path(); // no input = closes it, if open

    if (this.patch_list != null) {
      // not async -- just a data structure in memory
      this.patch_list.close();
    }

    //
    // ASYNC STUFF - in particular, these may all
    // attempt to do some last attempt to send changes
    // to the database.
    //
    try {
      await this.async_close();
      dbg("async_close -- successfully saved all data to database");
    } catch (err) {
      dbg("async_close -- ERROR -- ", err);
    }
    // this avoids memory leaks:
    close(this);

    // after doing that close, we need to keep the state (which just got deleted) as 'closed'
    this.set_state("closed");
    dbg("close done");
  });

  // Close all of the underlying tables/helpers in parallel; if any
  // of them failed, rethrow the first rejection reason as an Error.
  private async async_close() {
    const promises: Promise<any>[] = [];

    if (this.syncstring_table != null) {
      promises.push(this.syncstring_table.close());
    }

    if (this.patches_table != null) {
      promises.push(this.patches_table.close());
    }

    if (this.cursors_table != null) {
      promises.push(this.cursors_table.close());
    }

    if (this.evaluator != null) {
      promises.push(this.evaluator.close());
    }

    if (this.ipywidgets_state != null) {
      promises.push(this.ipywidgets_state.close());
    }

    const results = await Promise.allSettled(promises);

    results.forEach((result) => {
      if (result.status === "rejected") {
        throw Error(result.reason);
      }
    });
  }

  // TODO: We **have** to do this on the client, since the backend
  // **security model** for accessing the patches table only
  // knows the string_id, but not the project_id/path.  Thus
  // there is no way currently to know whether or not the client
  // has access to the patches, and hence the patches table
  // query fails.
This costs significant time -- a roundtrip1178// and write to the database -- whenever the user opens a file.1179// This fix should be to change the patches schema somehow1180// to have the user also provide the project_id and path, thus1181// proving they have access to the sha1 hash (string_id), but1182// don't actually use the project_id and path as columns in1183// the table. This requires some new idea I guess of virtual1184// fields....1185// Also, this also establishes the correct doctype.11861187// Since this MUST succeed before doing anything else. This is critical1188// because the patches table can't be opened anywhere if the syncstring1189// object doesn't exist, due to how our security works, *AND* that the1190// patches table uses the string_id, which is a SHA1 hash.1191private async ensure_syncstring_exists_in_db(): Promise<void> {1192const dbg = this.dbg("ensure_syncstring_exists_in_db");11931194if (!this.client.is_connected()) {1195dbg("wait until connected...", this.client.is_connected());1196await once(this.client, "connected");1197}11981199if (this.client.is_browser() && !this.client.is_signed_in()) {1200// the browser has to sign in, unlike the project (and compute servers)1201await once(this.client, "signed_in");1202}12031204if (this.state == ("closed" as State)) return;12051206dbg("do syncstring write query...");12071208await callback2(this.client.query, {1209query: {1210syncstrings: {1211string_id: this.string_id,1212project_id: this.project_id,1213path: this.path,1214doctype: JSON.stringify(this.doctype),1215},1216},1217});1218dbg("wrote syncstring to db - done.");1219}12201221private async synctable(1222query,1223options: any[],1224throttle_changes?: undefined | number,1225): Promise<SyncTable> {1226this.assert_not_closed("synctable");1227const dbg = this.dbg("synctable");1228if (!this.ephemeral && this.persistent && this.data_server == "project") {1229// persistent table in a non-ephemeral syncdoc, so ensure that table is1230// persisted 
to database (not just in memory).1231options = options.concat([{ persistent: true }]);1232}1233if (this.ephemeral && this.data_server == "project") {1234options.push({ ephemeral: true });1235}1236let synctable;1237switch (this.data_server) {1238case "project":1239synctable = await this.client.synctable_project(1240this.project_id,1241query,1242options,1243throttle_changes,1244this.id,1245);1246break;1247case "database":1248synctable = await this.client.synctable_database(1249query,1250options,1251throttle_changes,1252);1253break;1254default:1255throw Error(`uknown server ${this.data_server}`);1256}1257// We listen and log error events. This is useful because in some settings, e.g.,1258// in the project, an eventemitter with no listener for errors, which has an error,1259// will crash the entire process.1260synctable.on("error", (error) => dbg("ERROR", error));1261return synctable;1262}12631264private async init_syncstring_table(): Promise<void> {1265const query = {1266syncstrings: [1267{1268string_id: this.string_id,1269project_id: this.project_id,1270path: this.path,1271users: null,1272last_snapshot: null,1273snapshot_interval: null,1274save: null,1275last_active: null,1276init: null,1277read_only: null,1278last_file_change: null,1279doctype: null,1280archived: null,1281settings: null,1282},1283],1284};1285const dbg = this.dbg("init_syncstring_table");12861287dbg("getting table...");1288this.syncstring_table = await this.synctable(query, []);1289if (this.ephemeral && this.client.is_project()) {1290await this.set_syncstring_table({1291doctype: JSON.stringify(this.doctype),1292});1293} else {1294dbg("waiting for, then handling the first update...");1295await this.handle_syncstring_update();1296}1297this.syncstring_table.on(1298"change",1299this.handle_syncstring_update.bind(this),1300);13011302// Wait until syncstring is not archived -- if we open an1303// older syncstring, the patches may be archived,1304// and we have to wait until1305// after they have been 
    // pulled from blob storage before
    // we init the patch table, load from disk, etc.
    const is_not_archived: () => boolean = () => {
      const ss = this.syncstring_table_get_one();
      if (ss != null) {
        return !ss.get("archived");
      } else {
        return false;
      }
    };
    dbg("waiting for syncstring to be not archived");
    await this.syncstring_table.wait(is_not_archived, 120);
  }

  // Used for internal debug logging
  private dbg = (f: string = ""): Function => {
    return this.client?.dbg(`SyncDoc('${this.path}').${f}`);
  };

  // Full startup sequence: ensure the syncstring exists in the db,
  // open all tables, load from disk (file server only), and wait
  // until fully ready.  Only callable once, from the "init" state.
  private async init_all(): Promise<void> {
    if (this.state !== "init") {
      throw Error("connect can only be called in init state");
    }
    const log = this.dbg("init_all");

    log("ensure syncstring exists in database");
    this.assert_not_closed("init_all -- before ensuring syncstring exists");
    await this.ensure_syncstring_exists_in_db();

    log("syncstring_table");
    this.assert_not_closed("init_all -- before init_syncstring_table");
    await this.init_syncstring_table();

    log("patch_list, cursors, evaluator, ipywidgets");
    this.assert_not_closed(
      "init_all -- before init patch_list, cursors, evaluator, ipywidgets",
    );
    // these four inits are independent, so run them in parallel
    await Promise.all([
      this.init_patch_list(),
      this.init_cursors(),
      this.init_evaluator(),
      this.init_ipywidgets(),
    ]);
    this.assert_not_closed("init_all -- after init patch_list");

    this.init_table_close_handlers();

    log("file_use_interval");
    this.init_file_use_interval();

    if (await this.isFileServer()) {
      log("load_from_disk");
      // This sets initialized, which is needed to be fully ready.
      // We keep trying this load from disk until sync-doc is closed
      // or it succeeds.  It may fail if, e.g., the file is too
      // large or is not readable by the user. They are informed to
      // fix the problem... and once they do (and wait up to 10s),
      // this will finish.
      // if (!this.client.is_browser() && !this.client.is_project()) {
      //   // FAKE DELAY!!!  Just to simulate flakiness / slow network!!!!
      //   await delay(10000);
      // }
      await retry_until_success({
        f: this.init_load_from_disk,
        max_delay: 10000,
        desc: "syncdoc -- load_from_disk",
      });
      log("done loading from disk");
      this.assert_not_closed("init_all -- load from disk");
    }

    log("wait_until_fully_ready");
    await this.wait_until_fully_ready();

    this.assert_not_closed("init_all -- after waiting until fully ready");

    if (await this.isFileServer()) {
      log("init file autosave");
      this.init_file_autosave();
    }
    this.update_has_unsaved_changes();
    log("done");
  }

  // Error string from the init field of the syncstring table row,
  // or undefined if there is none (or the table isn't ready yet).
  private init_error(): string | undefined {
    let x;
    try {
      x = this.syncstring_table.get_one();
    } catch (_err) {
      // if the table hasn't been initialized yet,
      // it can't be in error state.
      return undefined;
    }
    return x?.get("init")?.get("error");
  }

  // wait until the syncstring table is ready to be
  // used (so extracted from archive, etc.),
  private async wait_until_fully_ready(): Promise<void> {
    this.assert_not_closed("wait_until_fully_ready");
    const dbg = this.dbg("wait_until_fully_ready");
    dbg();

    if (this.client.is_browser() && this.init_error()) {
      // init is set and is in error state. Give the backend a few seconds
      // to try to fix this error before giving up.
      // The browser client
      // can close and open the file to retry this (as instructed).
      try {
        await this.syncstring_table.wait(() => !this.init_error(), 5);
      } catch (err) {
        // fine -- let the code below deal with this problem...
      }
    }

    // True (well, truthy: init.toJS()) once init is set in the table
    // AND the patches are no longer archived in blob storage.
    const is_init_and_not_archived = (t: SyncTable) => {
      this.assert_not_closed("is_init_and_not_archived");
      const tbl = t.get_one();
      if (tbl == null) {
        dbg("null");
        return false;
      }
      // init must be set in table and archived must NOT be
      // set, so patches are loaded from blob store.
      const init = tbl.get("init");
      if (init && !tbl.get("archived")) {
        dbg("good to go");
        return init.toJS();
      } else {
        dbg("not init yet");
        return false;
      }
    };
    dbg("waiting for init...");
    // NOTE: .bind(this) on an arrow function is a no-op (arrows have
    // lexical this); harmless, kept as-is.
    const init = await this.syncstring_table.wait(
      is_init_and_not_archived.bind(this),
      0,
    );
    dbg("init done");
    if (init.error) {
      throw Error(init.error);
    }

    assertDefined(this.patch_list);
    if (
      !this.client.is_project() &&
      this.patch_list.count() === 0 &&
      init.size
    ) {
      dbg("waiting for patches for nontrivial file");
      // normally this only happens in a later event loop,
      // so force it now.
      dbg("handling patch update queue since", this.patch_list.count());
      await this.handle_patch_update_queue();
      assertDefined(this.patch_list);
      dbg("done handling, now ", this.patch_list.count());
      if (this.patch_list.count() === 0) {
        // wait for a change -- i.e., project loading the file from
        // disk and making available...  Because init.size > 0, we know that
        // there must be SOMETHING in the patches table once initialization is done.
        // This is the root cause of https://github.com/sagemathinc/cocalc/issues/2382
        await once(this.patches_table, "change");
        dbg("got patches_table change");
        await this.handle_patch_update_queue();
        dbg("handled update queue");
      }
    }
    this.emit("init");
  }

  // Throw unless the named table ("syncstring", "patches", "cursors")
  // exists and is in the connected state.
  private assert_table_is_ready(table: string): void {
    const t = this[table + "_table"]; // not using string template only because it breaks codemirror!
    if (t == null || t.get_state() != "connected") {
      throw Error(
        `Table ${table} must be connected.  string_id=${this.string_id}`,
      );
    }
  }

  // Throw (with the given description) unless the syncdoc is "ready".
  public assert_is_ready = (desc: string): void => {
    if (this.state != "ready") {
      throw Error(`must be ready -- ${desc}`);
    }
  };

  // Resolve once the syncdoc reaches the "ready" state.
  public wait_until_ready = async (): Promise<void> => {
    this.assert_not_closed("wait_until_ready");
    if (this.state !== ("ready" as State)) {
      // wait for a state change to ready.
      await once(this, "ready");
    }
  };

  /* Calls wait for the corresponding patches SyncTable, if
     it has been defined.  If it hasn't been defined, it waits
     until it is defined, then calls wait.
     Timeout only starts
     when patches_table is already initialized.
  */
  public wait = async (until: Function, timeout: number = 30): Promise<any> => {
    await this.wait_until_ready();
    //console.trace("SYNC WAIT -- start...");
    const result = await wait({
      obj: this,
      until,
      timeout,
      change_event: "change",
    });
    //console.trace("SYNC WAIT -- got it!");
    return result;
  };

  /* Delete the synchronized string and **all** patches from the database
     -- basically delete the complete history of editing this file.
     WARNINGS:
       (1) If a project has this string open, then things may be messed
           up, unless that project is restarted.
       (2) Only available for an **admin** user right now!

     To use: from a javascript console in the browser as admin, do:

       await smc.client.sync_string({
         project_id:'9f2e5869-54b8-4890-8828-9aeba9a64af4',
         path:'a.txt'}).delete_from_database()

     Then make sure project and clients refresh.

     WORRY: Race condition where constructor might write stuff as
     it is being deleted?
  */
  public delete_from_database = async (): Promise<void> => {
    // ephemeral docs have no patches stored in the database, so only
    // the syncstrings row needs deleting in that case.
    const queries: object[] = this.ephemeral
      ? []
      : [
          {
            patches_delete: {
              id: [this.string_id],
              dummy: null,
            },
          },
        ];
    queries.push({
      syncstrings_delete: {
        project_id: this.project_id,
        path: this.path,
      },
    });

    // issue all delete queries in parallel
    const v: Promise<any>[] = [];
    for (let i = 0; i < queries.length; i++) {
      v.push(callback2(this.client.query, { query: queries[i] }));
    }
    await Promise.all(v);
  };

  // True exactly when the path exists AND is not writable by us.
  // (Write access check failing + existence check succeeding = read only.)
  private pathExistsAndIsReadOnly = async (path): Promise<boolean> => {
    try {
      await callback2(this.client.path_access, {
        path,
        mode: "w",
      });
      // clearly exists and is NOT read only:
      return false;
    } catch (err) {
      // either it doesn't exist or it is read only
      if (await callback2(this.client.path_exists, { path })) {
        // it exists, so is read only and exists
        return true;
      }
      // doesn't exist
      return false;
    }
  };

  // Whether this document's file (under either its normal path or
  // its file-server path, when those differ) is read only on disk.
  private file_is_read_only = async (): Promise<boolean> => {
    if (await this.pathExistsAndIsReadOnly(this.path)) {
      return true;
    }
    const path = this.getFileServerPath();
    if (path != this.path) {
      if (await this.pathExistsAndIsReadOnly(path)) {
        return true;
      }
    }
    return false;
  };

  // Poll helper: refresh the read_only flag from the filesystem.
  private update_if_file_is_read_only = async (): Promise<void> => {
    this.set_read_only(await this.file_is_read_only());
  };

  // One attempt at the initial load from disk; throws on failure so the
  // surrounding retry_until_success keeps trying (unless already closed).
  private init_load_from_disk = async (): Promise<void> => {
    if (this.state == "closed") {
      // stop trying, no error -- this is assumed
      // in a retry_until_success elsewhere.
      return;
    }
    if (await this.load_from_disk_if_newer()) {
      throw Error("failed to load from disk");
    }
  };

  // Load the file from disk if the on-disk copy is newer than the last
  // recorded edit (or this is the first load ever).  Records the result
  // (error/read_only/size) via set_initialized.  Returns true on error.
  private async load_from_disk_if_newer(): Promise<boolean> {
    const last_changed = this.last_changed();
    const firstLoad = this.versions().length == 0;
    const dbg = this.dbg("load_from_disk_if_newer");
    let is_read_only: boolean = false;
    let size: number = 0;
    let error: string = "";
    try {
      dbg("check if path exists");
      if (await callback2(this.client.path_exists, {
        path: this.path })) {
        // the path exists
        dbg("path exists -- stat file");
        const stats = await callback2(this.client.path_stat, {
          path: this.path,
        });
        if (firstLoad || stats.ctime > last_changed) {
          dbg(
            `disk file changed more recently than edits (or first load), so loading, ${stats.ctime} > ${last_changed}; firstLoad=${firstLoad}`,
          );
          size = await this.load_from_disk();
          if (firstLoad) {
            dbg("emitting first-load event");
            // this event is emitted the first time the document is ever loaded from disk.
            this.emit("first-load");
          }
          dbg("loaded");
        } else {
          dbg("stick with database version");
        }
        dbg("checking if read only");
        is_read_only = await this.file_is_read_only();
        dbg("read_only", is_read_only);
      }
    } catch (err) {
      error = `${err.toString()} -- ${err.stack}`;
    }

    await this.set_initialized(error, is_read_only, size);
    dbg("done");
    return !!error;
  }

  // Build the query object for the patches table; when cutoff is given,
  // only patches at or after that time are requested (i.e., since the
  // last snapshot).
  private patch_table_query(cutoff?: Date) {
    const query = {
      string_id: this.string_id,
      time: cutoff ? { ">=": cutoff } : null,
      // compressed format patch as a JSON *string*
      patch: null,
      // integer id of user (maps to syncstring table)
      user_id: null,
      // (optional) a snapshot at this point in time
      snapshot: null,
      // (optional) when patch actually sent, which may
      // be later than when made
      sent: null,
      // (optional) timestamp of previous patch sent
      // from this session
      prev: null,
    };
    if (this.doctype.patch_format != null) {
      (query as any).format = this.doctype.patch_format;
    }
    return query;
  }

  // Open the patches table, build the SortedPatchList from it, and wire
  // up all change/save handlers.  Sets this.patch_list only at the very
  // end (see CRITICAL note below).
  private async init_patch_list(): Promise<void> {
    this.assert_not_closed("init_patch_list - start");
    const dbg = this.dbg("init_patch_list");
    dbg();

    // CRITICAL: note that handle_syncstring_update checks whether
    // init_patch_list is done by testing whether this.patch_list is defined!
    // That is why we first define "patch_list" below, then set this.patch_list
    // to it only after we're done.
    delete this.patch_list;

    const patch_list = new SortedPatchList(this._from_str);

    dbg("opening the table...");
    this.patches_table = await this.synctable(
      { patches: [this.patch_table_query(this.last_snapshot)] },
      [],
      this.patch_interval,
    );
    this.assert_not_closed("init_patch_list -- after making synctable");

    const update_has_unsaved_changes = debounce(
      this.update_has_unsaved_changes.bind(this),
      500,
      { leading: true, trailing: true },
    );

    this.patches_table.on("has-uncommitted-changes", (val) => {
      this.emit("has-uncommitted-changes", val);
    });

    this.on("change", () => {
      update_has_unsaved_changes();
    });

    this.syncstring_table.on("change", () => {
      update_has_unsaved_changes();
    });

    dbg("adding patches");
    patch_list.add(this.get_patches());

    const doc = patch_list.value();
    this.last = this.doc = doc;
    this.patches_table.on("change", this.handle_patch_update.bind(this));
    this.patches_table.on("saved", this.handle_offline.bind(this));
    this.patch_list = patch_list;

    // this only potentially happens for tables in the project,
    // e.g., jupyter and compute servers:
    // see packages/project/sync/server.ts
    this.patches_table.on("message", (...args) => {
      dbg("received message", args);
      this.emit("message", ...args);
    });

    dbg("done");

    /*
    TODO/CRITICAL: We are temporarily disabling same-user
    collision detection, since this seems to be leading to
    serious issues involving a feedback loop, which may
    be way worse than the 1 in a million issue
    that this addresses.  This only addresses the *same*
    account being used simultaneously on the same file
    by multiple people, which isn't something users should
    ever do (but they might do in big public demos?).

    this.patch_list.on 'overwrite', (t) =>
      * ensure that any outstanding save is done
      this.patches_table.save () =>
        this.check_for_timestamp_collision(t)
    */
  }

  /*
  _check_for_timestamp_collision: (t) =>
    obj = this._my_patches[t]
    if not obj?
      return
    key = this._patches_table.key(obj)
    if obj.patch != this._patches_table.get(key)?.get('patch')
      *console.log("COLLISION!
#{t}, #{obj.patch}, #{this._patches_table.get(key).get('patch')}")1753* We fix the collision by finding the nearest time after time that1754* is available, and reinserting our patch at that new time.1755this._my_patches[t] = 'killed'1756new_time = this.patch_list.next_available_time(new Date(t), this._user_id, this._users.length)1757this._save_patch(new_time, JSON.parse(obj.patch))1758*/17591760private async init_evaluator(): Promise<void> {1761const dbg = this.dbg("init_evaluator");1762const ext = filename_extension(this.path);1763if (ext !== "sagews") {1764dbg("done -- only use init_evaluator for sagews");1765return;1766}1767dbg("creating the evaluator and waiting for init");1768this.evaluator = new Evaluator(1769this,1770this.client,1771this.synctable.bind(this),1772);1773await this.evaluator.init();1774dbg("done");1775}17761777private async init_ipywidgets(): Promise<void> {1778const dbg = this.dbg("init_evaluator");1779const ext = filename_extension(this.path);1780if (ext != "sage-jupyter2") {1781dbg("done -- only use ipywidgets for jupyter");1782return;1783}1784dbg("creating the ipywidgets state table, and waiting for init");1785this.ipywidgets_state = new IpywidgetsState(1786this,1787this.client,1788this.synctable.bind(this),1789);1790await this.ipywidgets_state.init();1791dbg("done");1792}17931794private async init_cursors(): Promise<void> {1795const dbg = this.dbg("init_cursors");1796if (!this.cursors) {1797dbg("done -- do not care about cursors for this syncdoc.");1798return;1799}1800dbg("getting cursors ephemeral table");1801const query = {1802cursors: [1803{1804string_id: this.string_id,1805user_id: null,1806locs: null,1807time: null,1808},1809],1810};1811// We make cursors an ephemeral table, since there is no1812// need to persist it to the database, obviously!1813// Also, queue_size:1 makes it so only the last cursor position is1814// saved, e.g., in case of disconnect and reconnect.1815let options;1816if (this.data_server == "project") {1817options 
= [{ ephemeral: true }, { queue_size: 1 }];1818} else {1819options = [];1820}1821this.cursors_table = await this.synctable(query, options, 1000);1822this.assert_not_closed("init_cursors -- after making synctable");18231824// cursors now initialized; first initialize the1825// local this._cursor_map, which tracks positions1826// of cursors by account_id:1827dbg("loading initial state");1828const s = this.cursors_table.get();1829if (s == null) {1830throw Error("bug -- get should not return null once table initialized");1831}1832s.forEach((locs: any, k: string) => {1833if (locs == null) {1834return;1835}1836const u = JSON.parse(k);1837if (u != null) {1838this.cursor_map = this.cursor_map.set(this.users[u[1]], locs);1839}1840});1841this.cursors_table.on("change", this.handle_cursors_change.bind(this));18421843if (this.cursors_table.setOnDisconnect != null) {1844// setOnDisconnect is available, so clear our1845// cursor positions when we disconnect for any reason.1846this.cursors_table.setOnDisconnect(1847{1848string_id: this.string_id,1849user_id: this.my_user_id,1850locs: [],1851},1852"none",1853);1854}18551856dbg("done");1857}18581859private handle_cursors_change(keys): void {1860if (this.state === "closed") {1861return;1862}1863for (const k of keys) {1864const u = JSON.parse(k);1865if (u == null) {1866continue;1867}1868const account_id = this.users[u[1]];1869const locs = this.cursors_table.get(k);1870if (locs == null && !this.cursor_map.has(account_id)) {1871// gone, and already gone.1872continue;1873}1874if (locs != null) {1875// changed1876this.cursor_map = this.cursor_map.set(account_id, locs);1877} else {1878// deleted1879this.cursor_map = this.cursor_map.delete(account_id);1880}1881this.emit("cursor_activity", account_id);1882}1883}18841885/* Returns *immutable* Map from account_id to list1886of cursor positions, if cursors are enabled.18871888- excludeSelf: do not include our own cursor1889- maxAge: only include cursors that have been updated with maxAge ms 
from now.1890*/1891get_cursors = ({1892maxAge = 60 * 1000,1893// excludeSelf:1894// 'always' -- *always* exclude self1895// 'never' -- never exclude self1896// 'heuristic' -- exclude self is older than last set from here, e.g., useful on1897// frontend so we don't see our own cursor unless more than one browser.1898excludeSelf = "always",1899}: {1900maxAge?: number;1901excludeSelf?: "always" | "never" | "heuristic";1902} = {}): Map<string, any> => {1903this.assert_not_closed("get_cursors");1904if (!this.cursors) {1905throw Error("cursors are not enabled");1906}1907if (this.cursors_table == null) {1908return Map(); // not loaded yet -- so no info yet.1909}1910const account_id: string = this.client_id();1911let map = this.cursor_map;1912if (map.has(account_id) && excludeSelf != "never") {1913if (1914excludeSelf == "always" ||1915(excludeSelf == "heuristic" &&1916this.cursor_last_time >=1917(map.getIn([account_id, "time"], new Date(0)) as Date))1918) {1919map = map.delete(account_id);1920}1921}1922// Remove any old cursors, where "old" is by default more than maxAge old.1923const now = Date.now();1924for (const [client_id, value] of map as any) {1925const time = value.get("time");1926if (time == null) {1927// this should always be set.1928map = map.delete(client_id);1929continue;1930}1931if (maxAge) {1932// we use abs to implicitly exclude a bad value that is somehow in the future,1933// if that were to happen.1934if (Math.abs(now - time.valueOf()) >= maxAge) {1935map = map.delete(client_id);1936continue;1937}1938}1939if (time >= now + 10 * 1000) {1940// We *always* delete any cursors more than 10 seconds in the future, since1941// that can only happen if a client inserts invalid data (e.g., clock not1942// yet synchronized). See https://github.com/sagemathinc/cocalc/issues/79691943map = map.delete(client_id);1944continue;1945}1946}1947return map;1948};19491950/* Set settings map. 
Used for custom configuration just for1951this one file, e.g., overloading the spell checker language.1952*/1953set_settings = async (obj): Promise<void> => {1954this.assert_is_ready("set_settings");1955await this.set_syncstring_table({1956settings: obj,1957});1958};19591960client_id = () => {1961return this.client.client_id();1962};19631964// get settings object1965public get_settings = (): Map<string, any> => {1966this.assert_is_ready("get_settings");1967return this.syncstring_table_get_one().get("settings", Map());1968};19691970/*1971Commits and saves current live syncdoc to backend.19721973Function only returns when there is nothing needing1974saving.19751976Save any changes we have as a new patch.1977*/1978public save = reuseInFlight(async () => {1979const dbg = this.dbg("save");1980dbg();1981if (this.client.is_deleted(this.path, this.project_id)) {1982dbg("not saving because deleted");1983return;1984}1985// We just keep trying while syncdoc is ready and there1986// are changes that have not been saved (due to this.doc1987// changing during the while loop!).1988if (this.doc == null || this.last == null) {1989dbg("bug -- not ready");1990// I'm making this non-fatal. It'll get called later when init is done.1991// I was seeing this when automating opening an autogenerated document with1992// the ChatGPT jupyter notebook generator.1993return;1994//throw Error("bug -- cannot save if doc and last are not initialized");1995}1996if (this.state == "closed") {1997// There's nothing to do regarding save if the table is1998// already closed. Note that we *do* have to save when1999// the table is init stage, since the project has to2000// record the newly opened version of the file to the2001// database! 
See2002// https://github.com/sagemathinc/cocalc/issues/49862003dbg(`state=${this.state} not ready so not saving`);2004return;2005}2006// Compute any patches.2007while (!this.doc.is_equal(this.last)) {2008dbg("something to save");2009this.emit("user-change");2010const doc = this.doc;2011// TODO: put in a delay if just saved too recently?2012// Or maybe won't matter since not using database?2013if (this.handle_patch_update_queue_running) {2014dbg("wait until the update queue is done");2015await once(this, "handle_patch_update_queue_done");2016// but wait until next loop (so as to check that needed2017// and state still ready).2018continue;2019}2020dbg("Compute new patch.");2021this.sync_remote_and_doc(false);2022// Emit event since this syncstring was2023// changed locally (or we wouldn't have had2024// to save at all).2025if (doc.is_equal(this.doc)) {2026dbg("no change during loop -- done!");2027break;2028}2029}2030if (this.state != "ready") {2031// above async waits could have resulted in state change.2032return;2033}2034// Ensure all patches are saved to backend.2035// We do this after the above, so that creating the newest patch2036// happens immediately on save, which makes it possible for clients2037// to save current state without having to wait on an async, which is2038// useful to ensure specific undo points (e.g., right before a paste).2039await this.patches_table.save();2040});20412042private next_patch_time(): Date {2043let time = this.client.server_time();2044assertDefined(this.patch_list);2045const min_time = this.patch_list.newest_patch_time();2046if (min_time != null && min_time >= time) {2047time = new Date(min_time.valueOf() + 1);2048}2049time = this.patch_list.next_available_time(2050time,2051this.my_user_id,2052this.users.length,2053);2054return time;2055}20562057private commit_patch(time: Date, patch: XPatch): void {2058this.assert_not_closed("commit_patch");2059const obj: any = {2060// version for database2061string_id: 
this.string_id,2062time,2063patch: JSON.stringify(patch),2064user_id: this.my_user_id,2065};20662067this.my_patches[time.valueOf()] = obj;20682069if (this.doctype.patch_format != null) {2070obj.format = this.doctype.patch_format;2071}2072if (this.save_patch_prev != null) {2073// timestamp of last saved patch during this session2074obj.prev = this.save_patch_prev;2075}2076this.save_patch_prev = time;20772078// If in undo mode put the just-created patch in our2079// without timestamp list, so it won't be included2080// when doing undo/redo.2081if (this.undo_state != null) {2082this.undo_state.without.unshift(time);2083}20842085//console.log 'saving patch with time ', time.valueOf()2086const x = this.patches_table.set(obj, "none");2087const y = this.process_patch(x, undefined, undefined, patch);2088if (y != null) {2089assertDefined(this.patch_list);2090this.patch_list.add([y]);2091}2092}20932094/* Create and store in the database a snapshot of the state2095of the string at the given point in time. This should2096be the time of an existing patch.2097*/2098private async snapshot(time: Date, force: boolean = false): Promise<void> {2099assertDefined(this.patch_list);2100const x = this.patch_list.patch(time);2101if (x == null) {2102throw Error(`no patch at time ${time}`);2103}2104if (x.snapshot != null && !force) {2105// there is already a snapshot at this point in time,2106// so nothing further to do.2107return;2108}21092110const snapshot: string = this.patch_list.value(time, force).to_str();2111// save the snapshot itself in the patches table.2112const obj: any = {2113string_id: this.string_id,2114time,2115patch: JSON.stringify(x.patch),2116snapshot,2117user_id: x.user_id,2118};2119if (force) {2120/* CRITICAL: We are sending the patch/snapshot later, but2121it was valid. 
It's important to make this clear or2122this.handle_offline will recompute this snapshot and2123try to update sent on it again, which leads to serious2124problems!2125*/2126obj.sent = time;2127}2128// also set snapshot in the this.patch_list, which2129// helps with optimization2130x.snapshot = obj.snapshot;2131this.patches_table.set(obj, "none");2132await this.patches_table.save();2133if (this.state != "ready") return;21342135/* CRITICAL: Only save the snapshot time in the database2136after the set in the patches table was definitely saved2137-- otherwise if the user refreshes their2138browser (or visits later) they lose all their2139early work due to trying to apply patches2140to a blank snapshot. That would be VERY bad.2141*/2142if (!this.ephemeral) {2143/*2144PARANOID: We are extra paranoid and ensure the2145snapshot is definitely stored in the database2146before we change the syncstrings table's last_snapshot time.2147Indeed, we do a query to the database itself2148to ensure that the snapshot was really saved2149before changing last_snapshot, since the above2150patches_table.save only ensures that the snapshot2151was (presumably) saved *from the browser to the project*.2152We do give this several chances, since it might2153take a little while for the project to save it.2154*/2155let success: boolean = false;2156for (let i = 0; i < 6; i++) {2157const x = await callback2(this.client.query, {2158project_id: this.project_id,2159query: {2160patches: {2161string_id: this.string_id,2162time,2163snapshot: null,2164},2165},2166});2167if (this.state != "ready") return;2168if (x.query.patches == null || x.query.patches.snapshot != snapshot) {2169await delay((i + 1) * 3000);2170} else {2171success = true;2172break;2173}2174}2175if (!success) {2176// We make this non-fatal to not crash the entire project2177// throw Error(2178// "unable to confirm that snapshot was saved to the database"2179// );21802181// We make this non-fatal, because throwing an exception here 
WOULD2182// DEFINITELY break other things. Everything is saved to a file system2183// after all, so there's no major data loss potential at present.2184console.warn(2185"ERROR: (nonfatal) unable to confirm that snapshot was saved to the database",2186);2187const dbg = this.dbg("snapshot");2188dbg(2189"ERROR: (nonfatal) unable to confirm that snapshot was saved to the database",2190);2191return;2192}2193}21942195if (this.state != "ready") return;2196await this.set_syncstring_table({2197last_snapshot: time,2198});2199this.last_snapshot = time;2200}22012202// Have a snapshot every this.snapshot_interval patches, except2203// for the very last interval.2204private async snapshot_if_necessary(): Promise<void> {2205if (this.get_state() !== "ready") return;2206const dbg = this.dbg("snapshot_if_necessary");2207const max_size = Math.floor(1.2 * MAX_FILE_SIZE_MB * 1000000);2208const interval = this.snapshot_interval;2209dbg("check if we need to make a snapshot:", { interval, max_size });2210assertDefined(this.patch_list);2211const time = this.patch_list.time_of_unmade_periodic_snapshot(2212interval,2213max_size,2214);2215if (time != null) {2216dbg("yes, make a snapshot at time", time);2217await this.snapshot(time);2218} else {2219dbg("no need to make a snapshot yet");2220}2221}22222223/*- x - patch object2224- time0, time1: optional range of times2225return undefined if patch not in this range2226- patch: if given will be used as an actual patch2227instead of x.patch, which is a JSON string.2228*/2229private process_patch(2230x: Map<string, any>,2231time0?: Date,2232time1?: Date,2233patch?: any,2234): Patch | undefined {2235let t = x.get("time");2236if (!is_date(t)) {2237// who knows what is in the database...2238try {2239t = ISO_to_Date(t);2240if (isNaN(t)) {2241// ignore patches with bad times2242return;2243}2244} catch (err) {2245// ignore patches with invalid times2246return;2247}2248}2249const time: Date = t;2250if ((time0 != null && time < time0) || (time1 != null && 
time > time1)) {2251// out of range2252return;2253}22542255const user_id: number = x.get("user_id");2256const sent: Date = x.get("sent");2257const prev: Date | undefined = x.get("prev");2258let size: number;2259if (patch == null) {2260/* Do **NOT** use misc.from_json, since we definitely2261do not want to unpack ISO timestamps as Date,2262since patch just contains the raw patches from2263user editing. This was done for a while, which2264led to horrific bugs in some edge cases...2265See https://github.com/sagemathinc/cocalc/issues/17712266*/2267if (x.has("patch")) {2268const p: string = x.get("patch");2269patch = JSON.parse(p);2270size = p.length;2271} else {2272patch = [];2273size = 2;2274}2275} else {2276const p = x.get("patch");2277// Looking at other code, I think this JSON.stringify (which2278// would be a waste of time) never gets called in practice.2279size = p != null ? p.length : JSON.stringify(patch).length;2280}22812282const obj: any = {2283time,2284user_id,2285patch,2286size,2287};2288const snapshot: string = x.get("snapshot");2289if (sent != null) {2290obj.sent = sent;2291}2292if (prev != null) {2293obj.prev = prev;2294}2295if (snapshot != null) {2296obj.snapshot = snapshot;2297}2298return obj;2299}23002301/* Return all patches with time such that2302time0 <= time <= time1;2303If time0 undefined then sets time0 equal to time of last_snapshot.2304If time1 undefined treated as +oo.2305*/2306private get_patches(time0?: Date, time1?: Date): Patch[] {2307this.assert_table_is_ready("patches");23082309if (time0 == null) {2310time0 = this.last_snapshot;2311}2312// m below is an immutable map with keys the string that2313// is the JSON version of the primary key2314// [string_id, timestamp, user_number].2315const m: Map<string, any> | undefined = this.patches_table.get();2316if (m == null) {2317// won't happen because of assert above.2318throw Error("patches_table must be initialized");2319}2320const v: Patch[] = [];2321m.forEach((x, _) => {2322const p = 
this.process_patch(x, time0, time1);2323if (p != null) {2324return v.push(p);2325}2326});2327v.sort(patch_cmp);2328return v;2329}23302331public has_full_history = (): boolean => {2332return !this.last_snapshot || this.load_full_history_done;2333};23342335public load_full_history = async (): Promise<void> => {2336if (this.has_full_history() || this.ephemeral) {2337return;2338}2339const query = this.patch_table_query();2340const result = await callback2(this.client.query, {2341project_id: this.project_id,2342query: { patches: [query] },2343});2344const v: Patch[] = [];2345// process_patch assumes immutable objects2346fromJS(result.query.patches).forEach((x) => {2347const p = this.process_patch(x, new Date(0), this.last_snapshot);2348if (p != null) {2349v.push(p);2350}2351});2352assertDefined(this.patch_list);2353this.patch_list.add(v);2354this.load_full_history_done = true;2355return;2356};23572358public show_history = (opts = {}): void => {2359assertDefined(this.patch_list);2360this.patch_list.show_history(opts);2361};23622363public set_snapshot_interval = async (n: number): Promise<void> => {2364await this.set_syncstring_table({2365snapshot_interval: n,2366});2367await this.syncstring_table.save();2368};23692370/* Check if any patches that just got confirmed as saved2371are relatively old; if so, we mark them as such and2372also possibly recompute snapshots.2373*/2374private async handle_offline(data): Promise<void> {2375this.assert_not_closed("handle_offline");2376const now: Date = this.client.server_time();2377let oldest: Date | undefined = undefined;2378for (const obj of data) {2379if (obj.sent) {2380// CRITICAL: ignore anything already processed! 
(otherwise, infinite loop)2381continue;2382}2383if (now.valueOf() - obj.time.valueOf() >= 1000 * OFFLINE_THRESH_S) {2384// patch is "old" -- mark it as likely being sent as a result of being2385// offline, so clients could potentially discard it.2386obj.sent = now;2387this.patches_table.set(obj);2388this.patches_table.save();2389if (oldest == null || obj.time < oldest) {2390oldest = obj.time;2391}2392}2393}2394if (oldest) {2395//dbg("oldest=#{oldest}, so check whether any snapshots need to be recomputed")2396assertDefined(this.patch_list);2397for (const snapshot_time of this.patch_list.snapshot_times()) {2398if (snapshot_time >= oldest) {2399//console.log("recomputing snapshot #{snapshot_time}")2400await this.snapshot(snapshot_time, true);2401}2402}2403}2404}24052406public get_last_save_to_disk_time = (): Date => {2407return this.last_save_to_disk_time;2408};24092410private handle_syncstring_save_state = async (2411state: string,2412time: Date,2413): Promise<void> => {2414// Called when the save state changes.24152416/* this.syncstring_save_state is used to make it possible to emit a2417'save-to-disk' event, whenever the state changes2418to indicate a save completed.24192420NOTE: it is intentional that this.syncstring_save_state is not defined2421the first time this function is called, so that save-to-disk2422with last save time gets emitted on initial load (which, e.g., triggers2423latex compilation properly in case of a .tex file).2424*/2425if (state === "done" && this.syncstring_save_state !== "done") {2426this.last_save_to_disk_time = time;2427this.emit("save-to-disk", time);2428}2429const dbg = this.dbg("handle_syncstring_save_state");2430dbg(2431`state=${state}; this.syncstring_save_state=${this.syncstring_save_state}; this.state=${state}`,2432);2433if (2434this.state === "ready" &&2435(await this.isFileServer()) &&2436this.syncstring_save_state !== "requested" &&2437state === "requested"2438) {2439this.syncstring_save_state = state; // only used in the if 
above2440dbg("requesting save to disk -- calling save_to_disk");2441// state just changed to requesting a save to disk...2442// so we do it (unless of course syncstring is still2443// being initialized).2444try {2445// Uncomment the following to test simulating a2446// random failure in save_to_disk:2447// if (Math.random() < 0.5) throw Error("CHAOS MONKEY!"); // FOR TESTING ONLY.2448await this.save_to_disk();2449} catch (err) {2450// CRITICAL: we must unset this.syncstring_save_state (and set the save state);2451// otherwise, it stays as "requested" and this if statement would never get2452// run again, thus completely breaking saving this doc to disk.2453// It is normal behavior that *sometimes* this.save_to_disk might2454// throw an exception, e.g., if the file is temporarily deleted2455// or save it called before everything is initialized, or file2456// is temporarily set readonly, or maybe there is a file system error.2457// Of course, the finally below will also take care of this. 
However,2458// it's nice to record the error here.2459this.syncstring_save_state = "done";2460await this.set_save({ state: "done", error: `${err}` });2461dbg(`ERROR saving to disk in handle_syncstring_save_state-- ${err}`);2462} finally {2463// No matter what, after the above code is run,2464// the save state in the table better be "done".2465// We triple check that here, though of course2466// we believe the logic in save_to_disk and above2467// should always accomplish this.2468dbg("had to set the state to done in finally block");2469if (2470this.state === "ready" &&2471(this.syncstring_save_state != "done" ||2472this.syncstring_table_get_one().getIn(["save", "state"]) != "done")2473) {2474this.syncstring_save_state = "done";2475await this.set_save({ state: "done", error: "" });2476}2477}2478}2479};24802481private async handle_syncstring_update(): Promise<void> {2482if (this.state === "closed") {2483return;2484}2485const dbg = this.dbg("handle_syncstring_update");2486dbg();24872488const data = this.syncstring_table_get_one();2489const x: any = data != null ? data.toJS() : undefined;24902491if (x != null && x.save != null) {2492this.handle_syncstring_save_state(x.save.state, x.save.time);2493}24942495dbg(JSON.stringify(x));2496if (x == null || x.users == null) {2497dbg("new_document");2498await this.handle_syncstring_update_new_document();2499} else {2500dbg("update_existing");2501await this.handle_syncstring_update_existing_document(x, data);2502}2503}25042505private async handle_syncstring_update_new_document(): Promise<void> {2506// Brand new document2507this.emit("load-time-estimate", { type: "new", time: 1 });2508this.last_snapshot = undefined;2509this.snapshot_interval =2510schema.SCHEMA.syncstrings.user_query?.get?.fields.snapshot_interval;25112512// Brand new syncstring2513// TODO: worry about race condition with everybody making themselves2514// have user_id 0... 
?2515this.my_user_id = 0;2516this.users = [this.client.client_id()];2517const obj = {2518string_id: this.string_id,2519project_id: this.project_id,2520path: this.path,2521last_snapshot: this.last_snapshot,2522users: this.users,2523doctype: JSON.stringify(this.doctype),2524last_active: this.client.server_time(),2525};2526this.syncstring_table.set(obj);2527await this.syncstring_table.save();2528this.settings = Map();2529this.emit("metadata-change");2530this.emit("settings-change", this.settings);2531}25322533private async handle_syncstring_update_existing_document(2534x: any,2535data: Map<string, any>,2536): Promise<void> {2537// Existing document.25382539if (this.path == null) {2540// We just opened the file -- emit a load time estimate.2541if (x.archived) {2542this.emit("load-time-estimate", { type: "archived", time: 3 });2543} else {2544this.emit("load-time-estimate", { type: "ready", time: 1 });2545}2546}2547// TODO: handle doctype change here (?)2548this.last_snapshot = x.last_snapshot;2549this.snapshot_interval = x.snapshot_interval;2550this.users = x.users;2551// @ts-ignore2552this.project_id = x.project_id;2553// @ts-ignore2554this.path = x.path;25552556const settings = data.get("settings", Map());2557if (settings !== this.settings) {2558this.settings = settings;2559this.emit("settings-change", settings);2560}25612562// Ensure that this client is in the list of clients2563const client_id: string = this.client_id();2564this.my_user_id = this.users.indexOf(client_id);2565if (this.my_user_id === -1) {2566this.my_user_id = this.users.length;2567this.users.push(client_id);2568await this.set_syncstring_table({2569users: this.users,2570});2571}25722573this.emit("metadata-change");2574}25752576private async init_watch(): Promise<void> {2577if (!(await this.isFileServer())) {2578// ensures we are NOT watching anything2579await this.update_watch_path();2580return;2581}25822583// If path isn't being properly watched, make it so.2584if (this.watch_path !== this.path) 
{2585await this.update_watch_path(this.path);2586}25872588await this.pending_save_to_disk();2589}25902591private async pending_save_to_disk(): Promise<void> {2592this.assert_table_is_ready("syncstring");2593if (!(await this.isFileServer())) {2594return;2595}25962597const x = this.syncstring_table.get_one();2598// Check if there is a pending save-to-disk that is needed.2599if (x != null && x.getIn(["save", "state"]) === "requested") {2600try {2601await this.save_to_disk();2602} catch (err) {2603const dbg = this.dbg("pending_save_to_disk");2604dbg(`ERROR saving to disk in pending_save_to_disk -- ${err}`);2605}2606}2607}26082609private async update_watch_path(path?: string): Promise<void> {2610const dbg = this.dbg("update_watch_path");2611if (this.file_watcher != null) {2612// clean up2613dbg("close");2614this.file_watcher.close();2615delete this.file_watcher;2616delete this.watch_path;2617}2618if (path != null && this.client.is_deleted(path, this.project_id)) {2619dbg(`not setting up watching since "${path}" is explicitly deleted`);2620return;2621}2622if (path == null) {2623dbg("not opening another watcher since path is null");2624this.watch_path = path;2625return;2626}2627if (this.watch_path != null) {2628// this case is impossible since we deleted it above if it is was defined.2629dbg("watch_path already defined");2630return;2631}2632dbg("opening watcher...");2633if (this.state === "closed") {2634throw Error("must not be closed");2635}2636this.watch_path = path;2637try {2638if (!(await callback2(this.client.path_exists, { path }))) {2639if (this.client.is_deleted(path, this.project_id)) {2640dbg(`not setting up watching since "${path}" is explicitly deleted`);2641return;2642}2643// path does not exist2644dbg(2645`write '${path}' to disk from syncstring in-memory database version`,2646);2647const data = this.to_str();2648await callback2(this.client.write_file, { path, data });2649dbg(`wrote '${path}' to disk`);2650}2651} catch (err) {2652// This can happen, e.g, if 
path is read only.2653dbg(`could NOT write '${path}' to disk -- ${err}`);2654await this.update_if_file_is_read_only();2655// In this case, can't really setup a file watcher.2656return;2657}26582659dbg("now requesting to watch file");2660this.file_watcher = this.client.watch_file({ path });2661this.file_watcher.on("change", this.handle_file_watcher_change);2662this.file_watcher.on("delete", this.handle_file_watcher_delete);2663this.setupReadOnlyTimer();2664}26652666private setupReadOnlyTimer = () => {2667if (this.read_only_timer) {2668clearInterval(this.read_only_timer as any);2669this.read_only_timer = 0;2670}2671this.read_only_timer = <any>(2672setInterval(this.update_if_file_is_read_only, READ_ONLY_CHECK_INTERVAL_MS)2673);2674};26752676private handle_file_watcher_change = async (ctime: Date): Promise<void> => {2677const dbg = this.dbg("handle_file_watcher_change");2678const time: number = ctime.valueOf();2679dbg(2680`file_watcher: change, ctime=${time}, this.save_to_disk_start_ctime=${this.save_to_disk_start_ctime}, this.save_to_disk_end_ctime=${this.save_to_disk_end_ctime}`,2681);2682if (2683this.save_to_disk_start_ctime == null ||2684(this.save_to_disk_end_ctime != null &&2685time - this.save_to_disk_end_ctime >= RECENT_SAVE_TO_DISK_MS)2686) {2687// Either we never saved to disk, or the last attempt2688// to save was at least RECENT_SAVE_TO_DISK_MS ago, and it finished,2689// so definitely this change event was not caused by it.2690dbg("load_from_disk since no recent save to disk");2691await this.load_from_disk();2692return;2693}2694};26952696private handle_file_watcher_delete = async (): Promise<void> => {2697this.assert_is_ready("handle_file_watcher_delete");2698const dbg = this.dbg("handle_file_watcher_delete");2699dbg("delete: set_deleted and closing");2700await this.client.set_deleted(this.path, this.project_id);2701this.close();2702};27032704private load_from_disk = async (): Promise<number> => {2705const path = this.path;2706const dbg = 
this.dbg("load_from_disk");2707dbg();2708const exists: boolean = await callback2(this.client.path_exists, { path });2709let size: number;2710if (!exists) {2711dbg("file no longer exists -- setting to blank");2712size = 0;2713this.from_str("");2714} else {2715dbg("file exists");2716await this.update_if_file_is_read_only();27172718const data = await callback2<string>(this.client.path_read, {2719path,2720maxsize_MB: MAX_FILE_SIZE_MB,2721});27222723size = data.length;2724dbg(`got it -- length=${size}`);2725this.from_str(data);2726// we also know that this is the version on disk, so we update the hash2727this.commit();2728await this.set_save({2729state: "done",2730error: "",2731hash: hash_string(data),2732});2733}2734// save new version to database, which we just set via from_str.2735await this.save();2736return size;2737};27382739private set_save = async (save: {2740state: string;2741error: string;2742hash?: number;2743expected_hash?: number;2744time?: number;2745}): Promise<void> => {2746this.assert_table_is_ready("syncstring");2747// set timestamp of when the save happened; this can be useful2748// for coordinating running code, etc.... 
and is just generally useful.2749if (!save.time) {2750save.time = Date.now();2751}2752await this.set_syncstring_table({ save });2753};27542755private set_read_only = async (read_only: boolean): Promise<void> => {2756this.assert_table_is_ready("syncstring");2757await this.set_syncstring_table({ read_only });2758};27592760public is_read_only = (): boolean => {2761this.assert_table_is_ready("syncstring");2762return this.syncstring_table_get_one().get("read_only");2763};27642765public wait_until_read_only_known = async (): Promise<void> => {2766await this.wait_until_ready();2767function read_only_defined(t: SyncTable): boolean {2768const x = t.get_one();2769if (x == null) {2770return false;2771}2772return x.get("read_only") != null;2773}2774await this.syncstring_table.wait(read_only_defined, 5 * 60);2775};27762777/* Returns true if the current live version of this document has2778a different hash than the version mostly recently saved to disk.2779I.e., if there are changes that have not yet been **saved to2780disk**. See the other function has_uncommitted_changes below2781for determining whether there are changes that haven't been2782commited to the database yet. Returns *undefined* if2783initialization not even done yet. */2784public has_unsaved_changes = (): boolean | undefined => {2785if (this.state !== "ready") {2786return;2787}2788const dbg = this.dbg("has_unsaved_changes");2789try {2790return this.hash_of_saved_version() !== this.hash_of_live_version();2791} catch (err) {2792dbg(2793"exception computing hash_of_saved_version and hash_of_live_version",2794err,2795);2796// This could happen, e.g. when syncstring_table isn't connected2797// in some edge case. Better to just say we don't know then crash2798// everything. 
  //       See https://github.com/sagemathinc/cocalc/issues/3577
  // (tail of a method whose beginning is above this excerpt)
      return;
    }
  };

  // Returns hash of last version saved to disk (as far as we know).
  public hash_of_saved_version = (): number | undefined => {
    if (this.state !== "ready") {
      return;
    }
    return this.syncstring_table_get_one().getIn(["save", "hash"]) as
      | number
      | undefined;
  };

  /* Return hash of the live version of the document,
     or undefined if the document isn't loaded yet.
     (TODO: write faster version of this for syncdb, which
     avoids converting to a string, which is a waste of time.) */
  public hash_of_live_version = (): number | undefined => {
    if (this.state !== "ready") {
      return;
    }
    return hash_string(this.doc.to_str());
  };

  /* Return true if there are changes to this syncstring that
     have not been committed to the database (with the commit
     acknowledged).  This does not mean the file has been
     written to disk; however, it does mean that it safe for
     the user to close their browser.
  */
  public has_uncommitted_changes = (): boolean => {
    if (this.state !== "ready") {
      return false;
    }
    return this.patches_table.has_uncommitted_changes();
  };

  // Commit any changes to the live document to
  // history as a new patch.  Returns true if there
  // were changes and false otherwise.   This works
  // fine offline, and does not wait until anything
  // is saved to the network, etc.
  public commit = (emitChangeImmediately = false): boolean => {
    // Nothing to commit when there is no live doc yet, or live equals
    // the last committed version.
    if (this.last == null || this.doc == null || this.last.is_equal(this.doc)) {
      return false;
    }
    // console.trace('commit');

    if (emitChangeImmediately) {
      // used for local clients.   NOTE: don't do this without explicit
      // request, since it could in some cases cause serious trouble.
      // E.g., for the jupyter backend doing this by default causes
      // an infinite recurse.  Having this as an option is important, e.g.,
      // to avoid flicker/delay in the UI.
      this.emit_change();
    }

    // Now save to backend as a new patch:
    this.emit("user-change");
    const patch = this.last.make_patch(this.doc); // must be nontrivial
    this.last = this.doc;
    // ... and save that to patches table
    const time = this.next_patch_time();
    this.commit_patch(time, patch);
    this.save(); // so eventually also gets sent out.
    return true;
  };

  /* Initiates a save of file to disk, then waits for the
     state to change. */
  public save_to_disk = async (): Promise<void> => {
    if (this.state != "ready") {
      // We just make save_to_disk a successful
      // no operation, if the document is either
      // closed or hasn't finished opening, since
      // there's a lot of code that tries to save
      // on exit/close or automatically, and it
      // is difficult to ensure it all checks state
      // properly.
      return;
    }
    const dbg = this.dbg("save_to_disk");
    if (this.client.is_deleted(this.path, this.project_id)) {
      dbg("not saving to disk because deleted");
      await this.set_save({ state: "done", error: "" });
      return;
    }

    // Make sure to include changes to the live document.
    // A side effect of save if we didn't do this is potentially
    // discarding them, which is obviously not good.
    this.commit();

    dbg("initiating the save");
    if (!this.has_unsaved_changes()) {
      dbg("no unsaved changes, so don't save");
      // CRITICAL: this optimization is assumed by
      // autosave, etc.
      await this.set_save({ state: "done", error: "" });
      return;
    }

    if (this.is_read_only()) {
      dbg("read only, so can't save to disk");
      // save should fail if file is read only and there are changes
      throw Error("can't save readonly file with changes to disk");
    }

    // First make sure any changes are saved to the database.
    // One subtle case where this matters is that loading a file
    // with \r's into codemirror changes them to \n...
    if (!(await this.isFileServer())) {
      dbg("browser client -- sending any changes over network");
      await this.save();
      dbg("save done; now do actual save to the *disk*.");
      this.assert_is_ready("save_to_disk - after save");
    }

    try {
      await this.save_to_disk_aux();
    } catch (err) {
      const error = `save to disk failed -- ${err}`;
      dbg(error);
      if (await this.isFileServer()) {
        this.set_save({ error, state: "done" });
      }
    }

    if (!(await this.isFileServer())) {
      dbg("now wait for the save to disk to finish");
      this.assert_is_ready("save_to_disk - waiting to finish");
      await this.wait_for_save_to_disk_done();
    }
    this.update_has_unsaved_changes();
  };

  /* Export the (currently loaded) history of editing of this
     document to a simple JSON-able object. */
  public export_history = (
    options: HistoryExportOptions = {},
  ): HistoryEntry[] => {
    this.assert_is_ready("export_history");
    const info = this.syncstring_table.get_one();
    if (info == null || !info.has("users")) {
      throw Error("syncstring table must be defined and users initialized");
    }
    const account_ids: string[] = info.get("users").toJS();
    assertDefined(this.patch_list);
    return export_history(account_ids, this.patch_list, options);
  };

  // Recompute the unsaved-changes flag and emit
  // "has-unsaved-changes" only when the value actually changed.
  private update_has_unsaved_changes(): void {
    if (this.state != "ready") {
      // This can happen, since this is called by a debounced function.
      // Make it a no-op in case we're not ready.
      // See https://github.com/sagemathinc/cocalc/issues/3577
      return;
    }
    const cur = this.has_unsaved_changes();
    if (cur !== this.last_has_unsaved_changes) {
      this.emit("has-unsaved-changes", cur);
      this.last_has_unsaved_changes = cur;
    }
  }

  // wait for save.state to change state.
  private async wait_for_save_to_disk_done(): Promise<void> {
    const dbg = this.dbg("wait_for_save_to_disk_done");
    dbg();
    // Predicate used with syncstring_table.wait below: true once the
    // filesystem owner has marked the save as done.
    function until(table): boolean {
      const done = table.get_one().getIn(["save", "state"]) === "done";
      dbg("checking... done=", done);
      return done;
    }

    let last_err: string | undefined = undefined;
    const f = async () => {
      dbg("f");
      if (
        this.state != "ready" ||
        this.client.is_deleted(this.path, this.project_id)
      ) {
        dbg("not ready or deleted - no longer trying to save.");
        return;
      }
      try {
        dbg("waiting until done...");
        await this.syncstring_table.wait(until, 15);
      } catch (err) {
        dbg("timed out after 15s");
        throw Error("timed out");
      }
      // State may have changed during the await above, so re-check.
      if (
        this.state != "ready" ||
        this.client.is_deleted(this.path, this.project_id)
      ) {
        dbg("not ready or deleted - no longer trying to save.");
        return;
      }
      const err = this.syncstring_table_get_one().getIn(["save", "error"]) as
        | string
        | undefined;
      if (err) {
        dbg("error", err);
        last_err = err;
        throw Error(err);
      }
      dbg("done, with no error.");
      last_err = undefined;
      return;
    };
    await retry_until_success({
      f,
      max_tries: 8,
      desc: "wait_for_save_to_disk_done",
    });
    if (
      this.state != "ready" ||
      this.client.is_deleted(this.path, this.project_id)
    ) {
      return;
    }
    if (last_err && typeof this.client.log_error === "function") {
      this.client.log_error({
        string_id: this.string_id,
        path: this.path,
        project_id: this.project_id,
        error: `Error saving file -- ${last_err}`,
      });
    }
  }

  /* Auxiliary function 2 for saving to disk:
     If this is associated with
     a project and has a filename.
     A user (web browsers) sets the save state to requested.
     The project sets the state to saving, does the save
     to disk, then sets the state to done.
  */
  private async save_to_disk_aux(): Promise<void> {
    this.assert_is_ready("save_to_disk_aux");

    if (!(await this.isFileServer())) {
      return await this.save_to_disk_non_filesystem_owner();
    }

    try {
      return await this.save_to_disk_filesystem_owner();
    } catch (err) {
      this.emit("save_to_disk_filesystem_owner", err);
      throw err;
    }
  }

  // Browser-client side of save-to-disk: just request the save by
  // setting save.state to "requested"; the filesystem owner does the work.
  private async save_to_disk_non_filesystem_owner(): Promise<void> {
    this.assert_is_ready("save_to_disk_non_filesystem_owner");

    if (!this.has_unsaved_changes()) {
      /* Browser client has no unsaved changes,
         so don't need to save --
         CRITICAL: this optimization is assumed by autosave.
      */
      return;
    }
    const x = this.syncstring_table.get_one();
    if (x != null && x.getIn(["save", "state"]) === "requested") {
      // Nothing to do -- save already requested, which is
      // all the browser client has to do.
      return;
    }

    // string version of this doc
    const data: string = this.to_str();
    const expected_hash = hash_string(data);
    await this.set_save({ state: "requested", error: "", expected_hash });
  }

  // Filesystem-owner side of save-to-disk: actually write the file
  // and record the resulting state/hash in the syncstring table.
  private async save_to_disk_filesystem_owner(): Promise<void> {
    this.assert_is_ready("save_to_disk_filesystem_owner");
    const dbg = this.dbg("save_to_disk_filesystem_owner");

    // check if on-disk version is same as in memory, in
    // which case no save is needed.
    const data = this.to_str(); // string version of this doc
    const hash = hash_string(data);
    dbg("hash = ", hash);

    /*
    // TODO: put this consistency check back in (?).
    const expected_hash = this.syncstring_table
      .get_one()
      .getIn(["save", "expected_hash"]);
    */

    if (hash === this.hash_of_saved_version()) {
      // No actual save to disk needed; still we better
      // record this fact in table in case it
      // isn't already recorded
      this.set_save({ state: "done", error: "", hash });
      return;
    }

    const path = this.path;
    if (!path) {
      const err = "cannot save without path";
      this.set_save({ state: "done", error: err });
      throw Error(err);
    }

    dbg("project - write to disk file", path);
    // set window to slightly earlier to account for clock
    // imprecision.
    // Over an sshfs mount, all stats info is **rounded down
    // to the nearest second**, which this also takes care of.
    this.save_to_disk_start_ctime = Date.now() - 1500;
    this.save_to_disk_end_ctime = undefined;
    try {
      await callback2(this.client.write_file, { path, data });
      this.assert_is_ready("save_to_disk_filesystem_owner -- after write_file");
      const stat = await callback2(this.client.path_stat, { path });
      this.assert_is_ready("save_to_disk_filesystem_owner -- after path_state");
      this.save_to_disk_end_ctime = stat.ctime.valueOf() + 1500;
      this.set_save({
        state: "done",
        error: "",
        hash: hash_string(data),
      });
    } catch (err) {
      this.set_save({ state: "done", error: JSON.stringify(err) });
      throw err;
    }
  }

  /*
    When the underlying synctable that defines the state
    of the document changes due to new remote patches, this
    function is called.
    It handles update of the remote version, updating our
    live version as a result.
  */
  private async handle_patch_update(changed_keys): Promise<void> {
    if (changed_keys == null || changed_keys.length === 0) {
      // this happens right now when we do a save.
      return;
    }

    const dbg = this.dbg("handle_patch_update");
    //dbg(changed_keys);
    if (this.patch_update_queue == null) {
      this.patch_update_queue = [];
    }
    for (const key of changed_keys) {
      this.patch_update_queue.push(key);
    }

    dbg("Clear patch update_queue in a later event loop...");
    await delay(1);
    await this.handle_patch_update_queue();
    dbg("done");
  }

  /*
    Whenever new patches are added to this.patches_table,
    their timestamp gets added to this.patch_update_queue.
  */
  private async handle_patch_update_queue(): Promise<void> {
    const dbg = this.dbg("handle_patch_update_queue");
    try {
      this.handle_patch_update_queue_running = true;
      while (this.state != "closed" && this.patch_update_queue.length > 0) {
        dbg("queue size = ", this.patch_update_queue.length);
        const v: Patch[] = [];
        for (const key of this.patch_update_queue) {
          const x = this.patches_table.get(key);
          if (x != null) {
            // may be null, e.g., when deleted.
            const t = x.get("time");
            // Only need to process patches that we didn't
            // create ourselves.
            if (t && !this.my_patches[`${t.valueOf()}`]) {
              const p = this.process_patch(x);
              //dbg(`patch=${JSON.stringify(p)}`);
              if (p != null) {
                v.push(p);
              }
            }
          }
        }
        this.patch_update_queue = [];
        assertDefined(this.patch_list);
        this.patch_list.add(v);

        dbg("waiting for remote and doc to sync...");
        this.sync_remote_and_doc(v.length > 0);
        await this.patches_table.save();
        if (this.state === ("closed" as State)) return; // closed during await; nothing further to do
        dbg("remote and doc now synced");

        if (this.patch_update_queue.length > 0) {
          // It is very important that next loop happen in a later
          // event loop to avoid the this.sync_remote_and_doc call
          // in this.handle_patch_update_queue above from causing
          // sync_remote_and_doc to get called from within itself,
          // due to synctable changes being emited on save.
          dbg("wait for next event loop");
          await delay(1);
        } else {
          dbg("Patch sent, now make a snapshot if we are due for one.");
          await this.snapshot_if_necessary();
        }
      }
    } finally {
      if (this.state == "closed") return; // got closed, so nothing further to do

      // OK, done and nothing in the queue
      // Notify save() to try again -- it may have
      // paused waiting for this to clear.
      dbg("done");
      this.handle_patch_update_queue_running = false;
      this.emit("handle_patch_update_queue_done");
    }
  }

  /* Disable and enable sync.   When disabled we still
     collect patches from upstream (but do not apply them
     locally), and changes we make are broadcast into
     the patch stream.   When we re-enable sync, all
     patches are put together in the stream and
     everything is synced as normal.  This is useful, e.g.,
     to make it so a user **actively** editing a document is
     not interrupted by being forced to sync (in particular,
     by the 'before-change' event that they use to update
     the live document).

     Also, delay_sync will delay syncing local with upstream
     for the given number of ms.  Calling it regularly while
     user is actively editing to avoid them being bothered
     by upstream patches getting merged in.

     IMPORTANT: I implemented this, but it is NOT used anywhere
     else in the codebase, so don't trust that it works.
  */

  public disable_sync = (): void => {
    this.sync_is_disabled = true;
  };

  public enable_sync = (): void => {
    this.sync_is_disabled = false;
    this.sync_remote_and_doc(true);
  };

  // Temporarily disable sync; re-enables (and syncs) after timeout_ms.
  // Repeated calls reset the timer.
  public delay_sync = (timeout_ms = 2000): void => {
    clearTimeout(this.delay_sync_timer);
    this.disable_sync();
    this.delay_sync_timer = setTimeout(() => {
      this.enable_sync();
    }, timeout_ms);
  };

  /*
    Merge remote patches and live version to create new live version,
    which is equal to result of applying all patches.
  */
  private sync_remote_and_doc(upstreamPatches: boolean): void {
    if (this.last == null || this.doc == null || this.sync_is_disabled) {
      return;
    }

    // Critical to save what we have now so it doesn't get overwritten during
    // before-change or setting this.doc below.  This caused
    // https://github.com/sagemathinc/cocalc/issues/5871
    this.commit();

    if (upstreamPatches && this.state == "ready") {
      // First save any unsaved changes from the live document, which this
      // sync-doc doesn't acutally know the state of.  E.g., this is some
      // rapidly changing live editor with changes not yet saved here.
      this.emit("before-change");
      // As a result of the emit in the previous line, all kinds of
      // nontrivial listener code probably just ran, and it should
      // have updated this.doc.  We commit this.doc, so that the
      // upstream patches get applied against the correct live this.doc.
      this.commit();
    }

    // Compute the global current state of the document,
    // which is got by applying all patches in order.
    // It is VERY important to do this, even if the
    // document is not yet ready, since it is critical
    // to properly set the state of this.doc to the value
    // of the patch list (e.g., not doing this 100% breaks
    // opening a file for the first time on cocalc-docker).
    assertDefined(this.patch_list);
    const new_remote = this.patch_list.value();
    if (!this.doc.is_equal(new_remote)) {
      // There is a possibility that live document changed, so
      // set to new version.
      this.last = this.doc = new_remote;
      if (this.state == "ready") {
        this.emit("after-change");
        this.emit_change();
      }
    }
  }

  // Immediately alert all watchers of all changes since
  // last time.
  private emit_change(): void {
    this.emit("change", this.doc?.changes(this.before_change));
    this.before_change = this.doc;
  }

  // Alert to changes soon, but debounced in case there are a large
  // number of calls in a group.  This is called by default.
  // The debounce param is 0, since the idea is that this just waits
  // until the next "render loop" to avoid huge performance issues
  // with a nested for loop of sets.  Doing it this way, massively
  // simplifies client code.
  emit_change_debounced = debounce(this.emit_change.bind(this), 0);

  // Merge obj's key/value pairs into the syncstring table record,
  // writing (and optionally saving) only when something actually changed.
  private set_syncstring_table = async (obj, save = true) => {
    let value = this.syncstring_table_get_one();
    const value0 = value;
    for (const key in obj) {
      value = value.set(key, obj[key]);
    }
    if (value0.equals(value)) {
      return;
    }
    this.syncstring_table.set(value);
    if (save) {
      await this.syncstring_table.save();
    }
  };
}