Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place.
Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place.
Path: blob/master/src/packages/sync/editor/db/doc.ts
Views: 687
/*1* This file is part of CoCalc: Copyright © 2020 Sagemath, Inc.2* License: MS-RSL – see LICENSE.md for details3*/45import { CompressedPatch, Document } from "../generic/types";6import * as immutable from "immutable";7import { isEqual } from "lodash";8import { is_array, is_object, copy_without, len } from "@cocalc/util/misc";9import {10make_patch as string_make_patch,11apply_patch as string_apply_patch,12} from "../generic/util";13import {14map_merge_patch,15merge_set,16nonnull_cols,17to_key,18to_str,19} from "./util";2021type Record = immutable.Map<string, any> | undefined;22type Records = immutable.List<Record>;23type Index = immutable.Map<string, immutable.Set<number>>;24type Indexes = immutable.Map<string, Index>;2526type jsmap = { [field: string]: any };2728export type WhereCondition = { [field: string]: any };29export type SetCondition =30| immutable.Map<string, any>31| { [field: string]: any };3233interface ChangeTracker {34changes: immutable.Set<immutable.Map<string, any>>; // primary keys that changed35from_db: Document; // DBDocument object where change tracking started.36}3738// Immutable DB document39export class DBDocument implements Document {40private primary_keys: Set<string>;4142// Columns that should be treated as non-atomic strings.43// This means simultaneous changes by multiple clients are44// merged, instead of last-write-wins. Also, large changes45// are propagated at patches, rather than sending46// complete string.47private string_cols: Set<string>;4849// list of records -- each is assumed to be an immutable.Map.50private records: immutable.List<Record>;5152// set of numbers n such that records.get(n) is defined.53private everything: immutable.Set<number>;5455// TODO: describe56private indexes: Indexes;5758// Change tracking.59private change_tracker: ChangeTracker;6061public readonly size: number;6263private to_str_cache?: string;6465constructor(66primary_keys: Set<string>,67string_cols: Set<string>,68records: Records = immutable.List(),69everything?: immutable.Set<number>,70indexes?: Indexes,71change_tracker?: ChangeTracker,72) {73this.set = this.set.bind(this);74this.delete_array = this.delete_array.bind(this);75this.primary_key_cols = this.primary_key_cols.bind(this);76this.primary_key_part = this.primary_key_part.bind(this);7778this.primary_keys = new Set(primary_keys);79if (this.primary_keys.size === 0) {80throw Error("there must be at least one primary key");81}82this.string_cols = new Set(string_cols);83this.records = records;84this.everything = everything == null ? this.init_everything() : everything;85this.size = this.everything.size;86this.indexes = indexes == null ? this.init_indexes() : indexes;87this.change_tracker =88change_tracker == null ? this.init_change_tracker() : change_tracker;89}9091// sorted immutable Set of i such that this.records.get(i) is defined.92private init_everything(): immutable.Set<number> {93const v: number[] = [];94for (let n = 0; n < this.records.size; n++) {95if (this.records.get(n) != undefined) {96v.push(n);97}98}99return immutable.Set(v);100}101102private init_indexes(): Indexes {103// Build indexes104let indexes: Indexes = immutable.Map();105for (const field of this.primary_keys) {106const index: Index = immutable.Map();107indexes = indexes.set(field, index);108}109this.records.map((record: Record, n: number) => {110if (record == null) {111// null records are sentinels for deletions.112return;113}114indexes.map((index: Index, field: string) => {115const val = record.get(field);116if (val != null) {117const k: string = to_key(val);118let matches = index.get(k);119if (matches != null) {120matches = matches.add(n);121} else {122matches = immutable.Set([n]);123}124indexes = indexes.set(field, index.set(k, matches));125}126});127});128return indexes;129}130131private init_change_tracker(): ChangeTracker {132return { changes: immutable.Set(), from_db: this };133}134135public to_str(): string {136if (this.to_str_cache != null) {137// We can use a cache, since this is an immutable object138return this.to_str_cache;139}140const obj = this.get({}).toJS();141return (this.to_str_cache = to_str(obj));142}143144public is_equal(other?: DBDocument): boolean {145if (other == null) {146// Definitely not equal if not defined.147return false;148}149if (this.records === other.records) {150return true;151}152if (this.size !== other.size) {153return false;154}155// We include undefineds in the sets below156// since records is a List of Record or undefined, i.e.,157// we use undefined as a sentinel in order to158// make deletes be efficient.159return immutable160.Set(this.records)161.add(undefined)162.equals(immutable.Set(other.records).add(undefined));163}164165public apply_patch(patch: CompressedPatch): DBDocument {166let i = 0;167let db: DBDocument = this;168while (i < patch.length) {169if (patch[i] === -1) {170db = db.delete(patch[i + 1]);171} else if (patch[i] === 1) {172db = db.set(patch[i + 1]);173}174i += 2;175}176return db;177}178179public make_patch(other: DBDocument): CompressedPatch {180if (other.size === 0) {181// Special case -- delete everything182return [-1, [{}]];183}184185let t0 = immutable.Set(this.records);186let t1 = immutable.Set(other.records);187// Remove the common intersection -- nothing going on there.188// Doing this greatly reduces the complexity in the common189// case in which little has changed190const common = t0.intersect(t1).add(undefined);191t0 = t0.subtract(common);192t1 = t1.subtract(common);193194// Easy very common special cases195if (t0.size === 0) {196// Special case: t0 is empty -- insert all the records.197return [1, t1.toJS()];198}199if (t1.size === 0) {200// Special case: t1 is empty -- bunch of deletes201const v: jsmap[] = [];202t0.map((x) => {203if (x != null) {204v.push(this.primary_key_part(x.toJS()));205}206});207return [-1, v];208}209210// compute the key parts of t0 and t1 as sets211// means -- set got from t0 by taking only the primary_key columns212// (Why the "as"? Typescript makes the output of the map be of type213// Iterable<Record, Iterable<string, any>>214// But, it's just a set. So cast it.)215const k0 = t0.map(this.primary_key_cols) as immutable.Set<Record>;216const k1 = t1.map(this.primary_key_cols) as immutable.Set<Record>;217218const add: any[] = [];219let remove: any[] | undefined = undefined;220221// Deletes: everything in k0 that is not in k1222const deletes = k0.subtract(k1);223if (deletes.size > 0) {224remove = deletes.toJS();225}226227// Inserts: everything in k1 that is not in k0228const inserts = k1.subtract(k0);229if (inserts.size > 0) {230inserts.map((k) => {231if (k != null) {232const x = other.get_one(k);233if (x != null) {234add.push(x.toJS());235}236}237});238}239240// Everything in k1 that is also in k0 -- these241// must have all changed242const changed = k1.intersect(k0);243if (changed.size > 0) {244changed.map((k) => {245if (k == null) {246return;247}248const obj = k.toJS();249const obj0 = this.primary_key_part(obj);250const from0 = this.get_one(obj0);251const to0 = other.get_one(obj0);252if (from0 == null || to0 == null) {253// just to make typescript happy254return;255}256const from = from0.toJS();257const to = to0.toJS();258// undefined for each key of from not in to259for (const key in from) {260if (to[key] == null) {261obj[key] = null;262}263}264// Explicitly set each key of `to` that is different265// than corresponding key of `from`:266for (const key in to) {267const v = to[key];268if (!isEqual(from[key], v)) {269if (this.string_cols.has(key) && from[key] != null && v != null) {270if (typeof from[key] == "string" && typeof v == "string") {271// A valid string patch, converting one string to another.272obj[key] = string_make_patch(from[key], v);273} else {274/* This should be impossible, due to the type checking that275I've added to set in this same commit. However, it can't276hurt to put in the above check on types, just in case.277https://github.com/sagemathinc/cocalc/issues/3625278A string col actually contains something that is not279a string. (Maybe it's due to loading something old?)280In any case, it's better to "best effort" this, rather281than to make the entire document be un-savable and282un-usable to the user.283We just give up and record no change in this case, so284when doc is read in later (or by another user), there285will be no weird corruption.286This case will probably go away completely when all287client code is written with proper typing.288*/289}290} else if (is_object(from[key]) && is_object(v)) {291// Changing from one map to another, where they are not292// equal -- can use a merge to make this more efficient.293// This is an important optimization, to avoid making294// patches HUGE.295obj[key] = map_merge_patch(from[key], v);296} else {297obj[key] = v;298}299}300}301add.push(obj);302});303}304305const patch: any[] = [];306if (remove != null) {307patch.push(-1);308patch.push(remove);309}310if (add.length > 0) {311patch.push(1);312patch.push(add);313}314315return patch;316}317318// Given an immutable map f, return its restriction319// to the primary keys.320private primary_key_cols(321f: immutable.Map<string, any>,322): immutable.Map<string, any> {323return f.filter(324(_, k) => k != null && this.primary_keys.has(k),325) as immutable.Map<string, any>;326}327328private select(where: WhereCondition): immutable.Set<number> {329if (immutable.Map.isMap(where)) {330// TODO: maybe do not allow?331where = where.toJS();332}333// Return immutable set with defined indexes the elts of @_records that334// satisfy the where condition.335const n: number = len(where);336let result: immutable.Set<number> | undefined = undefined;337for (const field in where) {338const value = where[field];339const index = this.indexes.get(field);340if (index == null) {341throw Error(`field '${field}' must be a primary key`);342}343const v: immutable.Set<number> | undefined = index.get(to_key(value));344// v may be undefined here345if (v == null) {346return immutable.Set(); // no matches for this field - done347}348if (n === 1) {349// no need to do further intersection350return v;351}352if (result != null) {353// intersect with what we've found so far via indexes.354result = result.intersect(v);355} else {356result = v;357}358}359if (result == null) {360// where condition must have been empty -- matches everything361return this.everything;362} else {363return result;364}365}366367// Used internally for determining the set/where parts of an object.368private parse(obj: Map<string, any>): { where: jsmap; set: jsmap } {369const where: jsmap = {};370const set: jsmap = {};371for (const field in obj) {372const val = obj[field];373if (this.primary_keys.has(field)) {374if (val != null) {375where[field] = val;376}377} else {378set[field] = val;379}380}381return { where, set };382}383384public set(obj: SetCondition | SetCondition[]): DBDocument {385// If you ever want to know exactly what set something, uncomment this:386// console.trace("set ", obj);387if (Array.isArray(obj)) {388let z: DBDocument = this;389for (const x of obj as SetCondition[]) {390z = z.set(x);391}392return z;393}394if (immutable.Map.isMap(obj)) {395// TODO: maybe do not allow?396// it is very clean/convenient to allow this397obj = (obj as immutable.Map<string, any>).toJS();398}399const { set, where } = this.parse(obj as Map<string, any>);400const matches = this.select(where);401let { changes } = this.change_tracker;402const first_match = matches != null ? matches.min() : undefined;403if (first_match != null) {404// edit the first existing record that matches405let record = this.records.get(first_match);406if (record == null) {407// make typescript happier.408throw Error("bug -- record can't be null");409}410const before = record;411for (const field in set) {412const value = set[field];413if (value === null) {414// null = how to delete fields415record = record.delete(field);416} else {417if (this.string_cols.has(field)) {418if (is_array(value)) {419// special case: a string patch420record = record.set(421field,422string_apply_patch(value, before.get(field, ""))[0],423);424continue;425}426if (typeof value != "string") {427// Putting this guard in to address428// https://github.com/sagemathinc/cocalc/issues/3625429// which was caused by some client code setting a string_col430// to something that is not a string. We'll next have431// to wait for this exception to be triggered someday...432throw Error(433`'${field}' must be a string, but tried to set to '${value}' of type ${typeof value}`,434);435}436// falls through to actually set it below.437}438let new_val;439const cur = record.get(field);440const change = immutable.fromJS(value);441if (immutable.Map.isMap(cur) && immutable.Map.isMap(change)) {442new_val = merge_set(cur, change);443} else {444new_val = change;445}446record = record.set(field, new_val);447}448}449450if (!before.equals(record)) {451// there was an actual change, so update; doesn't change452// anything involving indexes.453changes = changes.add(this.primary_key_cols(record));454return new DBDocument(455this.primary_keys,456this.string_cols,457this.records.set(first_match, record),458this.everything,459this.indexes,460{ changes, from_db: this.change_tracker.from_db },461);462} else {463return this;464}465} else {466// The sparse array matches had nothing in it, so467// append a new record.468for (const field of this.string_cols) {469if (obj[field] != null && is_array(obj[field])) {470// It's a patch -- but there is nothing to patch,471// so discard this field472obj = copy_without(obj, field);473}474}475// remove null columns (null indicates delete)476const record = nonnull_cols(immutable.fromJS(obj));477changes = changes.add(this.primary_key_cols(record));478const records = this.records.push(record);479const n = records.size - 1;480const everything = this.everything.add(n);481// update indexes482let indexes = this.indexes;483for (const field of this.primary_keys) {484const val = obj[field];485if (val != null) {486let index = indexes.get(field);487if (index == null) {488index = immutable.Map();489}490const k = to_key(val);491let matches = index.get(k);492if (matches != null) {493matches = matches.add(n);494} else {495matches = immutable.Set([n]);496}497indexes = indexes.set(field, index.set(k, matches));498}499}500return new DBDocument(501this.primary_keys,502this.string_cols,503records,504everything,505indexes,506{ changes, from_db: this.change_tracker.from_db },507);508}509}510511private delete_array(where: WhereCondition[]): DBDocument {512let z = this as DBDocument;513for (const x of where) {514z = z.delete(x);515}516return z;517}518519public delete(where: WhereCondition | WhereCondition[]): DBDocument {520// console.log("delete #{JSON.stringify(where)}")521if (is_array(where)) {522return this.delete_array(where as WhereCondition[]);523}524// if where undefined, will delete everything525if (this.everything.size === 0) {526// no-op -- no data so deleting is trivial527return this;528}529let { changes } = this.change_tracker;530const remove = this.select(where);531if (remove.size === this.everything.size) {532// actually deleting everything; easy special cases533changes = changes.union(534this.records535.filter((record) => record != null)536.map(this.primary_key_cols),537);538return new DBDocument(539this.primary_keys,540this.string_cols,541undefined,542undefined,543undefined,544{ changes, from_db: this.change_tracker.from_db },545);546}547548// remove matches from every index549let indexes = this.indexes;550for (const field of this.primary_keys) {551let index = indexes.get(field);552if (index == null) {553continue;554}555remove.forEach((n) => {556if (n == null) {557return;558}559const record = this.records.get(n);560if (record == null) {561return;562}563const val = record.get(field);564if (val == null) {565return;566}567const k = to_key(val);568const matches = index?.get(k)?.delete(n);569if (matches == null || index == null) return; // shouldn't happen570if (matches.size === 0) {571index = index.delete(k);572} else {573index = index.set(k, matches);574}575indexes = indexes.set(field, index);576});577}578579// delete corresponding records (actually set to undefined to580// preserve index references).581let records = this.records;582remove.forEach((n) => {583if (n == null) {584return;585}586const record = records.get(n);587if (record == null) {588return;589}590changes = changes.add(this.primary_key_cols(record));591records = records.set(n, undefined);592});593594const everything = this.everything.subtract(remove);595596return new DBDocument(597this.primary_keys,598this.string_cols,599records,600everything,601indexes,602{ changes, from_db: this.change_tracker.from_db },603);604}605606// Returns immutable list of all matches607public get(where: WhereCondition): Records {608const matches = this.select(where);609if (matches == null) {610return immutable.List();611}612// The "as" is because Typescript just makes the result of613// filter some more generic type (but it isn't).614return immutable.List(615this.records.filter((_, n) => n != null && matches.includes(n)),616);617}618619// Returns the first match, or undefined if there are no matches620get_one(where: WhereCondition): Record | undefined {621// TODO: couldn't select have a shortcut to return once one622// result is found.623const matches = this.select(where);624if (matches == null) {625return;626}627const min = matches.min();628if (min == null) return;629return this.records.get(min);630}631632// x = javascript object633private primary_key_part(x: jsmap): jsmap {634const where: jsmap = {};635for (const k in x) {636const v = x[k];637if (this.primary_keys.has(k)) {638where[k] = v;639}640}641return where;642}643644// Return immutable set of primary key parts of records that645// change in going from this to other.646public changed_keys(other: DBDocument): immutable.Set<Record> {647if (this.records === other.records) {648// identical -- obviously, nothing changed.649return immutable.Set();650}651// Get the defined records; there may be undefined ones due652// to lazy delete.653let t0: immutable.Set<Record> = immutable.Set(654immutable.Set(this.records).filter((x) => x != null),655);656let t1: immutable.Set<Record> = immutable.Set(657immutable.Set(other.records).filter((x) => x != null),658);659660// Remove the common intersection -- nothing going on there.661// Doing this greatly reduces the complexity in the common662// case in which little has changed663const common = t0.intersect(t1);664t0 = t0.subtract(common);665t1 = t1.subtract(common);666667// compute the key parts of t0 and t1 as sets668const k0 = immutable.Set(t0.map(this.primary_key_cols));669const k1 = immutable.Set(t1.map(this.primary_key_cols));670671return immutable.Set(k0.union(k1));672}673674public changes(prev?: DBDocument): immutable.Set<Record> {675// CRITICAL TODO! Make this efficient using this.change_tracker!!!676if (prev == null) {677return immutable.Set(678immutable679.Set(this.records)680.filter((x) => x != null)681.map(this.primary_key_cols),682);683}684return this.changed_keys(prev);685}686687public count(): number {688return this.size;689}690}691692/*693The underlying string representation has one JSON object694per line. The order doesn't matter.695696WARNING: The primary keys and string cols are NOT stored697in the string representation! That is metadata that must698somehow be tracked separately. (Maybe this should be changed).699700You can't store null since null is used to signify deleting701(in set queries). You can't store undefined or Date objects702due to JSON.703704Also, note that the primary_keys and string_cols are string[]705rather than Set of strings, which is annoyingly inconsistent706with DBDocument above.707*/708709export function from_str(710s: string,711primary_keys: string[],712string_cols: string[],713): DBDocument {714const obj: jsmap[] = [];715for (const line of s.split("\n")) {716if (line.length > 0) {717try {718const x = JSON.parse(line);719if (typeof x === "object") {720obj.push(x);721} else {722throw Error("each line must be an object");723}724} catch (e) {725console.warn(`CORRUPT db-doc string: ${e} -- skipping '${line}'`);726}727}728}729return new DBDocument(730new Set(primary_keys),731new Set(string_cols),732immutable.fromJS(obj) as Records,733);734}735736737