Path: blob/master/src/packages/sync/editor/db/doc.ts
5760 views
/*1* This file is part of CoCalc: Copyright © 2020 Sagemath, Inc.2* License: MS-RSL – see LICENSE.md for details3*/45import { Document } from "../generic/types";6import * as immutable from "immutable";7import { isEqual } from "lodash";8import { is_array, is_object, copy_without, len } from "@cocalc/util/misc";9import {10make_patch as string_make_patch,11apply_patch as string_apply_patch,12} from "../generic/util";13import {14map_merge_patch,15merge_set,16nonnull_cols,17to_key,18to_str,19} from "./util";2021type Record = immutable.Map<string, any> | undefined;22type Records = immutable.List<Record>;23type Index = immutable.Map<string, immutable.Set<number>>;24type Indexes = immutable.Map<string, Index>;2526type jsmap = { [field: string]: any };27type DBPatch = any // TODO: It's really this, but... [-1 | 1, jsmap];2829export type WhereCondition = { [field: string]: any };30export type SetCondition =31| immutable.Map<string, any>32| { [field: string]: any };3334interface ChangeTracker {35changes: immutable.Set<immutable.Map<string, any>>; // primary keys that changed36from_db: Document; // DBDocument object where change tracking started.37}3839// Immutable DB document40export class DBDocument implements Document {41private primary_keys: Set<string>;4243// Columns that should be treated as non-atomic strings.44// This means simultaneous changes by multiple clients are45// merged, instead of last-write-wins. Also, large changes46// are propagated at patches, rather than sending47// complete string.48private string_cols: Set<string>;4950// list of records -- each is assumed to be an immutable.Map.51private records: immutable.List<Record>;5253// set of numbers n such that records.get(n) is defined.54private everything: immutable.Set<number>;5556// TODO: describe57private indexes: Indexes;5859// Change tracking.60private change_tracker: ChangeTracker;6162public readonly size: number;6364private to_str_cache?: string;6566constructor(67primary_keys: Set<string>,68string_cols: Set<string>,69records: Records = immutable.List(),70everything?: immutable.Set<number>,71indexes?: Indexes,72change_tracker?: ChangeTracker,73) {74this.set = this.set.bind(this);75this.delete_array = this.delete_array.bind(this);76this.primary_key_cols = this.primary_key_cols.bind(this);77this.primary_key_part = this.primary_key_part.bind(this);7879this.primary_keys = new Set(primary_keys);80if (this.primary_keys.size === 0) {81throw Error("there must be at least one primary key");82}83this.string_cols = new Set(string_cols);84this.records = records;85this.everything = everything == null ? this.init_everything() : everything;86this.size = this.everything.size;87this.indexes = indexes == null ? this.init_indexes() : indexes;88this.change_tracker =89change_tracker == null ? this.init_change_tracker() : change_tracker;90}9192// sorted immutable Set of i such that this.records.get(i) is defined.93private init_everything(): immutable.Set<number> {94const v: number[] = [];95for (let n = 0; n < this.records.size; n++) {96if (this.records.get(n) != undefined) {97v.push(n);98}99}100return immutable.Set(v);101}102103private init_indexes(): Indexes {104// Build indexes105let indexes: Indexes = immutable.Map();106for (const field of this.primary_keys) {107const index: Index = immutable.Map();108indexes = indexes.set(field, index);109}110this.records.map((record: Record, n: number) => {111if (record == null) {112// null records are sentinels for deletions.113return;114}115indexes.map((index: Index, field: string) => {116const val = record.get(field);117if (val != null) {118const k: string = to_key(val);119let matches = index.get(k);120if (matches != null) {121matches = matches.add(n);122} else {123matches = immutable.Set([n]);124}125indexes = indexes.set(field, index.set(k, matches));126}127});128});129return indexes;130}131132private init_change_tracker(): ChangeTracker {133return { changes: immutable.Set(), from_db: this };134}135136public to_str(): string {137if (this.to_str_cache != null) {138// We can use a cache, since this is an immutable object139return this.to_str_cache;140}141const obj = this.get({}).toJS();142return (this.to_str_cache = to_str(obj));143}144145public is_equal(other?: DBDocument): boolean {146if (other == null) {147// Definitely not equal if not defined.148return false;149}150if (this.records === other.records) {151return true;152}153if (this.size !== other.size) {154return false;155}156// We include undefineds in the sets below157// since records is a List of Record or undefined, i.e.,158// we use undefined as a sentinel in order to159// make deletes be efficient.160return immutable161.Set(this.records)162.add(undefined)163.equals(immutable.Set(other.records).add(undefined));164}165166public apply_patch(patch: DBPatch): DBDocument {167let i = 0;168let db: DBDocument = this;169while (i < patch.length) {170if (patch[i] === -1) {171db = db.delete(patch[i + 1]);172} else if (patch[i] === 1) {173db = db.set(patch[i + 1]);174}175i += 2;176}177return db;178}179180public make_patch(other: DBDocument): DBPatch {181if (other.size === 0) {182// Special case -- delete everything183return [-1, [{}]];184}185186let t0 = immutable.Set(this.records);187let t1 = immutable.Set(other.records);188// Remove the common intersection -- nothing going on there.189// Doing this greatly reduces the complexity in the common190// case in which little has changed191const common = t0.intersect(t1).add(undefined);192t0 = t0.subtract(common);193t1 = t1.subtract(common);194195// Easy very common special cases196if (t0.size === 0) {197// Special case: t0 is empty -- insert all the records.198return [1, t1.toJS()];199}200if (t1.size === 0) {201// Special case: t1 is empty -- bunch of deletes202const v: jsmap[] = [];203t0.map((x) => {204if (x != null) {205v.push(this.primary_key_part(x.toJS()));206}207});208return [-1, v];209}210211// compute the key parts of t0 and t1 as sets212// means -- set got from t0 by taking only the primary_key columns213// (Why the "as"? Typescript makes the output of the map be of type214// Iterable<Record, Iterable<string, any>>215// But, it's just a set. So cast it.)216const k0 = t0.map(this.primary_key_cols) as immutable.Set<Record>;217const k1 = t1.map(this.primary_key_cols) as immutable.Set<Record>;218219const add: any[] = [];220let remove: any[] | undefined = undefined;221222// Deletes: everything in k0 that is not in k1223const deletes = k0.subtract(k1);224if (deletes.size > 0) {225remove = deletes.toJS();226}227228// Inserts: everything in k1 that is not in k0229const inserts = k1.subtract(k0);230if (inserts.size > 0) {231inserts.map((k) => {232if (k != null) {233const x = other.get_one(k);234if (x != null) {235add.push(x.toJS());236}237}238});239}240241// Everything in k1 that is also in k0 -- these242// must have all changed243const changed = k1.intersect(k0);244if (changed.size > 0) {245changed.map((k) => {246if (k == null) {247return;248}249const obj = k.toJS();250const obj0 = this.primary_key_part(obj);251const from0 = this.get_one(obj0);252const to0 = other.get_one(obj0);253if (from0 == null || to0 == null) {254// just to make typescript happy255return;256}257const from = from0.toJS();258const to = to0.toJS();259// undefined for each key of from not in to260for (const key in from) {261if (to[key] == null) {262obj[key] = null;263}264}265// Explicitly set each key of `to` that is different266// than corresponding key of `from`:267for (const key in to) {268const v = to[key];269if (!isEqual(from[key], v)) {270if (this.string_cols.has(key) && from[key] != null && v != null) {271if (typeof from[key] == "string" && typeof v == "string") {272// A valid string patch, converting one string to another.273obj[key] = string_make_patch(from[key], v);274} else {275/* This should be impossible, due to the type checking that276I've added to set in this same commit. However, it can't277hurt to put in the above check on types, just in case.278https://github.com/sagemathinc/cocalc/issues/3625279A string col actually contains something that is not280a string. (Maybe it's due to loading something old?)281In any case, it's better to "best effort" this, rather282than to make the entire document be un-savable and283un-usable to the user.284We just give up and record no change in this case, so285when doc is read in later (or by another user), there286will be no weird corruption.287This case will probably go away completely when all288client code is written with proper typing.289*/290}291} else if (is_object(from[key]) && is_object(v)) {292// Changing from one map to another, where they are not293// equal -- can use a merge to make this more efficient.294// This is an important optimization, to avoid making295// patches HUGE.296obj[key] = map_merge_patch(from[key], v);297} else {298obj[key] = v;299}300}301}302add.push(obj);303});304}305306const patch: any[] = [];307if (remove != null) {308patch.push(-1);309patch.push(remove);310}311if (add.length > 0) {312patch.push(1);313patch.push(add);314}315316return patch;317}318319// Given an immutable map f, return its restriction320// to the primary keys.321private primary_key_cols(322f: immutable.Map<string, any>,323): immutable.Map<string, any> {324return f.filter(325(_, k) => k != null && this.primary_keys.has(k),326) as immutable.Map<string, any>;327}328329private select(where: WhereCondition): immutable.Set<number> {330if (immutable.Map.isMap(where)) {331// TODO: maybe do not allow?332where = where.toJS();333}334// Return immutable set with defined indexes the elts of @_records that335// satisfy the where condition.336const n: number = len(where);337let result: immutable.Set<number> | undefined = undefined;338for (const field in where) {339const value = where[field];340const index = this.indexes.get(field);341if (index == null) {342throw Error(`field '${field}' must be a primary key`);343}344const v: immutable.Set<number> | undefined = index.get(to_key(value));345// v may be undefined here346if (v == null) {347return immutable.Set(); // no matches for this field - done348}349if (n === 1) {350// no need to do further intersection351return v;352}353if (result != null) {354// intersect with what we've found so far via indexes.355result = result.intersect(v);356} else {357result = v;358}359}360if (result == null) {361// where condition must have been empty -- matches everything362return this.everything;363} else {364return result;365}366}367368// Used internally for determining the set/where parts of an object.369private parse(obj: Map<string, any>): { where: jsmap; set: jsmap } {370const where: jsmap = {};371const set: jsmap = {};372for (const field in obj) {373const val = obj[field];374if (this.primary_keys.has(field)) {375if (val != null) {376where[field] = val;377}378} else {379set[field] = val;380}381}382return { where, set };383}384385public set(obj: SetCondition | SetCondition[]): DBDocument {386// If you ever want to know exactly what set something, uncomment this:387// console.trace("set ", obj);388if (Array.isArray(obj)) {389let z: DBDocument = this;390for (const x of obj as SetCondition[]) {391z = z.set(x);392}393return z;394}395if (immutable.Map.isMap(obj)) {396// TODO: maybe do not allow?397// it is very clean/convenient to allow this398obj = (obj as immutable.Map<string, any>).toJS();399}400const { set, where } = this.parse(obj as Map<string, any>);401const matches = this.select(where);402let { changes } = this.change_tracker;403const first_match = matches != null ? matches.min() : undefined;404if (first_match != null) {405// edit the first existing record that matches406let record = this.records.get(first_match);407if (record == null) {408// make typescript happier.409throw Error("bug -- record can't be null");410}411const before = record;412for (const field in set) {413const value = set[field];414if (value === null) {415// null = how to delete fields416record = record.delete(field);417} else {418if (this.string_cols.has(field)) {419if (is_array(value)) {420// special case: a string patch421record = record.set(422field,423string_apply_patch(value, before.get(field, ""))[0],424);425continue;426}427if (typeof value != "string") {428// Putting this guard in to address429// https://github.com/sagemathinc/cocalc/issues/3625430// which was caused by some client code setting a string_col431// to something that is not a string. We'll next have432// to wait for this exception to be triggered someday...433throw Error(434`'${field}' must be a string, but tried to set to '${value}' of type ${typeof value}`,435);436}437// falls through to actually set it below.438}439let new_val;440const cur = record.get(field);441const change = immutable.fromJS(value);442if (immutable.Map.isMap(cur) && immutable.Map.isMap(change)) {443new_val = merge_set(cur, change);444} else {445new_val = change;446}447record = record.set(field, new_val);448}449}450451if (!before.equals(record)) {452// there was an actual change, so update; doesn't change453// anything involving indexes.454changes = changes.add(this.primary_key_cols(record));455return new DBDocument(456this.primary_keys,457this.string_cols,458this.records.set(first_match, record),459this.everything,460this.indexes,461{ changes, from_db: this.change_tracker.from_db },462);463} else {464return this;465}466} else {467// The sparse array matches had nothing in it, so468// append a new record.469for (const field of this.string_cols) {470if (obj[field] != null && is_array(obj[field])) {471// It's a patch -- but there is nothing to patch,472// so discard this field473obj = copy_without(obj, field);474}475}476// remove null columns (null indicates delete)477const record = nonnull_cols(immutable.fromJS(obj));478changes = changes.add(this.primary_key_cols(record));479const records = this.records.push(record);480const n = records.size - 1;481const everything = this.everything.add(n);482// update indexes483let indexes = this.indexes;484for (const field of this.primary_keys) {485const val = obj[field];486if (val != null) {487let index = indexes.get(field);488if (index == null) {489index = immutable.Map();490}491const k = to_key(val);492let matches = index.get(k);493if (matches != null) {494matches = matches.add(n);495} else {496matches = immutable.Set([n]);497}498indexes = indexes.set(field, index.set(k, matches));499}500}501return new DBDocument(502this.primary_keys,503this.string_cols,504records,505everything,506indexes,507{ changes, from_db: this.change_tracker.from_db },508);509}510}511512private delete_array(where: WhereCondition[]): DBDocument {513let z = this as DBDocument;514for (const x of where) {515z = z.delete(x);516}517return z;518}519520public delete(where: WhereCondition | WhereCondition[]): DBDocument {521// console.log("delete #{JSON.stringify(where)}")522if (is_array(where)) {523return this.delete_array(where as WhereCondition[]);524}525// if where undefined, will delete everything526if (this.everything.size === 0) {527// no-op -- no data so deleting is trivial528return this;529}530let { changes } = this.change_tracker;531const remove = this.select(where);532if (remove.size === this.everything.size) {533// actually deleting everything; easy special cases534changes = changes.union(535this.records536.filter((record) => record != null)537.map(this.primary_key_cols),538);539return new DBDocument(540this.primary_keys,541this.string_cols,542undefined,543undefined,544undefined,545{ changes, from_db: this.change_tracker.from_db },546);547}548549// remove matches from every index550let indexes = this.indexes;551for (const field of this.primary_keys) {552let index = indexes.get(field);553if (index == null) {554continue;555}556remove.forEach((n) => {557if (n == null) {558return;559}560const record = this.records.get(n);561if (record == null) {562return;563}564const val = record.get(field);565if (val == null) {566return;567}568const k = to_key(val);569const matches = index?.get(k)?.delete(n);570if (matches == null || index == null) return; // shouldn't happen571if (matches.size === 0) {572index = index.delete(k);573} else {574index = index.set(k, matches);575}576indexes = indexes.set(field, index);577});578}579580// delete corresponding records (actually set to undefined to581// preserve index references).582let records = this.records;583remove.forEach((n) => {584if (n == null) {585return;586}587const record = records.get(n);588if (record == null) {589return;590}591changes = changes.add(this.primary_key_cols(record));592records = records.set(n, undefined);593});594595const everything = this.everything.subtract(remove);596597return new DBDocument(598this.primary_keys,599this.string_cols,600records,601everything,602indexes,603{ changes, from_db: this.change_tracker.from_db },604);605}606607// Returns immutable list of all matches608public get(where: WhereCondition): Records {609const matches = this.select(where);610if (matches == null) {611return immutable.List();612}613// The "as" is because Typescript just makes the result of614// filter some more generic type (but it isn't).615return immutable.List(616this.records.filter((_, n) => n != null && matches.includes(n)),617);618}619620// Returns the first match, or undefined if there are no matches621get_one(where: WhereCondition): Record | undefined {622// TODO: couldn't select have a shortcut to return once one623// result is found.624const matches = this.select(where);625if (matches == null) {626return;627}628const min = matches.min();629if (min == null) return;630return this.records.get(min);631}632633// x = javascript object634private primary_key_part(x: jsmap): jsmap {635const where: jsmap = {};636for (const k in x) {637const v = x[k];638if (this.primary_keys.has(k)) {639where[k] = v;640}641}642return where;643}644645// Return immutable set of primary key parts of records that646// change in going from this to other.647public changed_keys(other: DBDocument): immutable.Set<Record> {648if (this.records === other.records) {649// identical -- obviously, nothing changed.650return immutable.Set();651}652// Get the defined records; there may be undefined ones due653// to lazy delete.654let t0: immutable.Set<Record> = immutable.Set(655immutable.Set(this.records).filter((x) => x != null),656);657let t1: immutable.Set<Record> = immutable.Set(658immutable.Set(other.records).filter((x) => x != null),659);660661// Remove the common intersection -- nothing going on there.662// Doing this greatly reduces the complexity in the common663// case in which little has changed664const common = t0.intersect(t1);665t0 = t0.subtract(common);666t1 = t1.subtract(common);667668// compute the key parts of t0 and t1 as sets669const k0 = immutable.Set(t0.map(this.primary_key_cols));670const k1 = immutable.Set(t1.map(this.primary_key_cols));671672return immutable.Set(k0.union(k1));673}674675public changes(prev?: DBDocument): immutable.Set<Record> {676// CRITICAL TODO! Make this efficient using this.change_tracker!!!677if (prev == null) {678return immutable.Set(679immutable680.Set(this.records)681.filter((x) => x != null)682.map(this.primary_key_cols),683);684}685return this.changed_keys(prev);686}687688public count(): number {689return this.size;690}691}692693/*694The underlying string representation has one JSON object695per line. The order doesn't matter.696697WARNING: The primary keys and string cols are NOT stored698in the string representation! That is metadata that must699somehow be tracked separately. (Maybe this should be changed).700701You can't store null since null is used to signify deleting702(in set queries). You can't store undefined or Date objects703due to JSON.704705Also, note that the primary_keys and string_cols are string[]706rather than Set of strings, which is annoyingly inconsistent707with DBDocument above.708*/709710export function from_str(711s: string,712primary_keys: string[],713string_cols: string[],714): DBDocument {715const obj: jsmap[] = [];716for (const line of s.split("\n")) {717if (line.length > 0) {718try {719const x = JSON.parse(line);720if (typeof x === "object") {721obj.push(x);722} else {723throw Error("each line must be an object");724}725} catch (e) {726console.warn(`CORRUPT db-doc string: ${e} -- skipping '${line}'`);727}728}729}730return new DBDocument(731new Set(primary_keys),732new Set(string_cols),733immutable.fromJS(obj) as Records,734);735}736737738