CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutSign UpSign In
sagemathinc

Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place.

GitHub Repository: sagemathinc/cocalc
Path: blob/master/src/packages/sync/editor/db/doc.ts
Views: 687
1
/*
2
* This file is part of CoCalc: Copyright © 2020 Sagemath, Inc.
3
* License: MS-RSL – see LICENSE.md for details
4
*/
5
6
import { CompressedPatch, Document } from "../generic/types";
7
import * as immutable from "immutable";
8
import { isEqual } from "lodash";
9
import { is_array, is_object, copy_without, len } from "@cocalc/util/misc";
10
import {
11
make_patch as string_make_patch,
12
apply_patch as string_apply_patch,
13
} from "../generic/util";
14
import {
15
map_merge_patch,
16
merge_set,
17
nonnull_cols,
18
to_key,
19
to_str,
20
} from "./util";
21
22
type Record = immutable.Map<string, any> | undefined;
23
type Records = immutable.List<Record>;
24
type Index = immutable.Map<string, immutable.Set<number>>;
25
type Indexes = immutable.Map<string, Index>;
26
27
type jsmap = { [field: string]: any };
28
29
export type WhereCondition = { [field: string]: any };
30
export type SetCondition =
31
| immutable.Map<string, any>
32
| { [field: string]: any };
33
34
interface ChangeTracker {
35
changes: immutable.Set<immutable.Map<string, any>>; // primary keys that changed
36
from_db: Document; // DBDocument object where change tracking started.
37
}
38
39
// Immutable DB document
40
export class DBDocument implements Document {
41
private primary_keys: Set<string>;
42
43
// Columns that should be treated as non-atomic strings.
44
// This means simultaneous changes by multiple clients are
45
// merged, instead of last-write-wins. Also, large changes
46
// are propagated at patches, rather than sending
47
// complete string.
48
private string_cols: Set<string>;
49
50
// list of records -- each is assumed to be an immutable.Map.
51
private records: immutable.List<Record>;
52
53
// set of numbers n such that records.get(n) is defined.
54
private everything: immutable.Set<number>;
55
56
// TODO: describe
57
private indexes: Indexes;
58
59
// Change tracking.
60
private change_tracker: ChangeTracker;
61
62
public readonly size: number;
63
64
private to_str_cache?: string;
65
66
constructor(
67
primary_keys: Set<string>,
68
string_cols: Set<string>,
69
records: Records = immutable.List(),
70
everything?: immutable.Set<number>,
71
indexes?: Indexes,
72
change_tracker?: ChangeTracker,
73
) {
74
this.set = this.set.bind(this);
75
this.delete_array = this.delete_array.bind(this);
76
this.primary_key_cols = this.primary_key_cols.bind(this);
77
this.primary_key_part = this.primary_key_part.bind(this);
78
79
this.primary_keys = new Set(primary_keys);
80
if (this.primary_keys.size === 0) {
81
throw Error("there must be at least one primary key");
82
}
83
this.string_cols = new Set(string_cols);
84
this.records = records;
85
this.everything = everything == null ? this.init_everything() : everything;
86
this.size = this.everything.size;
87
this.indexes = indexes == null ? this.init_indexes() : indexes;
88
this.change_tracker =
89
change_tracker == null ? this.init_change_tracker() : change_tracker;
90
}
91
92
// sorted immutable Set of i such that this.records.get(i) is defined.
93
private init_everything(): immutable.Set<number> {
94
const v: number[] = [];
95
for (let n = 0; n < this.records.size; n++) {
96
if (this.records.get(n) != undefined) {
97
v.push(n);
98
}
99
}
100
return immutable.Set(v);
101
}
102
103
private init_indexes(): Indexes {
104
// Build indexes
105
let indexes: Indexes = immutable.Map();
106
for (const field of this.primary_keys) {
107
const index: Index = immutable.Map();
108
indexes = indexes.set(field, index);
109
}
110
this.records.map((record: Record, n: number) => {
111
if (record == null) {
112
// null records are sentinels for deletions.
113
return;
114
}
115
indexes.map((index: Index, field: string) => {
116
const val = record.get(field);
117
if (val != null) {
118
const k: string = to_key(val);
119
let matches = index.get(k);
120
if (matches != null) {
121
matches = matches.add(n);
122
} else {
123
matches = immutable.Set([n]);
124
}
125
indexes = indexes.set(field, index.set(k, matches));
126
}
127
});
128
});
129
return indexes;
130
}
131
132
private init_change_tracker(): ChangeTracker {
133
return { changes: immutable.Set(), from_db: this };
134
}
135
136
public to_str(): string {
137
if (this.to_str_cache != null) {
138
// We can use a cache, since this is an immutable object
139
return this.to_str_cache;
140
}
141
const obj = this.get({}).toJS();
142
return (this.to_str_cache = to_str(obj));
143
}
144
145
public is_equal(other?: DBDocument): boolean {
146
if (other == null) {
147
// Definitely not equal if not defined.
148
return false;
149
}
150
if (this.records === other.records) {
151
return true;
152
}
153
if (this.size !== other.size) {
154
return false;
155
}
156
// We include undefineds in the sets below
157
// since records is a List of Record or undefined, i.e.,
158
// we use undefined as a sentinel in order to
159
// make deletes be efficient.
160
return immutable
161
.Set(this.records)
162
.add(undefined)
163
.equals(immutable.Set(other.records).add(undefined));
164
}
165
166
public apply_patch(patch: CompressedPatch): DBDocument {
167
let i = 0;
168
let db: DBDocument = this;
169
while (i < patch.length) {
170
if (patch[i] === -1) {
171
db = db.delete(patch[i + 1]);
172
} else if (patch[i] === 1) {
173
db = db.set(patch[i + 1]);
174
}
175
i += 2;
176
}
177
return db;
178
}
179
180
public make_patch(other: DBDocument): CompressedPatch {
181
if (other.size === 0) {
182
// Special case -- delete everything
183
return [-1, [{}]];
184
}
185
186
let t0 = immutable.Set(this.records);
187
let t1 = immutable.Set(other.records);
188
// Remove the common intersection -- nothing going on there.
189
// Doing this greatly reduces the complexity in the common
190
// case in which little has changed
191
const common = t0.intersect(t1).add(undefined);
192
t0 = t0.subtract(common);
193
t1 = t1.subtract(common);
194
195
// Easy very common special cases
196
if (t0.size === 0) {
197
// Special case: t0 is empty -- insert all the records.
198
return [1, t1.toJS()];
199
}
200
if (t1.size === 0) {
201
// Special case: t1 is empty -- bunch of deletes
202
const v: jsmap[] = [];
203
t0.map((x) => {
204
if (x != null) {
205
v.push(this.primary_key_part(x.toJS()));
206
}
207
});
208
return [-1, v];
209
}
210
211
// compute the key parts of t0 and t1 as sets
212
// means -- set got from t0 by taking only the primary_key columns
213
// (Why the "as"? Typescript makes the output of the map be of type
214
// Iterable<Record, Iterable<string, any>>
215
// But, it's just a set. So cast it.)
216
const k0 = t0.map(this.primary_key_cols) as immutable.Set<Record>;
217
const k1 = t1.map(this.primary_key_cols) as immutable.Set<Record>;
218
219
const add: any[] = [];
220
let remove: any[] | undefined = undefined;
221
222
// Deletes: everything in k0 that is not in k1
223
const deletes = k0.subtract(k1);
224
if (deletes.size > 0) {
225
remove = deletes.toJS();
226
}
227
228
// Inserts: everything in k1 that is not in k0
229
const inserts = k1.subtract(k0);
230
if (inserts.size > 0) {
231
inserts.map((k) => {
232
if (k != null) {
233
const x = other.get_one(k);
234
if (x != null) {
235
add.push(x.toJS());
236
}
237
}
238
});
239
}
240
241
// Everything in k1 that is also in k0 -- these
242
// must have all changed
243
const changed = k1.intersect(k0);
244
if (changed.size > 0) {
245
changed.map((k) => {
246
if (k == null) {
247
return;
248
}
249
const obj = k.toJS();
250
const obj0 = this.primary_key_part(obj);
251
const from0 = this.get_one(obj0);
252
const to0 = other.get_one(obj0);
253
if (from0 == null || to0 == null) {
254
// just to make typescript happy
255
return;
256
}
257
const from = from0.toJS();
258
const to = to0.toJS();
259
// undefined for each key of from not in to
260
for (const key in from) {
261
if (to[key] == null) {
262
obj[key] = null;
263
}
264
}
265
// Explicitly set each key of `to` that is different
266
// than corresponding key of `from`:
267
for (const key in to) {
268
const v = to[key];
269
if (!isEqual(from[key], v)) {
270
if (this.string_cols.has(key) && from[key] != null && v != null) {
271
if (typeof from[key] == "string" && typeof v == "string") {
272
// A valid string patch, converting one string to another.
273
obj[key] = string_make_patch(from[key], v);
274
} else {
275
/* This should be impossible, due to the type checking that
276
I've added to set in this same commit. However, it can't
277
hurt to put in the above check on types, just in case.
278
https://github.com/sagemathinc/cocalc/issues/3625
279
A string col actually contains something that is not
280
a string. (Maybe it's due to loading something old?)
281
In any case, it's better to "best effort" this, rather
282
than to make the entire document be un-savable and
283
un-usable to the user.
284
We just give up and record no change in this case, so
285
when doc is read in later (or by another user), there
286
will be no weird corruption.
287
This case will probably go away completely when all
288
client code is written with proper typing.
289
*/
290
}
291
} else if (is_object(from[key]) && is_object(v)) {
292
// Changing from one map to another, where they are not
293
// equal -- can use a merge to make this more efficient.
294
// This is an important optimization, to avoid making
295
// patches HUGE.
296
obj[key] = map_merge_patch(from[key], v);
297
} else {
298
obj[key] = v;
299
}
300
}
301
}
302
add.push(obj);
303
});
304
}
305
306
const patch: any[] = [];
307
if (remove != null) {
308
patch.push(-1);
309
patch.push(remove);
310
}
311
if (add.length > 0) {
312
patch.push(1);
313
patch.push(add);
314
}
315
316
return patch;
317
}
318
319
// Given an immutable map f, return its restriction
320
// to the primary keys.
321
private primary_key_cols(
322
f: immutable.Map<string, any>,
323
): immutable.Map<string, any> {
324
return f.filter(
325
(_, k) => k != null && this.primary_keys.has(k),
326
) as immutable.Map<string, any>;
327
}
328
329
private select(where: WhereCondition): immutable.Set<number> {
330
if (immutable.Map.isMap(where)) {
331
// TODO: maybe do not allow?
332
where = where.toJS();
333
}
334
// Return immutable set with defined indexes the elts of @_records that
335
// satisfy the where condition.
336
const n: number = len(where);
337
let result: immutable.Set<number> | undefined = undefined;
338
for (const field in where) {
339
const value = where[field];
340
const index = this.indexes.get(field);
341
if (index == null) {
342
throw Error(`field '${field}' must be a primary key`);
343
}
344
const v: immutable.Set<number> | undefined = index.get(to_key(value));
345
// v may be undefined here
346
if (v == null) {
347
return immutable.Set(); // no matches for this field - done
348
}
349
if (n === 1) {
350
// no need to do further intersection
351
return v;
352
}
353
if (result != null) {
354
// intersect with what we've found so far via indexes.
355
result = result.intersect(v);
356
} else {
357
result = v;
358
}
359
}
360
if (result == null) {
361
// where condition must have been empty -- matches everything
362
return this.everything;
363
} else {
364
return result;
365
}
366
}
367
368
// Used internally for determining the set/where parts of an object.
369
private parse(obj: Map<string, any>): { where: jsmap; set: jsmap } {
370
const where: jsmap = {};
371
const set: jsmap = {};
372
for (const field in obj) {
373
const val = obj[field];
374
if (this.primary_keys.has(field)) {
375
if (val != null) {
376
where[field] = val;
377
}
378
} else {
379
set[field] = val;
380
}
381
}
382
return { where, set };
383
}
384
385
public set(obj: SetCondition | SetCondition[]): DBDocument {
386
// If you ever want to know exactly what set something, uncomment this:
387
// console.trace("set ", obj);
388
if (Array.isArray(obj)) {
389
let z: DBDocument = this;
390
for (const x of obj as SetCondition[]) {
391
z = z.set(x);
392
}
393
return z;
394
}
395
if (immutable.Map.isMap(obj)) {
396
// TODO: maybe do not allow?
397
// it is very clean/convenient to allow this
398
obj = (obj as immutable.Map<string, any>).toJS();
399
}
400
const { set, where } = this.parse(obj as Map<string, any>);
401
const matches = this.select(where);
402
let { changes } = this.change_tracker;
403
const first_match = matches != null ? matches.min() : undefined;
404
if (first_match != null) {
405
// edit the first existing record that matches
406
let record = this.records.get(first_match);
407
if (record == null) {
408
// make typescript happier.
409
throw Error("bug -- record can't be null");
410
}
411
const before = record;
412
for (const field in set) {
413
const value = set[field];
414
if (value === null) {
415
// null = how to delete fields
416
record = record.delete(field);
417
} else {
418
if (this.string_cols.has(field)) {
419
if (is_array(value)) {
420
// special case: a string patch
421
record = record.set(
422
field,
423
string_apply_patch(value, before.get(field, ""))[0],
424
);
425
continue;
426
}
427
if (typeof value != "string") {
428
// Putting this guard in to address
429
// https://github.com/sagemathinc/cocalc/issues/3625
430
// which was caused by some client code setting a string_col
431
// to something that is not a string. We'll next have
432
// to wait for this exception to be triggered someday...
433
throw Error(
434
`'${field}' must be a string, but tried to set to '${value}' of type ${typeof value}`,
435
);
436
}
437
// falls through to actually set it below.
438
}
439
let new_val;
440
const cur = record.get(field);
441
const change = immutable.fromJS(value);
442
if (immutable.Map.isMap(cur) && immutable.Map.isMap(change)) {
443
new_val = merge_set(cur, change);
444
} else {
445
new_val = change;
446
}
447
record = record.set(field, new_val);
448
}
449
}
450
451
if (!before.equals(record)) {
452
// there was an actual change, so update; doesn't change
453
// anything involving indexes.
454
changes = changes.add(this.primary_key_cols(record));
455
return new DBDocument(
456
this.primary_keys,
457
this.string_cols,
458
this.records.set(first_match, record),
459
this.everything,
460
this.indexes,
461
{ changes, from_db: this.change_tracker.from_db },
462
);
463
} else {
464
return this;
465
}
466
} else {
467
// The sparse array matches had nothing in it, so
468
// append a new record.
469
for (const field of this.string_cols) {
470
if (obj[field] != null && is_array(obj[field])) {
471
// It's a patch -- but there is nothing to patch,
472
// so discard this field
473
obj = copy_without(obj, field);
474
}
475
}
476
// remove null columns (null indicates delete)
477
const record = nonnull_cols(immutable.fromJS(obj));
478
changes = changes.add(this.primary_key_cols(record));
479
const records = this.records.push(record);
480
const n = records.size - 1;
481
const everything = this.everything.add(n);
482
// update indexes
483
let indexes = this.indexes;
484
for (const field of this.primary_keys) {
485
const val = obj[field];
486
if (val != null) {
487
let index = indexes.get(field);
488
if (index == null) {
489
index = immutable.Map();
490
}
491
const k = to_key(val);
492
let matches = index.get(k);
493
if (matches != null) {
494
matches = matches.add(n);
495
} else {
496
matches = immutable.Set([n]);
497
}
498
indexes = indexes.set(field, index.set(k, matches));
499
}
500
}
501
return new DBDocument(
502
this.primary_keys,
503
this.string_cols,
504
records,
505
everything,
506
indexes,
507
{ changes, from_db: this.change_tracker.from_db },
508
);
509
}
510
}
511
512
private delete_array(where: WhereCondition[]): DBDocument {
513
let z = this as DBDocument;
514
for (const x of where) {
515
z = z.delete(x);
516
}
517
return z;
518
}
519
520
public delete(where: WhereCondition | WhereCondition[]): DBDocument {
521
// console.log("delete #{JSON.stringify(where)}")
522
if (is_array(where)) {
523
return this.delete_array(where as WhereCondition[]);
524
}
525
// if where undefined, will delete everything
526
if (this.everything.size === 0) {
527
// no-op -- no data so deleting is trivial
528
return this;
529
}
530
let { changes } = this.change_tracker;
531
const remove = this.select(where);
532
if (remove.size === this.everything.size) {
533
// actually deleting everything; easy special cases
534
changes = changes.union(
535
this.records
536
.filter((record) => record != null)
537
.map(this.primary_key_cols),
538
);
539
return new DBDocument(
540
this.primary_keys,
541
this.string_cols,
542
undefined,
543
undefined,
544
undefined,
545
{ changes, from_db: this.change_tracker.from_db },
546
);
547
}
548
549
// remove matches from every index
550
let indexes = this.indexes;
551
for (const field of this.primary_keys) {
552
let index = indexes.get(field);
553
if (index == null) {
554
continue;
555
}
556
remove.forEach((n) => {
557
if (n == null) {
558
return;
559
}
560
const record = this.records.get(n);
561
if (record == null) {
562
return;
563
}
564
const val = record.get(field);
565
if (val == null) {
566
return;
567
}
568
const k = to_key(val);
569
const matches = index?.get(k)?.delete(n);
570
if (matches == null || index == null) return; // shouldn't happen
571
if (matches.size === 0) {
572
index = index.delete(k);
573
} else {
574
index = index.set(k, matches);
575
}
576
indexes = indexes.set(field, index);
577
});
578
}
579
580
// delete corresponding records (actually set to undefined to
581
// preserve index references).
582
let records = this.records;
583
remove.forEach((n) => {
584
if (n == null) {
585
return;
586
}
587
const record = records.get(n);
588
if (record == null) {
589
return;
590
}
591
changes = changes.add(this.primary_key_cols(record));
592
records = records.set(n, undefined);
593
});
594
595
const everything = this.everything.subtract(remove);
596
597
return new DBDocument(
598
this.primary_keys,
599
this.string_cols,
600
records,
601
everything,
602
indexes,
603
{ changes, from_db: this.change_tracker.from_db },
604
);
605
}
606
607
// Returns immutable list of all matches
608
public get(where: WhereCondition): Records {
609
const matches = this.select(where);
610
if (matches == null) {
611
return immutable.List();
612
}
613
// The "as" is because Typescript just makes the result of
614
// filter some more generic type (but it isn't).
615
return immutable.List(
616
this.records.filter((_, n) => n != null && matches.includes(n)),
617
);
618
}
619
620
// Returns the first match, or undefined if there are no matches
621
get_one(where: WhereCondition): Record | undefined {
622
// TODO: couldn't select have a shortcut to return once one
623
// result is found.
624
const matches = this.select(where);
625
if (matches == null) {
626
return;
627
}
628
const min = matches.min();
629
if (min == null) return;
630
return this.records.get(min);
631
}
632
633
// x = javascript object
634
private primary_key_part(x: jsmap): jsmap {
635
const where: jsmap = {};
636
for (const k in x) {
637
const v = x[k];
638
if (this.primary_keys.has(k)) {
639
where[k] = v;
640
}
641
}
642
return where;
643
}
644
645
// Return immutable set of primary key parts of records that
646
// change in going from this to other.
647
public changed_keys(other: DBDocument): immutable.Set<Record> {
648
if (this.records === other.records) {
649
// identical -- obviously, nothing changed.
650
return immutable.Set();
651
}
652
// Get the defined records; there may be undefined ones due
653
// to lazy delete.
654
let t0: immutable.Set<Record> = immutable.Set(
655
immutable.Set(this.records).filter((x) => x != null),
656
);
657
let t1: immutable.Set<Record> = immutable.Set(
658
immutable.Set(other.records).filter((x) => x != null),
659
);
660
661
// Remove the common intersection -- nothing going on there.
662
// Doing this greatly reduces the complexity in the common
663
// case in which little has changed
664
const common = t0.intersect(t1);
665
t0 = t0.subtract(common);
666
t1 = t1.subtract(common);
667
668
// compute the key parts of t0 and t1 as sets
669
const k0 = immutable.Set(t0.map(this.primary_key_cols));
670
const k1 = immutable.Set(t1.map(this.primary_key_cols));
671
672
return immutable.Set(k0.union(k1));
673
}
674
675
public changes(prev?: DBDocument): immutable.Set<Record> {
676
// CRITICAL TODO! Make this efficient using this.change_tracker!!!
677
if (prev == null) {
678
return immutable.Set(
679
immutable
680
.Set(this.records)
681
.filter((x) => x != null)
682
.map(this.primary_key_cols),
683
);
684
}
685
return this.changed_keys(prev);
686
}
687
688
public count(): number {
689
return this.size;
690
}
691
}
692
693
/*
694
The underlying string representation has one JSON object
695
per line. The order doesn't matter.
696
697
WARNING: The primary keys and string cols are NOT stored
698
in the string representation! That is metadata that must
699
somehow be tracked separately. (Maybe this should be changed).
700
701
You can't store null since null is used to signify deleting
702
(in set queries). You can't store undefined or Date objects
703
due to JSON.
704
705
Also, note that the primary_keys and string_cols are string[]
706
rather than Set of strings, which is annoyingly inconsistent
707
with DBDocument above.
708
*/
709
710
export function from_str(
711
s: string,
712
primary_keys: string[],
713
string_cols: string[],
714
): DBDocument {
715
const obj: jsmap[] = [];
716
for (const line of s.split("\n")) {
717
if (line.length > 0) {
718
try {
719
const x = JSON.parse(line);
720
if (typeof x === "object") {
721
obj.push(x);
722
} else {
723
throw Error("each line must be an object");
724
}
725
} catch (e) {
726
console.warn(`CORRUPT db-doc string: ${e} -- skipping '${line}'`);
727
}
728
}
729
}
730
return new DBDocument(
731
new Set(primary_keys),
732
new Set(string_cols),
733
immutable.fromJS(obj) as Records,
734
);
735
}
736
737