Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
sagemathinc
GitHub Repository: sagemathinc/cocalc
Path: blob/master/src/packages/util/db-schema/file-use.ts
5800 views
1
/*
2
* This file is part of CoCalc: Copyright © 2020 Sagemath, Inc.
3
* License: MS-RSL – see LICENSE.md for details
4
*/
5
6
import { Table } from "./types";
7
import { minutes_ago } from "../misc";
8
import { SCHEMA as schema } from "./index";
9
10
// Helper function to normalize dates for comparison
11
function getTime(date: any): number {
12
if (!date) return 0;
13
if (date instanceof Date) return date.getTime();
14
if (typeof date === "string") return new Date(date).getTime();
15
return 0;
16
}
17
18
/* TODO: for postgres rewrite after done we MIGHT completely redo file_use to eliminate
19
the id field, use project_id, path as a compound primary key, and maybe put users in
20
another table with a relation. There is also expert discussion about this table in the
21
Hacker News discussion of my PostgreSQL vs ... blog post.
22
*/
23
24
Table({
25
name: "file_use",
26
fields: {
27
id: {
28
type: "string",
29
pg_type: "CHAR(40)",
30
},
31
project_id: {
32
type: "uuid",
33
},
34
path: {
35
type: "string",
36
},
37
users: {
38
type: "map",
39
desc: "{account_id1: {action1: timestamp1, action2:timestamp2}, account_id2: {...}}",
40
date: "all",
41
},
42
last_edited: {
43
type: "timestamp",
44
},
45
},
46
rules: {
47
primary_key: "id",
48
durability: "soft", // loss of some log data not serious, since used only for showing notifications
49
unique_writes: true, // there is no reason for a user to write the same record twice
50
db_standby: "safer", // allow doing the initial read part of the query from a standby node.
51
pg_indexes: ["project_id", "last_edited"],
52
53
// CRITICAL! At scale, this query
54
// SELECT * FROM file_use WHERE project_id = any(select project_id from projects where users ? '25e2cae4-05c7-4c28-ae22-1e6d3d2e8bb3') ORDER BY last_edited DESC limit 100;
55
// will take forever due to the query planner being off with its estimation (its the case where there is no such user or no data) and also uses several workers to do an index scan
56
// We disable the indes scan for this query, which gets rid of the extra workers and runs fine.
57
pg_indexscan: false,
58
59
// I put a time limit in pg_where below of to just give genuinely recent notifications,
60
// and massively reduce server load. The obvious todo list is to make another file_use
61
// virtual table that lets you get older entries.
62
user_query: {
63
get: {
64
pg_where: ["last_edited >= NOW() - interval '21 days'", "projects"],
65
pg_where_load: [
66
"last_edited >= NOW() - interval '10 days'",
67
"projects",
68
],
69
pg_changefeed: "projects",
70
options: [{ order_by: "-last_edited" }, { limit: 200 }], // limit is arbitrary
71
options_load: [{ order_by: "-last_edited" }, { limit: 70 }], // limit is arbitrary
72
throttle_changes: 2000,
73
fields: {
74
id: null,
75
project_id: null,
76
path: null,
77
users: null,
78
last_edited: null,
79
},
80
},
81
set: {
82
fields: {
83
id(obj, db) {
84
return db.sha1(obj.project_id, obj.path);
85
},
86
project_id: "project_write",
87
path: true,
88
users: true,
89
last_edited: true,
90
},
91
required_fields: {
92
id: true,
93
project_id: true,
94
path: true,
95
},
96
check_hook(db, obj, account_id, _project_id, cb) {
97
// hook to note that project is being used (CRITICAL: do not pass path
98
// into db.touch since that would cause another write to the file_use table!)
99
// CRITICAL: Only do this if what edit or chat for this user is very recent.
100
// Otherwise we touch the project just for seeing notifications or opening
101
// the file, which is confusing and wastes a lot of resources.
102
const x = obj.users != null ? obj.users[account_id] : undefined;
103
// edit/chat/open fields may be strings or Date objects depending on how they're processed
104
const recentTime = minutes_ago(3).getTime();
105
106
if (
107
x != null &&
108
(getTime(x.edit) >= recentTime ||
109
getTime(x.chat) >= recentTime ||
110
getTime(x.open) >= recentTime)
111
) {
112
db.touch({ project_id: obj.project_id, account_id });
113
// Also log that this particular file is being used/accessed; this
114
// is mainly only for longterm analytics but also the file_use_times
115
// virtual table queries this. Note that log_file_access
116
// is throttled.
117
db.log_file_access({
118
project_id: obj.project_id,
119
account_id,
120
filename: obj.path,
121
});
122
}
123
cb();
124
},
125
},
126
},
127
},
128
});
129
130
Table({
131
name: "crm_file_use",
132
rules: {
133
virtual: "file_use",
134
primary_key: "id",
135
user_query: {
136
get: {
137
admin: true, // only admins can do get queries on this table
138
fields: schema.file_use.user_query?.get?.fields ?? {},
139
},
140
},
141
},
142
fields: schema.file_use.fields,
143
});
144
145