CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutSign UpSign In
sagemathinc

Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place.

GitHub Repository: sagemathinc/cocalc
Path: blob/master/src/packages/util/db-schema/compute-servers.ts
Views: 687
1
/*
2
* This file is part of CoCalc: Copyright © 2023 Sagemath, Inc.
3
* License: MS-RSL – see LICENSE.md for details
4
*/
5
6
import type {
7
Region as HyperstackRegion,
8
VirtualMachine as HyperstackVirtualMachine,
9
} from "@cocalc/util/compute/cloud/hyperstack/api-types";
10
import { COLORS } from "@cocalc/util/theme";
11
import { ID, NOTES } from "./crm";
12
import { SCHEMA as schema } from "./index";
13
import { Table } from "./types";
14
export {
15
CLOUDS_BY_NAME,
16
GOOGLE_CLOUD_DEFAULTS,
17
ON_PREM_DEFAULTS,
18
} from "@cocalc/util/compute/cloud/clouds";
19
20
/**
 * Fallback disk sizes, returned by getMinDiskSizeGb when the image
 * configuration does not provide a usable minimum disk size.
 */
export const STANDARD_DISK_SIZE = 20;
export const CUDA_DISK_SIZE = 60;

// Check-in settings. NOTE(review): going by the names these are a period in
// seconds and a filesystem path -- confirm against the code that reads them.
export const CHECK_IN_PERIOD_S = 20;
export const CHECK_IN_PATH = "/cocalc/conf/check-in";

// There is no SLA on billing data, so clients are recommended to wait this
// long after a purchase ends before requesting the cost. This should give
// us about a day of wiggle room.
const GOOGLE_COST_LAG_DAYS = 2;
// The same lag expressed in milliseconds.
export const GOOGLE_COST_LAG_MS = 1000 * 60 * 60 * 24 * GOOGLE_COST_LAG_DAYS;
32
33
// Compute Server Images -- typings. See packages/server/compute/images.ts for
34
// how the actual data is populated.
35
36
/** One version of a compute server image. */
export interface ImageVersion {
  /**
   * Must be given and distinct for each version; this typically
   * identifies the image to docker.
   */
  tag: string;
  /** Usually the upstream version; defaults to the tag if not given. */
  version?: string;
  /** What is displayed to the user; defaults to the tag. */
  label?: string;
  /**
   * If this is not set to true, then this version should not be shown by
   * default. If not tested, only show to users who explicitly really want
   * this (e.g., admins).
   */
  tested?: boolean;
}
47
48
/**
 * Default values for automatic shutdown.
 * NOTE(review): presumably the fallbacks for AutomaticShutdown.interval_minutes
 * and AutomaticShutdown.attempts -- confirm against the code that reads them.
 */
export const AUTOMATIC_SHUTDOWN_DEFAULTS = {
  INTERVAL_MINUTES: 1,
  ATTEMPTS: 3,
};

/**
 * Configuration for periodically running a command on the compute server
 * and acting on the result.
 *
 * NOTE(review): an earlier version of this comment said the server turns
 * off if the command output contains a trigger string and deprovisions if
 * it contains a deprovision string. The fields below use `exit_code` and
 * `action` instead -- confirm which description is current.
 */
export interface AutomaticShutdown {
  /** Run this command (with given args) on the compute server. */
  command: string;
  /** Timeout in seconds for running the command. */
  timeout?: number;
  /** How often to run the command. */
  interval_minutes?: number;
  /**
   * Try this many times before giving up on running the command and
   * turning the machine off.
   */
  attempts?: number;
  /** Turn server off when the script exits with this code. */
  exit_code?: number;
  /** One of 'shutdown', 'deprovision', 'restart' or 'suspend'. */
  action?: "shutdown" | "deprovision" | "restart" | "suspend";
}
70
71
/**
 * One entry of an https proxy configuration (used by the `proxy` fields of
 * Image and BaseConfiguration).
 * NOTE(review): presumably requests matching `path` are forwarded to
 * `target`, and `ws` turns on websocket support -- confirm against the
 * proxy server implementation.
 */
interface ProxyRoute {
  path: string;
  target: string;
  ws?: boolean;
}
76
77
/** Description of one compute server image. */
export interface Image {
  /** What we show the user to describe this image, e.g., in the image select menu. */
  label: string;
  /** The name of the package on npmjs or dockerhub. */
  package: string;
  /** In case there is a different package name for ARM64, the name of it. */
  package_arm64?: string;
  /** Root filesystem image must be at least this big in GB. */
  minDiskSizeGb?: number;
  /**
   * Rough estimate of compressed size of Docker image; useful to get a
   * sense of how long it will take to download image on clouds without
   * pregenerated images.
   */
  dockerSizeGb?: number;
  /** Description in MARKDOWN to show user of this image. Can include links. */
  description?: string;
  /** Upstream URL for this image, e.g., https://julialang.org/ for the Julia image. */
  url: string;
  /** Icon to show next to the label for this image. */
  icon: string;
  /** Link to a URL with the source for building this image. */
  source: string;
  /**
   * Optional list of links to videos about this image, ordered from
   * lowest to highest priority.
   */
  videos?: string[];
  /** Optional list of links to tutorials. */
  tutorials?: string[];
  /**
   * The versions of this image that we claim to have built.
   * NOTE(review): an older note here said that versions with role='prod'
   * (or not specified) are shown to users as options, but ImageVersion has
   * no `role` field (it has `tested`) -- confirm which rule applies.
   */
  versions: ImageVersion[];
  /** If true, then a GPU is required to use this image. */
  gpu?: boolean;
  /** If true, then the microk8s snap is required to use this image. */
  microk8s?: boolean;
  /** If true, image has web interface that supports configurable auth token. */
  authToken?: boolean;
  /**
   * If false, no jupyter kernels included. If true or a list of names,
   * there are kernels available -- used in frontend/jupyter/select-kernel.tsx
   */
  jupyterKernels?: boolean | string[];
  /**
   * If set to true, do not allow creating this compute server without a
   * DNS subdomain. Some images only make sense to use over the web, and
   * the web server just won't work without DNS setup properly (e.g.,
   * VS Code with LEAN). Ignored for on prem.
   */
  requireDns?: boolean;
  /** If true, this is a system container that is not for user compute. */
  system?: boolean;
  /** If true, this image is completely disabled, so will not be used in any way. */
  disabled?: boolean;
  /**
   * Optional integer used for sorting options to display to user.
   * The bigger the higher.
   */
  priority?: number;
  /**
   * - false: do NOT run https proxy server on host VM;
   * - nothing given: runs proxy server with no default config (so does nothing);
   * - if given, is array of proxy config.
   */
  proxy?: false | ProxyRoute[];
  /** Applications associated with this image, keyed by name. */
  apps?: Record<
    string,
    {
      icon: string;
      label: string;
      url: string;
      path: string;
      launch: string;
      requiresDns?: boolean;
    }
  >;
}

/** Map from image name to its description. */
export type Images = Record<string, Image>;
142
143
/** The metadata we keep about one Google Cloud image. */
export interface GoogleCloudImage {
  labels: Record<string, string>;
  diskSizeGb: number;
  creationTimestamp: string;
}

/** Map from name to Google Cloud image metadata. */
export type GoogleCloudImages = Record<string, GoogleCloudImage>;
149
150
/**
 * Best-effort conversion of a string into a name that is valid for google
 * cloud: every "." and "_" becomes "-", the result is lowercased, and it is
 * cut off after 63 characters.
 *
 * This is probably not sufficient: any other characters are passed through
 * unchanged.
 */
export function makeValidGoogleName(s: string): string {
  const maxLength = 63;
  const hyphenated = s.split(/[._]/).join("-");
  return hyphenated.toLowerCase().substring(0, maxLength);
}
154
155
/** The possible states of a compute server. */
export type State =
  | "off"
  | "starting"
  | "running"
  | "stopping"
  | "deprovisioned"
  | "suspending"
  | "suspended"
  | "unknown";

// Used for sorting by state -- ordered from most alive to least alive.
export const ORDERED_STATES: State[] = [
  "running",
  "starting",
  "stopping",
  "suspending",
  "suspended",
  "off",
  "deprovisioned",
  "unknown",
];

// Maps each state to its position in ORDERED_STATES (0 = most alive).
export const STATE_TO_NUMBER: { [state: string]: number } = {};
ORDERED_STATES.forEach((state, position) => {
  STATE_TO_NUMBER[state] = position;
});
182
183
/** The actions that can be requested on a compute server. */
export type Action =
  | "start"
  | "resume"
  | "stop"
  | "suspend"
  | "deprovision"
  | "reboot";

/**
 * User-interface metadata for each action, keyed by action name.
 *
 * - `target` is the stable state the server ends up in after the action.
 * - `clouds` -- NOTE(review): presumably restricts the action to the listed
 *   clouds when given; confirm against the code that reads it.
 * - `confirm`, `confirmMessage`, `danger` -- NOTE(review): presumably
 *   control the confirmation dialog shown to the user; confirm in the
 *   frontend.
 */
export const ACTION_INFO: Record<
  string,
  {
    label: string;
    icon: string;
    tip: string;
    description: string;
    confirm?: boolean;
    confirmMessage?: string;
    danger?: boolean;
    target: State; // target stable state after doing this action.
    clouds?: Cloud[];
  }
> = {
  start: {
    label: "Start",
    icon: "play",
    tip: "Start",
    description: "Start the compute server running.",
    target: "running",
  },

  resume: {
    label: "Resume",
    icon: "play",
    clouds: ["google-cloud"],
    tip: "Resume",
    description: "Resume the compute server from suspend.",
    target: "running",
  },

  stop: {
    label: "Stop",
    icon: "stop",
    tip: "Turn off",
    description:
      "Turn the compute server off. No data on disk is lost, but any data and state in memory will be lost. This is like turning your laptop off.",
    confirm: true,
    target: "off",
  },

  deprovision: {
    label: "Deprovision",
    icon: "trash",
    tip: "Deprovision the virtual machine",
    description:
      "Deprovisioning DELETES THE VIRTUAL MACHINE BOOT DISK, but keeps the compute server parameters. There are no costs associated with a deprovisioned compute server, and you can move it to a different region or zone. Any files in the home directory of your project are not affected.",
    confirm: true,
    confirmMessage:
      "I understand that my compute server disks will be deleted.",
    danger: true,
    target: "deprovisioned",
  },

  reboot: {
    label: "Hard Reboot",
    icon: "refresh",
    tip: "Hard reboot the virtual machine.",
    description:
      "Perform a HARD reset on the virtual machine, which wipes the memory contents and resets the virtual machine to its initial state. This should not delete data from the disk, but can lead to filesystem corruption.",
    confirm: true,
    confirmMessage:
      "I understand that this can lead to filesystem corruption and is slightly dangerous.",
    danger: true,
    target: "running",
    clouds: ["google-cloud", "hyperstack"],
  },

  suspend: {
    label: "Suspend",
    icon: "pause",
    clouds: ["google-cloud"],
    tip: "Suspend disk and memory state",
    confirm: true,
    description:
      "Suspend the compute server. No data on disk or memory is lost, and you are only charged for storing disk and memory. This is like closing your laptop screen. You can leave a compute server suspended for up to 60 days before it automatically shuts off.",
    target: "suspended",
  },
};
264
265
/**
 * User-interface metadata for each state, keyed by state name.
 *
 * - `stable` is false for transitional states; in that case `target` is
 *   the state the server is heading to.
 * - `actions` -- NOTE(review): presumably the actions offered to the user
 *   while the server is in this state; confirm in the frontend.
 */
export const STATE_INFO: Record<
  string,
  {
    label: string;
    actions: Action[];
    icon: string;
    color?: string;
    stable?: boolean;
    target?: State; // if not stable, this is the target state it is heading to
  }
> = {
  off: {
    label: "Off",
    color: "#ff4b00",
    actions: ["start", "deprovision"],
    icon: "stop",
    stable: true,
  },

  suspended: {
    label: "Suspended",
    actions: ["resume", "deprovision", "stop"],
    icon: "pause",
    color: "#0097a7",
    stable: true,
  },

  suspending: {
    label: "Suspending",
    actions: ["suspend"],
    icon: "pause",
    color: "#00bcd4",
    stable: false,
    target: "suspended",
  },

  starting: {
    label: "Starting",
    color: "#388e3c",
    actions: ["start"],
    icon: "bolt",
    stable: false,
    target: "running",
  },

  running: {
    label: "Running",
    color: COLORS.RUN,
    actions: ["stop", "deprovision", "reboot", "suspend"],
    icon: "run",
    stable: true,
  },

  stopping: {
    label: "Stopping",
    color: "#ff9800",
    actions: ["stop"],
    icon: "hand",
    stable: false,
    target: "off",
  },

  unknown: {
    label: "Unknown (click to refresh)",
    actions: [],
    icon: "question-circle",
    stable: true,
  },

  deprovisioned: {
    label: "Deprovisioned",
    actions: ["start"],
    color: "#888",
    icon: "minus-square",
    stable: true,
  },
};
334
335
/**
 * Returns the stable state that a given action or state leads to.
 *
 * - For an action: the `target` recorded in ACTION_INFO.
 * - For a stable state: the state itself.
 * - For a non-stable state: its `target` from STATE_INFO, falling back to
 *   the state itself when no target is recorded.
 *
 * @throws Error if x is neither a key of ACTION_INFO nor of STATE_INFO.
 */
export function getTargetState(x: State | Action): State {
  // Actions take precedence over states.
  const actionInfo = ACTION_INFO[x];
  if (actionInfo != null) {
    return actionInfo.target;
  }

  const stateInfo = STATE_INFO[x];
  if (stateInfo == null) {
    throw Error(`x =${x} must be a state or action`);
  }

  if (stateInfo.stable) {
    return x as State;
  }
  return (stateInfo.target ?? x) as State;
}
347
348
/** CPU architectures that compute servers can have. */
export type Architecture = "x86_64" | "arm64";

/**
 * Returns the name of the Image field that holds the package name for the
 * given architecture: "package" for x86_64 and "package_arm64" otherwise.
 *
 * This convention is used in cocalc-compute-docker for making the npm
 * packages @cocalc/compute-server. Don't mess with it!
 */
export function getImageField(arch: Architecture): "package" | "package_arm64" {
  if (arch === "x86_64") {
    return "package";
  }
  return "package_arm64";
}
355
356
/** Identifiers of the clouds known to the compute server schema. */
export type Cloud =
  | "any"
  | "onprem"
  | "core-weave"
  | "hyperstack"
  | "lambda-cloud"
  | "google-cloud"
  | "aws"
  | "fluid-stack"
  | "test";
366
367
/**
 * Returns the minimum disk size in GB for the given configuration.
 *
 * If the configuration names an image and IMAGES has a nonzero
 * minDiskSizeGb for it, that value is returned. Otherwise this falls back
 * to CUDA_DISK_SIZE when an acceleratorType is configured, and to
 * STANDARD_DISK_SIZE when it is not.
 */
export function getMinDiskSizeGb({
  configuration,
  IMAGES,
}: {
  configuration;
  IMAGES: Images;
}) {
  const imageName = configuration?.image;
  const imageMinimum = imageName
    ? IMAGES[imageName]?.minDiskSizeGb
    : undefined;
  if (imageMinimum) {
    return imageMinimum;
  }
  // TODO: will have to do something based on actual image size,
  // maybe, unless I come up with a clever trick involving
  // one PD mounted on many machines (?).
  return configuration?.acceleratorType ? CUDA_DISK_SIZE : STANDARD_DISK_SIZE;
}
389
390
/** Configuration fields shared by the configurations of all clouds. */
interface BaseConfiguration {
  /**
   * Name of the image to use, e.g. 'python' or 'pytorch'.
   * Images are managed in src/packages/server/compute/images.ts
   */
  image: string;
  /**
   * Tag for the image to use when starting the compute server. This
   * references the versions field of the image. If the tag is not given
   * or not available, we use the latest available tag.
   */
  tag?: string;
  /** Tag for the file system container. */
  tag_filesystem?: string;
  /** Tag for the @cocalc/compute-server package. */
  tag_cocalc?: string;
  /**
   * If the string is set and the VM has an external ip address and dns is
   * configured, then point https://{dns}.... with ssl proxying to this
   * compute server when it is running.
   */
  dns?: string;
  /**
   * Array of top level directories to exclude from sync. These can't have
   * "|" in them, since we use that as a separator. Use "~" to completely
   * disable sync.
   */
  excludeFromSync?: readonly string[];
  /**
   * If true, view data on the compute server as ephemeral. Currently this
   * is only meant to impact the user interface.
   */
  ephemeral?: boolean;
  /** Token used for authentication at https://compute-server... */
  authToken?: string;
  /** Configuration of the https proxy server. */
  proxy?: ProxyRoute[];
  /**
   * If this compute server stops pinging us, e.g., due to being preempted
   * or just crashing due to out of memory (etc) should we automatically do
   * a forced restart. Note that currently for on prem this isn't possible.
   */
  autoRestart?: boolean;
  /** Used to temporarily disable autoRestart to avoid accidentally triggering it. */
  autoRestartDisabled?: boolean;
  /**
   * Allow collaborators to control the state of the compute server. They
   * cannot change any other configuration. User still pays for everything
   * and owns compute server.
   */
  allowCollaboratorControl?: boolean;
}
427
428
/** Configuration of a compute server on the "lambda-cloud" cloud. */
interface LambdaConfiguration extends BaseConfiguration {
  cloud: "lambda-cloud";
  instance_type_name: string;
  region_name: string;
}

/** Configuration of a compute server on the "hyperstack" cloud. */
export interface HyperstackConfiguration extends BaseConfiguration {
  cloud: "hyperstack";
  flavor_name: string;
  region_name: HyperstackRegion;
  /**
   * An integer >= 1. It defaults to 10. It's the size of the /data
   * partition. It's implemented using 1 or more hyperstack (=ceph)
   * volumes, which are combined together as a ZFS pool. If the compute
   * server is named "foo", the volumes are named "foo-1", "foo-2",
   * "foo-3", etc.
   *
   * There is also always a separate 50GB root volume, which is named
   * "foo-0", and whose size is not configurable.
   *
   * NOTE: users install packages "systemwide" inside of a docker container
   * and we configure docker to store its data in the zpool, so that's in
   * here too.
   */
  diskSizeGb: number;
}
451
452
/**
 * CPU type names for the "core-weave" cloud. The same names are spelled
 * out as a literal union in CoreWeaveConfiguration.cpu.type.
 */
export const COREWEAVE_CPU_TYPES = [
  "amd-epyc-rome",
  "amd-epyc-milan",
  "intel-xeon-v1",
  "intel-xeon-v2",
  "intel-xeon-v3",
  "intel-xeon-v4",
  "intel-xeon-scalable",
] as const;

/**
 * GPU type names for the "core-weave" cloud. The same names are spelled
 * out as a literal union in CoreWeaveConfiguration.gpu.type.
 */
export const COREWEAVE_GPU_TYPES = [
  "Quadro_RTX_4000",
  "Quadro_RTX_5000",
  "RTX_A4000",
  "RTX_A5000",
  "RTX_A6000",
  "A40",
  "Tesla_V100_PCIE",
  "Tesla_V100_NVLINK",
  "A100_PCIE_40GB",
  "A100_PCIE_80GB",
  "A100_NVLINK_40GB",
  "A100_NVLINK_80GB",
] as const;
476
477
/**
 * Configuration of a compute server on the "core-weave" cloud.
 *
 * The gpu and cpu type unions below are written out literally; they list
 * the same names as COREWEAVE_GPU_TYPES and COREWEAVE_CPU_TYPES, so keep
 * them in sync when editing either.
 */
interface CoreWeaveConfiguration extends BaseConfiguration {
  cloud: "core-weave";
  gpu: {
    type:
      | "Quadro_RTX_4000"
      | "Quadro_RTX_5000"
      | "RTX_A4000"
      | "RTX_A5000"
      | "RTX_A6000"
      | "A40"
      | "Tesla_V100_PCIE"
      | "Tesla_V100_NVLINK"
      | "A100_PCIE_40GB"
      | "A100_PCIE_80GB"
      | "A100_NVLINK_40GB"
      | "A100_NVLINK_80GB"; //(typeof COREWEAVE_GPU_TYPES)[number];
    count: number;
  };
  cpu: {
    count: number;
    type?:
      | "amd-epyc-rome"
      | "amd-epyc-milan"
      | "intel-xeon-v1"
      | "intel-xeon-v2"
      | "intel-xeon-v3"
      | "intel-xeon-v4"
      | "intel-xeon-scalable"; //(typeof COREWEAVE_CPU_TYPES)[number];
  };
  /** Amount of memory, e.g., "12Gi". */
  memory: string;
  storage?: {
    root: {
      /** Size of the root storage, e.g., '40Gi'. */
      size: string;
    };
  };
}
513
514
/** Configuration of a compute server on the "fluid-stack" cloud. */
interface FluidStackConfiguration extends BaseConfiguration {
  cloud: "fluid-stack";
  plan: string;
  region: string;
  os: string;
}
520
/** Accelerator types that can be configured on Google Cloud. */
export type GoogleCloudAcceleratorType =
  | "nvidia-a100-80gb"
  | "nvidia-tesla-a100"
  | "nvidia-l4"
  | "nvidia-tesla-t4"
  | "nvidia-tesla-v100"
  | "nvidia-tesla-p4"
  | "nvidia-tesla-p100";

/** Every GoogleCloudAcceleratorType as a runtime list, in the same order as the type. */
export const GOOGLE_CLOUD_ACCELERATOR_TYPES: GoogleCloudAcceleratorType[] = [
  "nvidia-a100-80gb",
  "nvidia-tesla-a100",
  "nvidia-l4",
  "nvidia-tesla-t4",
  "nvidia-tesla-v100",
  "nvidia-tesla-p4",
  "nvidia-tesla-p100",
];
538
539
/** Disk types that can be configured on Google Cloud. */
export type GoogleCloudDiskType =
  | "pd-standard"
  | "pd-balanced"
  | "pd-ssd"
  | "hyperdisk-balanced";

/** Every GoogleCloudDiskType as a runtime list, in the same order as the type. */
export const GOOGLE_CLOUD_DISK_TYPES: GoogleCloudDiskType[] = [
  "pd-standard",
  "pd-balanced",
  "pd-ssd",
  // NOTE: hyperdisks are complicated and multidimensional, but for cocalc
  // we just hardcode options for the iops and bandwidth, and allow the
  // user to adjust the size. Also, "hyperdisk-balanced" means hyperdisk
  // with the defaults for iops and bandwidth defined in
  // src/packages/util/compute/cloud/google-cloud/compute-cost.ts
  "hyperdisk-balanced",
];
556
557
/** Configuration of a compute server on the "google-cloud" cloud. */
export interface GoogleCloudConfiguration extends BaseConfiguration {
  cloud: "google-cloud";
  region: string;
  zone: string;
  machineType: string;
  /** Use a spot instance if spot is true. */
  spot?: boolean;
  /**
   * The boot disk: diskSizeGb is an integer >= 10. It defaults to 10.
   * It's the size of the boot disk.
   */
  diskSizeGb?: number;
  hyperdiskBalancedIops?: number;
  hyperdiskBalancedThroughput?: number;
  diskType?: GoogleCloudDiskType;
  acceleratorType?: GoogleCloudAcceleratorType;
  /** The allowed number depends on the accelerator; it defaults to 1. */
  acceleratorCount?: number;
  // minCpuPlatform
  terminationTime?: Date;
  maxRunDurationSeconds?: number;
  /** If true, use newest image, whether or not it is labeled with prod=true. */
  test?: boolean;
  /**
   * An image name of the form "2023-09-13-063355-test", i.e., a timestamp
   * in that format followed by an optional string. Whether or not to use
   * cuda and the arch are determined by parameters above. This is meant to
   * be used for two purposes: (1) testing before deploying to production,
   * and (2) stability, so a given compute server has the exact same base
   * image every time it is started, instead of being updated. Regarding
   * (2), this might not be needed, but we'll see. If image is not set, we
   * use the newest image that is tagged prod:true, or it's an error if no
   * such image exists. This is all about Google Cloud images, not the
   * compute server IMAGES (see the Image type).
   */
  sourceImage?: string;
  /** If true, then we have an external ip address. */
  externalIp?: boolean;
  /**
   * If true, can run full VM's inside of the machine, but there is 10%
   * performance penalty. This will only work for Intel non-e2 non-a3
   * instance types. No AMD and no ARM64.
   */
  enableNestedVirtualization?: boolean;
}
593
594
/** Configuration of an "onprem" compute server. */
export interface OnPremCloudConfiguration extends BaseConfiguration {
  cloud: "onprem";
  arch?: Architecture;
  gpu?: boolean;
}

/**
 * Configuration of a compute server on any supported cloud; the `cloud`
 * field tells the variants apart.
 */
export type Configuration =
  | LambdaConfiguration
  | HyperstackConfiguration
  | CoreWeaveConfiguration
  | FluidStackConfiguration
  | GoogleCloudConfiguration
  | OnPremCloudConfiguration;
607
608
/** Data fields shared by the cloud specific data of all clouds. */
interface BaseData {
  cloudflareId?: string;
  externalIp?: string;
  internalIp?: string;
}

/** Cloud specific data for a "lambda-cloud" compute server. */
export interface LambdaCloudData extends BaseData {
  cloud: "lambda-cloud";
  instance_id: string;
}

/** Cloud specific data for a "hyperstack" compute server. */
export interface HyperstackData extends BaseData {
  cloud: "hyperstack";
  /** Name we are using for the vm. */
  name?: string;
  /** Hyperstack description of this vm. */
  vm?: HyperstackVirtualMachine;
  /**
   * Id's of persistent storage, with first id the boot disk. Disks are
   * named {name}-0, {name}-1, {name}-2, etc., with {name}-0 being the
   * boot disk.
   */
  disks?: number[];
  creationTimestamp?: Date;
}

/** Cloud specific data for a "google-cloud" compute server. */
export interface GoogleCloudData extends BaseData {
  cloud: "google-cloud";
  name?: string;
  state?: State;
  cpuPlatform?: string;
  creationTimestamp?: Date;
  lastStartTimestamp?: Date;
}

/** Cloud specific data about a compute server; `cloud` tells the variants apart. */
export type Data = GoogleCloudData | LambdaCloudData | HyperstackData;
642
643
/**
 * State of one component of a compute server; this is the value type of
 * ComputeServerUserInfo.detailed_state.
 * NOTE(review): `time` and `expire` look like timestamps in ms since the
 * epoch (the schema example uses Date.now()) -- confirm.
 */
export interface ComponentState {
  state: string;
  time: number;
  expire?: number;
}

/** Shape of the `template` field of a compute server. */
export interface ComputeServerTemplate {
  enabled?: boolean;
  priority?: number;
}
653
654
/**
 * A compute server record without the api key fields (those are added by
 * ComputeServer).
 */
export interface ComputeServerUserInfo {
  id: number;
  /** The project_specific_id of this compute server -- unique within project, minimal. */
  project_specific_id?: number;
  account_id: string;
  project_id: string;
  title?: string;
  color?: string;
  cost_per_hour?: number;
  deleted?: boolean;
  state_changed?: Date;
  started_by?: string;
  error?: string;
  state?: State;
  idle_timeout?: number;
  automatic_shutdown?: AutomaticShutdown;
  // google-cloud has a new "Time limit" either by hour or by date, which seems like a great idea!
  // time_limit
  autorestart?: boolean;
  cloud: Cloud;
  configuration: Configuration;
  provisioned_configuration?: Configuration;
  data?: Data;
  purchase_id?: number;
  last_edited?: Date;
  /** Used for UI sorting. */
  position?: number;
  detailed_state?: { [name: string]: ComponentState };
  update_purchase?: boolean;
  last_purchase_update?: Date;
  template?: ComputeServerTemplate;
}

/** A compute server record including the project api key fields. */
export interface ComputeServer extends ComputeServerUserInfo {
  /** Project level api key for the project. */
  api_key?: string;
  /** Id of the api key (needed so we can delete it from database). */
  api_key_id?: number;
}
689
690
// Schema of the compute_servers table: one row per compute server.
//
// Users can read the fields listed under user_query.get for compute servers
// in a given project, and can directly set only the few fields listed under
// user_query.set. The api_key, api_key_id, vpn_public_key and
// vpn_private_key fields are not part of the user readable fields.
Table({
  name: "compute_servers",
  rules: {
    primary_key: "id",
    // unique vpn ip address *within* a given project only:
    pg_unique_indexes: [
      "(project_id, vpn_ip)",
      "(project_id, project_specific_id)",
    ],
    user_query: {
      get: {
        pg_where: [{ "project_id = $::UUID": "project_id" }],
        throttle_changes: 0, // do not make this bigger; UI really feels off if throttled
        fields: {
          id: null,
          account_id: null,
          created: null,
          title: null,
          color: null,
          cost_per_hour: null,
          deleted: null,
          project_id: null,
          state_changed: null,
          error: null,
          state: null,
          idle_timeout: null,
          automatic_shutdown: null,
          autorestart: null,
          cloud: null,
          configuration: null,
          data: null,
          provisioned_configuration: null,
          avatar_image_tiny: null,
          last_edited: null,
          purchase_id: null,
          position: null,
          detailed_state: null,
          template: null,
          notes: null,
          vpn_ip: null,
          project_specific_id: null,
        },
      },
      set: {
        // ATTN: It's assumed that users can't set the data field. Doing so would be very bad and could allow
        // them to maybe abuse the system and not pay for something.
        // Most fields, e.g., configuration, get set via api calls, which ensures consistency in terms of valid
        // data and what is actively deployed.
        fields: {
          project_id: "project_write",
          id: true,
          position: true,
          error: true, // easily clear the error
          notes: true,
          automatic_shutdown: true,
        },
      },
    },
  },
  fields: {
    id: ID,
    account_id: {
      type: "uuid",
      desc: "User that owns this compute server.",
      render: { type: "account" },
    },
    created: {
      type: "timestamp",
      desc: "When the compute server was created.",
    },
    title: {
      type: "string",
      pg_type: "VARCHAR(254)",
      desc: "Title of this compute server. Used purely to make it easier for the user to keep track of it.",
      render: { type: "text", maxLength: 254, editable: true },
    },
    color: {
      type: "string",
      desc: "A user configurable color, which is used for tags and UI to indicate where a tab is running.",
      pg_type: "VARCHAR(30)",
      render: { type: "color", editable: true },
    },
    cost_per_hour: {
      title: "Cost per Hour",
      desc: "The cost in US dollars per hour that this compute server costs us when it is provisioned. Any time the state is changed, this is set by the server to the proper cost.",
      type: "number",
      pg_type: "real",
    },
    deleted: {
      type: "boolean",
      desc: "True if the compute server has been deleted.",
    },
    project_id: {
      type: "uuid",
      desc: "The project id that this compute server provides compute for.",
      render: { type: "project_link" },
    },
    api_key: {
      type: "string",
      pg_type: "VARCHAR(128)",
      desc: "api key to connect to the project. This is created by the system right when we are going to create the VM, and gets deleted when we stop it. It's not set by the user and should not be revealed to the user.",
    },
    api_key_id: {
      type: "number",
      desc: "id of the api key; needed so we can delete it from database",
    },
    state_changed: {
      type: "timestamp",
      desc: "When the state last changed.",
    },
    error: {
      type: "string",
      desc: "In case something went wrong, e.g., in starting this compute server, this field will get set with a string error message to show the user. It's also cleared right when we try to start server.",
    },
    state: {
      type: "string",
      // Lists every value of the State type; the longest fits in VARCHAR(16).
      desc: "One of - 'off', 'starting', 'running', 'stopping', 'deprovisioned', 'suspending', 'suspended', 'unknown'. This is the underlying VM's state.",
      pg_type: "VARCHAR(16)",
    },
    idle_timeout: {
      type: "number",
      desc: "The idle timeout in seconds of this compute server. If set to 0, never turn it off automatically. The compute server times out as idle if none of the tabs it is providing are actively touched through the web UI.",
    },
    automatic_shutdown: {
      type: "map",
      pg_type: "jsonb",
      desc: "Configuration to control various aspects of the state of the compute server via a background maintenance task.",
    },
    autorestart: {
      type: "boolean",
      desc: "If true and the compute server stops for any reason, then it will be automatically started again. This is primarily useful for spot instances.",
    },
    cloud: {
      type: "string",
      pg_type: "varchar(30)",
      // Values follow the Cloud type.
      desc: "The cloud where this compute server runs, e.g., 'onprem', 'core-weave', 'hyperstack', 'lambda-cloud', 'google-cloud', 'aws', 'fluid-stack'.",
    },
    configuration: {
      type: "map",
      pg_type: "jsonb",
      desc: "Cloud specific configuration of the computer at the cloud host. The format depends on the cloud",
    },
    provisioned_configuration: {
      type: "map",
      pg_type: "jsonb",
      desc: "Same as configuration, but this is the one we actually used last time we provisioned a VM in a cloud.",
    },
    data: {
      type: "map",
      pg_type: "jsonb",
      desc: "Arbitrary data about this server that is cloud provider specific. Store data here to facilitate working with the virtual machine, e.g., the id of the server when it is running, etc. This *IS* returned to the user.",
    },
    avatar_image_tiny: {
      title: "Image",
      type: "string",
      desc: "tiny (32x32) visual image associated with the compute server. Suitable to include as part of changefeed, since about 3kb. Derived from avatar_image_full.",
      render: { type: "image" },
    },
    avatar_image_full: {
      title: "Image",
      type: "string",
      desc: "User configurable visual image associated with the compute server. Could be 150kb. NOT included as part of changefeed of projects, since potentially big (e.g., 200kb x 1000 projects = 200MB!).",
      render: { type: "image" },
    },
    purchase_id: {
      type: "number",
      desc: "if there is a current active purchase related to this compute server, this is the id of that purchase in the purchases table",
    },
    update_purchase: {
      type: "boolean",
      desc: "This is set to true if activity with this server is happening that warrants creating/ending a purchase.",
    },
    last_purchase_update: {
      type: "timestamp",
      desc: "Last time we requested an update to the purchase info about this compute server.",
    },
    position: {
      type: "number",
      desc: "Used for sorting a list of compute servers in the UI.",
    },
    last_edited: {
      type: "timestamp",
      desc: "Last time the configuration, state, etc., changed.",
    },
    detailed_state: {
      type: "map",
      pg_type: "jsonb",
      desc: "Map from component name to something like {state:'running',time:Date.now()}, e.g., {vm: {state:'running', time:393939938484}, filesystem: {state:'updating', time:939398484892}, uptime: {state:'22:56:33 up 3 days, 9:28, 0 users, load average: 0.93, 0.73, 0.56', time:?}}. This is used to provide users with insight into what's currently happening on their compute server.",
    },
    notes: NOTES,
    template: {
      type: "map",
      pg_type: "jsonb",
      desc: "Use this compute server configuration as a public template. Only admins can set this field for now. The exact structure of this jsonb is yet to be determined.",
    },
    vpn_ip: {
      type: "string",
      desc: "IP address of the compute server on the private encrypted project-wide VPN.",
    },
    vpn_public_key: {
      type: "string",
      desc: "Wireguard public key for this compute server.",
    },
    vpn_private_key: {
      type: "string",
      desc: "Wireguard private key for this compute server.",
    },
    project_specific_id: {
      type: "integer",
      desc: "A unique project-specific id assigned to this compute server. This is a positive integer that is guaranteed to be unique for compute servers *in a given project* and minimal when assigned (so it is as small as possible). This number is useful for distributed algorithms, since it can be used to ensure distinct sequence without any additional coordination. This is also useful to display to users so that the id number they see everywhere is not huge.",
    },
  },
});
903
904
// Admin-only (CRM) view of the compute_servers table: a virtual table that
// reuses the fields and primary key of compute_servers.
Table({
  name: "crm_compute_servers",
  fields: schema.compute_servers.fields,
  rules: {
    primary_key: schema.compute_servers.primary_key,
    virtual: "compute_servers",
    user_query: {
      get: {
        admin: true, // only admins can do get queries on this table
        // (without this, users who have read access could read)
        pg_where: [],
        fields: {
          // Everything that regular users can get from compute_servers...
          ...schema.compute_servers.user_query?.get?.fields,
          // ...plus template, listed explicitly (it also appears in the
          // compute_servers get fields above, so this is a no-op today).
          template: null,
        },
      },
      set: {
        admin: true,
        fields: {
          id: true,
          title: true,
          color: true,
          deleted: true,
          notes: true,
          template: true,
          // NOTE(review): state_control is not among the compute_servers
          // fields defined in this file -- confirm this entry is intended.
          state_control: null,
        },
      },
    },
  },
});
935
936
// Schema of the compute_servers_cache table: cached values keyed by
// (cloud, key), each with an expiration time.
Table({
  name: "compute_servers_cache",
  fields: {
    cloud: {
      type: "string",
      desc: "The cloud that we're caching information about",
    },
    key: {
      type: "string",
      desc: "The key for whatever we're caching.",
    },
    value: {
      type: "string",
      desc: "The cached data.",
    },
    expire: {
      type: "timestamp",
      desc: "When this cached value should be expired.",
    },
  },
  rules: {
    durability: "soft", // it's just a cache
    desc: "Cache data about what's going on in various clouds that are used to implement compute servers.",
    primary_key: ["cloud", "key"],
  },
});
962
963