Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place.
Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place.
Path: blob/master/src/packages/database/postgres-user-queries.coffee
Views: 687
#########################################################################1# This file is part of CoCalc: Copyright © 2020 Sagemath, Inc.2# License: MS-RSL – see LICENSE.md for details3#########################################################################45"""6User (and project) client queries78COPYRIGHT : (c) 2017 SageMath, Inc.9LICENSE : MS-RSL10"""1112MAX_CHANGEFEEDS_PER_CLIENT = 20001314# Reject all patches that have timestamp that is more than 3 minutes in the future.15MAX_PATCH_FUTURE_MS = 1000*60*31617EventEmitter = require('events')18async = require('async')19lodash = require('lodash')2021{one_result, all_results, count_result, pg_type, quote_field} = require('./postgres-base')2223{UserQueryQueue} = require('./postgres-user-query-queue')2425{defaults} = misc = require('@cocalc/util/misc')26required = defaults.required2728{PROJECT_UPGRADES, SCHEMA, OPERATORS, isToOperand} = require('@cocalc/util/schema')29{queryIsCmp, userGetQueryFilter} = require("./user-query/user-get-query")3031{file_use_times} = require('./postgres/file-use-times')32{updateRetentionData} = require('./postgres/retention')3334{ checkProjectName } = require("@cocalc/util/db-schema/name-rules");35{callback2} = require('@cocalc/util/async-utils')363738exports.extend_PostgreSQL = (ext) -> class PostgreSQL extends ext39# Cancel all queued up queries by the given client40cancel_user_queries: (opts) =>41opts = defaults opts,42client_id : required43@_user_query_queue?.cancel_user_queries(opts)4445user_query: (opts) =>46opts = defaults opts,47client_id : undefined # if given, uses to control number of queries at once by one client.48priority : undefined # (NOT IMPLEMENTED) priority for this query (an integer [-10,...,19] like in UNIX)49account_id : undefined50project_id : undefined51query : required52options : []53changes : undefined54cb : undefined5556if opts.account_id?57# Check for "sudo" by admin to query as a different user, which is done by specifying58# options = [..., {account_id:'uuid'}, ...].59for x in opts.options60if x.account_id?61# Check user is an admin, then change opts.account_id62@get_account63columns : ['groups']64account_id : opts.account_id65cb : (err, r) =>66if err67opts.cb?(err)68else if r['groups']? and 'admin' in r['groups']69opts.account_id = x.account_id70opts.options = (y for y in opts.options when not y['account_id']?)71# now do query with new opts and options not including account_id sudo.72@user_query(opts)73else74opts.cb?('user must be admin to sudo')75return7677if not opts.client_id?78# No client_id given, so do not use query queue.79delete opts.priority80delete opts.client_id81@_user_query(opts)82return8384if not @_user_query_queue?85o =86do_query : @_user_query87dbg : @_dbg('user_query_queue')88concurrent : @concurrent89@_user_query_queue ?= new UserQueryQueue(o)9091@_user_query_queue.user_query(opts)9293_user_query: (opts) =>94opts = defaults opts,95account_id : undefined96project_id : undefined97query : required98options : [] # used for initial query; **IGNORED** by changefeed!;99# - Use [{set:true}] or [{set:false}] to force get or set query100# - For a set query, use {delete:true} to delete instead of set. This is the only way101# to delete a record, and won't work unless delete:true is set in the schema102# for the table to explicitly allow deleting.103changes : undefined # id of change feed104cb : undefined # cb(err, result) # WARNING -- this *will* get called multiple times when changes is true!105id = misc.uuid().slice(0,6)106dbg = @_dbg("_user_query(id=#{id})")107dbg(misc.to_json(opts.query))108if misc.is_array(opts.query)109dbg('array query instead')110@_user_query_array(opts)111return112113subs =114'{account_id}' : opts.account_id115'{project_id}' : opts.project_id116'{now}' : new Date()117118if opts.changes?119changes =120id : opts.changes121cb : opts.cb122123v = misc.keys(opts.query)124if v.length > 1125dbg("FATAL no key")126opts.cb?('FATAL: must specify exactly one key in the query')127return128table = v[0]129query = opts.query[table]130if misc.is_array(query)131if query.length > 1132dbg("FATAL not implemented")133opts.cb?("FATAL: array of length > 1 not yet implemented")134return135multi = true136query = query[0]137else138multi = false139is_set_query = undefined140if opts.options?141if not misc.is_array(opts.options)142dbg("FATAL options")143opts.cb?("FATAL: options (=#{misc.to_json(opts.options)}) must be an array")144return145for x in opts.options146if x.set?147is_set_query = !!x.set148options = (x for x in opts.options when not x.set?)149else150options = []151152if misc.is_object(query)153query = misc.deep_copy(query)154misc.obj_key_subs(query, subs)155if not is_set_query?156is_set_query = not misc.has_null_leaf(query)157if is_set_query158dbg("do a set query")159if changes160dbg("FATAL: changefeed")161opts.cb?("FATAL: changefeeds only for read queries")162return163if not opts.account_id? and not opts.project_id?164dbg("FATAL: anon set")165opts.cb?("FATAL: no anonymous set queries")166return167dbg("user_set_query")168@user_set_query169account_id : opts.account_id170project_id : opts.project_id171table : table172query : query173options : opts.options174cb : (err, x) =>175dbg("returned #{err}")176opts.cb?(err, {"#{table}":x})177else178# do a get query179if changes and not multi180dbg("FATAL: changefeed multi")181opts.cb?("FATAL: changefeeds only implemented for multi-document queries")182return183184if changes185err = @_inc_changefeed_count(opts.account_id, opts.project_id, table, changes.id)186if err187dbg("err changefeed count -- #{err}")188opts.cb?(err)189return190191dbg("user_get_query")192@user_get_query193account_id : opts.account_id194project_id : opts.project_id195table : table196query : query197options : options198multi : multi199changes : changes200cb : (err, x) =>201dbg("returned #{err}")202if err and changes203# didn't actually make the changefeed, so don't count it.204@_dec_changefeed_count(changes.id, table)205opts.cb?(err, if not err then {"#{table}":x})206else207dbg("FATAL - invalid table")208opts.cb?("FATAL: invalid user_query of '#{table}' -- query must be an object")209210###211TRACK CHANGEFEED COUNTS212213_inc and dec below are evidently broken, in that it's CRITICAL that they match up exactly, or users will be214locked out until they just happen to switch to another hub with different tracking, which is silly.215216TODO: DISABLED FOR NOW!217###218219# Increment a count of the number of changefeeds by a given client so we can cap it.220_inc_changefeed_count: (account_id, project_id, table, changefeed_id) =>221return222client_name = "#{account_id}-#{project_id}"223cnt = @_user_get_changefeed_counts ?= {}224ids = @_user_get_changefeed_id_to_user ?= {}225if not cnt[client_name]?226cnt[client_name] = 1227else if cnt[client_name] >= MAX_CHANGEFEEDS_PER_CLIENT228return "user may create at most #{MAX_CHANGEFEEDS_PER_CLIENT} changefeeds; please close files, refresh browser, restart project"229else230# increment before successfully making get_query to prevent huge bursts causing trouble!231cnt[client_name] += 1232@_dbg("_inc_changefeed_count(table='#{table}')")("{#{client_name}:#{cnt[client_name]} ...}")233ids[changefeed_id] = client_name234return false235236# Corresponding decrement of count of the number of changefeeds by a given client.237_dec_changefeed_count: (id, table) =>238return239client_name = @_user_get_changefeed_id_to_user[id]240if client_name?241@_user_get_changefeed_counts?[client_name] -= 1242delete @_user_get_changefeed_id_to_user[id]243cnt = @_user_get_changefeed_counts244if table?245t = "(table='#{table}')"246else247t = ""248@_dbg("_dec_changefeed_count#{t}")("counts={#{client_name}:#{cnt[client_name]} ...}")249250# Handle user_query when opts.query is an array. opts below are as for user_query.251_user_query_array: (opts) =>252if opts.changes and opts.query.length > 1253opts.cb?("FATAL: changefeeds only implemented for single table")254return255result = []256f = (query, cb) =>257@user_query258account_id : opts.account_id259project_id : opts.project_id260query : query261options : opts.options262cb : (err, x) =>263result.push(x); cb(err)264async.mapSeries(opts.query, f, (err) => opts.cb(err, result))265266user_query_cancel_changefeed: (opts) =>267opts = defaults opts,268id : required269cb : undefined # not really asynchronous270dbg = @_dbg("user_query_cancel_changefeed(id='#{opts.id}')")271feed = @_changefeeds?[opts.id]272if feed?273dbg("actually canceling feed")274@_dec_changefeed_count(opts.id)275delete @_changefeeds[opts.id]276feed.close()277else278dbg("already canceled before (no such feed)")279opts.cb?()280281_user_get_query_columns: (query, remove_from_query) =>282v = misc.keys(query)283if remove_from_query?284# If remove_from_query is specified it should be an array of strings285# and we do not includes these in what is returned.286v = lodash.difference(v, remove_from_query)287return v288289_require_is_admin: (account_id, cb) =>290if not account_id?291cb("FATAL: user must be an admin")292return293@is_admin294account_id : account_id295cb : (err, is_admin) =>296if err297cb(err)298else if not is_admin299cb("FATAL: user must be an admin")300else301cb()302303# Ensure that each project_id in project_ids is such that the account is in one of the given304# groups for the project, or that the account is an admin. If not, cb(err).305_require_project_ids_in_groups: (account_id, project_ids, groups, cb) =>306s = {"#{account_id}": true}307require_admin = false308@_query309query : "SELECT project_id, users#>'{#{account_id}}' AS user FROM projects"310where : "project_id = ANY($)":project_ids311cache : true312cb : all_results (err, x) =>313if err314cb(err)315else316known_project_ids = {} # we use this to ensure that each of the given project_ids exists.317for p in x318known_project_ids[p.project_id] = true319if p.user?.group not in groups320require_admin = true321# If any of the project_ids don't exist, reject the query.322for project_id in project_ids323if not known_project_ids[project_id]324cb("FATAL: unknown project_id '#{misc.trunc(project_id,100)}'")325return326if require_admin327@_require_is_admin(account_id, cb)328else329cb()330331_query_parse_options: (options) =>332r = {}333for x in options334for name, value of x335switch name336when 'only_changes'337r.only_changes = !!value338when 'limit'339r.limit = parseInt(value)340when 'slice'341r.slice = value342when 'order_by'343if value[0] == '-'344value = value.slice(1) + " DESC "345if r.order_by346r.order_by = r.order_by + ', ' + value347else348r.order_by = value349when 'delete'350null351# ignore delete here - is parsed elsewhere352when 'heartbeat'353@_dbg("_query_parse_options")("TODO/WARNING -- ignoring heartbeat option from old client")354else355r.err = "unknown option '#{name}'"356# Guard rails: no matter what, all queries are capped with a limit of 100000.357# TODO: If somehow somebody has, e.g., more than 100K projects, or maybe more358# than 100K edits of a single file, they could hit this and not realize it. I359# had this set at 1000 for a few minutes and it caused me to randomly not have360# some of my projects.361MAX_LIMIT = 100000362try363if not isFinite(r.limit)364r.limit = MAX_LIMIT365else if r.limit > MAX_LIMIT366r.limit = MAX_LIMIT367catch368r.limit = MAX_LIMIT369return r370371###372SET QUERIES373###374_parse_set_query_opts: (opts) =>375r = {}376377if opts.project_id?378dbg = r.dbg = @_dbg("user_set_query(project_id='#{opts.project_id}', table='#{opts.table}')")379else if opts.account_id?380dbg = r.dbg = @_dbg("user_set_query(account_id='#{opts.account_id}', table='#{opts.table}')")381else382return {err:"FATAL: account_id or project_id must be specified"}383384if not SCHEMA[opts.table]?385return {err:"FATAL: table '#{opts.table}' does not exist"}386387dbg(misc.to_json(opts.query))388389if opts.options390dbg("options=#{misc.to_json(opts.options)}")391392r.query = misc.copy(opts.query)393r.table = opts.table394r.db_table = SCHEMA[opts.table].virtual ? opts.table395r.account_id = opts.account_id396r.project_id = opts.project_id397398s = SCHEMA[opts.table]399400if opts.account_id?401r.client_query = s?.user_query402else403r.client_query = s?.project_query404405if not r.client_query?.set?.fields?406return {err:"FATAL: user set queries not allowed for table '#{opts.table}'"}407408if not @_mod_fields(opts.query, r.client_query)409dbg("shortcut -- no fields will be modified, so nothing to do")410return411412for field in misc.keys(r.client_query.set.fields)413if r.client_query.set.fields[field] == undefined414return {err: "FATAL: user set query not allowed for #{opts.table}.#{field}"}415val = r.client_query.set.fields[field]416417if typeof(val) == 'function'418try419r.query[field] = val(r.query, @)420catch err421return {err:"FATAL: error setting '#{field}' -- #{err}"}422else423switch val424when 'account_id'425if not r.account_id?426return {err: "FATAL: account_id must be specified -- make sure you are signed in"}427r.query[field] = r.account_id428when 'project_id'429if not r.project_id?430return {err: "FATAL: project_id must be specified"}431r.query[field] = r.project_id432when 'time_id'433r.query[field] = uuid.v1()434when 'project_write'435if not r.query[field]?436return {err: "FATAL: must specify #{opts.table}.#{field}"}437r.require_project_ids_write_access = [r.query[field]]438when 'project_owner'439if not r.query[field]?440return {err:"FATAL: must specify #{opts.table}.#{field}"}441r.require_project_ids_owner = [r.query[field]]442443if r.client_query.set.admin444r.require_admin = true445446r.primary_keys = @_primary_keys(r.db_table)447448r.json_fields = @_json_fields(r.db_table, r.query)449450for k, v of r.query451if k in r.primary_keys452continue453if r.client_query?.set?.fields?[k] != undefined454continue455if s.admin_query?.set?.fields?[k] != undefined456r.require_admin = true457continue458return {err: "FATAL: changing #{r.table}.#{k} not allowed"}459460# HOOKS which allow for running arbitrary code in response to461# user set queries. In each case, new_val below is only the part462# of the object that the user requested to change.463464# 0. CHECK: Runs before doing any further processing; has callback, so this465# provides a generic way to quickly check whether or not this query is allowed466# for things that can't be done declaratively. The check_hook can also467# mutate the obj (the user query), e.g., to enforce limits on input size.468r.check_hook = r.client_query.set.check_hook469470# 1. BEFORE: If before_change is set, it is called with input471# (database, old_val, new_val, account_id, cb)472# before the actual change to the database is made.473r.before_change_hook = r.client_query.set.before_change474475# 2. INSTEAD OF: If instead_of_change is set, then instead_of_change_hook476# is called with input477# (database, old_val, new_val, account_id, cb)478# *instead* of actually doing the update/insert to479# the database. This makes it possible to run arbitrary480# code whenever the user does a certain type of set query.481# Obviously, if that code doesn't set the new_val in the482# database, then new_val won't be the new val.483r.instead_of_change_hook = r.client_query.set.instead_of_change484485# 3. AFTER: If set, the on_change_hook is called with486# (database, old_val, new_val, account_id, cb)487# after everything the database has been modified.488r.on_change_hook = r.client_query.set.on_change489490# 4. instead of query491r.instead_of_query = r.client_query.set.instead_of_query492493#dbg("on_change_hook=#{on_change_hook?}, #{misc.to_json(misc.keys(client_query.set))}")494495# Set the query options -- order doesn't matter for set queries (unlike for get), so we496# just merge the options into a single dictionary.497# NOTE: As I write this, there is just one supported option: {delete:true}.498r.options = {}499if r.client_query.set.options?500for x in r.client_query.set.options501for y, z of x502r.options[y] = z503if opts.options?504for x in opts.options505for y, z of x506r.options[y] = z507dbg("options = #{misc.to_json(r.options)}")508509if r.options.delete and not r.client_query.set.delete510# delete option is set, but deletes aren't explicitly allowed on this table. ERROR.511return {err: "FATAL: delete from #{r.table} not allowed"}512513return r514515_user_set_query_enforce_requirements: (r, cb) =>516async.parallel([517(cb) =>518if r.require_admin519@_require_is_admin(r.account_id, cb)520else521cb()522(cb) =>523if r.require_project_ids_write_access?524if r.project_id?525err = undefined526for x in r.require_project_ids_write_access527if x != r.project_id528err = "FATAL: can only query same project"529break530cb(err)531else532@_require_project_ids_in_groups(r.account_id, r.require_project_ids_write_access,\533['owner', 'collaborator'], cb)534else535cb()536(cb) =>537if r.require_project_ids_owner?538@_require_project_ids_in_groups(r.account_id, r.require_project_ids_owner,\539['owner'], cb)540else541cb()542], cb)543544_user_set_query_where: (r) =>545where = {}546for primary_key in @_primary_keys(r.db_table)547value = r.query[primary_key]548if SCHEMA[r.db_table].fields[primary_key].noCoerce549where["#{primary_key}=$"] = value550else551type = pg_type(SCHEMA[r.db_table].fields[primary_key])552if type == 'TIMESTAMP' and not misc.is_date(value)553# Javascript is better at parsing its own dates than PostgreSQL554# isNaN test so NOW(), etc. work still555x = new Date(value)556if not isNaN(x)557value = x558where["#{primary_key}=$::#{type}"] = value559return where560561_user_set_query_values: (r) =>562values = {}563s = SCHEMA[r.db_table]564for key, value of r.query565type = pg_type(s?.fields?[key])566if value? and type? and not s?.fields?[key]?.noCoerce567if type == 'TIMESTAMP' and not misc.is_date(value)568# (as above) Javascript is better at parsing its own dates than PostgreSQL569x = new Date(value)570if not isNaN(x)571value = x572values["#{key}::#{type}"] = value573else574values[key] = value575return values576577_user_set_query_hooks_prepare: (r, cb) =>578if r.on_change_hook? or r.before_change_hook? or r.instead_of_change_hook?579for primary_key in r.primary_keys580if not r.query[primary_key]?581cb("FATAL: query must specify (primary) key '#{primary_key}'")582return583# get the old value before changing it584# TODO: optimization -- can we restrict columns below?585@_query586query : "SELECT * FROM #{r.db_table}"587where : @_user_set_query_where(r)588cb : one_result (err, x) =>589r.old_val = x; cb(err)590else591cb()592593_user_query_set_count: (r, cb) =>594@_query595query : "SELECT COUNT(*) FROM #{r.db_table}"596where : @_user_set_query_where(r)597cb : count_result(cb)598599_user_query_set_delete: (r, cb) =>600@_query601query : "DELETE FROM #{r.db_table}"602where : @_user_set_query_where(r)603cb : cb604605_user_set_query_conflict: (r) =>606return r.primary_keys607608_user_query_set_upsert: (r, cb) =>609# r.dbg("_user_query_set_upsert #{JSON.stringify(r.query)}")610@_query611query : "INSERT INTO #{r.db_table}"612values : @_user_set_query_values(r)613conflict : @_user_set_query_conflict(r)614cb : cb615616# Record is already in DB, so we update it:617# this function handles a case that involves both618# a jsonb_merge and an update.619_user_query_set_upsert_and_jsonb_merge: (r, cb) =>620jsonb_merge = {}621for k of r.json_fields622v = r.query[k]623if v?624jsonb_merge[k] = v625set = {}626for k, v of r.query627if k not in r.primary_keys and not jsonb_merge[k]?628set[k] = v629@_query630query : "UPDATE #{r.db_table}"631jsonb_merge : jsonb_merge632set : set633where : @_user_set_query_where(r)634cb : cb635636_user_set_query_main_query: (r, cb) =>637r.dbg("_user_set_query_main_query")638639if not r.client_query.set.allow_field_deletes640# allow_field_deletes not set, so remove any null/undefined641# fields from the query642for key of r.query643if not r.query[key]?644delete r.query[key]645646if r.options.delete647for primary_key in r.primary_keys648if not r.query[primary_key]?649cb("FATAL: delete query must set primary key")650return651r.dbg("delete based on primary key")652@_user_query_set_delete(r, cb)653return654if r.instead_of_change_hook?655r.instead_of_change_hook(@, r.old_val, r.query, r.account_id, cb)656else657if misc.len(r.json_fields) == 0658# easy case -- there are no jsonb merge fields; just do an upsert.659@_user_query_set_upsert(r, cb)660return661# HARD CASE -- there are json_fields... so we are doing an insert662# if the object isn't already in the database, and an update663# if it is. This is ugly because I don't know how to do both664# a JSON merge as an upsert.665cnt = undefined # will equal number of records having the primary key (so 0 or 1)666async.series([667(cb) =>668@_user_query_set_count r, (err, n) =>669cnt = n; cb(err)670(cb) =>671r.dbg("do the set query")672if cnt == 0673# Just insert (do as upsert to avoid error in case of race)674@_user_query_set_upsert(r, cb)675else676# Do as an update -- record is definitely already in db since cnt > 0.677# This would fail in the unlikely (but possible) case that somebody deletes678# the record between the above count and when we do the UPDATE.679# Using a transaction could avoid this.680# Maybe such an error is reasonable and it's good to report it as such.681@_user_query_set_upsert_and_jsonb_merge(r, cb)682], cb)683684user_set_query: (opts) =>685opts = defaults opts,686account_id : undefined687project_id : undefined688table : required689query : required690options : undefined # options=[{delete:true}] is the only supported nontrivial option here.691cb : required # cb(err)692693# TODO: it would be nice to return the primary key part of the created object on creation.694# That's not implemented and will be somewhat nontrivial, and will use the RETURNING clause695# of postgres's INSERT - https://www.postgresql.org/docs/current/sql-insert.html696697if @is_standby698opts.cb("set queries against standby not allowed")699return700r = @_parse_set_query_opts(opts)701702# Only uncomment for debugging -- too big/verbose/dangerous703# r.dbg("parsed query opts = #{JSON.stringify(r)}")704705if not r? # nothing to do706opts.cb()707return708if r.err709opts.cb(r.err)710return711712async.series([713(cb) =>714@_user_set_query_enforce_requirements(r, cb)715(cb) =>716if r.check_hook?717r.check_hook(@, r.query, r.account_id, r.project_id, cb)718else719cb()720(cb) =>721@_user_set_query_hooks_prepare(r, cb)722(cb) =>723if r.before_change_hook?724r.before_change_hook @, r.old_val, r.query, r.account_id, (err, stop) =>725r.done = stop726cb(err)727else728cb()729(cb) =>730if r.done731cb()732return733if r.instead_of_query?734opts1 = misc.copy_without(opts, ['cb', 'changes', 'table'])735r.instead_of_query(@, opts1, cb)736else737@_user_set_query_main_query(r, cb)738(cb) =>739if r.done740cb()741return742if r.on_change_hook?743r.on_change_hook(@, r.old_val, r.query, r.account_id, cb)744else745cb()746], (err) => opts.cb(err))747748# mod_fields counts the fields in query that might actually get modified749# in the database when we do the query; e.g., account_id won't since it gets750# filled in with the user's account_id, and project_write won't since it must751# refer to an existing project. We use mod_field **only** to skip doing752# no-op queries. It's just an optimization.753_mod_fields: (query, client_query) =>754for field in misc.keys(query)755if client_query.set.fields[field] not in ['account_id', 'project_write']756return true757return false758759_user_get_query_json_timestamps: (obj, fields) =>760# obj is an object returned from the database via a query761# Postgres JSONB doesn't support timestamps, so we convert762# every json leaf node of obj that looks like JSON of a timestamp763# to a Javascript Date.764for k, v of obj765if fields[k]766obj[k] = misc.fix_json_dates(v, fields[k])767768# fill in the default values for obj using the client_query spec.769_user_get_query_set_defaults: (client_query, obj, fields) =>770if not misc.is_array(obj)771obj = [obj]772else if obj.length == 0773return774s = client_query?.get?.fields ? {}775for k in fields776v = s[k]777if v?778# k is a field for which a default value (=v) is provided in the schema779for x in obj780# For each obj pulled from the database that is defined...781if x?782# We check to see if the field k was set on that object.783y = x[k]784if not y?785# It was NOT set, so we deep copy the default value for the field k.786x[k] = misc.deep_copy(v)787else if typeof(v) == 'object' and typeof(y) == 'object' and not misc.is_array(v)788# y *is* defined and is an object, so we merge in the provided defaults.789for k0, v0 of v790if not y[k0]?791y[k0] = v0792793_user_set_query_project_users: (obj, account_id) =>794dbg = @_dbg("_user_set_query_project_users")795if not obj.users?796# nothing to do -- not changing users.797return798##dbg("disabled")799##return obj.users800# - ensures all keys of users are valid uuid's (though not that they are valid users).801# - and format is:802# {group:'owner' or 'collaborator', hide:bool, upgrades:{a map}}803# with valid upgrade fields.804upgrade_fields = PROJECT_UPGRADES.params805users = {}806# TODO: we obviously should check that a user is only changing the part807# of this object involving themselves... or adding/removing collaborators.808# That is not currently done below. TODO TODO TODO SECURITY.809for id, x of obj.users810if misc.is_valid_uuid_string(id)811for key in misc.keys(x)812if key not in ['group', 'hide', 'upgrades', 'ssh_keys']813throw Error("unknown field '#{key}")814if x.group? and (x.group not in ['owner', 'collaborator'])815throw Error("invalid value for field 'group'")816if x.hide? and typeof(x.hide) != 'boolean'817throw Error("invalid type for field 'hide'")818if x.upgrades?819if not misc.is_object(x.upgrades)820throw Error("invalid type for field 'upgrades'")821for k,_ of x.upgrades822if not upgrade_fields[k]823throw Error("invalid upgrades field '#{k}'")824if x.ssh_keys825# do some checks.826if not misc.is_object(x.ssh_keys)827throw Error("ssh_keys must be an object")828for fingerprint, key of x.ssh_keys829if not key # deleting830continue831if not misc.is_object(key)832throw Error("each key in ssh_keys must be an object")833for k, v of key834# the two dates are just numbers not actual timestamps...835if k not in ['title', 'value', 'creation_date', 'last_use_date']836throw Error("invalid ssh_keys field '#{k}'")837users[id] = x838return users839840project_action: (opts) =>841opts = defaults opts,842project_id : required843action_request : required # action is object {action:?, time:?}844cb : required845if opts.action_request.action == 'test'846# used for testing -- shouldn't trigger anything to happen.847opts.cb()848return849dbg = @_dbg("project_action(project_id='#{opts.project_id}',action_request=#{misc.to_json(opts.action_request)})")850dbg()851project = undefined852action_request = misc.copy(opts.action_request)853set_action_request = (cb) =>854dbg("set action_request to #{misc.to_json(action_request)}")855@_query856query : "UPDATE projects"857where : 'project_id = $::UUID':opts.project_id858jsonb_set : {action_request : action_request}859cb : cb860async.series([861(cb) =>862action_request.started = new Date()863set_action_request(cb)864(cb) =>865dbg("get project")866try867project = await @projectControl(opts.project_id)868cb()869catch err870cb(err)871(cb) =>872dbg("doing action")873try874switch action_request.action875when 'restart'876await project.restart()877when 'stop'878await project.stop()879when 'start'880await project.start()881else882throw Error("FATAL: action '#{opts.action_request.action}' not implemented")883cb()884catch err885cb(err)886], (err) =>887if err888action_request.err = err889action_request.finished = new Date()890dbg("finished!")891set_action_request(opts.cb)892)893894# This hook is called *before* the user commits a change to a project in the database895# via a user set query.896# TODO: Add a pre-check here as well that total upgrade isn't going to be exceeded.897# This will avoid a possible subtle edge case if user is cheating and always somehow898# crashes server...?899_user_set_query_project_change_before: (old_val, new_val, account_id, cb) =>900dbg = @_dbg("_user_set_query_project_change_before #{account_id}, #{misc.to_json(old_val)} --> #{misc.to_json(new_val)}")901dbg()902903if new_val?.name and (new_val?.name != old_val?.name)904# Changing or setting the name of the project to something nontrivial.905try906checkProjectName(new_val.name);907catch err908cb(err.toString())909return910if new_val.name911# Setting name to something nontrivial, so we must check uniqueness912# among all projects this user owns.913result = await callback2 @_query,914query : 'SELECT COUNT(*) FROM projects'915where :916"users#>>'{#{account_id},group}' = $::TEXT" : 'owner'917"project_id != $::UUID" : new_val.project_id918"LOWER(name) = $::TEXT":new_val.name.toLowerCase()919if result.rows[0].count > 0920cb("There is already a project with the same owner as this project and name='#{new_val.name}'. Names are not case sensitive.")921return922# A second constraint is that only the project owner can change the project name.923result = await callback2 @_query,924query : 'SELECT COUNT(*) FROM projects'925where :926"users#>>'{#{account_id},group}' = $::TEXT" : 'owner'927"project_id = $::UUID" : new_val.project_id928if result.rows[0].count == 0929cb("Only the owner of the project can currently change the project name.")930return931932if new_val?.action_request? and JSON.stringify(new_val.action_request.time) != JSON.stringify(old_val?.action_request?.time)933# Requesting an action, e.g., save, restart, etc.934dbg("action_request -- #{misc.to_json(new_val.action_request)}")935#936# WARNING: Above, we take the difference of times below, since != doesn't work as we want with937# separate Date objects, as it will say equal dates are not equal. Example:938# coffee> x = JSON.stringify(new Date()); {from_json}=require('misc'); a=from_json(x); b=from_json(x); [a!=b, a-b]939# [ true, 0 ]940941# Launch the action -- success or failure communicated back to all clients through changes to state.942# Also, we don't have to worry about permissions here; that this function got called at all means943# the user has write access to the projects table entry with given project_id, which gives them permission944# to do any action with the project.945@project_action946project_id : new_val.project_id947action_request : misc.copy_with(new_val.action_request, ['action', 'time'])948cb : (err) =>949dbg("action_request #{misc.to_json(new_val.action_request)} completed -- #{err}")950# true means -- do nothing further. We don't want to the user to951# set this same thing since we already dealt with it properly.952cb(err, true)953return954955if not new_val.users? # not changing users956cb(); return957old_val = old_val?.users ? {}958new_val = new_val?.users ? {}959for id in misc.keys(old_val).concat(new_val)960if account_id != id961# make sure user doesn't change anybody else's allocation962if not lodash.isEqual(old_val?[id]?.upgrades, new_val?[id]?.upgrades)963err = "FATAL: user '#{account_id}' tried to change user '#{id}' allocation toward a project"964dbg(err)965cb(err)966return967cb()968969# This hook is called *after* the user commits a change to a project in the database970# via a user set query. It could undo changes the user isn't allowed to make, which971# might require doing various async calls, or take actions (e.g., setting quotas,972# starting projects, etc.).973_user_set_query_project_change_after: (old_val, new_val, account_id, cb) =>974dbg = @_dbg("_user_set_query_project_change_after #{account_id}, #{misc.to_json(old_val)} --> #{misc.to_json(new_val)}")975dbg()976old_upgrades = old_val.users?[account_id]?.upgrades977new_upgrades = new_val.users?[account_id]?.upgrades978if new_upgrades? and not lodash.isEqual(old_upgrades, new_upgrades)979dbg("upgrades changed for #{account_id} from #{misc.to_json(old_upgrades)} to #{misc.to_json(new_upgrades)}")980project = undefined981async.series([982(cb) =>983@ensure_user_project_upgrades_are_valid984account_id : account_id985cb : cb986(cb) =>987if not @projectControl?988cb()989else990dbg("get project")991try992project = await @projectControl(new_val.project_id)993cb()994catch err995cb(err)996(cb) =>997if not project?998cb()999else1000dbg("determine total quotas and apply")1001try1002await project.setAllQuotas()1003cb()1004catch err1005cb(err)1006], cb)1007else1008cb()10091010###1011GET QUERIES1012###10131014# Make any functional substitutions defined by the schema.1015# This may mutate query in place.1016_user_get_query_functional_subs: (query, fields) =>1017if fields?1018for field, val of fields1019if typeof(val) == 'function'1020query[field] = val(query, @)10211022_parse_get_query_opts: (opts) =>1023if opts.changes? and not opts.changes.cb?1024return {err: "FATAL: user_get_query -- if opts.changes is specified, then opts.changes.cb must also be specified"}10251026r = {}1027# get data about user queries on this table1028if opts.project_id?1029r.client_query = SCHEMA[opts.table]?.project_query1030else1031r.client_query = SCHEMA[opts.table]?.user_query10321033if not r.client_query?.get?1034return {err: "FATAL: get queries not allowed for table '#{opts.table}'"}10351036if not opts.account_id? and not opts.project_id? and not SCHEMA[opts.table].anonymous1037return {err: "FATAL: anonymous get queries not allowed for table '#{opts.table}'"}10381039r.table = SCHEMA[opts.table].virtual ? opts.table10401041r.primary_keys = @_primary_keys(opts.table)10421043# Are only admins allowed any get access to this table?1044r.require_admin = !!r.client_query.get.admin10451046# Verify that all requested fields may be read by users1047for field in misc.keys(opts.query)1048if r.client_query.get.fields?[field] == undefined1049return {err: "FATAL: user get query not allowed for #{opts.table}.#{field}"}10501051# Functional substitutions defined by schema1052@_user_get_query_functional_subs(opts.query, r.client_query.get?.fields)10531054if r.client_query.get?.instead_of_query?1055return r10561057# Sanity check: make sure there is something in the query1058# that gets only things in this table that this user1059# is allowed to see, or at least a check_hook. This is not required1060# for admins.1061if not r.client_query.get.pg_where? and not r.client_query.get.check_hook? and not r.require_admin1062return {err: "FATAL: user get query not allowed for #{opts.table} (no getAll filter - pg_where or check_hook)"}10631064# Apply default options to the get query (don't impact changefeed)1065# The user can override these, e.g., if they were to want to explicitly increase a limit1066# to get more file use history.1067user_options = {}1068for x in opts.options1069for y, z of x1070user_options[y] = true10711072get_options = undefined1073if @is_heavily_loaded() and r.client_query.get.options_load?1074get_options = r.client_query.get.options_load1075else if r.client_query.get.options?1076get_options = r.client_query.get.options1077if get_options?1078# complicated since options is a list of {opt:val} !1079for x in get_options1080for y, z of x1081if not user_options[y]1082opts.options.push(x)1083break10841085r.json_fields = @_json_fields(opts.table, opts.query)1086return r10871088# _json_fields: map from field names to array of fields that should be parsed as timestamps1089# These keys of his map are also used by _user_query_set_upsert_and_jsonb_merge to determine1090# JSON deep merging for set queries.1091_json_fields: (table, query) =>1092json_fields = {}1093for field, info of SCHEMA[table].fields1094if (query[field]? or query[field] == null) and (info.type == 'map' or info.pg_type == 'JSONB')1095json_fields[field] = info.date ? []1096return json_fields10971098_user_get_query_where: (client_query, account_id, project_id, user_query, table, cb) =>1099dbg = @_dbg("_user_get_query_where")1100dbg()11011102pg_where = client_query.get.pg_where11031104if @is_heavily_loaded() and client_query.get.pg_where_load?1105# use a different query if load is heavy1106pg_where = client_query.get.pg_where_load11071108if not pg_where?1109pg_where = []1110if pg_where == 'projects'1111pg_where = ['projects']11121113if typeof(pg_where) == 'function'1114pg_where = pg_where(user_query, @)1115if not misc.is_array(pg_where)1116cb("FATAL: pg_where must be an array (of strings or objects)")1117return11181119# Do NOT mutate the schema itself!1120pg_where = misc.deep_copy(pg_where)11211122# expand 'projects' in query, depending on whether project_id is specified or not.1123# This is just a convenience to make the db schema simpler.1124for i in [0...pg_where.length]1125if pg_where[i] == 'projects'1126if user_query.project_id1127pg_where[i] = {"project_id = $::UUID" : 'project_id'}1128else1129pg_where[i] = {"project_id = ANY(select project_id from projects where users ? $::TEXT)" : 'account_id'}11301131# Now we fill in all the parametrized substitutions in the pg_where list.1132subs = {}1133for x in pg_where1134if misc.is_object(x)1135for key, value of x1136subs[value] = value11371138sub_value = (value, cb) =>1139switch value1140when 'account_id'1141if not account_id?1142cb('FATAL: account_id must be given')1143return1144subs[value] = account_id1145cb()1146when 'project_id'1147if project_id?1148subs[value] = project_id1149cb()1150else if not user_query.project_id1151cb("FATAL: must specify project_id")1152else if SCHEMA[table].anonymous1153subs[value] = user_query.project_id1154cb()1155else1156@user_is_in_project_group1157account_id : account_id1158project_id : user_query.project_id1159groups : ['owner', 'collaborator']1160cb : (err, in_group) =>1161if err1162cb(err)1163else if in_group1164subs[value] = user_query.project_id1165cb()1166else1167cb("FATAL: you do not have read access to this project")1168when 'project_id-public'1169if not user_query.project_id?1170cb("FATAL: must specify project_id")1171else1172if SCHEMA[table].anonymous1173@has_public_path1174project_id : user_query.project_id1175cb : (err, has_public_path) =>1176if err1177cb(err)1178else if not has_public_path1179cb("project does not have any public paths")1180else1181subs[value] = user_query.project_id1182cb()1183else1184cb("FATAL: table must allow anonymous queries")1185else1186cb()11871188async.map misc.keys(subs), sub_value, (err) =>1189if err1190cb(err)1191return1192for x in pg_where1193if misc.is_object(x)1194for key, value of x1195x[key] = subs[value]11961197# impose further restrictions (more where conditions)1198pg_where.push(userGetQueryFilter(user_query, client_query))11991200cb(undefined, pg_where)12011202_user_get_query_options: (options, multi, schema_options) =>1203r = {}12041205if schema_options?1206options = options.concat(schema_options)12071208# Parse option part of the query1209{limit, order_by, slice, only_changes, err} = @_query_parse_options(options)12101211if err1212return {err: err}1213if only_changes1214r.only_changes = true1215if limit?1216r.limit = limit1217else if not multi1218r.limit = 11219if order_by?1220r.order_by = order_by1221if slice?1222return {err: "slice not implemented"}1223return r12241225_user_get_query_do_query: (query_opts, client_query, user_query, multi, json_fields, cb) =>1226query_opts.cb = all_results (err, x) =>1227if err1228cb(err)1229else1230if misc.len(json_fields) > 01231# Convert timestamps to Date objects, if **explicitly** specified in the schema1232for obj in x1233@_user_get_query_json_timestamps(obj, json_fields)12341235if not multi1236x = x[0]1237# Fill in default values and remove null's1238@_user_get_query_set_defaults(client_query, x, misc.keys(user_query))1239# Get rid of undefined fields -- that's the default and wastes memory and bandwidth1240if x?1241for obj in x1242misc.map_mutate_out_undefined_and_null(obj)1243cb(undefined, x)1244@_query(query_opts)12451246_user_get_query_query: (table, user_query, remove_from_query) =>1247return "SELECT #{(quote_field(field) for field in @_user_get_query_columns(user_query, remove_from_query)).join(',')} FROM #{table}"12481249_user_get_query_satisfied_by_obj: (user_query, obj, possible_time_fields) =>1250#dbg = @_dbg("_user_get_query_satisfied_by_obj)1251#dbg(user_query, obj)1252for field, value of obj1253date_keys = possible_time_fields[field]1254if date_keys1255value = misc.fix_json_dates(value, date_keys)1256if (q = user_query[field])?1257if (op = queryIsCmp(q))1258#dbg(value:value, op: op, q:q)1259x = q[op]1260switch op1261when '=='1262if value != x1263return false1264when '!='1265if value == x1266return false1267when '>='1268if value < x1269return false1270when '<='1271if value > x1272return false1273when '>'1274if value <= x1275return false1276when '<'1277if value >= x1278return false1279else if value != q1280return false1281return true12821283_user_get_query_handle_field_deletes: (client_query, new_val) =>1284if client_query.get.allow_field_deletes1285# leave in the nulls that might be in new_val1286return1287# remove all nulls from new_val. Right now we1288# just can't support this due to default values.1289# TODO: completely get rid of default values (?) or1290# maybe figure out how to implement this. The symptom1291# of not doing this is a normal user will do things like1292# delete the users field of their projects. Not good.1293for key of new_val1294if not new_val[key]?1295delete new_val[key]12961297_user_get_query_changefeed: (changes, table, primary_keys, user_query,1298where, json_fields, account_id, client_query, cb) =>1299dbg = @_dbg("_user_get_query_changefeed(table='#{table}')")1300dbg()1301# WARNING: always call changes.cb! Do not do something like f = changes.cb, then call f!!!!1302# This is because the value of changes.cb may be changed by the caller.1303if not misc.is_object(changes)1304cb("FATAL: changes must be an object with keys id and cb")1305return1306if not misc.is_valid_uuid_string(changes.id)1307cb("FATAL: changes.id must be a uuid")1308return1309if typeof(changes.cb) != 'function'1310cb("FATAL: changes.cb must be a function")1311return1312for primary_key in primary_keys1313if not user_query[primary_key]? and user_query[primary_key] != null # TODO: this seems slightly off1314cb("FATAL: changefeed MUST include primary key (='#{primary_key}') in query")1315return1316watch = []1317select = {}1318init_tracker = tracker = free_tracker = undefined1319possible_time_fields = misc.deep_copy(json_fields)1320feed = undefined13211322for field, val of user_query1323type = pg_type(SCHEMA[table]?.fields?[field])1324if type == 'TIMESTAMP'1325possible_time_fields[field] = 'all'1326if val == null and field not in primary_keys1327watch.push(field)1328else1329select[field] = type13301331if misc.len(possible_time_fields) > 01332# Convert (likely) timestamps to Date objects; fill in defaults for inserts1333process = (x) =>1334if not x?1335return1336if x.new_val?1337@_user_get_query_json_timestamps(x.new_val, possible_time_fields)1338if x.action == 'insert' # do not do this for delete or update actions!1339@_user_get_query_set_defaults(client_query, x.new_val, misc.keys(user_query))1340else if x.action == 'update'1341@_user_get_query_handle_field_deletes(client_query, x.new_val)1342if x.old_val?1343@_user_get_query_json_timestamps(x.old_val, possible_time_fields)1344else1345process = (x) =>1346if not x?1347return1348if x.new_val?1349if x.action == 'insert' # do not do this for delete or update actions!1350@_user_get_query_set_defaults(client_query, x.new_val, misc.keys(user_query))1351else if x.action == 'update'1352@_user_get_query_handle_field_deletes(client_query, x.new_val)13531354async.series([1355(cb) =>1356# check for alternative where test for changefeed.1357pg_changefeed = client_query?.get?.pg_changefeed1358if not pg_changefeed?1359cb(); return13601361if pg_changefeed == 'projects'1362tracker_add = (project_id) => feed.insert({project_id:project_id})1363tracker_remove = (project_id) => feed.delete({project_id:project_id})1364pg_changefeed = (db, account_id) =>1365where : (obj) =>1366# Check that this is a project we have read access to1367if not db._project_and_user_tracker?.get_projects(account_id)[obj.project_id]1368return false1369# Now check our actual query conditions on the object.1370# This would normally be done by the changefeed, but since1371# we are passing in a custom where, we have to do it.1372if not @_user_get_query_satisfied_by_obj(user_query, obj, possible_time_fields)1373return false1374return true13751376select : {'project_id':'UUID'}13771378init_tracker : (tracker) =>1379tracker.on "add_user_to_project-#{account_id}", tracker_add1380tracker.on "remove_user_from_project-#{account_id}", tracker_remove13811382free_tracker : (tracker) =>1383dbg("freeing project tracker events")1384tracker.removeListener("add_user_to_project-#{account_id}", tracker_add)1385tracker.removeListener("remove_user_from_project-#{account_id}", tracker_remove)138613871388else if pg_changefeed == 'news'1389pg_changefeed = ->1390where : (obj) ->1391if obj.date?1392date_obj = new Date(obj.date)1393# we send future news items to the frontend, but filter it based on the server time1394return date_obj >= misc.months_ago(3)1395else1396return true1397select : {id: 'SERIAL UNIQUE', date: 'TIMESTAMP'}13981399else if pg_changefeed == 'one-hour'1400pg_changefeed = ->1401where : (obj) ->1402if obj.time?1403return new Date(obj.time) >= misc.hours_ago(1)1404else1405return true1406select : {id:'UUID', time:'TIMESTAMP'}14071408else if pg_changefeed == 'five-minutes'1409pg_changefeed = ->1410where : (obj) ->1411if obj.time?1412return new Date(obj.time) >= misc.minutes_ago(5)1413else1414return true1415select : {id:'UUID', time:'TIMESTAMP'}14161417else if pg_changefeed == 'collaborators'1418if not account_id?1419cb("FATAL: account_id must be given")1420return1421tracker_add = (collab_id) => feed.insert({account_id:collab_id})1422tracker_remove = (collab_id) => feed.delete({account_id:collab_id})1423pg_changefeed = (db, account_id) ->1424shared_tracker = undefined1425where : (obj) -> # test of "is a collab with me"1426return shared_tracker.get_collabs(account_id)?[obj.account_id]1427init_tracker : (tracker) =>1428shared_tracker = tracker1429tracker.on "add_collaborator-#{account_id}", tracker_add1430tracker.on "remove_collaborator-#{account_id}", tracker_remove1431free_tracker : (tracker) =>1432dbg("freeing collab tracker events")1433tracker.removeListener("add_collaborator-#{account_id}", tracker_add)1434tracker.removeListener("remove_collaborator-#{account_id}", tracker_remove)143514361437x = pg_changefeed(@, account_id)1438if x.init_tracker?1439init_tracker = x.init_tracker1440if x.free_tracker?1441free_tracker = x.free_tracker1442if x.select?1443for k, v of x.select1444select[k] = v14451446if x.where? or x.init_tracker?1447where = x.where1448if not account_id?1449cb()1450return1451# initialize user tracker is needed for where tests...1452@project_and_user_tracker cb : (err, _tracker) =>1453if err1454cb(err)1455else1456tracker = _tracker1457try1458await tracker.register(account_id)1459cb()1460catch err1461cb(err)1462else1463cb()1464(cb) =>1465@changefeed1466table : table1467select : select1468where : where1469watch : watch1470cb : (err, _feed) =>1471if err1472cb(err)1473return1474feed = _feed1475feed.on 'change', (x) ->1476process(x)1477changes.cb(undefined, x)1478feed.on 'close', ->1479changes.cb(undefined, {action:'close'})1480dbg("feed close")1481if tracker? and free_tracker?1482dbg("free_tracker")1483free_tracker(tracker)1484dbg("do NOT free_tracker")1485feed.on 'error', (err) ->1486changes.cb("feed error - #{err}")1487@_changefeeds ?= {}1488@_changefeeds[changes.id] = feed1489init_tracker?(tracker)1490# Any tracker error means this changefeed is now broken and1491# has to be recreated.1492tracker?.once 'error', (err) ->1493changes.cb("tracker error - #{err}")1494cb()1495], cb)14961497user_get_query: (opts) =>1498opts = defaults opts,1499account_id : undefined1500project_id : undefined1501table : required1502query : required1503multi : required1504options : required # used for initial query; **IGNORED** by changefeed,1505# which ensures that *something* is sent every n minutes, in case no1506# changes are coming out of the changefeed. This is an additional1507# measure in case the client somehow doesn't get a "this changefeed died" message.1508# Use [{delete:true}] to instead delete the selected records (must1509# have delete:true in schema).1510changes : undefined # {id:?, cb:?}1511cb : required # cb(err, result)1512###1513The general idea is that user get queries are of the form15141515SELECT [columns] FROM table WHERE [get_all] AND [further restrictions] LIMIT/slice15161517Using the whitelist rules specified in SCHEMA, we1518determine each of the above, then run the query.15191520If no error in query, and changes is a given uuid, set up a change1521feed that calls opts.cb on changes as well.1522###1523id = misc.uuid().slice(0,6)1524#dbg = @_dbg("user_get_query(id=#{id})")1525dbg = -> # Logging below is just too verbose, and turns out to not be useful...1526dbg("account_id='#{opts.account_id}', project_id='#{opts.project_id}', query=#{misc.to_json(opts.query)}, multi=#{opts.multi}, options=#{misc.to_json(opts.options)}, changes=#{misc.to_json(opts.changes)}")1527{err, table, client_query, require_admin, primary_keys, json_fields} = @_parse_get_query_opts(opts)15281529if err1530dbg("error parsing query opts -- #{err}")1531opts.cb(err)1532return15331534_query_opts = {} # this will be the input to the @_query command.1535locals =1536result : undefined1537changes_cb : undefined15381539async.series([1540(cb) =>1541if client_query.get.check_hook?1542dbg("do check hook")1543client_query.get.check_hook(@, opts.query, opts.account_id, opts.project_id, cb)1544else1545cb()1546(cb) =>1547if require_admin1548dbg('require admin')1549@_require_is_admin(opts.account_id, cb)1550else1551cb()1552(cb) =>1553# NOTE: _user_get_query_where may mutate opts.query (for 'null' params)1554# so it is important that this is called before @_user_get_query_query below.1555# See the TODO in userGetQueryFilter.1556dbg("get_query_where")1557@_user_get_query_where client_query, opts.account_id, opts.project_id, opts.query, opts.table, (err, where) =>1558_query_opts.where = where1559cb(err)1560(cb) =>1561if client_query.get.instead_of_query?1562cb();1563return1564_query_opts.query = @_user_get_query_query(table, opts.query, client_query.get.remove_from_query)1565x = @_user_get_query_options(opts.options, opts.multi, client_query.options)1566if x.err1567dbg("error in get_query_options, #{x.err}")1568cb(x.err)1569return1570misc.merge(_query_opts, x)15711572nestloop = SCHEMA[opts.table]?.pg_nestloop # true, false or undefined1573if typeof nestloop == 'boolean'1574val = if nestloop then 'on' else 'off'1575_query_opts.pg_params = {enable_nestloop : val}15761577indexscan = SCHEMA[opts.table]?.pg_indexscan # true, false or undefined1578if typeof indexscan == 'boolean'1579val = if indexscan then 'on' else 'off'1580_query_opts.pg_params = {enable_indexscan : val}15811582if opts.changes?1583locals.changes_cb = opts.changes.cb1584locals.changes_queue = []1585# see note about why we do the following at the bottom of this file1586opts.changes.cb = (err, obj) ->1587locals.changes_queue.push({err:err, obj:obj})1588dbg("getting changefeed")1589@_user_get_query_changefeed(opts.changes, table, primary_keys,1590opts.query, _query_opts.where, json_fields,1591opts.account_id, client_query, cb)1592else1593cb()15941595(cb) =>1596if client_query.get.instead_of_query?1597if opts.changes?1598cb("changefeeds are not supported for querying this table")1599return1600# Custom version: instead of doing a full query, we instead1601# call a function and that's it.1602dbg("do instead_of_query instead")1603opts1 = misc.copy_without(opts, ['cb', 'changes', 'table'])1604client_query.get.instead_of_query @, opts1, (err, result) =>1605locals.result = result1606cb(err)1607return16081609if _query_opts.only_changes1610dbg("skipping query")1611locals.result = undefined1612cb()1613else1614dbg("finally doing query")1615@_user_get_query_do_query _query_opts, client_query, opts.query, opts.multi, json_fields, (err, result) =>1616if err1617cb(err)1618return1619locals.result = result1620cb()1621], (err) =>1622if err1623dbg("series failed -- err=#{err}")1624opts.cb(err)1625return1626dbg("series succeeded")1627opts.cb(undefined, locals.result)1628if opts.changes?1629dbg("sending change queue")1630opts.changes.cb = locals.changes_cb1631##dbg("sending queued #{JSON.stringify(locals.changes_queue)}")1632for {err, obj} in locals.changes_queue1633##dbg("sending queued changes #{JSON.stringify([err, obj])}")1634opts.changes.cb(err, obj)1635)16361637###1638Synchronized strings1639###1640_user_set_query_syncstring_change_after: (old_val, new_val, account_id, cb) =>1641dbg = @_dbg("_user_set_query_syncstring_change_after")1642cb() # return immediately -- stuff below can happen as side effect in the background.1643# Now do the following reactions to this syncstring change in the background:1644# 1. Awaken the relevant project.1645project_id = old_val?.project_id ? new_val?.project_id1646if project_id? and (new_val?.save?.state == 'requested' or (new_val?.last_active? and new_val?.last_active != old_val?.last_active))1647dbg("awakening project #{project_id}")1648awaken_project(@, project_id)164916501651# Verify that writing a patch is allowed.1652_user_set_query_patches_check: (obj, account_id, project_id, cb) =>1653# Reject any patch that is too new1654if obj.time - new Date() > MAX_PATCH_FUTURE_MS1655cb("clock") # this exact error is assumed in synctable!1656return1657# Write access1658@_syncstring_access_check(obj.string_id, account_id, project_id, cb)16591660# Verify that writing a patch is allowed.1661_user_get_query_patches_check: (obj, account_id, project_id, cb) =>1662# Write access (no notion of read only yet -- will be easy to add later)1663@_syncstring_access_check(obj.string_id, account_id, project_id, cb)16641665# Verify that writing a patch is allowed.1666_user_set_query_cursors_check: (obj, account_id, project_id, cb) =>1667@_syncstring_access_check(obj.string_id, account_id, project_id, cb)16681669# Verify that writing a patch is allowed.1670_user_get_query_cursors_check: (obj, account_id, project_id, cb) =>1671@_syncstring_access_check(obj.string_id, account_id, project_id, cb)16721673_syncstring_access_check: (string_id, account_id, project_id, cb) =>1674# Check that string_id is the id of a syncstring the given account_id or1675# project_id is allowed to write to. NOTE: We do not concern ourselves (for now at least)1676# with proof of identity (i.e., one user with full read/write access to a project1677# claiming they are another users of that SAME project), since our security model1678# is that any user of a project can edit anything there. In particular, the1679# synctable lets any user with write access to the project edit the users field.1680if string_id?.length != 401681cb("FATAL: string_id (='#{string_id}') must be a string of length 40")1682return1683@_query1684query : "SELECT project_id FROM syncstrings"1685where : "string_id = $::CHAR(40)" : string_id1686cache : false # *MUST* leave as false (not true), since unfortunately, if this returns no, due to FATAL below this would break opening the file until cache clears.1687cb : one_result 'project_id', (err, x) =>1688if err1689cb(err)1690else if not x1691# There is no such syncstring with this id -- fail1692cb("FATAL: no such syncstring")1693else if account_id?1694# Attempt to read or write by a user browser client1695@_require_project_ids_in_groups(account_id, [x], ['owner', 'collaborator'], cb)1696else if project_id?1697# Attempt to read or write by a *project*1698if project_id == x1699cb()1700else1701cb("FATAL: project not allowed to write to syncstring in different project")170217031704# Check permissions for querying for syncstrings in a project1705_syncstrings_check: (obj, account_id, project_id, cb) =>1706#dbg = @dbg("_syncstrings_check")1707#dbg(misc.to_json([obj, account_id, project_id]))1708if not misc.is_valid_uuid_string(obj?.project_id)1709cb("FATAL: project_id (='#{obj?.project_id}') must be a valid uuid")1710return1711if project_id?1712if project_id == obj.project_id1713# The project can access its own syncstrings1714cb()1715else1716cb("FATAL: projects can only access their own syncstrings") # for now at least!1717return1718if account_id?1719# Access request by a client user1720@_require_project_ids_in_groups(account_id, [obj.project_id], ['owner', 'collaborator'], cb)1721else1722cb("FATAL: only users and projects can access syncstrings")17231724# Other functions that are needed to implement various use queries,1725# e.g., for virtual queries like file_use_times.1726# ASYNC FUNCTION with no callback.1727file_use_times: (opts) => # for docs, see where this is imported from.1728return await file_use_times(@, opts)17291730updateRetentionData: (opts) =>1731return await updateRetentionData(opts)17321733_last_awaken_time = {}1734awaken_project = (db, project_id, cb) ->1735# throttle so that this gets called *for a given project* at most once every 30s.1736now = new Date()1737if _last_awaken_time[project_id]? and now - _last_awaken_time[project_id] < 300001738return1739_last_awaken_time[project_id] = now1740dbg = db._dbg("_awaken_project(project_id=#{project_id})")1741if not db.projectControl?1742dbg("skipping since no projectControl defined")1743return1744dbg("doing it...")1745async.series([1746(cb) ->1747try1748project = db.projectControl(project_id)1749await project.start()1750cb()1751catch err1752cb("error starting project = #{err}")1753(cb) ->1754if not db.ensure_connection_to_project?1755cb()1756return1757dbg("also make sure there is a connection from hub to project")1758# This is so the project can find out that the user wants to save a file (etc.)1759db.ensure_connection_to_project(project_id, cb)1760], (err) ->1761if err1762dbg("awaken project error -- #{err}")1763else1764dbg("success awakening project")1765cb?(err)1766)1767###1768Note about opts.changes.cb:17691770Regarding sync, what was happening I think is:1771- (a) https://github.com/sagemathinc/cocalc/blob/master/src/packages/hub/postgres-user-queries.coffee#L1384 starts sending changes1772- (b) https://github.com/sagemathinc/cocalc/blob/master/src/packages/hub/postgres-user-queries.coffee#L1393 sends the full table.17731774(a) could result in changes actually getting to the client before the table itself has been initialized. The client code assumes that it only gets changes *after* the table is initialized. The browser client seems to be smart enough that it detects this situation and resets itself, so the browser never gets messed up as a result.1775However, the project definitely does NOT do so well, and it can get messed up. Then it has a broken version of the table, missing some last minute change. It is broken until the project forgets about that table entirely, which is can be a pretty long time (or project restart).17761777My fix is to queue up those changes on the server, then only start sending them to the client **after** the (b) query is done. I tested this by using setTimeout to manually delay (b) for a few seconds, and fully seeing the "file won't save problem". The other approach would make it so clients are more robust against getting changes first. However, it would take a long time for all clients to update (restart all projects), and it's an annoying assumption to make in general -- we may have entirely new clients later and they could make the same bad assumptions about order...1778###177917801781