Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Avatar for KuCalc : devops.
Download
50629 views
1
##############################################################################
2
#
3
# CoCalc: Collaborative Calculation in the Cloud
4
#
5
# Copyright (C) 2016, Sagemath Inc.
6
#
7
# This program is free software: you can redistribute it and/or modify
8
# it under the terms of the GNU General Public License as published by
9
# the Free Software Foundation, either version 3 of the License, or
10
# (at your option) any later version.
11
#
12
# This program is distributed in the hope that it will be useful,
13
# but WITHOUT ANY WARRANTY; without even the implied warranty of
14
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
# GNU General Public License for more details.
16
#
17
# You should have received a copy of the GNU General Public License
18
# along with this program. If not, see <http://www.gnu.org/licenses/>.
19
#
20
###############################################################################
21
22
###
23
HTTP Proxy Server, which passes requests directly onto http
24
servers running on project vm's
25
###
26
27
async = require('async')
28
winston = require('winston')
29
http_proxy = require('http-proxy')
30
url = require('url')
31
http = require('http')
32
mime = require('mime')
33
Cookies = require('cookies')
34
ms = require('ms')
35
36
misc = require('smc-util/misc')
37
{defaults, required} = misc
38
theme = require('smc-util/theme')
39
{DOMAIN_NAME} = theme
40
41
hub_projects = require('./projects')
42
auth = require('./auth')
43
access = require('./access')
44
45
# When true, the per-request dbg helper of the HTTP proxy request handler
# logs through winston.debug; when false that helper is a no-op.
DEBUG2 = false
46
47
# Split a proxied request url of the form /<project_id>/<type>/... into parts.
#
#   remember_me -- value of the remember_me cookie (callers may pass undefined)
#   url         -- request url, '/'-separated
#
# Returns {key, type, project_id, port_number}.  key is the plain concatenation
# remember_me + project_id + type, with the fourth url segment appended when
# type is 'port'; port_number is that fourth segment and is only set for 'port'.
exports.target_parse_req = target_parse_req = (remember_me, url) ->
    segments    = url.split('/')
    project_id  = segments[1]
    type        = segments[2]     # 'port' or 'raw'
    is_port     = type == 'port'
    port_number = if is_port then segments[3] else undefined
    key         = remember_me + project_id + type
    if is_port
        key += segments[3]
    return {key, type, project_id, port_number}
56
57
# Ask the hub's project object for the port of its Jupyter server.
#
#   project_id     -- assumed valid, with all auth already done
#   compute_server -- passed through to hub_projects.new_project
#   database       -- passed through to hub_projects.new_project
#   cb             -- cb(err, port)
exports.jupyter_server_port = jupyter_server_port = (opts) ->
    opts = defaults opts,
        project_id     : required
        compute_server : required
        database       : required
        cb             : required
    {project_id, database, compute_server, cb} = opts
    project = hub_projects.new_project(project_id, database, compute_server)
    project.jupyter_port(cb: cb)
65
66
# Set up the HTTP proxy server.  Everything that follows in this function
# (access checks, target lookup, the request and websocket-upgrade handlers,
# public raw file serving) closes over database, compute_server and base_url.
#
#   database       -- used for get_remember_me and get_public_paths
#   compute_server -- used for compute_server.project(...) lookups
#   base_url       -- prefix stripped from request urls; also prefixes the
#                     name of the remember_me cookie
#   port, host     -- address the server listens on
exports.init_http_proxy_server = (opts) ->
67
opts = defaults opts,
68
database : required
69
compute_server : required
70
base_url : required
71
port : required
72
host : required
73
{database, compute_server, base_url} = opts
74
75
winston.debug("init_http_proxy_server")
76
77
# Determine whether the account behind a remember_me cookie may access a project.
#
#   project_id  -- project to check
#   remember_me -- raw remember_me cookie value; it is split on '$' and the
#                  first four pieces are passed to auth.generate_hash
#   type        -- 'write' (default) or 'read'; any value other than 'write'
#                  is checked as read access
#   cb          -- cb(err, has_access); has_access is true only when the
#                  cookie resolved to an account and the access check passed
_remember_me_check_for_access_to_project = (opts) ->
    opts = defaults opts,
        project_id  : required
        remember_me : required
        type        : 'write'     # 'read' or 'write'
        cb          : required    # cb(err, has_access)
    dbg = (m) -> winston.debug("_remember_me_check_for_access_to_project: #{m}")
    account_id    = undefined
    email_address = undefined
    has_access    = false
    hash          = undefined
    async.series([
        (cb) ->
            dbg("get remember_me message")
            # The cookie is supplied by the client, so it may be malformed
            # (e.g. missing '$'-separated fields).  Hashing such a value must
            # not be allowed to throw out of this step; report it as an error.
            # The cookie itself is deliberately not logged.
            try
                x = opts.remember_me.split('$')
                hash = auth.generate_hash(x[0], x[1], x[2], x[3])
            catch e
                msg = "unable to generate hash from remember_me cookie -- #{e}"
                dbg(msg)
                cb(msg)
                return
            database.get_remember_me
                hash : hash
                cb   : (err, signed_in_mesg) ->
                    if err or not signed_in_mesg?
                        dbg("failed to get remember_me -- #{err}")
                        cb("unable to get remember_me from db -- #{err}")
                    else
                        account_id    = signed_in_mesg.account_id
                        email_address = signed_in_mesg.email_address
                        dbg("account_id=#{account_id}, email_address=#{email_address}")
                        cb()
        (cb) ->
            dbg("check if user has #{opts.type} access to project")
            kind = if opts.type == 'write' then 'write' else 'read'
            # Shared result handler for both the read and the write check.
            on_result = (err, result) ->
                dbg("got: #{err}, #{result}")
                if err
                    cb(err)
                else if not result
                    cb("User does not have #{kind} access to project.")
                else
                    has_access = true
                    cb()
            query =
                database   : database
                project_id : opts.project_id
                account_id : account_id
                cb         : on_result
            if kind == 'write'
                access.user_has_write_access_to_project(query)
            else
                access.user_has_read_access_to_project(query)
    ], (err) ->
        opts.cb(err, has_access)
    )
138
139
# Cache of access decisions, keyed by project_id + remember_me + type.
_remember_me_cache = {}

# Cached front end to _remember_me_check_for_access_to_project.
#
#   project_id, remember_me, type -- as for the uncached check ('write' default)
#   cb -- cb(err, has_access); on a cache hit err is false
remember_me_check_for_access_to_project = (opts) ->
    opts = defaults opts,
        project_id  : required
        remember_me : required
        type        : 'write'
        cb          : required    # cb(err, has_access)
    key = opts.project_id + opts.remember_me + opts.type
    cached = _remember_me_cache[key]
    if cached?
        opts.cb(false, cached)
        return
    # Not cached: compute the answer, remember it for a while, then report it.
    _remember_me_check_for_access_to_project
        project_id  : opts.project_id
        remember_me : opts.remember_me
        type        : opts.type
        cb          : (err, has_access) ->
            # Should the cache ever grow huge (not expected under normal
            # conditions), drop it entirely so it cannot be used to exhaust RAM.
            if misc.len(_remember_me_cache) >= 100000
                _remember_me_cache = {}

            _remember_me_cache[key] = has_access
            # Every entry expires on a timer.  A positive answer is kept for
            # 7 minutes (so revoked privileges can still be used that long);
            # a negative answer only 10 seconds, since the user may simply not
            # have been logged in yet and should not stay locked out.
            ttl_ms = if has_access then 1000*60*7 else 1000*10
            setTimeout((-> delete _remember_me_cache[key]), ttl_ms)
            opts.cb(err, has_access)
177
178
# Cache of resolved proxy targets ({host, port}), keyed by target_parse_req(...).key
_target_cache = {}

# Forget the cached target for the given cookie/url pair, if there is one.
invalidate_target_cache = (remember_me, url) ->
    cache_key = target_parse_req(remember_me, url).key
    winston.debug("invalidate_target_cache: #{url}")
    delete _target_cache[cache_key]
184
185
# Resolve the {host, port} that a proxied request should be forwarded to.
#
#   remember_me -- remember_me cookie value; undefined means "skip the access
#                  check" (this is how the websocket upgrade handler calls it)
#   url         -- request url with base_url stripped: /<project_id>/<type>/...
#   cb          -- cb(err, {host, port}); err is false on success
#
# Steps: (1) check write access for the cookie, (2) look up the project's host,
# (3) determine the port: the url's port segment, the jupyter port when that
# segment is "jupyter", or the project's 'raw.port' status for raw urls.
# Results are cached under the key from target_parse_req.
#
# NOTE(review): the cache key, which contains the remember_me cookie, appears
# in the debug log prefix below -- consider whether that is acceptable.
target = (remember_me, url, cb) ->
    {key, type, project_id, port_number} = target_parse_req(remember_me, url)

    cached = _target_cache[key]
    if cached?
        cb(false, cached)
        return

    dbg = (m) -> winston.debug("target(#{key}): #{m}")
    dbg("url=#{url}")

    tm   = misc.walltime()
    host = undefined
    port = undefined
    async.series([
        (cb) ->
            if not remember_me?
                # remember_me = undefined means "allow"; this is used for the websocket upgrade.
                cb(); return

            # It's still unclear if we will ever grant read access to the raw
            # server, so every type currently requires write access.
            access_type = 'write'

            remember_me_check_for_access_to_project
                project_id  : project_id
                remember_me : remember_me
                type        : access_type
                cb          : (err, has_access) ->
                    dbg("finished remember_me_check_for_access_to_project (mark: #{misc.walltime(tm)}) -- #{err}")
                    if err
                        cb(err)
                    else if not has_access
                        cb("user does not have #{access_type} access to this project")
                    else
                        cb()
        (cb) ->
            compute_server.project
                project_id : project_id
                cb         : (err, project) ->
                    dbg("first compute_server.project finished (mark: #{misc.walltime(tm)}) -- #{err}")
                    if err
                        cb(err)
                    else
                        host = project.host
                        cb()
        (cb) ->
            if type == 'port'
                if port_number == "jupyter"
                    dbg("determine jupyter_server_port")
                    jupyter_server_port
                        project_id     : project_id
                        compute_server : compute_server
                        database       : database
                        cb             : (err, jupyter_port) ->
                            dbg("got jupyter_port=#{jupyter_port}, err=#{err}")
                            if err
                                cb(err)
                            else
                                port = jupyter_port
                                cb()
                else
                    port = port_number
                    cb()
            else if type == 'raw'
                compute_server.project
                    project_id : project_id
                    cb         : (err, project) ->
                        dbg("second compute_server.project finished (mark: #{misc.walltime(tm)}) -- #{err}")
                        if err
                            cb(err)
                        else
                            project.status
                                cb : (err, status) ->
                                    dbg("project.status finished (mark: #{misc.walltime(tm)})")
                                    if err
                                        cb(err)
                                    else if not status['raw.port']?
                                        cb("raw port not available -- project might not be opened or running")
                                    else
                                        port = status['raw.port']
                                        cb()
            else
                cb("unknown url type -- #{type}")
    ], (err) ->
        dbg("all finished (mark: #{misc.walltime(tm)}): host=#{host}; port=#{port}; type=#{type} -- #{err}")
        if err
            cb(err)
        else
            entry = {host:host, port:port}
            _target_cache[key] = entry
            cb(false, entry)
            # Expire EVERY cache entry, not only 'raw' ones.  A cache hit above
            # returns before the access check runs, so an entry that never
            # expires would keep granting access after it was revoked, and would
            # keep pointing at an old port after a project restart.  Caching for
            # a few minutes still absorbs bursts of requests.
            expire = ->
                # Only remove the entry this call created; it may have been
                # invalidated and replaced by a newer one in the meantime.
                if _target_cache[key] == entry
                    delete _target_cache[key]
            setTimeout(expire, 7*60*1000)
    )
294
295
#proxy = http_proxy.createProxyServer(ws:true)
296
# Cache of http-proxy instances, keyed by target url "http://host:port".
proxy_cache = {}

# The HTTP server.  For each request:
#   - "<base_url>/alive" is answered directly with an empty body;
#   - without a remember_me cookie only public raw files are served
#     (public_raw), otherwise a "please login" page is sent;
#   - with a cookie the target is resolved (target) and the request is
#     proxied to it; if resolving fails, public_raw is tried before an
#     "access denied" page is sent.
http_proxy_server = http.createServer (req, res) ->
    tm = misc.walltime()
    {query} = url.parse(req.url, true)
    req_url = req.url.slice(base_url.length)  # strip base_url for purposes of determining project location/permissions
    if req_url == "/alive"
        res.end('')
        return

    dbg = (m) ->
        ## for low level debugging
        if DEBUG2
            winston.debug("http_proxy_server(#{req_url}): #{m}")
    dbg('got request')

    cookies = new Cookies(req, res)
    remember_me = cookies.get(base_url + 'remember_me')

    if not remember_me?
        # before giving an error, check on possibility that file is public
        public_raw req_url, query, res, (err, is_public) ->
            if err or not is_public
                res.writeHead(500, {'Content-Type':'text/html'})
                res.end("Please login to <a target='_blank' href='#{DOMAIN_NAME}'>#{DOMAIN_NAME}</a> with cookies enabled, then refresh this page.")
        return

    target remember_me, req_url, (err, location) ->
        dbg("got target: #{misc.walltime(tm)}")
        if err
            public_raw req_url, query, res, (err, is_public) ->
                if err or not is_public
                    winston.debug("proxy denied -- #{err}")
                    res.writeHead(500, {'Content-Type':'text/html'})
                    res.end("Access denied. Please login to <a target='_blank' href='#{DOMAIN_NAME}'>#{DOMAIN_NAME}</a> as a user with access to this project, then refresh this page.")
        else
            t = "http://#{location.host}:#{location.port}"
            if proxy_cache[t]?
                # we already have the proxy server for this remote location in the cache, so use it.
                proxy = proxy_cache[t]
                dbg("used cached proxy object: #{misc.walltime(tm)}")
            else
                dbg("make a new proxy server connecting to this remote location")
                proxy = http_proxy.createProxyServer(ws:false, target:t, timeout:3000)
                # and cache it.
                proxy_cache[t] = proxy
                dbg("created new proxy: #{misc.walltime(tm)}")
                # setup error handler, so that if something goes wrong with this proxy (it will,
                # e.g., on project restart), we properly invalidate it.
                # http-proxy passes the failing request/response to the handler;
                # because the proxy object is cached and shared, those can belong
                # to a different request than the one that created it.
                proxy.on "error", (e, failed_req, failed_res) ->
                    dbg("http proxy error -- #{e}")
                    delete proxy_cache[t]
                    invalidate_target_cache(remember_me, req_url)
                    if failed_req?.url? and failed_req != req
                        # also drop the cached target of the request that actually failed
                        failed_url = failed_req.url.slice(base_url.length)
                        failed_remember_me = new Cookies(failed_req, failed_res).get(base_url + 'remember_me')
                        invalidate_target_cache(failed_remember_me, failed_url)
                    # With an error listener attached, http-proxy does not answer
                    # the request itself; reply so the client is not left hanging.
                    if failed_res? and typeof(failed_res.writeHead) == 'function' and not failed_res.headersSent
                        failed_res.writeHead(502, {'Content-Type':'text/html'})
                        failed_res.end("Unable to reach the project server. Please refresh this page.")

            proxy.web(req, res)

winston.debug("starting proxy server listening on #{opts.host}:#{opts.port}")
http_proxy_server.listen(opts.port, opts.host)
358
359
# add websockets support
360
# Cache of websocket proxy instances, keyed by target url "ws://host:port".
_ws_proxy_servers = {}

# Handle websocket upgrade requests: resolve the target (no remember_me check
# is done here -- target is called with remember_me = undefined) and hand the
# socket to a cached or newly created websocket proxy for that target.
http_proxy_server.on 'upgrade', (req, socket, head) ->
    req_url = req.url.slice(base_url.length)  # strip base_url for purposes of determining project location/permissions
    dbg = (m) -> winston.debug("http_proxy_server websocket(#{req_url}): #{m}")
    target undefined, req_url, (err, location) ->
        if err
            dbg("websocket upgrade error -- #{err}")
            # Nothing else will ever read from or close this socket once the
            # 'upgrade' event has been emitted, so close it here rather than
            # leaking the connection.
            socket.destroy()
            return
        dbg("websocket upgrade success -- ws://#{location.host}:#{location.port}")
        t = "ws://#{location.host}:#{location.port}"
        proxy = _ws_proxy_servers[t]
        if not proxy?
            dbg("websocket upgrade #{t} -- not using cache")
            proxy = http_proxy.createProxyServer(ws:true, target:t, timeout:0)
            proxy.on "error", (e) ->
                dbg("websocket proxy error, so clearing cache -- #{e}")
                delete _ws_proxy_servers[t]
                invalidate_target_cache(undefined, req_url)
            _ws_proxy_servers[t] = proxy
        else
            dbg("websocket upgrade -- using cache")
        proxy.ws(req, socket, head)
382
383
# Cache of the public paths of a project, keyed by project_id.
public_raw_paths_cache = {}

# Determine if the requested path is public (and not too big).
# If so, send content to the client and cb(undefined, true)
# If not, cb(undefined, false)
#
#   req_url -- e.g. /9627b34f-fefd-44d3-88ba-5b1fc1affef1/raw/a.html
#   query   -- parsed query object; if it has a "download" key the file is
#              sent with Content-disposition: attachment
#   res     -- HTTP response the file is written to when it is public
#   cb      -- cb(err, is_public); err is set when looking up the public
#              paths, the project, or reading the file fails
public_raw = (req_url, query, res, cb) ->
    x = req_url.split('?')
    v = x[0].split('/')
    if v[2] != 'raw'
        cb(undefined, false)
        return
    project_id = v[1]
    if not misc.is_valid_uuid_string(project_id)
        cb(undefined, false)
        return
    # The url comes from the client; decodeURI throws a URIError on malformed
    # percent-escapes.  Such a path cannot be public, so just say so.
    try
        path = decodeURI(v.slice(3).join('/'))
    catch e
        winston.debug("public_raw: unable to decode path -- #{e}")
        cb(undefined, false)
        return
    winston.debug("public_raw: project_id=#{project_id}, path=#{path}")
    public_paths = undefined
    is_public = false
    async.series([
        (cb) ->
            # Get a list of public paths in the project, or use the cached list
            # The cached list is kept for a few minutes, since a typical access
            # pattern is that the client downloads a bunch of files from the same
            # project in parallel.  On the other hand, we don't want to cache for
            # too long, since the project user may add/remove public paths at any time.
            public_paths = public_raw_paths_cache[project_id]
            if public_paths?
                cb()
            else
                database.get_public_paths
                    project_id : project_id
                    cb         : (err, paths) ->
                        if err
                            cb(err)
                        else
                            public_paths = public_raw_paths_cache[project_id] = paths
                            setTimeout((()=>delete public_raw_paths_cache[project_id]), 3*60*1000)  # cache for 3 minutes
                            cb()
        (cb) ->
            if not misc.path_is_in_public_paths(path, public_paths)
                # The requested path is not public, so nothing to do.
                cb()
            else
                # The requested path *is* public, so we get the file
                # from one (of the potentially many) compute servers
                # that has the file -- (right now this is implemented
                # via sending/receiving JSON messages and using base64
                # encoding, but that could change).
                compute_server.project
                    project_id : project_id
                    cb         : (err, project) ->
                        if err
                            cb(err); return
                        project.read_file
                            path    : path
                            maxsize : 40000000   # 40MB for now
                            cb      : (err, data) ->
                                if err
                                    cb(err)
                                else
                                    if query.download?
                                        res.setHeader('Content-disposition', 'attachment')
                                    filename = path.slice(path.lastIndexOf('/') + 1)
                                    # see https://www.npmjs.com/package/mime
                                    mime_type = mime.lookup(filename)
                                    res.setHeader("Content-Type", mime_type)
                                    timeout = ms('10 minutes')   # milliseconds
                                    # Cache-Control max-age is an unquoted number of
                                    # *seconds*; Expires below is computed from the
                                    # same timeout in milliseconds.
                                    res.setHeader('Cache-Control', "public, max-age=#{Math.floor(timeout/1000)}")
                                    res.setHeader('Expires', new Date(Date.now() + timeout).toUTCString())
                                    res.write(data)
                                    res.end()
                                    is_public = true
                                    cb()
    ], (err) ->
        cb(err, is_public)
    )
463
464