GitHub Repository: awilliam/linux-vfio
Path: blob/master/net/sunrpc/svc.c

/*
 * linux/net/sunrpc/svc.c
 *
 * High-level RPC service routines
 *
 * Copyright (C) 1995, 1996 Olaf Kirch <[email protected]>
 *
 * Multiple thread pools and NUMAisation
 * Copyright (c) 2006 Silicon Graphics, Inc.
 * by Greg Banks <[email protected]>
 */

#include <linux/linkage.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/net.h>
#include <linux/in.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/slab.h>

#include <linux/sunrpc/types.h>
#include <linux/sunrpc/xdr.h>
#include <linux/sunrpc/stats.h>
#include <linux/sunrpc/svcsock.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/bc_xprt.h>

#define RPCDBG_FACILITY	RPCDBG_SVCDSP

static void svc_unregister(const struct svc_serv *serv);

#define svc_serv_is_pooled(serv)    ((serv)->sv_function)

/*
 * Mode for mapping cpus to pools.
 */
enum {
	SVC_POOL_AUTO = -1,	/* choose one of the others */
	SVC_POOL_GLOBAL,	/* no mapping, just a single global pool
				 * (legacy & UP mode) */
	SVC_POOL_PERCPU,	/* one pool per cpu */
	SVC_POOL_PERNODE	/* one pool per numa node */
};
#define SVC_POOL_DEFAULT	SVC_POOL_GLOBAL

/*
 * Structure for mapping cpus to pools and vice versa.
 * Setup once during sunrpc initialisation.
 */
static struct svc_pool_map {
	int count;		/* How many svc_servs use us */
	int mode;		/* Note: int not enum to avoid
				 * warnings about "enumeration value
				 * not handled in switch" */
	unsigned int npools;
	unsigned int *pool_to;	/* maps pool id to cpu or node */
	unsigned int *to_pool;	/* maps cpu or node to pool id */
} svc_pool_map = {
	.count = 0,
	.mode = SVC_POOL_DEFAULT
};
static DEFINE_MUTEX(svc_pool_map_mutex);/* protects svc_pool_map.count only */

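/*
 * Illustrative sketch: on a hypothetical two-node machine running in
 * SVC_POOL_PERNODE mode, the two arrays above are inverses of each
 * other (the concrete values here are assumptions for illustration):
 *
 *	to_pool[0] == 0, to_pool[1] == 1	(node -> pool id)
 *	pool_to[0] == 0, pool_to[1] == 1	(pool id -> node)
 *
 * svc_pool_for_cpu() below uses to_pool[] to go cpu -> node -> pool,
 * and svc_pool_map_set_cpumask() uses pool_to[] to go pool -> node.
 */
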
static int
param_set_pool_mode(const char *val, struct kernel_param *kp)
{
	int *ip = (int *)kp->arg;
	struct svc_pool_map *m = &svc_pool_map;
	int err;

	mutex_lock(&svc_pool_map_mutex);

	err = -EBUSY;
	if (m->count)
		goto out;

	err = 0;
	if (!strncmp(val, "auto", 4))
		*ip = SVC_POOL_AUTO;
	else if (!strncmp(val, "global", 6))
		*ip = SVC_POOL_GLOBAL;
	else if (!strncmp(val, "percpu", 6))
		*ip = SVC_POOL_PERCPU;
	else if (!strncmp(val, "pernode", 7))
		*ip = SVC_POOL_PERNODE;
	else
		err = -EINVAL;

out:
	mutex_unlock(&svc_pool_map_mutex);
	return err;
}

static int
param_get_pool_mode(char *buf, struct kernel_param *kp)
{
	int *ip = (int *)kp->arg;

	switch (*ip) {
	case SVC_POOL_AUTO:
		return strlcpy(buf, "auto", 20);
	case SVC_POOL_GLOBAL:
		return strlcpy(buf, "global", 20);
	case SVC_POOL_PERCPU:
		return strlcpy(buf, "percpu", 20);
	case SVC_POOL_PERNODE:
		return strlcpy(buf, "pernode", 20);
	default:
		return sprintf(buf, "%d", *ip);
	}
}

module_param_call(pool_mode, param_set_pool_mode, param_get_pool_mode,
		  &svc_pool_map.mode, 0644);

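/*
 * With the 0644 permissions above, the pool mode can be read and
 * (while no pooled service is running) changed at runtime, for
 * example:
 *
 *	echo pernode > /sys/module/sunrpc/parameters/pool_mode
 *
 * or set at load time with "modprobe sunrpc pool_mode=percpu". Once a
 * pooled service holds a reference on the map, param_set_pool_mode()
 * above returns -EBUSY.
 */
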
/*
 * Detect best pool mapping mode heuristically,
 * according to the machine's topology.
 */
static int
svc_pool_map_choose_mode(void)
{
	unsigned int node;

	if (nr_online_nodes > 1) {
		/*
		 * Actually have multiple NUMA nodes,
		 * so split pools on NUMA node boundaries
		 */
		return SVC_POOL_PERNODE;
	}

	node = first_online_node;
	if (nr_cpus_node(node) > 2) {
		/*
		 * Non-trivial SMP, or CONFIG_NUMA on
		 * non-NUMA hardware, e.g. with a generic
		 * x86_64 kernel on Xeons. In this case we
		 * want to divide the pools on cpu boundaries.
		 */
		return SVC_POOL_PERCPU;
	}

	/* default: one global pool */
	return SVC_POOL_GLOBAL;
}

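/*
 * Concretely (an illustrative reading of the heuristic above, not
 * extra logic): a machine with two NUMA nodes picks SVC_POOL_PERNODE;
 * a single-node box with, say, 8 cpus picks SVC_POOL_PERCPU; and a
 * uniprocessor or 2-cpu box falls back to SVC_POOL_GLOBAL, since the
 * per-cpu split requires more than two cpus on the node.
 */
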
/*
 * Allocate the to_pool[] and pool_to[] arrays.
 * Returns 0 on success or an errno.
 */
static int
svc_pool_map_alloc_arrays(struct svc_pool_map *m, unsigned int maxpools)
{
	m->to_pool = kcalloc(maxpools, sizeof(unsigned int), GFP_KERNEL);
	if (!m->to_pool)
		goto fail;
	m->pool_to = kcalloc(maxpools, sizeof(unsigned int), GFP_KERNEL);
	if (!m->pool_to)
		goto fail_free;

	return 0;

fail_free:
	kfree(m->to_pool);
fail:
	return -ENOMEM;
}

/*
 * Initialise the pool map for SVC_POOL_PERCPU mode.
 * Returns number of pools or <0 on error.
 */
static int
svc_pool_map_init_percpu(struct svc_pool_map *m)
{
	unsigned int maxpools = nr_cpu_ids;
	unsigned int pidx = 0;
	unsigned int cpu;
	int err;

	err = svc_pool_map_alloc_arrays(m, maxpools);
	if (err)
		return err;

	for_each_online_cpu(cpu) {
		BUG_ON(pidx > maxpools);
		m->to_pool[cpu] = pidx;
		m->pool_to[pidx] = cpu;
		pidx++;
	}
	/* cpus brought online later all get mapped to pool0, sorry */

	return pidx;
}

/*
 * Initialise the pool map for SVC_POOL_PERNODE mode.
 * Returns number of pools or <0 on error.
 */
static int
svc_pool_map_init_pernode(struct svc_pool_map *m)
{
	unsigned int maxpools = nr_node_ids;
	unsigned int pidx = 0;
	unsigned int node;
	int err;

	err = svc_pool_map_alloc_arrays(m, maxpools);
	if (err)
		return err;

	for_each_node_with_cpus(node) {
		/* some architectures (e.g. SN2) have cpuless nodes */
		BUG_ON(pidx > maxpools);
		m->to_pool[node] = pidx;
		m->pool_to[pidx] = node;
		pidx++;
	}
	/* nodes brought online later all get mapped to pool0, sorry */

	return pidx;
}

/*
 * Add a reference to the global map of cpus to pools (and
 * vice versa). Initialise the map if we're the first user.
 * Returns the number of pools.
 */
static unsigned int
svc_pool_map_get(void)
{
	struct svc_pool_map *m = &svc_pool_map;
	int npools = -1;

	mutex_lock(&svc_pool_map_mutex);

	if (m->count++) {
		mutex_unlock(&svc_pool_map_mutex);
		return m->npools;
	}

	if (m->mode == SVC_POOL_AUTO)
		m->mode = svc_pool_map_choose_mode();

	switch (m->mode) {
	case SVC_POOL_PERCPU:
		npools = svc_pool_map_init_percpu(m);
		break;
	case SVC_POOL_PERNODE:
		npools = svc_pool_map_init_pernode(m);
		break;
	}

	if (npools < 0) {
		/* default, or memory allocation failure */
		npools = 1;
		m->mode = SVC_POOL_GLOBAL;
	}
	m->npools = npools;

	mutex_unlock(&svc_pool_map_mutex);
	return m->npools;
}

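/*
 * Reference-counting sketch (restating the contract above): the first
 * svc_pool_map_get() call, typically via svc_create_pooled(), builds
 * the map; later callers just bump m->count and reuse it. Each get()
 * must be balanced by one svc_pool_map_put() below, normally from
 * svc_destroy().
 */
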
/*
 * Drop a reference to the global map of cpus to pools.
 * When the last reference is dropped, the map data is
 * freed; this allows the sysadmin to change the pool
 * mode using the pool_mode module option without
 * rebooting or re-loading sunrpc.ko.
 */
static void
svc_pool_map_put(void)
{
	struct svc_pool_map *m = &svc_pool_map;

	mutex_lock(&svc_pool_map_mutex);

	if (!--m->count) {
		m->mode = SVC_POOL_DEFAULT;
		kfree(m->to_pool);
		kfree(m->pool_to);
		m->npools = 0;
	}

	mutex_unlock(&svc_pool_map_mutex);
}

/*
 * Set the given thread's cpus_allowed mask so that it
 * will only run on cpus in the given pool.
 */
static inline void
svc_pool_map_set_cpumask(struct task_struct *task, unsigned int pidx)
{
	struct svc_pool_map *m = &svc_pool_map;
	unsigned int node = m->pool_to[pidx];

	/*
	 * The caller checks for sv_nrpools > 1, which
	 * implies that we've been initialized.
	 */
	BUG_ON(m->count == 0);

	switch (m->mode) {
	case SVC_POOL_PERCPU:
	{
		set_cpus_allowed_ptr(task, cpumask_of(node));
		break;
	}
	case SVC_POOL_PERNODE:
	{
		set_cpus_allowed_ptr(task, cpumask_of_node(node));
		break;
	}
	}
}

/*
 * Use the mapping mode to choose a pool for a given CPU.
 * Used when enqueueing an incoming RPC. Always returns
 * a non-NULL pool pointer.
 */
struct svc_pool *
svc_pool_for_cpu(struct svc_serv *serv, int cpu)
{
	struct svc_pool_map *m = &svc_pool_map;
	unsigned int pidx = 0;

	/*
	 * An uninitialised map happens in a pure client when
	 * lockd is brought up, so silently treat it the
	 * same as SVC_POOL_GLOBAL.
	 */
	if (svc_serv_is_pooled(serv)) {
		switch (m->mode) {
		case SVC_POOL_PERCPU:
			pidx = m->to_pool[cpu];
			break;
		case SVC_POOL_PERNODE:
			pidx = m->to_pool[cpu_to_node(cpu)];
			break;
		}
	}
	return &serv->sv_pools[pidx % serv->sv_nrpools];
}

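/*
 * Worked example of the lookup above (the topology values are assumed
 * for illustration): in SVC_POOL_PERNODE mode on a two-node machine
 * where cpu 3 sits on node 1, cpu_to_node(3) == 1 and to_pool[1] == 1,
 * so an RPC arriving on cpu 3 is queued to serv->sv_pools[1]. The
 * final "% serv->sv_nrpools" keeps the index in range even if the map
 * and the serv disagree about the pool count.
 */
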
/*
 * Create an RPC service
 */
static struct svc_serv *
__svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
	     void (*shutdown)(struct svc_serv *serv))
{
	struct svc_serv *serv;
	unsigned int vers;
	unsigned int xdrsize;
	unsigned int i;

	if (!(serv = kzalloc(sizeof(*serv), GFP_KERNEL)))
		return NULL;
	serv->sv_name = prog->pg_name;
	serv->sv_program = prog;
	serv->sv_nrthreads = 1;
	serv->sv_stats = prog->pg_stats;
	if (bufsize > RPCSVC_MAXPAYLOAD)
		bufsize = RPCSVC_MAXPAYLOAD;
	serv->sv_max_payload = bufsize ? bufsize : 4096;
	serv->sv_max_mesg = roundup(serv->sv_max_payload + PAGE_SIZE, PAGE_SIZE);
	serv->sv_shutdown = shutdown;
	xdrsize = 0;
	while (prog) {
		prog->pg_lovers = prog->pg_nvers - 1;
		for (vers = 0; vers < prog->pg_nvers; vers++)
			if (prog->pg_vers[vers]) {
				prog->pg_hivers = vers;
				if (prog->pg_lovers > vers)
					prog->pg_lovers = vers;
				if (prog->pg_vers[vers]->vs_xdrsize > xdrsize)
					xdrsize = prog->pg_vers[vers]->vs_xdrsize;
			}
		prog = prog->pg_next;
	}
	serv->sv_xdrsize = xdrsize;
	INIT_LIST_HEAD(&serv->sv_tempsocks);
	INIT_LIST_HEAD(&serv->sv_permsocks);
	init_timer(&serv->sv_temptimer);
	spin_lock_init(&serv->sv_lock);

	serv->sv_nrpools = npools;
	serv->sv_pools =
		kcalloc(serv->sv_nrpools, sizeof(struct svc_pool),
			GFP_KERNEL);
	if (!serv->sv_pools) {
		kfree(serv);
		return NULL;
	}

	for (i = 0; i < serv->sv_nrpools; i++) {
		struct svc_pool *pool = &serv->sv_pools[i];

		dprintk("svc: initialising pool %u for %s\n",
			i, serv->sv_name);

		pool->sp_id = i;
		INIT_LIST_HEAD(&pool->sp_threads);
		INIT_LIST_HEAD(&pool->sp_sockets);
		INIT_LIST_HEAD(&pool->sp_all_threads);
		spin_lock_init(&pool->sp_lock);
	}

	/* Remove any stale portmap registrations */
	svc_unregister(serv);

	return serv;
}

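/*
 * Sizing example for the arithmetic in __svc_create() (the bufsize
 * value is assumed for illustration): with bufsize = 32768 and 4K
 * pages, sv_max_payload = 32768 and sv_max_mesg =
 * roundup(32768 + 4096, 4096) = 36864, i.e. one extra page beyond the
 * payload to cover RPC and transport headers.
 */
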
struct svc_serv *
svc_create(struct svc_program *prog, unsigned int bufsize,
	   void (*shutdown)(struct svc_serv *serv))
{
	return __svc_create(prog, bufsize, /*npools*/1, shutdown);
}
EXPORT_SYMBOL_GPL(svc_create);

struct svc_serv *
svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
		  void (*shutdown)(struct svc_serv *serv),
		  svc_thread_fn func, struct module *mod)
{
	struct svc_serv *serv;
	unsigned int npools = svc_pool_map_get();

	serv = __svc_create(prog, bufsize, npools, shutdown);

	if (serv != NULL) {
		serv->sv_function = func;
		serv->sv_module = mod;
	}

	return serv;
}
EXPORT_SYMBOL_GPL(svc_create_pooled);

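/*
 * Hypothetical caller sketch (the "frobd" names are invented for
 * illustration and do not exist in the kernel): a pooled service is
 * typically brought up as
 *
 *	serv = svc_create_pooled(&frobd_program, 32 * 1024,
 *				 frobd_shutdown, frobd_thread, THIS_MODULE);
 *	if (!serv)
 *		return -ENOMEM;
 *	error = svc_set_num_threads(serv, NULL, nthreads);
 *
 * where frobd_thread() is the svc_thread_fn each kthread runs, and
 * svc_set_num_threads() below spawns the threads round-robin over the
 * pools.
 */
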
/*
 * Destroy an RPC service. Should be called with appropriate locking to
 * protect the sv_nrthreads, sv_permsocks and sv_tempsocks.
 */
void
svc_destroy(struct svc_serv *serv)
{
	dprintk("svc: svc_destroy(%s, %d)\n",
		serv->sv_program->pg_name,
		serv->sv_nrthreads);

	if (serv->sv_nrthreads) {
		if (--(serv->sv_nrthreads) != 0) {
			svc_sock_update_bufs(serv);
			return;
		}
	} else
		printk("svc_destroy: no threads for serv=%p!\n", serv);

	del_timer_sync(&serv->sv_temptimer);

	svc_close_all(&serv->sv_tempsocks);

	if (serv->sv_shutdown)
		serv->sv_shutdown(serv);

	svc_close_all(&serv->sv_permsocks);

	BUG_ON(!list_empty(&serv->sv_permsocks));
	BUG_ON(!list_empty(&serv->sv_tempsocks));

	cache_clean_deferred(serv);

	if (svc_serv_is_pooled(serv))
		svc_pool_map_put();

	svc_unregister(serv);
	kfree(serv->sv_pools);
	kfree(serv);
}
EXPORT_SYMBOL_GPL(svc_destroy);

/*
 * Allocate an RPC server's buffer space.
 * We allocate pages and place them in rq_argpages.
 */
static int
svc_init_buffer(struct svc_rqst *rqstp, unsigned int size)
{
	unsigned int pages, arghi;

	/* bc_xprt uses fore channel allocated buffers */
	if (svc_is_backchannel(rqstp))
		return 1;

	pages = size / PAGE_SIZE + 1; /* extra page as we hold both request and reply.
				       * We assume each is at most one page
				       */
	arghi = 0;
	BUG_ON(pages > RPCSVC_MAXPAGES);
	while (pages) {
		struct page *p = alloc_page(GFP_KERNEL);
		if (!p)
			break;
		rqstp->rq_pages[arghi++] = p;
		pages--;
	}
	return pages == 0;
}

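/*
 * Worked example for the page math above (the sizes are assumed): with
 * sv_max_mesg = 36864 and PAGE_SIZE = 4096, pages = 36864/4096 + 1
 * = 10, so ten pages are reserved in rq_pages to cover the request
 * and the reply together.
 */
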
/*
 * Release an RPC server buffer
 */
static void
svc_release_buffer(struct svc_rqst *rqstp)
{
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(rqstp->rq_pages); i++)
		if (rqstp->rq_pages[i])
			put_page(rqstp->rq_pages[i]);
}

struct svc_rqst *
svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool)
{
	struct svc_rqst	*rqstp;

	rqstp = kzalloc(sizeof(*rqstp), GFP_KERNEL);
	if (!rqstp)
		goto out_enomem;

	init_waitqueue_head(&rqstp->rq_wait);

	serv->sv_nrthreads++;
	spin_lock_bh(&pool->sp_lock);
	pool->sp_nrthreads++;
	list_add(&rqstp->rq_all, &pool->sp_all_threads);
	spin_unlock_bh(&pool->sp_lock);
	rqstp->rq_server = serv;
	rqstp->rq_pool = pool;

	rqstp->rq_argp = kmalloc(serv->sv_xdrsize, GFP_KERNEL);
	if (!rqstp->rq_argp)
		goto out_thread;

	rqstp->rq_resp = kmalloc(serv->sv_xdrsize, GFP_KERNEL);
	if (!rqstp->rq_resp)
		goto out_thread;

	if (!svc_init_buffer(rqstp, serv->sv_max_mesg))
		goto out_thread;

	return rqstp;
out_thread:
	svc_exit_thread(rqstp);
out_enomem:
	return ERR_PTR(-ENOMEM);
}
EXPORT_SYMBOL_GPL(svc_prepare_thread);

/*
 * Choose a pool in which to create a new thread, for svc_set_num_threads
 */
static inline struct svc_pool *
choose_pool(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state)
{
	if (pool != NULL)
		return pool;

	return &serv->sv_pools[(*state)++ % serv->sv_nrpools];
}

/*
 * Choose a thread to kill, for svc_set_num_threads
 */
static inline struct task_struct *
choose_victim(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state)
{
	unsigned int i;
	struct task_struct *task = NULL;

	if (pool != NULL) {
		spin_lock_bh(&pool->sp_lock);
	} else {
		/* choose a pool in round-robin fashion */
		for (i = 0; i < serv->sv_nrpools; i++) {
			pool = &serv->sv_pools[--(*state) % serv->sv_nrpools];
			spin_lock_bh(&pool->sp_lock);
			if (!list_empty(&pool->sp_all_threads))
				goto found_pool;
			spin_unlock_bh(&pool->sp_lock);
		}
		return NULL;
	}

found_pool:
	if (!list_empty(&pool->sp_all_threads)) {
		struct svc_rqst *rqstp;

		/*
		 * Remove from the pool->sp_all_threads list
		 * so we don't try to kill it again.
		 */
		rqstp = list_entry(pool->sp_all_threads.next, struct svc_rqst, rq_all);
		list_del_init(&rqstp->rq_all);
		task = rqstp->rq_task;
	}
	spin_unlock_bh(&pool->sp_lock);

	return task;
}

/*
 * Create or destroy enough new threads to make the number
 * of threads the given number. If `pool' is non-NULL, applies
 * only to threads in that pool, otherwise round-robins between
 * all pools. Must be called with a svc_get() reference and
 * the BKL or another lock to protect access to svc_serv fields.
 *
 * Destroying threads relies on the service threads filling in
 * rqstp->rq_task, which only the nfs ones do. Assumes the serv
 * has been created using svc_create_pooled().
 *
 * Based on code that used to be in nfsd_svc() but tweaked
 * to be pool-aware.
 */
int
svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
{
	struct svc_rqst	*rqstp;
	struct task_struct *task;
	struct svc_pool *chosen_pool;
	int error = 0;
	unsigned int state = serv->sv_nrthreads - 1;

	if (pool == NULL) {
		/* The -1 assumes caller has done a svc_get() */
		nrservs -= (serv->sv_nrthreads - 1);
	} else {
		spin_lock_bh(&pool->sp_lock);
		nrservs -= pool->sp_nrthreads;
		spin_unlock_bh(&pool->sp_lock);
	}

	/* create new threads */
	while (nrservs > 0) {
		nrservs--;
		chosen_pool = choose_pool(serv, pool, &state);

		rqstp = svc_prepare_thread(serv, chosen_pool);
		if (IS_ERR(rqstp)) {
			error = PTR_ERR(rqstp);
			break;
		}

		__module_get(serv->sv_module);
		task = kthread_create(serv->sv_function, rqstp, serv->sv_name);
		if (IS_ERR(task)) {
			error = PTR_ERR(task);
			module_put(serv->sv_module);
			svc_exit_thread(rqstp);
			break;
		}

		rqstp->rq_task = task;
		if (serv->sv_nrpools > 1)
			svc_pool_map_set_cpumask(task, chosen_pool->sp_id);

		svc_sock_update_bufs(serv);
		wake_up_process(task);
	}
	/* destroy old threads */
	while (nrservs < 0 &&
	       (task = choose_victim(serv, pool, &state)) != NULL) {
		send_sig(SIGINT, task, 1);
		nrservs++;
	}

	return error;
}
EXPORT_SYMBOL_GPL(svc_set_num_threads);

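/*
 * Usage sketch (the thread counts are illustrative): growing a
 * service from 4 to 8 threads and later shutting it down entirely
 * might look like
 *
 *	error = svc_set_num_threads(serv, NULL, 8);
 *	...
 *	error = svc_set_num_threads(serv, NULL, 0);
 *
 * The second call sends SIGINT to each victim thread, which is
 * expected to notice the signal and call svc_exit_thread() on its
 * way out.
 */
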
/*
 * Called from a server thread as it's exiting. Caller must hold the BKL or
 * the "service mutex", whichever is appropriate for the service.
 */
void
svc_exit_thread(struct svc_rqst *rqstp)
{
	struct svc_serv	*serv = rqstp->rq_server;
	struct svc_pool	*pool = rqstp->rq_pool;

	svc_release_buffer(rqstp);
	kfree(rqstp->rq_resp);
	kfree(rqstp->rq_argp);
	kfree(rqstp->rq_auth_data);

	spin_lock_bh(&pool->sp_lock);
	pool->sp_nrthreads--;
	list_del(&rqstp->rq_all);
	spin_unlock_bh(&pool->sp_lock);

	kfree(rqstp);

	/* Release the server */
	if (serv)
		svc_destroy(serv);
}
EXPORT_SYMBOL_GPL(svc_exit_thread);

/*
 * Register an "inet" protocol family netid with the local
 * rpcbind daemon via an rpcbind v4 SET request.
 *
 * No netconfig infrastructure is available in the kernel, so
 * we map IP_ protocol numbers to netids by hand.
 *
 * Returns zero on success; a negative errno value is returned
 * if any error occurs.
 */
static int __svc_rpcb_register4(const u32 program, const u32 version,
				const unsigned short protocol,
				const unsigned short port)
{
	const struct sockaddr_in sin = {
		.sin_family		= AF_INET,
		.sin_addr.s_addr	= htonl(INADDR_ANY),
		.sin_port		= htons(port),
	};
	const char *netid;
	int error;

	switch (protocol) {
	case IPPROTO_UDP:
		netid = RPCBIND_NETID_UDP;
		break;
	case IPPROTO_TCP:
		netid = RPCBIND_NETID_TCP;
		break;
	default:
		return -ENOPROTOOPT;
	}

	error = rpcb_v4_register(program, version,
				 (const struct sockaddr *)&sin, netid);

	/*
	 * User space didn't support rpcbind v4, so retry this
	 * registration request with the legacy rpcbind v2 protocol.
	 */
	if (error == -EPROTONOSUPPORT)
		error = rpcb_register(program, version, protocol, port);

	return error;
}

#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
/*
 * Register an "inet6" protocol family netid with the local
 * rpcbind daemon via an rpcbind v4 SET request.
 *
 * No netconfig infrastructure is available in the kernel, so
 * we map IP_ protocol numbers to netids by hand.
 *
 * Returns zero on success; a negative errno value is returned
 * if any error occurs.
 */
static int __svc_rpcb_register6(const u32 program, const u32 version,
				const unsigned short protocol,
				const unsigned short port)
{
	const struct sockaddr_in6 sin6 = {
		.sin6_family		= AF_INET6,
		.sin6_addr		= IN6ADDR_ANY_INIT,
		.sin6_port		= htons(port),
	};
	const char *netid;
	int error;

	switch (protocol) {
	case IPPROTO_UDP:
		netid = RPCBIND_NETID_UDP6;
		break;
	case IPPROTO_TCP:
		netid = RPCBIND_NETID_TCP6;
		break;
	default:
		return -ENOPROTOOPT;
	}

	error = rpcb_v4_register(program, version,
				 (const struct sockaddr *)&sin6, netid);

	/*
	 * User space didn't support rpcbind version 4, so we won't
	 * use a PF_INET6 listener.
	 */
	if (error == -EPROTONOSUPPORT)
		error = -EAFNOSUPPORT;

	return error;
}
#endif	/* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */

/*
 * Register a kernel RPC service via rpcbind version 4.
 *
 * Returns zero on success; a negative errno value is returned
 * if any error occurs.
 */
static int __svc_register(const char *progname,
			  const u32 program, const u32 version,
			  const int family,
			  const unsigned short protocol,
			  const unsigned short port)
{
	int error = -EAFNOSUPPORT;

	switch (family) {
	case PF_INET:
		error = __svc_rpcb_register4(program, version,
					     protocol, port);
		break;
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
	case PF_INET6:
		error = __svc_rpcb_register6(program, version,
					     protocol, port);
		break;
#endif	/* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
	}

	if (error < 0)
		printk(KERN_WARNING "svc: failed to register %sv%u RPC "
			"service (errno %d).\n", progname, version, -error);
	return error;
}

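/*
 * Illustrative call (the well-known RPC numbers are shown only as an
 * example, not taken from this file): registering NFSv3 over TCP on
 * its standard port would be
 *
 *	__svc_register("nfsd", 100003, 3, PF_INET, IPPROTO_TCP, 2049);
 *
 * which maps to netid "tcp" and an rpcbind v4 SET request, falling
 * back to a v2 PMAP_SET if user space only runs portmap.
 */
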
/**
 * svc_register - register an RPC service with the local portmapper
 * @serv: svc_serv struct for the service to register
 * @family: protocol family of service's listener socket
 * @proto: transport protocol number to advertise
 * @port: port to advertise
 *
 * Service is registered for any address in the passed-in protocol family
 */
int svc_register(const struct svc_serv *serv, const int family,
		 const unsigned short proto, const unsigned short port)
{
	struct svc_program	*progp;
	unsigned int		i;
	int			error = 0;

	BUG_ON(proto == 0 && port == 0);

	for (progp = serv->sv_program; progp; progp = progp->pg_next) {
		for (i = 0; i < progp->pg_nvers; i++) {
			if (progp->pg_vers[i] == NULL)
				continue;

			dprintk("svc: svc_register(%sv%d, %s, %u, %u)%s\n",
				progp->pg_name,
				i,
				proto == IPPROTO_UDP ? "udp" : "tcp",
				port,
				family,
				progp->pg_vers[i]->vs_hidden ?
					" (but not telling portmap)" : "");

			if (progp->pg_vers[i]->vs_hidden)
				continue;

			error = __svc_register(progp->pg_name, progp->pg_prog,
						i, family, proto, port);
			if (error < 0)
				break;
		}
	}

	return error;
}

/*
 * If user space is running rpcbind, it should take the v4 UNSET
 * and clear everything for this [program, version]. If user space
 * is running portmap, it will reject the v4 UNSET, but won't have
 * any "inet6" entries anyway. So a PMAP_UNSET should be sufficient
 * in this case to clear all existing entries for [program, version].
 */
static void __svc_unregister(const u32 program, const u32 version,
			     const char *progname)
{
	int error;

	error = rpcb_v4_register(program, version, NULL, "");

	/*
	 * User space didn't support rpcbind v4, so retry this
	 * request with the legacy rpcbind v2 protocol.
	 */
	if (error == -EPROTONOSUPPORT)
		error = rpcb_register(program, version, 0, 0);

	dprintk("svc: %s(%sv%u), error %d\n",
		__func__, progname, version, error);
}

/*
 * All netids, bind addresses and ports registered for [program, version]
 * are removed from the local rpcbind database (if the service is not
 * hidden) to make way for a new instance of the service.
 *
 * The result of unregistration is reported via dprintk for those who want
 * verification of the result, but is otherwise not important.
 */
static void svc_unregister(const struct svc_serv *serv)
{
	struct svc_program *progp;
	unsigned long flags;
	unsigned int i;

	clear_thread_flag(TIF_SIGPENDING);

	for (progp = serv->sv_program; progp; progp = progp->pg_next) {
		for (i = 0; i < progp->pg_nvers; i++) {
			if (progp->pg_vers[i] == NULL)
				continue;
			if (progp->pg_vers[i]->vs_hidden)
				continue;

			dprintk("svc: attempting to unregister %sv%u\n",
				progp->pg_name, i);
			__svc_unregister(progp->pg_prog, i, progp->pg_name);
		}
	}

	spin_lock_irqsave(&current->sighand->siglock, flags);
	recalc_sigpending();
	spin_unlock_irqrestore(&current->sighand->siglock, flags);
}

/*
 * Printk the given error with the address of the client that caused it.
 */
static int
__attribute__ ((format (printf, 2, 3)))
svc_printk(struct svc_rqst *rqstp, const char *fmt, ...)
{
	va_list args;
	int	r;
	char	buf[RPC_MAX_ADDRBUFLEN];

	if (!net_ratelimit())
		return 0;

	printk(KERN_WARNING "svc: %s: ",
		svc_print_addr(rqstp, buf, sizeof(buf)));

	va_start(args, fmt);
	r = vprintk(fmt, args);
	va_end(args);

	return r;
}

/*
 * Common routine for processing the RPC request.
 */
static int
svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
{
	struct svc_program	*progp;
	struct svc_version	*versp = NULL;	/* compiler food */
	struct svc_procedure	*procp = NULL;
	struct svc_serv		*serv = rqstp->rq_server;
	kxdrproc_t		xdr;
	__be32			*statp;
	u32			prog, vers, proc;
	__be32			auth_stat, rpc_stat;
	int			auth_res;
	__be32			*reply_statp;

	rpc_stat = rpc_success;

	if (argv->iov_len < 6*4)
		goto err_short_len;

	/* Will be turned off only in gss privacy case: */
	rqstp->rq_splice_ok = 1;
	/* Will be turned off only when NFSv4 Sessions are used */
	rqstp->rq_usedeferral = 1;
	rqstp->rq_dropme = false;

	/* Setup reply header */
	rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(rqstp);

	svc_putu32(resv, rqstp->rq_xid);

	vers = svc_getnl(argv);

	/* First words of reply: */
	svc_putnl(resv, 1);		/* REPLY */

	if (vers != 2)		/* RPC version number */
		goto err_bad_rpc;

	/* Save position in case we later decide to reject: */
	reply_statp = resv->iov_base + resv->iov_len;

	svc_putnl(resv, 0);		/* ACCEPT */

	rqstp->rq_prog = prog = svc_getnl(argv);	/* program number */
	rqstp->rq_vers = vers = svc_getnl(argv);	/* version number */
	rqstp->rq_proc = proc = svc_getnl(argv);	/* procedure number */

	for (progp = serv->sv_program; progp; progp = progp->pg_next)
		if (prog == progp->pg_prog)
			break;

	/*
	 * Decode auth data, and add verifier to reply buffer.
	 * We do this before anything else in order to get a decent
	 * auth verifier.
	 */
	auth_res = svc_authenticate(rqstp, &auth_stat);
	/* Also give the program a chance to reject this call: */
	if (auth_res == SVC_OK && progp) {
		auth_stat = rpc_autherr_badcred;
		auth_res = progp->pg_authenticate(rqstp);
	}
	switch (auth_res) {
	case SVC_OK:
		break;
	case SVC_GARBAGE:
		goto err_garbage;
	case SVC_SYSERR:
		rpc_stat = rpc_system_err;
		goto err_bad;
	case SVC_DENIED:
		goto err_bad_auth;
	case SVC_CLOSE:
		if (test_bit(XPT_TEMP, &rqstp->rq_xprt->xpt_flags))
			svc_close_xprt(rqstp->rq_xprt);
		/* fall through: a closed transport is also dropped */
	case SVC_DROP:
		goto dropit;
	case SVC_COMPLETE:
		goto sendit;
	}

	if (progp == NULL)
		goto err_bad_prog;

	if (vers >= progp->pg_nvers ||
	    !(versp = progp->pg_vers[vers]))
		goto err_bad_vers;

	procp = versp->vs_proc + proc;
	if (proc >= versp->vs_nproc || !procp->pc_func)
		goto err_bad_proc;
	rqstp->rq_procinfo = procp;

	/* Syntactic check complete */
	serv->sv_stats->rpccnt++;

	/* Build the reply header. */
	statp = resv->iov_base + resv->iov_len;
	svc_putnl(resv, RPC_SUCCESS);

	/* Bump per-procedure stats counter */
	procp->pc_count++;

	/* Initialize storage for argp and resp */
	memset(rqstp->rq_argp, 0, procp->pc_argsize);
	memset(rqstp->rq_resp, 0, procp->pc_ressize);

	/* un-reserve some of the out-queue now that we have a
	 * better idea of reply size
	 */
	if (procp->pc_xdrressize)
		svc_reserve_auth(rqstp, procp->pc_xdrressize<<2);

	/* Call the function that processes the request. */
	if (!versp->vs_dispatch) {
		/* Decode arguments */
		xdr = procp->pc_decode;
		if (xdr && !xdr(rqstp, argv->iov_base, rqstp->rq_argp))
			goto err_garbage;

		*statp = procp->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp);

		/* Encode reply */
		if (rqstp->rq_dropme) {
			if (procp->pc_release)
				procp->pc_release(rqstp, NULL, rqstp->rq_resp);
			goto dropit;
		}
		if (*statp == rpc_success &&
		    (xdr = procp->pc_encode) &&
		    !xdr(rqstp, resv->iov_base + resv->iov_len, rqstp->rq_resp)) {
			dprintk("svc: failed to encode reply\n");
			/* serv->sv_stats->rpcsystemerr++; */
			*statp = rpc_system_err;
		}
	} else {
		dprintk("svc: calling dispatcher\n");
		if (!versp->vs_dispatch(rqstp, statp)) {
			/* Release reply info */
			if (procp->pc_release)
				procp->pc_release(rqstp, NULL, rqstp->rq_resp);
			goto dropit;
		}
	}

	/* Check RPC status result */
	if (*statp != rpc_success)
		resv->iov_len = ((void *)statp) - resv->iov_base + 4;

	/* Release reply info */
	if (procp->pc_release)
		procp->pc_release(rqstp, NULL, rqstp->rq_resp);

	if (procp->pc_encode == NULL)
		goto dropit;

sendit:
	if (svc_authorise(rqstp))
		goto dropit;
	return 1;		/* Caller can now send it */

dropit:
	svc_authorise(rqstp);	/* doesn't hurt to call this twice */
	dprintk("svc: svc_process dropit\n");
	return 0;

err_short_len:
	svc_printk(rqstp, "short len %Zd, dropping request\n",
		   argv->iov_len);

	goto dropit;			/* drop request */

err_bad_rpc:
	serv->sv_stats->rpcbadfmt++;
	svc_putnl(resv, 1);	/* REJECT */
	svc_putnl(resv, 0);	/* RPC_MISMATCH */
	svc_putnl(resv, 2);	/* Only RPCv2 supported */
	svc_putnl(resv, 2);
	goto sendit;

err_bad_auth:
	dprintk("svc: authentication failed (%d)\n", ntohl(auth_stat));
	serv->sv_stats->rpcbadauth++;
	/* Restore write pointer to location of accept status: */
	xdr_ressize_check(rqstp, reply_statp);
	svc_putnl(resv, 1);	/* REJECT */
	svc_putnl(resv, 1);	/* AUTH_ERROR */
	svc_putnl(resv, ntohl(auth_stat));	/* status */
	goto sendit;

err_bad_prog:
	dprintk("svc: unknown program %d\n", prog);
	serv->sv_stats->rpcbadfmt++;
	svc_putnl(resv, RPC_PROG_UNAVAIL);
	goto sendit;

err_bad_vers:
	svc_printk(rqstp, "unknown version (%d for prog %d, %s)\n",
		   vers, prog, progp->pg_name);

	serv->sv_stats->rpcbadfmt++;
	svc_putnl(resv, RPC_PROG_MISMATCH);
	svc_putnl(resv, progp->pg_lovers);
	svc_putnl(resv, progp->pg_hivers);
	goto sendit;

err_bad_proc:
	svc_printk(rqstp, "unknown procedure (%d)\n", proc);

	serv->sv_stats->rpcbadfmt++;
	svc_putnl(resv, RPC_PROC_UNAVAIL);
	goto sendit;

err_garbage:
	svc_printk(rqstp, "failed to decode args\n");

	rpc_stat = rpc_garbage_args;
err_bad:
	serv->sv_stats->rpcbadfmt++;
	svc_putnl(resv, ntohl(rpc_stat));
	goto sendit;
}

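/*
 * On-the-wire sketch of the accepted-reply header that
 * svc_process_common() builds above (32-bit XDR words, values per
 * RFC 5531):
 *
 *	xid | REPLY (1) | MSG_ACCEPTED (0) | verifier | SUCCESS (0) | results
 *
 * The verifier words are emitted by svc_authenticate(); the rejection
 * paths emit MSG_DENIED (1) instead, with err_bad_auth first rewinding
 * the write pointer to reply_statp.
 */
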
/*
 * Process the RPC request.
 */
int
svc_process(struct svc_rqst *rqstp)
{
	struct kvec		*argv = &rqstp->rq_arg.head[0];
	struct kvec		*resv = &rqstp->rq_res.head[0];
	struct svc_serv		*serv = rqstp->rq_server;
	u32			dir;

	/*
	 * Setup response xdr_buf.
	 * Initially it has just one page
	 */
	rqstp->rq_resused = 1;
	resv->iov_base = page_address(rqstp->rq_respages[0]);
	resv->iov_len = 0;
	rqstp->rq_res.pages = rqstp->rq_respages + 1;
	rqstp->rq_res.len = 0;
	rqstp->rq_res.page_base = 0;
	rqstp->rq_res.page_len = 0;
	rqstp->rq_res.buflen = PAGE_SIZE;
	rqstp->rq_res.tail[0].iov_base = NULL;
	rqstp->rq_res.tail[0].iov_len = 0;

	rqstp->rq_xid = svc_getu32(argv);

	dir = svc_getnl(argv);
	if (dir != 0) {
		/* direction != CALL */
		svc_printk(rqstp, "bad direction %d, dropping request\n", dir);
		serv->sv_stats->rpcbadfmt++;
		svc_drop(rqstp);
		return 0;
	}

	/* Returns 1 for send, 0 for drop */
	if (svc_process_common(rqstp, argv, resv))
		return svc_send(rqstp);
	else {
		svc_drop(rqstp);
		return 0;
	}
}
EXPORT_SYMBOL_GPL(svc_process);

#if defined(CONFIG_NFS_V4_1)
/*
 * Process a backchannel RPC request that arrived over an existing
 * outbound connection
 */
int
bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req,
	       struct svc_rqst *rqstp)
{
	struct kvec	*argv = &rqstp->rq_arg.head[0];
	struct kvec	*resv = &rqstp->rq_res.head[0];

	/* Build the svc_rqst used by the common processing routine */
	rqstp->rq_xprt = serv->sv_bc_xprt;
	rqstp->rq_xid = req->rq_xid;
	rqstp->rq_prot = req->rq_xprt->prot;
	rqstp->rq_server = serv;

	rqstp->rq_addrlen = sizeof(req->rq_xprt->addr);
	memcpy(&rqstp->rq_addr, &req->rq_xprt->addr, rqstp->rq_addrlen);
	memcpy(&rqstp->rq_arg, &req->rq_rcv_buf, sizeof(rqstp->rq_arg));
	memcpy(&rqstp->rq_res, &req->rq_snd_buf, sizeof(rqstp->rq_res));

	/* reset result send buffer "put" position */
	resv->iov_len = 0;

	if (rqstp->rq_prot != IPPROTO_TCP) {
		printk(KERN_ERR "No support for Non-TCP transports!\n");
		BUG();
	}

	/*
	 * Skip the next two words because they've already been
	 * processed in the transport
	 */
	svc_getu32(argv);	/* XID */
	svc_getnl(argv);	/* CALLDIR */

	/* Returns 1 for send, 0 for drop */
	if (svc_process_common(rqstp, argv, resv)) {
		memcpy(&req->rq_snd_buf, &rqstp->rq_res,
		       sizeof(req->rq_snd_buf));
		return bc_send(req);
	} else {
		/* Nothing to do to drop request */
		return 0;
	}
}
EXPORT_SYMBOL(bc_svc_process);
#endif /* CONFIG_NFS_V4_1 */

/*
 * Return (transport-specific) limit on the rpc payload.
 */
u32 svc_max_payload(const struct svc_rqst *rqstp)
{
	u32 max = rqstp->rq_xprt->xpt_class->xcl_max_payload;

	if (rqstp->rq_server->sv_max_payload < max)
		max = rqstp->rq_server->sv_max_payload;
	return max;
}
EXPORT_SYMBOL_GPL(svc_max_payload);
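
/*
 * Example of the clamping above (the 32K figure is the usual UDP
 * transport class limit and is an assumption about svcsock.c, not
 * something defined in this file): a server created with a 1MB
 * sv_max_payload still answers at most 32K per payload on a UDP
 * transport, because the smaller of xcl_max_payload and
 * sv_max_payload wins.
 */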