Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/rpc/clnt_nl.c
39478 views
1
/*-
2
* SPDX-License-Identifier: BSD-2-Clause
3
*
4
* Copyright (c) 2025 Gleb Smirnoff <glebius@FreeBSD.org>
5
*
6
* Redistribution and use in source and binary forms, with or without
7
* modification, are permitted provided that the following conditions
8
* are met:
9
* 1. Redistributions of source code must retain the above copyright
10
* notice, this list of conditions and the following disclaimer.
11
* 2. Redistributions in binary form must reproduce the above copyright
12
* notice, this list of conditions and the following disclaimer in the
13
* documentation and/or other materials provided with the distribution.
14
*
15
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25
* SUCH DAMAGE.
26
*/
27
28
#include <sys/param.h>
29
#include <sys/lock.h>
30
#include <sys/kernel.h>
31
#include <sys/malloc.h>
32
#include <sys/mutex.h>
33
#include <sys/rwlock.h>
34
#include <sys/mbuf.h>
35
#include <sys/priv.h>
36
#include <sys/proc.h>
37
#include <sys/queue.h>
38
#include <sys/tree.h>
39
40
#include <rpc/rpc.h>
41
#include <rpc/rpc_com.h>
42
#include <rpc/krpc.h>
43
#include <rpc/clnt_nl.h>
44
45
#include <netlink/netlink.h>
46
#include <netlink/netlink_ctl.h>
47
#include <netlink/netlink_generic.h>
48
49
/*
50
* Kernel RPC client over netlink(4), where kernel is RPC client and an
51
* application is a server. See svc_nl.c in the libc/rpc as the counterpart.
52
*
53
* The module registers itself within generic netlink families list under name
54
* "rpc". Every new client creates a new multicast group belonging to this
55
* family. When a client starts RPC, the module will multicast the call to
56
* potential netlink listeners and sleep/retry until receiving a result. The
57
* framing of the request:
58
*
59
* [netlink message header, type = "rpc" ID, seq == xid]
60
* [generic netlink header, cmd = RPCNL_REQUEST]
61
* [netlink attribute RPCNL_REQUEST_GROUP]
62
* [group ID]
63
* [netlink attribute RPCNL_REQUEST_BODY]
64
* [XDR encoded payload]
65
*
66
* Note: the generic netlink header and attributes aren't really necessary
67
* for successful communication, since the netlink multicast membership already
68
* guarantees us all needed filtering. The working prototype was putting the
69
* XDR encoded payload right after netlink message header. But we will provide
70
* this framing to allow for any future extensions.
71
*
72
* The expected RPC result from the userland shall be framed like this:
73
*
74
* [netlink message header, type = "rpc" ID, seq == xid]
75
* [generic netlink header, cmd = RPCNL_REPLY]
76
* [netlink attribute RPCNL_REPLY_GROUP]
77
* [group ID]
78
* [netlink attribute RPCNL_REPLY_BODY]
79
* [XDR encoded payload]
80
*
81
* Disclaimer: has been designed and tested only for the NFS related kernel
82
* RPC clients: kgssapi, RPC binding for NLM, TLS client and TLS server.
83
*
84
* Caveats:
85
* 1) Now the privilege checking is hardcoded to PRIV_NFS_DAEMON at the netlink
86
* command and multicast layers. If any new client in addition to NFS
87
* service emerges, we may want to rewrite privilege checking at the client
88
* level somehow.
89
* 2) Since we are using netlink attribute for the payload, payload size is
90
* limited to UINT16_MAX. Today RPC_MAXDATASIZE of 9000 is smaller than that.
91
* What if a future RPC wants more?
92
*/
93
94
static enum clnt_stat clnt_nl_call(CLIENT *, struct rpc_callextra *,
95
rpcproc_t, struct mbuf *, struct mbuf **, struct timeval);
96
static void clnt_nl_close(CLIENT *);
97
static void clnt_nl_destroy(CLIENT *);
98
static bool_t clnt_nl_control(CLIENT *, u_int, void *);
99
100
static const struct clnt_ops clnt_nl_ops = {
101
.cl_call = clnt_nl_call,
102
.cl_close = clnt_nl_close,
103
.cl_destroy = clnt_nl_destroy,
104
.cl_control = clnt_nl_control,
105
};
106
107
static int clnt_nl_reply(struct nlmsghdr *, struct nl_pstate *);
108
109
static const struct genl_cmd clnt_cmds[] = {
110
{
111
.cmd_num = RPCNL_REPLY,
112
.cmd_name = "request",
113
.cmd_cb = clnt_nl_reply,
114
.cmd_priv = PRIV_NFS_DAEMON,
115
},
116
};
117
118
struct nl_reply_parsed {
119
uint32_t group;
120
struct nlattr *data;
121
};
122
static const struct nlattr_parser rpcnl_attr_parser[] = {
123
#define OUT(field) offsetof(struct nl_reply_parsed, field)
124
{ .type = RPCNL_REPLY_GROUP, .off = OUT(group), .cb = nlattr_get_uint32 },
125
{ .type = RPCNL_REPLY_BODY, .off = OUT(data), .cb = nlattr_get_nla },
126
#undef OUT
127
};
128
NL_DECLARE_PARSER(rpcnl_parser, struct genlmsghdr, nlf_p_empty,
129
rpcnl_attr_parser);
130
131
struct nl_data {
132
struct mtx nl_lock;
133
RB_ENTRY(nl_data) nl_tree;
134
TAILQ_HEAD(, ct_request) nl_pending;
135
uint32_t nl_xid;
136
u_int nl_mpos;
137
u_int nl_authlen;
138
u_int nl_retries;
139
struct {
140
struct genlmsghdr ghdr;
141
struct nlattr gattr;
142
uint32_t group;
143
} nl_hdr; /* pre-initialized header */
144
char nl_mcallc[MCALL_MSG_SIZE]; /* marshalled callmsg */
145
/* msleep(9) arguments */
146
const char * nl_wchan;
147
int nl_prio;
148
int nl_timo;
149
};
150
151
static RB_HEAD(nl_data_t, nl_data) rpcnl_clients;
152
static int32_t
153
nl_data_compare(const struct nl_data *a, const struct nl_data *b)
154
{
155
return ((int32_t)(a->nl_hdr.group - b->nl_hdr.group));
156
}
157
RB_GENERATE_STATIC(nl_data_t, nl_data, nl_tree, nl_data_compare);
158
static struct rwlock rpcnl_global_lock;
159
160
static const char rpcnl_family_name[] = "rpc";
161
static uint16_t rpcnl_family_id;
162
163
void
164
rpcnl_init(void)
165
{
166
bool rv __diagused;
167
168
rpcnl_family_id = genl_register_family(rpcnl_family_name, 0, 1, 1);
169
MPASS(rpcnl_family_id != 0);
170
rv = genl_register_cmds(rpcnl_family_id, clnt_cmds, nitems(clnt_cmds));
171
MPASS(rv);
172
rw_init(&rpcnl_global_lock, rpcnl_family_name);
173
}
174
175
CLIENT *
176
client_nl_create(const char *name, const rpcprog_t program,
177
const rpcvers_t version)
178
{
179
CLIENT *cl;
180
struct nl_data *nl;
181
struct timeval now;
182
struct rpc_msg call_msg;
183
XDR xdrs;
184
uint32_t group;
185
bool rv __diagused;
186
187
if ((group = genl_register_group(rpcnl_family_id, name)) == 0)
188
return (NULL);
189
190
nl = malloc(sizeof(*nl), M_RPC, M_WAITOK);
191
*nl = (struct nl_data){
192
.nl_pending = TAILQ_HEAD_INITIALIZER(nl->nl_pending),
193
.nl_hdr = {
194
.ghdr.cmd = RPCNL_REQUEST,
195
.gattr.nla_type = RPCNL_REQUEST_GROUP,
196
.gattr.nla_len = sizeof(struct nlattr) +
197
sizeof(uint32_t),
198
.group = group,
199
},
200
.nl_wchan = rpcnl_family_name,
201
.nl_prio = PSOCK | PCATCH,
202
.nl_timo = 60 * hz,
203
.nl_retries = 1,
204
};
205
mtx_init(&nl->nl_lock, "rpc_clnt_nl", NULL, MTX_DEF);
206
207
/*
208
* Initialize and pre-serialize the static part of the call message.
209
*/
210
getmicrotime(&now);
211
nl->nl_xid = __RPC_GETXID(&now);
212
call_msg = (struct rpc_msg ){
213
.rm_xid = nl->nl_xid,
214
.rm_direction = CALL,
215
.rm_call = {
216
.cb_rpcvers = RPC_MSG_VERSION,
217
.cb_prog = (uint32_t)program,
218
.cb_vers = (uint32_t)version,
219
},
220
};
221
222
cl = malloc(sizeof(*cl), M_RPC, M_WAITOK);
223
*cl = (CLIENT){
224
.cl_refs = 1,
225
.cl_ops = &clnt_nl_ops,
226
.cl_private = nl,
227
.cl_auth = authnone_create(),
228
};
229
230
/*
231
* Experimentally learn how many bytes does procedure name plus
232
* authnone header needs. Use nl_mcallc as temporary scratch space.
233
*/
234
xdrmem_create(&xdrs, nl->nl_mcallc, MCALL_MSG_SIZE, XDR_ENCODE);
235
rv = xdr_putint32(&xdrs, &(rpcproc_t){0});
236
MPASS(rv);
237
rv = AUTH_MARSHALL(cl->cl_auth, 0, &xdrs, NULL);
238
MPASS(rv);
239
nl->nl_authlen = xdr_getpos(&xdrs);
240
xdr_destroy(&xdrs);
241
242
xdrmem_create(&xdrs, nl->nl_mcallc, MCALL_MSG_SIZE, XDR_ENCODE);
243
rv = xdr_callhdr(&xdrs, &call_msg);
244
MPASS(rv);
245
nl->nl_mpos = xdr_getpos(&xdrs);
246
xdr_destroy(&xdrs);
247
248
rw_wlock(&rpcnl_global_lock);
249
RB_INSERT(nl_data_t, &rpcnl_clients, nl);
250
rw_wunlock(&rpcnl_global_lock);
251
252
return (cl);
253
}
254
255
static enum clnt_stat
256
clnt_nl_call(CLIENT *cl, struct rpc_callextra *ext, rpcproc_t proc,
257
struct mbuf *args, struct mbuf **resultsp, struct timeval utimeout)
258
{
259
struct nl_writer nw;
260
struct nl_data *nl = cl->cl_private;
261
struct ct_request *cr;
262
struct rpc_err *errp, err;
263
enum clnt_stat stat;
264
AUTH *auth;
265
XDR xdrs;
266
void *mem;
267
uint32_t len, xlen;
268
u_int retries = 0;
269
bool rv __diagused;
270
271
CURVNET_ASSERT_SET();
272
273
cr = malloc(sizeof(struct ct_request), M_RPC, M_WAITOK);
274
*cr = (struct ct_request){
275
.cr_xid = atomic_fetchadd_32(&nl->nl_xid, 1),
276
.cr_error = ETIMEDOUT,
277
#ifdef VIMAGE
278
.cr_vnet = curvnet,
279
#endif
280
};
281
282
if (ext) {
283
auth = ext->rc_auth;
284
errp = &ext->rc_err;
285
len = RPC_MAXDATASIZE; /* XXXGL: can be improved */
286
} else {
287
auth = cl->cl_auth;
288
errp = &err;
289
len = nl->nl_mpos + nl->nl_authlen + m_length(args, NULL);
290
}
291
292
mem = malloc(len, M_RPC, M_WAITOK);
293
retry:
294
xdrmem_create(&xdrs, mem, len, XDR_ENCODE);
295
296
rv = xdr_putbytes(&xdrs, nl->nl_mcallc, nl->nl_mpos);
297
MPASS(rv);
298
rv = xdr_putint32(&xdrs, &proc);
299
MPASS(rv);
300
if (!AUTH_MARSHALL(auth, cr->cr_xid, &xdrs, args)) {
301
stat = errp->re_status = RPC_CANTENCODEARGS;
302
goto out;
303
} else
304
stat = errp->re_status = RPC_SUCCESS;
305
306
/* XXX: XID is the first thing in the request. */
307
*(uint32_t *)mem = htonl(cr->cr_xid);
308
309
xlen = xdr_getpos(&xdrs);
310
rv = nl_writer_group(&nw, xlen, NETLINK_GENERIC, nl->nl_hdr.group,
311
PRIV_NFS_DAEMON, true);
312
MPASS(rv);
313
314
rv = nlmsg_add(&nw, 0, cr->cr_xid, rpcnl_family_id, 0,
315
sizeof(nl->nl_hdr) + sizeof(struct nlattr) + xlen);
316
MPASS(rv);
317
318
memcpy(nlmsg_reserve_data_raw(&nw, sizeof(nl->nl_hdr)), &nl->nl_hdr,
319
sizeof(nl->nl_hdr));
320
321
rv = nlattr_add(&nw, RPCNL_REQUEST_BODY, xlen, mem);
322
MPASS(rv);
323
324
rv = nlmsg_end(&nw);
325
MPASS(rv);
326
327
mtx_lock(&nl->nl_lock);
328
TAILQ_INSERT_TAIL(&nl->nl_pending, cr, cr_link);
329
mtx_unlock(&nl->nl_lock);
330
331
nlmsg_flush(&nw);
332
333
mtx_lock(&nl->nl_lock);
334
if (__predict_true(cr->cr_error == ETIMEDOUT))
335
(void)msleep(cr, &nl->nl_lock, nl->nl_prio, nl->nl_wchan,
336
(nl->nl_timo ? nl->nl_timo : tvtohz(&utimeout)) /
337
nl->nl_retries);
338
TAILQ_REMOVE(&nl->nl_pending, cr, cr_link);
339
mtx_unlock(&nl->nl_lock);
340
341
if (__predict_true(cr->cr_error == 0)) {
342
struct rpc_msg reply_msg = {
343
.acpted_rply.ar_verf.oa_base = cr->cr_verf,
344
.acpted_rply.ar_results.proc = (xdrproc_t)xdr_void,
345
};
346
347
MPASS(cr->cr_mrep);
348
if (ext && ext->rc_feedback)
349
ext->rc_feedback(FEEDBACK_OK, proc,
350
ext->rc_feedback_arg);
351
xdrmbuf_create(&xdrs, cr->cr_mrep, XDR_DECODE);
352
rv = xdr_replymsg(&xdrs, &reply_msg);
353
if (__predict_false(!rv)) {
354
stat = errp->re_status = RPC_CANTDECODERES;
355
goto out;
356
}
357
if ((reply_msg.rm_reply.rp_stat == MSG_ACCEPTED) &&
358
(reply_msg.acpted_rply.ar_stat == SUCCESS)) {
359
struct mbuf *results;
360
361
stat = errp->re_status = RPC_SUCCESS;
362
results = xdrmbuf_getall(&xdrs);
363
if (__predict_true(AUTH_VALIDATE(auth, cr->cr_xid,
364
&reply_msg.acpted_rply.ar_verf, &results))) {
365
MPASS(results);
366
*resultsp = results;
367
/* end successful completion */
368
} else {
369
stat = errp->re_status = RPC_AUTHERROR;
370
errp->re_why = AUTH_INVALIDRESP;
371
}
372
} else {
373
stat = _seterr_reply(&reply_msg, errp);
374
}
375
xdr_destroy(&xdrs); /* frees cr->cr_mrep */
376
} else {
377
MPASS(cr->cr_mrep == NULL);
378
errp->re_errno = cr->cr_error;
379
stat = errp->re_status = RPC_CANTRECV;
380
if (cr->cr_error == ETIMEDOUT && ++retries < nl->nl_retries) {
381
cr->cr_xid = atomic_fetchadd_32(&nl->nl_xid, 1);
382
goto retry;
383
}
384
}
385
out:
386
free(cr, M_RPC);
387
free(mem, M_RPC);
388
389
return (stat);
390
}
391
392
static int
393
clnt_nl_reply(struct nlmsghdr *hdr, struct nl_pstate *npt)
394
{
395
struct nl_reply_parsed attrs = {};
396
struct nl_data *nl;
397
struct ct_request *cr;
398
struct mchain mc;
399
int error;
400
401
CURVNET_ASSERT_SET();
402
403
if ((error = nl_parse_nlmsg(hdr, &rpcnl_parser, npt, &attrs)) != 0)
404
return (error);
405
if (attrs.data == NULL)
406
return (EINVAL);
407
408
error = mc_get(&mc, NLA_DATA_LEN(attrs.data), M_WAITOK, MT_DATA, 0);
409
MPASS(error == 0);
410
m_copyback(mc_first(&mc), 0, NLA_DATA_LEN(attrs.data),
411
NLA_DATA(attrs.data));
412
413
rw_rlock(&rpcnl_global_lock);
414
if ((nl = RB_FIND(nl_data_t, &rpcnl_clients,
415
&(struct nl_data){ .nl_hdr.group = attrs.group })) == NULL) {
416
rw_runlock(&rpcnl_global_lock);
417
mc_freem(&mc);
418
return (EPROGUNAVAIL);
419
};
420
mtx_lock(&nl->nl_lock);
421
rw_runlock(&rpcnl_global_lock);
422
423
TAILQ_FOREACH(cr, &nl->nl_pending, cr_link)
424
if (cr->cr_xid == hdr->nlmsg_seq
425
#ifdef VIMAGE
426
&& cr->cr_vnet == curvnet
427
#endif
428
)
429
break;
430
if (cr == NULL) {
431
mtx_unlock(&nl->nl_lock);
432
mc_freem(&mc);
433
return (EPROCUNAVAIL);
434
}
435
cr->cr_mrep = mc_first(&mc);
436
cr->cr_error = 0;
437
wakeup(cr);
438
mtx_unlock(&nl->nl_lock);
439
440
return (0);
441
}
442
443
static void
444
clnt_nl_close(CLIENT *cl)
445
{
446
struct nl_data *nl = cl->cl_private;
447
struct ct_request *cr;
448
449
mtx_lock(&nl->nl_lock);
450
TAILQ_FOREACH(cr, &nl->nl_pending, cr_link) {
451
cr->cr_error = ESHUTDOWN;
452
wakeup(cr);
453
}
454
mtx_unlock(&nl->nl_lock);
455
}
456
457
static void
458
clnt_nl_destroy(CLIENT *cl)
459
{
460
struct nl_data *nl = cl->cl_private;
461
462
MPASS(TAILQ_EMPTY(&nl->nl_pending));
463
464
genl_unregister_group(rpcnl_family_id, nl->nl_hdr.group);
465
rw_wlock(&rpcnl_global_lock);
466
RB_REMOVE(nl_data_t, &rpcnl_clients, nl);
467
rw_wlock(&rpcnl_global_lock);
468
469
mtx_destroy(&nl->nl_lock);
470
free(nl, M_RPC);
471
free(cl, M_RPC);
472
}
473
474
static bool_t
475
clnt_nl_control(CLIENT *cl, u_int request, void *info)
476
{
477
struct nl_data *nl = (struct nl_data *)cl->cl_private;
478
479
mtx_lock(&nl->nl_lock);
480
switch (request) {
481
case CLSET_TIMEOUT:
482
nl->nl_timo = tvtohz((struct timeval *)info);
483
break;
484
485
case CLGET_TIMEOUT:
486
*(struct timeval *)info =
487
(struct timeval){.tv_sec = nl->nl_timo / hz};
488
break;
489
490
case CLSET_RETRIES:
491
nl->nl_retries = *(u_int *)info;
492
break;
493
494
case CLSET_WAITCHAN:
495
nl->nl_wchan = (const char *)info;
496
break;
497
498
case CLGET_WAITCHAN:
499
*(const char **)info = nl->nl_wchan;
500
break;
501
502
case CLSET_INTERRUPTIBLE:
503
if (*(int *)info)
504
nl->nl_prio |= PCATCH;
505
else
506
nl->nl_prio &= ~PCATCH;
507
break;
508
509
case CLGET_INTERRUPTIBLE:
510
*(int *)info = (nl->nl_prio & PCATCH) ? TRUE : FALSE;
511
break;
512
513
default:
514
mtx_unlock(&nl->nl_lock);
515
printf("%s: unsupported request %u\n", __func__, request);
516
return (FALSE);
517
}
518
519
mtx_unlock(&nl->nl_lock);
520
return (TRUE);
521
}
522
523