Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/fs/nfs/nfs_commonkrpc.c
39483 views
1
/*-
2
* SPDX-License-Identifier: BSD-3-Clause
3
*
4
* Copyright (c) 1989, 1991, 1993, 1995
5
* The Regents of the University of California. All rights reserved.
6
*
7
* This code is derived from software contributed to Berkeley by
8
* Rick Macklem at The University of Guelph.
9
*
10
* Redistribution and use in source and binary forms, with or without
11
* modification, are permitted provided that the following conditions
12
* are met:
13
* 1. Redistributions of source code must retain the above copyright
14
* notice, this list of conditions and the following disclaimer.
15
* 2. Redistributions in binary form must reproduce the above copyright
16
* notice, this list of conditions and the following disclaimer in the
17
* documentation and/or other materials provided with the distribution.
18
* 3. Neither the name of the University nor the names of its contributors
19
* may be used to endorse or promote products derived from this software
20
* without specific prior written permission.
21
*
22
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32
* SUCH DAMAGE.
33
*
34
*/
35
36
#include <sys/cdefs.h>
37
/*
38
* Socket operations for use by nfs
39
*/
40
41
#include "opt_kgssapi.h"
42
#include "opt_nfs.h"
43
44
#include <sys/param.h>
45
#include <sys/systm.h>
46
#include <sys/kernel.h>
47
#include <sys/limits.h>
48
#include <sys/lock.h>
49
#include <sys/malloc.h>
50
#include <sys/mbuf.h>
51
#include <sys/mount.h>
52
#include <sys/mutex.h>
53
#include <sys/proc.h>
54
#include <sys/signalvar.h>
55
#include <sys/syscallsubr.h>
56
#include <sys/sysctl.h>
57
#include <sys/syslog.h>
58
#include <sys/vnode.h>
59
60
#include <rpc/rpc.h>
61
#include <rpc/krpc.h>
62
63
#include <kgssapi/krb5/kcrypto.h>
64
65
#include <fs/nfs/nfsport.h>
66
67
#ifdef KDTRACE_HOOKS
#include <sys/dtrace_bsd.h>

/*
 * Hook pointers for the DTrace NFS client start/done probes.  The request
 * path tests the start hook against NULL before calling it, so these are
 * presumably filled in by the DTrace provider when it is loaded -- confirm
 * against the provider module.
 */
dtrace_nfsclient_nfs23_start_probe_func_t
		dtrace_nfscl_nfs234_start_probe;

dtrace_nfsclient_nfs23_done_probe_func_t
		dtrace_nfscl_nfs234_done_probe;

/*
 * Registered probes by RPC type.
 * One start/done id table per NFS version; a zero id means no probe is
 * fired for that procedure.
 */
uint32_t	nfscl_nfs2_start_probes[NFSV41_NPROCS + 1];
uint32_t	nfscl_nfs2_done_probes[NFSV41_NPROCS + 1];

uint32_t	nfscl_nfs3_start_probes[NFSV41_NPROCS + 1];
uint32_t	nfscl_nfs3_done_probes[NFSV41_NPROCS + 1];

uint32_t	nfscl_nfs4_start_probes[NFSV41_NPROCS + 1];
uint32_t	nfscl_nfs4_done_probes[NFSV41_NPROCS + 1];
#endif
88
89
NFSSTATESPINLOCK;
90
NFSREQSPINLOCK;
91
NFSDLOCKMUTEX;
92
NFSCLSTATEMUTEX;
93
extern struct nfsstatsv1 nfsstatsv1;
94
extern struct nfsreqhead nfsd_reqq;
95
extern int nfscl_ticks;
96
extern void (*ncl_call_invalcaches)(struct vnode *);
97
extern int nfs_numnfscbd;
98
extern int nfscl_debuglevel;
99
extern int nfsrv_lease;
100
101
SVCPOOL *nfscbd_pool;
102
int nfs_bufpackets = 4;
103
static int nfsrv_gsscallbackson = 0;
104
static int nfs_reconnects;
105
static int nfs3_jukebox_delay = 10;
106
static int nfs_skip_wcc_data_onerr = 1;
107
static int nfs_dsretries = 2;
108
static struct timespec nfs_trylater_max = {
109
.tv_sec = NFS_TRYLATERDEL,
110
.tv_nsec = 0,
111
};
112
113
SYSCTL_DECL(_vfs_nfs);
114
115
SYSCTL_INT(_vfs_nfs, OID_AUTO, bufpackets, CTLFLAG_RW, &nfs_bufpackets, 0,
116
"Buffer reservation size 2 < x < 64");
117
SYSCTL_INT(_vfs_nfs, OID_AUTO, reconnects, CTLFLAG_RD, &nfs_reconnects, 0,
118
"Number of times the nfs client has had to reconnect");
119
SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs3_jukebox_delay, CTLFLAG_RW, &nfs3_jukebox_delay, 0,
120
"Number of seconds to delay a retry after receiving EJUKEBOX");
121
SYSCTL_INT(_vfs_nfs, OID_AUTO, skip_wcc_data_onerr, CTLFLAG_RW, &nfs_skip_wcc_data_onerr, 0,
122
"Disable weak cache consistency checking when server returns an error");
123
SYSCTL_INT(_vfs_nfs, OID_AUTO, dsretries, CTLFLAG_RW, &nfs_dsretries, 0,
124
"Number of retries for a DS RPC before failure");
125
126
/*
 * Forward declarations for the server up/down reporting helpers.
 * nfs_down() and nfs_up() are invoked from nfs_feedback(); their bodies
 * are defined elsewhere in this file.
 */
static void	nfs_down(struct nfsmount *, struct thread *, const char *,
		    int, int);
static void	nfs_up(struct nfsmount *, struct thread *, const char *,
		    int, int);
static int	nfs_msg(struct thread *, const char *, const char *, int);
131
132
struct nfs_cached_auth {
133
int ca_refs; /* refcount, including 1 from the cache */
134
uid_t ca_uid; /* uid that corresponds to this auth */
135
AUTH *ca_auth; /* RPC auth handle */
136
};
137
138
static int nfsv2_procid[NFS_V3NPROCS] = {
139
NFSV2PROC_NULL,
140
NFSV2PROC_GETATTR,
141
NFSV2PROC_SETATTR,
142
NFSV2PROC_LOOKUP,
143
NFSV2PROC_NOOP,
144
NFSV2PROC_READLINK,
145
NFSV2PROC_READ,
146
NFSV2PROC_WRITE,
147
NFSV2PROC_CREATE,
148
NFSV2PROC_MKDIR,
149
NFSV2PROC_SYMLINK,
150
NFSV2PROC_CREATE,
151
NFSV2PROC_REMOVE,
152
NFSV2PROC_RMDIR,
153
NFSV2PROC_RENAME,
154
NFSV2PROC_LINK,
155
NFSV2PROC_READDIR,
156
NFSV2PROC_NOOP,
157
NFSV2PROC_STATFS,
158
NFSV2PROC_NOOP,
159
NFSV2PROC_NOOP,
160
NFSV2PROC_NOOP,
161
};
162
163
/*
164
* This static array indicates that a NFSv4 RPC should use
165
* RPCSEC_GSS, if the mount indicates that via sec=krb5[ip].
166
* System RPCs that do not use file handles will be false
167
* in this array so that they will use AUTH_SYS when the
168
* "syskrb5" mount option is specified, along with
169
* "sec=krb5[ip]".
170
*/
171
static bool nfscl_use_gss[NFSV42_NPROCS] = {
172
true,
173
true,
174
true,
175
true,
176
true,
177
true,
178
true,
179
true,
180
true,
181
true,
182
true,
183
true,
184
true,
185
true,
186
true,
187
true,
188
true,
189
true,
190
true,
191
true,
192
true,
193
true,
194
true,
195
false, /* SetClientID */
196
false, /* SetClientIDConfirm */
197
true,
198
true,
199
true,
200
true,
201
true,
202
true,
203
true,
204
false, /* Renew */
205
true,
206
false, /* ReleaseLockOwn */
207
true,
208
true,
209
true,
210
true,
211
true,
212
true,
213
false, /* ExchangeID */
214
false, /* CreateSession */
215
false, /* DestroySession */
216
false, /* DestroyClientID */
217
false, /* FreeStateID */
218
true,
219
true,
220
true,
221
true,
222
false, /* ReclaimComplete */
223
true,
224
true,
225
true,
226
true,
227
true,
228
true,
229
true,
230
true,
231
true,
232
true,
233
true,
234
true,
235
true,
236
true,
237
false, /* BindConnectionToSession */
238
true,
239
true,
240
true,
241
true,
242
true,
243
};
244
245
/*
246
* Initialize sockets and congestion for a new NFS connection.
247
* We do not free the sockaddr if error.
248
* Which arguments are set to NULL indicate what kind of call it is.
249
* cred == NULL --> a call to connect to a pNFS DS
250
* nmp == NULL --> indicates an upcall to userland or a NFSv4.0 callback
251
*/
252
int
253
newnfs_connect(struct nfsmount *nmp, struct nfssockreq *nrp,
254
struct ucred *cred, NFSPROC_T *p, int callback_retry_mult, bool dotls,
255
struct __rpc_client **clipp)
256
{
257
int rcvreserve, sndreserve;
258
int pktscale, pktscalesav;
259
struct sockaddr *saddr;
260
struct ucred *origcred;
261
CLIENT *client;
262
struct netconfig *nconf;
263
struct socket *so;
264
int one = 1, retries, error = 0;
265
struct thread *td = curthread;
266
SVCXPRT *xprt;
267
struct timeval timo;
268
uint64_t tval;
269
270
/*
271
* We need to establish the socket using the credentials of
272
* the mountpoint. Some parts of this process (such as
273
* sobind() and soconnect()) will use the curent thread's
274
* credential instead of the socket credential. To work
275
* around this, temporarily change the current thread's
276
* credential to that of the mountpoint.
277
*
278
* XXX: It would be better to explicitly pass the correct
279
* credential to sobind() and soconnect().
280
*/
281
origcred = td->td_ucred;
282
283
/*
284
* Use the credential in nr_cred, if not NULL.
285
*/
286
if (nrp->nr_cred != NULL)
287
td->td_ucred = nrp->nr_cred;
288
else
289
td->td_ucred = cred;
290
saddr = nrp->nr_nam;
291
292
if (saddr->sa_family == AF_INET)
293
if (nrp->nr_sotype == SOCK_DGRAM)
294
nconf = getnetconfigent("udp");
295
else
296
nconf = getnetconfigent("tcp");
297
else
298
if (nrp->nr_sotype == SOCK_DGRAM)
299
nconf = getnetconfigent("udp6");
300
else
301
nconf = getnetconfigent("tcp6");
302
303
pktscale = nfs_bufpackets;
304
if (pktscale < 2)
305
pktscale = 2;
306
if (pktscale > 64)
307
pktscale = 64;
308
pktscalesav = pktscale;
309
/*
310
* soreserve() can fail if sb_max is too small, so shrink pktscale
311
* and try again if there is an error.
312
* Print a log message suggesting increasing sb_max.
313
* Creating a socket and doing this is necessary since, if the
314
* reservation sizes are too large and will make soreserve() fail,
315
* the connection will work until a large send is attempted and
316
* then it will loop in the krpc code.
317
*/
318
so = NULL;
319
saddr = NFSSOCKADDR(nrp->nr_nam, struct sockaddr *);
320
error = socreate(saddr->sa_family, &so, nrp->nr_sotype,
321
nrp->nr_soproto, td->td_ucred, td);
322
if (error != 0)
323
goto out;
324
do {
325
if (error != 0 && pktscale > 2) {
326
if (nmp != NULL && nrp->nr_sotype == SOCK_STREAM &&
327
pktscale == pktscalesav) {
328
/*
329
* Suggest vfs.nfs.bufpackets * maximum RPC message,
330
* adjusted for the sb_max->sb_max_adj conversion of
331
* MCLBYTES / (MSIZE + MCLBYTES) as the minimum setting
332
* for kern.ipc.maxsockbuf.
333
*/
334
tval = (NFS_MAXBSIZE + NFS_MAXXDR) * nfs_bufpackets;
335
tval *= MSIZE + MCLBYTES;
336
tval += MCLBYTES - 1; /* Round up divide by MCLBYTES. */
337
tval /= MCLBYTES;
338
printf("Consider increasing kern.ipc.maxsockbuf to a "
339
"minimum of %ju to support %ubyte NFS I/O\n",
340
(uintmax_t)tval, NFS_MAXBSIZE);
341
}
342
pktscale--;
343
}
344
if (nrp->nr_sotype == SOCK_DGRAM) {
345
if (nmp != NULL) {
346
sndreserve = (NFS_MAXDGRAMDATA + NFS_MAXPKTHDR) *
347
pktscale;
348
rcvreserve = (NFS_MAXDGRAMDATA + NFS_MAXPKTHDR) *
349
pktscale;
350
} else {
351
sndreserve = rcvreserve = 1024 * pktscale;
352
}
353
} else {
354
if (nrp->nr_sotype != SOCK_STREAM)
355
panic("nfscon sotype");
356
if (nmp != NULL) {
357
sndreserve = (NFS_MAXBSIZE + NFS_MAXXDR) *
358
pktscale;
359
rcvreserve = (NFS_MAXBSIZE + NFS_MAXXDR) *
360
pktscale;
361
} else {
362
sndreserve = rcvreserve = 1024 * pktscale;
363
}
364
}
365
error = soreserve(so, sndreserve, rcvreserve);
366
if (error != 0 && nmp != NULL && nrp->nr_sotype == SOCK_STREAM &&
367
pktscale <= 2)
368
printf("Must increase kern.ipc.maxsockbuf or reduce"
369
" rsize, wsize\n");
370
} while (error != 0 && pktscale > 2);
371
soclose(so);
372
if (error != 0)
373
goto out;
374
375
client = clnt_reconnect_create(nconf, saddr, nrp->nr_prog,
376
nrp->nr_vers, sndreserve, rcvreserve);
377
CLNT_CONTROL(client, CLSET_WAITCHAN, "nfsreq");
378
if (nmp != NULL) {
379
if ((nmp->nm_flag & NFSMNT_INT))
380
CLNT_CONTROL(client, CLSET_INTERRUPTIBLE, &one);
381
if ((nmp->nm_flag & NFSMNT_RESVPORT))
382
CLNT_CONTROL(client, CLSET_PRIVPORT, &one);
383
if (NFSHASTLS(nmp)) {
384
CLNT_CONTROL(client, CLSET_TLS, &one);
385
if (nmp->nm_tlscertname != NULL)
386
CLNT_CONTROL(client, CLSET_TLSCERTNAME,
387
nmp->nm_tlscertname);
388
}
389
if (NFSHASSOFT(nmp)) {
390
if (nmp->nm_sotype == SOCK_DGRAM)
391
/*
392
* For UDP, the large timeout for a reconnect
393
* will be set to "nm_retry * nm_timeo / 2", so
394
* we only want to do 2 reconnect timeout
395
* retries.
396
*/
397
retries = 2;
398
else
399
retries = nmp->nm_retry;
400
} else
401
retries = INT_MAX;
402
if (NFSHASNFSV4N(nmp)) {
403
if (cred != NULL) {
404
if (NFSHASSOFT(nmp)) {
405
/*
406
* This should be a DS mount.
407
* Use CLSET_TIMEOUT to set the timeout
408
* for connections to DSs instead of
409
* specifying a timeout on each RPC.
410
* This is done so that SO_SNDTIMEO
411
* is set on the TCP socket as well
412
* as specifying a time limit when
413
* waiting for an RPC reply. Useful
414
* if the send queue for the TCP
415
* connection has become constipated,
416
* due to a failed DS.
417
* The choice of lease_duration / 4 is
418
* fairly arbitrary, but seems to work
419
* ok, with a lower bound of 10sec.
420
*/
421
timo.tv_sec = nfsrv_lease / 4;
422
if (timo.tv_sec < 10)
423
timo.tv_sec = 10;
424
timo.tv_usec = 0;
425
CLNT_CONTROL(client, CLSET_TIMEOUT,
426
&timo);
427
}
428
/*
429
* Make sure the nfscbd_pool doesn't get
430
* destroyed while doing this.
431
*/
432
NFSD_LOCK();
433
if (nfs_numnfscbd > 0) {
434
nfs_numnfscbd++;
435
NFSD_UNLOCK();
436
xprt = svc_vc_create_backchannel(
437
nfscbd_pool);
438
CLNT_CONTROL(client, CLSET_BACKCHANNEL,
439
xprt);
440
NFSD_LOCK();
441
nfs_numnfscbd--;
442
if (nfs_numnfscbd == 0)
443
wakeup(&nfs_numnfscbd);
444
}
445
NFSD_UNLOCK();
446
} else {
447
/*
448
* cred == NULL for a DS connect.
449
* For connects to a DS, set a retry limit
450
* so that failed DSs will be detected.
451
* This is ok for NFSv4.1, since a DS does
452
* not maintain open/lock state and is the
453
* only case where using a "soft" mount is
454
* recommended for NFSv4.
455
* For mounts from the MDS to DS, this is done
456
* via mount options, but that is not the case
457
* here. The retry limit here can be adjusted
458
* via the sysctl vfs.nfs.dsretries.
459
* See the comment above w.r.t. timeout.
460
*/
461
timo.tv_sec = nfsrv_lease / 4;
462
if (timo.tv_sec < 10)
463
timo.tv_sec = 10;
464
timo.tv_usec = 0;
465
CLNT_CONTROL(client, CLSET_TIMEOUT, &timo);
466
retries = nfs_dsretries;
467
}
468
}
469
} else {
470
/*
471
* Three cases:
472
* - Null RPC callback to client
473
* - Non-Null RPC callback to client, wait a little longer
474
* - upcalls to nfsuserd and gssd (clp == NULL)
475
*/
476
if (callback_retry_mult == 0) {
477
retries = NFSV4_UPCALLRETRY;
478
CLNT_CONTROL(client, CLSET_PRIVPORT, &one);
479
} else {
480
retries = NFSV4_CALLBACKRETRY * callback_retry_mult;
481
}
482
if (dotls)
483
CLNT_CONTROL(client, CLSET_TLS, &one);
484
}
485
CLNT_CONTROL(client, CLSET_RETRIES, &retries);
486
487
if (nmp != NULL) {
488
/*
489
* For UDP, there are 2 timeouts:
490
* - CLSET_RETRY_TIMEOUT sets the initial timeout for the timer
491
* that does a retransmit of an RPC request using the same
492
* socket and xid. This is what you normally want to do,
493
* since NFS servers depend on "same xid" for their
494
* Duplicate Request Cache.
495
* - timeout specified in CLNT_CALL_MBUF(), which specifies when
496
* retransmits on the same socket should fail and a fresh
497
* socket created. Each of these timeouts counts as one
498
* CLSET_RETRIES as set above.
499
* Set the initial retransmit timeout for UDP. This timeout
500
* doesn't exist for TCP and the following call just fails,
501
* which is ok.
502
*/
503
timo.tv_sec = nmp->nm_timeo / NFS_HZ;
504
timo.tv_usec = (nmp->nm_timeo % NFS_HZ) * 1000000 / NFS_HZ;
505
CLNT_CONTROL(client, CLSET_RETRY_TIMEOUT, &timo);
506
}
507
508
/*
509
* *clipp is &nrp->nr_client or &nm_aconn[nmp->nm_nextaconn].
510
* The latter case is for additional connections specified by the
511
* "nconnect" mount option. nr_mtx etc is used for these additional
512
* connections, as well as nr_client in the nfssockreq
513
* structure for the mount.
514
*/
515
mtx_lock(&nrp->nr_mtx);
516
if (*clipp != NULL) {
517
mtx_unlock(&nrp->nr_mtx);
518
/*
519
* Someone else already connected.
520
*/
521
CLNT_RELEASE(client);
522
} else {
523
*clipp = client;
524
/*
525
* Protocols that do not require connections may be optionally
526
* left unconnected for servers that reply from a port other
527
* than NFS_PORT.
528
*/
529
if (nmp == NULL || (nmp->nm_flag & NFSMNT_NOCONN) == 0) {
530
mtx_unlock(&nrp->nr_mtx);
531
CLNT_CONTROL(client, CLSET_CONNECT, &one);
532
} else
533
mtx_unlock(&nrp->nr_mtx);
534
}
535
536
out:
537
/* Restore current thread's credentials. */
538
td->td_ucred = origcred;
539
540
NFSEXITCODE(error);
541
return (error);
542
}
543
544
/*
545
* NFS disconnect. Clean up and unlink.
546
*/
547
void
548
newnfs_disconnect(struct nfsmount *nmp, struct nfssockreq *nrp)
549
{
550
CLIENT *client, *aconn[NFS_MAXNCONN - 1];
551
int i;
552
553
mtx_lock(&nrp->nr_mtx);
554
if (nrp->nr_client != NULL) {
555
client = nrp->nr_client;
556
nrp->nr_client = NULL;
557
if (nmp != NULL && nmp->nm_aconnect > 0) {
558
for (i = 0; i < nmp->nm_aconnect; i++) {
559
aconn[i] = nmp->nm_aconn[i];
560
nmp->nm_aconn[i] = NULL;
561
}
562
}
563
mtx_unlock(&nrp->nr_mtx);
564
rpc_gss_secpurge_call(client);
565
CLNT_CLOSE(client);
566
CLNT_RELEASE(client);
567
if (nmp != NULL && nmp->nm_aconnect > 0) {
568
for (i = 0; i < nmp->nm_aconnect; i++) {
569
if (aconn[i] != NULL) {
570
rpc_gss_secpurge_call(aconn[i]);
571
CLNT_CLOSE(aconn[i]);
572
CLNT_RELEASE(aconn[i]);
573
}
574
}
575
}
576
} else {
577
mtx_unlock(&nrp->nr_mtx);
578
}
579
}
580
581
static AUTH *
582
nfs_getauth(struct nfssockreq *nrp, int secflavour, char *clnt_principal,
583
char *srv_principal, gss_OID mech_oid, struct ucred *cred)
584
{
585
rpc_gss_service_t svc;
586
AUTH *auth;
587
588
switch (secflavour) {
589
case RPCSEC_GSS_KRB5:
590
case RPCSEC_GSS_KRB5I:
591
case RPCSEC_GSS_KRB5P:
592
if (!mech_oid) {
593
if (!rpc_gss_mech_to_oid_call("kerberosv5", &mech_oid))
594
return (NULL);
595
}
596
if (secflavour == RPCSEC_GSS_KRB5)
597
svc = rpc_gss_svc_none;
598
else if (secflavour == RPCSEC_GSS_KRB5I)
599
svc = rpc_gss_svc_integrity;
600
else
601
svc = rpc_gss_svc_privacy;
602
603
if (clnt_principal == NULL) {
604
NFSCL_DEBUG(1, "nfs_getauth: clnt princ=NULL, "
605
"srv princ=%s\n", srv_principal);
606
auth = rpc_gss_secfind_call(nrp->nr_client, cred,
607
srv_principal, mech_oid, svc);
608
} else {
609
NFSCL_DEBUG(1, "nfs_getauth: clnt princ=%s "
610
"srv princ=%s\n", clnt_principal, srv_principal);
611
auth = rpc_gss_seccreate_call(nrp->nr_client, cred,
612
clnt_principal, srv_principal, "kerberosv5",
613
svc, NULL, NULL, NULL);
614
return (auth);
615
}
616
if (auth != NULL)
617
return (auth);
618
/* fallthrough */
619
case AUTH_SYS:
620
default:
621
return (authunix_create(cred));
622
}
623
}
624
625
/*
 * Callback from the RPC code to generate up/down notifications.
 *
 * State handed to nfs_feedback() through the opaque callback argument.
 */
struct nfs_feedback_arg {
	struct nfsmount	*nf_mount;	/* mount the RPC is for */
	int		nf_lastmsg;	/* last tprintf */
	int		nf_tprintfmsg;	/* TRUE once "not responding" was reported */
	struct thread	*nf_td;		/* thread passed on to nfs_down()/nfs_up() */
};
635
636
static void
637
nfs_feedback(int type, int proc, void *arg)
638
{
639
struct nfs_feedback_arg *nf = (struct nfs_feedback_arg *) arg;
640
struct nfsmount *nmp = nf->nf_mount;
641
time_t now;
642
643
switch (type) {
644
case FEEDBACK_REXMIT2:
645
case FEEDBACK_RECONNECT:
646
now = NFSD_MONOSEC;
647
if (nf->nf_lastmsg + nmp->nm_tprintf_delay < now) {
648
nfs_down(nmp, nf->nf_td,
649
"not responding", 0, NFSSTA_TIMEO);
650
nf->nf_tprintfmsg = TRUE;
651
nf->nf_lastmsg = now;
652
}
653
break;
654
655
case FEEDBACK_OK:
656
nfs_up(nf->nf_mount, nf->nf_td,
657
"is alive again", NFSSTA_TIMEO, nf->nf_tprintfmsg);
658
break;
659
}
660
}
661
662
/*
663
* newnfs_request - goes something like this
664
* - does the rpc by calling the krpc layer
665
* - break down rpc header and return with nfs reply
666
* nb: always frees up nd_mreq mbuf list
667
*/
668
int
669
newnfs_request(struct nfsrv_descript *nd, struct nfsmount *nmp,
670
struct nfsclient *clp, struct nfssockreq *nrp, vnode_t vp,
671
struct thread *td, struct ucred *cred, u_int32_t prog, u_int32_t vers,
672
u_char *retsum, int toplevel, u_int64_t *xidp, struct nfsclsession *dssep)
673
{
674
uint32_t retseq, retval, retval0, slotseq, *tl;
675
int i = 0, j = 0, opcnt, set_sigset = 0, slot;
676
int error = 0, usegssname = 0, secflavour = AUTH_SYS;
677
int freeslot, maxslot, reterr, slotpos, timeo;
678
u_int16_t procnum;
679
u_int nextconn;
680
struct nfs_feedback_arg nf;
681
struct timeval timo;
682
AUTH *auth;
683
struct rpc_callextra ext;
684
enum clnt_stat stat;
685
struct nfsreq *rep = NULL;
686
char *srv_principal = NULL, *clnt_principal = NULL;
687
sigset_t oldset;
688
struct ucred *authcred;
689
struct nfsclsession *sep;
690
uint8_t sessionid[NFSX_V4SESSIONID];
691
bool nextconn_set;
692
struct timespec trylater_delay, ts, waituntil;
693
694
/* Initially 1msec. */
695
trylater_delay.tv_sec = 0;
696
trylater_delay.tv_nsec = 1000000;
697
sep = dssep;
698
if (xidp != NULL)
699
*xidp = 0;
700
/* Reject requests while attempting a forced unmount. */
701
if (nmp != NULL && NFSCL_FORCEDISM(nmp->nm_mountp)) {
702
m_freem(nd->nd_mreq);
703
return (ESTALE);
704
}
705
706
/*
707
* Set authcred, which is used to acquire RPC credentials to
708
* the cred argument, by default. The crhold() should not be
709
* necessary, but will ensure that some future code change
710
* doesn't result in the credential being free'd prematurely.
711
*/
712
authcred = crhold(cred);
713
714
/* For client side interruptible mounts, mask off the signals. */
715
if (nmp != NULL && td != NULL && NFSHASINT(nmp)) {
716
newnfs_set_sigmask(td, &oldset);
717
set_sigset = 1;
718
}
719
720
/*
721
* If not already connected call newnfs_connect now.
722
*/
723
if (nrp->nr_client == NULL)
724
newnfs_connect(nmp, nrp, cred, td, 0, false, &nrp->nr_client);
725
726
/*
727
* If the "nconnect" mount option was specified and this RPC is
728
* one that can have a large RPC message and is being done through
729
* the NFS/MDS server, use an additional connection. (When the RPC is
730
* being done through the server/MDS, nrp == &nmp->nm_sockreq.)
731
* The "nconnect" mount option normally has minimal effect when the
732
* "pnfs" mount option is specified, since only Readdir RPCs are
733
* normally done through the NFS/MDS server.
734
*/
735
nextconn_set = false;
736
if (nmp != NULL && nmp->nm_aconnect > 0 && nrp == &nmp->nm_sockreq &&
737
(nd->nd_procnum == NFSPROC_READ ||
738
nd->nd_procnum == NFSPROC_READDIR ||
739
nd->nd_procnum == NFSPROC_READDIRPLUS ||
740
nd->nd_procnum == NFSPROC_WRITE)) {
741
nextconn = atomic_fetchadd_int(&nmp->nm_nextaconn, 1);
742
nextconn %= nmp->nm_aconnect;
743
nextconn_set = true;
744
if (nmp->nm_aconn[nextconn] == NULL)
745
newnfs_connect(nmp, nrp, cred, td, 0, false,
746
&nmp->nm_aconn[nextconn]);
747
}
748
749
/*
750
* For a client side mount, nmp is != NULL and clp == NULL. For
751
* server calls (callbacks or upcalls), nmp == NULL.
752
*/
753
if (clp != NULL) {
754
NFSLOCKSTATE();
755
if ((clp->lc_flags & LCL_GSS) && nfsrv_gsscallbackson) {
756
secflavour = RPCSEC_GSS_KRB5;
757
if (nd->nd_procnum != NFSPROC_NULL) {
758
if (clp->lc_flags & LCL_GSSINTEGRITY)
759
secflavour = RPCSEC_GSS_KRB5I;
760
else if (clp->lc_flags & LCL_GSSPRIVACY)
761
secflavour = RPCSEC_GSS_KRB5P;
762
}
763
}
764
NFSUNLOCKSTATE();
765
} else if (nmp != NULL && NFSHASKERB(nmp) &&
766
nd->nd_procnum != NFSPROC_NULL && (!NFSHASSYSKRB5(nmp) ||
767
nfscl_use_gss[nd->nd_procnum])) {
768
if (NFSHASALLGSSNAME(nmp) && nmp->nm_krbnamelen > 0)
769
nd->nd_flag |= ND_USEGSSNAME;
770
if ((nd->nd_flag & ND_USEGSSNAME) != 0) {
771
/*
772
* If there is a client side host based credential,
773
* use that, otherwise use the system uid, if set.
774
* The system uid is in the nmp->nm_sockreq.nr_cred
775
* credentials.
776
*/
777
if (nmp->nm_krbnamelen > 0) {
778
usegssname = 1;
779
clnt_principal = nmp->nm_krbname;
780
} else if (nmp->nm_uid != (uid_t)-1) {
781
KASSERT(nmp->nm_sockreq.nr_cred != NULL,
782
("newnfs_request: NULL nr_cred"));
783
crfree(authcred);
784
authcred = crhold(nmp->nm_sockreq.nr_cred);
785
}
786
} else if (nmp->nm_krbnamelen == 0 &&
787
nmp->nm_uid != (uid_t)-1 && cred->cr_uid == (uid_t)0) {
788
/*
789
* If there is no host based principal name and
790
* the system uid is set and this is root, use the
791
* system uid, since root won't have user
792
* credentials in a credentials cache file.
793
* The system uid is in the nmp->nm_sockreq.nr_cred
794
* credentials.
795
*/
796
KASSERT(nmp->nm_sockreq.nr_cred != NULL,
797
("newnfs_request: NULL nr_cred"));
798
crfree(authcred);
799
authcred = crhold(nmp->nm_sockreq.nr_cred);
800
}
801
if (NFSHASINTEGRITY(nmp))
802
secflavour = RPCSEC_GSS_KRB5I;
803
else if (NFSHASPRIVACY(nmp))
804
secflavour = RPCSEC_GSS_KRB5P;
805
else
806
secflavour = RPCSEC_GSS_KRB5;
807
if (nrp->nr_srvprinc[0] == '\0')
808
srv_principal = NFSMNT_SRVKRBNAME(nmp);
809
else
810
srv_principal = nrp->nr_srvprinc;
811
} else if (nmp != NULL && (!NFSHASKERB(nmp) || NFSHASSYSKRB5(nmp)) &&
812
nd->nd_procnum != NFSPROC_NULL &&
813
(nd->nd_flag & ND_USEGSSNAME) != 0) {
814
/*
815
* Use the uid that did the mount when the RPC is doing
816
* NFSv4 system operations, as indicated by the
817
* ND_USEGSSNAME flag, for the AUTH_SYS case.
818
* The credentials in nm_sockreq.nr_cred were used for the
819
* mount.
820
*/
821
KASSERT(nmp->nm_sockreq.nr_cred != NULL,
822
("newnfs_request: NULL nr_cred"));
823
crfree(authcred);
824
authcred = crhold(nmp->nm_sockreq.nr_cred);
825
}
826
827
if (nmp != NULL) {
828
bzero(&nf, sizeof(struct nfs_feedback_arg));
829
nf.nf_mount = nmp;
830
nf.nf_td = td;
831
nf.nf_lastmsg = NFSD_MONOSEC -
832
((nmp->nm_tprintf_delay)-(nmp->nm_tprintf_initial_delay));
833
}
834
835
if (nd->nd_procnum == NFSPROC_NULL)
836
auth = authnone_create();
837
else if (usegssname) {
838
/*
839
* For this case, the authenticator is held in the
840
* nfssockreq structure, so don't release the reference count
841
* held on it. --> Don't AUTH_DESTROY() it in this function.
842
*/
843
if (nrp->nr_auth == NULL)
844
nrp->nr_auth = nfs_getauth(nrp, secflavour,
845
clnt_principal, srv_principal, NULL, authcred);
846
else
847
rpc_gss_refresh_auth_call(nrp->nr_auth);
848
auth = nrp->nr_auth;
849
} else
850
auth = nfs_getauth(nrp, secflavour, NULL,
851
srv_principal, NULL, authcred);
852
crfree(authcred);
853
if (auth == NULL) {
854
m_freem(nd->nd_mreq);
855
if (set_sigset)
856
newnfs_restore_sigmask(td, &oldset);
857
return (EACCES);
858
}
859
bzero(&ext, sizeof(ext));
860
ext.rc_auth = auth;
861
if (nmp != NULL) {
862
ext.rc_feedback = nfs_feedback;
863
ext.rc_feedback_arg = &nf;
864
}
865
866
procnum = nd->nd_procnum;
867
if ((nd->nd_flag & ND_NFSV4) &&
868
nd->nd_procnum != NFSPROC_NULL &&
869
nd->nd_procnum != NFSV4PROC_CBCOMPOUND)
870
procnum = NFSV4PROC_COMPOUND;
871
872
if (nmp != NULL) {
873
NFSINCRGLOBAL(nfsstatsv1.rpcrequests);
874
875
/* Map the procnum to the old NFSv2 one, as required. */
876
if ((nd->nd_flag & ND_NFSV2) != 0) {
877
if (nd->nd_procnum < NFS_V3NPROCS)
878
procnum = nfsv2_procid[nd->nd_procnum];
879
else
880
procnum = NFSV2PROC_NOOP;
881
}
882
883
/*
884
* Now only used for the R_DONTRECOVER case, but until that is
885
* supported within the krpc code, I need to keep a queue of
886
* outstanding RPCs for nfsv4 client requests.
887
*/
888
if ((nd->nd_flag & ND_NFSV4) && procnum == NFSV4PROC_COMPOUND)
889
rep = malloc(sizeof(struct nfsreq),
890
M_NFSDREQ, M_WAITOK);
891
#ifdef KDTRACE_HOOKS
892
if (dtrace_nfscl_nfs234_start_probe != NULL) {
893
uint32_t probe_id;
894
int probe_procnum;
895
896
if (nd->nd_flag & ND_NFSV4) {
897
probe_id =
898
nfscl_nfs4_start_probes[nd->nd_procnum];
899
probe_procnum = nd->nd_procnum;
900
} else if (nd->nd_flag & ND_NFSV3) {
901
probe_id = nfscl_nfs3_start_probes[procnum];
902
probe_procnum = procnum;
903
} else {
904
probe_id =
905
nfscl_nfs2_start_probes[nd->nd_procnum];
906
probe_procnum = procnum;
907
}
908
if (probe_id != 0)
909
(dtrace_nfscl_nfs234_start_probe)
910
(probe_id, vp, nd->nd_mreq, cred,
911
probe_procnum);
912
}
913
#endif
914
}
915
freeslot = -1; /* Set to slot that needs to be free'd */
916
tryagain:
917
slot = -1; /* Slot that needs a sequence# increment. */
918
/*
919
* This timeout specifies when a new socket should be created,
920
* along with new xid values. For UDP, this should be done
921
* infrequently, since retransmits of RPC requests should normally
922
* use the same xid.
923
*/
924
if (nmp == NULL) {
925
if (clp == NULL) {
926
timo.tv_sec = NFSV4_UPCALLTIMEO;
927
timo.tv_usec = 0;
928
} else {
929
timo.tv_sec = NFSV4_CALLBACKTIMEO / 1000;
930
timo.tv_usec = NFSV4_CALLBACKTIMEO * 1000;
931
}
932
} else {
933
if (nrp->nr_sotype != SOCK_DGRAM) {
934
timo.tv_usec = 0;
935
if ((nmp->nm_flag & NFSMNT_NFSV4))
936
timo.tv_sec = INT_MAX;
937
else
938
timo.tv_sec = NFS_TCPTIMEO;
939
} else {
940
if (NFSHASSOFT(nmp)) {
941
/*
942
* CLSET_RETRIES is set to 2, so this should be
943
* half of the total timeout required.
944
*/
945
timeo = nmp->nm_retry * nmp->nm_timeo / 2;
946
if (timeo < 1)
947
timeo = 1;
948
timo.tv_sec = timeo / NFS_HZ;
949
timo.tv_usec = (timeo % NFS_HZ) * 1000000 /
950
NFS_HZ;
951
} else {
952
/* For UDP hard mounts, use a large value. */
953
timo.tv_sec = NFS_MAXTIMEO / NFS_HZ;
954
timo.tv_usec = 0;
955
}
956
}
957
958
if (rep != NULL) {
959
rep->r_flags = 0;
960
rep->r_nmp = nmp;
961
/*
962
* Chain request into list of outstanding requests.
963
*/
964
NFSLOCKREQ();
965
TAILQ_INSERT_TAIL(&nfsd_reqq, rep, r_chain);
966
NFSUNLOCKREQ();
967
}
968
}
969
970
nd->nd_mrep = NULL;
971
if (clp != NULL && sep != NULL)
972
stat = clnt_bck_call(nrp->nr_client, &ext, procnum,
973
nd->nd_mreq, &nd->nd_mrep, timo, sep->nfsess_xprt);
974
else if (nextconn_set)
975
/*
976
* When there are multiple TCP connections, send the
977
* RPCs with large messages on the alternate TCP
978
* connection(s) in a round robin fashion.
979
* The small RPC messages are sent on the default
980
* TCP connection because they do not require much
981
* network bandwidth and separating them from the
982
* large RPC messages avoids them getting "log jammed"
983
* behind several large RPC messages.
984
*/
985
stat = CLNT_CALL_MBUF(nmp->nm_aconn[nextconn],
986
&ext, procnum, nd->nd_mreq, &nd->nd_mrep, timo);
987
else
988
stat = CLNT_CALL_MBUF(nrp->nr_client, &ext, procnum,
989
nd->nd_mreq, &nd->nd_mrep, timo);
990
NFSCL_DEBUG(2, "clnt call=%d\n", stat);
991
992
if (rep != NULL) {
993
/*
994
* RPC done, unlink the request.
995
*/
996
NFSLOCKREQ();
997
TAILQ_REMOVE(&nfsd_reqq, rep, r_chain);
998
NFSUNLOCKREQ();
999
}
1000
1001
/*
1002
* If there was a successful reply and a tprintf msg.
1003
* tprintf a response.
1004
*/
1005
if (stat == RPC_SUCCESS) {
1006
error = 0;
1007
} else if (stat == RPC_TIMEDOUT) {
1008
NFSINCRGLOBAL(nfsstatsv1.rpctimeouts);
1009
error = ETIMEDOUT;
1010
} else if (stat == RPC_VERSMISMATCH) {
1011
NFSINCRGLOBAL(nfsstatsv1.rpcinvalid);
1012
error = EOPNOTSUPP;
1013
} else if (stat == RPC_PROGVERSMISMATCH) {
1014
NFSINCRGLOBAL(nfsstatsv1.rpcinvalid);
1015
error = EPROTONOSUPPORT;
1016
} else if (stat == RPC_CANTSEND || stat == RPC_CANTRECV ||
1017
stat == RPC_SYSTEMERROR || stat == RPC_INTR) {
1018
/* Check for a session slot that needs to be free'd. */
1019
if ((nd->nd_flag & (ND_NFSV41 | ND_HASSLOTID)) ==
1020
(ND_NFSV41 | ND_HASSLOTID) && nmp != NULL &&
1021
nd->nd_procnum != NFSPROC_NULL) {
1022
/*
1023
* This should only occur when either the MDS or
1024
* a client has an RPC against a DS fail.
1025
* This happens because these cases use "soft"
1026
* connections that can time out and fail.
1027
* The slot used for this RPC is now in a
1028
* non-deterministic state, but if the slot isn't
1029
* free'd, threads can get stuck waiting for a slot.
1030
*/
1031
if (sep == NULL)
1032
sep = nfsmnt_mdssession(nmp);
1033
/*
1034
* Bump the sequence# out of range, so that reuse of
1035
* this slot will result in an NFSERR_SEQMISORDERED
1036
* error and not a bogus cached RPC reply.
1037
*/
1038
mtx_lock(&sep->nfsess_mtx);
1039
sep->nfsess_slotseq[nd->nd_slotid] += 10;
1040
sep->nfsess_badslots |= (0x1ULL << nd->nd_slotid);
1041
mtx_unlock(&sep->nfsess_mtx);
1042
/* And free the slot. */
1043
nfsv4_freeslot(sep, nd->nd_slotid, true);
1044
}
1045
if (stat == RPC_INTR)
1046
error = EINTR;
1047
else {
1048
NFSINCRGLOBAL(nfsstatsv1.rpcinvalid);
1049
error = ENXIO;
1050
}
1051
} else if (stat == RPC_AUTHERROR) {
1052
/* Check for a session slot that needs to be free'd. */
1053
if ((nd->nd_flag & (ND_NFSV41 | ND_HASSLOTID)) ==
1054
(ND_NFSV41 | ND_HASSLOTID) && nmp != NULL &&
1055
nd->nd_procnum != NFSPROC_NULL) {
1056
/*
1057
* This can occur when a Kerberos/RPCSEC_GSS session
1058
* expires, due to TGT expiration.
1059
* Free the slot, resetting the slot's sequence#.
1060
*/
1061
if (sep == NULL)
1062
sep = nfsmnt_mdssession(nmp);
1063
nfsv4_freeslot(sep, nd->nd_slotid, true);
1064
}
1065
NFSINCRGLOBAL(nfsstatsv1.rpcinvalid);
1066
error = EACCES;
1067
} else {
1068
NFSINCRGLOBAL(nfsstatsv1.rpcinvalid);
1069
error = EACCES;
1070
}
1071
if (error) {
1072
m_freem(nd->nd_mreq);
1073
if (usegssname == 0)
1074
AUTH_DESTROY(auth);
1075
if (rep != NULL)
1076
free(rep, M_NFSDREQ);
1077
if (set_sigset)
1078
newnfs_restore_sigmask(td, &oldset);
1079
return (error);
1080
}
1081
1082
KASSERT(nd->nd_mrep != NULL, ("mrep shouldn't be NULL if no error\n"));
1083
1084
/*
1085
* Search for any mbufs that are not a multiple of 4 bytes long
1086
* or with m_data not longword aligned.
1087
* These could cause pointer alignment problems, so copy them to
1088
* well aligned mbufs.
1089
*/
1090
newnfs_realign(&nd->nd_mrep, M_WAITOK);
1091
nd->nd_md = nd->nd_mrep;
1092
nd->nd_dpos = mtod(nd->nd_md, caddr_t);
1093
nd->nd_repstat = 0;
1094
if (nd->nd_procnum != NFSPROC_NULL &&
1095
nd->nd_procnum != NFSV4PROC_CBNULL) {
1096
/* If sep == NULL, set it to the default in nmp. */
1097
if (sep == NULL && nmp != NULL)
1098
sep = nfsmnt_mdssession(nmp);
1099
/*
1100
* and now the actual NFS xdr.
1101
*/
1102
NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
1103
nd->nd_repstat = fxdr_unsigned(u_int32_t, *tl);
1104
if (nd->nd_repstat >= 10000)
1105
NFSCL_DEBUG(1, "proc=%d reps=%d\n", (int)nd->nd_procnum,
1106
(int)nd->nd_repstat);
1107
1108
/*
1109
* Get rid of the tag, return count and SEQUENCE result for
1110
* NFSv4.
1111
*/
1112
if ((nd->nd_flag & ND_NFSV4) != 0 && nd->nd_repstat !=
1113
NFSERR_MINORVERMISMATCH) {
1114
NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
1115
i = fxdr_unsigned(int, *tl);
1116
error = nfsm_advance(nd, NFSM_RNDUP(i), -1);
1117
if (error)
1118
goto nfsmout;
1119
NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
1120
opcnt = fxdr_unsigned(int, *tl++);
1121
i = fxdr_unsigned(int, *tl++);
1122
j = fxdr_unsigned(int, *tl);
1123
if (j >= 10000)
1124
NFSCL_DEBUG(1, "fop=%d fst=%d\n", i, j);
1125
/*
1126
* If the first op is Sequence, free up the slot.
1127
*/
1128
if ((nmp != NULL && i == NFSV4OP_SEQUENCE && j != 0) ||
1129
(clp != NULL && i == NFSV4OP_CBSEQUENCE && j != 0)) {
1130
NFSCL_DEBUG(1, "failed seq=%d\n", j);
1131
if (sep != NULL && i == NFSV4OP_SEQUENCE &&
1132
j == NFSERR_SEQMISORDERED) {
1133
mtx_lock(&sep->nfsess_mtx);
1134
sep->nfsess_badslots |=
1135
(0x1ULL << nd->nd_slotid);
1136
mtx_unlock(&sep->nfsess_mtx);
1137
}
1138
}
1139
if (((nmp != NULL && i == NFSV4OP_SEQUENCE && j == 0) ||
1140
(clp != NULL && i == NFSV4OP_CBSEQUENCE &&
1141
j == 0)) && sep != NULL) {
1142
if (i == NFSV4OP_SEQUENCE)
1143
NFSM_DISSECT(tl, uint32_t *,
1144
NFSX_V4SESSIONID +
1145
5 * NFSX_UNSIGNED);
1146
else
1147
NFSM_DISSECT(tl, uint32_t *,
1148
NFSX_V4SESSIONID +
1149
4 * NFSX_UNSIGNED);
1150
mtx_lock(&sep->nfsess_mtx);
1151
if (bcmp(tl, sep->nfsess_sessionid,
1152
NFSX_V4SESSIONID) == 0) {
1153
tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
1154
retseq = fxdr_unsigned(uint32_t, *tl++);
1155
slot = fxdr_unsigned(int, *tl++);
1156
if ((nd->nd_flag & ND_HASSLOTID) != 0) {
1157
if (slot >= NFSV4_SLOTS ||
1158
(i == NFSV4OP_CBSEQUENCE &&
1159
slot >= NFSV4_CBSLOTS)) {
1160
printf("newnfs_request:"
1161
" Bogus slot\n");
1162
slot = nd->nd_slotid;
1163
} else if (slot !=
1164
nd->nd_slotid) {
1165
printf("newnfs_request:"
1166
" Wrong session "
1167
"srvslot=%d "
1168
"slot=%d\n", slot,
1169
nd->nd_slotid);
1170
if (i == NFSV4OP_SEQUENCE) {
1171
/*
1172
* Mark both slots as
1173
* bad, because we do
1174
* not know if the
1175
* server has advanced
1176
* the sequence# for
1177
* either of them.
1178
*/
1179
sep->nfsess_badslots |=
1180
(0x1ULL << slot);
1181
sep->nfsess_badslots |=
1182
(0x1ULL <<
1183
nd->nd_slotid);
1184
}
1185
slot = nd->nd_slotid;
1186
}
1187
freeslot = slot;
1188
} else if (slot != 0) {
1189
printf("newnfs_request: Bad "
1190
"session slot=%d\n", slot);
1191
slot = 0;
1192
}
1193
if (retseq != sep->nfsess_slotseq[slot])
1194
printf("retseq diff 0x%x\n",
1195
retseq);
1196
retval0 = fxdr_unsigned(uint32_t,*tl++);
1197
retval = fxdr_unsigned(uint32_t, *tl);
1198
if ((retval + 1) < sep->nfsess_foreslots
1199
) {
1200
sep->nfsess_foreslots = (retval
1201
+ 1);
1202
nfs_resetslots(sep);
1203
} else if ((retval + 1) >
1204
sep->nfsess_foreslots) {
1205
if (retval0 > retval)
1206
printf("Sess:highest > "
1207
"target_highest\n");
1208
sep->nfsess_foreslots =
1209
(retval < NFSV4_SLOTS) ?
1210
(retval + 1) : NFSV4_SLOTS;
1211
}
1212
}
1213
mtx_unlock(&sep->nfsess_mtx);
1214
1215
/* Grab the op and status for the next one. */
1216
if (opcnt > 1) {
1217
NFSM_DISSECT(tl, uint32_t *,
1218
2 * NFSX_UNSIGNED);
1219
i = fxdr_unsigned(int, *tl++);
1220
j = fxdr_unsigned(int, *tl);
1221
}
1222
}
1223
}
1224
if (nd->nd_repstat != 0) {
1225
if (nd->nd_repstat == NFSERR_BADSESSION &&
1226
nmp != NULL && dssep == NULL &&
1227
(nd->nd_flag & ND_NFSV41) != 0) {
1228
/*
1229
* If this is a client side MDS RPC, mark
1230
* the MDS session defunct and initiate
1231
* recovery, as required.
1232
* The nfsess_defunct field is protected by
1233
* the NFSLOCKMNT()/nm_mtx lock and not the
1234
* nfsess_mtx lock to simplify its handling,
1235
* for the MDS session. This lock is also
1236
* sufficient for nfsess_sessionid, since it
1237
* never changes in the structure.
1238
*/
1239
NFSCL_DEBUG(1, "Got badsession\n");
1240
NFSLOCKCLSTATE();
1241
NFSLOCKMNT(nmp);
1242
if (TAILQ_EMPTY(&nmp->nm_sess)) {
1243
NFSUNLOCKMNT(nmp);
1244
NFSUNLOCKCLSTATE();
1245
printf("If server has not rebooted, "
1246
"check NFS clients for unique "
1247
"/etc/hostid's\n");
1248
goto out;
1249
}
1250
sep = NFSMNT_MDSSESSION(nmp);
1251
if (bcmp(sep->nfsess_sessionid, nd->nd_sequence,
1252
NFSX_V4SESSIONID) == 0) {
1253
printf("Initiate recovery. If server "
1254
"has not rebooted, "
1255
"check NFS clients for unique "
1256
"/etc/hostid's\n");
1257
/* Initiate recovery. */
1258
sep->nfsess_defunct = 1;
1259
NFSCL_DEBUG(1, "Marked defunct\n");
1260
if (nmp->nm_clp != NULL) {
1261
nmp->nm_clp->nfsc_flags |=
1262
NFSCLFLAGS_RECOVER;
1263
wakeup(nmp->nm_clp);
1264
}
1265
}
1266
NFSUNLOCKCLSTATE();
1267
/*
1268
* Sleep for up to 1sec waiting for a new
1269
* session.
1270
*/
1271
mtx_sleep(&nmp->nm_sess, &nmp->nm_mtx, PZERO,
1272
"nfsbadsess", hz);
1273
/*
1274
* Get the session again, in case a new one
1275
* has been created during the sleep.
1276
*/
1277
sep = NFSMNT_MDSSESSION(nmp);
1278
NFSUNLOCKMNT(nmp);
1279
if ((nd->nd_flag & ND_LOOPBADSESS) != 0) {
1280
reterr = nfsv4_sequencelookup(nmp, sep,
1281
&slotpos, &maxslot, &slotseq,
1282
sessionid, true);
1283
if (reterr == 0) {
1284
/* Fill in new session info. */
1285
NFSCL_DEBUG(1,
1286
"Filling in new sequence\n");
1287
tl = nd->nd_sequence;
1288
bcopy(sessionid, tl,
1289
NFSX_V4SESSIONID);
1290
tl += NFSX_V4SESSIONID /
1291
NFSX_UNSIGNED;
1292
*tl++ = txdr_unsigned(slotseq);
1293
*tl++ = txdr_unsigned(slotpos);
1294
*tl = txdr_unsigned(maxslot);
1295
nd->nd_slotid = slotpos;
1296
nd->nd_flag |= ND_HASSLOTID;
1297
}
1298
if (reterr == NFSERR_BADSESSION ||
1299
reterr == 0) {
1300
NFSCL_DEBUG(1,
1301
"Badsession looping\n");
1302
m_freem(nd->nd_mrep);
1303
nd->nd_mrep = NULL;
1304
goto tryagain;
1305
}
1306
nd->nd_repstat = reterr;
1307
NFSCL_DEBUG(1, "Got err=%d\n", reterr);
1308
}
1309
}
1310
/*
1311
* When clp != NULL, it is a callback and all
1312
* callback operations can be retried for NFSERR_DELAY.
1313
*/
1314
if (((nd->nd_repstat == NFSERR_DELAY ||
1315
nd->nd_repstat == NFSERR_GRACE) &&
1316
(nd->nd_flag & ND_NFSV4) && (clp != NULL ||
1317
(nd->nd_procnum != NFSPROC_DELEGRETURN &&
1318
nd->nd_procnum != NFSPROC_SETATTR &&
1319
nd->nd_procnum != NFSPROC_READ &&
1320
nd->nd_procnum != NFSPROC_READDS &&
1321
nd->nd_procnum != NFSPROC_WRITE &&
1322
nd->nd_procnum != NFSPROC_WRITEDS &&
1323
nd->nd_procnum != NFSPROC_OPEN &&
1324
nd->nd_procnum != NFSPROC_OPENLAYGET &&
1325
nd->nd_procnum != NFSPROC_CREATE &&
1326
nd->nd_procnum != NFSPROC_CREATELAYGET &&
1327
nd->nd_procnum != NFSPROC_OPENCONFIRM &&
1328
nd->nd_procnum != NFSPROC_OPENDOWNGRADE &&
1329
nd->nd_procnum != NFSPROC_CLOSE &&
1330
nd->nd_procnum != NFSPROC_LOCK &&
1331
nd->nd_procnum != NFSPROC_LOCKU))) ||
1332
(nd->nd_repstat == NFSERR_DELAY &&
1333
(nd->nd_flag & ND_NFSV4) == 0) ||
1334
nd->nd_repstat == NFSERR_RESOURCE ||
1335
nd->nd_repstat == NFSERR_RETRYUNCACHEDREP) {
1336
/* Clip at NFS_TRYLATERDEL. */
1337
if (timespeccmp(&trylater_delay,
1338
&nfs_trylater_max, >))
1339
trylater_delay = nfs_trylater_max;
1340
getnanouptime(&waituntil);
1341
timespecadd(&waituntil, &trylater_delay,
1342
&waituntil);
1343
do {
1344
nfs_catnap(PZERO, 0, "nfstry");
1345
getnanouptime(&ts);
1346
} while (timespeccmp(&ts, &waituntil, <));
1347
timespecadd(&trylater_delay, &trylater_delay,
1348
&trylater_delay); /* Double each time. */
1349
if (slot != -1) {
1350
mtx_lock(&sep->nfsess_mtx);
1351
sep->nfsess_slotseq[slot]++;
1352
*nd->nd_slotseq = txdr_unsigned(
1353
sep->nfsess_slotseq[slot]);
1354
mtx_unlock(&sep->nfsess_mtx);
1355
}
1356
m_freem(nd->nd_mrep);
1357
nd->nd_mrep = NULL;
1358
goto tryagain;
1359
}
1360
1361
/*
1362
* If the File Handle was stale, invalidate the
1363
* lookup cache, just in case.
1364
* (vp != NULL implies a client side call)
1365
*/
1366
if (nd->nd_repstat == ESTALE && vp != NULL) {
1367
cache_purge(vp);
1368
if (ncl_call_invalcaches != NULL)
1369
(*ncl_call_invalcaches)(vp);
1370
}
1371
}
1372
if ((nd->nd_flag & ND_NFSV4) != 0) {
1373
/* Free the slot, as required. */
1374
if (freeslot != -1)
1375
nfsv4_freeslot(sep, freeslot, false);
1376
/*
1377
* If this op is Putfh, throw its results away.
1378
*/
1379
if (j >= 10000)
1380
NFSCL_DEBUG(1, "nop=%d nst=%d\n", i, j);
1381
if (nmp != NULL && i == NFSV4OP_PUTFH && j == 0) {
1382
NFSM_DISSECT(tl,u_int32_t *,2 * NFSX_UNSIGNED);
1383
i = fxdr_unsigned(int, *tl++);
1384
j = fxdr_unsigned(int, *tl);
1385
if (j >= 10000)
1386
NFSCL_DEBUG(1, "n2op=%d n2st=%d\n", i,
1387
j);
1388
/*
1389
* All Compounds that do an Op that must
1390
* be in sequence consist of NFSV4OP_PUTFH
1391
* followed by one of these. As such, we
1392
* can determine if the seqid# should be
1393
* incremented, here.
1394
*/
1395
if ((i == NFSV4OP_OPEN ||
1396
i == NFSV4OP_OPENCONFIRM ||
1397
i == NFSV4OP_OPENDOWNGRADE ||
1398
i == NFSV4OP_CLOSE ||
1399
i == NFSV4OP_LOCK ||
1400
i == NFSV4OP_LOCKU) &&
1401
(j == 0 ||
1402
(j != NFSERR_STALECLIENTID &&
1403
j != NFSERR_STALESTATEID &&
1404
j != NFSERR_BADSTATEID &&
1405
j != NFSERR_BADSEQID &&
1406
j != NFSERR_BADXDR &&
1407
j != NFSERR_RESOURCE &&
1408
j != NFSERR_NOFILEHANDLE)))
1409
nd->nd_flag |= ND_INCRSEQID;
1410
}
1411
/*
1412
* If this op's status is non-zero, mark
1413
* that there is no more data to process.
1414
* The exception is Setattr, which always has xdr
1415
* when it has failed.
1416
*/
1417
if (j != 0 && i != NFSV4OP_SETATTR)
1418
nd->nd_flag |= ND_NOMOREDATA;
1419
1420
/*
1421
* If R_DONTRECOVER is set, replace the stale error
1422
* reply, so that recovery isn't initiated.
1423
*/
1424
if ((nd->nd_repstat == NFSERR_STALECLIENTID ||
1425
nd->nd_repstat == NFSERR_BADSESSION ||
1426
nd->nd_repstat == NFSERR_STALESTATEID) &&
1427
rep != NULL && (rep->r_flags & R_DONTRECOVER))
1428
nd->nd_repstat = NFSERR_STALEDONTRECOVER;
1429
}
1430
}
1431
out:
1432
1433
#ifdef KDTRACE_HOOKS
1434
if (nmp != NULL && dtrace_nfscl_nfs234_done_probe != NULL) {
1435
uint32_t probe_id;
1436
int probe_procnum;
1437
1438
if (nd->nd_flag & ND_NFSV4) {
1439
probe_id = nfscl_nfs4_done_probes[nd->nd_procnum];
1440
probe_procnum = nd->nd_procnum;
1441
} else if (nd->nd_flag & ND_NFSV3) {
1442
probe_id = nfscl_nfs3_done_probes[procnum];
1443
probe_procnum = procnum;
1444
} else {
1445
probe_id = nfscl_nfs2_done_probes[nd->nd_procnum];
1446
probe_procnum = procnum;
1447
}
1448
if (probe_id != 0)
1449
(dtrace_nfscl_nfs234_done_probe)(probe_id, vp,
1450
nd->nd_mreq, cred, probe_procnum, 0);
1451
}
1452
#endif
1453
1454
m_freem(nd->nd_mreq);
1455
if (usegssname == 0)
1456
AUTH_DESTROY(auth);
1457
if (rep != NULL)
1458
free(rep, M_NFSDREQ);
1459
if (set_sigset)
1460
newnfs_restore_sigmask(td, &oldset);
1461
return (0);
1462
nfsmout:
1463
m_freem(nd->nd_mrep);
1464
m_freem(nd->nd_mreq);
1465
if (usegssname == 0)
1466
AUTH_DESTROY(auth);
1467
if (rep != NULL)
1468
free(rep, M_NFSDREQ);
1469
if (set_sigset)
1470
newnfs_restore_sigmask(td, &oldset);
1471
return (error);
1472
}
1473
1474
/*
1475
* Reset slots above nfsess_foreslots that are not busy.
1476
*/
1477
void
1478
nfs_resetslots(struct nfsclsession *sep)
1479
{
1480
int i;
1481
uint64_t bitval;
1482
1483
mtx_assert(&sep->nfsess_mtx, MA_OWNED);
1484
bitval = (1 << sep->nfsess_foreslots);
1485
for (i = sep->nfsess_foreslots; i < NFSV4_SLOTS; i++) {
1486
if ((sep->nfsess_slots & bitval) == 0 &&
1487
(sep->nfsess_badslots & bitval) == 0)
1488
sep->nfsess_slotseq[i] = 0;
1489
bitval <<= 1;
1490
}
1491
}
1492
1493
/*
1494
* Mark all of an nfs mount's outstanding requests with R_SOFTTERM and
1495
* wait for all requests to complete. This is used by forced unmounts
1496
* to terminate any outstanding RPCs.
1497
*/
1498
int
1499
newnfs_nmcancelreqs(struct nfsmount *nmp)
1500
{
1501
struct nfsclds *dsp;
1502
struct __rpc_client *cl;
1503
int i;
1504
1505
if (nmp->nm_sockreq.nr_client != NULL)
1506
CLNT_CLOSE(nmp->nm_sockreq.nr_client);
1507
for (i = 0; i < nmp->nm_aconnect; i++)
1508
if (nmp->nm_aconn[i] != NULL)
1509
CLNT_CLOSE(nmp->nm_aconn[i]);
1510
lookformore:
1511
NFSLOCKMNT(nmp);
1512
TAILQ_FOREACH(dsp, &nmp->nm_sess, nfsclds_list) {
1513
NFSLOCKDS(dsp);
1514
if (dsp != TAILQ_FIRST(&nmp->nm_sess) &&
1515
(dsp->nfsclds_flags & NFSCLDS_CLOSED) == 0 &&
1516
dsp->nfsclds_sockp != NULL &&
1517
dsp->nfsclds_sockp->nr_client != NULL) {
1518
dsp->nfsclds_flags |= NFSCLDS_CLOSED;
1519
cl = dsp->nfsclds_sockp->nr_client;
1520
NFSUNLOCKDS(dsp);
1521
NFSUNLOCKMNT(nmp);
1522
CLNT_CLOSE(cl);
1523
goto lookformore;
1524
}
1525
NFSUNLOCKDS(dsp);
1526
}
1527
NFSUNLOCKMNT(nmp);
1528
return (0);
1529
}
1530
1531
/*
 * The signals that are allowed to interrupt an NFS operation on an intr
 * mount.  Add any further such signal to this list.
 * SIGSTOP and SIGKILL cannot be masked.
 */
int newnfs_sig_set[] = { SIGINT, SIGTERM, SIGHUP, SIGKILL, SIGQUIT };
1542
1543
/*
1544
* Check to see if one of the signals in our subset is pending on
1545
* the process (in an intr mount).
1546
*/
1547
static int
1548
nfs_sig_pending(sigset_t set)
1549
{
1550
int i;
1551
1552
for (i = 0 ; i < nitems(newnfs_sig_set); i++)
1553
if (SIGISMEMBER(set, newnfs_sig_set[i]))
1554
return (1);
1555
return (0);
1556
}
1557
1558
/*
1559
* The set/restore sigmask functions are used to (temporarily) overwrite
1560
* the thread td_sigmask during an RPC call (for example). These are also
1561
* used in other places in the NFS client that might tsleep().
1562
*/
1563
void
1564
newnfs_set_sigmask(struct thread *td, sigset_t *oldset)
1565
{
1566
sigset_t newset;
1567
int i;
1568
struct proc *p;
1569
1570
SIGFILLSET(newset);
1571
if (td == NULL)
1572
td = curthread; /* XXX */
1573
p = td->td_proc;
1574
/* Remove the NFS set of signals from newset */
1575
PROC_LOCK(p);
1576
mtx_lock(&p->p_sigacts->ps_mtx);
1577
for (i = 0 ; i < nitems(newnfs_sig_set); i++) {
1578
/*
1579
* But make sure we leave the ones already masked
1580
* by the process, ie. remove the signal from the
1581
* temporary signalmask only if it wasn't already
1582
* in p_sigmask.
1583
*/
1584
if (!SIGISMEMBER(td->td_sigmask, newnfs_sig_set[i]) &&
1585
!SIGISMEMBER(p->p_sigacts->ps_sigignore, newnfs_sig_set[i]))
1586
SIGDELSET(newset, newnfs_sig_set[i]);
1587
}
1588
mtx_unlock(&p->p_sigacts->ps_mtx);
1589
kern_sigprocmask(td, SIG_SETMASK, &newset, oldset,
1590
SIGPROCMASK_PROC_LOCKED);
1591
PROC_UNLOCK(p);
1592
}
1593
1594
void
1595
newnfs_restore_sigmask(struct thread *td, sigset_t *set)
1596
{
1597
if (td == NULL)
1598
td = curthread; /* XXX */
1599
kern_sigprocmask(td, SIG_SETMASK, set, NULL, 0);
1600
}
1601
1602
/*
1603
* NFS wrapper to msleep(), that shoves a new p_sigmask and restores the
1604
* old one after msleep() returns.
1605
*/
1606
int
1607
newnfs_msleep(struct thread *td, void *ident, struct mtx *mtx, int priority, char *wmesg, int timo)
1608
{
1609
sigset_t oldset;
1610
int error;
1611
1612
if ((priority & PCATCH) == 0)
1613
return msleep(ident, mtx, priority, wmesg, timo);
1614
if (td == NULL)
1615
td = curthread; /* XXX */
1616
newnfs_set_sigmask(td, &oldset);
1617
error = msleep(ident, mtx, priority, wmesg, timo);
1618
newnfs_restore_sigmask(td, &oldset);
1619
return (error);
1620
}
1621
1622
/*
1623
* Test for a termination condition pending on the process.
1624
* This is used for NFSMNT_INT mounts.
1625
*/
1626
int
1627
newnfs_sigintr(struct nfsmount *nmp, struct thread *td)
1628
{
1629
struct proc *p;
1630
sigset_t tmpset;
1631
1632
/* Terminate all requests while attempting a forced unmount. */
1633
if (NFSCL_FORCEDISM(nmp->nm_mountp))
1634
return (EIO);
1635
if (!(nmp->nm_flag & NFSMNT_INT))
1636
return (0);
1637
if (td == NULL)
1638
return (0);
1639
p = td->td_proc;
1640
PROC_LOCK(p);
1641
tmpset = p->p_siglist;
1642
SIGSETOR(tmpset, td->td_siglist);
1643
SIGSETNAND(tmpset, td->td_sigmask);
1644
mtx_lock(&p->p_sigacts->ps_mtx);
1645
SIGSETNAND(tmpset, p->p_sigacts->ps_sigignore);
1646
mtx_unlock(&p->p_sigacts->ps_mtx);
1647
if ((SIGNOTEMPTY(p->p_siglist) || SIGNOTEMPTY(td->td_siglist))
1648
&& nfs_sig_pending(tmpset)) {
1649
PROC_UNLOCK(p);
1650
return (EINTR);
1651
}
1652
PROC_UNLOCK(p);
1653
return (0);
1654
}
1655
1656
static int
1657
nfs_msg(struct thread *td, const char *server, const char *msg, int error)
1658
{
1659
struct proc *p;
1660
1661
p = td ? td->td_proc : NULL;
1662
if (error) {
1663
tprintf(p, LOG_INFO, "nfs server %s: %s, error %d\n",
1664
server, msg, error);
1665
} else {
1666
tprintf(p, LOG_INFO, "nfs server %s: %s\n", server, msg);
1667
}
1668
return (0);
1669
}
1670
1671
static void
1672
nfs_down(struct nfsmount *nmp, struct thread *td, const char *msg,
1673
int error, int flags)
1674
{
1675
if (nmp == NULL)
1676
return;
1677
mtx_lock(&nmp->nm_mtx);
1678
if ((flags & NFSSTA_TIMEO) && !(nmp->nm_state & NFSSTA_TIMEO)) {
1679
nmp->nm_state |= NFSSTA_TIMEO;
1680
mtx_unlock(&nmp->nm_mtx);
1681
vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid,
1682
VQ_NOTRESP, 0);
1683
} else
1684
mtx_unlock(&nmp->nm_mtx);
1685
mtx_lock(&nmp->nm_mtx);
1686
if ((flags & NFSSTA_LOCKTIMEO) && !(nmp->nm_state & NFSSTA_LOCKTIMEO)) {
1687
nmp->nm_state |= NFSSTA_LOCKTIMEO;
1688
mtx_unlock(&nmp->nm_mtx);
1689
vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid,
1690
VQ_NOTRESPLOCK, 0);
1691
} else
1692
mtx_unlock(&nmp->nm_mtx);
1693
nfs_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, error);
1694
}
1695
1696
static void
1697
nfs_up(struct nfsmount *nmp, struct thread *td, const char *msg,
1698
int flags, int tprintfmsg)
1699
{
1700
if (nmp == NULL)
1701
return;
1702
if (tprintfmsg) {
1703
nfs_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, 0);
1704
}
1705
1706
mtx_lock(&nmp->nm_mtx);
1707
if ((flags & NFSSTA_TIMEO) && (nmp->nm_state & NFSSTA_TIMEO)) {
1708
nmp->nm_state &= ~NFSSTA_TIMEO;
1709
mtx_unlock(&nmp->nm_mtx);
1710
vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid,
1711
VQ_NOTRESP, 1);
1712
} else
1713
mtx_unlock(&nmp->nm_mtx);
1714
1715
mtx_lock(&nmp->nm_mtx);
1716
if ((flags & NFSSTA_LOCKTIMEO) && (nmp->nm_state & NFSSTA_LOCKTIMEO)) {
1717
nmp->nm_state &= ~NFSSTA_LOCKTIMEO;
1718
mtx_unlock(&nmp->nm_mtx);
1719
vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid,
1720
VQ_NOTRESPLOCK, 1);
1721
} else
1722
mtx_unlock(&nmp->nm_mtx);
1723
}
1724
1725