GitHub Repository: torvalds/linux
Path: blob/master/fs/afs/fs_probe.c

// SPDX-License-Identifier: GPL-2.0-or-later
/* AFS fileserver probing
 *
 * Copyright (C) 2018, 2020 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/sched.h>
#include <linux/slab.h>
#include "afs_fs.h"
#include "internal.h"
#include "protocol_afs.h"
#include "protocol_yfs.h"

static unsigned int afs_fs_probe_fast_poll_interval = 30 * HZ;
static unsigned int afs_fs_probe_slow_poll_interval = 5 * 60 * HZ;

struct afs_endpoint_state *afs_get_endpoint_state(struct afs_endpoint_state *estate,
						  enum afs_estate_trace where)
{
	if (estate) {
		int r;

		__refcount_inc(&estate->ref, &r);
		trace_afs_estate(estate->server_id, estate->probe_seq, r, where);
	}
	return estate;
}

static void afs_endpoint_state_rcu(struct rcu_head *rcu)
{
	struct afs_endpoint_state *estate = container_of(rcu, struct afs_endpoint_state, rcu);

	trace_afs_estate(estate->server_id, estate->probe_seq, refcount_read(&estate->ref),
			 afs_estate_trace_free);
	afs_put_addrlist(estate->addresses, afs_alist_trace_put_estate);
	kfree(estate);
}

void afs_put_endpoint_state(struct afs_endpoint_state *estate, enum afs_estate_trace where)
{
	if (estate) {
		unsigned int server_id = estate->server_id, probe_seq = estate->probe_seq;
		bool dead;
		int r;

		dead = __refcount_dec_and_test(&estate->ref, &r);
		trace_afs_estate(server_id, probe_seq, r, where);
		if (dead)
			call_rcu(&estate->rcu, afs_endpoint_state_rcu);
	}
}

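/*
 * Lifecycle note: the endpoint state is refcounted and RCU-freed, so the
 * final put defers destruction through call_rcu() and a lockless reader
 * can keep using a pointer it picked up under the RCU read lock.  A
 * consumer would look roughly like the sketch below (illustrative only,
 * not a function in this file; the trace enum values are placeholders):
 *
 *	rcu_read_lock();
 *	estate = afs_get_endpoint_state(rcu_dereference(server->endpoint_state),
 *					afs_estate_trace_get_example);
 *	rcu_read_unlock();
 *	... use estate ...
 *	afs_put_endpoint_state(estate, afs_estate_trace_put_example);
 */
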
/*
 * Start the probe polling timer.  We have to supply it with an inc on the
 * outstanding server count.
 */
static void afs_schedule_fs_probe(struct afs_net *net,
				  struct afs_server *server, bool fast)
{
	unsigned long atj;

	if (!net->live)
		return;

	atj = server->probed_at;
	atj += fast ? afs_fs_probe_fast_poll_interval : afs_fs_probe_slow_poll_interval;

	afs_inc_servers_outstanding(net);
	if (timer_reduce(&net->fs_probe_timer, atj))
		afs_dec_servers_outstanding(net);
}

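/*
 * Note on the accounting above: timer_reduce() only brings the timer's
 * expiry forward and, like mod_timer(), returns true if the timer was
 * already pending.  A pending timer already carries an increment on the
 * outstanding-servers count, so in that case the increment just taken is
 * surplus and is dropped again.
 */
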
/*
 * Handle the completion of a set of probes.
 */
static void afs_finished_fs_probe(struct afs_net *net, struct afs_server *server,
				  struct afs_endpoint_state *estate)
{
	bool responded = test_bit(AFS_ESTATE_RESPONDED, &estate->flags);

	write_seqlock(&net->fs_lock);
	if (responded) {
		list_add_tail(&server->probe_link, &net->fs_probe_slow);
	} else {
		server->rtt = UINT_MAX;
		clear_bit(AFS_SERVER_FL_RESPONDING, &server->flags);
		list_add_tail(&server->probe_link, &net->fs_probe_fast);
	}

	write_sequnlock(&net->fs_lock);

	afs_schedule_fs_probe(net, server, !responded);
}

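/*
 * Requeueing policy: a server that answered its probes goes back on the
 * slow queue (5-minute poll interval), while one that didn't goes on the
 * fast queue (30-second poll interval) so that the recovery of an
 * unreachable server is noticed much sooner.
 */
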
/*
 * Handle the completion of a probe.
 */
static void afs_done_one_fs_probe(struct afs_net *net, struct afs_server *server,
				  struct afs_endpoint_state *estate)
{
	_enter("");

	if (atomic_dec_and_test(&estate->nr_probing))
		afs_finished_fs_probe(net, server, estate);

	wake_up_all(&server->probe_wq);
}

/*
 * Handle inability to send a probe due to ENOMEM when trying to allocate a
 * call struct.
 */
static void afs_fs_probe_not_done(struct afs_net *net,
				  struct afs_server *server,
				  struct afs_endpoint_state *estate,
				  int index)
{
	_enter("");

	trace_afs_io_error(0, -ENOMEM, afs_io_error_fs_probe_fail);
	spin_lock(&server->probe_lock);

	set_bit(AFS_ESTATE_LOCAL_FAILURE, &estate->flags);
	if (estate->error == 0)
		estate->error = -ENOMEM;

	set_bit(index, &estate->failed_set);

	spin_unlock(&server->probe_lock);
	return afs_done_one_fs_probe(net, server, estate);
}

/*
 * Process the result of probing a fileserver.  This is called after successful
 * or failed delivery of an FS.GetCapabilities operation.
 */
void afs_fileserver_probe_result(struct afs_call *call)
{
	struct afs_endpoint_state *estate = call->probe;
	struct afs_addr_list *alist = estate->addresses;
	struct afs_address *addr = &alist->addrs[call->probe_index];
	struct afs_server *server = call->server;
	unsigned int index = call->probe_index;
	unsigned int rtt_us = -1, cap0;
	int ret = call->error;

	_enter("%pU,%u", &server->uuid, index);

	WRITE_ONCE(addr->last_error, ret);

	spin_lock(&server->probe_lock);

	switch (ret) {
	case 0:
		estate->error = 0;
		goto responded;
	case -ECONNABORTED:
		if (!test_bit(AFS_ESTATE_RESPONDED, &estate->flags)) {
			estate->abort_code = call->abort_code;
			estate->error = ret;
		}
		goto responded;
	case -ENOMEM:
	case -ENONET:
		clear_bit(index, &estate->responsive_set);
		set_bit(AFS_ESTATE_LOCAL_FAILURE, &estate->flags);
		trace_afs_io_error(call->debug_id, ret, afs_io_error_fs_probe_fail);
		goto out;
	case -ECONNRESET: /* Responded, but call expired. */
	case -ERFKILL:
	case -EADDRNOTAVAIL:
	case -ENETUNREACH:
	case -EHOSTUNREACH:
	case -EHOSTDOWN:
	case -ECONNREFUSED:
	case -ETIMEDOUT:
	case -ETIME:
	default:
		clear_bit(index, &estate->responsive_set);
		set_bit(index, &estate->failed_set);
		if (!test_bit(AFS_ESTATE_RESPONDED, &estate->flags) &&
		    (estate->error == 0 ||
		     estate->error == -ETIMEDOUT ||
		     estate->error == -ETIME))
			estate->error = ret;
		trace_afs_io_error(call->debug_id, ret, afs_io_error_fs_probe_fail);
		goto out;
	}

responded:
	clear_bit(index, &estate->failed_set);

	if (call->service_id == YFS_FS_SERVICE) {
		set_bit(AFS_ESTATE_IS_YFS, &estate->flags);
		set_bit(AFS_SERVER_FL_IS_YFS, &server->flags);
		server->service_id = call->service_id;
	} else {
		set_bit(AFS_ESTATE_NOT_YFS, &estate->flags);
		if (!test_bit(AFS_ESTATE_IS_YFS, &estate->flags)) {
			clear_bit(AFS_SERVER_FL_IS_YFS, &server->flags);
			server->service_id = call->service_id;
		}
		cap0 = ntohl(call->tmp);
		if (cap0 & AFS3_VICED_CAPABILITY_64BITFILES)
			set_bit(AFS_SERVER_FL_HAS_FS64, &server->flags);
		else
			clear_bit(AFS_SERVER_FL_HAS_FS64, &server->flags);
	}

	rtt_us = rxrpc_kernel_get_srtt(addr->peer);
	if (rtt_us < estate->rtt) {
		estate->rtt = rtt_us;
		server->rtt = rtt_us;
		alist->preferred = index;
	}

	smp_wmb(); /* Set rtt before responded. */
	set_bit(AFS_ESTATE_RESPONDED, &estate->flags);
	set_bit(index, &estate->responsive_set);
	set_bit(AFS_SERVER_FL_RESPONDING, &server->flags);
out:
	spin_unlock(&server->probe_lock);

	trace_afs_fs_probe(server, false, estate, index, call->error, call->abort_code, rtt_us);
	_debug("probe[%x] %pU [%u] %pISpc rtt=%d ret=%d",
	       estate->probe_seq, &server->uuid, index,
	       rxrpc_kernel_remote_addr(alist->addrs[index].peer),
	       rtt_us, ret);

	return afs_done_one_fs_probe(call->net, server, estate);
}

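/*
 * Memory-ordering note: the smp_wmb() above makes the updated RTT visible
 * before the AFS_ESTATE_RESPONDED bit is set; a reader that tests the bit
 * and then loads ->rtt is presumably expected to interpose the matching
 * read barrier (smp_rmb()) between the two loads.
 */
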
/*
 * Probe all of a fileserver's addresses to find out the best route and to
 * query its capabilities.
 */
int afs_fs_probe_fileserver(struct afs_net *net, struct afs_server *server,
			    struct afs_addr_list *new_alist, struct key *key)
{
	struct afs_endpoint_state *estate, *old;
	struct afs_addr_list *old_alist = NULL, *alist;
	unsigned long unprobed;

	_enter("%pU", &server->uuid);

	estate = kzalloc(sizeof(*estate), GFP_KERNEL);
	if (!estate)
		return -ENOMEM;

	refcount_set(&estate->ref, 2);
	estate->server_id = server->debug_id;
	estate->rtt = UINT_MAX;

	write_lock(&server->fs_lock);

	old = rcu_dereference_protected(server->endpoint_state,
					lockdep_is_held(&server->fs_lock));
	if (old) {
		estate->responsive_set = old->responsive_set;
		if (!new_alist)
			new_alist = old->addresses;
	}

	if (old_alist != new_alist)
		afs_set_peer_appdata(server, old_alist, new_alist);

	estate->addresses = afs_get_addrlist(new_alist, afs_alist_trace_get_estate);
	alist = estate->addresses;
	estate->probe_seq = ++server->probe_counter;
	atomic_set(&estate->nr_probing, alist->nr_addrs);

	if (new_alist)
		server->addr_version = new_alist->version;
	rcu_assign_pointer(server->endpoint_state, estate);
	write_unlock(&server->fs_lock);
	if (old)
		set_bit(AFS_ESTATE_SUPERSEDED, &old->flags);

	trace_afs_estate(estate->server_id, estate->probe_seq, refcount_read(&estate->ref),
			 afs_estate_trace_alloc_probe);

	afs_get_address_preferences(net, new_alist);

	server->probed_at = jiffies;
	unprobed = (1UL << alist->nr_addrs) - 1;
	while (unprobed) {
		unsigned int index = 0, i;
		int best_prio = -1;

		for (i = 0; i < alist->nr_addrs; i++) {
			if (test_bit(i, &unprobed) &&
			    alist->addrs[i].prio > best_prio) {
				index = i;
				best_prio = alist->addrs[i].prio;
			}
		}
		__clear_bit(index, &unprobed);

		trace_afs_fs_probe(server, true, estate, index, 0, 0, 0);
		if (!afs_fs_get_capabilities(net, server, estate, index, key))
			afs_fs_probe_not_done(net, server, estate, index);
	}

	afs_put_endpoint_state(old, afs_estate_trace_put_probe);
	afs_put_endpoint_state(estate, afs_estate_trace_put_probe);
	return 0;
}

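/*
 * Two notes on the function above: the initial refcount of 2 on the new
 * endpoint state covers both the reference published through
 * server->endpoint_state and the local reference dropped on the way out.
 * The dispatch loop is a selection over a bitmask: each pass picks the
 * highest-priority address that has not yet been probed and clears its
 * bit, so probes go out in descending priority order without reordering
 * the address list itself.
 */
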
/*
 * Wait for the first as-yet untried fileserver to respond, for the probe state
 * to be superseded or for all probes to finish.
 */
int afs_wait_for_fs_probes(struct afs_operation *op, struct afs_server_state *states, bool intr)
{
	struct afs_endpoint_state *estate;
	struct afs_server_list *slist = op->server_list;
	bool still_probing = false;
	int ret = 0, i;

	_enter("%u", slist->nr_servers);

	for (i = 0; i < slist->nr_servers; i++) {
		estate = states[i].endpoint_state;
		if (test_bit(AFS_ESTATE_SUPERSEDED, &estate->flags))
			return 2;
		if (atomic_read(&estate->nr_probing))
			still_probing = true;
		if (estate->responsive_set & states[i].untried_addrs)
			return 1;
	}
	if (!still_probing)
		return 0;

	for (i = 0; i < slist->nr_servers; i++)
		add_wait_queue(&slist->servers[i].server->probe_wq, &states[i].probe_waiter);

	for (;;) {
		still_probing = false;

		set_current_state(intr ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);
		for (i = 0; i < slist->nr_servers; i++) {
			estate = states[i].endpoint_state;
			if (test_bit(AFS_ESTATE_SUPERSEDED, &estate->flags)) {
				ret = 2;
				goto stop;
			}
			if (atomic_read(&estate->nr_probing))
				still_probing = true;
			if (estate->responsive_set & states[i].untried_addrs) {
				ret = 1;
				goto stop;
			}
		}

		if (!still_probing || signal_pending(current))
			goto stop;
		schedule();
	}

stop:
	set_current_state(TASK_RUNNING);

	for (i = 0; i < slist->nr_servers; i++)
		remove_wait_queue(&slist->servers[i].server->probe_wq, &states[i].probe_waiter);

	if (!ret && signal_pending(current))
		ret = -ERESTARTSYS;
	return ret;
}

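/*
 * Return values of afs_wait_for_fs_probes(): 2 if the probe state was
 * superseded, 1 if an as-yet untried address responded, 0 if all probes
 * completed without either, and -ERESTARTSYS if the wait was broken off
 * by a pending signal.
 */
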
/*
 * Probe timer.  We have an increment on fs_outstanding that we need to pass
 * along to the work item.
 */
void afs_fs_probe_timer(struct timer_list *timer)
{
	struct afs_net *net = container_of(timer, struct afs_net, fs_probe_timer);

	if (!net->live || !queue_work(afs_wq, &net->fs_prober))
		afs_dec_servers_outstanding(net);
}

/*
 * Dispatch a probe to a server.
 */
static void afs_dispatch_fs_probe(struct afs_net *net, struct afs_server *server)
	__releases(&net->fs_lock)
{
	struct key *key = NULL;

	/* We remove it from the queues here - it will be added back to
	 * one of the queues on the completion of the probe.
	 */
	list_del_init(&server->probe_link);

	afs_get_server(server, afs_server_trace_get_probe);
	write_sequnlock(&net->fs_lock);

	afs_fs_probe_fileserver(net, server, NULL, key);
	afs_put_server(net, server, afs_server_trace_put_probe);
}

/*
 * Probe a server immediately without waiting for its due time to come
 * round.  This is used when all of the addresses have been tried.
 */
void afs_probe_fileserver(struct afs_net *net, struct afs_server *server)
{
	write_seqlock(&net->fs_lock);
	if (!list_empty(&server->probe_link))
		return afs_dispatch_fs_probe(net, server);
	write_sequnlock(&net->fs_lock);
}

/*
 * Probe dispatcher to regularly dispatch probes to keep NAT alive.
 */
void afs_fs_probe_dispatcher(struct work_struct *work)
{
	struct afs_net *net = container_of(work, struct afs_net, fs_prober);
	struct afs_server *fast, *slow, *server;
	unsigned long nowj, timer_at, poll_at;
	bool first_pass = true, set_timer = false;

	if (!net->live) {
		afs_dec_servers_outstanding(net);
		return;
	}

	_enter("");

	if (list_empty(&net->fs_probe_fast) && list_empty(&net->fs_probe_slow)) {
		afs_dec_servers_outstanding(net);
		_leave(" [none]");
		return;
	}

again:
	write_seqlock(&net->fs_lock);

	fast = slow = server = NULL;
	nowj = jiffies;
	timer_at = nowj + MAX_JIFFY_OFFSET;

	if (!list_empty(&net->fs_probe_fast)) {
		fast = list_first_entry(&net->fs_probe_fast, struct afs_server, probe_link);
		poll_at = fast->probed_at + afs_fs_probe_fast_poll_interval;
		if (time_before(nowj, poll_at)) {
			timer_at = poll_at;
			set_timer = true;
			fast = NULL;
		}
	}

	if (!list_empty(&net->fs_probe_slow)) {
		slow = list_first_entry(&net->fs_probe_slow, struct afs_server, probe_link);
		poll_at = slow->probed_at + afs_fs_probe_slow_poll_interval;
		if (time_before(nowj, poll_at)) {
			if (time_before(poll_at, timer_at))
				timer_at = poll_at;
			set_timer = true;
			slow = NULL;
		}
	}

	server = fast ?: slow;
	if (server)
		_debug("probe %pU", &server->uuid);

	if (server && (first_pass || !need_resched())) {
		afs_dispatch_fs_probe(net, server);
		first_pass = false;
		goto again;
	}

	write_sequnlock(&net->fs_lock);

	if (server) {
		if (!queue_work(afs_wq, &net->fs_prober))
			afs_dec_servers_outstanding(net);
		_leave(" [requeue]");
	} else if (set_timer) {
		if (timer_reduce(&net->fs_probe_timer, timer_at))
			afs_dec_servers_outstanding(net);
		_leave(" [timer]");
	} else {
		afs_dec_servers_outstanding(net);
		_leave(" [quiesce]");
	}
}

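/*
 * Scheduling summary for the dispatcher above: servers are requeued at the
 * tail of the fast/slow lists when their probes complete, so each list is
 * effectively ordered by last probe time and only its head entry needs
 * checking.  A head entry that is due is dispatched (fast queue preferred)
 * and the lists are rescanned; if more work remains but a reschedule is
 * needed, the work item requeues itself, otherwise the timer is armed for
 * the earliest future due time, the increment on the outstanding count
 * passing to whichever takes over.
 */
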
/*
 * Wait up to 2 seconds for a probe on a particular fileserver to complete.
 */
int afs_wait_for_one_fs_probe(struct afs_server *server, struct afs_endpoint_state *estate,
			      unsigned long exclude, bool is_intr)
{
	struct wait_queue_entry wait;
	unsigned long timo = 2 * HZ;

	if (atomic_read(&estate->nr_probing) == 0)
		goto dont_wait;

	init_wait_entry(&wait, 0);
	for (;;) {
		prepare_to_wait_event(&server->probe_wq, &wait,
				      is_intr ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);
		if (timo == 0 ||
		    test_bit(AFS_ESTATE_SUPERSEDED, &estate->flags) ||
		    (estate->responsive_set & ~exclude) ||
		    atomic_read(&estate->nr_probing) == 0 ||
		    (is_intr && signal_pending(current)))
			break;
		timo = schedule_timeout(timo);
	}

	finish_wait(&server->probe_wq, &wait);

dont_wait:
	if (test_bit(AFS_ESTATE_SUPERSEDED, &estate->flags))
		return 0;
	if (estate->responsive_set & ~exclude)
		return 1;
	if (is_intr && signal_pending(current))
		return -ERESTARTSYS;
	if (timo == 0)
		return -ETIME;
	return -EDESTADDRREQ;
}

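/*
 * The wait above follows the usual prepare_to_wait_event()/
 * schedule_timeout() pattern: the exit conditions are re-evaluated after
 * every wakeup with the remaining timeout carried in timo, and the checks
 * after dont_wait: then rank the possible outcomes - superseded, then
 * responded, then signal, then timeout - once the loop has ended.
 */
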
/*
 * Clean up the probing when the namespace is killed off.
 */
void afs_fs_probe_cleanup(struct afs_net *net)
{
	if (timer_delete_sync(&net->fs_probe_timer))
		afs_dec_servers_outstanding(net);
}