Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/fs/afs/vl_rotate.c
26278 views
1
// SPDX-License-Identifier: GPL-2.0-or-later
2
/* Handle vlserver selection and rotation.
3
*
4
* Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
5
* Written by David Howells ([email protected])
6
*/
7
8
#include <linux/kernel.h>
9
#include <linux/sched.h>
10
#include <linux/sched/signal.h>
11
#include "internal.h"
12
#include "afs_vl.h"
13
14
/*
15
* Begin an operation on a volume location server.
16
*/
17
bool afs_begin_vlserver_operation(struct afs_vl_cursor *vc, struct afs_cell *cell,
18
struct key *key)
19
{
20
static atomic_t debug_ids;
21
22
memset(vc, 0, sizeof(*vc));
23
vc->cell = cell;
24
vc->key = key;
25
vc->cumul_error.error = -EDESTADDRREQ;
26
vc->nr_iterations = -1;
27
28
if (signal_pending(current)) {
29
vc->cumul_error.error = -EINTR;
30
vc->flags |= AFS_VL_CURSOR_STOP;
31
return false;
32
}
33
34
vc->debug_id = atomic_inc_return(&debug_ids);
35
return true;
36
}
37
38
/*
39
* Begin iteration through a server list, starting with the last used server if
40
* possible, or the last recorded good server if not.
41
*/
42
static bool afs_start_vl_iteration(struct afs_vl_cursor *vc)
43
{
44
struct afs_cell *cell = vc->cell;
45
unsigned int dns_lookup_count;
46
47
if (cell->dns_source == DNS_RECORD_UNAVAILABLE ||
48
cell->dns_expiry <= ktime_get_real_seconds()) {
49
dns_lookup_count = smp_load_acquire(&cell->dns_lookup_count);
50
set_bit(AFS_CELL_FL_DO_LOOKUP, &cell->flags);
51
afs_queue_cell(cell, afs_cell_trace_queue_dns);
52
53
if (cell->dns_source == DNS_RECORD_UNAVAILABLE) {
54
if (wait_var_event_interruptible(
55
&cell->dns_lookup_count,
56
smp_load_acquire(&cell->dns_lookup_count)
57
!= dns_lookup_count) < 0) {
58
vc->cumul_error.error = -ERESTARTSYS;
59
return false;
60
}
61
}
62
63
/* Status load is ordered after lookup counter load */
64
if (cell->dns_status == DNS_LOOKUP_GOT_NOT_FOUND) {
65
pr_warn("No record of cell %s\n", cell->name);
66
vc->cumul_error.error = -ENOENT;
67
return false;
68
}
69
70
if (cell->dns_source == DNS_RECORD_UNAVAILABLE) {
71
vc->cumul_error.error = -EDESTADDRREQ;
72
return false;
73
}
74
}
75
76
read_lock(&cell->vl_servers_lock);
77
vc->server_list = afs_get_vlserverlist(
78
rcu_dereference_protected(cell->vl_servers,
79
lockdep_is_held(&cell->vl_servers_lock)));
80
read_unlock(&cell->vl_servers_lock);
81
if (!vc->server_list->nr_servers)
82
return false;
83
84
vc->untried_servers = (1UL << vc->server_list->nr_servers) - 1;
85
vc->server_index = -1;
86
return true;
87
}
88
89
/*
 * Select the vlserver to use.  May be called multiple times to rotate
 * through the vlservers.
 *
 * On the first call (vc->nr_iterations becomes 0) iteration over the cell's
 * server list is started.  On later calls the result of the previous call, as
 * left in vc->call_error and vc->call_abort_code, is evaluated first to decide
 * whether to stop, try another address of the same server or move on to
 * another server.
 *
 * Returns true if a server and address have been selected (vc->server,
 * vc->alist and vc->addr_index are set); returns false if the operation is
 * over, in which case AFS_VL_CURSOR_STOP is set in vc->flags and the overall
 * result is in vc->cumul_error.
 */
bool afs_select_vlserver(struct afs_vl_cursor *vc)
{
	struct afs_addr_list *alist = vc->alist;
	struct afs_vlserver *vlserver;
	unsigned long set, failed;
	unsigned int rtt;
	s32 abort_code = vc->call_abort_code;
	int error = vc->call_error, i;

	vc->nr_iterations++;

	_enter("VC=%x+%x,%d{%lx},%d{%lx},%d,%d",
	       vc->debug_id, vc->nr_iterations, vc->server_index, vc->untried_servers,
	       vc->addr_index, vc->addr_tried,
	       error, abort_code);

	if (vc->flags & AFS_VL_CURSOR_STOP) {
		_leave(" = f [stopped]");
		return false;
	}

	/* Nothing to evaluate on the first pass. */
	if (vc->nr_iterations == 0)
		goto start;

	/* Record the outcome against the address that was just used. */
	WRITE_ONCE(alist->addrs[vc->addr_index].last_error, error);

	/* Evaluate the result of the previous operation, if there was one. */
	switch (error) {
	default:
	case 0:
		/* Success or local failure.  Stop. */
		vc->cumul_error.error = error;
		vc->flags |= AFS_VL_CURSOR_STOP;
		_leave(" = f [okay/local %d]", vc->cumul_error.error);
		return false;

	case -ECONNABORTED:
		/* The far side rejected the operation on some grounds.  This
		 * might involve the server being busy or the volume having been moved.
		 */
		switch (abort_code) {
		case AFSVL_IO:
		case AFSVL_BADVOLOPER:
		case AFSVL_NOMEM:
			/* The server went weird.  Note the error and try the
			 * next server; the server is not currently marked as
			 * weird in the server list.
			 */
			afs_prioritise_error(&vc->cumul_error, -EREMOTEIO, abort_code);
			goto next_server;

		default:
			afs_prioritise_error(&vc->cumul_error, error, abort_code);
			goto failed;
		}

	case -ERFKILL:
	case -EADDRNOTAVAIL:
	case -ENETUNREACH:
	case -EHOSTUNREACH:
	case -EHOSTDOWN:
	case -ECONNREFUSED:
	case -ETIMEDOUT:
	case -ETIME:
		/* Couldn't reach that address; try another one. */
		_debug("no conn %d", error);
		afs_prioritise_error(&vc->cumul_error, error, 0);
		goto iterate_address;

	case -ECONNRESET:
		/* Move on, but allow one restart once all servers are used. */
		_debug("call reset");
		afs_prioritise_error(&vc->cumul_error, error, 0);
		vc->flags |= AFS_VL_CURSOR_RETRY;
		goto next_server;

	case -EOPNOTSUPP:
		_debug("notsupp");
		goto next_server;
	}

restart_from_beginning:
	_debug("restart");
	/* This label is only reached from no_more_servers, by which time the
	 * address list has already been released and both alist and vc->alist
	 * are NULL, yet vc->call_responded may still be set from the last
	 * call.  Only touch the preferred address if a list is actually held.
	 */
	if (alist &&
	    vc->call_responded &&
	    vc->addr_index != alist->preferred &&
	    test_bit(alist->preferred, &vc->addr_tried))
		WRITE_ONCE(alist->preferred, vc->addr_index);
	afs_put_addrlist(alist, afs_alist_trace_put_vlrotate_restart);
	alist = vc->alist = NULL;

	afs_put_vlserverlist(vc->cell->net, vc->server_list);
	vc->server_list = NULL;
	/* Only one restart is permitted per operation. */
	if (vc->flags & AFS_VL_CURSOR_RETRIED)
		goto failed;
	vc->flags |= AFS_VL_CURSOR_RETRIED;
start:
	_debug("start");
	ASSERTCMP(alist, ==, NULL);

	if (!afs_start_vl_iteration(vc))
		goto failed;

	error = afs_send_vl_probes(vc->cell->net, vc->key, vc->server_list);
	if (error < 0) {
		afs_prioritise_error(&vc->cumul_error, error, 0);
		goto failed;
	}

pick_server:
	_debug("pick [%lx]", vc->untried_servers);
	ASSERTCMP(alist, ==, NULL);

	error = afs_wait_for_vl_probes(vc->server_list, vc->untried_servers);
	if (error < 0) {
		afs_prioritise_error(&vc->cumul_error, error, 0);
		goto failed;
	}

	/* Use the list's preferred server if it hasn't been tried yet;
	 * otherwise pick the untried, responding server with the lowest RTT.
	 */
	vc->server_index = vc->server_list->preferred;
	if (test_bit(vc->server_index, &vc->untried_servers))
		goto selected_server;

	vc->server_index = -1;
	rtt = UINT_MAX;
	for (i = 0; i < vc->server_list->nr_servers; i++) {
		struct afs_vlserver *s = vc->server_list->servers[i].server;

		if (!test_bit(i, &vc->untried_servers) ||
		    !test_bit(AFS_VLSERVER_FL_RESPONDING, &s->flags))
			continue;
		if (s->probe.rtt <= rtt) {
			vc->server_index = i;
			rtt = s->probe.rtt;
		}
	}

	if (vc->server_index == -1)
		goto no_more_servers;

selected_server:
	_debug("use %d", vc->server_index);
	__clear_bit(vc->server_index, &vc->untried_servers);

	/* We're starting on a different vlserver from the list.  We need to
	 * check it, find its address list and probe its capabilities before we
	 * use it.
	 */
	vlserver = vc->server_list->servers[vc->server_index].server;
	vc->server = vlserver;

	_debug("USING VLSERVER: %s", vlserver->name);

	/* Take our own ref on the server's current address list. */
	read_lock(&vlserver->lock);
	alist = rcu_dereference_protected(vlserver->addresses,
					  lockdep_is_held(&vlserver->lock));
	vc->alist = afs_get_addrlist(alist, afs_alist_trace_get_vlrotate_set);
	read_unlock(&vlserver->lock);

	vc->addr_tried = 0;
	vc->addr_index = -1;

iterate_address:
	/* Iterate over the current server's address list to try and find an
	 * address on which it will respond to us.
	 */
	set = READ_ONCE(alist->responded);
	failed = READ_ONCE(alist->probe_failed);
	vc->addr_index = READ_ONCE(alist->preferred);

	_debug("%lx-%lx-%lx,%d", set, failed, vc->addr_tried, vc->addr_index);

	/* Candidates: responded to a probe, not failed, not yet tried by us. */
	set &= ~(failed | vc->addr_tried);

	if (!set)
		goto next_server;

	/* Fall back to the lowest-numbered candidate if the preferred
	 * address isn't one.
	 */
	if (!test_bit(vc->addr_index, &set))
		vc->addr_index = __ffs(set);

	set_bit(vc->addr_index, &vc->addr_tried);
	vc->alist = alist;

	_debug("VL address %d/%d", vc->addr_index, alist->nr_addrs);

	vc->call_responded = false;
	_leave(" = t %pISpc", rxrpc_kernel_remote_addr(alist->addrs[vc->addr_index].peer));
	return true;

next_server:
	_debug("next");
	ASSERT(alist);
	/* If the address we just used got a response and the preferred one
	 * had already been tried, make the one we used the preferred address.
	 */
	if (vc->call_responded &&
	    vc->addr_index != alist->preferred &&
	    test_bit(alist->preferred, &vc->addr_tried))
		WRITE_ONCE(alist->preferred, vc->addr_index);
	afs_put_addrlist(alist, afs_alist_trace_put_vlrotate_next);
	alist = vc->alist = NULL;
	goto pick_server;

no_more_servers:
	/* That's all the servers poked to no good effect.  Try again if some
	 * of them were busy.
	 */
	if (vc->flags & AFS_VL_CURSOR_RETRY)
		goto restart_from_beginning;

	/* Fold the probe results of every server into the overall error. */
	for (i = 0; i < vc->server_list->nr_servers; i++) {
		struct afs_vlserver *s = vc->server_list->servers[i].server;

		if (test_bit(AFS_VLSERVER_FL_RESPONDING, &s->flags))
			vc->cumul_error.responded = true;
		afs_prioritise_error(&vc->cumul_error, READ_ONCE(s->probe.error),
				     s->probe.abort_code);
	}

failed:
	if (alist) {
		if (vc->call_responded &&
		    vc->addr_index != alist->preferred &&
		    test_bit(alist->preferred, &vc->addr_tried))
			WRITE_ONCE(alist->preferred, vc->addr_index);
		afs_put_addrlist(alist, afs_alist_trace_put_vlrotate_fail);
		alist = vc->alist = NULL;
	}
	vc->flags |= AFS_VL_CURSOR_STOP;
	_leave(" = f [failed %d]", vc->cumul_error.error);
	return false;
}
320
321
/*
322
* Dump cursor state in the case of the error being EDESTADDRREQ.
323
*/
324
static void afs_vl_dump_edestaddrreq(const struct afs_vl_cursor *vc)
325
{
326
struct afs_cell *cell = vc->cell;
327
static int count;
328
int i;
329
330
if (!IS_ENABLED(CONFIG_AFS_DEBUG_CURSOR) || count > 3)
331
return;
332
count++;
333
334
rcu_read_lock();
335
pr_notice("EDESTADDR occurred\n");
336
pr_notice("CELL: %s err=%d\n", cell->name, cell->error);
337
pr_notice("DNS: src=%u st=%u lc=%x\n",
338
cell->dns_source, cell->dns_status, cell->dns_lookup_count);
339
pr_notice("VC: ut=%lx ix=%u ni=%hu fl=%hx err=%hd\n",
340
vc->untried_servers, vc->server_index, vc->nr_iterations,
341
vc->flags, vc->cumul_error.error);
342
pr_notice("VC: call er=%d ac=%d r=%u\n",
343
vc->call_error, vc->call_abort_code, vc->call_responded);
344
345
if (vc->server_list) {
346
const struct afs_vlserver_list *sl = vc->server_list;
347
pr_notice("VC: SL nr=%u ix=%u\n",
348
sl->nr_servers, sl->index);
349
for (i = 0; i < sl->nr_servers; i++) {
350
const struct afs_vlserver *s = sl->servers[i].server;
351
pr_notice("VC: server %s+%hu fl=%lx E=%hd\n",
352
s->name, s->port, s->flags, s->probe.error);
353
if (s->addresses) {
354
const struct afs_addr_list *a =
355
rcu_dereference(s->addresses);
356
pr_notice("VC: - nr=%u/%u/%u pf=%u\n",
357
a->nr_ipv4, a->nr_addrs, a->max_addrs,
358
a->preferred);
359
pr_notice("VC: - R=%lx F=%lx\n",
360
a->responded, a->probe_failed);
361
if (a == vc->alist)
362
pr_notice("VC: - current\n");
363
}
364
}
365
}
366
367
pr_notice("AC: t=%lx ax=%u\n", vc->addr_tried, vc->addr_index);
368
rcu_read_unlock();
369
}
370
371
/*
372
* Tidy up a volume location server cursor and unlock the vnode.
373
*/
374
int afs_end_vlserver_operation(struct afs_vl_cursor *vc)
375
{
376
struct afs_net *net = vc->cell->net;
377
378
_enter("VC=%x+%x", vc->debug_id, vc->nr_iterations);
379
380
switch (vc->cumul_error.error) {
381
case -EDESTADDRREQ:
382
case -EADDRNOTAVAIL:
383
case -ENETUNREACH:
384
case -EHOSTUNREACH:
385
afs_vl_dump_edestaddrreq(vc);
386
break;
387
}
388
389
if (vc->alist) {
390
if (vc->call_responded &&
391
vc->addr_index != vc->alist->preferred &&
392
test_bit(vc->alist->preferred, &vc->addr_tried))
393
WRITE_ONCE(vc->alist->preferred, vc->addr_index);
394
afs_put_addrlist(vc->alist, afs_alist_trace_put_vlrotate_end);
395
vc->alist = NULL;
396
}
397
afs_put_vlserverlist(net, vc->server_list);
398
return vc->cumul_error.error;
399
}
400
401