GitHub Repository: torvalds/linux
Path: blob/master/drivers/block/aoe/aoedev.c
/* Copyright (c) 2013 Coraid, Inc. See COPYING for GPL terms. */
/*
 * aoedev.c
 * AoE device utility functions; maintains device list.
 */

#include <linux/hdreg.h>
#include <linux/blk-mq.h>
#include <linux/netdevice.h>
#include <linux/delay.h>
#include <linux/slab.h>
#include <linux/bitmap.h>
#include <linux/kdev_t.h>
#include <linux/moduleparam.h>
#include <linux/string.h>
#include "aoe.h"

static void freetgt(struct aoedev *d, struct aoetgt *t);
static void skbpoolfree(struct aoedev *d);

static int aoe_dyndevs = 1;
module_param(aoe_dyndevs, int, 0644);
MODULE_PARM_DESC(aoe_dyndevs, "Use dynamic minor numbers for devices.");

static struct aoedev *devlist;
static DEFINE_SPINLOCK(devlist_lock);

/* Because some systems will have one, many, or no
 *   - partitions,
 *   - slots per shelf,
 *   - or shelves,
 * we need some flexibility in the way the minor numbers
 * are allocated. So they are dynamic.
 */
#define N_DEVS ((1U<<MINORBITS)/AOE_PARTITIONS)

static DEFINE_SPINLOCK(used_minors_lock);
static DECLARE_BITMAP(used_minors, N_DEVS);
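
/* For scale (an illustration, not from the original source): MINORBITS
 * is 20 in linux/kdev_t.h, and assuming the default AOE_PARTITIONS of
 * 16 from aoe.h, N_DEVS evaluates to (1U << 20) / 16 == 65536, i.e. up
 * to 65536 whole-disk devices, each owning a run of AOE_PARTITIONS
 * consecutive system minors.
 */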

static int
minor_get_dyn(ulong *sysminor)
{
	ulong flags;
	ulong n;
	int error = 0;

	spin_lock_irqsave(&used_minors_lock, flags);
	n = find_first_zero_bit(used_minors, N_DEVS);
	if (n < N_DEVS)
		set_bit(n, used_minors);
	else
		error = -1;
	spin_unlock_irqrestore(&used_minors_lock, flags);

	*sysminor = n * AOE_PARTITIONS;
	return error;
}

static int
minor_get_static(ulong *sysminor, ulong aoemaj, int aoemin)
{
	ulong flags;
	ulong n;
	int error = 0;
	enum {
		/* for backwards compatibility when !aoe_dyndevs,
		 * a static number of supported slots per shelf */
		NPERSHELF = 16,
	};

	if (aoemin >= NPERSHELF) {
		pr_err("aoe: %s %d slots per shelf\n",
			"static minor device numbers support only",
			NPERSHELF);
		error = -1;
		goto out;
	}

	n = aoemaj * NPERSHELF + aoemin;
	if (n >= N_DEVS) {
		pr_err("aoe: %s with e%ld.%d\n",
			"cannot use static minor device numbers",
			aoemaj, aoemin);
		error = -1;
		goto out;
	}

	spin_lock_irqsave(&used_minors_lock, flags);
	if (test_bit(n, used_minors)) {
		pr_err("aoe: %s %lu\n",
			"existing device already has static minor number",
			n);
		error = -1;
	} else
		set_bit(n, used_minors);
	spin_unlock_irqrestore(&used_minors_lock, flags);
	*sysminor = n * AOE_PARTITIONS;
out:
	return error;
}
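
/* A worked example of the static mapping above (illustrative only):
 * AoE target e2.5 (shelf 2, slot 5) lands in bitmap slot
 * n = 2 * NPERSHELF + 5 = 37, so *sysminor = 37 * AOE_PARTITIONS,
 * which is system minor 592 with the default AOE_PARTITIONS of 16.
 */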

static int
minor_get(ulong *sysminor, ulong aoemaj, int aoemin)
{
	if (aoe_dyndevs)
		return minor_get_dyn(sysminor);
	else
		return minor_get_static(sysminor, aoemaj, aoemin);
}

static void
minor_free(ulong minor)
{
	ulong flags;

	minor /= AOE_PARTITIONS;
	BUG_ON(minor >= N_DEVS);

	spin_lock_irqsave(&used_minors_lock, flags);
	BUG_ON(!test_bit(minor, used_minors));
	clear_bit(minor, used_minors);
	spin_unlock_irqrestore(&used_minors_lock, flags);
}

/*
 * Users who grab a pointer to the device with aoedev_by_aoeaddr
 * automatically get a reference count and must be responsible
 * for performing an aoedev_put. With the addition of async
 * kthread processing I'm no longer confident that we can
 * guarantee consistency in the face of device flushes.
 *
 * For the time being, we only bother to add extra references for
 * frames sitting on the iocq. When the kthreads finish processing
 * these frames, they will aoedev_put the device.
 */
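
/* A sketch of the resulting caller contract (not itself part of this
 * file; "maj", "min", and use_device() are placeholders):
 *
 *	d = aoedev_by_aoeaddr(maj, min, 0);
 *	if (d == NULL)
 *		return;		// unknown or dying device
 *	use_device(d);		// hypothetical work with the device
 *	aoedev_put(d);		// drop the reference taken above
 *
 * aoedev_by_aoeaddr() takes the reference while holding devlist_lock;
 * aoedev_put() below drops it under the same lock.
 */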

void
aoedev_put(struct aoedev *d)
{
	ulong flags;

	spin_lock_irqsave(&devlist_lock, flags);
	d->ref--;
	spin_unlock_irqrestore(&devlist_lock, flags);
}

static void
dummy_timer(struct timer_list *t)
{
	struct aoedev *d;

	d = timer_container_of(d, t, timer);
	if (d->flags & DEVFL_TKILL)
		return;
	d->timer.expires = jiffies + HZ;
	add_timer(&d->timer);
}

static void
aoe_failip(struct aoedev *d)
{
	struct request *rq;
	struct aoe_req *req;
	struct bio *bio;

	aoe_failbuf(d, d->ip.buf);
	rq = d->ip.rq;
	if (rq == NULL)
		return;

	req = blk_mq_rq_to_pdu(rq);
	while ((bio = d->ip.nxbio)) {
		bio->bi_status = BLK_STS_IOERR;
		d->ip.nxbio = bio->bi_next;
		req->nr_bios--;
	}

	if (!req->nr_bios)
		aoe_end_request(d, rq, 0);
}

static void
downdev_frame(struct list_head *pos)
{
	struct frame *f;

	f = list_entry(pos, struct frame, head);
	list_del(pos);
	if (f->buf) {
		f->buf->nframesout--;
		aoe_failbuf(f->t->d, f->buf);
	}
	aoe_freetframe(f);
}

void
aoedev_downdev(struct aoedev *d)
{
	struct aoetgt *t, **tt, **te;
	struct list_head *head, *pos, *nx;
	struct request *rq, *rqnext;
	int i;
	unsigned long flags;

	spin_lock_irqsave(&d->lock, flags);
	d->flags &= ~(DEVFL_UP | DEVFL_DEAD);
	spin_unlock_irqrestore(&d->lock, flags);

	/* clean out active and to-be-retransmitted buffers */
	for (i = 0; i < NFACTIVE; i++) {
		head = &d->factive[i];
		list_for_each_safe(pos, nx, head)
			downdev_frame(pos);
	}
	head = &d->rexmitq;
	list_for_each_safe(pos, nx, head)
		downdev_frame(pos);

	/* reset window dressings */
	tt = d->targets;
	te = tt + d->ntargets;
	for (; tt < te && (t = *tt); tt++) {
		aoecmd_wreset(t);
		t->nout = 0;
	}

	/* clean out the in-process request (if any) */
	aoe_failip(d);

	/* clean out any queued block requests */
	list_for_each_entry_safe(rq, rqnext, &d->rq_list, queuelist) {
		list_del_init(&rq->queuelist);
		blk_mq_start_request(rq);
		blk_mq_end_request(rq, BLK_STS_IOERR);
	}

	/* fast fail all pending I/O */
	if (d->blkq) {
		/* UP is cleared, freeze+quiesce to ensure all are errored */
		unsigned int memflags = blk_mq_freeze_queue(d->blkq);

		blk_mq_quiesce_queue(d->blkq);
		blk_mq_unquiesce_queue(d->blkq);
		blk_mq_unfreeze_queue(d->blkq, memflags);
	}

	if (d->gd)
		set_capacity(d->gd, 0);
}

/* return whether the user asked for this particular
 * device to be flushed
 */
static int
user_req(char *s, size_t slen, struct aoedev *d)
{
	const char *p;
	size_t lim;

	if (!d->gd)
		return 0;
	p = kbasename(d->gd->disk_name);
	lim = sizeof(d->gd->disk_name);
	lim -= p - d->gd->disk_name;
	if (slen < lim)
		lim = slen;

	return !strncmp(s, p, lim);
}

static void
freedev(struct aoedev *d)
{
	struct aoetgt **t, **e;
	int freeing = 0;
	unsigned long flags;

	spin_lock_irqsave(&d->lock, flags);
	if (d->flags & DEVFL_TKILL
	&& !(d->flags & DEVFL_FREEING)) {
		d->flags |= DEVFL_FREEING;
		freeing = 1;
	}
	spin_unlock_irqrestore(&d->lock, flags);
	if (!freeing)
		return;

	timer_delete_sync(&d->timer);
	if (d->gd) {
		aoedisk_rm_debugfs(d);
		del_gendisk(d->gd);
		put_disk(d->gd);
		blk_mq_free_tag_set(&d->tag_set);
	}
	t = d->targets;
	e = t + d->ntargets;
	for (; t < e && *t; t++)
		freetgt(d, *t);

	mempool_destroy(d->bufpool);
	skbpoolfree(d);
	minor_free(d->sysminor);

	spin_lock_irqsave(&d->lock, flags);
	d->flags |= DEVFL_FREED;
	spin_unlock_irqrestore(&d->lock, flags);
}

enum flush_parms {
	NOT_EXITING = 0,
	EXITING = 1,
};

static int
flush(const char __user *str, size_t cnt, int exiting)
{
	ulong flags;
	struct aoedev *d, **dd;
	char buf[16];
	int all = 0;
	int specified = 0;	/* flush a specific device */
	unsigned int skipflags;

	skipflags = DEVFL_GDALLOC | DEVFL_NEWSIZE | DEVFL_TKILL;

	if (!exiting && cnt >= 3) {
		if (cnt > sizeof buf)
			cnt = sizeof buf;
		if (copy_from_user(buf, str, cnt))
			return -EFAULT;
		all = !strncmp(buf, "all", 3);
		if (!all)
			specified = 1;
	}

	flush_workqueue(aoe_wq);
	/* pass one: do aoedev_downdev, which might sleep */
restart1:
	spin_lock_irqsave(&devlist_lock, flags);
	for (d = devlist; d; d = d->next) {
		spin_lock(&d->lock);
		if (d->flags & DEVFL_TKILL)
			goto cont;

		if (exiting) {
			/* unconditionally take each device down */
		} else if (specified) {
			if (!user_req(buf, cnt, d))
				goto cont;
		} else if ((!all && (d->flags & DEVFL_UP))
		|| d->flags & skipflags
		|| d->nopen
		|| d->ref)
			goto cont;

		spin_unlock(&d->lock);
		spin_unlock_irqrestore(&devlist_lock, flags);
		aoedev_downdev(d);
		d->flags |= DEVFL_TKILL;
		goto restart1;
cont:
		spin_unlock(&d->lock);
	}
	spin_unlock_irqrestore(&devlist_lock, flags);

	/* pass two: call freedev, which might sleep,
	 * for aoedevs marked with DEVFL_TKILL
	 */
restart2:
	spin_lock_irqsave(&devlist_lock, flags);
	for (d = devlist; d; d = d->next) {
		spin_lock(&d->lock);
		if (d->flags & DEVFL_TKILL
		&& !(d->flags & DEVFL_FREEING)) {
			spin_unlock(&d->lock);
			spin_unlock_irqrestore(&devlist_lock, flags);
			freedev(d);
			goto restart2;
		}
		spin_unlock(&d->lock);
	}

	/* pass three: remove aoedevs marked with DEVFL_FREED */
	for (dd = &devlist, d = *dd; d; d = *dd) {
		struct aoedev *doomed = NULL;

		spin_lock(&d->lock);
		if (d->flags & DEVFL_FREED) {
			*dd = d->next;
			doomed = d;
		} else {
			dd = &d->next;
		}
		spin_unlock(&d->lock);
		if (doomed)
			kfree(doomed->targets);
		kfree(doomed);
	}
	spin_unlock_irqrestore(&devlist_lock, flags);

	return 0;
}

int
aoedev_flush(const char __user *str, size_t cnt)
{
	return flush(str, cnt, NOT_EXITING);
}
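
/* How this is reached from userspace (wiring lives in aoechr.c, not in
 * this file): writing a device name or "all" to the aoe character
 * device's flush node, conventionally /dev/etherd/flush, ends up here
 * with NOT_EXITING, e.g.
 *
 *	echo e1.0 > /dev/etherd/flush
 *
 * where e1.0 is only an example address.
 */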

/* This has been confirmed to occur once with Tms=3*1000 due to the
 * driver changing link and not processing its transmit ring. The
 * problem is hard enough to solve by returning an error that I'm
 * still punting on "solving" this.
 */
static void
skbfree(struct sk_buff *skb)
{
	enum { Sms = 250, Tms = 30 * 1000};
	int i = Tms / Sms;

	if (skb == NULL)
		return;
	while (atomic_read(&skb_shinfo(skb)->dataref) != 1 && i-- > 0)
		msleep(Sms);
	if (i < 0) {
		printk(KERN_ERR
			"aoe: %s holds ref: %s\n",
			skb->dev ? skb->dev->name : "netif",
			"cannot free skb -- memory leaked.");
		return;
	}
	skb->truesize -= skb->data_len;
	skb_shinfo(skb)->nr_frags = skb->data_len = 0;
	skb_trim(skb, 0);
	dev_kfree_skb(skb);
}
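
/* The arithmetic above: skbfree() polls dataref every Sms = 250 ms for
 * at most Tms / Sms = 120 iterations, i.e. it waits up to 30 seconds
 * for the network layer to release its reference before giving up and
 * deliberately leaking the skb rather than freeing it out from under a
 * driver.
 */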

static void
skbpoolfree(struct aoedev *d)
{
	struct sk_buff *skb, *tmp;

	skb_queue_walk_safe(&d->skbpool, skb, tmp)
		skbfree(skb);

	__skb_queue_head_init(&d->skbpool);
}

/* find it or allocate it */
struct aoedev *
aoedev_by_aoeaddr(ulong maj, int min, int do_alloc)
{
	struct aoedev *d;
	int i;
	ulong flags;
	ulong sysminor = 0;

	spin_lock_irqsave(&devlist_lock, flags);

	for (d = devlist; d; d = d->next)
		if (d->aoemajor == maj && d->aoeminor == min) {
			spin_lock(&d->lock);
			if (d->flags & DEVFL_TKILL) {
				spin_unlock(&d->lock);
				d = NULL;
				goto out;
			}
			d->ref++;
			spin_unlock(&d->lock);
			break;
		}
	if (d || !do_alloc || minor_get(&sysminor, maj, min) < 0)
		goto out;
	d = kcalloc(1, sizeof *d, GFP_ATOMIC);
	if (!d)
		goto out;
	d->targets = kcalloc(NTARGETS, sizeof(*d->targets), GFP_ATOMIC);
	if (!d->targets) {
		kfree(d);
		d = NULL;
		goto out;
	}
	d->ntargets = NTARGETS;
	INIT_WORK(&d->work, aoecmd_sleepwork);
	spin_lock_init(&d->lock);
	INIT_LIST_HEAD(&d->rq_list);
	skb_queue_head_init(&d->skbpool);
	timer_setup(&d->timer, dummy_timer, 0);
	d->timer.expires = jiffies + HZ;
	add_timer(&d->timer);
	d->bufpool = NULL;	/* defer to aoeblk_gdalloc */
	d->tgt = d->targets;
	d->ref = 1;
	for (i = 0; i < NFACTIVE; i++)
		INIT_LIST_HEAD(&d->factive[i]);
	INIT_LIST_HEAD(&d->rexmitq);
	d->sysminor = sysminor;
	d->aoemajor = maj;
	d->aoeminor = min;
	d->rttavg = RTTAVG_INIT;
	d->rttdev = RTTDEV_INIT;
	d->next = devlist;
	devlist = d;
out:
	spin_unlock_irqrestore(&devlist_lock, flags);
	return d;
}
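
/* Allocation note: aoedev_by_aoeaddr() runs with devlist_lock held and
 * interrupts disabled, which is why both allocations above must use
 * GFP_ATOMIC. Callers discovering a new target pass do_alloc != 0 so
 * that first contact with an address creates the device (the config
 * response path in aoecmd.c is one such caller, as I read the driver);
 * pure lookups pass do_alloc == 0.
 */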

static void
freetgt(struct aoedev *d, struct aoetgt *t)
{
	struct frame *f;
	struct list_head *pos, *nx, *head;
	struct aoeif *ifp;

	for (ifp = t->ifs; ifp < &t->ifs[NAOEIFS]; ++ifp) {
		if (!ifp->nd)
			break;
		dev_put(ifp->nd);
	}

	head = &t->ffree;
	list_for_each_safe(pos, nx, head) {
		list_del(pos);
		f = list_entry(pos, struct frame, head);
		skbfree(f->skb);
		kfree(f);
	}
	kfree(t);
}

void
aoedev_exit(void)
{
	flush_workqueue(aoe_wq);
	flush(NULL, 0, EXITING);
}

int __init
aoedev_init(void)
{
	return 0;
}