GitHub Repository: awilliam/linux-vfio
Path: blob/master/drivers/block/drbd/drbd_main.c
1
/*
2
drbd.c
3
4
This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
5
6
Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
7
Copyright (C) 1999-2008, Philipp Reisner <[email protected]>.
8
Copyright (C) 2002-2008, Lars Ellenberg <[email protected]>.
9
10
Thanks to Carter Burden, Bart Grantham and Gennadiy Nerubayev
11
from Logicworks, Inc. for making SDP replication support possible.
12
13
drbd is free software; you can redistribute it and/or modify
14
it under the terms of the GNU General Public License as published by
15
the Free Software Foundation; either version 2, or (at your option)
16
any later version.
17
18
drbd is distributed in the hope that it will be useful,
19
but WITHOUT ANY WARRANTY; without even the implied warranty of
20
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21
GNU General Public License for more details.
22
23
You should have received a copy of the GNU General Public License
24
along with drbd; see the file COPYING. If not, write to
25
the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
26
27
*/
28
29
#include <linux/module.h>
30
#include <linux/drbd.h>
31
#include <asm/uaccess.h>
32
#include <asm/types.h>
33
#include <net/sock.h>
34
#include <linux/ctype.h>
35
#include <linux/mutex.h>
36
#include <linux/fs.h>
37
#include <linux/file.h>
38
#include <linux/proc_fs.h>
39
#include <linux/init.h>
40
#include <linux/mm.h>
41
#include <linux/memcontrol.h>
42
#include <linux/mm_inline.h>
43
#include <linux/slab.h>
44
#include <linux/random.h>
45
#include <linux/reboot.h>
46
#include <linux/notifier.h>
47
#include <linux/kthread.h>
48
49
#define __KERNEL_SYSCALLS__
50
#include <linux/unistd.h>
51
#include <linux/vmalloc.h>
52
53
#include <linux/drbd_limits.h>
54
#include "drbd_int.h"
55
#include "drbd_req.h" /* only for _req_mod in tl_release and tl_clear */
56
57
#include "drbd_vli.h"
58
59
struct after_state_chg_work {
60
struct drbd_work w;
61
union drbd_state os;
62
union drbd_state ns;
63
enum chg_state_flags flags;
64
struct completion *done;
65
};
66
67
static DEFINE_MUTEX(drbd_main_mutex);
68
int drbdd_init(struct drbd_thread *);
69
int drbd_worker(struct drbd_thread *);
70
int drbd_asender(struct drbd_thread *);
71
72
int drbd_init(void);
73
static int drbd_open(struct block_device *bdev, fmode_t mode);
74
static int drbd_release(struct gendisk *gd, fmode_t mode);
75
static int w_after_state_ch(struct drbd_conf *mdev, struct drbd_work *w, int unused);
76
static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
77
union drbd_state ns, enum chg_state_flags flags);
78
static int w_md_sync(struct drbd_conf *mdev, struct drbd_work *w, int unused);
79
static void md_sync_timer_fn(unsigned long data);
80
static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused);
81
static int w_go_diskless(struct drbd_conf *mdev, struct drbd_work *w, int unused);
82
83
MODULE_AUTHOR("Philipp Reisner <[email protected]>, "
84
"Lars Ellenberg <[email protected]>");
85
MODULE_DESCRIPTION("drbd - Distributed Replicated Block Device v" REL_VERSION);
86
MODULE_VERSION(REL_VERSION);
87
MODULE_LICENSE("GPL");
88
MODULE_PARM_DESC(minor_count, "Maximum number of drbd devices ("
89
__stringify(DRBD_MINOR_COUNT_MIN) "-" __stringify(DRBD_MINOR_COUNT_MAX) ")");
90
MODULE_ALIAS_BLOCKDEV_MAJOR(DRBD_MAJOR);
91
92
#include <linux/moduleparam.h>
93
/* allow_open_on_secondary */
94
MODULE_PARM_DESC(allow_oos, "DONT USE!");
95
/* thanks to these macros, if compiled into the kernel (not-module),
96
* this becomes the boot parameter drbd.minor_count */
97
module_param(minor_count, uint, 0444);
98
module_param(disable_sendpage, bool, 0644);
99
module_param(allow_oos, bool, 0);
100
module_param(cn_idx, uint, 0444);
101
module_param(proc_details, int, 0644);
102
103
#ifdef CONFIG_DRBD_FAULT_INJECTION
104
int enable_faults;
105
int fault_rate;
106
static int fault_count;
107
int fault_devs;
108
/* bitmap of enabled faults */
109
module_param(enable_faults, int, 0664);
110
/* fault rate % value - applies to all enabled faults */
111
module_param(fault_rate, int, 0664);
112
/* count of faults inserted */
113
module_param(fault_count, int, 0664);
114
/* bitmap of devices to insert faults on */
115
module_param(fault_devs, int, 0644);
116
#endif
117
118
/* module parameter, defined */
119
unsigned int minor_count = DRBD_MINOR_COUNT_DEF;
120
int disable_sendpage;
121
int allow_oos;
122
unsigned int cn_idx = CN_IDX_DRBD;
123
int proc_details; /* Detail level in /proc/drbd */
124
125
/* Module parameter for setting the user mode helper program
126
* to run. Default is /sbin/drbdadm */
127
char usermode_helper[80] = "/sbin/drbdadm";
128
129
module_param_string(usermode_helper, usermode_helper, sizeof(usermode_helper), 0644);
130
131
/* in 2.6.x, our device mapping and config info contains our virtual gendisks
132
* as member "struct gendisk *vdisk;"
133
*/
134
struct drbd_conf **minor_table;
135
136
struct kmem_cache *drbd_request_cache;
137
struct kmem_cache *drbd_ee_cache; /* epoch entries */
138
struct kmem_cache *drbd_bm_ext_cache; /* bitmap extents */
139
struct kmem_cache *drbd_al_ext_cache; /* activity log extents */
140
mempool_t *drbd_request_mempool;
141
mempool_t *drbd_ee_mempool;
142
143
/* I do not use a standard mempool, because:
144
1) I want to hand out the pre-allocated objects first.
145
2) I want to be able to interrupt sleeping allocation with a signal.
146
Note: This is a singly linked list; the next pointer is the private
147
member of struct page.
148
*/
149
struct page *drbd_pp_pool;
150
spinlock_t drbd_pp_lock;
151
int drbd_pp_vacant;
152
wait_queue_head_t drbd_pp_wait;
153
154
DEFINE_RATELIMIT_STATE(drbd_ratelimit_state, 5 * HZ, 5);
155
156
static const struct block_device_operations drbd_ops = {
157
.owner = THIS_MODULE,
158
.open = drbd_open,
159
.release = drbd_release,
160
};
161
162
#define ARRY_SIZE(A) (sizeof(A)/sizeof(A[0]))
163
164
#ifdef __CHECKER__
165
/* When checking with sparse, and this is an inline function, sparse will
166
give tons of false positives. When this is a real function, sparse works.
167
*/
168
int _get_ldev_if_state(struct drbd_conf *mdev, enum drbd_disk_state mins)
169
{
170
int io_allowed;
171
172
atomic_inc(&mdev->local_cnt);
173
io_allowed = (mdev->state.disk >= mins);
174
if (!io_allowed) {
175
if (atomic_dec_and_test(&mdev->local_cnt))
176
wake_up(&mdev->misc_wait);
177
}
178
return io_allowed;
179
}
180
181
#endif
182
183
/**
184
* DOC: The transfer log
185
*
186
* The transfer log is a singly linked list of &struct drbd_tl_epoch objects.
187
* mdev->newest_tle points to the head, mdev->oldest_tle points to the tail
188
* of the list. There is always at least one &struct drbd_tl_epoch object.
189
*
190
* Each &struct drbd_tl_epoch has a circular double linked list of requests
191
* attached.
192
*/
193
static int tl_init(struct drbd_conf *mdev)
194
{
195
struct drbd_tl_epoch *b;
196
197
/* during device minor initialization, we may well use GFP_KERNEL */
198
b = kmalloc(sizeof(struct drbd_tl_epoch), GFP_KERNEL);
199
if (!b)
200
return 0;
201
INIT_LIST_HEAD(&b->requests);
202
INIT_LIST_HEAD(&b->w.list);
203
b->next = NULL;
204
b->br_number = 4711;
205
b->n_writes = 0;
206
b->w.cb = NULL; /* if this is != NULL, we need to dec_ap_pending in tl_clear */
207
208
mdev->oldest_tle = b;
209
mdev->newest_tle = b;
210
INIT_LIST_HEAD(&mdev->out_of_sequence_requests);
211
212
mdev->tl_hash = NULL;
213
mdev->tl_hash_s = 0;
214
215
return 1;
216
}
217
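/* Counterpart of tl_init(); may only be called once the transfer log
* is empty again, i.e. only the initial barrier is left. */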
218
static void tl_cleanup(struct drbd_conf *mdev)
219
{
220
D_ASSERT(mdev->oldest_tle == mdev->newest_tle);
221
D_ASSERT(list_empty(&mdev->out_of_sequence_requests));
222
kfree(mdev->oldest_tle);
223
mdev->oldest_tle = NULL;
224
kfree(mdev->unused_spare_tle);
225
mdev->unused_spare_tle = NULL;
226
kfree(mdev->tl_hash);
227
mdev->tl_hash = NULL;
228
mdev->tl_hash_s = 0;
229
}
230
231
/**
232
* _tl_add_barrier() - Adds a barrier to the transfer log
233
* @mdev: DRBD device.
234
* @new: Barrier to be added before the current head of the TL.
235
*
236
* The caller must hold the req_lock.
237
*/
238
void _tl_add_barrier(struct drbd_conf *mdev, struct drbd_tl_epoch *new)
239
{
240
struct drbd_tl_epoch *newest_before;
241
242
INIT_LIST_HEAD(&new->requests);
243
INIT_LIST_HEAD(&new->w.list);
244
new->w.cb = NULL; /* if this is != NULL, we need to dec_ap_pending in tl_clear */
245
new->next = NULL;
246
new->n_writes = 0;
247
248
newest_before = mdev->newest_tle;
249
/* never send a barrier number == 0, because that is special-cased
250
* when using TCQ for our write ordering code */
251
new->br_number = (newest_before->br_number+1) ?: 1;
252
if (mdev->newest_tle != new) {
253
mdev->newest_tle->next = new;
254
mdev->newest_tle = new;
255
}
256
}
257
258
/**
259
* tl_release() - Free or recycle the oldest &struct drbd_tl_epoch object of the TL
260
* @mdev: DRBD device.
261
* @barrier_nr: Expected identifier of the DRBD write barrier packet.
262
* @set_size: Expected number of requests before that barrier.
263
*
264
* In case the passed barrier_nr or set_size does not match the oldest
265
* &struct drbd_tl_epoch objects this function will cause a termination
266
* of the connection.
267
*/
268
void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr,
269
unsigned int set_size)
270
{
271
struct drbd_tl_epoch *b, *nob; /* next old barrier */
272
struct list_head *le, *tle;
273
struct drbd_request *r;
274
275
spin_lock_irq(&mdev->req_lock);
276
277
b = mdev->oldest_tle;
278
279
/* first some paranoia code */
280
if (b == NULL) {
281
dev_err(DEV, "BAD! BarrierAck #%u received, but no epoch in tl!?\n",
282
barrier_nr);
283
goto bail;
284
}
285
if (b->br_number != barrier_nr) {
286
dev_err(DEV, "BAD! BarrierAck #%u received, expected #%u!\n",
287
barrier_nr, b->br_number);
288
goto bail;
289
}
290
if (b->n_writes != set_size) {
291
dev_err(DEV, "BAD! BarrierAck #%u received with n_writes=%u, expected n_writes=%u!\n",
292
barrier_nr, set_size, b->n_writes);
293
goto bail;
294
}
295
296
/* Clean up list of requests processed during current epoch */
297
list_for_each_safe(le, tle, &b->requests) {
298
r = list_entry(le, struct drbd_request, tl_requests);
299
_req_mod(r, barrier_acked);
300
}
301
/* There could be requests on the list waiting for completion
302
of the write to the local disk. To avoid corruptions of
303
slab's data structures we have to remove the list's head.
304
305
Also there could have been a barrier ack out of sequence, overtaking
306
the write acks - which would be a bug and violating write ordering.
307
To not deadlock in case we lose connection while such requests are
308
still pending, we need some way to find them for the
309
_req_mod(connection_lost_while_pending).
310
311
These have been list_move'd to the out_of_sequence_requests list in
312
_req_mod(, barrier_acked) above.
313
*/
314
list_del_init(&b->requests);
315
316
nob = b->next;
317
if (test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) {
318
_tl_add_barrier(mdev, b);
319
if (nob)
320
mdev->oldest_tle = nob;
321
/* if nob == NULL b was the only barrier, and becomes the new
322
barrier. Therefore mdev->oldest_tle points already to b */
323
} else {
324
D_ASSERT(nob != NULL);
325
mdev->oldest_tle = nob;
326
kfree(b);
327
}
328
329
spin_unlock_irq(&mdev->req_lock);
330
dec_ap_pending(mdev);
331
332
return;
333
334
bail:
335
spin_unlock_irq(&mdev->req_lock);
336
drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR));
337
}
338
339
340
/**
341
* _tl_restart() - Walks the transfer log, and applies an action to all requests
342
* @mdev: DRBD device.
343
* @what: The action/event to perform with all request objects
344
*
345
* @what might be one of connection_lost_while_pending, resend, fail_frozen_disk_io,
346
* restart_frozen_disk_io.
347
*/
348
static void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what)
349
{
350
struct drbd_tl_epoch *b, *tmp, **pn;
351
struct list_head *le, *tle, carry_reads;
352
struct drbd_request *req;
353
int rv, n_writes, n_reads;
354
355
b = mdev->oldest_tle;
356
pn = &mdev->oldest_tle;
357
while (b) {
358
n_writes = 0;
359
n_reads = 0;
360
INIT_LIST_HEAD(&carry_reads);
361
list_for_each_safe(le, tle, &b->requests) {
362
req = list_entry(le, struct drbd_request, tl_requests);
363
rv = _req_mod(req, what);
364
365
n_writes += (rv & MR_WRITE) >> MR_WRITE_SHIFT;
366
n_reads += (rv & MR_READ) >> MR_READ_SHIFT;
367
}
368
tmp = b->next;
369
370
if (n_writes) {
371
if (what == resend) {
372
b->n_writes = n_writes;
373
if (b->w.cb == NULL) {
374
b->w.cb = w_send_barrier;
375
inc_ap_pending(mdev);
376
set_bit(CREATE_BARRIER, &mdev->flags);
377
}
378
379
drbd_queue_work(&mdev->data.work, &b->w);
380
}
381
pn = &b->next;
382
} else {
383
if (n_reads)
384
list_add(&carry_reads, &b->requests);
385
/* there could still be requests on that ring list,
386
* in case local io is still pending */
387
list_del(&b->requests);
388
389
/* dec_ap_pending corresponding to queue_barrier.
390
* the newest barrier may not have been queued yet,
391
* in which case w.cb is still NULL. */
392
if (b->w.cb != NULL)
393
dec_ap_pending(mdev);
394
395
if (b == mdev->newest_tle) {
396
/* recycle, but reinit! */
397
D_ASSERT(tmp == NULL);
398
INIT_LIST_HEAD(&b->requests);
399
list_splice(&carry_reads, &b->requests);
400
INIT_LIST_HEAD(&b->w.list);
401
b->w.cb = NULL;
402
b->br_number = net_random();
403
b->n_writes = 0;
404
405
*pn = b;
406
break;
407
}
408
*pn = tmp;
409
kfree(b);
410
}
411
b = tmp;
412
list_splice(&carry_reads, &b->requests);
413
}
414
}
415
416
417
/**
418
* tl_clear() - Clears all requests and &struct drbd_tl_epoch objects out of the TL
419
* @mdev: DRBD device.
420
*
421
* This is called after the connection to the peer was lost. The storage covered
422
by the requests on the transfer log gets marked as out of sync. Called from the
423
* receiver thread and the worker thread.
424
*/
425
void tl_clear(struct drbd_conf *mdev)
426
{
427
struct list_head *le, *tle;
428
struct drbd_request *r;
429
430
spin_lock_irq(&mdev->req_lock);
431
432
_tl_restart(mdev, connection_lost_while_pending);
433
434
/* we expect this list to be empty. */
435
D_ASSERT(list_empty(&mdev->out_of_sequence_requests));
436
437
/* but just in case, clean it up anyways! */
438
list_for_each_safe(le, tle, &mdev->out_of_sequence_requests) {
439
r = list_entry(le, struct drbd_request, tl_requests);
440
/* It would be nice to complete outside of spinlock.
441
* But this is easier for now. */
442
_req_mod(r, connection_lost_while_pending);
443
}
444
445
/* ensure bit indicating barrier is required is clear */
446
clear_bit(CREATE_BARRIER, &mdev->flags);
447
448
memset(mdev->app_reads_hash, 0, APP_R_HSIZE*sizeof(void *));
449
450
spin_unlock_irq(&mdev->req_lock);
451
}
452
453
void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what)
454
{
455
spin_lock_irq(&mdev->req_lock);
456
_tl_restart(mdev, what);
457
spin_unlock_irq(&mdev->req_lock);
458
}
459
460
/**
461
* cl_wide_st_chg() - true if the state change is a cluster wide one
462
* @mdev: DRBD device.
463
* @os: old (current) state.
464
* @ns: new (wanted) state.
465
*/
466
static int cl_wide_st_chg(struct drbd_conf *mdev,
467
union drbd_state os, union drbd_state ns)
468
{
469
return (os.conn >= C_CONNECTED && ns.conn >= C_CONNECTED &&
470
((os.role != R_PRIMARY && ns.role == R_PRIMARY) ||
471
(os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) ||
472
(os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S) ||
473
(os.disk != D_DISKLESS && ns.disk == D_DISKLESS))) ||
474
(os.conn >= C_CONNECTED && ns.conn == C_DISCONNECTING) ||
475
(os.conn == C_CONNECTED && ns.conn == C_VERIFY_S);
476
}
477
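/* drbd_change_state() - apply ((os & ~mask) | val) under req_lock,
* without any cluster-wide handshake, and return the result of
* _drbd_set_state(). Used e.g. as
* drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR)) in tl_release(). */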
478
enum drbd_state_rv
479
drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f,
480
union drbd_state mask, union drbd_state val)
481
{
482
unsigned long flags;
483
union drbd_state os, ns;
484
enum drbd_state_rv rv;
485
486
spin_lock_irqsave(&mdev->req_lock, flags);
487
os = mdev->state;
488
ns.i = (os.i & ~mask.i) | val.i;
489
rv = _drbd_set_state(mdev, ns, f, NULL);
490
ns = mdev->state;
491
spin_unlock_irqrestore(&mdev->req_lock, flags);
492
493
return rv;
494
}
495
496
/**
497
* drbd_force_state() - Impose a change which happens outside our control on our state
498
* @mdev: DRBD device.
499
* @mask: mask of state bits to change.
500
* @val: value of new state bits.
501
*/
502
void drbd_force_state(struct drbd_conf *mdev,
503
union drbd_state mask, union drbd_state val)
504
{
505
drbd_change_state(mdev, CS_HARD, mask, val);
506
}
507
508
static enum drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state);
509
static enum drbd_state_rv is_valid_state_transition(struct drbd_conf *,
510
union drbd_state,
511
union drbd_state);
512
static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os,
513
union drbd_state ns, const char **warn_sync_abort);
514
int drbd_send_state_req(struct drbd_conf *,
515
union drbd_state, union drbd_state);
516
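/* Condition for the wait_event() in drbd_req_state(): returns a final
* SS_ code once the peer has answered (or the change turns out not to
* be cluster wide), or SS_UNKNOWN_ERROR to keep waiting. */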
517
static enum drbd_state_rv
518
_req_st_cond(struct drbd_conf *mdev, union drbd_state mask,
519
union drbd_state val)
520
{
521
union drbd_state os, ns;
522
unsigned long flags;
523
enum drbd_state_rv rv;
524
525
if (test_and_clear_bit(CL_ST_CHG_SUCCESS, &mdev->flags))
526
return SS_CW_SUCCESS;
527
528
if (test_and_clear_bit(CL_ST_CHG_FAIL, &mdev->flags))
529
return SS_CW_FAILED_BY_PEER;
530
531
rv = 0;
532
spin_lock_irqsave(&mdev->req_lock, flags);
533
os = mdev->state;
534
ns.i = (os.i & ~mask.i) | val.i;
535
ns = sanitize_state(mdev, os, ns, NULL);
536
537
if (!cl_wide_st_chg(mdev, os, ns))
538
rv = SS_CW_NO_NEED;
539
if (!rv) {
540
rv = is_valid_state(mdev, ns);
541
if (rv == SS_SUCCESS) {
542
rv = is_valid_state_transition(mdev, ns, os);
543
if (rv == SS_SUCCESS)
544
rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */
545
}
546
}
547
spin_unlock_irqrestore(&mdev->req_lock, flags);
548
549
return rv;
550
}
551
552
/**
553
* drbd_req_state() - Perform a possibly cluster-wide state change
554
* @mdev: DRBD device.
555
* @mask: mask of state bits to change.
556
* @val: value of new state bits.
557
* @f: flags
558
*
559
* Should not be called directly, use drbd_request_state() or
560
* _drbd_request_state().
561
*/
562
static enum drbd_state_rv
563
drbd_req_state(struct drbd_conf *mdev, union drbd_state mask,
564
union drbd_state val, enum chg_state_flags f)
565
{
566
struct completion done;
567
unsigned long flags;
568
union drbd_state os, ns;
569
enum drbd_state_rv rv;
570
571
init_completion(&done);
572
573
if (f & CS_SERIALIZE)
574
mutex_lock(&mdev->state_mutex);
575
576
spin_lock_irqsave(&mdev->req_lock, flags);
577
os = mdev->state;
578
ns.i = (os.i & ~mask.i) | val.i;
579
ns = sanitize_state(mdev, os, ns, NULL);
580
581
if (cl_wide_st_chg(mdev, os, ns)) {
582
rv = is_valid_state(mdev, ns);
583
if (rv == SS_SUCCESS)
584
rv = is_valid_state_transition(mdev, ns, os);
585
spin_unlock_irqrestore(&mdev->req_lock, flags);
586
587
if (rv < SS_SUCCESS) {
588
if (f & CS_VERBOSE)
589
print_st_err(mdev, os, ns, rv);
590
goto abort;
591
}
592
593
drbd_state_lock(mdev);
594
if (!drbd_send_state_req(mdev, mask, val)) {
595
drbd_state_unlock(mdev);
596
rv = SS_CW_FAILED_BY_PEER;
597
if (f & CS_VERBOSE)
598
print_st_err(mdev, os, ns, rv);
599
goto abort;
600
}
601
602
wait_event(mdev->state_wait,
603
(rv = _req_st_cond(mdev, mask, val)));
604
605
if (rv < SS_SUCCESS) {
606
drbd_state_unlock(mdev);
607
if (f & CS_VERBOSE)
608
print_st_err(mdev, os, ns, rv);
609
goto abort;
610
}
611
spin_lock_irqsave(&mdev->req_lock, flags);
612
os = mdev->state;
613
ns.i = (os.i & ~mask.i) | val.i;
614
rv = _drbd_set_state(mdev, ns, f, &done);
615
drbd_state_unlock(mdev);
616
} else {
617
rv = _drbd_set_state(mdev, ns, f, &done);
618
}
619
620
spin_unlock_irqrestore(&mdev->req_lock, flags);
621
622
if (f & CS_WAIT_COMPLETE && rv == SS_SUCCESS) {
623
D_ASSERT(current != mdev->worker.task);
624
wait_for_completion(&done);
625
}
626
627
abort:
628
if (f & CS_SERIALIZE)
629
mutex_unlock(&mdev->state_mutex);
630
631
return rv;
632
}
633
634
/**
635
* _drbd_request_state() - Request a state change (with flags)
636
* @mdev: DRBD device.
637
* @mask: mask of state bits to change.
638
* @val: value of new state bits.
639
* @f: flags
640
*
641
* Cousin of drbd_request_state(), useful with the CS_WAIT_COMPLETE
642
* flag, or when logging of failed state change requests is not desired.
643
*/
644
enum drbd_state_rv
645
_drbd_request_state(struct drbd_conf *mdev, union drbd_state mask,
646
union drbd_state val, enum chg_state_flags f)
647
{
648
enum drbd_state_rv rv;
649
650
wait_event(mdev->state_wait,
651
(rv = drbd_req_state(mdev, mask, val, f)) != SS_IN_TRANSIENT_STATE);
652
653
return rv;
654
}
655
656
static void print_st(struct drbd_conf *mdev, char *name, union drbd_state ns)
657
{
658
dev_err(DEV, " %s = { cs:%s ro:%s/%s ds:%s/%s %c%c%c%c }\n",
659
name,
660
drbd_conn_str(ns.conn),
661
drbd_role_str(ns.role),
662
drbd_role_str(ns.peer),
663
drbd_disk_str(ns.disk),
664
drbd_disk_str(ns.pdsk),
665
is_susp(ns) ? 's' : 'r',
666
ns.aftr_isp ? 'a' : '-',
667
ns.peer_isp ? 'p' : '-',
668
ns.user_isp ? 'u' : '-'
669
);
670
}
671
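/* Log a failed state change: the error string plus the current and the
* wanted state. No-op for SS_IN_TRANSIENT_STATE. */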
672
void print_st_err(struct drbd_conf *mdev, union drbd_state os,
673
union drbd_state ns, enum drbd_state_rv err)
674
{
675
if (err == SS_IN_TRANSIENT_STATE)
676
return;
677
dev_err(DEV, "State change failed: %s\n", drbd_set_st_err_str(err));
678
print_st(mdev, " state", os);
679
print_st(mdev, "wanted", ns);
680
}
681
682
683
/**
684
* is_valid_state() - Returns an SS_ error code if ns is not valid
685
* @mdev: DRBD device.
686
* @ns: State to consider.
687
*/
688
static enum drbd_state_rv
689
is_valid_state(struct drbd_conf *mdev, union drbd_state ns)
690
{
691
/* See drbd_state_sw_errors in drbd_strings.c */
692
693
enum drbd_fencing_p fp;
694
enum drbd_state_rv rv = SS_SUCCESS;
695
696
fp = FP_DONT_CARE;
697
if (get_ldev(mdev)) {
698
fp = mdev->ldev->dc.fencing;
699
put_ldev(mdev);
700
}
701
702
if (get_net_conf(mdev)) {
703
if (!mdev->net_conf->two_primaries &&
704
ns.role == R_PRIMARY && ns.peer == R_PRIMARY)
705
rv = SS_TWO_PRIMARIES;
706
put_net_conf(mdev);
707
}
708
709
if (rv <= 0)
710
/* already found a reason to abort */;
711
else if (ns.role == R_SECONDARY && mdev->open_cnt)
712
rv = SS_DEVICE_IN_USE;
713
714
else if (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.disk < D_UP_TO_DATE)
715
rv = SS_NO_UP_TO_DATE_DISK;
716
717
else if (fp >= FP_RESOURCE &&
718
ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk >= D_UNKNOWN)
719
rv = SS_PRIMARY_NOP;
720
721
else if (ns.role == R_PRIMARY && ns.disk <= D_INCONSISTENT && ns.pdsk <= D_INCONSISTENT)
722
rv = SS_NO_UP_TO_DATE_DISK;
723
724
else if (ns.conn > C_CONNECTED && ns.disk < D_INCONSISTENT)
725
rv = SS_NO_LOCAL_DISK;
726
727
else if (ns.conn > C_CONNECTED && ns.pdsk < D_INCONSISTENT)
728
rv = SS_NO_REMOTE_DISK;
729
730
else if (ns.conn > C_CONNECTED && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE)
731
rv = SS_NO_UP_TO_DATE_DISK;
732
733
else if ((ns.conn == C_CONNECTED ||
734
ns.conn == C_WF_BITMAP_S ||
735
ns.conn == C_SYNC_SOURCE ||
736
ns.conn == C_PAUSED_SYNC_S) &&
737
ns.disk == D_OUTDATED)
738
rv = SS_CONNECTED_OUTDATES;
739
740
else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) &&
741
(mdev->sync_conf.verify_alg[0] == 0))
742
rv = SS_NO_VERIFY_ALG;
743
744
else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) &&
745
mdev->agreed_pro_version < 88)
746
rv = SS_NOT_SUPPORTED;
747
748
else if (ns.conn >= C_CONNECTED && ns.pdsk == D_UNKNOWN)
749
rv = SS_CONNECTED_OUTDATES;
750
751
return rv;
752
}
753
754
/**
755
* is_valid_state_transition() - Returns an SS_ error code if the state transition is not possible
756
* @mdev: DRBD device.
757
* @ns: new state.
758
* @os: old state.
759
*/
760
static enum drbd_state_rv
761
is_valid_state_transition(struct drbd_conf *mdev, union drbd_state ns,
762
union drbd_state os)
763
{
764
enum drbd_state_rv rv = SS_SUCCESS;
765
766
if ((ns.conn == C_STARTING_SYNC_T || ns.conn == C_STARTING_SYNC_S) &&
767
os.conn > C_CONNECTED)
768
rv = SS_RESYNC_RUNNING;
769
770
if (ns.conn == C_DISCONNECTING && os.conn == C_STANDALONE)
771
rv = SS_ALREADY_STANDALONE;
772
773
if (ns.disk > D_ATTACHING && os.disk == D_DISKLESS)
774
rv = SS_IS_DISKLESS;
775
776
if (ns.conn == C_WF_CONNECTION && os.conn < C_UNCONNECTED)
777
rv = SS_NO_NET_CONFIG;
778
779
if (ns.disk == D_OUTDATED && os.disk < D_OUTDATED && os.disk != D_ATTACHING)
780
rv = SS_LOWER_THAN_OUTDATED;
781
782
if (ns.conn == C_DISCONNECTING && os.conn == C_UNCONNECTED)
783
rv = SS_IN_TRANSIENT_STATE;
784
785
if (ns.conn == os.conn && ns.conn == C_WF_REPORT_PARAMS)
786
rv = SS_IN_TRANSIENT_STATE;
787
788
if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && os.conn < C_CONNECTED)
789
rv = SS_NEED_CONNECTION;
790
791
if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) &&
792
ns.conn != os.conn && os.conn > C_CONNECTED)
793
rv = SS_RESYNC_RUNNING;
794
795
if ((ns.conn == C_STARTING_SYNC_S || ns.conn == C_STARTING_SYNC_T) &&
796
os.conn < C_CONNECTED)
797
rv = SS_NEED_CONNECTION;
798
799
if ((ns.conn == C_SYNC_TARGET || ns.conn == C_SYNC_SOURCE)
800
&& os.conn < C_WF_REPORT_PARAMS)
801
rv = SS_NEED_CONNECTION; /* No NetworkFailure -> SyncTarget etc... */
802
803
return rv;
804
}
805
806
/**
807
* sanitize_state() - Resolves implicitly necessary additional changes to a state transition
808
* @mdev: DRBD device.
809
* @os: old state.
810
* @ns: new state.
811
* @warn_sync_abort: if non-NULL, gets set to "Resync" or "Online-verify" when the new state implicitly aborts one
812
*
813
* When we lose the connection, we have to set the state of the peer's disk (pdsk)
814
* to D_UNKNOWN. This rule and many more along those lines are in this function.
815
*/
816
static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os,
817
union drbd_state ns, const char **warn_sync_abort)
818
{
819
enum drbd_fencing_p fp;
820
enum drbd_disk_state disk_min, disk_max, pdsk_min, pdsk_max;
821
822
fp = FP_DONT_CARE;
823
if (get_ldev(mdev)) {
824
fp = mdev->ldev->dc.fencing;
825
put_ldev(mdev);
826
}
827
828
/* Disallow Network errors to configure a device's network part */
829
if ((ns.conn >= C_TIMEOUT && ns.conn <= C_TEAR_DOWN) &&
830
os.conn <= C_DISCONNECTING)
831
ns.conn = os.conn;
832
833
/* After a network error (+C_TEAR_DOWN) only C_UNCONNECTED or C_DISCONNECTING can follow.
834
* If you try to go into some Sync* state, that shall fail (elsewhere). */
835
if (os.conn >= C_TIMEOUT && os.conn <= C_TEAR_DOWN &&
836
ns.conn != C_UNCONNECTED && ns.conn != C_DISCONNECTING && ns.conn <= C_TEAR_DOWN)
837
ns.conn = os.conn;
838
839
/* we cannot fail (again) if we already detached */
840
if (ns.disk == D_FAILED && os.disk == D_DISKLESS)
841
ns.disk = D_DISKLESS;
842
843
/* if we are only D_ATTACHING yet,
844
* we can (and should) go directly to D_DISKLESS. */
845
if (ns.disk == D_FAILED && os.disk == D_ATTACHING)
846
ns.disk = D_DISKLESS;
847
848
/* After C_DISCONNECTING only C_STANDALONE may follow */
849
if (os.conn == C_DISCONNECTING && ns.conn != C_STANDALONE)
850
ns.conn = os.conn;
851
852
if (ns.conn < C_CONNECTED) {
853
ns.peer_isp = 0;
854
ns.peer = R_UNKNOWN;
855
if (ns.pdsk > D_UNKNOWN || ns.pdsk < D_INCONSISTENT)
856
ns.pdsk = D_UNKNOWN;
857
}
858
859
/* Clear the aftr_isp when becoming unconfigured */
860
if (ns.conn == C_STANDALONE && ns.disk == D_DISKLESS && ns.role == R_SECONDARY)
861
ns.aftr_isp = 0;
862
863
/* Abort resync if a disk fails/detaches */
864
if (os.conn > C_CONNECTED && ns.conn > C_CONNECTED &&
865
(ns.disk <= D_FAILED || ns.pdsk <= D_FAILED)) {
866
if (warn_sync_abort)
867
*warn_sync_abort =
868
os.conn == C_VERIFY_S || os.conn == C_VERIFY_T ?
869
"Online-verify" : "Resync";
870
ns.conn = C_CONNECTED;
871
}
872
873
/* Connection breaks down before we finished "Negotiating" */
874
if (ns.conn < C_CONNECTED && ns.disk == D_NEGOTIATING &&
875
get_ldev_if_state(mdev, D_NEGOTIATING)) {
876
if (mdev->ed_uuid == mdev->ldev->md.uuid[UI_CURRENT]) {
877
ns.disk = mdev->new_state_tmp.disk;
878
ns.pdsk = mdev->new_state_tmp.pdsk;
879
} else {
880
dev_alert(DEV, "Connection lost while negotiating, no data!\n");
881
ns.disk = D_DISKLESS;
882
ns.pdsk = D_UNKNOWN;
883
}
884
put_ldev(mdev);
885
}
886
887
/* D_CONSISTENT and D_OUTDATED vanish when we get connected */
888
if (ns.conn >= C_CONNECTED && ns.conn < C_AHEAD) {
889
if (ns.disk == D_CONSISTENT || ns.disk == D_OUTDATED)
890
ns.disk = D_UP_TO_DATE;
891
if (ns.pdsk == D_CONSISTENT || ns.pdsk == D_OUTDATED)
892
ns.pdsk = D_UP_TO_DATE;
893
}
894
895
/* Implications of the connection state on the disk states */
896
disk_min = D_DISKLESS;
897
disk_max = D_UP_TO_DATE;
898
pdsk_min = D_INCONSISTENT;
899
pdsk_max = D_UNKNOWN;
900
switch ((enum drbd_conns)ns.conn) {
901
case C_WF_BITMAP_T:
902
case C_PAUSED_SYNC_T:
903
case C_STARTING_SYNC_T:
904
case C_WF_SYNC_UUID:
905
case C_BEHIND:
906
disk_min = D_INCONSISTENT;
907
disk_max = D_OUTDATED;
908
pdsk_min = D_UP_TO_DATE;
909
pdsk_max = D_UP_TO_DATE;
910
break;
911
case C_VERIFY_S:
912
case C_VERIFY_T:
913
disk_min = D_UP_TO_DATE;
914
disk_max = D_UP_TO_DATE;
915
pdsk_min = D_UP_TO_DATE;
916
pdsk_max = D_UP_TO_DATE;
917
break;
918
case C_CONNECTED:
919
disk_min = D_DISKLESS;
920
disk_max = D_UP_TO_DATE;
921
pdsk_min = D_DISKLESS;
922
pdsk_max = D_UP_TO_DATE;
923
break;
924
case C_WF_BITMAP_S:
925
case C_PAUSED_SYNC_S:
926
case C_STARTING_SYNC_S:
927
case C_AHEAD:
928
disk_min = D_UP_TO_DATE;
929
disk_max = D_UP_TO_DATE;
930
pdsk_min = D_INCONSISTENT;
931
pdsk_max = D_CONSISTENT; /* D_OUTDATED would be nice. But explicit outdate necessary */
932
break;
933
case C_SYNC_TARGET:
934
disk_min = D_INCONSISTENT;
935
disk_max = D_INCONSISTENT;
936
pdsk_min = D_UP_TO_DATE;
937
pdsk_max = D_UP_TO_DATE;
938
break;
939
case C_SYNC_SOURCE:
940
disk_min = D_UP_TO_DATE;
941
disk_max = D_UP_TO_DATE;
942
pdsk_min = D_INCONSISTENT;
943
pdsk_max = D_INCONSISTENT;
944
break;
945
case C_STANDALONE:
946
case C_DISCONNECTING:
947
case C_UNCONNECTED:
948
case C_TIMEOUT:
949
case C_BROKEN_PIPE:
950
case C_NETWORK_FAILURE:
951
case C_PROTOCOL_ERROR:
952
case C_TEAR_DOWN:
953
case C_WF_CONNECTION:
954
case C_WF_REPORT_PARAMS:
955
case C_MASK:
956
break;
957
}
958
if (ns.disk > disk_max)
959
ns.disk = disk_max;
960
961
if (ns.disk < disk_min) {
962
dev_warn(DEV, "Implicitly set disk from %s to %s\n",
963
drbd_disk_str(ns.disk), drbd_disk_str(disk_min));
964
ns.disk = disk_min;
965
}
966
if (ns.pdsk > pdsk_max)
967
ns.pdsk = pdsk_max;
968
969
if (ns.pdsk < pdsk_min) {
970
dev_warn(DEV, "Implicitly set pdsk from %s to %s\n",
971
drbd_disk_str(ns.pdsk), drbd_disk_str(pdsk_min));
972
ns.pdsk = pdsk_min;
973
}
974
975
if (fp == FP_STONITH &&
976
(ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED) &&
977
!(os.role == R_PRIMARY && os.conn < C_CONNECTED && os.pdsk > D_OUTDATED))
978
ns.susp_fen = 1; /* Suspend IO while fence-peer handler runs (peer lost) */
979
980
if (mdev->sync_conf.on_no_data == OND_SUSPEND_IO &&
981
(ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE) &&
982
!(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE))
983
ns.susp_nod = 1; /* Suspend IO while no up-to-date data is accessible */
984
985
if (ns.aftr_isp || ns.peer_isp || ns.user_isp) {
986
if (ns.conn == C_SYNC_SOURCE)
987
ns.conn = C_PAUSED_SYNC_S;
988
if (ns.conn == C_SYNC_TARGET)
989
ns.conn = C_PAUSED_SYNC_T;
990
} else {
991
if (ns.conn == C_PAUSED_SYNC_S)
992
ns.conn = C_SYNC_SOURCE;
993
if (ns.conn == C_PAUSED_SYNC_T)
994
ns.conn = C_SYNC_TARGET;
995
}
996
997
return ns;
998
}
999
1000
/* helper for __drbd_set_state */
1001
static void set_ov_position(struct drbd_conf *mdev, enum drbd_conns cs)
1002
{
1003
if (mdev->agreed_pro_version < 90)
1004
mdev->ov_start_sector = 0;
1005
mdev->rs_total = drbd_bm_bits(mdev);
1006
mdev->ov_position = 0;
1007
if (cs == C_VERIFY_T) {
1008
/* starting online verify from an arbitrary position
1009
* does not fit well into the existing protocol.
1010
* on C_VERIFY_T, we initialize ov_left and friends
1011
* implicitly in receive_DataRequest once the
1012
* first P_OV_REQUEST is received */
1013
mdev->ov_start_sector = ~(sector_t)0;
1014
} else {
1015
unsigned long bit = BM_SECT_TO_BIT(mdev->ov_start_sector);
1016
if (bit >= mdev->rs_total) {
1017
mdev->ov_start_sector =
1018
BM_BIT_TO_SECT(mdev->rs_total - 1);
1019
mdev->rs_total = 1;
1020
} else
1021
mdev->rs_total -= bit;
1022
mdev->ov_position = mdev->ov_start_sector;
1023
}
1024
mdev->ov_left = mdev->rs_total;
1025
}
1026
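/* Re-enable activity log updates after they were suspended
* (AL_SUSPENDED), e.g. once we are connected again. */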
1027
static void drbd_resume_al(struct drbd_conf *mdev)
1028
{
1029
if (test_and_clear_bit(AL_SUSPENDED, &mdev->flags))
1030
dev_info(DEV, "Resumed AL updates\n");
1031
}
1032
1033
/**
1034
* __drbd_set_state() - Set a new DRBD state
1035
* @mdev: DRBD device.
1036
* @ns: new state.
1037
* @flags: Flags
1038
* @done: Optional completion that gets completed after after_state_ch() has finished
1039
*
1040
* Caller needs to hold req_lock, and global_state_lock. Do not call directly.
1041
*/
1042
enum drbd_state_rv
1043
__drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
1044
enum chg_state_flags flags, struct completion *done)
1045
{
1046
union drbd_state os;
1047
enum drbd_state_rv rv = SS_SUCCESS;
1048
const char *warn_sync_abort = NULL;
1049
struct after_state_chg_work *ascw;
1050
1051
os = mdev->state;
1052
1053
ns = sanitize_state(mdev, os, ns, &warn_sync_abort);
1054
1055
if (ns.i == os.i)
1056
return SS_NOTHING_TO_DO;
1057
1058
if (!(flags & CS_HARD)) {
1059
/* pre-state-change checks; only look at ns */
1060
/* See drbd_state_sw_errors in drbd_strings.c */
1061
1062
rv = is_valid_state(mdev, ns);
1063
if (rv < SS_SUCCESS) {
1064
/* If the old state was illegal as well, then let
1065
this happen...*/
1066
1067
if (is_valid_state(mdev, os) == rv)
1068
rv = is_valid_state_transition(mdev, ns, os);
1069
} else
1070
rv = is_valid_state_transition(mdev, ns, os);
1071
}
1072
1073
if (rv < SS_SUCCESS) {
1074
if (flags & CS_VERBOSE)
1075
print_st_err(mdev, os, ns, rv);
1076
return rv;
1077
}
1078
1079
if (warn_sync_abort)
1080
dev_warn(DEV, "%s aborted.\n", warn_sync_abort);
1081
1082
{
1083
char *pbp, pb[300];
1084
pbp = pb;
1085
*pbp = 0;
1086
if (ns.role != os.role)
1087
pbp += sprintf(pbp, "role( %s -> %s ) ",
1088
drbd_role_str(os.role),
1089
drbd_role_str(ns.role));
1090
if (ns.peer != os.peer)
1091
pbp += sprintf(pbp, "peer( %s -> %s ) ",
1092
drbd_role_str(os.peer),
1093
drbd_role_str(ns.peer));
1094
if (ns.conn != os.conn)
1095
pbp += sprintf(pbp, "conn( %s -> %s ) ",
1096
drbd_conn_str(os.conn),
1097
drbd_conn_str(ns.conn));
1098
if (ns.disk != os.disk)
1099
pbp += sprintf(pbp, "disk( %s -> %s ) ",
1100
drbd_disk_str(os.disk),
1101
drbd_disk_str(ns.disk));
1102
if (ns.pdsk != os.pdsk)
1103
pbp += sprintf(pbp, "pdsk( %s -> %s ) ",
1104
drbd_disk_str(os.pdsk),
1105
drbd_disk_str(ns.pdsk));
1106
if (is_susp(ns) != is_susp(os))
1107
pbp += sprintf(pbp, "susp( %d -> %d ) ",
1108
is_susp(os),
1109
is_susp(ns));
1110
if (ns.aftr_isp != os.aftr_isp)
1111
pbp += sprintf(pbp, "aftr_isp( %d -> %d ) ",
1112
os.aftr_isp,
1113
ns.aftr_isp);
1114
if (ns.peer_isp != os.peer_isp)
1115
pbp += sprintf(pbp, "peer_isp( %d -> %d ) ",
1116
os.peer_isp,
1117
ns.peer_isp);
1118
if (ns.user_isp != os.user_isp)
1119
pbp += sprintf(pbp, "user_isp( %d -> %d ) ",
1120
os.user_isp,
1121
ns.user_isp);
1122
dev_info(DEV, "%s\n", pb);
1123
}
1124
1125
/* solve the race between becoming unconfigured,
1126
* worker doing the cleanup, and
1127
* admin reconfiguring us:
1128
* on (re)configure, first set CONFIG_PENDING,
1129
* then wait for a potentially exiting worker,
1130
* start the worker, and schedule one no_op.
1131
* then proceed with configuration.
1132
*/
1133
if (ns.disk == D_DISKLESS &&
1134
ns.conn == C_STANDALONE &&
1135
ns.role == R_SECONDARY &&
1136
!test_and_set_bit(CONFIG_PENDING, &mdev->flags))
1137
set_bit(DEVICE_DYING, &mdev->flags);
1138
1139
/* if we are going -> D_FAILED or D_DISKLESS, grab one extra reference
1140
* on the ldev here, to be sure the transition -> D_DISKLESS resp.
1141
* drbd_ldev_destroy() won't happen before our corresponding
1142
* after_state_ch works run, where we put_ldev again. */
1143
if ((os.disk != D_FAILED && ns.disk == D_FAILED) ||
1144
(os.disk != D_DISKLESS && ns.disk == D_DISKLESS))
1145
atomic_inc(&mdev->local_cnt);
1146
1147
mdev->state = ns;
1148
1149
if (os.disk == D_ATTACHING && ns.disk >= D_NEGOTIATING)
1150
drbd_print_uuids(mdev, "attached to UUIDs");
1151
1152
wake_up(&mdev->misc_wait);
1153
wake_up(&mdev->state_wait);
1154
1155
/* aborted verify run. log the last position */
1156
if ((os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) &&
1157
ns.conn < C_CONNECTED) {
1158
mdev->ov_start_sector =
1159
BM_BIT_TO_SECT(drbd_bm_bits(mdev) - mdev->ov_left);
1160
dev_info(DEV, "Online Verify reached sector %llu\n",
1161
(unsigned long long)mdev->ov_start_sector);
1162
}
1163
1164
if ((os.conn == C_PAUSED_SYNC_T || os.conn == C_PAUSED_SYNC_S) &&
1165
(ns.conn == C_SYNC_TARGET || ns.conn == C_SYNC_SOURCE)) {
1166
dev_info(DEV, "Syncer continues.\n");
1167
mdev->rs_paused += (long)jiffies
1168
-(long)mdev->rs_mark_time[mdev->rs_last_mark];
1169
if (ns.conn == C_SYNC_TARGET)
1170
mod_timer(&mdev->resync_timer, jiffies);
1171
}
1172
1173
if ((os.conn == C_SYNC_TARGET || os.conn == C_SYNC_SOURCE) &&
1174
(ns.conn == C_PAUSED_SYNC_T || ns.conn == C_PAUSED_SYNC_S)) {
1175
dev_info(DEV, "Resync suspended\n");
1176
mdev->rs_mark_time[mdev->rs_last_mark] = jiffies;
1177
}
1178
1179
if (os.conn == C_CONNECTED &&
1180
(ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T)) {
1181
unsigned long now = jiffies;
1182
int i;
1183
1184
set_ov_position(mdev, ns.conn);
1185
mdev->rs_start = now;
1186
mdev->rs_last_events = 0;
1187
mdev->rs_last_sect_ev = 0;
1188
mdev->ov_last_oos_size = 0;
1189
mdev->ov_last_oos_start = 0;
1190
1191
for (i = 0; i < DRBD_SYNC_MARKS; i++) {
1192
mdev->rs_mark_left[i] = mdev->ov_left;
1193
mdev->rs_mark_time[i] = now;
1194
}
1195
1196
drbd_rs_controller_reset(mdev);
1197
1198
if (ns.conn == C_VERIFY_S) {
1199
dev_info(DEV, "Starting Online Verify from sector %llu\n",
1200
(unsigned long long)mdev->ov_position);
1201
mod_timer(&mdev->resync_timer, jiffies);
1202
}
1203
}
1204
1205
if (get_ldev(mdev)) {
1206
u32 mdf = mdev->ldev->md.flags & ~(MDF_CONSISTENT|MDF_PRIMARY_IND|
1207
MDF_CONNECTED_IND|MDF_WAS_UP_TO_DATE|
1208
MDF_PEER_OUT_DATED|MDF_CRASHED_PRIMARY);
1209
1210
if (test_bit(CRASHED_PRIMARY, &mdev->flags))
1211
mdf |= MDF_CRASHED_PRIMARY;
1212
if (mdev->state.role == R_PRIMARY ||
1213
(mdev->state.pdsk < D_INCONSISTENT && mdev->state.peer == R_PRIMARY))
1214
mdf |= MDF_PRIMARY_IND;
1215
if (mdev->state.conn > C_WF_REPORT_PARAMS)
1216
mdf |= MDF_CONNECTED_IND;
1217
if (mdev->state.disk > D_INCONSISTENT)
1218
mdf |= MDF_CONSISTENT;
1219
if (mdev->state.disk > D_OUTDATED)
1220
mdf |= MDF_WAS_UP_TO_DATE;
1221
if (mdev->state.pdsk <= D_OUTDATED && mdev->state.pdsk >= D_INCONSISTENT)
1222
mdf |= MDF_PEER_OUT_DATED;
1223
if (mdf != mdev->ldev->md.flags) {
1224
mdev->ldev->md.flags = mdf;
1225
drbd_md_mark_dirty(mdev);
1226
}
1227
if (os.disk < D_CONSISTENT && ns.disk >= D_CONSISTENT)
1228
drbd_set_ed_uuid(mdev, mdev->ldev->md.uuid[UI_CURRENT]);
1229
put_ldev(mdev);
1230
}
1231
1232
/* Peer was forced D_UP_TO_DATE & R_PRIMARY, consider a resync */
1233
if (os.disk == D_INCONSISTENT && os.pdsk == D_INCONSISTENT &&
1234
os.peer == R_SECONDARY && ns.peer == R_PRIMARY)
1235
set_bit(CONSIDER_RESYNC, &mdev->flags);
1236
1237
/* Receiver should clean up itself */
1238
if (os.conn != C_DISCONNECTING && ns.conn == C_DISCONNECTING)
1239
drbd_thread_stop_nowait(&mdev->receiver);
1240
1241
/* Now that the receiver has finished cleaning up after itself, it should die */
1242
if (os.conn != C_STANDALONE && ns.conn == C_STANDALONE)
1243
drbd_thread_stop_nowait(&mdev->receiver);
1244
1245
/* Upon network failure, we need to restart the receiver. */
1246
if (os.conn > C_TEAR_DOWN &&
1247
ns.conn <= C_TEAR_DOWN && ns.conn >= C_TIMEOUT)
1248
drbd_thread_restart_nowait(&mdev->receiver);
1249
1250
/* Resume AL writing if we get a connection */
1251
if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED)
1252
drbd_resume_al(mdev);
1253
1254
ascw = kmalloc(sizeof(*ascw), GFP_ATOMIC);
1255
if (ascw) {
1256
ascw->os = os;
1257
ascw->ns = ns;
1258
ascw->flags = flags;
1259
ascw->w.cb = w_after_state_ch;
1260
ascw->done = done;
1261
drbd_queue_work(&mdev->data.work, &ascw->w);
1262
} else {
1263
dev_warn(DEV, "Could not kmalloc an ascw\n");
1264
}
1265
1266
return rv;
1267
}
1268
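/* Worker callback queued by __drbd_set_state(): runs the potentially
* sleeping after_state_ch() actions and completes the optional waiter. */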
1269
static int w_after_state_ch(struct drbd_conf *mdev, struct drbd_work *w, int unused)
1270
{
1271
struct after_state_chg_work *ascw =
1272
container_of(w, struct after_state_chg_work, w);
1273
after_state_ch(mdev, ascw->os, ascw->ns, ascw->flags);
1274
if (ascw->flags & CS_WAIT_COMPLETE) {
1275
D_ASSERT(ascw->done != NULL);
1276
complete(ascw->done);
1277
}
1278
kfree(ascw);
1279
1280
return 1;
1281
}
1282
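/* "After bitmap write" callback for starting a full sync: kick off the
* actual resync once the set_n_write bitmap I/O has finished, or fall
* back to C_CONNECTED if writing the bitmap failed. */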
1283
static void abw_start_sync(struct drbd_conf *mdev, int rv)
1284
{
1285
if (rv) {
1286
dev_err(DEV, "Writing the bitmap failed not starting resync.\n");
1287
_drbd_request_state(mdev, NS(conn, C_CONNECTED), CS_VERBOSE);
1288
return;
1289
}
1290
1291
switch (mdev->state.conn) {
1292
case C_STARTING_SYNC_T:
1293
_drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
1294
break;
1295
case C_STARTING_SYNC_S:
1296
drbd_start_resync(mdev, C_SYNC_SOURCE);
1297
break;
1298
}
1299
}
1300
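/* Run a bitmap I/O function directly from the worker thread:
* suspend I/O without blocking, call io_fn() under the bitmap lock,
* then resume I/O again. Must only be called by the worker itself. */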
1301
int drbd_bitmap_io_from_worker(struct drbd_conf *mdev,
1302
int (*io_fn)(struct drbd_conf *),
1303
char *why, enum bm_flag flags)
1304
{
1305
int rv;
1306
1307
D_ASSERT(current == mdev->worker.task);
1308
1309
/* open coded non-blocking drbd_suspend_io(mdev); */
1310
set_bit(SUSPEND_IO, &mdev->flags);
1311
1312
drbd_bm_lock(mdev, why, flags);
1313
rv = io_fn(mdev);
1314
drbd_bm_unlock(mdev);
1315
1316
drbd_resume_io(mdev);
1317
1318
return rv;
1319
}
1320
1321
/**
1322
* after_state_ch() - Perform after state change actions that may sleep
1323
* @mdev: DRBD device.
1324
* @os: old state.
1325
* @ns: new state.
1326
* @flags: Flags
1327
*/
1328
static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
1329
union drbd_state ns, enum chg_state_flags flags)
1330
{
1331
enum drbd_fencing_p fp;
1332
enum drbd_req_event what = nothing;
1333
union drbd_state nsm = (union drbd_state){ .i = -1 };
1334
1335
if (os.conn != C_CONNECTED && ns.conn == C_CONNECTED) {
1336
clear_bit(CRASHED_PRIMARY, &mdev->flags);
1337
if (mdev->p_uuid)
1338
mdev->p_uuid[UI_FLAGS] &= ~((u64)2);
1339
}
1340
1341
fp = FP_DONT_CARE;
1342
if (get_ldev(mdev)) {
1343
fp = mdev->ldev->dc.fencing;
1344
put_ldev(mdev);
1345
}
1346
1347
/* Inform userspace about the change... */
1348
drbd_bcast_state(mdev, ns);
1349
1350
if (!(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE) &&
1351
(ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE))
1352
drbd_khelper(mdev, "pri-on-incon-degr");
1353
1354
/* Here we have the actions that are performed after a
1355
state change. This function might sleep */
1356
1357
nsm.i = -1;
1358
if (ns.susp_nod) {
1359
if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED)
1360
what = resend;
1361
1362
if (os.disk == D_ATTACHING && ns.disk > D_ATTACHING)
1363
what = restart_frozen_disk_io;
1364
1365
if (what != nothing)
1366
nsm.susp_nod = 0;
1367
}
1368
1369
if (ns.susp_fen) {
1370
/* case1: The outdate peer handler is successful: */
1371
if (os.pdsk > D_OUTDATED && ns.pdsk <= D_OUTDATED) {
1372
tl_clear(mdev);
1373
if (test_bit(NEW_CUR_UUID, &mdev->flags)) {
1374
drbd_uuid_new_current(mdev);
1375
clear_bit(NEW_CUR_UUID, &mdev->flags);
1376
}
1377
spin_lock_irq(&mdev->req_lock);
1378
_drbd_set_state(_NS(mdev, susp_fen, 0), CS_VERBOSE, NULL);
1379
spin_unlock_irq(&mdev->req_lock);
1380
}
1381
/* case2: The connection was established again: */
1382
if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) {
1383
clear_bit(NEW_CUR_UUID, &mdev->flags);
1384
what = resend;
1385
nsm.susp_fen = 0;
1386
}
1387
}
1388
1389
if (what != nothing) {
1390
spin_lock_irq(&mdev->req_lock);
1391
_tl_restart(mdev, what);
1392
nsm.i &= mdev->state.i;
1393
_drbd_set_state(mdev, nsm, CS_VERBOSE, NULL);
1394
spin_unlock_irq(&mdev->req_lock);
1395
}
1396
1397
/* Became sync source. With protocol >= 96, we still need to send out
1398
* the sync uuid now. Need to do that before any drbd_send_state, or
1399
* the other side may go "paused sync" before receiving the sync uuids,
1400
* which is unexpected. */
1401
if ((os.conn != C_SYNC_SOURCE && os.conn != C_PAUSED_SYNC_S) &&
1402
(ns.conn == C_SYNC_SOURCE || ns.conn == C_PAUSED_SYNC_S) &&
1403
mdev->agreed_pro_version >= 96 && get_ldev(mdev)) {
1404
drbd_gen_and_send_sync_uuid(mdev);
1405
put_ldev(mdev);
1406
}
1407
1408
/* Do not change the order of the if above and the two below... */
1409
if (os.pdsk == D_DISKLESS && ns.pdsk > D_DISKLESS) { /* attach on the peer */
1410
drbd_send_uuids(mdev);
1411
drbd_send_state(mdev);
1412
}
1413
/* No point in queuing send_bitmap if we don't have a connection
1414
* anymore, so check also the _current_ state, not only the new state
1415
* at the time this work was queued. */
1416
if (os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S &&
1417
mdev->state.conn == C_WF_BITMAP_S)
1418
drbd_queue_bitmap_io(mdev, &drbd_send_bitmap, NULL,
1419
"send_bitmap (WFBitMapS)",
1420
BM_LOCKED_TEST_ALLOWED);
1421
1422
/* Lost contact with the peer's copy of the data */
1423
if ((os.pdsk >= D_INCONSISTENT &&
1424
os.pdsk != D_UNKNOWN &&
1425
os.pdsk != D_OUTDATED)
1426
&& (ns.pdsk < D_INCONSISTENT ||
1427
ns.pdsk == D_UNKNOWN ||
1428
ns.pdsk == D_OUTDATED)) {
1429
if (get_ldev(mdev)) {
1430
if ((ns.role == R_PRIMARY || ns.peer == R_PRIMARY) &&
1431
mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) {
1432
if (is_susp(mdev->state)) {
1433
set_bit(NEW_CUR_UUID, &mdev->flags);
1434
} else {
1435
drbd_uuid_new_current(mdev);
1436
drbd_send_uuids(mdev);
1437
}
1438
}
1439
put_ldev(mdev);
1440
}
1441
}
1442
1443
if (ns.pdsk < D_INCONSISTENT && get_ldev(mdev)) {
1444
if (ns.peer == R_PRIMARY && mdev->ldev->md.uuid[UI_BITMAP] == 0) {
1445
drbd_uuid_new_current(mdev);
1446
drbd_send_uuids(mdev);
1447
}
1448
1449
/* D_DISKLESS Peer becomes secondary */
1450
if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY)
1451
/* We may still be Primary ourselves.
1452
* No harm done if the bitmap still changes,
1453
* redirtied pages will follow later. */
1454
drbd_bitmap_io_from_worker(mdev, &drbd_bm_write,
1455
"demote diskless peer", BM_LOCKED_SET_ALLOWED);
1456
put_ldev(mdev);
1457
}
1458
1459
/* Write out all changed bits on demote.
1460
* Though, no need to do that just yet
1461
* if there is a resync going on still */
1462
if (os.role == R_PRIMARY && ns.role == R_SECONDARY &&
1463
mdev->state.conn <= C_CONNECTED && get_ldev(mdev)) {
1464
/* No changes to the bitmap expected this time, so assert that,
1465
* even though no harm was done if it did change. */
1466
drbd_bitmap_io_from_worker(mdev, &drbd_bm_write,
1467
"demote", BM_LOCKED_TEST_ALLOWED);
1468
put_ldev(mdev);
1469
}
1470
1471
/* Last part of the attaching process ... */
1472
if (ns.conn >= C_CONNECTED &&
1473
os.disk == D_ATTACHING && ns.disk == D_NEGOTIATING) {
1474
drbd_send_sizes(mdev, 0, 0); /* to start sync... */
1475
drbd_send_uuids(mdev);
1476
drbd_send_state(mdev);
1477
}
1478
1479
/* We want to pause/continue resync, tell peer. */
1480
if (ns.conn >= C_CONNECTED &&
1481
((os.aftr_isp != ns.aftr_isp) ||
1482
(os.user_isp != ns.user_isp)))
1483
drbd_send_state(mdev);
1484
1485
/* In case one of the isp bits got set, suspend other devices. */
1486
if ((!os.aftr_isp && !os.peer_isp && !os.user_isp) &&
1487
(ns.aftr_isp || ns.peer_isp || ns.user_isp))
1488
suspend_other_sg(mdev);
1489
1490
/* Make sure the peer gets informed about possible state
1491
changes (ISP bits) while we were in WFReportParams. */
1492
if (os.conn == C_WF_REPORT_PARAMS && ns.conn >= C_CONNECTED)
1493
drbd_send_state(mdev);
1494
1495
if (os.conn != C_AHEAD && ns.conn == C_AHEAD)
1496
drbd_send_state(mdev);
1497
1498
/* We are in the process of starting a full sync... */
1499
if ((os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) ||
1500
(os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S))
1501
/* no other bitmap changes expected during this phase */
1502
drbd_queue_bitmap_io(mdev,
1503
&drbd_bmio_set_n_write, &abw_start_sync,
1504
"set_n_write from StartingSync", BM_LOCKED_TEST_ALLOWED);
1505
1506
/* We are invalidating ourselves... */
1507
if (os.conn < C_CONNECTED && ns.conn < C_CONNECTED &&
1508
os.disk > D_INCONSISTENT && ns.disk == D_INCONSISTENT)
1509
/* other bitmap operation expected during this phase */
1510
drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, NULL,
1511
"set_n_write from invalidate", BM_LOCKED_MASK);
1512
1513
/* first half of local IO error, failure to attach,
1514
* or administrative detach */
1515
if (os.disk != D_FAILED && ns.disk == D_FAILED) {
1516
enum drbd_io_error_p eh;
1517
int was_io_error;
1518
/* corresponding get_ldev was in __drbd_set_state, to serialize
1519
* our cleanup here with the transition to D_DISKLESS,
1520
* so it is safe to dereference ldev here. */
1521
eh = mdev->ldev->dc.on_io_error;
1522
was_io_error = test_and_clear_bit(WAS_IO_ERROR, &mdev->flags);
1523
1524
/* current state still has to be D_FAILED,
1525
* there is only one way out: to D_DISKLESS,
1526
* and that may only happen after our put_ldev below. */
1527
if (mdev->state.disk != D_FAILED)
1528
dev_err(DEV,
1529
"ASSERT FAILED: disk is %s during detach\n",
1530
drbd_disk_str(mdev->state.disk));
1531
1532
if (drbd_send_state(mdev))
1533
dev_warn(DEV, "Notified peer that I am detaching my disk\n");
1534
else
1535
dev_err(DEV, "Sending state for detaching disk failed\n");
1536
1537
drbd_rs_cancel_all(mdev);
1538
1539
/* In case we want to get something to stable storage still,
1540
* this may be the last chance.
1541
* Following put_ldev may transition to D_DISKLESS. */
1542
drbd_md_sync(mdev);
1543
put_ldev(mdev);
1544
1545
if (was_io_error && eh == EP_CALL_HELPER)
1546
drbd_khelper(mdev, "local-io-error");
1547
}
1548
1549
/* second half of local IO error, failure to attach,
1550
* or administrative detach,
1551
* after local_cnt references have reached zero again */
1552
if (os.disk != D_DISKLESS && ns.disk == D_DISKLESS) {
1553
/* We must still be diskless,
1554
* re-attach has to be serialized with this! */
1555
if (mdev->state.disk != D_DISKLESS)
1556
dev_err(DEV,
1557
"ASSERT FAILED: disk is %s while going diskless\n",
1558
drbd_disk_str(mdev->state.disk));
1559
1560
mdev->rs_total = 0;
1561
mdev->rs_failed = 0;
1562
atomic_set(&mdev->rs_pending_cnt, 0);
1563
1564
if (drbd_send_state(mdev))
1565
dev_warn(DEV, "Notified peer that I'm now diskless.\n");
1566
/* corresponding get_ldev in __drbd_set_state
1567
* this may finally trigger drbd_ldev_destroy. */
1568
put_ldev(mdev);
1569
}
1570
1571
/* Notify peer that I had a local IO error and did not detach. */
1572
if (os.disk == D_UP_TO_DATE && ns.disk == D_INCONSISTENT)
1573
drbd_send_state(mdev);
1574
1575
/* Disks got bigger while they were detached */
1576
if (ns.disk > D_NEGOTIATING && ns.pdsk > D_NEGOTIATING &&
1577
test_and_clear_bit(RESYNC_AFTER_NEG, &mdev->flags)) {
1578
if (ns.conn == C_CONNECTED)
1579
resync_after_online_grow(mdev);
1580
}
1581
1582
/* A resync finished or aborted, wake paused devices... */
1583
if ((os.conn > C_CONNECTED && ns.conn <= C_CONNECTED) ||
1584
(os.peer_isp && !ns.peer_isp) ||
1585
(os.user_isp && !ns.user_isp))
1586
resume_next_sg(mdev);
1587
1588
/* sync target done with resync. Explicitly notify peer, even though
1589
* it should (at least for non-empty resyncs) already know itself. */
1590
if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED)
1591
drbd_send_state(mdev);
1592
1593
/* This triggers bitmap writeout of potentially still unwritten pages
1594
* if the resync finished cleanly, or aborted because of peer disk
1595
* failure, or because of connection loss.
1596
* For resync aborted because of local disk failure, we cannot do
1597
* any bitmap writeout anymore.
1598
* No harm done if some bits change during this phase.
1599
*/
1600
if (os.conn > C_CONNECTED && ns.conn <= C_CONNECTED && get_ldev(mdev)) {
1601
drbd_queue_bitmap_io(mdev, &drbd_bm_write, NULL,
1602
"write from resync_finished", BM_LOCKED_SET_ALLOWED);
1603
put_ldev(mdev);
1604
}
1605
1606
/* free tl_hash if we got thawed and are C_STANDALONE */
1607
if (ns.conn == C_STANDALONE && !is_susp(ns) && mdev->tl_hash)
1608
drbd_free_tl_hash(mdev);
1609
1610
/* Upon network connection, we need to start the receiver */
1611
if (os.conn == C_STANDALONE && ns.conn == C_UNCONNECTED)
1612
drbd_thread_start(&mdev->receiver);
1613
1614
/* Terminate worker thread if we are unconfigured - it will be
1615
restarted as needed... */
1616
if (ns.disk == D_DISKLESS &&
1617
ns.conn == C_STANDALONE &&
1618
ns.role == R_SECONDARY) {
1619
if (os.aftr_isp != ns.aftr_isp)
1620
resume_next_sg(mdev);
1621
/* set in __drbd_set_state, unless CONFIG_PENDING was set */
1622
if (test_bit(DEVICE_DYING, &mdev->flags))
1623
drbd_thread_stop_nowait(&mdev->worker);
1624
}
1625
1626
drbd_md_sync(mdev);
1627
}
1628
1629
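/* Common kthread entry point for the receiver, worker and asender
* threads: run thi->function(), and loop if the thread was put into
* Restarting (e.g. on re-connect) while it was exiting. */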
1630
static int drbd_thread_setup(void *arg)
1631
{
1632
struct drbd_thread *thi = (struct drbd_thread *) arg;
1633
struct drbd_conf *mdev = thi->mdev;
1634
unsigned long flags;
1635
int retval;
1636
1637
restart:
1638
retval = thi->function(thi);
1639
1640
spin_lock_irqsave(&thi->t_lock, flags);
1641
1642
/* if the receiver has been "Exiting", the last thing it did
1643
* was set the conn state to "StandAlone",
1644
* if now a re-connect request comes in, conn state goes C_UNCONNECTED,
1645
* and receiver thread will be "started".
1646
* drbd_thread_start needs to set "Restarting" in that case.
1647
* t_state check and assignment needs to be within the same spinlock,
1648
* so either thread_start sees Exiting, and can remap to Restarting,
1649
* or thread_start sees None, and can proceed as normal.
1650
*/
1651
1652
if (thi->t_state == Restarting) {
1653
dev_info(DEV, "Restarting %s\n", current->comm);
1654
thi->t_state = Running;
1655
spin_unlock_irqrestore(&thi->t_lock, flags);
1656
goto restart;
1657
}
1658
1659
thi->task = NULL;
1660
thi->t_state = None;
1661
smp_mb();
1662
complete(&thi->stop);
1663
spin_unlock_irqrestore(&thi->t_lock, flags);
1664
1665
dev_info(DEV, "Terminating %s\n", current->comm);
1666
1667
/* Release mod reference taken when thread was started */
1668
module_put(THIS_MODULE);
1669
return retval;
1670
}
1671
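/* Initialize a struct drbd_thread; the actual kthread is created later
* by drbd_thread_start(). */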
1672
static void drbd_thread_init(struct drbd_conf *mdev, struct drbd_thread *thi,
1673
int (*func) (struct drbd_thread *))
1674
{
1675
spin_lock_init(&thi->t_lock);
1676
thi->task = NULL;
1677
thi->t_state = None;
1678
thi->function = func;
1679
thi->mdev = mdev;
1680
}
1681
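/* Start one of the per-device threads, or mark it Restarting if it is
* currently Exiting. Returns true on success, false if the module
* reference or the kthread could not be obtained. */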
1682
int drbd_thread_start(struct drbd_thread *thi)
1683
{
1684
struct drbd_conf *mdev = thi->mdev;
1685
struct task_struct *nt;
1686
unsigned long flags;
1687
1688
const char *me =
1689
thi == &mdev->receiver ? "receiver" :
1690
thi == &mdev->asender ? "asender" :
1691
thi == &mdev->worker ? "worker" : "NONSENSE";
1692
1693
/* is used from state engine doing drbd_thread_stop_nowait,
1694
* while holding the req lock irqsave */
1695
spin_lock_irqsave(&thi->t_lock, flags);
1696
1697
switch (thi->t_state) {
1698
case None:
1699
dev_info(DEV, "Starting %s thread (from %s [%d])\n",
1700
me, current->comm, current->pid);
1701
1702
/* Get ref on module for thread - this is released when thread exits */
1703
if (!try_module_get(THIS_MODULE)) {
1704
dev_err(DEV, "Failed to get module reference in drbd_thread_start\n");
1705
spin_unlock_irqrestore(&thi->t_lock, flags);
1706
return false;
1707
}
1708
1709
init_completion(&thi->stop);
1710
D_ASSERT(thi->task == NULL);
1711
thi->reset_cpu_mask = 1;
1712
thi->t_state = Running;
1713
spin_unlock_irqrestore(&thi->t_lock, flags);
1714
flush_signals(current); /* otherwise we may get -ERESTARTNOINTR */
1715
1716
nt = kthread_create(drbd_thread_setup, (void *) thi,
1717
"drbd%d_%s", mdev_to_minor(mdev), me);
1718
1719
if (IS_ERR(nt)) {
1720
dev_err(DEV, "Couldn't start thread\n");
1721
1722
module_put(THIS_MODULE);
1723
return false;
1724
}
1725
spin_lock_irqsave(&thi->t_lock, flags);
1726
thi->task = nt;
1727
thi->t_state = Running;
1728
spin_unlock_irqrestore(&thi->t_lock, flags);
1729
wake_up_process(nt);
1730
break;
1731
case Exiting:
1732
thi->t_state = Restarting;
1733
dev_info(DEV, "Restarting %s thread (from %s [%d])\n",
1734
me, current->comm, current->pid);
1735
/* fall through */
1736
case Running:
1737
case Restarting:
1738
default:
1739
spin_unlock_irqrestore(&thi->t_lock, flags);
1740
break;
1741
}
1742
1743
return true;
1744
}
1745
1746
1747
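/* Request a thread to stop (Exiting) or stop-and-restart (Restarting).
* If the thread is not running (None), optionally start it instead.
* A DRBD_SIGKILL is forced onto the task unless we are stopping ourselves;
* with wait != 0 we block on thi->stop until drbd_thread_setup() completes it. */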
void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait)
1748
{
1749
unsigned long flags;
1750
1751
enum drbd_thread_state ns = restart ? Restarting : Exiting;
1752
1753
/* may be called from state engine, holding the req lock irqsave */
1754
spin_lock_irqsave(&thi->t_lock, flags);
1755
1756
if (thi->t_state == None) {
1757
spin_unlock_irqrestore(&thi->t_lock, flags);
1758
if (restart)
1759
drbd_thread_start(thi);
1760
return;
1761
}
1762
1763
if (thi->t_state != ns) {
1764
if (thi->task == NULL) {
1765
spin_unlock_irqrestore(&thi->t_lock, flags);
1766
return;
1767
}
1768
1769
thi->t_state = ns;
1770
smp_mb();
1771
init_completion(&thi->stop);
1772
if (thi->task != current)
1773
force_sig(DRBD_SIGKILL, thi->task);
1774
1775
}
1776
1777
spin_unlock_irqrestore(&thi->t_lock, flags);
1778
1779
if (wait)
1780
wait_for_completion(&thi->stop);
1781
}
1782
1783
#ifdef CONFIG_SMP
1784
/**
* drbd_calc_cpu_mask() - Generate CPU masks, spread over all CPUs
* @mdev: DRBD device.
*
* Forces all threads of a device onto the same CPU. This is beneficial for
* DRBD's performance. May be overridden by the user's configuration.
*/
1791
void drbd_calc_cpu_mask(struct drbd_conf *mdev)
1792
{
1793
int ord, cpu;
1794
1795
/* user override. */
1796
if (cpumask_weight(mdev->cpu_mask))
1797
return;
1798
1799
ord = mdev_to_minor(mdev) % cpumask_weight(cpu_online_mask);
1800
for_each_online_cpu(cpu) {
1801
if (ord-- == 0) {
1802
cpumask_set_cpu(cpu, mdev->cpu_mask);
1803
return;
1804
}
1805
}
1806
/* should not be reached */
1807
cpumask_setall(mdev->cpu_mask);
1808
}
1809
1810
/**
* drbd_thread_current_set_cpu() - modifies the cpu mask of the _current_ thread
* @mdev: DRBD device.
*
* Call this in the "main loop" of _all_ threads; no need for any mutex, current
* won't die prematurely.
*/
1817
void drbd_thread_current_set_cpu(struct drbd_conf *mdev)
1818
{
1819
struct task_struct *p = current;
1820
struct drbd_thread *thi =
1821
p == mdev->asender.task ? &mdev->asender :
1822
p == mdev->receiver.task ? &mdev->receiver :
1823
p == mdev->worker.task ? &mdev->worker :
1824
NULL;
1825
ERR_IF(thi == NULL)
1826
return;
1827
if (!thi->reset_cpu_mask)
1828
return;
1829
thi->reset_cpu_mask = 0;
1830
set_cpus_allowed_ptr(p, mdev->cpu_mask);
1831
}
1832
#endif
1833
1834
/* the appropriate socket mutex must be held already */
1835
int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock,
1836
enum drbd_packets cmd, struct p_header80 *h,
1837
size_t size, unsigned msg_flags)
1838
{
1839
int sent, ok;
1840
1841
ERR_IF(!h) return false;
1842
ERR_IF(!size) return false;
1843
1844
h->magic = BE_DRBD_MAGIC;
1845
h->command = cpu_to_be16(cmd);
1846
h->length = cpu_to_be16(size-sizeof(struct p_header80));
1847
1848
sent = drbd_send(mdev, sock, h, size, msg_flags);
1849
1850
ok = (sent == size);
1851
if (!ok && !signal_pending(current))
1852
dev_warn(DEV, "short sent %s size=%d sent=%d\n",
1853
cmdname(cmd), (int)size, sent);
1854
return ok;
1855
}
1856
1857
/* don't pass the socket. we may only look at it
1858
* when we hold the appropriate socket mutex.
1859
*/
1860
int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket,
1861
enum drbd_packets cmd, struct p_header80 *h, size_t size)
1862
{
1863
int ok = 0;
1864
struct socket *sock;
1865
1866
if (use_data_socket) {
1867
mutex_lock(&mdev->data.mutex);
1868
sock = mdev->data.socket;
1869
} else {
1870
mutex_lock(&mdev->meta.mutex);
1871
sock = mdev->meta.socket;
1872
}
1873
1874
/* drbd_disconnect() could have called drbd_free_sock()
1875
* while we were waiting in down()... */
1876
if (likely(sock != NULL))
1877
ok = _drbd_send_cmd(mdev, sock, cmd, h, size, 0);
1878
1879
if (use_data_socket)
1880
mutex_unlock(&mdev->data.mutex);
1881
else
1882
mutex_unlock(&mdev->meta.mutex);
1883
return ok;
1884
}
1885
1886
int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packets cmd, char *data,
1887
size_t size)
1888
{
1889
struct p_header80 h;
1890
int ok;
1891
1892
h.magic = BE_DRBD_MAGIC;
1893
h.command = cpu_to_be16(cmd);
1894
h.length = cpu_to_be16(size);
1895
1896
if (!drbd_get_data_sock(mdev))
1897
return 0;
1898
1899
ok = (sizeof(h) ==
1900
drbd_send(mdev, mdev->data.socket, &h, sizeof(h), 0));
1901
ok = ok && (size ==
1902
drbd_send(mdev, mdev->data.socket, data, size, 0));
1903
1904
drbd_put_data_sock(mdev);
1905
1906
return ok;
1907
}
1908
1909
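/* The on-the-wire size of the sync parameter packet depends on the agreed
* protocol version: apv <= 87 sends a bare p_rs_param, apv 88 appends the
* verify_alg string, apv 89..94 uses p_rs_param_89, and apv >= 95 uses
* p_rs_param_95, which also carries the c_plan_ahead, c_delay_target,
* c_fill_target and c_max_rate fields. */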
int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc)
1910
{
1911
struct p_rs_param_95 *p;
1912
struct socket *sock;
1913
int size, rv;
1914
const int apv = mdev->agreed_pro_version;
1915
1916
size = apv <= 87 ? sizeof(struct p_rs_param)
1917
: apv == 88 ? sizeof(struct p_rs_param)
1918
+ strlen(mdev->sync_conf.verify_alg) + 1
1919
: apv <= 94 ? sizeof(struct p_rs_param_89)
1920
: /* apv >= 95 */ sizeof(struct p_rs_param_95);
1921
1922
/* used from admin command context and receiver/worker context.
1923
* to avoid kmalloc, grab the socket right here,
1924
* then use the pre-allocated sbuf there */
1925
mutex_lock(&mdev->data.mutex);
1926
sock = mdev->data.socket;
1927
1928
if (likely(sock != NULL)) {
1929
enum drbd_packets cmd = apv >= 89 ? P_SYNC_PARAM89 : P_SYNC_PARAM;
1930
1931
p = &mdev->data.sbuf.rs_param_95;
1932
1933
/* initialize verify_alg and csums_alg */
1934
memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
1935
1936
p->rate = cpu_to_be32(sc->rate);
1937
p->c_plan_ahead = cpu_to_be32(sc->c_plan_ahead);
1938
p->c_delay_target = cpu_to_be32(sc->c_delay_target);
1939
p->c_fill_target = cpu_to_be32(sc->c_fill_target);
1940
p->c_max_rate = cpu_to_be32(sc->c_max_rate);
1941
1942
if (apv >= 88)
1943
strcpy(p->verify_alg, mdev->sync_conf.verify_alg);
1944
if (apv >= 89)
1945
strcpy(p->csums_alg, mdev->sync_conf.csums_alg);
1946
1947
rv = _drbd_send_cmd(mdev, sock, cmd, &p->head, size, 0);
1948
} else
1949
rv = 0; /* not ok */
1950
1951
mutex_unlock(&mdev->data.mutex);
1952
1953
return rv;
1954
}
1955
1956
int drbd_send_protocol(struct drbd_conf *mdev)
1957
{
1958
struct p_protocol *p;
1959
int size, cf, rv;
1960
1961
size = sizeof(struct p_protocol);
1962
1963
if (mdev->agreed_pro_version >= 87)
1964
size += strlen(mdev->net_conf->integrity_alg) + 1;
1965
1966
/* we must not recurse into our own queue,
1967
* as that is blocked during handshake */
1968
p = kmalloc(size, GFP_NOIO);
1969
if (p == NULL)
1970
return 0;
1971
1972
p->protocol = cpu_to_be32(mdev->net_conf->wire_protocol);
1973
p->after_sb_0p = cpu_to_be32(mdev->net_conf->after_sb_0p);
1974
p->after_sb_1p = cpu_to_be32(mdev->net_conf->after_sb_1p);
1975
p->after_sb_2p = cpu_to_be32(mdev->net_conf->after_sb_2p);
1976
p->two_primaries = cpu_to_be32(mdev->net_conf->two_primaries);
1977
1978
cf = 0;
1979
if (mdev->net_conf->want_lose)
1980
cf |= CF_WANT_LOSE;
1981
if (mdev->net_conf->dry_run) {
1982
if (mdev->agreed_pro_version >= 92)
1983
cf |= CF_DRY_RUN;
1984
else {
1985
dev_err(DEV, "--dry-run is not supported by peer");
1986
kfree(p);
1987
return -1;
1988
}
1989
}
1990
p->conn_flags = cpu_to_be32(cf);
1991
1992
if (mdev->agreed_pro_version >= 87)
1993
strcpy(p->integrity_alg, mdev->net_conf->integrity_alg);
1994
1995
rv = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_PROTOCOL,
1996
(struct p_header80 *)p, size);
1997
kfree(p);
1998
return rv;
1999
}
2000
2001
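/* uuid_flags sent in p.uuid[UI_FLAGS]: 1 = net_conf->want_lose is set,
* 2 = CRASHED_PRIMARY was set, 4 = the disk is about to become D_INCONSISTENT,
* 8 = skip the initial sync (see drbd_send_uuids_skip_initial_sync()). */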
int _drbd_send_uuids(struct drbd_conf *mdev, u64 uuid_flags)
2002
{
2003
struct p_uuids p;
2004
int i;
2005
2006
if (!get_ldev_if_state(mdev, D_NEGOTIATING))
2007
return 1;
2008
2009
for (i = UI_CURRENT; i < UI_SIZE; i++)
2010
p.uuid[i] = mdev->ldev ? cpu_to_be64(mdev->ldev->md.uuid[i]) : 0;
2011
2012
mdev->comm_bm_set = drbd_bm_total_weight(mdev);
2013
p.uuid[UI_SIZE] = cpu_to_be64(mdev->comm_bm_set);
2014
uuid_flags |= mdev->net_conf->want_lose ? 1 : 0;
2015
uuid_flags |= test_bit(CRASHED_PRIMARY, &mdev->flags) ? 2 : 0;
2016
uuid_flags |= mdev->new_state_tmp.disk == D_INCONSISTENT ? 4 : 0;
2017
p.uuid[UI_FLAGS] = cpu_to_be64(uuid_flags);
2018
2019
put_ldev(mdev);
2020
2021
return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_UUIDS,
2022
(struct p_header80 *)&p, sizeof(p));
2023
}
2024
2025
int drbd_send_uuids(struct drbd_conf *mdev)
2026
{
2027
return _drbd_send_uuids(mdev, 0);
2028
}
2029
2030
int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev)
2031
{
2032
return _drbd_send_uuids(mdev, 8);
2033
}
2034
2035
void drbd_print_uuids(struct drbd_conf *mdev, const char *text)
2036
{
2037
if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
2038
u64 *uuid = mdev->ldev->md.uuid;
2039
dev_info(DEV, "%s %016llX:%016llX:%016llX:%016llX\n",
2040
text,
2041
(unsigned long long)uuid[UI_CURRENT],
2042
(unsigned long long)uuid[UI_BITMAP],
2043
(unsigned long long)uuid[UI_HISTORY_START],
2044
(unsigned long long)uuid[UI_HISTORY_END]);
2045
put_ldev(mdev);
2046
} else {
2047
dev_info(DEV, "%s effective data uuid: %016llX\n",
2048
text,
2049
(unsigned long long)mdev->ed_uuid);
2050
}
2051
}
2052
2053
int drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev)
2054
{
2055
struct p_rs_uuid p;
2056
u64 uuid;
2057
2058
D_ASSERT(mdev->state.disk == D_UP_TO_DATE);
2059
2060
uuid = mdev->ldev->md.uuid[UI_BITMAP] + UUID_NEW_BM_OFFSET;
2061
drbd_uuid_set(mdev, UI_BITMAP, uuid);
2062
drbd_print_uuids(mdev, "updated sync UUID");
2063
drbd_md_sync(mdev);
2064
p.uuid = cpu_to_be64(uuid);
2065
2066
return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_SYNC_UUID,
2067
(struct p_header80 *)&p, sizeof(p));
2068
}
2069
2070
int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags flags)
2071
{
2072
struct p_sizes p;
2073
sector_t d_size, u_size;
2074
int q_order_type, max_bio_size;
2075
int ok;
2076
2077
if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
2078
D_ASSERT(mdev->ldev->backing_bdev);
2079
d_size = drbd_get_max_capacity(mdev->ldev);
2080
u_size = mdev->ldev->dc.disk_size;
2081
q_order_type = drbd_queue_order_type(mdev);
2082
max_bio_size = queue_max_hw_sectors(mdev->ldev->backing_bdev->bd_disk->queue) << 9;
2083
max_bio_size = min_t(int, max_bio_size, DRBD_MAX_BIO_SIZE);
2084
put_ldev(mdev);
2085
} else {
2086
d_size = 0;
2087
u_size = 0;
2088
q_order_type = QUEUE_ORDERED_NONE;
2089
max_bio_size = DRBD_MAX_BIO_SIZE; /* ... multiple BIOs per peer_request */
2090
}
2091
2092
p.d_size = cpu_to_be64(d_size);
2093
p.u_size = cpu_to_be64(u_size);
2094
p.c_size = cpu_to_be64(trigger_reply ? 0 : drbd_get_capacity(mdev->this_bdev));
2095
p.max_bio_size = cpu_to_be32(max_bio_size);
2096
p.queue_order_type = cpu_to_be16(q_order_type);
2097
p.dds_flags = cpu_to_be16(flags);
2098
2099
ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_SIZES,
2100
(struct p_header80 *)&p, sizeof(p));
2101
return ok;
2102
}
2103
2104
/**
2105
* drbd_send_state() - Sends the drbd state to the peer
2106
* @mdev: DRBD device.
2107
*/
2108
int drbd_send_state(struct drbd_conf *mdev)
2109
{
2110
struct socket *sock;
2111
struct p_state p;
2112
int ok = 0;
2113
2114
/* Grab the state lock so we won't send state if we're in the middle
* of a cluster-wide state change on another thread */
2116
drbd_state_lock(mdev);
2117
2118
mutex_lock(&mdev->data.mutex);
2119
2120
p.state = cpu_to_be32(mdev->state.i); /* Within the send mutex */
2121
sock = mdev->data.socket;
2122
2123
if (likely(sock != NULL)) {
2124
ok = _drbd_send_cmd(mdev, sock, P_STATE,
2125
(struct p_header80 *)&p, sizeof(p), 0);
2126
}
2127
2128
mutex_unlock(&mdev->data.mutex);
2129
2130
drbd_state_unlock(mdev);
2131
return ok;
2132
}
2133
2134
int drbd_send_state_req(struct drbd_conf *mdev,
2135
union drbd_state mask, union drbd_state val)
2136
{
2137
struct p_req_state p;
2138
2139
p.mask = cpu_to_be32(mask.i);
2140
p.val = cpu_to_be32(val.i);
2141
2142
return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_STATE_CHG_REQ,
2143
(struct p_header80 *)&p, sizeof(p));
2144
}
2145
2146
int drbd_send_sr_reply(struct drbd_conf *mdev, enum drbd_state_rv retcode)
2147
{
2148
struct p_req_state_reply p;
2149
2150
p.retcode = cpu_to_be32(retcode);
2151
2152
return drbd_send_cmd(mdev, USE_META_SOCKET, P_STATE_CHG_REPLY,
2153
(struct p_header80 *)&p, sizeof(p));
2154
}
2155
2156
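/* Run-length + VLI encode as much of the bitmap as fits into one
* compressed-bitmap packet. Returns the number of code bytes written,
* 0 if the feature is disabled/not negotiated, there is nothing to do,
* the chunk turned out to be incompressible, or encoding failed (the
* caller then falls back to plain bitmap words), and -1 if a zero run
* length is seen, i.e. the bitmap changed while being scanned. */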
int fill_bitmap_rle_bits(struct drbd_conf *mdev,
2157
struct p_compressed_bm *p,
2158
struct bm_xfer_ctx *c)
2159
{
2160
struct bitstream bs;
2161
unsigned long plain_bits;
2162
unsigned long tmp;
2163
unsigned long rl;
2164
unsigned len;
2165
unsigned toggle;
2166
int bits;
2167
2168
/* may we use this feature? */
2169
if ((mdev->sync_conf.use_rle == 0) ||
2170
(mdev->agreed_pro_version < 90))
2171
return 0;
2172
2173
if (c->bit_offset >= c->bm_bits)
2174
return 0; /* nothing to do. */
2175
2176
/* use at most this many bytes */
2177
bitstream_init(&bs, p->code, BM_PACKET_VLI_BYTES_MAX, 0);
2178
memset(p->code, 0, BM_PACKET_VLI_BYTES_MAX);
2179
/* plain bits covered in this code string */
2180
plain_bits = 0;
2181
2182
/* p->encoding & 0x80 stores whether the first run length is set.
2183
* bit offset is implicit.
2184
* start with toggle == 2 to be able to tell the first iteration */
2185
toggle = 2;
2186
2187
/* see how many plain bits we can stuff into one packet
* using RLE and VLI. */
2189
do {
2190
tmp = (toggle == 0) ? _drbd_bm_find_next_zero(mdev, c->bit_offset)
2191
: _drbd_bm_find_next(mdev, c->bit_offset);
2192
if (tmp == -1UL)
2193
tmp = c->bm_bits;
2194
rl = tmp - c->bit_offset;
2195
2196
if (toggle == 2) { /* first iteration */
2197
if (rl == 0) {
2198
/* the first checked bit was set,
2199
* store start value, */
2200
DCBP_set_start(p, 1);
2201
/* but skip encoding of zero run length */
2202
toggle = !toggle;
2203
continue;
2204
}
2205
DCBP_set_start(p, 0);
2206
}
2207
2208
/* paranoia: catch zero runlength.
2209
* can only happen if bitmap is modified while we scan it. */
2210
if (rl == 0) {
2211
dev_err(DEV, "unexpected zero runlength while encoding bitmap "
2212
"t:%u bo:%lu\n", toggle, c->bit_offset);
2213
return -1;
2214
}
2215
2216
bits = vli_encode_bits(&bs, rl);
2217
if (bits == -ENOBUFS) /* buffer full */
2218
break;
2219
if (bits <= 0) {
2220
dev_err(DEV, "error while encoding bitmap: %d\n", bits);
2221
return 0;
2222
}
2223
2224
toggle = !toggle;
2225
plain_bits += rl;
2226
c->bit_offset = tmp;
2227
} while (c->bit_offset < c->bm_bits);
2228
2229
len = bs.cur.b - p->code + !!bs.cur.bit;
2230
2231
if (plain_bits < (len << 3)) {
2232
/* incompressible with this method.
2233
* we need to rewind both word and bit position. */
2234
c->bit_offset -= plain_bits;
2235
bm_xfer_ctx_bit_to_word_offset(c);
2236
c->bit_offset = c->word_offset * BITS_PER_LONG;
2237
return 0;
2238
}
2239
2240
/* RLE + VLI was able to compress it just fine.
2241
* update c->word_offset. */
2242
bm_xfer_ctx_bit_to_word_offset(c);
2243
2244
/* store pad_bits */
2245
DCBP_set_pad_bits(p, (8 - bs.cur.bit) & 0x7);
2246
2247
return len;
2248
}
2249
2250
/**
2251
* send_bitmap_rle_or_plain
2252
*
2253
* Return 0 when done, 1 when another iteration is needed, and a negative error
2254
* code upon failure.
2255
*/
2256
static int
2257
send_bitmap_rle_or_plain(struct drbd_conf *mdev,
2258
struct p_header80 *h, struct bm_xfer_ctx *c)
2259
{
2260
struct p_compressed_bm *p = (void*)h;
2261
unsigned long num_words;
2262
int len;
2263
int ok;
2264
2265
len = fill_bitmap_rle_bits(mdev, p, c);
2266
2267
if (len < 0)
2268
return -EIO;
2269
2270
if (len) {
2271
DCBP_set_code(p, RLE_VLI_Bits);
2272
ok = _drbd_send_cmd(mdev, mdev->data.socket, P_COMPRESSED_BITMAP, h,
2273
sizeof(*p) + len, 0);
2274
2275
c->packets[0]++;
2276
c->bytes[0] += sizeof(*p) + len;
2277
2278
if (c->bit_offset >= c->bm_bits)
2279
len = 0; /* DONE */
2280
} else {
2281
/* was not compressible.
2282
* send a buffer full of plain text bits instead. */
2283
num_words = min_t(size_t, BM_PACKET_WORDS, c->bm_words - c->word_offset);
2284
len = num_words * sizeof(long);
2285
if (len)
2286
drbd_bm_get_lel(mdev, c->word_offset, num_words, (unsigned long*)h->payload);
2287
ok = _drbd_send_cmd(mdev, mdev->data.socket, P_BITMAP,
2288
h, sizeof(struct p_header80) + len, 0);
2289
c->word_offset += num_words;
2290
c->bit_offset = c->word_offset * BITS_PER_LONG;
2291
2292
c->packets[1]++;
2293
c->bytes[1] += sizeof(struct p_header80) + len;
2294
2295
if (c->bit_offset > c->bm_bits)
2296
c->bit_offset = c->bm_bits;
2297
}
2298
if (ok) {
2299
if (len == 0) {
2300
INFO_bm_xfer_stats(mdev, "send", c);
2301
return 0;
2302
} else
2303
return 1;
2304
}
2305
return -EIO;
2306
}
2307
2308
/* See the comment at receive_bitmap() */
2309
int _drbd_send_bitmap(struct drbd_conf *mdev)
2310
{
2311
struct bm_xfer_ctx c;
2312
struct p_header80 *p;
2313
int err;
2314
2315
ERR_IF(!mdev->bitmap) return false;
2316
2317
/* maybe we should use some per thread scratch page,
2318
* and allocate that during initial device creation? */
2319
p = (struct p_header80 *) __get_free_page(GFP_NOIO);
2320
if (!p) {
2321
dev_err(DEV, "failed to allocate one page buffer in %s\n", __func__);
2322
return false;
2323
}
2324
2325
if (get_ldev(mdev)) {
2326
if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC)) {
2327
dev_info(DEV, "Writing the whole bitmap, MDF_FullSync was set.\n");
2328
drbd_bm_set_all(mdev);
2329
if (drbd_bm_write(mdev)) {
2330
/* write_bm did fail! Leave full sync flag set in Meta P_DATA
2331
* but otherwise process as per normal - need to tell other
2332
* side that a full resync is required! */
2333
dev_err(DEV, "Failed to write bitmap to disk!\n");
2334
} else {
2335
drbd_md_clear_flag(mdev, MDF_FULL_SYNC);
2336
drbd_md_sync(mdev);
2337
}
2338
}
2339
put_ldev(mdev);
2340
}
2341
2342
c = (struct bm_xfer_ctx) {
2343
.bm_bits = drbd_bm_bits(mdev),
2344
.bm_words = drbd_bm_words(mdev),
2345
};
2346
2347
do {
2348
err = send_bitmap_rle_or_plain(mdev, p, &c);
2349
} while (err > 0);
2350
2351
free_page((unsigned long) p);
2352
return err == 0;
2353
}
2354
2355
int drbd_send_bitmap(struct drbd_conf *mdev)
2356
{
2357
int err;
2358
2359
if (!drbd_get_data_sock(mdev))
2360
return -1;
2361
err = !_drbd_send_bitmap(mdev);
2362
drbd_put_data_sock(mdev);
2363
return err;
2364
}
2365
2366
int drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, u32 set_size)
2367
{
2368
int ok;
2369
struct p_barrier_ack p;
2370
2371
p.barrier = barrier_nr;
2372
p.set_size = cpu_to_be32(set_size);
2373
2374
if (mdev->state.conn < C_CONNECTED)
2375
return false;
2376
ok = drbd_send_cmd(mdev, USE_META_SOCKET, P_BARRIER_ACK,
2377
(struct p_header80 *)&p, sizeof(p));
2378
return ok;
2379
}
2380
2381
/**
2382
* _drbd_send_ack() - Sends an ack packet
2383
* @mdev: DRBD device.
2384
* @cmd: Packet command code.
2385
* @sector: sector, needs to be in big endian byte order
* @blksize: size in bytes, needs to be in big endian byte order
* @block_id: Id, big endian byte order
2388
*/
2389
static int _drbd_send_ack(struct drbd_conf *mdev, enum drbd_packets cmd,
2390
u64 sector,
2391
u32 blksize,
2392
u64 block_id)
2393
{
2394
int ok;
2395
struct p_block_ack p;
2396
2397
p.sector = sector;
2398
p.block_id = block_id;
2399
p.blksize = blksize;
2400
p.seq_num = cpu_to_be32(atomic_add_return(1, &mdev->packet_seq));
2401
2402
if (!mdev->meta.socket || mdev->state.conn < C_CONNECTED)
2403
return false;
2404
ok = drbd_send_cmd(mdev, USE_META_SOCKET, cmd,
2405
(struct p_header80 *)&p, sizeof(p));
2406
return ok;
2407
}
2408
2409
/* dp->sector and dp->block_id already/still in network byte order,
2410
* data_size is payload size according to dp->head,
2411
* and may need to be corrected for digest size. */
2412
int drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packets cmd,
2413
struct p_data *dp, int data_size)
2414
{
2415
data_size -= (mdev->agreed_pro_version >= 87 && mdev->integrity_r_tfm) ?
2416
crypto_hash_digestsize(mdev->integrity_r_tfm) : 0;
2417
return _drbd_send_ack(mdev, cmd, dp->sector, cpu_to_be32(data_size),
2418
dp->block_id);
2419
}
2420
2421
int drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packets cmd,
2422
struct p_block_req *rp)
2423
{
2424
return _drbd_send_ack(mdev, cmd, rp->sector, rp->blksize, rp->block_id);
2425
}
2426
2427
/**
2428
* drbd_send_ack() - Sends an ack packet
2429
* @mdev: DRBD device.
2430
* @cmd: Packet command code.
2431
* @e: Epoch entry.
2432
*/
2433
int drbd_send_ack(struct drbd_conf *mdev,
2434
enum drbd_packets cmd, struct drbd_epoch_entry *e)
2435
{
2436
return _drbd_send_ack(mdev, cmd,
2437
cpu_to_be64(e->sector),
2438
cpu_to_be32(e->size),
2439
e->block_id);
2440
}
2441
2442
/* This function misuses the block_id field to signal whether the blocks
* are in sync or not. */
2444
int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packets cmd,
2445
sector_t sector, int blksize, u64 block_id)
2446
{
2447
return _drbd_send_ack(mdev, cmd,
2448
cpu_to_be64(sector),
2449
cpu_to_be32(blksize),
2450
cpu_to_be64(block_id));
2451
}
2452
2453
int drbd_send_drequest(struct drbd_conf *mdev, int cmd,
2454
sector_t sector, int size, u64 block_id)
2455
{
2456
int ok;
2457
struct p_block_req p;
2458
2459
p.sector = cpu_to_be64(sector);
2460
p.block_id = block_id;
2461
p.blksize = cpu_to_be32(size);
2462
2463
ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, cmd,
2464
(struct p_header80 *)&p, sizeof(p));
2465
return ok;
2466
}
2467
2468
int drbd_send_drequest_csum(struct drbd_conf *mdev,
2469
sector_t sector, int size,
2470
void *digest, int digest_size,
2471
enum drbd_packets cmd)
2472
{
2473
int ok;
2474
struct p_block_req p;
2475
2476
p.sector = cpu_to_be64(sector);
2477
p.block_id = BE_DRBD_MAGIC + 0xbeef;
2478
p.blksize = cpu_to_be32(size);
2479
2480
p.head.magic = BE_DRBD_MAGIC;
2481
p.head.command = cpu_to_be16(cmd);
2482
p.head.length = cpu_to_be16(sizeof(p) - sizeof(struct p_header80) + digest_size);
2483
2484
mutex_lock(&mdev->data.mutex);
2485
2486
ok = (sizeof(p) == drbd_send(mdev, mdev->data.socket, &p, sizeof(p), 0));
2487
ok = ok && (digest_size == drbd_send(mdev, mdev->data.socket, digest, digest_size, 0));
2488
2489
mutex_unlock(&mdev->data.mutex);
2490
2491
return ok;
2492
}
2493
2494
int drbd_send_ov_request(struct drbd_conf *mdev, sector_t sector, int size)
2495
{
2496
int ok;
2497
struct p_block_req p;
2498
2499
p.sector = cpu_to_be64(sector);
2500
p.block_id = BE_DRBD_MAGIC + 0xbabe;
2501
p.blksize = cpu_to_be32(size);
2502
2503
ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_OV_REQUEST,
2504
(struct p_header80 *)&p, sizeof(p));
2505
return ok;
2506
}
2507
2508
/* called on sndtimeo
* returns false if we should retry,
* true if we think the connection is dead
*/
2512
static int we_should_drop_the_connection(struct drbd_conf *mdev, struct socket *sock)
2513
{
2514
int drop_it;
2515
/* long elapsed = (long)(jiffies - mdev->last_received); */
2516
2517
drop_it = mdev->meta.socket == sock
2518
|| !mdev->asender.task
2519
|| get_t_state(&mdev->asender) != Running
2520
|| mdev->state.conn < C_CONNECTED;
2521
2522
if (drop_it)
2523
return true;
2524
2525
drop_it = !--mdev->ko_count;
2526
if (!drop_it) {
2527
dev_err(DEV, "[%s/%d] sock_sendmsg time expired, ko = %u\n",
2528
current->comm, current->pid, mdev->ko_count);
2529
request_ping(mdev);
2530
}
2531
2532
return drop_it; /* && (mdev->state == R_PRIMARY) */;
2533
}
2534
2535
/* The idea of sendpage seems to be to put some kind of reference
2536
* to the page into the skb, and to hand it over to the NIC. In
2537
* this process get_page() gets called.
2538
*
2539
* As soon as the page was really sent over the network put_page()
2540
* gets called by some part of the network layer. [ NIC driver? ]
2541
*
2542
* [ get_page() / put_page() increment/decrement the count. If count
2543
* reaches 0 the page will be freed. ]
2544
*
2545
* This works nicely with pages from FSs.
2546
* But this means that in protocol A we might signal IO completion too early!
2547
*
2548
* In order not to corrupt data during a resync we must make sure
* that we do not reuse our own buffer pages (EEs) too early; therefore
* we have the net_ee list.
2551
*
2552
* XFS seems to have problems, still, it submits pages with page_count == 0!
2553
* As a workaround, we disable sendpage on pages
2554
* with page_count == 0 or PageSlab.
2555
*/
2556
static int _drbd_no_send_page(struct drbd_conf *mdev, struct page *page,
2557
int offset, size_t size, unsigned msg_flags)
2558
{
2559
int sent = drbd_send(mdev, mdev->data.socket, kmap(page) + offset, size, msg_flags);
2560
kunmap(page);
2561
if (sent == size)
2562
mdev->send_cnt += size>>9;
2563
return sent == size;
2564
}
2565
2566
static int _drbd_send_page(struct drbd_conf *mdev, struct page *page,
2567
int offset, size_t size, unsigned msg_flags)
2568
{
2569
mm_segment_t oldfs = get_fs();
2570
int sent, ok;
2571
int len = size;
2572
2573
/* e.g. XFS meta- & log-data is in slab pages, which have a
2574
* page_count of 0 and/or have PageSlab() set.
2575
* we cannot use send_page for those, as that does get_page();
2576
* put_page(); and would cause either a VM_BUG directly, or
2577
* __page_cache_release a page that would actually still be referenced
2578
* by someone, leading to some obscure delayed Oops somewhere else. */
2579
if (disable_sendpage || (page_count(page) < 1) || PageSlab(page))
2580
return _drbd_no_send_page(mdev, page, offset, size, msg_flags);
2581
2582
msg_flags |= MSG_NOSIGNAL;
2583
drbd_update_congested(mdev);
2584
set_fs(KERNEL_DS);
2585
do {
2586
sent = mdev->data.socket->ops->sendpage(mdev->data.socket, page,
2587
offset, len,
2588
msg_flags);
2589
if (sent == -EAGAIN) {
2590
if (we_should_drop_the_connection(mdev,
2591
mdev->data.socket))
2592
break;
2593
else
2594
continue;
2595
}
2596
if (sent <= 0) {
2597
dev_warn(DEV, "%s: size=%d len=%d sent=%d\n",
2598
__func__, (int)size, len, sent);
2599
break;
2600
}
2601
len -= sent;
2602
offset += sent;
2603
} while (len > 0 /* THINK && mdev->cstate >= C_CONNECTED*/);
2604
set_fs(oldfs);
2605
clear_bit(NET_CONGESTED, &mdev->flags);
2606
2607
ok = (len == 0);
2608
if (likely(ok))
2609
mdev->send_cnt += size>>9;
2610
return ok;
2611
}
2612
2613
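/* Both bio senders below iterate over the bio's segments and hint all but
* the last page with MSG_MORE. _drbd_send_bio() always copies the data
* through kmap() (see _drbd_no_send_page), while _drbd_send_zc_bio() goes
* through _drbd_send_page(), which uses ->sendpage() for zero-copy where
* possible. */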
static int _drbd_send_bio(struct drbd_conf *mdev, struct bio *bio)
2614
{
2615
struct bio_vec *bvec;
2616
int i;
2617
/* hint all but last page with MSG_MORE */
2618
__bio_for_each_segment(bvec, bio, i, 0) {
2619
if (!_drbd_no_send_page(mdev, bvec->bv_page,
2620
bvec->bv_offset, bvec->bv_len,
2621
i == bio->bi_vcnt -1 ? 0 : MSG_MORE))
2622
return 0;
2623
}
2624
return 1;
2625
}
2626
2627
static int _drbd_send_zc_bio(struct drbd_conf *mdev, struct bio *bio)
2628
{
2629
struct bio_vec *bvec;
2630
int i;
2631
/* hint all but last page with MSG_MORE */
2632
__bio_for_each_segment(bvec, bio, i, 0) {
2633
if (!_drbd_send_page(mdev, bvec->bv_page,
2634
bvec->bv_offset, bvec->bv_len,
2635
i == bio->bi_vcnt -1 ? 0 : MSG_MORE))
2636
return 0;
2637
}
2638
return 1;
2639
}
2640
2641
static int _drbd_send_zc_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e)
2642
{
2643
struct page *page = e->pages;
2644
unsigned len = e->size;
2645
/* hint all but last page with MSG_MORE */
2646
page_chain_for_each(page) {
2647
unsigned l = min_t(unsigned, len, PAGE_SIZE);
2648
if (!_drbd_send_page(mdev, page, 0, l,
2649
page_chain_next(page) ? MSG_MORE : 0))
2650
return 0;
2651
len -= l;
2652
}
2653
return 1;
2654
}
2655
2656
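/* Map bio rw flags to the DP_* flags carried in the data packet.
* Peers with agreed_pro_version < 95 only understand DP_RW_SYNC;
* newer peers also get DP_FUA, DP_FLUSH and DP_DISCARD. */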
static u32 bio_flags_to_wire(struct drbd_conf *mdev, unsigned long bi_rw)
2657
{
2658
if (mdev->agreed_pro_version >= 95)
2659
return (bi_rw & REQ_SYNC ? DP_RW_SYNC : 0) |
2660
(bi_rw & REQ_FUA ? DP_FUA : 0) |
2661
(bi_rw & REQ_FLUSH ? DP_FLUSH : 0) |
2662
(bi_rw & REQ_DISCARD ? DP_DISCARD : 0);
2663
else
2664
return bi_rw & REQ_SYNC ? DP_RW_SYNC : 0;
2665
}
2666
2667
/* Used to send write requests
2668
* R_PRIMARY -> Peer (P_DATA)
2669
*/
2670
int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req)
2671
{
2672
int ok = 1;
2673
struct p_data p;
2674
unsigned int dp_flags = 0;
2675
void *dgb;
2676
int dgs;
2677
2678
if (!drbd_get_data_sock(mdev))
2679
return 0;
2680
2681
dgs = (mdev->agreed_pro_version >= 87 && mdev->integrity_w_tfm) ?
2682
crypto_hash_digestsize(mdev->integrity_w_tfm) : 0;
2683
2684
if (req->size <= DRBD_MAX_SIZE_H80_PACKET) {
2685
p.head.h80.magic = BE_DRBD_MAGIC;
2686
p.head.h80.command = cpu_to_be16(P_DATA);
2687
p.head.h80.length =
2688
cpu_to_be16(sizeof(p) - sizeof(union p_header) + dgs + req->size);
2689
} else {
2690
p.head.h95.magic = BE_DRBD_MAGIC_BIG;
2691
p.head.h95.command = cpu_to_be16(P_DATA);
2692
p.head.h95.length =
2693
cpu_to_be32(sizeof(p) - sizeof(union p_header) + dgs + req->size);
2694
}
2695
2696
p.sector = cpu_to_be64(req->sector);
2697
p.block_id = (unsigned long)req;
2698
p.seq_num = cpu_to_be32(req->seq_num =
2699
atomic_add_return(1, &mdev->packet_seq));
2700
2701
dp_flags = bio_flags_to_wire(mdev, req->master_bio->bi_rw);
2702
2703
if (mdev->state.conn >= C_SYNC_SOURCE &&
2704
mdev->state.conn <= C_PAUSED_SYNC_T)
2705
dp_flags |= DP_MAY_SET_IN_SYNC;
2706
2707
p.dp_flags = cpu_to_be32(dp_flags);
2708
set_bit(UNPLUG_REMOTE, &mdev->flags);
2709
ok = (sizeof(p) ==
2710
drbd_send(mdev, mdev->data.socket, &p, sizeof(p), dgs ? MSG_MORE : 0));
2711
if (ok && dgs) {
2712
dgb = mdev->int_dig_out;
2713
drbd_csum_bio(mdev, mdev->integrity_w_tfm, req->master_bio, dgb);
2714
ok = dgs == drbd_send(mdev, mdev->data.socket, dgb, dgs, 0);
2715
}
2716
if (ok) {
2717
/* For protocol A, we have to memcpy the payload into
* socket buffers, as we may complete right away
* as soon as we handed it over to tcp, at which point the data
* pages may become invalid.
*
* With data integrity enabled, we copy it as well, so we can be
* sure that even if the bio pages may still be modified, it
* won't change the data on the wire; thus, if the digest checks
* out ok after sending on this side, but does not fit on the
* receiving side, we have surely detected corruption elsewhere.
*/
2728
if (mdev->net_conf->wire_protocol == DRBD_PROT_A || dgs)
2729
ok = _drbd_send_bio(mdev, req->master_bio);
2730
else
2731
ok = _drbd_send_zc_bio(mdev, req->master_bio);
2732
2733
/* double check digest, sometimes buffers have been modified in flight. */
2734
if (dgs > 0 && dgs <= 64) {
2735
/* 64 byte, 512 bit, is the largest digest size
2736
* currently supported in kernel crypto. */
2737
unsigned char digest[64];
2738
drbd_csum_bio(mdev, mdev->integrity_w_tfm, req->master_bio, digest);
2739
if (memcmp(mdev->int_dig_out, digest, dgs)) {
2740
dev_warn(DEV,
2741
"Digest mismatch, buffer modified by upper layers during write: %llus +%u\n",
2742
(unsigned long long)req->sector, req->size);
2743
}
2744
} /* else if (dgs > 64) {
2745
... Be noisy about digest too large ...
2746
} */
2747
}
2748
2749
drbd_put_data_sock(mdev);
2750
2751
return ok;
2752
}
2753
2754
/* answer packet, used to send data back for read requests:
2755
* Peer -> (diskless) R_PRIMARY (P_DATA_REPLY)
2756
* C_SYNC_SOURCE -> C_SYNC_TARGET (P_RS_DATA_REPLY)
2757
*/
2758
int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd,
2759
struct drbd_epoch_entry *e)
2760
{
2761
int ok;
2762
struct p_data p;
2763
void *dgb;
2764
int dgs;
2765
2766
dgs = (mdev->agreed_pro_version >= 87 && mdev->integrity_w_tfm) ?
2767
crypto_hash_digestsize(mdev->integrity_w_tfm) : 0;
2768
2769
if (e->size <= DRBD_MAX_SIZE_H80_PACKET) {
2770
p.head.h80.magic = BE_DRBD_MAGIC;
2771
p.head.h80.command = cpu_to_be16(cmd);
2772
p.head.h80.length =
2773
cpu_to_be16(sizeof(p) - sizeof(struct p_header80) + dgs + e->size);
2774
} else {
2775
p.head.h95.magic = BE_DRBD_MAGIC_BIG;
2776
p.head.h95.command = cpu_to_be16(cmd);
2777
p.head.h95.length =
2778
cpu_to_be32(sizeof(p) - sizeof(struct p_header80) + dgs + e->size);
2779
}
2780
2781
p.sector = cpu_to_be64(e->sector);
2782
p.block_id = e->block_id;
2783
/* p.seq_num = 0; No sequence numbers here.. */
2784
2785
/* Only called by our kernel thread.
* This one may be interrupted by DRBD_SIG and/or DRBD_SIGKILL
* in response to an admin command or module unload.
*/
2789
if (!drbd_get_data_sock(mdev))
2790
return 0;
2791
2792
ok = sizeof(p) == drbd_send(mdev, mdev->data.socket, &p, sizeof(p), dgs ? MSG_MORE : 0);
2793
if (ok && dgs) {
2794
dgb = mdev->int_dig_out;
2795
drbd_csum_ee(mdev, mdev->integrity_w_tfm, e, dgb);
2796
ok = dgs == drbd_send(mdev, mdev->data.socket, dgb, dgs, 0);
2797
}
2798
if (ok)
2799
ok = _drbd_send_zc_ee(mdev, e);
2800
2801
drbd_put_data_sock(mdev);
2802
2803
return ok;
2804
}
2805
2806
int drbd_send_oos(struct drbd_conf *mdev, struct drbd_request *req)
2807
{
2808
struct p_block_desc p;
2809
2810
p.sector = cpu_to_be64(req->sector);
2811
p.blksize = cpu_to_be32(req->size);
2812
2813
return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_OUT_OF_SYNC, &p.head, sizeof(p));
2814
}
2815
2816
/*
drbd_send distinguishes two cases:

Packets sent via the data socket "sock"
and packets sent via the meta data socket "msock"

                  sock                       msock
-----------------+--------------------------+-----------------------------
timeout           conf.timeout / 2           conf.timeout / 2
timeout action    send a ping via msock      Abort communication
                                             and close all sockets
*/
2828
2829
/*
2830
* you must have down()ed the appropriate [m]sock_mutex elsewhere!
2831
*/
2832
int drbd_send(struct drbd_conf *mdev, struct socket *sock,
2833
void *buf, size_t size, unsigned msg_flags)
2834
{
2835
struct kvec iov;
2836
struct msghdr msg;
2837
int rv, sent = 0;
2838
2839
if (!sock)
2840
return -1000;
2841
2842
/* THINK if (signal_pending) return ... ? */
2843
2844
iov.iov_base = buf;
2845
iov.iov_len = size;
2846
2847
msg.msg_name = NULL;
2848
msg.msg_namelen = 0;
2849
msg.msg_control = NULL;
2850
msg.msg_controllen = 0;
2851
msg.msg_flags = msg_flags | MSG_NOSIGNAL;
2852
2853
if (sock == mdev->data.socket) {
2854
mdev->ko_count = mdev->net_conf->ko_count;
2855
drbd_update_congested(mdev);
2856
}
2857
do {
2858
/* STRANGE
2859
* tcp_sendmsg does _not_ use its size parameter at all ?
2860
*
2861
* -EAGAIN on timeout, -EINTR on signal.
2862
*/
2863
/* THINK
2864
* do we need to block DRBD_SIG if sock == &meta.socket ??
2865
* otherwise wake_asender() might interrupt some send_*Ack !
2866
*/
2867
rv = kernel_sendmsg(sock, &msg, &iov, 1, size);
2868
if (rv == -EAGAIN) {
2869
if (we_should_drop_the_connection(mdev, sock))
2870
break;
2871
else
2872
continue;
2873
}
2874
D_ASSERT(rv != 0);
2875
if (rv == -EINTR) {
2876
flush_signals(current);
2877
rv = 0;
2878
}
2879
if (rv < 0)
2880
break;
2881
sent += rv;
2882
iov.iov_base += rv;
2883
iov.iov_len -= rv;
2884
} while (sent < size);
2885
2886
if (sock == mdev->data.socket)
2887
clear_bit(NET_CONGESTED, &mdev->flags);
2888
2889
if (rv <= 0) {
2890
if (rv != -EAGAIN) {
2891
dev_err(DEV, "%s_sendmsg returned %d\n",
2892
sock == mdev->meta.socket ? "msock" : "sock",
2893
rv);
2894
drbd_force_state(mdev, NS(conn, C_BROKEN_PIPE));
2895
} else
2896
drbd_force_state(mdev, NS(conn, C_TIMEOUT));
2897
}
2898
2899
return sent;
2900
}
2901
2902
static int drbd_open(struct block_device *bdev, fmode_t mode)
2903
{
2904
struct drbd_conf *mdev = bdev->bd_disk->private_data;
2905
unsigned long flags;
2906
int rv = 0;
2907
2908
mutex_lock(&drbd_main_mutex);
2909
spin_lock_irqsave(&mdev->req_lock, flags);
2910
/* to have a stable mdev->state.role
2911
* and no race with updating open_cnt */
2912
2913
if (mdev->state.role != R_PRIMARY) {
2914
if (mode & FMODE_WRITE)
2915
rv = -EROFS;
2916
else if (!allow_oos)
2917
rv = -EMEDIUMTYPE;
2918
}
2919
2920
if (!rv)
2921
mdev->open_cnt++;
2922
spin_unlock_irqrestore(&mdev->req_lock, flags);
2923
mutex_unlock(&drbd_main_mutex);
2924
2925
return rv;
2926
}
2927
2928
static int drbd_release(struct gendisk *gd, fmode_t mode)
2929
{
2930
struct drbd_conf *mdev = gd->private_data;
2931
mutex_lock(&drbd_main_mutex);
2932
mdev->open_cnt--;
2933
mutex_unlock(&drbd_main_mutex);
2934
return 0;
2935
}
2936
2937
static void drbd_set_defaults(struct drbd_conf *mdev)
2938
{
2939
/* This way we get a compile error when sync_conf grows
and we forget to initialize it here */
2941
mdev->sync_conf = (struct syncer_conf) {
2942
/* .rate = */ DRBD_RATE_DEF,
2943
/* .after = */ DRBD_AFTER_DEF,
2944
/* .al_extents = */ DRBD_AL_EXTENTS_DEF,
2945
/* .verify_alg = */ {}, 0,
2946
/* .cpu_mask = */ {}, 0,
2947
/* .csums_alg = */ {}, 0,
2948
/* .use_rle = */ 0,
2949
/* .on_no_data = */ DRBD_ON_NO_DATA_DEF,
2950
/* .c_plan_ahead = */ DRBD_C_PLAN_AHEAD_DEF,
2951
/* .c_delay_target = */ DRBD_C_DELAY_TARGET_DEF,
2952
/* .c_fill_target = */ DRBD_C_FILL_TARGET_DEF,
2953
/* .c_max_rate = */ DRBD_C_MAX_RATE_DEF,
2954
/* .c_min_rate = */ DRBD_C_MIN_RATE_DEF
2955
};
2956
2957
/* Have to do it this way, because the layout differs between
big endian and little endian */
2959
mdev->state = (union drbd_state) {
2960
{ .role = R_SECONDARY,
2961
.peer = R_UNKNOWN,
2962
.conn = C_STANDALONE,
2963
.disk = D_DISKLESS,
2964
.pdsk = D_UNKNOWN,
2965
.susp = 0,
2966
.susp_nod = 0,
2967
.susp_fen = 0
2968
} };
2969
}
2970
2971
void drbd_init_set_defaults(struct drbd_conf *mdev)
2972
{
2973
/* the memset(,0,) did most of this.
2974
* note: only assignments, no allocation in here */
2975
2976
drbd_set_defaults(mdev);
2977
2978
atomic_set(&mdev->ap_bio_cnt, 0);
2979
atomic_set(&mdev->ap_pending_cnt, 0);
2980
atomic_set(&mdev->rs_pending_cnt, 0);
2981
atomic_set(&mdev->unacked_cnt, 0);
2982
atomic_set(&mdev->local_cnt, 0);
2983
atomic_set(&mdev->net_cnt, 0);
2984
atomic_set(&mdev->packet_seq, 0);
2985
atomic_set(&mdev->pp_in_use, 0);
2986
atomic_set(&mdev->pp_in_use_by_net, 0);
2987
atomic_set(&mdev->rs_sect_in, 0);
2988
atomic_set(&mdev->rs_sect_ev, 0);
2989
atomic_set(&mdev->ap_in_flight, 0);
2990
2991
mutex_init(&mdev->md_io_mutex);
2992
mutex_init(&mdev->data.mutex);
2993
mutex_init(&mdev->meta.mutex);
2994
sema_init(&mdev->data.work.s, 0);
2995
sema_init(&mdev->meta.work.s, 0);
2996
mutex_init(&mdev->state_mutex);
2997
2998
spin_lock_init(&mdev->data.work.q_lock);
2999
spin_lock_init(&mdev->meta.work.q_lock);
3000
3001
spin_lock_init(&mdev->al_lock);
3002
spin_lock_init(&mdev->req_lock);
3003
spin_lock_init(&mdev->peer_seq_lock);
3004
spin_lock_init(&mdev->epoch_lock);
3005
3006
INIT_LIST_HEAD(&mdev->active_ee);
3007
INIT_LIST_HEAD(&mdev->sync_ee);
3008
INIT_LIST_HEAD(&mdev->done_ee);
3009
INIT_LIST_HEAD(&mdev->read_ee);
3010
INIT_LIST_HEAD(&mdev->net_ee);
3011
INIT_LIST_HEAD(&mdev->resync_reads);
3012
INIT_LIST_HEAD(&mdev->data.work.q);
3013
INIT_LIST_HEAD(&mdev->meta.work.q);
3014
INIT_LIST_HEAD(&mdev->resync_work.list);
3015
INIT_LIST_HEAD(&mdev->unplug_work.list);
3016
INIT_LIST_HEAD(&mdev->go_diskless.list);
3017
INIT_LIST_HEAD(&mdev->md_sync_work.list);
3018
INIT_LIST_HEAD(&mdev->start_resync_work.list);
3019
INIT_LIST_HEAD(&mdev->bm_io_work.w.list);
3020
3021
mdev->resync_work.cb = w_resync_timer;
3022
mdev->unplug_work.cb = w_send_write_hint;
3023
mdev->go_diskless.cb = w_go_diskless;
3024
mdev->md_sync_work.cb = w_md_sync;
3025
mdev->bm_io_work.w.cb = w_bitmap_io;
3026
mdev->start_resync_work.cb = w_start_resync;
3027
init_timer(&mdev->resync_timer);
3028
init_timer(&mdev->md_sync_timer);
3029
init_timer(&mdev->start_resync_timer);
3030
init_timer(&mdev->request_timer);
3031
mdev->resync_timer.function = resync_timer_fn;
3032
mdev->resync_timer.data = (unsigned long) mdev;
3033
mdev->md_sync_timer.function = md_sync_timer_fn;
3034
mdev->md_sync_timer.data = (unsigned long) mdev;
3035
mdev->start_resync_timer.function = start_resync_timer_fn;
3036
mdev->start_resync_timer.data = (unsigned long) mdev;
3037
mdev->request_timer.function = request_timer_fn;
3038
mdev->request_timer.data = (unsigned long) mdev;
3039
3040
init_waitqueue_head(&mdev->misc_wait);
3041
init_waitqueue_head(&mdev->state_wait);
3042
init_waitqueue_head(&mdev->net_cnt_wait);
3043
init_waitqueue_head(&mdev->ee_wait);
3044
init_waitqueue_head(&mdev->al_wait);
3045
init_waitqueue_head(&mdev->seq_wait);
3046
3047
drbd_thread_init(mdev, &mdev->receiver, drbdd_init);
3048
drbd_thread_init(mdev, &mdev->worker, drbd_worker);
3049
drbd_thread_init(mdev, &mdev->asender, drbd_asender);
3050
3051
mdev->agreed_pro_version = PRO_VERSION_MAX;
3052
mdev->write_ordering = WO_bdev_flush;
3053
mdev->resync_wenr = LC_FREE;
3054
mdev->peer_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE;
3055
mdev->local_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE;
3056
}
3057
3058
void drbd_mdev_cleanup(struct drbd_conf *mdev)
3059
{
3060
int i;
3061
if (mdev->receiver.t_state != None)
3062
dev_err(DEV, "ASSERT FAILED: receiver t_state == %d expected 0.\n",
3063
mdev->receiver.t_state);
3064
3065
/* no need to lock it, I'm the only thread alive */
3066
if (atomic_read(&mdev->current_epoch->epoch_size) != 0)
3067
dev_err(DEV, "epoch_size:%d\n", atomic_read(&mdev->current_epoch->epoch_size));
3068
mdev->al_writ_cnt =
3069
mdev->bm_writ_cnt =
3070
mdev->read_cnt =
3071
mdev->recv_cnt =
3072
mdev->send_cnt =
3073
mdev->writ_cnt =
3074
mdev->p_size =
3075
mdev->rs_start =
3076
mdev->rs_total =
3077
mdev->rs_failed = 0;
3078
mdev->rs_last_events = 0;
3079
mdev->rs_last_sect_ev = 0;
3080
for (i = 0; i < DRBD_SYNC_MARKS; i++) {
3081
mdev->rs_mark_left[i] = 0;
3082
mdev->rs_mark_time[i] = 0;
3083
}
3084
D_ASSERT(mdev->net_conf == NULL);
3085
3086
drbd_set_my_capacity(mdev, 0);
3087
if (mdev->bitmap) {
3088
/* maybe never allocated. */
3089
drbd_bm_resize(mdev, 0, 1);
3090
drbd_bm_cleanup(mdev);
3091
}
3092
3093
drbd_free_resources(mdev);
3094
clear_bit(AL_SUSPENDED, &mdev->flags);
3095
3096
/*
* currently we call drbd_init_ee only on module load, so
* we may call drbd_release_ee only on module unload!
*/
3100
D_ASSERT(list_empty(&mdev->active_ee));
3101
D_ASSERT(list_empty(&mdev->sync_ee));
3102
D_ASSERT(list_empty(&mdev->done_ee));
3103
D_ASSERT(list_empty(&mdev->read_ee));
3104
D_ASSERT(list_empty(&mdev->net_ee));
3105
D_ASSERT(list_empty(&mdev->resync_reads));
3106
D_ASSERT(list_empty(&mdev->data.work.q));
3107
D_ASSERT(list_empty(&mdev->meta.work.q));
3108
D_ASSERT(list_empty(&mdev->resync_work.list));
3109
D_ASSERT(list_empty(&mdev->unplug_work.list));
3110
D_ASSERT(list_empty(&mdev->go_diskless.list));
3111
3112
drbd_set_defaults(mdev);
3113
}
3114
3115
3116
static void drbd_destroy_mempools(void)
3117
{
3118
struct page *page;
3119
3120
while (drbd_pp_pool) {
3121
page = drbd_pp_pool;
3122
drbd_pp_pool = (struct page *)page_private(page);
3123
__free_page(page);
3124
drbd_pp_vacant--;
3125
}
3126
3127
/* D_ASSERT(atomic_read(&drbd_pp_vacant)==0); */
3128
3129
if (drbd_ee_mempool)
3130
mempool_destroy(drbd_ee_mempool);
3131
if (drbd_request_mempool)
3132
mempool_destroy(drbd_request_mempool);
3133
if (drbd_ee_cache)
3134
kmem_cache_destroy(drbd_ee_cache);
3135
if (drbd_request_cache)
3136
kmem_cache_destroy(drbd_request_cache);
3137
if (drbd_bm_ext_cache)
3138
kmem_cache_destroy(drbd_bm_ext_cache);
3139
if (drbd_al_ext_cache)
3140
kmem_cache_destroy(drbd_al_ext_cache);
3141
3142
drbd_ee_mempool = NULL;
3143
drbd_request_mempool = NULL;
3144
drbd_ee_cache = NULL;
3145
drbd_request_cache = NULL;
3146
drbd_bm_ext_cache = NULL;
3147
drbd_al_ext_cache = NULL;
3148
3149
return;
3150
}
3151
3152
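/* Pre-allocate number = (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count pages
* for drbd's private page pool (drbd_pp_pool), plus slab caches for
* requests, epoch entries and bitmap/AL extents, and mempools for the
* first two. */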
static int drbd_create_mempools(void)
3153
{
3154
struct page *page;
3155
const int number = (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count;
3156
int i;
3157
3158
/* prepare our caches and mempools */
3159
drbd_request_mempool = NULL;
3160
drbd_ee_cache = NULL;
3161
drbd_request_cache = NULL;
3162
drbd_bm_ext_cache = NULL;
3163
drbd_al_ext_cache = NULL;
3164
drbd_pp_pool = NULL;
3165
3166
/* caches */
3167
drbd_request_cache = kmem_cache_create(
3168
"drbd_req", sizeof(struct drbd_request), 0, 0, NULL);
3169
if (drbd_request_cache == NULL)
3170
goto Enomem;
3171
3172
drbd_ee_cache = kmem_cache_create(
3173
"drbd_ee", sizeof(struct drbd_epoch_entry), 0, 0, NULL);
3174
if (drbd_ee_cache == NULL)
3175
goto Enomem;
3176
3177
drbd_bm_ext_cache = kmem_cache_create(
3178
"drbd_bm", sizeof(struct bm_extent), 0, 0, NULL);
3179
if (drbd_bm_ext_cache == NULL)
3180
goto Enomem;
3181
3182
drbd_al_ext_cache = kmem_cache_create(
3183
"drbd_al", sizeof(struct lc_element), 0, 0, NULL);
3184
if (drbd_al_ext_cache == NULL)
3185
goto Enomem;
3186
3187
/* mempools */
3188
drbd_request_mempool = mempool_create(number,
3189
mempool_alloc_slab, mempool_free_slab, drbd_request_cache);
3190
if (drbd_request_mempool == NULL)
3191
goto Enomem;
3192
3193
drbd_ee_mempool = mempool_create(number,
3194
mempool_alloc_slab, mempool_free_slab, drbd_ee_cache);
3195
if (drbd_ee_mempool == NULL)
3196
goto Enomem;
3197
3198
/* drbd's page pool */
3199
spin_lock_init(&drbd_pp_lock);
3200
3201
for (i = 0; i < number; i++) {
3202
page = alloc_page(GFP_HIGHUSER);
3203
if (!page)
3204
goto Enomem;
3205
set_page_private(page, (unsigned long)drbd_pp_pool);
3206
drbd_pp_pool = page;
3207
}
3208
drbd_pp_vacant = number;
3209
3210
return 0;
3211
3212
Enomem:
3213
drbd_destroy_mempools(); /* in case we allocated some */
3214
return -ENOMEM;
3215
}
3216
3217
static int drbd_notify_sys(struct notifier_block *this, unsigned long code,
3218
void *unused)
3219
{
3220
/* just so we have it. you never know what interesting things we
3221
* might want to do here some day...
3222
*/
3223
3224
return NOTIFY_DONE;
3225
}
3226
3227
static struct notifier_block drbd_notifier = {
3228
.notifier_call = drbd_notify_sys,
3229
};
3230
3231
static void drbd_release_ee_lists(struct drbd_conf *mdev)
3232
{
3233
int rr;
3234
3235
rr = drbd_release_ee(mdev, &mdev->active_ee);
3236
if (rr)
3237
dev_err(DEV, "%d EEs in active list found!\n", rr);
3238
3239
rr = drbd_release_ee(mdev, &mdev->sync_ee);
3240
if (rr)
3241
dev_err(DEV, "%d EEs in sync list found!\n", rr);
3242
3243
rr = drbd_release_ee(mdev, &mdev->read_ee);
3244
if (rr)
3245
dev_err(DEV, "%d EEs in read list found!\n", rr);
3246
3247
rr = drbd_release_ee(mdev, &mdev->done_ee);
3248
if (rr)
3249
dev_err(DEV, "%d EEs in done list found!\n", rr);
3250
3251
rr = drbd_release_ee(mdev, &mdev->net_ee);
3252
if (rr)
3253
dev_err(DEV, "%d EEs in net list found!\n", rr);
3254
}
3255
3256
/* caution. no locking.
3257
* currently only used from module cleanup code. */
3258
static void drbd_delete_device(unsigned int minor)
3259
{
3260
struct drbd_conf *mdev = minor_to_mdev(minor);
3261
3262
if (!mdev)
3263
return;
3264
3265
/* paranoia asserts */
3266
if (mdev->open_cnt != 0)
3267
dev_err(DEV, "open_cnt = %d in %s:%u", mdev->open_cnt,
3268
__FILE__ , __LINE__);
3269
3270
ERR_IF (!list_empty(&mdev->data.work.q)) {
3271
struct list_head *lp;
3272
list_for_each(lp, &mdev->data.work.q) {
3273
dev_err(DEV, "lp = %p\n", lp);
3274
}
3275
};
3276
/* end paranoia asserts */
3277
3278
del_gendisk(mdev->vdisk);
3279
3280
/* cleanup stuff that may have been allocated during
3281
* device (re-)configuration or state changes */
3282
3283
if (mdev->this_bdev)
3284
bdput(mdev->this_bdev);
3285
3286
drbd_free_resources(mdev);
3287
3288
drbd_release_ee_lists(mdev);
3289
3290
/* should be freed on disconnect? */
3291
kfree(mdev->ee_hash);
3292
/*
3293
mdev->ee_hash_s = 0;
3294
mdev->ee_hash = NULL;
3295
*/
3296
3297
lc_destroy(mdev->act_log);
3298
lc_destroy(mdev->resync);
3299
3300
kfree(mdev->p_uuid);
3301
/* mdev->p_uuid = NULL; */
3302
3303
kfree(mdev->int_dig_out);
3304
kfree(mdev->int_dig_in);
3305
kfree(mdev->int_dig_vv);
3306
3307
/* cleanup the rest that has been
3308
* allocated from drbd_new_device
3309
* and actually free the mdev itself */
3310
drbd_free_mdev(mdev);
3311
}
3312
3313
static void drbd_cleanup(void)
3314
{
3315
unsigned int i;
3316
3317
unregister_reboot_notifier(&drbd_notifier);
3318
3319
/* first remove proc,
* drbdsetup uses its presence to detect
* whether DRBD is loaded.
* If we got stuck in proc removal,
* but had netlink already deregistered,
* some drbdsetup commands could wait forever
* for an answer.
*/
3327
if (drbd_proc)
3328
remove_proc_entry("drbd", NULL);
3329
3330
drbd_nl_cleanup();
3331
3332
if (minor_table) {
3333
i = minor_count;
3334
while (i--)
3335
drbd_delete_device(i);
3336
drbd_destroy_mempools();
3337
}
3338
3339
kfree(minor_table);
3340
3341
unregister_blkdev(DRBD_MAJOR, "drbd");
3342
3343
printk(KERN_INFO "drbd: module cleanup done.\n");
3344
}
3345
3346
/**
3347
* drbd_congested() - Callback for pdflush
3348
* @congested_data: User data
3349
* @bdi_bits: Bits pdflush is currently interested in
3350
*
3351
* Returns 1<<BDI_async_congested and/or 1<<BDI_sync_congested if we are congested.
3352
*/
3353
static int drbd_congested(void *congested_data, int bdi_bits)
3354
{
3355
struct drbd_conf *mdev = congested_data;
3356
struct request_queue *q;
3357
char reason = '-';
3358
int r = 0;
3359
3360
if (!may_inc_ap_bio(mdev)) {
3361
/* DRBD has frozen IO */
3362
r = bdi_bits;
3363
reason = 'd';
3364
goto out;
3365
}
3366
3367
if (get_ldev(mdev)) {
3368
q = bdev_get_queue(mdev->ldev->backing_bdev);
3369
r = bdi_congested(&q->backing_dev_info, bdi_bits);
3370
put_ldev(mdev);
3371
if (r)
3372
reason = 'b';
3373
}
3374
3375
if (bdi_bits & (1 << BDI_async_congested) && test_bit(NET_CONGESTED, &mdev->flags)) {
3376
r |= (1 << BDI_async_congested);
3377
reason = reason == 'b' ? 'a' : 'n';
3378
}
3379
3380
out:
3381
mdev->congestion_reason = reason;
3382
return r;
3383
}
3384
3385
struct drbd_conf *drbd_new_device(unsigned int minor)
3386
{
3387
struct drbd_conf *mdev;
3388
struct gendisk *disk;
3389
struct request_queue *q;
3390
3391
/* GFP_KERNEL, we are outside of all write-out paths */
3392
mdev = kzalloc(sizeof(struct drbd_conf), GFP_KERNEL);
3393
if (!mdev)
3394
return NULL;
3395
if (!zalloc_cpumask_var(&mdev->cpu_mask, GFP_KERNEL))
3396
goto out_no_cpumask;
3397
3398
mdev->minor = minor;
3399
3400
drbd_init_set_defaults(mdev);
3401
3402
q = blk_alloc_queue(GFP_KERNEL);
3403
if (!q)
3404
goto out_no_q;
3405
mdev->rq_queue = q;
3406
q->queuedata = mdev;
3407
3408
disk = alloc_disk(1);
3409
if (!disk)
3410
goto out_no_disk;
3411
mdev->vdisk = disk;
3412
3413
set_disk_ro(disk, true);
3414
3415
disk->queue = q;
3416
disk->major = DRBD_MAJOR;
3417
disk->first_minor = minor;
3418
disk->fops = &drbd_ops;
3419
sprintf(disk->disk_name, "drbd%d", minor);
3420
disk->private_data = mdev;
3421
3422
mdev->this_bdev = bdget(MKDEV(DRBD_MAJOR, minor));
3423
/* we have no partitions. we contain only ourselves. */
3424
mdev->this_bdev->bd_contains = mdev->this_bdev;
3425
3426
q->backing_dev_info.congested_fn = drbd_congested;
3427
q->backing_dev_info.congested_data = mdev;
3428
3429
blk_queue_make_request(q, drbd_make_request);
3430
/* Setting max_hw_sectors to an odd value of 8 KiB here.
This triggers a max_bio_size message upon first attach or connect */
3432
blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE_SAFE >> 8);
3433
blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
3434
blk_queue_merge_bvec(q, drbd_merge_bvec);
3435
q->queue_lock = &mdev->req_lock;
3436
3437
mdev->md_io_page = alloc_page(GFP_KERNEL);
3438
if (!mdev->md_io_page)
3439
goto out_no_io_page;
3440
3441
if (drbd_bm_init(mdev))
3442
goto out_no_bitmap;
3443
/* no need to lock access, we are still initializing this minor device. */
3444
if (!tl_init(mdev))
3445
goto out_no_tl;
3446
3447
mdev->app_reads_hash = kzalloc(APP_R_HSIZE*sizeof(void *), GFP_KERNEL);
3448
if (!mdev->app_reads_hash)
3449
goto out_no_app_reads;
3450
3451
mdev->current_epoch = kzalloc(sizeof(struct drbd_epoch), GFP_KERNEL);
3452
if (!mdev->current_epoch)
3453
goto out_no_epoch;
3454
3455
INIT_LIST_HEAD(&mdev->current_epoch->list);
3456
mdev->epochs = 1;
3457
3458
return mdev;
3459
3460
/* out_whatever_else:
3461
kfree(mdev->current_epoch); */
3462
out_no_epoch:
3463
kfree(mdev->app_reads_hash);
3464
out_no_app_reads:
3465
tl_cleanup(mdev);
3466
out_no_tl:
3467
drbd_bm_cleanup(mdev);
3468
out_no_bitmap:
3469
__free_page(mdev->md_io_page);
3470
out_no_io_page:
3471
put_disk(disk);
3472
out_no_disk:
3473
blk_cleanup_queue(q);
3474
out_no_q:
3475
free_cpumask_var(mdev->cpu_mask);
3476
out_no_cpumask:
3477
kfree(mdev);
3478
return NULL;
3479
}
3480
3481
/* counterpart of drbd_new_device.
3482
* last part of drbd_delete_device. */
3483
void drbd_free_mdev(struct drbd_conf *mdev)
3484
{
3485
kfree(mdev->current_epoch);
3486
kfree(mdev->app_reads_hash);
3487
tl_cleanup(mdev);
3488
if (mdev->bitmap) /* should no longer be there. */
3489
drbd_bm_cleanup(mdev);
3490
__free_page(mdev->md_io_page);
3491
put_disk(mdev->vdisk);
3492
blk_cleanup_queue(mdev->rq_queue);
3493
free_cpumask_var(mdev->cpu_mask);
3494
drbd_free_tl_hash(mdev);
3495
kfree(mdev);
3496
}
3497
3498
3499
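/* Module init: validate the handshake packet layout and minor_count,
* register the drbd_nl interface, the DRBD_MAJOR block device and a
* reboot notifier, then allocate minor_table and the mempools and
* create /proc/drbd. */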
int __init drbd_init(void)
3500
{
3501
int err;
3502
3503
if (sizeof(struct p_handshake) != 80) {
3504
printk(KERN_ERR
3505
"drbd: never change the size or layout "
3506
"of the HandShake packet.\n");
3507
return -EINVAL;
3508
}
3509
3510
if (minor_count < DRBD_MINOR_COUNT_MIN || minor_count > DRBD_MINOR_COUNT_MAX) {
3511
printk(KERN_ERR
3512
"drbd: invalid minor_count (%d)\n", minor_count);
3513
#ifdef MODULE
3514
return -EINVAL;
3515
#else
3516
minor_count = 8;
3517
#endif
3518
}
3519
3520
err = drbd_nl_init();
3521
if (err)
3522
return err;
3523
3524
err = register_blkdev(DRBD_MAJOR, "drbd");
3525
if (err) {
3526
printk(KERN_ERR
3527
"drbd: unable to register block device major %d\n",
3528
DRBD_MAJOR);
3529
return err;
3530
}
3531
3532
register_reboot_notifier(&drbd_notifier);
3533
3534
/*
3535
* allocate all necessary structs
3536
*/
3537
err = -ENOMEM;
3538
3539
init_waitqueue_head(&drbd_pp_wait);
3540
3541
drbd_proc = NULL; /* play safe for drbd_cleanup */
3542
minor_table = kzalloc(sizeof(struct drbd_conf *)*minor_count,
3543
GFP_KERNEL);
3544
if (!minor_table)
3545
goto Enomem;
3546
3547
err = drbd_create_mempools();
3548
if (err)
3549
goto Enomem;
3550
3551
drbd_proc = proc_create_data("drbd", S_IFREG | S_IRUGO , NULL, &drbd_proc_fops, NULL);
3552
if (!drbd_proc) {
3553
printk(KERN_ERR "drbd: unable to register proc file\n");
3554
goto Enomem;
3555
}
3556
3557
rwlock_init(&global_state_lock);
3558
3559
printk(KERN_INFO "drbd: initialized. "
3560
"Version: " REL_VERSION " (api:%d/proto:%d-%d)\n",
3561
API_VERSION, PRO_VERSION_MIN, PRO_VERSION_MAX);
3562
printk(KERN_INFO "drbd: %s\n", drbd_buildtag());
3563
printk(KERN_INFO "drbd: registered as block device major %d\n",
3564
DRBD_MAJOR);
3565
printk(KERN_INFO "drbd: minor_table @ 0x%p\n", minor_table);
3566
3567
return 0; /* Success! */
3568
3569
Enomem:
3570
drbd_cleanup();
3571
if (err == -ENOMEM)
3572
/* currently always the case */
3573
printk(KERN_ERR "drbd: ran out of memory\n");
3574
else
3575
printk(KERN_ERR "drbd: initialization failure\n");
3576
return err;
3577
}
3578
3579
void drbd_free_bc(struct drbd_backing_dev *ldev)
3580
{
3581
if (ldev == NULL)
3582
return;
3583
3584
blkdev_put(ldev->backing_bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
3585
blkdev_put(ldev->md_bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
3586
3587
kfree(ldev);
3588
}
3589
3590
void drbd_free_sock(struct drbd_conf *mdev)
3591
{
3592
if (mdev->data.socket) {
3593
mutex_lock(&mdev->data.mutex);
3594
kernel_sock_shutdown(mdev->data.socket, SHUT_RDWR);
3595
sock_release(mdev->data.socket);
3596
mdev->data.socket = NULL;
3597
mutex_unlock(&mdev->data.mutex);
3598
}
3599
if (mdev->meta.socket) {
3600
mutex_lock(&mdev->meta.mutex);
3601
kernel_sock_shutdown(mdev->meta.socket, SHUT_RDWR);
3602
sock_release(mdev->meta.socket);
3603
mdev->meta.socket = NULL;
3604
mutex_unlock(&mdev->meta.mutex);
3605
}
3606
}
3607
3608
3609
void drbd_free_resources(struct drbd_conf *mdev)
3610
{
3611
crypto_free_hash(mdev->csums_tfm);
3612
mdev->csums_tfm = NULL;
3613
crypto_free_hash(mdev->verify_tfm);
3614
mdev->verify_tfm = NULL;
3615
crypto_free_hash(mdev->cram_hmac_tfm);
3616
mdev->cram_hmac_tfm = NULL;
3617
crypto_free_hash(mdev->integrity_w_tfm);
3618
mdev->integrity_w_tfm = NULL;
3619
crypto_free_hash(mdev->integrity_r_tfm);
3620
mdev->integrity_r_tfm = NULL;
3621
3622
drbd_free_sock(mdev);
3623
3624
__no_warn(local,
3625
drbd_free_bc(mdev->ldev);
3626
mdev->ldev = NULL;);
3627
}
3628
3629
/* meta data management */
3630
3631
struct meta_data_on_disk {
3632
u64 la_size; /* last agreed size. */
3633
u64 uuid[UI_SIZE]; /* UUIDs. */
3634
u64 device_uuid;
3635
u64 reserved_u64_1;
3636
u32 flags; /* MDF */
3637
u32 magic;
3638
u32 md_size_sect;
3639
u32 al_offset; /* offset to this block */
3640
u32 al_nr_extents; /* important for restoring the AL */
3641
/* `-- act_log->nr_elements <-- sync_conf.al_extents */
3642
u32 bm_offset; /* offset to the bitmap, from here */
3643
u32 bm_bytes_per_bit; /* BM_BLOCK_SIZE */
3644
u32 la_peer_max_bio_size; /* last peer max_bio_size */
3645
u32 reserved_u32[3];
3646
3647
} __packed;
3648
3649
/**
 * drbd_md_sync() - Writes the meta data super block if the MD_DIRTY flag bit is set
 * @mdev:	DRBD device.
 */
void drbd_md_sync(struct drbd_conf *mdev)
{
	struct meta_data_on_disk *buffer;
	sector_t sector;
	int i;

	del_timer(&mdev->md_sync_timer);
	/* timer may be rearmed by drbd_md_mark_dirty() now. */
	if (!test_and_clear_bit(MD_DIRTY, &mdev->flags))
		return;

	/* We use here D_FAILED and not D_ATTACHING because we try to write
	 * metadata even if we detach due to a disk failure! */
	if (!get_ldev_if_state(mdev, D_FAILED))
		return;

	mutex_lock(&mdev->md_io_mutex);
	buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page);
	memset(buffer, 0, 512);

	buffer->la_size = cpu_to_be64(drbd_get_capacity(mdev->this_bdev));
	for (i = UI_CURRENT; i < UI_SIZE; i++)
		buffer->uuid[i] = cpu_to_be64(mdev->ldev->md.uuid[i]);
	buffer->flags = cpu_to_be32(mdev->ldev->md.flags);
	buffer->magic = cpu_to_be32(DRBD_MD_MAGIC);

	buffer->md_size_sect  = cpu_to_be32(mdev->ldev->md.md_size_sect);
	buffer->al_offset     = cpu_to_be32(mdev->ldev->md.al_offset);
	buffer->al_nr_extents = cpu_to_be32(mdev->act_log->nr_elements);
	buffer->bm_bytes_per_bit = cpu_to_be32(BM_BLOCK_SIZE);
	buffer->device_uuid = cpu_to_be64(mdev->ldev->md.device_uuid);

	buffer->bm_offset = cpu_to_be32(mdev->ldev->md.bm_offset);
	buffer->la_peer_max_bio_size = cpu_to_be32(mdev->peer_max_bio_size);

	D_ASSERT(drbd_md_ss__(mdev, mdev->ldev) == mdev->ldev->md.md_offset);
	sector = mdev->ldev->md.md_offset;

	if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) {
		/* this was a try anyways ... */
		dev_err(DEV, "meta data update failed!\n");
		drbd_chk_io_error(mdev, 1, true);
	}

	/* Update mdev->ldev->md.la_size_sect,
	 * since we updated it on metadata. */
	mdev->ldev->md.la_size_sect = drbd_get_capacity(mdev->this_bdev);

	mutex_unlock(&mdev->md_io_mutex);
	put_ldev(mdev);
}

/**
 * drbd_md_read() - Reads in the meta data super block
 * @mdev:	DRBD device.
 * @bdev:	Device from which the meta data should be read in.
 *
 * Return 0 (NO_ERROR) on success, and an enum drbd_ret_code in case
 * something goes wrong.  Currently only: ERR_IO_MD_DISK, ERR_MD_INVALID.
 */
int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
{
	struct meta_data_on_disk *buffer;
	int i, rv = NO_ERROR;

	if (!get_ldev_if_state(mdev, D_ATTACHING))
		return ERR_IO_MD_DISK;

	mutex_lock(&mdev->md_io_mutex);
	buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page);

	if (!drbd_md_sync_page_io(mdev, bdev, bdev->md.md_offset, READ)) {
		/* NOTE: can't do normal error processing here as this is
		   called BEFORE disk is attached */
		dev_err(DEV, "Error while reading metadata.\n");
		rv = ERR_IO_MD_DISK;
		goto err;
	}

	if (be32_to_cpu(buffer->magic) != DRBD_MD_MAGIC) {
		dev_err(DEV, "Error while reading metadata, magic not found.\n");
		rv = ERR_MD_INVALID;
		goto err;
	}
	if (be32_to_cpu(buffer->al_offset) != bdev->md.al_offset) {
		dev_err(DEV, "unexpected al_offset: %d (expected %d)\n",
		    be32_to_cpu(buffer->al_offset), bdev->md.al_offset);
		rv = ERR_MD_INVALID;
		goto err;
	}
	if (be32_to_cpu(buffer->bm_offset) != bdev->md.bm_offset) {
		dev_err(DEV, "unexpected bm_offset: %d (expected %d)\n",
		    be32_to_cpu(buffer->bm_offset), bdev->md.bm_offset);
		rv = ERR_MD_INVALID;
		goto err;
	}
	if (be32_to_cpu(buffer->md_size_sect) != bdev->md.md_size_sect) {
		dev_err(DEV, "unexpected md_size: %u (expected %u)\n",
		    be32_to_cpu(buffer->md_size_sect), bdev->md.md_size_sect);
		rv = ERR_MD_INVALID;
		goto err;
	}

	if (be32_to_cpu(buffer->bm_bytes_per_bit) != BM_BLOCK_SIZE) {
		dev_err(DEV, "unexpected bm_bytes_per_bit: %u (expected %u)\n",
		    be32_to_cpu(buffer->bm_bytes_per_bit), BM_BLOCK_SIZE);
		rv = ERR_MD_INVALID;
		goto err;
	}

	bdev->md.la_size_sect = be64_to_cpu(buffer->la_size);
	for (i = UI_CURRENT; i < UI_SIZE; i++)
		bdev->md.uuid[i] = be64_to_cpu(buffer->uuid[i]);
	bdev->md.flags = be32_to_cpu(buffer->flags);
	mdev->sync_conf.al_extents = be32_to_cpu(buffer->al_nr_extents);
	bdev->md.device_uuid = be64_to_cpu(buffer->device_uuid);

	spin_lock_irq(&mdev->req_lock);
	if (mdev->state.conn < C_CONNECTED) {
		int peer;
		peer = be32_to_cpu(buffer->la_peer_max_bio_size);
		peer = max_t(int, peer, DRBD_MAX_BIO_SIZE_SAFE);
		mdev->peer_max_bio_size = peer;
	}
	spin_unlock_irq(&mdev->req_lock);

	if (mdev->sync_conf.al_extents < 7)
		mdev->sync_conf.al_extents = 127;

 err:
	mutex_unlock(&mdev->md_io_mutex);
	put_ldev(mdev);

	return rv;
}

/**
 * drbd_md_mark_dirty() - Mark meta data super block as dirty
 * @mdev:	DRBD device.
 *
 * Call this function if you change anything that should be written to
 * the meta-data super block. This function sets MD_DIRTY and arms a timer
 * that ensures drbd_md_sync() gets called within five seconds (one second
 * in DEBUG builds).
 */
#ifdef DEBUG
void drbd_md_mark_dirty_(struct drbd_conf *mdev, unsigned int line, const char *func)
{
	if (!test_and_set_bit(MD_DIRTY, &mdev->flags)) {
		mod_timer(&mdev->md_sync_timer, jiffies + HZ);
		mdev->last_md_mark_dirty.line = line;
		mdev->last_md_mark_dirty.func = func;
	}
}
#else
void drbd_md_mark_dirty(struct drbd_conf *mdev)
{
	if (!test_and_set_bit(MD_DIRTY, &mdev->flags))
		mod_timer(&mdev->md_sync_timer, jiffies + 5*HZ);
}
#endif

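/*
 * Illustrative caller pattern (editorial sketch, never compiled): anything
 * that mutates the persistent part of mdev->ldev->md is expected to mark the
 * super block dirty and rely on either an explicit drbd_md_sync() or the
 * md_sync_timer to reach stable storage.  The function name and the value
 * written below are made up for illustration.
 */
#if 0
/* assumes the caller holds a local-disk reference (get_ldev) so that
 * mdev->ldev is stable */
static void example_persistent_change(struct drbd_conf *mdev)
{
	mdev->ldev->md.device_uuid = 0xdeadbeef;	/* some persistent change */
	drbd_md_mark_dirty(mdev);			/* arm the sync timer */
	/* ... */
	drbd_md_sync(mdev);	/* optional: flush now instead of waiting */
}
#endif
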
static void drbd_uuid_move_history(struct drbd_conf *mdev) __must_hold(local)
{
	int i;

	for (i = UI_HISTORY_START; i < UI_HISTORY_END; i++)
		mdev->ldev->md.uuid[i+1] = mdev->ldev->md.uuid[i];
}

void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local)
{
	if (idx == UI_CURRENT) {
		if (mdev->state.role == R_PRIMARY)
			val |= 1;
		else
			val &= ~((u64)1);

		drbd_set_ed_uuid(mdev, val);
	}

	mdev->ldev->md.uuid[idx] = val;
	drbd_md_mark_dirty(mdev);
}

void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local)
{
	if (mdev->ldev->md.uuid[idx]) {
		drbd_uuid_move_history(mdev);
		mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[idx];
	}
	_drbd_uuid_set(mdev, idx, val);
}

/**
 * drbd_uuid_new_current() - Creates a new current UUID
 * @mdev:	DRBD device.
 *
 * Creates a new current UUID, and rotates the old current UUID into
 * the bitmap slot. Causes an incremental resync upon next connect.
 */
void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local)
{
	u64 val;
	unsigned long long bm_uuid = mdev->ldev->md.uuid[UI_BITMAP];

	if (bm_uuid)
		dev_warn(DEV, "bm UUID was already set: %llX\n", bm_uuid);

	mdev->ldev->md.uuid[UI_BITMAP] = mdev->ldev->md.uuid[UI_CURRENT];

	get_random_bytes(&val, sizeof(u64));
	_drbd_uuid_set(mdev, UI_CURRENT, val);
	drbd_print_uuids(mdev, "new current UUID");
	/* get it to stable storage _now_ */
	drbd_md_sync(mdev);
}

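/*
 * Editorial note on the rotation above: drbd_uuid_new_current() moves the
 * old UI_CURRENT value into the UI_BITMAP slot and generates a fresh random
 * UI_CURRENT, while drbd_uuid_set()/drbd_uuid_move_history() push displaced
 * values down the UI_HISTORY_* slots.  The low bit of UI_CURRENT is special:
 * _drbd_uuid_set() sets it while the node is R_PRIMARY and clears it
 * otherwise, which is why new values are ORed with or masked by 1.
 */
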
void drbd_uuid_set_bm(struct drbd_conf *mdev, u64 val) __must_hold(local)
{
	if (mdev->ldev->md.uuid[UI_BITMAP] == 0 && val == 0)
		return;

	if (val == 0) {
		drbd_uuid_move_history(mdev);
		mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[UI_BITMAP];
		mdev->ldev->md.uuid[UI_BITMAP] = 0;
	} else {
		unsigned long long bm_uuid = mdev->ldev->md.uuid[UI_BITMAP];
		if (bm_uuid)
			dev_warn(DEV, "bm UUID was already set: %llX\n", bm_uuid);

		mdev->ldev->md.uuid[UI_BITMAP] = val & ~((u64)1);
	}
	drbd_md_mark_dirty(mdev);
}

/**
 * drbd_bmio_set_n_write() - io_fn for drbd_queue_bitmap_io() or drbd_bitmap_io()
 * @mdev:	DRBD device.
 *
 * Sets all bits in the bitmap and writes the whole bitmap to stable storage.
 */
int drbd_bmio_set_n_write(struct drbd_conf *mdev)
{
	int rv = -EIO;

	if (get_ldev_if_state(mdev, D_ATTACHING)) {
		drbd_md_set_flag(mdev, MDF_FULL_SYNC);
		drbd_md_sync(mdev);
		drbd_bm_set_all(mdev);

		rv = drbd_bm_write(mdev);

		if (!rv) {
			drbd_md_clear_flag(mdev, MDF_FULL_SYNC);
			drbd_md_sync(mdev);
		}

		put_ldev(mdev);
	}

	return rv;
}

/**
 * drbd_bmio_clear_n_write() - io_fn for drbd_queue_bitmap_io() or drbd_bitmap_io()
 * @mdev:	DRBD device.
 *
 * Clears all bits in the bitmap and writes the whole bitmap to stable storage.
 */
int drbd_bmio_clear_n_write(struct drbd_conf *mdev)
{
	int rv = -EIO;

	drbd_resume_al(mdev);
	if (get_ldev_if_state(mdev, D_ATTACHING)) {
		drbd_bm_clear_all(mdev);
		rv = drbd_bm_write(mdev);
		put_ldev(mdev);
	}

	return rv;
}

static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused)
{
	struct bm_io_work *work = container_of(w, struct bm_io_work, w);
	int rv = -EIO;

	D_ASSERT(atomic_read(&mdev->ap_bio_cnt) == 0);

	if (get_ldev(mdev)) {
		drbd_bm_lock(mdev, work->why, work->flags);
		rv = work->io_fn(mdev);
		drbd_bm_unlock(mdev);
		put_ldev(mdev);
	}

	clear_bit(BITMAP_IO, &mdev->flags);
	smp_mb__after_clear_bit();
	wake_up(&mdev->misc_wait);

	if (work->done)
		work->done(mdev, rv);

	clear_bit(BITMAP_IO_QUEUED, &mdev->flags);
	work->why = NULL;
	work->flags = 0;

	return 1;
}

void drbd_ldev_destroy(struct drbd_conf *mdev)
{
	lc_destroy(mdev->resync);
	mdev->resync = NULL;
	lc_destroy(mdev->act_log);
	mdev->act_log = NULL;
	__no_warn(local,
		drbd_free_bc(mdev->ldev);
		mdev->ldev = NULL;);

	if (mdev->md_io_tmpp) {
		__free_page(mdev->md_io_tmpp);
		mdev->md_io_tmpp = NULL;
	}
	clear_bit(GO_DISKLESS, &mdev->flags);
}

static int w_go_diskless(struct drbd_conf *mdev, struct drbd_work *w, int unused)
{
	D_ASSERT(mdev->state.disk == D_FAILED);
	/* we cannot assert local_cnt == 0 here, as get_ldev_if_state will
	 * inc/dec it frequently. Once we are D_DISKLESS, no one will touch
	 * the protected members anymore, though, so once put_ldev reaches zero
	 * again, it will be safe to free them. */
	drbd_force_state(mdev, NS(disk, D_DISKLESS));
	return 1;
}

void drbd_go_diskless(struct drbd_conf *mdev)
{
	D_ASSERT(mdev->state.disk == D_FAILED);
	if (!test_and_set_bit(GO_DISKLESS, &mdev->flags))
		drbd_queue_work(&mdev->data.work, &mdev->go_diskless);
}

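/*
 * Editorial note on the detach path: drbd_go_diskless() is called once the
 * disk state has reached D_FAILED.  It queues w_go_diskless() exactly once
 * (guarded by the GO_DISKLESS bit); the worker then forces the disk state to
 * D_DISKLESS, and drbd_ldev_destroy() above releases the activity log, the
 * resync LRU and the backing device when it is invoked elsewhere in the
 * driver after the last local reference has been dropped.
 */
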
/**
 * drbd_queue_bitmap_io() - Queues an IO operation on the whole bitmap
 * @mdev:	DRBD device.
 * @io_fn:	IO callback to be called when bitmap IO is possible
 * @done:	callback to be called after the bitmap IO was performed
 * @why:	Descriptive text of the reason for doing the IO
 * @flags:	bitmap locking flags, passed on to drbd_bm_lock()
 *
 * While IO on the bitmap is in progress, application IO is frozen; this
 * ensures that drbd_set_out_of_sync() cannot be called. This function MAY
 * ONLY be called from worker context. It MUST NOT be used while a previous
 * such work is still pending!
 */
void drbd_queue_bitmap_io(struct drbd_conf *mdev,
			  int (*io_fn)(struct drbd_conf *),
			  void (*done)(struct drbd_conf *, int),
			  char *why, enum bm_flag flags)
{
	D_ASSERT(current == mdev->worker.task);

	D_ASSERT(!test_bit(BITMAP_IO_QUEUED, &mdev->flags));
	D_ASSERT(!test_bit(BITMAP_IO, &mdev->flags));
	D_ASSERT(list_empty(&mdev->bm_io_work.w.list));
	if (mdev->bm_io_work.why)
		dev_err(DEV, "FIXME going to queue '%s' but '%s' still pending?\n",
			why, mdev->bm_io_work.why);

	mdev->bm_io_work.io_fn = io_fn;
	mdev->bm_io_work.done = done;
	mdev->bm_io_work.why = why;
	mdev->bm_io_work.flags = flags;

	spin_lock_irq(&mdev->req_lock);
	set_bit(BITMAP_IO, &mdev->flags);
	if (atomic_read(&mdev->ap_bio_cnt) == 0) {
		if (!test_and_set_bit(BITMAP_IO_QUEUED, &mdev->flags))
			drbd_queue_work(&mdev->data.work, &mdev->bm_io_work.w);
	}
	spin_unlock_irq(&mdev->req_lock);
}

/**
 * drbd_bitmap_io() - Does an IO operation on the whole bitmap
 * @mdev:	DRBD device.
 * @io_fn:	IO callback to be called when bitmap IO is possible
 * @why:	Descriptive text of the reason for doing the IO
 * @flags:	bitmap locking flags; application IO is only suspended if
 *		BM_LOCKED_SET_ALLOWED is not set
 *
 * Freezes application IO while the actual IO operation runs. This
 * function MAY NOT be called from worker context.
 */
int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *),
		   char *why, enum bm_flag flags)
{
	int rv;

	D_ASSERT(current != mdev->worker.task);

	if ((flags & BM_LOCKED_SET_ALLOWED) == 0)
		drbd_suspend_io(mdev);

	drbd_bm_lock(mdev, why, flags);
	rv = io_fn(mdev);
	drbd_bm_unlock(mdev);

	if ((flags & BM_LOCKED_SET_ALLOWED) == 0)
		drbd_resume_io(mdev);

	return rv;
}

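/*
 * Illustrative sketches (editorial, never compiled): the two entry points
 * above pair with io_fn callbacks such as drbd_bmio_set_n_write() and
 * drbd_bmio_clear_n_write().  The wrapper functions, the "why" strings and
 * the flag choice are made up for illustration; BM_LOCKED_SET_ALLOWED is
 * used only because it appears in the checks above, a real caller picks
 * flags to match its operation.
 */
#if 0
/* From the worker thread: queue the operation; w_bitmap_io() above will run
 * io_fn once application IO has drained. */
static void example_from_worker(struct drbd_conf *mdev)
{
	drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, NULL,
			     "example: set all bits", BM_LOCKED_SET_ALLOWED);
}

/* From any other context: run the operation synchronously. */
static int example_from_elsewhere(struct drbd_conf *mdev)
{
	return drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write,
			      "example: clear all bits", BM_LOCKED_SET_ALLOWED);
}
#endif
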
void drbd_md_set_flag(struct drbd_conf *mdev, int flag) __must_hold(local)
{
	if ((mdev->ldev->md.flags & flag) != flag) {
		drbd_md_mark_dirty(mdev);
		mdev->ldev->md.flags |= flag;
	}
}

void drbd_md_clear_flag(struct drbd_conf *mdev, int flag) __must_hold(local)
{
	if ((mdev->ldev->md.flags & flag) != 0) {
		drbd_md_mark_dirty(mdev);
		mdev->ldev->md.flags &= ~flag;
	}
}

int drbd_md_test_flag(struct drbd_backing_dev *bdev, int flag)
{
	return (bdev->md.flags & flag) != 0;
}

static void md_sync_timer_fn(unsigned long data)
{
	struct drbd_conf *mdev = (struct drbd_conf *) data;

	drbd_queue_work_front(&mdev->data.work, &mdev->md_sync_work);
}

static int w_md_sync(struct drbd_conf *mdev, struct drbd_work *w, int unused)
{
	dev_warn(DEV, "md_sync_timer expired! Worker calls drbd_md_sync().\n");
#ifdef DEBUG
	dev_warn(DEV, "last md_mark_dirty: %s:%u\n",
		mdev->last_md_mark_dirty.func, mdev->last_md_mark_dirty.line);
#endif
	drbd_md_sync(mdev);
	return 1;
}

#ifdef CONFIG_DRBD_FAULT_INJECTION
/* Fault insertion support including random number generator shamelessly
 * stolen from kernel/rcutorture.c */
struct fault_random_state {
	unsigned long state;
	unsigned long count;
};

#define FAULT_RANDOM_MULT 39916801  /* prime */
#define FAULT_RANDOM_ADD 479001701 /* prime */
#define FAULT_RANDOM_REFRESH 10000

/*
 * Crude but fast random-number generator.  Uses a linear congruential
 * generator, with occasional help from get_random_bytes().
 */
static unsigned long
_drbd_fault_random(struct fault_random_state *rsp)
{
	long refresh;

	if (!rsp->count--) {
		get_random_bytes(&refresh, sizeof(refresh));
		rsp->state += refresh;
		rsp->count = FAULT_RANDOM_REFRESH;
	}
	rsp->state = rsp->state * FAULT_RANDOM_MULT + FAULT_RANDOM_ADD;
	return swahw32(rsp->state);
}

static char *
_drbd_fault_str(unsigned int type) {
	static char *_faults[] = {
		[DRBD_FAULT_MD_WR] = "Meta-data write",
		[DRBD_FAULT_MD_RD] = "Meta-data read",
		[DRBD_FAULT_RS_WR] = "Resync write",
		[DRBD_FAULT_RS_RD] = "Resync read",
		[DRBD_FAULT_DT_WR] = "Data write",
		[DRBD_FAULT_DT_RD] = "Data read",
		[DRBD_FAULT_DT_RA] = "Data read ahead",
		[DRBD_FAULT_BM_ALLOC] = "BM allocation",
		[DRBD_FAULT_AL_EE] = "EE allocation",
		[DRBD_FAULT_RECEIVE] = "receive data corruption",
	};

	return (type < DRBD_FAULT_MAX) ? _faults[type] : "**Unknown**";
}

unsigned int
_drbd_insert_fault(struct drbd_conf *mdev, unsigned int type)
{
	static struct fault_random_state rrs = {0, 0};

	unsigned int ret = (
		(fault_devs == 0 ||
			((1 << mdev_to_minor(mdev)) & fault_devs) != 0) &&
		(((_drbd_fault_random(&rrs) % 100) + 1) <= fault_rate));

	if (ret) {
		fault_count++;

		if (__ratelimit(&drbd_ratelimit_state))
			dev_warn(DEV, "***Simulating %s failure\n",
				_drbd_fault_str(type));
	}

	return ret;
}
#endif

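/*
 * Editorial note: the decision above fires with probability fault_rate
 * percent per IO, restricted to the minors selected by the fault_devs
 * bitmask (0 means "all devices"); fault_count tallies how often it
 * triggered.  These variables are module parameters declared elsewhere in
 * the driver, so enabling fault injection at load time would look roughly
 * like "modprobe drbd fault_rate=10 fault_devs=1" (illustrative only, and
 * assuming the module was built with CONFIG_DRBD_FAULT_INJECTION).
 */
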
const char *drbd_buildtag(void)
{
	/* When DRBD is built from external sources, this carries a reference
	   to the git hash of the source code. */

	static char buildtag[38] = "\0uilt-in";

	if (buildtag[0] == 0) {
#ifdef CONFIG_MODULES
		if (THIS_MODULE != NULL)
			sprintf(buildtag, "srcversion: %-24s", THIS_MODULE->srcversion);
		else
#endif
			buildtag[0] = 'b';
	}

	return buildtag;
}

module_init(drbd_init)
module_exit(drbd_cleanup)

EXPORT_SYMBOL(drbd_conn_str);
EXPORT_SYMBOL(drbd_role_str);
EXPORT_SYMBOL(drbd_disk_str);
EXPORT_SYMBOL(drbd_set_st_err_str);