CoCalc -- blk-merge.c

GitHub Repository: torvalds/linux
Path: blob/master/block/blk-merge.c
²⁶²⁴² views
1
// SPDX-License-Identifier: GPL-2.0
2
/*
3
 * Functions related to segment and merge handling
4
 */
5
#include <linux/kernel.h>
6
#include <linux/module.h>
7
#include <linux/bio.h>
8
#include <linux/blkdev.h>
9
#include <linux/blk-integrity.h>
10
#include <linux/part_stat.h>
11
#include <linux/blk-cgroup.h>
12

13
#include <trace/events/block.h>
14

15
#include "blk.h"
16
#include "blk-mq-sched.h"
17
#include "blk-rq-qos.h"
18
#include "blk-throttle.h"
19

20
static inline void bio_get_first_bvec(struct bio *bio, struct bio_vec *bv)
21
{
22
	*bv = mp_bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter);
23
}
24

25
static inline void bio_get_last_bvec(struct bio *bio, struct bio_vec *bv)
26
{
27
	struct bvec_iter iter = bio->bi_iter;
28
	int idx;
29

30
	bio_get_first_bvec(bio, bv);
31
	if (bv->bv_len == bio->bi_iter.bi_size)
32
		return;		/* this bio only has a single bvec */
33

34
	bio_advance_iter(bio, &iter, iter.bi_size);
35

36
	if (!iter.bi_bvec_done)
37
		idx = iter.bi_idx - 1;
38
	else	/* in the middle of bvec */
39
		idx = iter.bi_idx;
40

41
	*bv = bio->bi_io_vec[idx];
42

43
	/*
44
	 * iter.bi_bvec_done records actual length of the last bvec
45
	 * if this bio ends in the middle of one io vector
46
	 */
47
	if (iter.bi_bvec_done)
48
		bv->bv_len = iter.bi_bvec_done;
49
}
50

51
static inline bool bio_will_gap(struct request_queue *q,
52
		struct request *prev_rq, struct bio *prev, struct bio *next)
53
{
54
	struct bio_vec pb, nb;
55

56
	if (!bio_has_data(prev) || !queue_virt_boundary(q))
57
		return false;
58

59
	/*
60
	 * Don't merge if the 1st bio starts with non-zero offset, otherwise it
61
	 * is quite difficult to respect the sg gap limit.  We work hard to
62
	 * merge a huge number of small single bios in case of mkfs.
63
	 */
64
	if (prev_rq)
65
		bio_get_first_bvec(prev_rq->bio, &pb);
66
	else
67
		bio_get_first_bvec(prev, &pb);
68
	if (pb.bv_offset & queue_virt_boundary(q))
69
		return true;
70

71
	/*
72
	 * We don't need to worry about the situation that the merged segment
73
	 * ends in unaligned virt boundary:
74
	 *
75
	 * - if 'pb' ends aligned, the merged segment ends aligned
76
	 * - if 'pb' ends unaligned, the next bio must include
77
	 *   one single bvec of 'nb', otherwise the 'nb' can't
78
	 *   merge with 'pb'
79
	 */
80
	bio_get_last_bvec(prev, &pb);
81
	bio_get_first_bvec(next, &nb);
82
	if (biovec_phys_mergeable(q, &pb, &nb))
83
		return false;
84
	return __bvec_gap_to_prev(&q->limits, &pb, nb.bv_offset);
85
}
86

87
static inline bool req_gap_back_merge(struct request *req, struct bio *bio)
88
{
89
	return bio_will_gap(req->q, req, req->biotail, bio);
90
}
91

92
static inline bool req_gap_front_merge(struct request *req, struct bio *bio)
93
{
94
	return bio_will_gap(req->q, NULL, bio, req->bio);
95
}
96

97
/*
98
 * The max size one bio can handle is UINT_MAX becasue bvec_iter.bi_size
99
 * is defined as 'unsigned int', meantime it has to be aligned to with the
100
 * logical block size, which is the minimum accepted unit by hardware.
101
 */
102
static unsigned int bio_allowed_max_sectors(const struct queue_limits *lim)
103
{
104
	return round_down(UINT_MAX, lim->logical_block_size) >> SECTOR_SHIFT;
105
}
106

107
static struct bio *bio_submit_split(struct bio *bio, int split_sectors)
108
{
109
	if (unlikely(split_sectors < 0))
110
		goto error;
111

112
	if (split_sectors) {
113
		struct bio *split;
114

115
		split = bio_split(bio, split_sectors, GFP_NOIO,
116
				&bio->bi_bdev->bd_disk->bio_split);
117
		if (IS_ERR(split)) {
118
			split_sectors = PTR_ERR(split);
119
			goto error;
120
		}
121
		split->bi_opf |= REQ_NOMERGE;
122
		blkcg_bio_issue_init(split);
123
		bio_chain(split, bio);
124
		trace_block_split(split, bio->bi_iter.bi_sector);
125
		WARN_ON_ONCE(bio_zone_write_plugging(bio));
126
		submit_bio_noacct(bio);
127
		return split;
128
	}
129

130
	return bio;
131
error:
132
	bio->bi_status = errno_to_blk_status(split_sectors);
133
	bio_endio(bio);
134
	return NULL;
135
}
136

137
struct bio *bio_split_discard(struct bio *bio, const struct queue_limits *lim,
138
		unsigned *nsegs)
139
{
140
	unsigned int max_discard_sectors, granularity;
141
	sector_t tmp;
142
	unsigned split_sectors;
143

144
	*nsegs = 1;
145

146
	granularity = max(lim->discard_granularity >> 9, 1U);
147

148
	max_discard_sectors =
149
		min(lim->max_discard_sectors, bio_allowed_max_sectors(lim));
150
	max_discard_sectors -= max_discard_sectors % granularity;
151
	if (unlikely(!max_discard_sectors))
152
		return bio;
153

154
	if (bio_sectors(bio) <= max_discard_sectors)
155
		return bio;
156

157
	split_sectors = max_discard_sectors;
158

159
	/*
160
	 * If the next starting sector would be misaligned, stop the discard at
161
	 * the previous aligned sector.
162
	 */
163
	tmp = bio->bi_iter.bi_sector + split_sectors -
164
		((lim->discard_alignment >> 9) % granularity);
165
	tmp = sector_div(tmp, granularity);
166

167
	if (split_sectors > tmp)
168
		split_sectors -= tmp;
169

170
	return bio_submit_split(bio, split_sectors);
171
}
172

173
static inline unsigned int blk_boundary_sectors(const struct queue_limits *lim,
174
						bool is_atomic)
175
{
176
	/*
177
	 * chunk_sectors must be a multiple of atomic_write_boundary_sectors if
178
	 * both non-zero.
179
	 */
180
	if (is_atomic && lim->atomic_write_boundary_sectors)
181
		return lim->atomic_write_boundary_sectors;
182

183
	return lim->chunk_sectors;
184
}
185

186
/*
187
 * Return the maximum number of sectors from the start of a bio that may be
188
 * submitted as a single request to a block device. If enough sectors remain,
189
 * align the end to the physical block size. Otherwise align the end to the
190
 * logical block size. This approach minimizes the number of non-aligned
191
 * requests that are submitted to a block device if the start of a bio is not
192
 * aligned to a physical block boundary.
193
 */
194
static inline unsigned get_max_io_size(struct bio *bio,
195
				       const struct queue_limits *lim)
196
{
197
	unsigned pbs = lim->physical_block_size >> SECTOR_SHIFT;
198
	unsigned lbs = lim->logical_block_size >> SECTOR_SHIFT;
199
	bool is_atomic = bio->bi_opf & REQ_ATOMIC;
200
	unsigned boundary_sectors = blk_boundary_sectors(lim, is_atomic);
201
	unsigned max_sectors, start, end;
202

203
	/*
204
	 * We ignore lim->max_sectors for atomic writes because it may less
205
	 * than the actual bio size, which we cannot tolerate.
206
	 */
207
	if (bio_op(bio) == REQ_OP_WRITE_ZEROES)
208
		max_sectors = lim->max_write_zeroes_sectors;
209
	else if (is_atomic)
210
		max_sectors = lim->atomic_write_max_sectors;
211
	else
212
		max_sectors = lim->max_sectors;
213

214
	if (boundary_sectors) {
215
		max_sectors = min(max_sectors,
216
			blk_boundary_sectors_left(bio->bi_iter.bi_sector,
217
					      boundary_sectors));
218
	}
219

220
	start = bio->bi_iter.bi_sector & (pbs - 1);
221
	end = (start + max_sectors) & ~(pbs - 1);
222
	if (end > start)
223
		return end - start;
224
	return max_sectors & ~(lbs - 1);
225
}
226

227
/**
228
 * bvec_split_segs - verify whether or not a bvec should be split in the middle
229
 * @lim:      [in] queue limits to split based on
230
 * @bv:       [in] bvec to examine
231
 * @nsegs:    [in,out] Number of segments in the bio being built. Incremented
232
 *            by the number of segments from @bv that may be appended to that
233
 *            bio without exceeding @max_segs
234
 * @bytes:    [in,out] Number of bytes in the bio being built. Incremented
235
 *            by the number of bytes from @bv that may be appended to that
236
 *            bio without exceeding @max_bytes
237
 * @max_segs: [in] upper bound for *@nsegs
238
 * @max_bytes: [in] upper bound for *@bytes
239
 *
240
 * When splitting a bio, it can happen that a bvec is encountered that is too
241
 * big to fit in a single segment and hence that it has to be split in the
242
 * middle. This function verifies whether or not that should happen. The value
243
 * %true is returned if and only if appending the entire @bv to a bio with
244
 * *@nsegs segments and *@sectors sectors would make that bio unacceptable for
245
 * the block driver.
246
 */
247
static bool bvec_split_segs(const struct queue_limits *lim,
248
		const struct bio_vec *bv, unsigned *nsegs, unsigned *bytes,
249
		unsigned max_segs, unsigned max_bytes)
250
{
251
	unsigned max_len = max_bytes - *bytes;
252
	unsigned len = min(bv->bv_len, max_len);
253
	unsigned total_len = 0;
254
	unsigned seg_size = 0;
255

256
	while (len && *nsegs < max_segs) {
257
		seg_size = get_max_segment_size(lim, bvec_phys(bv) + total_len, len);
258

259
		(*nsegs)++;
260
		total_len += seg_size;
261
		len -= seg_size;
262

263
		if ((bv->bv_offset + total_len) & lim->virt_boundary_mask)
264
			break;
265
	}
266

267
	*bytes += total_len;
268

269
	/* tell the caller to split the bvec if it is too big to fit */
270
	return len > 0 || bv->bv_len > max_len;
271
}
272

273
static unsigned int bio_split_alignment(struct bio *bio,
274
		const struct queue_limits *lim)
275
{
276
	if (op_is_write(bio_op(bio)) && lim->zone_write_granularity)
277
		return lim->zone_write_granularity;
278
	return lim->logical_block_size;
279
}
280

281
/**
282
 * bio_split_rw_at - check if and where to split a read/write bio
283
 * @bio:  [in] bio to be split
284
 * @lim:  [in] queue limits to split based on
285
 * @segs: [out] number of segments in the bio with the first half of the sectors
286
 * @max_bytes: [in] maximum number of bytes per bio
287
 *
288
 * Find out if @bio needs to be split to fit the queue limits in @lim and a
289
 * maximum size of @max_bytes.  Returns a negative error number if @bio can't be
290
 * split, 0 if the bio doesn't have to be split, or a positive sector offset if
291
 * @bio needs to be split.
292
 */
293
int bio_split_rw_at(struct bio *bio, const struct queue_limits *lim,
294
		unsigned *segs, unsigned max_bytes)
295
{
296
	struct bio_vec bv, bvprv, *bvprvp = NULL;
297
	struct bvec_iter iter;
298
	unsigned nsegs = 0, bytes = 0;
299

300
	bio_for_each_bvec(bv, bio, iter) {
301
		/*
302
		 * If the queue doesn't support SG gaps and adding this
303
		 * offset would create a gap, disallow it.
304
		 */
305
		if (bvprvp && bvec_gap_to_prev(lim, bvprvp, bv.bv_offset))
306
			goto split;
307

308
		if (nsegs < lim->max_segments &&
309
		    bytes + bv.bv_len <= max_bytes &&
310
		    bv.bv_offset + bv.bv_len <= lim->min_segment_size) {
311
			nsegs++;
312
			bytes += bv.bv_len;
313
		} else {
314
			if (bvec_split_segs(lim, &bv, &nsegs, &bytes,
315
					lim->max_segments, max_bytes))
316
				goto split;
317
		}
318

319
		bvprv = bv;
320
		bvprvp = &bvprv;
321
	}
322

323
	*segs = nsegs;
324
	return 0;
325
split:
326
	if (bio->bi_opf & REQ_ATOMIC)
327
		return -EINVAL;
328

329
	/*
330
	 * We can't sanely support splitting for a REQ_NOWAIT bio. End it
331
	 * with EAGAIN if splitting is required and return an error pointer.
332
	 */
333
	if (bio->bi_opf & REQ_NOWAIT)
334
		return -EAGAIN;
335

336
	*segs = nsegs;
337

338
	/*
339
	 * Individual bvecs might not be logical block aligned. Round down the
340
	 * split size so that each bio is properly block size aligned, even if
341
	 * we do not use the full hardware limits.
342
	 */
343
	bytes = ALIGN_DOWN(bytes, bio_split_alignment(bio, lim));
344

345
	/*
346
	 * Bio splitting may cause subtle trouble such as hang when doing sync
347
	 * iopoll in direct IO routine. Given performance gain of iopoll for
348
	 * big IO can be trival, disable iopoll when split needed.
349
	 */
350
	bio_clear_polled(bio);
351
	return bytes >> SECTOR_SHIFT;
352
}
353
EXPORT_SYMBOL_GPL(bio_split_rw_at);
354

355
struct bio *bio_split_rw(struct bio *bio, const struct queue_limits *lim,
356
		unsigned *nr_segs)
357
{
358
	return bio_submit_split(bio,
359
		bio_split_rw_at(bio, lim, nr_segs,
360
			get_max_io_size(bio, lim) << SECTOR_SHIFT));
361
}
362

363
/*
364
 * REQ_OP_ZONE_APPEND bios must never be split by the block layer.
365
 *
366
 * But we want the nr_segs calculation provided by bio_split_rw_at, and having
367
 * a good sanity check that the submitter built the bio correctly is nice to
368
 * have as well.
369
 */
370
struct bio *bio_split_zone_append(struct bio *bio,
371
		const struct queue_limits *lim, unsigned *nr_segs)
372
{
373
	int split_sectors;
374

375
	split_sectors = bio_split_rw_at(bio, lim, nr_segs,
376
			lim->max_zone_append_sectors << SECTOR_SHIFT);
377
	if (WARN_ON_ONCE(split_sectors > 0))
378
		split_sectors = -EINVAL;
379
	return bio_submit_split(bio, split_sectors);
380
}
381

382
struct bio *bio_split_write_zeroes(struct bio *bio,
383
		const struct queue_limits *lim, unsigned *nsegs)
384
{
385
	unsigned int max_sectors = get_max_io_size(bio, lim);
386

387
	*nsegs = 0;
388

389
	/*
390
	 * An unset limit should normally not happen, as bio submission is keyed
391
	 * off having a non-zero limit.  But SCSI can clear the limit in the
392
	 * I/O completion handler, and we can race and see this.  Splitting to a
393
	 * zero limit obviously doesn't make sense, so band-aid it here.
394
	 */
395
	if (!max_sectors)
396
		return bio;
397
	if (bio_sectors(bio) <= max_sectors)
398
		return bio;
399
	return bio_submit_split(bio, max_sectors);
400
}
401

402
/**
403
 * bio_split_to_limits - split a bio to fit the queue limits
404
 * @bio:     bio to be split
405
 *
406
 * Check if @bio needs splitting based on the queue limits of @bio->bi_bdev, and
407
 * if so split off a bio fitting the limits from the beginning of @bio and
408
 * return it.  @bio is shortened to the remainder and re-submitted.
409
 *
410
 * The split bio is allocated from @q->bio_split, which is provided by the
411
 * block layer.
412
 */
413
struct bio *bio_split_to_limits(struct bio *bio)
414
{
415
	unsigned int nr_segs;
416

417
	return __bio_split_to_limits(bio, bdev_limits(bio->bi_bdev), &nr_segs);
418
}
419
EXPORT_SYMBOL(bio_split_to_limits);
420

421
unsigned int blk_recalc_rq_segments(struct request *rq)
422
{
423
	unsigned int nr_phys_segs = 0;
424
	unsigned int bytes = 0;
425
	struct req_iterator iter;
426
	struct bio_vec bv;
427

428
	if (!rq->bio)
429
		return 0;
430

431
	switch (bio_op(rq->bio)) {
432
	case REQ_OP_DISCARD:
433
	case REQ_OP_SECURE_ERASE:
434
		if (queue_max_discard_segments(rq->q) > 1) {
435
			struct bio *bio = rq->bio;
436

437
			for_each_bio(bio)
438
				nr_phys_segs++;
439
			return nr_phys_segs;
440
		}
441
		return 1;
442
	case REQ_OP_WRITE_ZEROES:
443
		return 0;
444
	default:
445
		break;
446
	}
447

448
	rq_for_each_bvec(bv, rq, iter)
449
		bvec_split_segs(&rq->q->limits, &bv, &nr_phys_segs, &bytes,
450
				UINT_MAX, UINT_MAX);
451
	return nr_phys_segs;
452
}
453

454
static inline unsigned int blk_rq_get_max_sectors(struct request *rq,
455
						  sector_t offset)
456
{
457
	struct request_queue *q = rq->q;
458
	struct queue_limits *lim = &q->limits;
459
	unsigned int max_sectors, boundary_sectors;
460
	bool is_atomic = rq->cmd_flags & REQ_ATOMIC;
461

462
	if (blk_rq_is_passthrough(rq))
463
		return q->limits.max_hw_sectors;
464

465
	boundary_sectors = blk_boundary_sectors(lim, is_atomic);
466
	max_sectors = blk_queue_get_max_sectors(rq);
467

468
	if (!boundary_sectors ||
469
	    req_op(rq) == REQ_OP_DISCARD ||
470
	    req_op(rq) == REQ_OP_SECURE_ERASE)
471
		return max_sectors;
472
	return min(max_sectors,
473
		   blk_boundary_sectors_left(offset, boundary_sectors));
474
}
475

476
static inline int ll_new_hw_segment(struct request *req, struct bio *bio,
477
		unsigned int nr_phys_segs)
478
{
479
	if (!blk_cgroup_mergeable(req, bio))
480
		goto no_merge;
481

482
	if (blk_integrity_merge_bio(req->q, req, bio) == false)
483
		goto no_merge;
484

485
	/* discard request merge won't add new segment */
486
	if (req_op(req) == REQ_OP_DISCARD)
487
		return 1;
488

489
	if (req->nr_phys_segments + nr_phys_segs > blk_rq_get_max_segments(req))
490
		goto no_merge;
491

492
	/*
493
	 * This will form the start of a new hw segment.  Bump both
494
	 * counters.
495
	 */
496
	req->nr_phys_segments += nr_phys_segs;
497
	if (bio_integrity(bio))
498
		req->nr_integrity_segments += blk_rq_count_integrity_sg(req->q,
499
									bio);
500
	return 1;
501

502
no_merge:
503
	req_set_nomerge(req->q, req);
504
	return 0;
505
}
506

507
int ll_back_merge_fn(struct request *req, struct bio *bio, unsigned int nr_segs)
508
{
509
	if (req_gap_back_merge(req, bio))
510
		return 0;
511
	if (blk_integrity_rq(req) &&
512
	    integrity_req_gap_back_merge(req, bio))
513
		return 0;
514
	if (!bio_crypt_ctx_back_mergeable(req, bio))
515
		return 0;
516
	if (blk_rq_sectors(req) + bio_sectors(bio) >
517
	    blk_rq_get_max_sectors(req, blk_rq_pos(req))) {
518
		req_set_nomerge(req->q, req);
519
		return 0;
520
	}
521

522
	return ll_new_hw_segment(req, bio, nr_segs);
523
}
524

525
static int ll_front_merge_fn(struct request *req, struct bio *bio,
526
		unsigned int nr_segs)
527
{
528
	if (req_gap_front_merge(req, bio))
529
		return 0;
530
	if (blk_integrity_rq(req) &&
531
	    integrity_req_gap_front_merge(req, bio))
532
		return 0;
533
	if (!bio_crypt_ctx_front_mergeable(req, bio))
534
		return 0;
535
	if (blk_rq_sectors(req) + bio_sectors(bio) >
536
	    blk_rq_get_max_sectors(req, bio->bi_iter.bi_sector)) {
537
		req_set_nomerge(req->q, req);
538
		return 0;
539
	}
540

541
	return ll_new_hw_segment(req, bio, nr_segs);
542
}
543

544
static bool req_attempt_discard_merge(struct request_queue *q, struct request *req,
545
		struct request *next)
546
{
547
	unsigned short segments = blk_rq_nr_discard_segments(req);
548

549
	if (segments >= queue_max_discard_segments(q))
550
		goto no_merge;
551
	if (blk_rq_sectors(req) + bio_sectors(next->bio) >
552
	    blk_rq_get_max_sectors(req, blk_rq_pos(req)))
553
		goto no_merge;
554

555
	req->nr_phys_segments = segments + blk_rq_nr_discard_segments(next);
556
	return true;
557
no_merge:
558
	req_set_nomerge(q, req);
559
	return false;
560
}
561

562
static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
563
				struct request *next)
564
{
565
	int total_phys_segments;
566

567
	if (req_gap_back_merge(req, next->bio))
568
		return 0;
569

570
	/*
571
	 * Will it become too large?
572
	 */
573
	if ((blk_rq_sectors(req) + blk_rq_sectors(next)) >
574
	    blk_rq_get_max_sectors(req, blk_rq_pos(req)))
575
		return 0;
576

577
	total_phys_segments = req->nr_phys_segments + next->nr_phys_segments;
578
	if (total_phys_segments > blk_rq_get_max_segments(req))
579
		return 0;
580

581
	if (!blk_cgroup_mergeable(req, next->bio))
582
		return 0;
583

584
	if (blk_integrity_merge_rq(q, req, next) == false)
585
		return 0;
586

587
	if (!bio_crypt_ctx_merge_rq(req, next))
588
		return 0;
589

590
	/* Merge is OK... */
591
	req->nr_phys_segments = total_phys_segments;
592
	req->nr_integrity_segments += next->nr_integrity_segments;
593
	return 1;
594
}
595

596
/**
597
 * blk_rq_set_mixed_merge - mark a request as mixed merge
598
 * @rq: request to mark as mixed merge
599
 *
600
 * Description:
601
 *     @rq is about to be mixed merged.  Make sure the attributes
602
 *     which can be mixed are set in each bio and mark @rq as mixed
603
 *     merged.
604
 */
605
static void blk_rq_set_mixed_merge(struct request *rq)
606
{
607
	blk_opf_t ff = rq->cmd_flags & REQ_FAILFAST_MASK;
608
	struct bio *bio;
609

610
	if (rq->rq_flags & RQF_MIXED_MERGE)
611
		return;
612

613
	/*
614
	 * @rq will no longer represent mixable attributes for all the
615
	 * contained bios.  It will just track those of the first one.
616
	 * Distributes the attributs to each bio.
617
	 */
618
	for (bio = rq->bio; bio; bio = bio->bi_next) {
619
		WARN_ON_ONCE((bio->bi_opf & REQ_FAILFAST_MASK) &&
620
			     (bio->bi_opf & REQ_FAILFAST_MASK) != ff);
621
		bio->bi_opf |= ff;
622
	}
623
	rq->rq_flags |= RQF_MIXED_MERGE;
624
}
625

626
static inline blk_opf_t bio_failfast(const struct bio *bio)
627
{
628
	if (bio->bi_opf & REQ_RAHEAD)
629
		return REQ_FAILFAST_MASK;
630

631
	return bio->bi_opf & REQ_FAILFAST_MASK;
632
}
633

634
/*
635
 * After we are marked as MIXED_MERGE, any new RA bio has to be updated
636
 * as failfast, and request's failfast has to be updated in case of
637
 * front merge.
638
 */
639
static inline void blk_update_mixed_merge(struct request *req,
640
		struct bio *bio, bool front_merge)
641
{
642
	if (req->rq_flags & RQF_MIXED_MERGE) {
643
		if (bio->bi_opf & REQ_RAHEAD)
644
			bio->bi_opf |= REQ_FAILFAST_MASK;
645

646
		if (front_merge) {
647
			req->cmd_flags &= ~REQ_FAILFAST_MASK;
648
			req->cmd_flags |= bio->bi_opf & REQ_FAILFAST_MASK;
649
		}
650
	}
651
}
652

653
static void blk_account_io_merge_request(struct request *req)
654
{
655
	if (req->rq_flags & RQF_IO_STAT) {
656
		part_stat_lock();
657
		part_stat_inc(req->part, merges[op_stat_group(req_op(req))]);
658
		part_stat_local_dec(req->part,
659
				    in_flight[op_is_write(req_op(req))]);
660
		part_stat_unlock();
661
	}
662
}
663

664
static enum elv_merge blk_try_req_merge(struct request *req,
665
					struct request *next)
666
{
667
	if (blk_discard_mergable(req))
668
		return ELEVATOR_DISCARD_MERGE;
669
	else if (blk_rq_pos(req) + blk_rq_sectors(req) == blk_rq_pos(next))
670
		return ELEVATOR_BACK_MERGE;
671

672
	return ELEVATOR_NO_MERGE;
673
}
674

675
static bool blk_atomic_write_mergeable_rq_bio(struct request *rq,
676
					      struct bio *bio)
677
{
678
	return (rq->cmd_flags & REQ_ATOMIC) == (bio->bi_opf & REQ_ATOMIC);
679
}
680

681
static bool blk_atomic_write_mergeable_rqs(struct request *rq,
682
					   struct request *next)
683
{
684
	return (rq->cmd_flags & REQ_ATOMIC) == (next->cmd_flags & REQ_ATOMIC);
685
}
686

687
/*
688
 * For non-mq, this has to be called with the request spinlock acquired.
689
 * For mq with scheduling, the appropriate queue wide lock should be held.
690
 */
691
static struct request *attempt_merge(struct request_queue *q,
692
				     struct request *req, struct request *next)
693
{
694
	if (!rq_mergeable(req) || !rq_mergeable(next))
695
		return NULL;
696

697
	if (req_op(req) != req_op(next))
698
		return NULL;
699

700
	if (req->bio->bi_write_hint != next->bio->bi_write_hint)
701
		return NULL;
702
	if (req->bio->bi_write_stream != next->bio->bi_write_stream)
703
		return NULL;
704
	if (req->bio->bi_ioprio != next->bio->bi_ioprio)
705
		return NULL;
706
	if (!blk_atomic_write_mergeable_rqs(req, next))
707
		return NULL;
708

709
	/*
710
	 * If we are allowed to merge, then append bio list
711
	 * from next to rq and release next. merge_requests_fn
712
	 * will have updated segment counts, update sector
713
	 * counts here. Handle DISCARDs separately, as they
714
	 * have separate settings.
715
	 */
716

717
	switch (blk_try_req_merge(req, next)) {
718
	case ELEVATOR_DISCARD_MERGE:
719
		if (!req_attempt_discard_merge(q, req, next))
720
			return NULL;
721
		break;
722
	case ELEVATOR_BACK_MERGE:
723
		if (!ll_merge_requests_fn(q, req, next))
724
			return NULL;
725
		break;
726
	default:
727
		return NULL;
728
	}
729

730
	/*
731
	 * If failfast settings disagree or any of the two is already
732
	 * a mixed merge, mark both as mixed before proceeding.  This
733
	 * makes sure that all involved bios have mixable attributes
734
	 * set properly.
735
	 */
736
	if (((req->rq_flags | next->rq_flags) & RQF_MIXED_MERGE) ||
737
	    (req->cmd_flags & REQ_FAILFAST_MASK) !=
738
	    (next->cmd_flags & REQ_FAILFAST_MASK)) {
739
		blk_rq_set_mixed_merge(req);
740
		blk_rq_set_mixed_merge(next);
741
	}
742

743
	/*
744
	 * At this point we have either done a back merge or front merge. We
745
	 * need the smaller start_time_ns of the merged requests to be the
746
	 * current request for accounting purposes.
747
	 */
748
	if (next->start_time_ns < req->start_time_ns)
749
		req->start_time_ns = next->start_time_ns;
750

751
	req->biotail->bi_next = next->bio;
752
	req->biotail = next->biotail;
753

754
	req->__data_len += blk_rq_bytes(next);
755

756
	if (!blk_discard_mergable(req))
757
		elv_merge_requests(q, req, next);
758

759
	blk_crypto_rq_put_keyslot(next);
760

761
	/*
762
	 * 'next' is going away, so update stats accordingly
763
	 */
764
	blk_account_io_merge_request(next);
765

766
	trace_block_rq_merge(next);
767

768
	/*
769
	 * ownership of bio passed from next to req, return 'next' for
770
	 * the caller to free
771
	 */
772
	next->bio = NULL;
773
	return next;
774
}
775

776
static struct request *attempt_back_merge(struct request_queue *q,
777
		struct request *rq)
778
{
779
	struct request *next = elv_latter_request(q, rq);
780

781
	if (next)
782
		return attempt_merge(q, rq, next);
783

784
	return NULL;
785
}
786

787
static struct request *attempt_front_merge(struct request_queue *q,
788
		struct request *rq)
789
{
790
	struct request *prev = elv_former_request(q, rq);
791

792
	if (prev)
793
		return attempt_merge(q, prev, rq);
794

795
	return NULL;
796
}
797

798
/*
799
 * Try to merge 'next' into 'rq'. Return true if the merge happened, false
800
 * otherwise. The caller is responsible for freeing 'next' if the merge
801
 * happened.
802
 */
803
bool blk_attempt_req_merge(struct request_queue *q, struct request *rq,
804
			   struct request *next)
805
{
806
	return attempt_merge(q, rq, next);
807
}
808

809
bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
810
{
811
	if (!rq_mergeable(rq) || !bio_mergeable(bio))
812
		return false;
813

814
	if (req_op(rq) != bio_op(bio))
815
		return false;
816

817
	if (!blk_cgroup_mergeable(rq, bio))
818
		return false;
819
	if (blk_integrity_merge_bio(rq->q, rq, bio) == false)
820
		return false;
821
	if (!bio_crypt_rq_ctx_compatible(rq, bio))
822
		return false;
823
	if (rq->bio->bi_write_hint != bio->bi_write_hint)
824
		return false;
825
	if (rq->bio->bi_write_stream != bio->bi_write_stream)
826
		return false;
827
	if (rq->bio->bi_ioprio != bio->bi_ioprio)
828
		return false;
829
	if (blk_atomic_write_mergeable_rq_bio(rq, bio) == false)
830
		return false;
831

832
	return true;
833
}
834

835
enum elv_merge blk_try_merge(struct request *rq, struct bio *bio)
836
{
837
	if (blk_discard_mergable(rq))
838
		return ELEVATOR_DISCARD_MERGE;
839
	else if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_iter.bi_sector)
840
		return ELEVATOR_BACK_MERGE;
841
	else if (blk_rq_pos(rq) - bio_sectors(bio) == bio->bi_iter.bi_sector)
842
		return ELEVATOR_FRONT_MERGE;
843
	return ELEVATOR_NO_MERGE;
844
}
845

846
static void blk_account_io_merge_bio(struct request *req)
847
{
848
	if (req->rq_flags & RQF_IO_STAT) {
849
		part_stat_lock();
850
		part_stat_inc(req->part, merges[op_stat_group(req_op(req))]);
851
		part_stat_unlock();
852
	}
853
}
854

855
enum bio_merge_status bio_attempt_back_merge(struct request *req,
856
		struct bio *bio, unsigned int nr_segs)
857
{
858
	const blk_opf_t ff = bio_failfast(bio);
859

860
	if (!ll_back_merge_fn(req, bio, nr_segs))
861
		return BIO_MERGE_FAILED;
862

863
	trace_block_bio_backmerge(bio);
864
	rq_qos_merge(req->q, req, bio);
865

866
	if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
867
		blk_rq_set_mixed_merge(req);
868

869
	blk_update_mixed_merge(req, bio, false);
870

871
	if (req->rq_flags & RQF_ZONE_WRITE_PLUGGING)
872
		blk_zone_write_plug_bio_merged(bio);
873

874
	req->biotail->bi_next = bio;
875
	req->biotail = bio;
876
	req->__data_len += bio->bi_iter.bi_size;
877

878
	bio_crypt_free_ctx(bio);
879

880
	blk_account_io_merge_bio(req);
881
	return BIO_MERGE_OK;
882
}
883

884
static enum bio_merge_status bio_attempt_front_merge(struct request *req,
885
		struct bio *bio, unsigned int nr_segs)
886
{
887
	const blk_opf_t ff = bio_failfast(bio);
888

889
	/*
890
	 * A front merge for writes to sequential zones of a zoned block device
891
	 * can happen only if the user submitted writes out of order. Do not
892
	 * merge such write to let it fail.
893
	 */
894
	if (req->rq_flags & RQF_ZONE_WRITE_PLUGGING)
895
		return BIO_MERGE_FAILED;
896

897
	if (!ll_front_merge_fn(req, bio, nr_segs))
898
		return BIO_MERGE_FAILED;
899

900
	trace_block_bio_frontmerge(bio);
901
	rq_qos_merge(req->q, req, bio);
902

903
	if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
904
		blk_rq_set_mixed_merge(req);
905

906
	blk_update_mixed_merge(req, bio, true);
907

908
	bio->bi_next = req->bio;
909
	req->bio = bio;
910

911
	req->__sector = bio->bi_iter.bi_sector;
912
	req->__data_len += bio->bi_iter.bi_size;
913

914
	bio_crypt_do_front_merge(req, bio);
915

916
	blk_account_io_merge_bio(req);
917
	return BIO_MERGE_OK;
918
}
919

920
static enum bio_merge_status bio_attempt_discard_merge(struct request_queue *q,
921
		struct request *req, struct bio *bio)
922
{
923
	unsigned short segments = blk_rq_nr_discard_segments(req);
924

925
	if (segments >= queue_max_discard_segments(q))
926
		goto no_merge;
927
	if (blk_rq_sectors(req) + bio_sectors(bio) >
928
	    blk_rq_get_max_sectors(req, blk_rq_pos(req)))
929
		goto no_merge;
930

931
	rq_qos_merge(q, req, bio);
932

933
	req->biotail->bi_next = bio;
934
	req->biotail = bio;
935
	req->__data_len += bio->bi_iter.bi_size;
936
	req->nr_phys_segments = segments + 1;
937

938
	blk_account_io_merge_bio(req);
939
	return BIO_MERGE_OK;
940
no_merge:
941
	req_set_nomerge(q, req);
942
	return BIO_MERGE_FAILED;
943
}
944

945
static enum bio_merge_status blk_attempt_bio_merge(struct request_queue *q,
946
						   struct request *rq,
947
						   struct bio *bio,
948
						   unsigned int nr_segs,
949
						   bool sched_allow_merge)
950
{
951
	if (!blk_rq_merge_ok(rq, bio))
952
		return BIO_MERGE_NONE;
953

954
	switch (blk_try_merge(rq, bio)) {
955
	case ELEVATOR_BACK_MERGE:
956
		if (!sched_allow_merge || blk_mq_sched_allow_merge(q, rq, bio))
957
			return bio_attempt_back_merge(rq, bio, nr_segs);
958
		break;
959
	case ELEVATOR_FRONT_MERGE:
960
		if (!sched_allow_merge || blk_mq_sched_allow_merge(q, rq, bio))
961
			return bio_attempt_front_merge(rq, bio, nr_segs);
962
		break;
963
	case ELEVATOR_DISCARD_MERGE:
964
		return bio_attempt_discard_merge(q, rq, bio);
965
	default:
966
		return BIO_MERGE_NONE;
967
	}
968

969
	return BIO_MERGE_FAILED;
970
}
971

972
/**
973
 * blk_attempt_plug_merge - try to merge with %current's plugged list
974
 * @q: request_queue new bio is being queued at
975
 * @bio: new bio being queued
976
 * @nr_segs: number of segments in @bio
977
 * from the passed in @q already in the plug list
978
 *
979
 * Determine whether @bio being queued on @q can be merged with the previous
980
 * request on %current's plugged list.  Returns %true if merge was successful,
981
 * otherwise %false.
982
 *
983
 * Plugging coalesces IOs from the same issuer for the same purpose without
984
 * going through @q->queue_lock.  As such it's more of an issuing mechanism
985
 * than scheduling, and the request, while may have elvpriv data, is not
986
 * added on the elevator at this point.  In addition, we don't have
987
 * reliable access to the elevator outside queue lock.  Only check basic
988
 * merging parameters without querying the elevator.
989
 *
990
 * Caller must ensure !blk_queue_nomerges(q) beforehand.
991
 */
992
bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
993
		unsigned int nr_segs)
994
{
995
	struct blk_plug *plug = current->plug;
996
	struct request *rq;
997

998
	if (!plug || rq_list_empty(&plug->mq_list))
999
		return false;
1000

1001
	rq = plug->mq_list.tail;
1002
	if (rq->q == q)
1003
		return blk_attempt_bio_merge(q, rq, bio, nr_segs, false) ==
1004
			BIO_MERGE_OK;
1005
	else if (!plug->multiple_queues)
1006
		return false;
1007

1008
	rq_list_for_each(&plug->mq_list, rq) {
1009
		if (rq->q != q)
1010
			continue;
1011
		if (blk_attempt_bio_merge(q, rq, bio, nr_segs, false) ==
1012
		    BIO_MERGE_OK)
1013
			return true;
1014
		break;
1015
	}
1016
	return false;
1017
}
1018

1019
/*
1020
 * Iterate list of requests and see if we can merge this bio with any
1021
 * of them.
1022
 */
1023
bool blk_bio_list_merge(struct request_queue *q, struct list_head *list,
1024
			struct bio *bio, unsigned int nr_segs)
1025
{
1026
	struct request *rq;
1027
	int checked = 8;
1028

1029
	list_for_each_entry_reverse(rq, list, queuelist) {
1030
		if (!checked--)
1031
			break;
1032

1033
		switch (blk_attempt_bio_merge(q, rq, bio, nr_segs, true)) {
1034
		case BIO_MERGE_NONE:
1035
			continue;
1036
		case BIO_MERGE_OK:
1037
			return true;
1038
		case BIO_MERGE_FAILED:
1039
			return false;
1040
		}
1041

1042
	}
1043

1044
	return false;
1045
}
1046
EXPORT_SYMBOL_GPL(blk_bio_list_merge);
1047

1048
bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
1049
		unsigned int nr_segs, struct request **merged_request)
1050
{
1051
	struct request *rq;
1052

1053
	switch (elv_merge(q, &rq, bio)) {
1054
	case ELEVATOR_BACK_MERGE:
1055
		if (!blk_mq_sched_allow_merge(q, rq, bio))
1056
			return false;
1057
		if (bio_attempt_back_merge(rq, bio, nr_segs) != BIO_MERGE_OK)
1058
			return false;
1059
		*merged_request = attempt_back_merge(q, rq);
1060
		if (!*merged_request)
1061
			elv_merged_request(q, rq, ELEVATOR_BACK_MERGE);
1062
		return true;
1063
	case ELEVATOR_FRONT_MERGE:
1064
		if (!blk_mq_sched_allow_merge(q, rq, bio))
1065
			return false;
1066
		if (bio_attempt_front_merge(rq, bio, nr_segs) != BIO_MERGE_OK)
1067
			return false;
1068
		*merged_request = attempt_front_merge(q, rq);
1069
		if (!*merged_request)
1070
			elv_merged_request(q, rq, ELEVATOR_FRONT_MERGE);
1071
		return true;
1072
	case ELEVATOR_DISCARD_MERGE:
1073
		return bio_attempt_discard_merge(q, rq, bio) == BIO_MERGE_OK;
1074
	default:
1075
		return false;
1076
	}
1077
}
1078
EXPORT_SYMBOL_GPL(blk_mq_sched_try_merge);
1079

1080
Product

Resources

Company