GitHub Repository: torvalds/linux
Path: blob/master/block/blk-mq-dma.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2025 Christoph Hellwig
 */
#include <linux/blk-integrity.h>
#include <linux/blk-mq-dma.h>
#include "blk.h"

static bool __blk_map_iter_next(struct blk_map_iter *iter)
{
        if (iter->iter.bi_size)
                return true;
        if (!iter->bio || !iter->bio->bi_next)
                return false;

        iter->bio = iter->bio->bi_next;
        if (iter->is_integrity) {
                iter->iter = bio_integrity(iter->bio)->bip_iter;
                iter->bvecs = bio_integrity(iter->bio)->bip_vec;
        } else {
                iter->iter = iter->bio->bi_iter;
                iter->bvecs = iter->bio->bi_io_vec;
        }
        return true;
}

static bool blk_map_iter_next(struct request *req, struct blk_map_iter *iter,
                struct phys_vec *vec)
{
        unsigned int max_size;
        struct bio_vec bv;

        if (!iter->iter.bi_size)
                return false;

        bv = mp_bvec_iter_bvec(iter->bvecs, iter->iter);
        vec->paddr = bvec_phys(&bv);
        max_size = get_max_segment_size(&req->q->limits, vec->paddr, UINT_MAX);
        bv.bv_len = min(bv.bv_len, max_size);
        bvec_iter_advance_single(iter->bvecs, &iter->iter, bv.bv_len);

        /*
         * If we are entirely done with this bi_io_vec entry, check if the next
         * one could be merged into it. This typically happens when moving to
         * the next bio, but some callers also don't pack bvecs tight.
         */
        while (!iter->iter.bi_size || !iter->iter.bi_bvec_done) {
                struct bio_vec next;

                if (!__blk_map_iter_next(iter))
                        break;

                next = mp_bvec_iter_bvec(iter->bvecs, iter->iter);
                if (bv.bv_len + next.bv_len > max_size ||
                    !biovec_phys_mergeable(req->q, &bv, &next))
                        break;

                bv.bv_len += next.bv_len;
                bvec_iter_advance_single(iter->bvecs, &iter->iter, next.bv_len);
        }

        vec->len = bv.bv_len;
        return true;
}

/*
 * The IOVA-based DMA API wants to be able to coalesce at the minimal IOMMU page
 * size granularity (which is guaranteed to be <= PAGE_SIZE and usually 4k), so
 * we need to ensure our segments are aligned to this as well.
 *
 * Note that there is no point in using the slightly more complicated IOVA based
 * path for single segment mappings.
 */
static inline bool blk_can_dma_map_iova(struct request *req,
                struct device *dma_dev)
{
        return !(req_phys_gap_mask(req) & dma_get_merge_boundary(dma_dev));
}

static bool blk_dma_map_bus(struct blk_dma_iter *iter, struct phys_vec *vec)
{
        iter->addr = pci_p2pdma_bus_addr_map(iter->p2pdma.mem, vec->paddr);
        iter->len = vec->len;
        return true;
}

static bool blk_dma_map_direct(struct request *req, struct device *dma_dev,
                struct blk_dma_iter *iter, struct phys_vec *vec)
{
        unsigned int attrs = 0;

        if (iter->p2pdma.map == PCI_P2PDMA_MAP_THRU_HOST_BRIDGE)
                attrs |= DMA_ATTR_MMIO;

        iter->addr = dma_map_phys(dma_dev, vec->paddr, vec->len,
                        rq_dma_dir(req), attrs);
        if (dma_mapping_error(dma_dev, iter->addr)) {
                iter->status = BLK_STS_RESOURCE;
                return false;
        }
        iter->len = vec->len;
        return true;
}

static bool blk_rq_dma_map_iova(struct request *req, struct device *dma_dev,
                struct dma_iova_state *state, struct blk_dma_iter *iter,
                struct phys_vec *vec)
{
        enum dma_data_direction dir = rq_dma_dir(req);
        unsigned int attrs = 0;
        size_t mapped = 0;
        int error;

        iter->addr = state->addr;
        iter->len = dma_iova_size(state);

        if (iter->p2pdma.map == PCI_P2PDMA_MAP_THRU_HOST_BRIDGE)
                attrs |= DMA_ATTR_MMIO;

        do {
                error = dma_iova_link(dma_dev, state, vec->paddr, mapped,
                                vec->len, dir, attrs);
                if (error)
                        break;
                mapped += vec->len;
        } while (blk_map_iter_next(req, &iter->iter, vec));

        error = dma_iova_sync(dma_dev, state, 0, mapped);
        if (error) {
                iter->status = errno_to_blk_status(error);
                return false;
        }

        return true;
}

static inline void blk_rq_map_iter_init(struct request *rq,
                struct blk_map_iter *iter)
{
        struct bio *bio = rq->bio;

        if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) {
                *iter = (struct blk_map_iter) {
                        .bvecs = &rq->special_vec,
                        .iter = {
                                .bi_size = rq->special_vec.bv_len,
                        }
                };
        } else if (bio) {
                *iter = (struct blk_map_iter) {
                        .bio = bio,
                        .bvecs = bio->bi_io_vec,
                        .iter = bio->bi_iter,
                };
        } else {
                /* the internal flush request may not have a bio attached */
                *iter = (struct blk_map_iter) {};
        }
}

static bool blk_dma_map_iter_start(struct request *req, struct device *dma_dev,
                struct dma_iova_state *state, struct blk_dma_iter *iter,
                unsigned int total_len)
{
        struct phys_vec vec;

        memset(&iter->p2pdma, 0, sizeof(iter->p2pdma));
        iter->status = BLK_STS_OK;
        iter->p2pdma.map = PCI_P2PDMA_MAP_NONE;

        /*
         * Grab the first segment ASAP because we'll need it to check for P2P
         * transfers.
         */
        if (!blk_map_iter_next(req, &iter->iter, &vec))
                return false;

        switch (pci_p2pdma_state(&iter->p2pdma, dma_dev,
                                 phys_to_page(vec.paddr))) {
        case PCI_P2PDMA_MAP_BUS_ADDR:
                return blk_dma_map_bus(iter, &vec);
        case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE:
                /*
                 * P2P transfers through the host bridge are treated the
                 * same as non-P2P transfers below and during unmap.
                 */
        case PCI_P2PDMA_MAP_NONE:
                break;
        default:
                iter->status = BLK_STS_INVAL;
                return false;
        }

        if (blk_can_dma_map_iova(req, dma_dev) &&
            dma_iova_try_alloc(dma_dev, state, vec.paddr, total_len))
                return blk_rq_dma_map_iova(req, dma_dev, state, iter, &vec);
        memset(state, 0, sizeof(*state));
        return blk_dma_map_direct(req, dma_dev, iter, &vec);
}

/**
 * blk_rq_dma_map_iter_start - map the first DMA segment for a request
 * @req: request to map
 * @dma_dev: device to map to
 * @state: DMA IOVA state
 * @iter: block layer DMA iterator
 *
 * Start DMA mapping @req to @dma_dev. @state and @iter are provided by the
 * caller and don't need to be initialized. @state needs to be stored for use
 * at unmap time, @iter is only needed at map time.
 *
 * Returns %false if there is no segment to map, including due to an error, or
 * %true if it did map a segment.
 *
 * If a segment was mapped, the DMA address for it is returned in @iter.addr and
 * the length in @iter.len. If no segment was mapped the status code is
 * returned in @iter.status.
 *
 * The caller can call blk_rq_dma_map_coalesce() to check if further segments
 * need to be mapped after this, or go straight to blk_rq_dma_map_iter_next()
 * to try to map the following segments.
 */
bool blk_rq_dma_map_iter_start(struct request *req, struct device *dma_dev,
                struct dma_iova_state *state, struct blk_dma_iter *iter)
{
        blk_rq_map_iter_init(req, &iter->iter);
        return blk_dma_map_iter_start(req, dma_dev, state, iter,
                        blk_rq_payload_bytes(req));
}
EXPORT_SYMBOL_GPL(blk_rq_dma_map_iter_start);
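
/*
 * Illustrative usage sketch (not taken from an in-tree driver): a caller
 * typically consumes the iterator in a loop, programming each returned
 * address/length pair into its transfer descriptors. queue_hw_segment() is a
 * made-up placeholder for that device-specific step, and unwinding of
 * already-mapped segments on error is omitted:
 *
 *      struct dma_iova_state state;
 *      struct blk_dma_iter iter;
 *
 *      if (!blk_rq_dma_map_iter_start(req, dma_dev, &state, &iter))
 *              return iter.status;
 *      do {
 *              queue_hw_segment(hwq, iter.addr, iter.len);
 *      } while (blk_rq_dma_map_iter_next(req, dma_dev, &iter));
 *      if (iter.status != BLK_STS_OK)
 *              return iter.status;
 *
 * If the IOVA path coalesced the request (blk_rq_dma_map_coalesce()), the
 * first segment already covers the whole payload and the loop above ends
 * after a single pass.
 */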

/**
 * blk_rq_dma_map_iter_next - map the next DMA segment for a request
 * @req: request to map
 * @dma_dev: device to map to
 * @iter: block layer DMA iterator
 *
 * Iterate to the next mapping after a previous call to
 * blk_rq_dma_map_iter_start(). See there for a detailed description of the
 * arguments.
 *
 * Returns %false if there is no segment to map, including due to an error, or
 * %true if it did map a segment.
 *
 * If a segment was mapped, the DMA address for it is returned in @iter.addr and
 * the length in @iter.len. If no segment was mapped the status code is
 * returned in @iter.status.
 */
bool blk_rq_dma_map_iter_next(struct request *req, struct device *dma_dev,
                struct blk_dma_iter *iter)
{
        struct phys_vec vec;

        if (!blk_map_iter_next(req, &iter->iter, &vec))
                return false;

        if (iter->p2pdma.map == PCI_P2PDMA_MAP_BUS_ADDR)
                return blk_dma_map_bus(iter, &vec);
        return blk_dma_map_direct(req, dma_dev, iter, &vec);
}
EXPORT_SYMBOL_GPL(blk_rq_dma_map_iter_next);

static inline struct scatterlist *
blk_next_sg(struct scatterlist **sg, struct scatterlist *sglist)
{
        if (!*sg)
                return sglist;

        /*
         * If the driver previously mapped a shorter list, we could see a
         * termination bit prematurely unless it fully inits the sg table
         * on each mapping. We KNOW that there must be more entries here
         * or the driver would be buggy, so force clear the termination bit
         * to avoid doing a full sg_init_table() in drivers for each command.
         */
        sg_unmark_end(*sg);
        return sg_next(*sg);
}

/*
 * Map a request to a scatterlist, returning the number of sg entries set up.
 * The caller must make sure sg can hold rq->nr_phys_segments entries.
 */
int __blk_rq_map_sg(struct request *rq, struct scatterlist *sglist,
                struct scatterlist **last_sg)
{
        struct blk_map_iter iter;
        struct phys_vec vec;
        int nsegs = 0;

        blk_rq_map_iter_init(rq, &iter);
        while (blk_map_iter_next(rq, &iter, &vec)) {
                *last_sg = blk_next_sg(last_sg, sglist);

                WARN_ON_ONCE(overflows_type(vec.len, unsigned int));
                sg_set_page(*last_sg, phys_to_page(vec.paddr), vec.len,
                                offset_in_page(vec.paddr));
                nsegs++;
        }

        if (*last_sg)
                sg_mark_end(*last_sg);

        /*
         * Something must have gone wrong if the computed number of segments
         * is bigger than the number of the request's physical segments.
         */
        WARN_ON(nsegs > blk_rq_nr_phys_segments(rq));

        return nsegs;
}
EXPORT_SYMBOL(__blk_rq_map_sg);
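
/*
 * Illustrative sizing sketch (assumptions: a caller-owned sg_table allocated
 * with GFP_KERNEL, error handling omitted): the scatterlist passed in must
 * have room for blk_rq_nr_phys_segments() entries, e.g.:
 *
 *      struct scatterlist *last_sg = NULL;
 *      struct sg_table sgt;
 *      int nsegs;
 *
 *      if (sg_alloc_table(&sgt, blk_rq_nr_phys_segments(rq), GFP_KERNEL))
 *              return -ENOMEM;
 *      nsegs = __blk_rq_map_sg(rq, sgt.sgl, &last_sg);
 *
 * Most callers use the blk_rq_map_sg() wrapper, which supplies the *last_sg
 * cursor itself.
 */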

#ifdef CONFIG_BLK_DEV_INTEGRITY
/**
 * blk_rq_integrity_dma_map_iter_start - map the first integrity DMA segment
 *      for a request
 * @req: request to map
 * @dma_dev: device to map to
 * @state: DMA IOVA state
 * @iter: block layer DMA iterator
 *
 * Start DMA mapping @req integrity data to @dma_dev. @state and @iter are
 * provided by the caller and don't need to be initialized. @state needs to be
 * stored for use at unmap time, @iter is only needed at map time.
 *
 * Returns %false if there is no segment to map, including due to an error, or
 * %true if it did map a segment.
 *
 * If a segment was mapped, the DMA address for it is returned in @iter.addr
 * and the length in @iter.len. If no segment was mapped the status code is
 * returned in @iter.status.
 *
 * The caller can call blk_rq_dma_map_coalesce() to check if further segments
 * need to be mapped after this, or go straight to
 * blk_rq_integrity_dma_map_iter_next() to try to map the following segments.
 */
bool blk_rq_integrity_dma_map_iter_start(struct request *req,
                struct device *dma_dev, struct dma_iova_state *state,
                struct blk_dma_iter *iter)
{
        unsigned len = bio_integrity_bytes(&req->q->limits.integrity,
                        blk_rq_sectors(req));
        struct bio *bio = req->bio;

        iter->iter = (struct blk_map_iter) {
                .bio = bio,
                .iter = bio_integrity(bio)->bip_iter,
                .bvecs = bio_integrity(bio)->bip_vec,
                .is_integrity = true,
        };
        return blk_dma_map_iter_start(req, dma_dev, state, iter, len);
}
EXPORT_SYMBOL_GPL(blk_rq_integrity_dma_map_iter_start);
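
/*
 * Illustrative usage sketch: integrity metadata is consumed the same way as
 * the data payload above, just with the integrity iterator helpers and a
 * separate state/iter pair. queue_hw_meta() is a made-up placeholder for the
 * device-specific step:
 *
 *      if (blk_rq_integrity_dma_map_iter_start(req, dma_dev, &state, &iter)) {
 *              do {
 *                      queue_hw_meta(hwq, iter.addr, iter.len);
 *              } while (blk_rq_integrity_dma_map_iter_next(req, dma_dev,
 *                              &iter));
 *      }
 *      if (iter.status != BLK_STS_OK)
 *              return iter.status;
 */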

/**
 * blk_rq_integrity_dma_map_iter_next - map the next integrity DMA segment for
 *      a request
 * @req: request to map
 * @dma_dev: device to map to
 * @iter: block layer DMA iterator
 *
 * Iterate to the next integrity mapping after a previous call to
 * blk_rq_integrity_dma_map_iter_start(). See there for a detailed description
 * of the arguments.
 *
 * Returns %false if there is no segment to map, including due to an error, or
 * %true if it did map a segment.
 *
 * If a segment was mapped, the DMA address for it is returned in @iter.addr and
 * the length in @iter.len. If no segment was mapped the status code is
 * returned in @iter.status.
 */
bool blk_rq_integrity_dma_map_iter_next(struct request *req,
                struct device *dma_dev, struct blk_dma_iter *iter)
{
        struct phys_vec vec;

        if (!blk_map_iter_next(req, &iter->iter, &vec))
                return false;

        if (iter->p2pdma.map == PCI_P2PDMA_MAP_BUS_ADDR)
                return blk_dma_map_bus(iter, &vec);
        return blk_dma_map_direct(req, dma_dev, iter, &vec);
}
EXPORT_SYMBOL_GPL(blk_rq_integrity_dma_map_iter_next);

/**
 * blk_rq_map_integrity_sg - Map integrity metadata into a scatterlist
 * @rq: request to map
 * @sglist: target scatterlist
 *
 * Description: Map the integrity vectors in request into a
 * scatterlist. The scatterlist must be big enough to hold all
 * elements, i.e. sized using blk_rq_count_integrity_sg() or
 * rq->nr_integrity_segments.
 */
int blk_rq_map_integrity_sg(struct request *rq, struct scatterlist *sglist)
{
        struct request_queue *q = rq->q;
        struct scatterlist *sg = NULL;
        struct bio *bio = rq->bio;
        unsigned int segments = 0;
        struct phys_vec vec;

        struct blk_map_iter iter = {
                .bio = bio,
                .iter = bio_integrity(bio)->bip_iter,
                .bvecs = bio_integrity(bio)->bip_vec,
                .is_integrity = true,
        };

        while (blk_map_iter_next(rq, &iter, &vec)) {
                sg = blk_next_sg(&sg, sglist);

                WARN_ON_ONCE(overflows_type(vec.len, unsigned int));
                sg_set_page(sg, phys_to_page(vec.paddr), vec.len,
                                offset_in_page(vec.paddr));
                segments++;
        }

        if (sg)
                sg_mark_end(sg);

        /*
         * Something must have gone wrong if the computed number of segments
         * is bigger than the number of the request's physical integrity
         * segments.
         */
        BUG_ON(segments > rq->nr_integrity_segments);
        BUG_ON(segments > queue_max_integrity_segments(q));
        return segments;
}
EXPORT_SYMBOL(blk_rq_map_integrity_sg);
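
/*
 * Illustrative sizing sketch (assumptions: a caller-owned sg_table, GFP_KERNEL
 * context, error handling omitted), matching the sizing rule documented above:
 *
 *      struct sg_table prot_sgt;
 *      int nr;
 *
 *      if (sg_alloc_table(&prot_sgt, rq->nr_integrity_segments, GFP_KERNEL))
 *              return -ENOMEM;
 *      nr = blk_rq_map_integrity_sg(rq, prot_sgt.sgl);
 */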
#endif