GitHub Repository: torvalds/linux
Path: blob/master/block/blk-mq-dma.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2025 Christoph Hellwig
 */
#include <linux/blk-mq-dma.h>
#include "blk.h"

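/* One physically contiguous segment of a request: base address and length. */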
struct phys_vec {
	phys_addr_t paddr;
	u32 len;
};

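/*
 * Advance @iter to the next physical segment of @req and return it in @vec,
 * merging physically contiguous bvecs up to the queue's maximum segment size.
 * Returns false once the request has been fully walked.
 */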
static bool blk_map_iter_next(struct request *req, struct req_iterator *iter,
		struct phys_vec *vec)
{
	unsigned int max_size;
	struct bio_vec bv;

	if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
		if (!iter->bio)
			return false;
		vec->paddr = bvec_phys(&req->special_vec);
		vec->len = req->special_vec.bv_len;
		iter->bio = NULL;
		return true;
	}

	if (!iter->iter.bi_size)
		return false;

	bv = mp_bvec_iter_bvec(iter->bio->bi_io_vec, iter->iter);
	vec->paddr = bvec_phys(&bv);
	max_size = get_max_segment_size(&req->q->limits, vec->paddr, UINT_MAX);
	bv.bv_len = min(bv.bv_len, max_size);
	bio_advance_iter_single(iter->bio, &iter->iter, bv.bv_len);

	/*
	 * If we are entirely done with this bi_io_vec entry, check if the next
	 * one could be merged into it.  This typically happens when moving to
	 * the next bio, but some callers also don't pack bvecs tightly.
	 */
	while (!iter->iter.bi_size || !iter->iter.bi_bvec_done) {
		struct bio_vec next;

		if (!iter->iter.bi_size) {
			if (!iter->bio->bi_next)
				break;
			iter->bio = iter->bio->bi_next;
			iter->iter = iter->bio->bi_iter;
		}

		next = mp_bvec_iter_bvec(iter->bio->bi_io_vec, iter->iter);
		if (bv.bv_len + next.bv_len > max_size ||
		    !biovec_phys_mergeable(req->q, &bv, &next))
			break;

		bv.bv_len += next.bv_len;
		bio_advance_iter_single(iter->bio, &iter->iter, next.bv_len);
	}

	vec->len = bv.bv_len;
	return true;
}

/*
 * The IOVA-based DMA API wants to be able to coalesce at the minimal IOMMU page
 * size granularity (which is guaranteed to be <= PAGE_SIZE and usually 4k), so
 * we need to ensure our segments are aligned to this as well.
 *
 * Note that there is no point in using the slightly more complicated IOVA based
 * path for single segment mappings.
 */
static inline bool blk_can_dma_map_iova(struct request *req,
		struct device *dma_dev)
{
	return !((queue_virt_boundary(req->q) + 1) &
		 dma_get_merge_boundary(dma_dev));
}
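
/*
 * Worked example of the check above (illustrative values, not taken from any
 * particular driver): a queue that sets a 4 KiB virt boundary has
 * queue_virt_boundary() == 0xfff, and with a 4 KiB IOMMU granule
 * dma_get_merge_boundary() is also 0xfff, so (0xfff + 1) & 0xfff == 0 and the
 * IOVA path is allowed.  A queue without a virt boundary gives
 * (0 + 1) & mask != 0 for any non-zero merge boundary and falls back to the
 * per-segment direct mapping below.
 */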

static bool blk_dma_map_bus(struct blk_dma_iter *iter, struct phys_vec *vec)
{
	iter->addr = pci_p2pdma_bus_addr_map(&iter->p2pdma, vec->paddr);
	iter->len = vec->len;
	return true;
}

static bool blk_dma_map_direct(struct request *req, struct device *dma_dev,
		struct blk_dma_iter *iter, struct phys_vec *vec)
{
	iter->addr = dma_map_page(dma_dev, phys_to_page(vec->paddr),
			offset_in_page(vec->paddr), vec->len, rq_dma_dir(req));
	if (dma_mapping_error(dma_dev, iter->addr)) {
		iter->status = BLK_STS_RESOURCE;
		return false;
	}
	iter->len = vec->len;
	return true;
}

static bool blk_rq_dma_map_iova(struct request *req, struct device *dma_dev,
		struct dma_iova_state *state, struct blk_dma_iter *iter,
		struct phys_vec *vec)
{
	enum dma_data_direction dir = rq_dma_dir(req);
	unsigned int mapped = 0;
	int error;

	iter->addr = state->addr;
	iter->len = dma_iova_size(state);

	do {
		error = dma_iova_link(dma_dev, state, vec->paddr, mapped,
				vec->len, dir, 0);
		if (error)
			break;
		mapped += vec->len;
	} while (blk_map_iter_next(req, &iter->iter, vec));

	error = dma_iova_sync(dma_dev, state, 0, mapped);
	if (error) {
		iter->status = errno_to_blk_status(error);
		return false;
	}

	return true;
}

/**
 * blk_rq_dma_map_iter_start - map the first DMA segment for a request
 * @req:	request to map
 * @dma_dev:	device to map to
 * @state:	DMA IOVA state
 * @iter:	block layer DMA iterator
 *
 * Start DMA mapping @req to @dma_dev.  @state and @iter are provided by the
 * caller and don't need to be initialized.  @state needs to be stored for use
 * at unmap time; @iter is only needed at map time.
 *
 * Returns %false if there is no segment to map, including due to an error, or
 * %true if it did map a segment.
 *
 * If a segment was mapped, the DMA address for it is returned in @iter.addr and
 * the length in @iter.len.  If no segment was mapped the status code is
 * returned in @iter.status.
 *
 * The caller can call blk_rq_dma_map_coalesce() to check if further segments
 * need to be mapped after this, or go straight to blk_rq_dma_map_iter_next()
 * to try to map the following segments.
 */
bool blk_rq_dma_map_iter_start(struct request *req, struct device *dma_dev,
		struct dma_iova_state *state, struct blk_dma_iter *iter)
{
	unsigned int total_len = blk_rq_payload_bytes(req);
	struct phys_vec vec;

	iter->iter.bio = req->bio;
	iter->iter.iter = req->bio->bi_iter;
	memset(&iter->p2pdma, 0, sizeof(iter->p2pdma));
	iter->status = BLK_STS_OK;

	/*
	 * Grab the first segment ASAP because we'll need it to check for P2P
	 * transfers.
	 */
	if (!blk_map_iter_next(req, &iter->iter, &vec))
		return false;

	if (IS_ENABLED(CONFIG_PCI_P2PDMA) && (req->cmd_flags & REQ_P2PDMA)) {
		switch (pci_p2pdma_state(&iter->p2pdma, dma_dev,
					 phys_to_page(vec.paddr))) {
		case PCI_P2PDMA_MAP_BUS_ADDR:
			return blk_dma_map_bus(iter, &vec);
		case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE:
			/*
			 * P2P transfers through the host bridge are treated the
			 * same as non-P2P transfers below and during unmap.
			 */
			req->cmd_flags &= ~REQ_P2PDMA;
			break;
		default:
			iter->status = BLK_STS_INVAL;
			return false;
		}
	}

	if (blk_can_dma_map_iova(req, dma_dev) &&
	    dma_iova_try_alloc(dma_dev, state, vec.paddr, total_len))
		return blk_rq_dma_map_iova(req, dma_dev, state, iter, &vec);
	return blk_dma_map_direct(req, dma_dev, iter, &vec);
}
EXPORT_SYMBOL_GPL(blk_rq_dma_map_iter_start);

/**
 * blk_rq_dma_map_iter_next - map the next DMA segment for a request
 * @req:	request to map
 * @dma_dev:	device to map to
 * @state:	DMA IOVA state
 * @iter:	block layer DMA iterator
 *
 * Iterate to the next mapping after a previous call to
 * blk_rq_dma_map_iter_start().  See there for a detailed description of the
 * arguments.
 *
 * Returns %false if there is no segment to map, including due to an error, or
 * %true if it did map a segment.
 *
 * If a segment was mapped, the DMA address for it is returned in @iter.addr and
 * the length in @iter.len.  If no segment was mapped the status code is
 * returned in @iter.status.
 */
bool blk_rq_dma_map_iter_next(struct request *req, struct device *dma_dev,
		struct dma_iova_state *state, struct blk_dma_iter *iter)
{
	struct phys_vec vec;

	if (!blk_map_iter_next(req, &iter->iter, &vec))
		return false;

	if (iter->p2pdma.map == PCI_P2PDMA_MAP_BUS_ADDR)
		return blk_dma_map_bus(iter, &vec);
	return blk_dma_map_direct(req, dma_dev, iter, &vec);
}
EXPORT_SYMBOL_GPL(blk_rq_dma_map_iter_next);
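
/*
 * Illustrative sketch (not part of the upstream file): roughly how a driver
 * could consume the iterator API above.  The "my_" helper names are
 * hypothetical placeholders and error handling is reduced to returning
 * iter.status.
 */
#if 0
static blk_status_t my_driver_setup_dma(struct device *dma_dev,
		struct request *req, struct dma_iova_state *state)
{
	struct blk_dma_iter iter;

	if (!blk_rq_dma_map_iter_start(req, dma_dev, state, &iter))
		return iter.status;	/* error, or nothing to map */

	do {
		/*
		 * Program one hardware descriptor from iter.addr / iter.len
		 * (hypothetical helper).  If the IOVA path coalesced the
		 * request, this body runs only once.
		 */
		my_driver_add_descriptor(req, iter.addr, iter.len);
	} while (blk_rq_dma_map_iter_next(req, dma_dev, state, &iter));

	/* iter.status is BLK_STS_OK when the whole request was mapped. */
	return iter.status;
}
#endif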

static inline struct scatterlist *
blk_next_sg(struct scatterlist **sg, struct scatterlist *sglist)
{
	if (!*sg)
		return sglist;

	/*
	 * If the driver previously mapped a shorter list, we could see a
	 * termination bit prematurely unless it fully inits the sg table
	 * on each mapping.  We KNOW that there must be more entries here
	 * or the driver would be buggy, so force clear the termination bit
	 * to avoid doing a full sg_init_table() in drivers for each command.
	 */
	sg_unmark_end(*sg);
	return sg_next(*sg);
}

/*
 * Map a request to scatterlist, return number of sg entries setup.  Caller
 * must make sure sg can hold rq->nr_phys_segments entries.
 */
int __blk_rq_map_sg(struct request *rq, struct scatterlist *sglist,
		struct scatterlist **last_sg)
{
	struct req_iterator iter = {
		.bio	= rq->bio,
	};
	struct phys_vec vec;
	int nsegs = 0;

	/* the internal flush request may not have bio attached */
	if (iter.bio)
		iter.iter = iter.bio->bi_iter;

	while (blk_map_iter_next(rq, &iter, &vec)) {
		*last_sg = blk_next_sg(last_sg, sglist);
		sg_set_page(*last_sg, phys_to_page(vec.paddr), vec.len,
				offset_in_page(vec.paddr));
		nsegs++;
	}

	if (*last_sg)
		sg_mark_end(*last_sg);

	/*
	 * Something must have gone wrong if the calculated number of segments
	 * is bigger than the number of the request's physical segments.
	 */
	WARN_ON(nsegs > blk_rq_nr_phys_segments(rq));

	return nsegs;
}
EXPORT_SYMBOL(__blk_rq_map_sg);
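
/*
 * Illustrative sketch (not part of the upstream file): mapping a request to a
 * driver-owned scatterlist with __blk_rq_map_sg().  The scatterlist is assumed
 * to have been allocated with at least blk_rq_nr_phys_segments(rq) entries;
 * the "my_" name is a hypothetical placeholder.
 */
#if 0
static int my_driver_build_sgl(struct request *rq, struct scatterlist *sgl)
{
	struct scatterlist *last_sg = NULL;
	int nsegs;

	/*
	 * A full sg_init_table() per command is not strictly required if the
	 * table was initialized at allocation time; see blk_next_sg() above.
	 */
	sg_init_table(sgl, blk_rq_nr_phys_segments(rq));
	nsegs = __blk_rq_map_sg(rq, sgl, &last_sg);

	/* nsegs entries of sgl are now filled and terminated. */
	return nsegs;
}
#endif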