Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/tools/hv/vmbus_bufring.c
26278 views
1
// SPDX-License-Identifier: BSD-3-Clause
2
/*
3
* Copyright (c) 2009-2012,2016,2023 Microsoft Corp.
4
* Copyright (c) 2012 NetApp Inc.
5
* Copyright (c) 2012 Citrix Inc.
6
* All rights reserved.
7
*/
8
9
#include <errno.h>
10
#include <fcntl.h>
11
#include <emmintrin.h>
12
#include <linux/limits.h>
13
#include <stdbool.h>
14
#include <stdint.h>
15
#include <stdio.h>
16
#include <string.h>
17
#include <sys/mman.h>
18
#include <sys/uio.h>
19
#include <unistd.h>
20
#include "vmbus_bufring.h"
21
22
/**
 * Compiler barrier.
 *
 * Guarantees that operation reordering does not occur at compile time
 * for operations directly before and after the barrier.
 *
 * (__asm__ is the ISO-clean spelling of the GNU asm keyword, usable
 * even under -std=c11.)
 */
#define rte_compiler_barrier() ({ __asm__ volatile ("" : : : "memory"); })

/* Transaction ID placed in packets whose completion we do not track. */
#define VMBUS_RQST_ERROR 0xFFFFFFFFFFFFFFFF

/*
 * Round 'val' up to the next multiple of 'align' (a power of two),
 * preserving the type of 'val'.
 *
 * NOTE(review): the previous definition rounded DOWN
 * ((val) & ~(align - 1)), which made rte_vmbus_chan_send() compute
 * pad_pktlen < pktlen for payloads that are not 64-bit aligned; the
 * pad iovec length (pad_pktlen - pktlen) then underflowed.  Padding a
 * packet out to an alignment boundary requires round-up semantics, as
 * in the kernel's ALIGN() in include/linux/align.h.
 */
#define ALIGN(val, align) \
	((__typeof__(val))(((val) + ((__typeof__(val))((align) - 1))) & \
			   (~((__typeof__(val))((align) - 1)))))
32
33
/*
 * Map twice 'size' bytes of the UIO device behind *fd, read/write and
 * shared with the device.  Returns the mapping address, or NULL if
 * mmap() fails.
 */
void *vmbus_uio_map(int *fd, int size)
{
	void *addr = mmap(NULL, 2 * size, PROT_READ | PROT_WRITE,
			  MAP_SHARED, *fd, 0);

	return addr == MAP_FAILED ? NULL : addr;
}
43
44
/* Advance ring index 'idx' by 'inc' bytes, wrapping at ring size 'sz'. */
static inline uint32_t vmbus_br_idxinc(uint32_t idx, uint32_t inc, uint32_t sz)
{
	uint32_t nidx = idx + inc;

	return nidx >= sz ? nidx - sz : nidx;
}
53
54
void vmbus_br_setup(struct vmbus_br *br, void *buf, unsigned int blen)
55
{
56
br->vbr = buf;
57
br->windex = br->vbr->windex;
58
br->dsize = blen - sizeof(struct vmbus_bufring);
59
}
60
61
/*
 * Full memory barrier (x86-64 only, like the <emmintrin.h> use above).
 *
 * A locked read-modify-write of a dummy stack location orders all
 * earlier loads and stores before all later ones; -128(%rsp) stays
 * within the ABI red zone so no live data is modified (the add of 0 is
 * a no-op).  The "memory" clobber also makes this a compiler barrier.
 */
static inline __always_inline void
rte_smp_mb(void)
{
	asm volatile("lock addl $0, -128(%%rsp); " ::: "memory");
}
66
67
/*
 * Atomic compare-and-set on a 32-bit word (x86 cmpxchg).
 *
 * Atomically performs: if (*dst == exp) { *dst = src; }.
 * Returns non-zero when the store happened, 0 when *dst differed from
 * exp.  cmpxchg implicitly compares against and reloads %eax, which is
 * why both 'exp' (input) and 'res' (output, via sete) use the "a"
 * constraint.
 */
static inline int
rte_atomic32_cmpset(volatile uint32_t *dst, uint32_t exp, uint32_t src)
{
	uint8_t res;

	asm volatile("lock ; "
		     "cmpxchgl %[src], %[dst];"
		     "sete %[res];"
		     : [res] "=a" (res), /* output */
		       [dst] "=m" (*dst)
		     : [src] "r" (src), /* input */
		       "a" (exp),
		       "m" (*dst)
		     : "memory"); /* no-clobber list */
	return res;
}
83
84
static inline uint32_t
85
vmbus_txbr_copyto(const struct vmbus_br *tbr, uint32_t windex,
86
const void *src0, uint32_t cplen)
87
{
88
uint8_t *br_data = tbr->vbr->data;
89
uint32_t br_dsize = tbr->dsize;
90
const uint8_t *src = src0;
91
92
/* XXX use double mapping like Linux kernel? */
93
if (cplen > br_dsize - windex) {
94
uint32_t fraglen = br_dsize - windex;
95
96
/* Wrap-around detected */
97
memcpy(br_data + windex, src, fraglen);
98
memcpy(br_data, src + fraglen, cplen - fraglen);
99
} else {
100
memcpy(br_data + windex, src, cplen);
101
}
102
103
return vmbus_br_idxinc(windex, cplen, br_dsize);
104
}
105
106
/*
 * Write a scattered channel packet to the TX bufring.
 *
 * The offset of this channel packet is written as a 64bits value
 * immediately after this channel packet.
 *
 * The write goes through three stages:
 * 1. Reserve space in ring buffer for the new data.
 *    Writer atomically moves priv_write_index.
 * 2. Copy the new data into the ring.
 * 3. Update the tail of the ring (visible to host) that indicates
 *    next read location. Writer updates write_index
 *
 * Returns 0 on success, -EAGAIN when the ring has no room for the
 * packet, -EINVAL if the bytes written do not match the reservation.
 */
static int
vmbus_txbr_write(struct vmbus_br *tbr, const struct iovec iov[], int iovlen)
{
	struct vmbus_bufring *vbr = tbr->vbr;
	uint32_t ring_size = tbr->dsize;
	uint32_t old_windex, next_windex, windex, total;
	uint64_t save_windex;
	int i;

	/* Bytes needed: every iovec plus the trailing 64-bit offset word */
	total = 0;
	for (i = 0; i < iovlen; i++)
		total += iov[i].iov_len;
	total += sizeof(save_windex);

	/* Reserve space in ring */
	do {
		uint32_t avail;

		/* Get current free location */
		old_windex = tbr->windex;

		/* Prevent compiler reordering this with calculation */
		rte_compiler_barrier();

		avail = vmbus_br_availwrite(tbr, old_windex);

		/* If not enough space in ring, then tell caller. */
		if (avail <= total)
			return -EAGAIN;

		next_windex = vmbus_br_idxinc(old_windex, total, ring_size);

		/* Atomic update of next write_index for other threads */
	} while (!rte_atomic32_cmpset(&tbr->windex, old_windex, next_windex));

	/* Space from old..new is now reserved; copy outside the CAS loop */
	windex = old_windex;
	for (i = 0; i < iovlen; i++)
		windex = vmbus_txbr_copyto(tbr, windex, iov[i].iov_base, iov[i].iov_len);

	/* Set the offset of the current channel packet. */
	save_windex = ((uint64_t)old_windex) << 32;
	windex = vmbus_txbr_copyto(tbr, windex, &save_windex,
				   sizeof(save_windex));

	/* The region reserved should match region used */
	if (windex != next_windex)
		return -EINVAL;

	/* Ensure that data is available before updating host index */
	rte_compiler_barrier();

	/*
	 * Checkin for our reservation. wait for our turn to update host:
	 * writers that reserved earlier must publish first, so spin until
	 * the host-visible windex equals our reservation start.
	 */
	while (!rte_atomic32_cmpset(&vbr->windex, old_windex, next_windex))
		_mm_pause();

	return 0;
}
177
178
int rte_vmbus_chan_send(struct vmbus_br *txbr, uint16_t type, void *data,
179
uint32_t dlen, uint32_t flags)
180
{
181
struct vmbus_chanpkt pkt;
182
unsigned int pktlen, pad_pktlen;
183
const uint32_t hlen = sizeof(pkt);
184
uint64_t pad = 0;
185
struct iovec iov[3];
186
int error;
187
188
pktlen = hlen + dlen;
189
pad_pktlen = ALIGN(pktlen, sizeof(uint64_t));
190
191
pkt.hdr.type = type;
192
pkt.hdr.flags = flags;
193
pkt.hdr.hlen = hlen >> VMBUS_CHANPKT_SIZE_SHIFT;
194
pkt.hdr.tlen = pad_pktlen >> VMBUS_CHANPKT_SIZE_SHIFT;
195
pkt.hdr.xactid = VMBUS_RQST_ERROR;
196
197
iov[0].iov_base = &pkt;
198
iov[0].iov_len = hlen;
199
iov[1].iov_base = data;
200
iov[1].iov_len = dlen;
201
iov[2].iov_base = &pad;
202
iov[2].iov_len = pad_pktlen - pktlen;
203
204
error = vmbus_txbr_write(txbr, iov, 3);
205
206
return error;
207
}
208
209
static inline uint32_t
210
vmbus_rxbr_copyfrom(const struct vmbus_br *rbr, uint32_t rindex,
211
void *dst0, size_t cplen)
212
{
213
const uint8_t *br_data = rbr->vbr->data;
214
uint32_t br_dsize = rbr->dsize;
215
uint8_t *dst = dst0;
216
217
if (cplen > br_dsize - rindex) {
218
uint32_t fraglen = br_dsize - rindex;
219
220
/* Wrap-around detected. */
221
memcpy(dst, br_data + rindex, fraglen);
222
memcpy(dst + fraglen, br_data, cplen - fraglen);
223
} else {
224
memcpy(dst, br_data + rindex, cplen);
225
}
226
227
return vmbus_br_idxinc(rindex, cplen, br_dsize);
228
}
229
230
/* Copy data from receive ring but don't change index */
231
static int
232
vmbus_rxbr_peek(const struct vmbus_br *rbr, void *data, size_t dlen)
233
{
234
uint32_t avail;
235
236
/*
237
* The requested data and the 64bits channel packet
238
* offset should be there at least.
239
*/
240
avail = vmbus_br_availread(rbr);
241
if (avail < dlen + sizeof(uint64_t))
242
return -EAGAIN;
243
244
vmbus_rxbr_copyfrom(rbr, rbr->vbr->rindex, data, dlen);
245
return 0;
246
}
247
248
/*
 * Copy data from receive ring and change index
 * NOTE:
 * We assume (dlen + skip) == sizeof(channel packet).
 *
 * Returns 0 on success, -EAGAIN when the ring does not yet contain the
 * whole packet plus its trailing 64-bit offset.
 */
static int
vmbus_rxbr_read(struct vmbus_br *rbr, void *data, size_t dlen, size_t skip)
{
	struct vmbus_bufring *vbr = rbr->vbr;
	uint32_t br_dsize = rbr->dsize;
	uint32_t rindex;

	if (vmbus_br_availread(rbr) < dlen + skip + sizeof(uint64_t))
		return -EAGAIN;

	/* Record where host was when we started read (for debug) */
	rbr->windex = rbr->vbr->windex;

	/*
	 * Copy channel packet from RX bufring:
	 * advance past the first 'skip' bytes, then read 'dlen' bytes.
	 */
	rindex = vmbus_br_idxinc(rbr->vbr->rindex, skip, br_dsize);
	rindex = vmbus_rxbr_copyfrom(rbr, rindex, data, dlen);

	/*
	 * Discard this channel packet's 64bits offset, which is useless to us.
	 */
	rindex = vmbus_br_idxinc(rindex, sizeof(uint64_t), br_dsize);

	/* Update the read index _after_ the channel packet is fetched. */
	rte_compiler_barrier();

	/* Publish the new read index to the host. */
	vbr->rindex = rindex;

	return 0;
}
284
285
/*
 * Receive the next raw channel packet from the RX bufring into 'data'.
 * "Raw" here means the packet header is included: the read below uses
 * skip == 0, so 'data' receives header + payload + padding.
 *
 * On entry *len is the size of the caller's buffer.  On both success
 * and -ENOBUFS, *len is set to the packet's total (padded) length.
 * Returns the consumed ring bytes (packet length plus the trailing
 * 64-bit offset) on success, -EAGAIN if no complete packet is
 * available, -EIO on a malformed header, -ENOBUFS if the caller's
 * buffer is too small.
 */
int rte_vmbus_chan_recv_raw(struct vmbus_br *rxbr,
			    void *data, uint32_t *len)
{
	struct vmbus_chanpkt_hdr pkt;
	uint32_t dlen, bufferlen = *len;
	int error;

	/* Peek at the header without consuming it. */
	error = vmbus_rxbr_peek(rxbr, &pkt, sizeof(pkt));
	if (error)
		return error;

	if (unlikely(pkt.hlen < VMBUS_CHANPKT_HLEN_MIN))
		/* XXX this channel is dead actually. */
		return -EIO;

	if (unlikely(pkt.hlen > pkt.tlen))
		return -EIO;

	/* Length are in quad words (units of 8 bytes) */
	dlen = pkt.tlen << VMBUS_CHANPKT_SIZE_SHIFT;
	*len = dlen;

	/* If caller buffer is not large enough */
	if (unlikely(dlen > bufferlen))
		return -ENOBUFS;

	/* Read data and skip packet header */
	error = vmbus_rxbr_read(rxbr, data, dlen, 0);
	if (error)
		return error;

	/* Return the number of bytes read */
	return dlen + sizeof(uint64_t);
}
319
320