Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/netlink/netlink_io.c
39475 views
1
/*-
2
* SPDX-License-Identifier: BSD-2-Clause
3
*
4
* Copyright (c) 2021 Ng Peng Nam Sean
5
* Copyright (c) 2022 Alexander V. Chernikov <[email protected]>
6
*
7
* Redistribution and use in source and binary forms, with or without
8
* modification, are permitted provided that the following conditions
9
* are met:
10
* 1. Redistributions of source code must retain the above copyright
11
* notice, this list of conditions and the following disclaimer.
12
* 2. Redistributions in binary form must reproduce the above copyright
13
* notice, this list of conditions and the following disclaimer in the
14
* documentation and/or other materials provided with the distribution.
15
*
16
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26
* SUCH DAMAGE.
27
*/
28
29
#include <sys/param.h>
30
#include <sys/ck.h>
31
#include <sys/lock.h>
32
#include <sys/malloc.h>
33
#include <sys/mbuf.h>
34
#include <sys/mutex.h>
35
#include <sys/socket.h>
36
#include <sys/socketvar.h>
37
#include <sys/syslog.h>
38
39
#include <netlink/netlink.h>
40
#include <netlink/netlink_ctl.h>
41
#include <netlink/netlink_linux.h>
42
#include <netlink/netlink_var.h>
43
44
#define DEBUG_MOD_NAME nl_io
45
#define DEBUG_MAX_LEVEL LOG_DEBUG3
46
#include <netlink/netlink_debug.h>
47
_DECLARE_DEBUG(LOG_INFO);
48
49
/*
 * The logic below provides a p2p interface for receiving and
 * sending netlink data between the kernel and userland.
 */
53
54
static bool nl_process_nbuf(struct nl_buf *nb, struct nlpcb *nlp);
55
56
struct nl_buf *
57
nl_buf_alloc(size_t len, int mflag)
58
{
59
struct nl_buf *nb;
60
61
KASSERT(len > 0 && len <= UINT_MAX, ("%s: invalid length %zu",
62
__func__, len));
63
64
nb = malloc(sizeof(struct nl_buf) + len, M_NETLINK, mflag);
65
if (__predict_true(nb != NULL)) {
66
nb->buflen = len;
67
nb->datalen = nb->offset = 0;
68
}
69
70
return (nb);
71
}
72
73
void
74
nl_buf_free(struct nl_buf *nb)
75
{
76
77
free(nb, M_NETLINK);
78
}
79
80
void
81
nl_schedule_taskqueue(struct nlpcb *nlp)
82
{
83
if (!nlp->nl_task_pending) {
84
nlp->nl_task_pending = true;
85
taskqueue_enqueue(nlp->nl_taskqueue, &nlp->nl_task);
86
NL_LOG(LOG_DEBUG3, "taskqueue scheduled");
87
} else {
88
NL_LOG(LOG_DEBUG3, "taskqueue schedule skipped");
89
}
90
}
91
92
static bool
93
nl_process_received_one(struct nlpcb *nlp)
94
{
95
struct socket *so = nlp->nl_socket;
96
struct sockbuf *sb;
97
struct nl_buf *nb;
98
bool reschedule = false;
99
100
NLP_LOCK(nlp);
101
nlp->nl_task_pending = false;
102
NLP_UNLOCK(nlp);
103
104
/*
105
* Do not process queued up requests if there is no space to queue
106
* replies.
107
*/
108
sb = &so->so_rcv;
109
SOCK_RECVBUF_LOCK(so);
110
if (sb->sb_hiwat <= sb->sb_ccc) {
111
SOCK_RECVBUF_UNLOCK(so);
112
NL_LOG(LOG_DEBUG3, "socket %p stuck", so);
113
return (false);
114
}
115
SOCK_RECVBUF_UNLOCK(so);
116
117
sb = &so->so_snd;
118
SOCK_SENDBUF_LOCK(so);
119
while ((nb = TAILQ_FIRST(&sb->nl_queue)) != NULL) {
120
TAILQ_REMOVE(&sb->nl_queue, nb, tailq);
121
SOCK_SENDBUF_UNLOCK(so);
122
reschedule = nl_process_nbuf(nb, nlp);
123
SOCK_SENDBUF_LOCK(so);
124
if (reschedule) {
125
sb->sb_acc -= nb->datalen;
126
sb->sb_ccc -= nb->datalen;
127
/* XXXGL: potentially can reduce lock&unlock count. */
128
sowwakeup_locked(so);
129
nl_buf_free(nb);
130
SOCK_SENDBUF_LOCK(so);
131
} else {
132
TAILQ_INSERT_HEAD(&sb->nl_queue, nb, tailq);
133
break;
134
}
135
}
136
SOCK_SENDBUF_UNLOCK(so);
137
138
return (reschedule);
139
}
140
141
static void
142
nl_process_received(struct nlpcb *nlp)
143
{
144
NL_LOG(LOG_DEBUG3, "taskqueue called");
145
146
if (__predict_false(nlp->nl_need_thread_setup)) {
147
nl_set_thread_nlp(curthread, nlp);
148
NLP_LOCK(nlp);
149
nlp->nl_need_thread_setup = false;
150
NLP_UNLOCK(nlp);
151
}
152
153
while (nl_process_received_one(nlp))
154
;
155
}
156
157
/*
158
* Called after some data have been read from the socket.
159
*/
160
void
161
nl_on_transmit(struct nlpcb *nlp)
162
{
163
NLP_LOCK(nlp);
164
165
struct socket *so = nlp->nl_socket;
166
if (__predict_false(nlp->nl_dropped_bytes > 0 && so != NULL)) {
167
unsigned long dropped_bytes = nlp->nl_dropped_bytes;
168
unsigned long dropped_messages = nlp->nl_dropped_messages;
169
nlp->nl_dropped_bytes = 0;
170
nlp->nl_dropped_messages = 0;
171
172
struct sockbuf *sb = &so->so_rcv;
173
NLP_LOG(LOG_DEBUG, nlp,
174
"socket RX overflowed, %lu messages (%lu bytes) dropped. "
175
"bytes: [%u/%u]", dropped_messages, dropped_bytes,
176
sb->sb_ccc, sb->sb_hiwat);
177
/* TODO: send netlink message */
178
}
179
180
nl_schedule_taskqueue(nlp);
181
NLP_UNLOCK(nlp);
182
}
183
184
void
185
nl_taskqueue_handler(void *_arg, int pending)
186
{
187
struct nlpcb *nlp = (struct nlpcb *)_arg;
188
189
CURVNET_SET(nlp->nl_socket->so_vnet);
190
nl_process_received(nlp);
191
CURVNET_RESTORE();
192
}
193
194
/*
195
* Tries to send current data buffer from writer.
196
*
197
* Returns true on success.
198
* If no queue overrunes happened, wakes up socket owner.
199
*/
200
bool
201
nl_send(struct nl_writer *nw, struct nlpcb *nlp)
202
{
203
struct socket *so = nlp->nl_socket;
204
struct sockbuf *sb = &so->so_rcv;
205
struct nl_buf *nb;
206
207
MPASS(nw->hdr == NULL);
208
MPASS(nw->buf != NULL);
209
MPASS(nw->buf->datalen > 0);
210
211
IF_DEBUG_LEVEL(LOG_DEBUG2) {
212
struct nlmsghdr *hdr = (struct nlmsghdr *)nw->buf->data;
213
NLP_LOG(LOG_DEBUG2, nlp,
214
"TX len %u msgs %u msg type %d first hdrlen %u",
215
nw->buf->datalen, nw->num_messages, hdr->nlmsg_type,
216
hdr->nlmsg_len);
217
}
218
219
if (nlp->nl_linux && linux_netlink_p != NULL) {
220
nb = linux_netlink_p->msgs_to_linux(nw->buf, nlp);
221
nl_buf_free(nw->buf);
222
nw->buf = NULL;
223
if (nb == NULL)
224
return (false);
225
} else {
226
nb = nw->buf;
227
nw->buf = NULL;
228
}
229
230
SOCK_RECVBUF_LOCK(so);
231
if (!nw->ignore_limit && __predict_false(sb->sb_hiwat <= sb->sb_ccc)) {
232
SOCK_RECVBUF_UNLOCK(so);
233
NLP_LOCK(nlp);
234
nlp->nl_dropped_bytes += nb->datalen;
235
nlp->nl_dropped_messages += nw->num_messages;
236
NLP_LOG(LOG_DEBUG2, nlp, "RX oveflow: %lu m (+%d), %lu b (+%d)",
237
(unsigned long)nlp->nl_dropped_messages, nw->num_messages,
238
(unsigned long)nlp->nl_dropped_bytes, nb->datalen);
239
NLP_UNLOCK(nlp);
240
nl_buf_free(nb);
241
return (false);
242
} else {
243
bool full;
244
245
TAILQ_INSERT_TAIL(&sb->nl_queue, nb, tailq);
246
sb->sb_acc += nb->datalen;
247
sb->sb_ccc += nb->datalen;
248
full = sb->sb_hiwat <= sb->sb_ccc;
249
sorwakeup_locked(so);
250
if (full) {
251
NLP_LOCK(nlp);
252
nlp->nl_tx_blocked = true;
253
NLP_UNLOCK(nlp);
254
}
255
return (true);
256
}
257
}
258
259
static int
260
nl_receive_message(struct nlmsghdr *hdr, int remaining_length,
261
struct nlpcb *nlp, struct nl_pstate *npt)
262
{
263
nl_handler_f handler = nl_handlers[nlp->nl_proto].cb;
264
int error = 0;
265
266
NLP_LOG(LOG_DEBUG2, nlp, "msg len: %u type: %d: flags: 0x%X seq: %u pid: %u",
267
hdr->nlmsg_len, hdr->nlmsg_type, hdr->nlmsg_flags, hdr->nlmsg_seq,
268
hdr->nlmsg_pid);
269
270
if (__predict_false(hdr->nlmsg_len > remaining_length)) {
271
NLP_LOG(LOG_DEBUG, nlp, "message is not entirely present: want %d got %d",
272
hdr->nlmsg_len, remaining_length);
273
return (EINVAL);
274
} else if (__predict_false(hdr->nlmsg_len < sizeof(*hdr))) {
275
NL_LOG(LOG_DEBUG, "message too short: %d", hdr->nlmsg_len);
276
return (EINVAL);
277
}
278
/* Stamp each message with sender pid */
279
hdr->nlmsg_pid = nlp->nl_port;
280
281
npt->hdr = hdr;
282
283
if (hdr->nlmsg_flags & NLM_F_REQUEST &&
284
hdr->nlmsg_type >= NLMSG_MIN_TYPE) {
285
NL_LOG(LOG_DEBUG2, "handling message with msg type: %d",
286
hdr->nlmsg_type);
287
if (nlp->nl_linux) {
288
MPASS(linux_netlink_p != NULL);
289
error = linux_netlink_p->msg_from_linux(nlp->nl_proto,
290
&hdr, npt);
291
if (error)
292
goto ack;
293
}
294
error = handler(hdr, npt);
295
NL_LOG(LOG_DEBUG2, "retcode: %d", error);
296
}
297
ack:
298
if ((hdr->nlmsg_flags & NLM_F_ACK) || (error != 0 && error != EINTR)) {
299
if (!npt->nw->suppress_ack) {
300
NL_LOG(LOG_DEBUG3, "ack");
301
nlmsg_ack(nlp, error, hdr, npt);
302
}
303
}
304
305
return (0);
306
}
307
308
static void
309
npt_clear(struct nl_pstate *npt)
310
{
311
lb_clear(&npt->lb);
312
npt->cookie = NULL;
313
npt->error = 0;
314
npt->err_msg = NULL;
315
npt->err_off = 0;
316
npt->hdr = NULL;
317
npt->nw->suppress_ack = false;
318
}
319
320
/*
321
* Processes an incoming packet, which can contain multiple netlink messages
322
*/
323
static bool
324
nl_process_nbuf(struct nl_buf *nb, struct nlpcb *nlp)
325
{
326
struct nl_writer nw;
327
struct nlmsghdr *hdr;
328
int error;
329
330
NL_LOG(LOG_DEBUG3, "RX netlink buf %p on %p", nb, nlp->nl_socket);
331
332
if (!nl_writer_unicast(&nw, NLMSG_SMALL, nlp, false)) {
333
NL_LOG(LOG_DEBUG, "error allocating socket writer");
334
return (true);
335
}
336
337
nlmsg_ignore_limit(&nw);
338
339
struct nl_pstate npt = {
340
.nlp = nlp,
341
.lb.base = &nb->data[roundup2(nb->datalen, 8)],
342
.lb.size = nb->buflen - roundup2(nb->datalen, 8),
343
.nw = &nw,
344
.strict = nlp->nl_flags & NLF_STRICT,
345
};
346
347
for (; nb->offset + sizeof(struct nlmsghdr) <= nb->datalen;) {
348
hdr = (struct nlmsghdr *)&nb->data[nb->offset];
349
/* Save length prior to calling handler */
350
int msglen = NLMSG_ALIGN(hdr->nlmsg_len);
351
NL_LOG(LOG_DEBUG3, "parsing offset %d/%d",
352
nb->offset, nb->datalen);
353
npt_clear(&npt);
354
error = nl_receive_message(hdr, nb->datalen - nb->offset, nlp,
355
&npt);
356
nb->offset += msglen;
357
if (__predict_false(error != 0 || nlp->nl_tx_blocked))
358
break;
359
}
360
NL_LOG(LOG_DEBUG3, "packet parsing done");
361
nlmsg_flush(&nw);
362
363
if (nlp->nl_tx_blocked) {
364
NLP_LOCK(nlp);
365
nlp->nl_tx_blocked = false;
366
NLP_UNLOCK(nlp);
367
return (false);
368
} else
369
return (true);
370
}
371
372