Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/netpfil/ipfw/dn_sched_wf2q.c
39482 views
1
/*-
2
* SPDX-License-Identifier: BSD-2-Clause
3
*
4
* Copyright (c) 2010 Riccardo Panicucci, Universita` di Pisa
5
* Copyright (c) 2000-2002 Luigi Rizzo, Universita` di Pisa
6
* All rights reserved
7
*
8
* Redistribution and use in source and binary forms, with or without
9
* modification, are permitted provided that the following conditions
10
* are met:
11
* 1. Redistributions of source code must retain the above copyright
12
* notice, this list of conditions and the following disclaimer.
13
* 2. Redistributions in binary form must reproduce the above copyright
14
* notice, this list of conditions and the following disclaimer in the
15
* documentation and/or other materials provided with the distribution.
16
*
17
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27
* SUCH DAMAGE.
28
*/
29
30
/*
31
*/
32
33
#ifdef _KERNEL
34
#include <sys/malloc.h>
35
#include <sys/socket.h>
36
#include <sys/socketvar.h>
37
#include <sys/kernel.h>
38
#include <sys/lock.h>
39
#include <sys/mbuf.h>
40
#include <sys/module.h>
41
#include <sys/rwlock.h>
42
#include <net/if.h> /* IFNAMSIZ */
43
#include <netinet/in.h>
44
#include <netinet/ip_var.h> /* ipfw_rule_ref */
45
#include <netinet/ip_fw.h> /* flow_id */
46
#include <netinet/ip_dummynet.h>
47
#include <netpfil/ipfw/ip_fw_private.h>
48
#include <netpfil/ipfw/dn_heap.h>
49
#include <netpfil/ipfw/ip_dn_private.h>
50
#ifdef NEW_AQM
51
#include <netpfil/ipfw/dn_aqm.h>
52
#endif
53
#include <netpfil/ipfw/dn_sched.h>
54
#else
55
#include <dn_test.h>
56
#endif
57
58
#ifndef MAX64
59
#define MAX64(x,y) (( (int64_t) ( (y)-(x) )) > 0 ) ? (y) : (x)
60
#endif
61
62
/*
63
* timestamps are computed on 64 bit using fixed point arithmetic.
64
* LMAX_BITS, WMAX_BITS are the max number of bits for the packet len
65
* and sum of weights, respectively. FRAC_BITS is the number of
66
* fractional bits. We want FRAC_BITS >> WMAX_BITS to avoid too large
67
* errors when computing the inverse, FRAC_BITS < 32 so we can do 1/w
68
* using an unsigned 32-bit division, and to avoid wraparounds we need
69
* LMAX_BITS + WMAX_BITS + FRAC_BITS << 64
70
* As an example
71
* FRAC_BITS = 26, LMAX_BITS=14, WMAX_BITS = 19
72
*/
73
#ifndef FRAC_BITS
74
#define FRAC_BITS 28 /* shift for fixed point arithmetic */
75
#define ONE_FP (1UL << FRAC_BITS)
76
#endif
77
78
/*
79
* Private information for the scheduler instance:
80
* sch_heap (key is Finish time) returns the next queue to serve
81
* ne_heap (key is Start time) stores not-eligible queues
82
* idle_heap (key=start/finish time) stores idle flows. It must
83
* support extract-from-middle.
84
* A flow is only in 1 of the three heaps.
85
* XXX todo: use a more efficient data structure, e.g. a tree sorted
86
* by F with min_subtree(S) in each node
87
*/
88
struct wf2qp_si {
89
struct dn_heap sch_heap; /* top extract - key Finish time */
90
struct dn_heap ne_heap; /* top extract - key Start time */
91
struct dn_heap idle_heap; /* random extract - key Start=Finish time */
92
uint64_t V; /* virtual time */
93
uint32_t inv_wsum; /* inverse of sum of weights */
94
uint32_t wsum; /* sum of weights */
95
};
96
97
struct wf2qp_queue {
98
struct dn_queue _q;
99
uint64_t S, F; /* start time, finish time */
100
uint32_t inv_w; /* ONE_FP / weight */
101
int32_t heap_pos; /* position (index) of struct in heap */
102
};
103
104
/*
105
* This file implements a WF2Q+ scheduler as it has been in dummynet
106
* since 2000.
107
* The scheduler supports per-flow queues and has O(log N) complexity.
108
*
109
* WF2Q+ needs to drain entries from the idle heap so that we
110
* can keep the sum of weights up to date. We can do it whenever
111
* we get a chance, or periodically, or following some other
112
* strategy. The function idle_check() drains at most N elements
113
* from the idle heap.
114
*/
115
static void
116
idle_check(struct wf2qp_si *si, int n, int force)
117
{
118
struct dn_heap *h = &si->idle_heap;
119
while (n-- > 0 && h->elements > 0 &&
120
(force || DN_KEY_LT(HEAP_TOP(h)->key, si->V))) {
121
struct dn_queue *q = HEAP_TOP(h)->object;
122
struct wf2qp_queue *alg_fq = (struct wf2qp_queue *)q;
123
124
heap_extract(h, NULL);
125
/* XXX to let the flowset delete the queue we should
126
* mark it as 'unused' by the scheduler.
127
*/
128
alg_fq->S = alg_fq->F + 1; /* Mark timestamp as invalid. */
129
si->wsum -= q->fs->fs.par[0]; /* adjust sum of weights */
130
if (si->wsum > 0)
131
si->inv_wsum = ONE_FP/si->wsum;
132
}
133
}
134
135
static int
136
wf2qp_enqueue(struct dn_sch_inst *_si, struct dn_queue *q, struct mbuf *m)
137
{
138
struct dn_fsk *fs = q->fs;
139
struct wf2qp_si *si = (struct wf2qp_si *)(_si + 1);
140
struct wf2qp_queue *alg_fq;
141
uint64_t len = m->m_pkthdr.len;
142
143
if (m != q->mq.head) {
144
if (dn_enqueue(q, m, 0)) /* packet was dropped */
145
return 1;
146
if (m != q->mq.head) /* queue was already busy */
147
return 0;
148
}
149
150
/* If reach this point, queue q was idle */
151
alg_fq = (struct wf2qp_queue *)q;
152
153
if (DN_KEY_LT(alg_fq->F, alg_fq->S)) {
154
/* F<S means timestamps are invalid ->brand new queue. */
155
alg_fq->S = si->V; /* init start time */
156
si->wsum += fs->fs.par[0]; /* add weight of new queue. */
157
si->inv_wsum = ONE_FP/si->wsum;
158
} else { /* if it was idle then it was in the idle heap */
159
if (! heap_extract(&si->idle_heap, q))
160
return 1;
161
alg_fq->S = MAX64(alg_fq->F, si->V); /* compute new S */
162
}
163
alg_fq->F = alg_fq->S + len * alg_fq->inv_w;
164
165
/* if nothing is backlogged, make sure this flow is eligible */
166
if (si->ne_heap.elements == 0 && si->sch_heap.elements == 0)
167
si->V = MAX64(alg_fq->S, si->V);
168
169
/*
170
* Look at eligibility. A flow is not eligibile if S>V (when
171
* this happens, it means that there is some other flow already
172
* scheduled for the same pipe, so the sch_heap cannot be
173
* empty). If the flow is not eligible we just store it in the
174
* ne_heap. Otherwise, we store in the sch_heap.
175
* Note that for all flows in sch_heap (SCH), S_i <= V,
176
* and for all flows in ne_heap (NEH), S_i > V.
177
* So when we need to compute max(V, min(S_i)) forall i in
178
* SCH+NEH, we only need to look into NEH.
179
*/
180
if (DN_KEY_LT(si->V, alg_fq->S)) {
181
/* S>V means flow Not eligible. */
182
if (si->sch_heap.elements == 0)
183
D("++ ouch! not eligible but empty scheduler!");
184
heap_insert(&si->ne_heap, alg_fq->S, q);
185
} else {
186
heap_insert(&si->sch_heap, alg_fq->F, q);
187
}
188
return 0;
189
}
190
191
/* XXX invariant: sch > 0 || V >= min(S in neh) */
192
static struct mbuf *
193
wf2qp_dequeue(struct dn_sch_inst *_si)
194
{
195
/* Access scheduler instance private data */
196
struct wf2qp_si *si = (struct wf2qp_si *)(_si + 1);
197
struct mbuf *m;
198
struct dn_queue *q;
199
struct dn_heap *sch = &si->sch_heap;
200
struct dn_heap *neh = &si->ne_heap;
201
struct wf2qp_queue *alg_fq;
202
203
if (sch->elements == 0 && neh->elements == 0) {
204
/* we have nothing to do. We could kill the idle heap
205
* altogether and reset V
206
*/
207
idle_check(si, 0x7fffffff, 1);
208
si->V = 0;
209
si->wsum = 0; /* should be set already */
210
return NULL; /* quick return if nothing to do */
211
}
212
idle_check(si, 1, 0); /* drain something from the idle heap */
213
214
/* make sure at least one element is eligible, bumping V
215
* and moving entries that have become eligible.
216
* We need to repeat the first part twice, before and
217
* after extracting the candidate, or enqueue() will
218
* find the data structure in a wrong state.
219
*/
220
m = NULL;
221
for(;;) {
222
/*
223
* Compute V = max(V, min(S_i)). Remember that all elements
224
* in sch have by definition S_i <= V so if sch is not empty,
225
* V is surely the max and we must not update it. Conversely,
226
* if sch is empty we only need to look at neh.
227
* We don't need to move the queues, as it will be done at the
228
* next enqueue
229
*/
230
if (sch->elements == 0 && neh->elements > 0) {
231
si->V = MAX64(si->V, HEAP_TOP(neh)->key);
232
}
233
while (neh->elements > 0 &&
234
DN_KEY_LEQ(HEAP_TOP(neh)->key, si->V)) {
235
q = HEAP_TOP(neh)->object;
236
alg_fq = (struct wf2qp_queue *)q;
237
heap_extract(neh, NULL);
238
heap_insert(sch, alg_fq->F, q);
239
}
240
if (m) /* pkt found in previous iteration */
241
break;
242
/* ok we have at least one eligible pkt */
243
q = HEAP_TOP(sch)->object;
244
alg_fq = (struct wf2qp_queue *)q;
245
m = dn_dequeue(q);
246
if (m == NULL)
247
return NULL;
248
heap_extract(sch, NULL); /* Remove queue from heap. */
249
si->V += (uint64_t)(m->m_pkthdr.len) * si->inv_wsum;
250
alg_fq->S = alg_fq->F; /* Update start time. */
251
if (q->mq.head == 0) { /* not backlogged any more. */
252
heap_insert(&si->idle_heap, alg_fq->F, q);
253
} else { /* Still backlogged. */
254
/* Update F, store in neh or sch */
255
uint64_t len = q->mq.head->m_pkthdr.len;
256
alg_fq->F += len * alg_fq->inv_w;
257
if (DN_KEY_LEQ(alg_fq->S, si->V)) {
258
heap_insert(sch, alg_fq->F, q);
259
} else {
260
heap_insert(neh, alg_fq->S, q);
261
}
262
}
263
}
264
return m;
265
}
266
267
static int
268
wf2qp_new_sched(struct dn_sch_inst *_si)
269
{
270
struct wf2qp_si *si = (struct wf2qp_si *)(_si + 1);
271
int ofs = offsetof(struct wf2qp_queue, heap_pos);
272
273
/* all heaps support extract from middle */
274
if (heap_init(&si->idle_heap, 16, ofs) ||
275
heap_init(&si->sch_heap, 16, ofs) ||
276
heap_init(&si->ne_heap, 16, ofs)) {
277
heap_free(&si->ne_heap);
278
heap_free(&si->sch_heap);
279
heap_free(&si->idle_heap);
280
return ENOMEM;
281
}
282
return 0;
283
}
284
285
static int
286
wf2qp_free_sched(struct dn_sch_inst *_si)
287
{
288
struct wf2qp_si *si = (struct wf2qp_si *)(_si + 1);
289
290
heap_free(&si->sch_heap);
291
heap_free(&si->ne_heap);
292
heap_free(&si->idle_heap);
293
294
return 0;
295
}
296
297
static int
298
wf2qp_new_fsk(struct dn_fsk *fs)
299
{
300
ipdn_bound_var(&fs->fs.par[0], 1,
301
1, 100, "WF2Q+ weight");
302
return 0;
303
}
304
305
static int
306
wf2qp_new_queue(struct dn_queue *_q)
307
{
308
struct wf2qp_queue *q = (struct wf2qp_queue *)_q;
309
310
_q->ni.oid.subtype = DN_SCHED_WF2QP;
311
q->F = 0; /* not strictly necessary */
312
q->S = q->F + 1; /* mark timestamp as invalid. */
313
q->inv_w = ONE_FP / _q->fs->fs.par[0];
314
if (_q->mq.head != NULL) {
315
wf2qp_enqueue(_q->_si, _q, _q->mq.head);
316
}
317
return 0;
318
}
319
320
/*
321
* Called when the infrastructure removes a queue (e.g. flowset
322
* is reconfigured). Nothing to do if we did not 'own' the queue,
323
* otherwise remove it from the right heap and adjust the sum
324
* of weights.
325
*/
326
static int
327
wf2qp_free_queue(struct dn_queue *q)
328
{
329
struct wf2qp_queue *alg_fq = (struct wf2qp_queue *)q;
330
struct wf2qp_si *si = (struct wf2qp_si *)(q->_si + 1);
331
332
if (alg_fq->S >= alg_fq->F + 1)
333
return 0; /* nothing to do, not in any heap */
334
si->wsum -= q->fs->fs.par[0];
335
if (si->wsum > 0)
336
si->inv_wsum = ONE_FP/si->wsum;
337
338
/* extract from the heap. XXX TODO we may need to adjust V
339
* to make sure the invariants hold.
340
*/
341
heap_extract(&si->idle_heap, q);
342
heap_extract(&si->ne_heap, q);
343
heap_extract(&si->sch_heap, q);
344
345
return 0;
346
}
347
348
/*
349
* WF2Q+ scheduler descriptor
350
* contains the type of the scheduler, the name, the size of the
351
* structures and function pointers.
352
*/
353
static struct dn_alg wf2qp_desc = {
354
_SI( .type = ) DN_SCHED_WF2QP,
355
_SI( .name = ) "WF2Q+",
356
_SI( .flags = ) DN_MULTIQUEUE,
357
358
/* we need extra space in the si and the queue */
359
_SI( .schk_datalen = ) 0,
360
_SI( .si_datalen = ) sizeof(struct wf2qp_si),
361
_SI( .q_datalen = ) sizeof(struct wf2qp_queue) -
362
sizeof(struct dn_queue),
363
364
_SI( .enqueue = ) wf2qp_enqueue,
365
_SI( .dequeue = ) wf2qp_dequeue,
366
367
_SI( .config = ) NULL,
368
_SI( .destroy = ) NULL,
369
_SI( .new_sched = ) wf2qp_new_sched,
370
_SI( .free_sched = ) wf2qp_free_sched,
371
372
_SI( .new_fsk = ) wf2qp_new_fsk,
373
_SI( .free_fsk = ) NULL,
374
375
_SI( .new_queue = ) wf2qp_new_queue,
376
_SI( .free_queue = ) wf2qp_free_queue,
377
#ifdef NEW_AQM
378
_SI( .getconfig = ) NULL,
379
#endif
380
381
};
382
383
DECLARE_DNSCHED_MODULE(dn_wf2qp, &wf2qp_desc);
384
385