#ifdef _KERNEL
#include <sys/malloc.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rwlock.h>
#include <net/if.h>
#include <netinet/in.h>
#include <netinet/ip_var.h>
#include <netinet/ip_fw.h>
#include <netinet/ip_dummynet.h>
#include <netpfil/ipfw/ip_fw_private.h>
#include <netpfil/ipfw/dn_heap.h>
#include <netpfil/ipfw/ip_dn_private.h>
#ifdef NEW_AQM
#include <netpfil/ipfw/dn_aqm.h>
#endif
#include <netpfil/ipfw/dn_sched.h>
#else
#include <dn_test.h>
#endif
#ifdef QFQ_DEBUG
#define _P64 unsigned long long
struct qfq_sched;
static void dump_sched(struct qfq_sched *q, const char *msg);
#define NO(x) x
#else
#define NO(x)
#endif
#define DN_SCHED_QFQ 4
typedef unsigned long bitmap;
#if defined(_WIN32) || (defined(__MIPSEL__) && defined(LINUX_24))
int fls(unsigned int n)
{
int i = 0;
for (i = 0; n > 0; n >>= 1, i++)
;
return i;
}
#endif
#if !defined(_KERNEL) || defined( __FreeBSD__ ) || defined(_WIN32) || (defined(__MIPSEL__) && defined(LINUX_24))
static inline unsigned long __fls(unsigned long word)
{
return fls(word) - 1;
}
#endif
#if !defined(_KERNEL) || !defined(__linux__)
#ifdef QFQ_DEBUG
static int test_bit(int ix, bitmap *p)
{
if (ix < 0 || ix > 31)
D("bad index %d", ix);
return *p & (1<<ix);
}
static void __set_bit(int ix, bitmap *p)
{
if (ix < 0 || ix > 31)
D("bad index %d", ix);
*p |= (1<<ix);
}
static void __clear_bit(int ix, bitmap *p)
{
if (ix < 0 || ix > 31)
D("bad index %d", ix);
*p &= ~(1<<ix);
}
#else
#define test_bit(ix, pData) ((*pData) & (1<<(ix)))
#define __set_bit(ix, pData) (*pData) |= (1<<(ix))
#define __clear_bit(ix, pData) (*pData) &= ~(1<<(ix))
#endif
#endif
#ifdef __MIPSEL__
#define __clear_bit(ix, pData) (*pData) &= ~(1<<(ix))
#endif
#define QFQ_MAX_SLOTS 32
#define QFQ_MAX_INDEX 19
#define QFQ_MAX_WSHIFT 16
#define QFQ_MAX_WEIGHT (1<<QFQ_MAX_WSHIFT)
#define QFQ_MAX_WSUM (2*QFQ_MAX_WEIGHT)
#define FRAC_BITS 30
#define ONE_FP (1UL << FRAC_BITS)
#define QFQ_MTU_SHIFT 11
#define QFQ_MIN_SLOT_SHIFT (FRAC_BITS + QFQ_MTU_SHIFT - QFQ_MAX_INDEX)
enum qfq_state { ER, IR, EB, IB, QFQ_MAX_STATE };
struct qfq_group;
struct qfq_class {
struct dn_queue _q;
uint64_t S, F;
struct qfq_class *next;
struct qfq_group *grp;
uint32_t inv_w;
uint32_t lmax;
};
struct qfq_group {
uint64_t S, F;
unsigned int slot_shift;
unsigned int index;
unsigned int front;
bitmap full_slots;
struct qfq_class *slots[QFQ_MAX_SLOTS];
};
struct qfq_sched {
uint64_t V;
uint32_t wsum;
uint32_t iwsum;
NO(uint32_t i_wsum;)
NO(uint32_t queued;)
NO(uint32_t loops;)
bitmap bitmaps[QFQ_MAX_STATE];
struct qfq_group groups[QFQ_MAX_INDEX + 1];
};
static inline int qfq_gt(uint64_t a, uint64_t b)
{
return (int64_t)(a - b) > 0;
}
static inline uint64_t qfq_round_down(uint64_t ts, unsigned int shift)
{
return ts & ~((1ULL << shift) - 1);
}
static inline struct qfq_group *qfq_ffs(struct qfq_sched *q,
unsigned long bitmap)
{
int index = ffs(bitmap) - 1;
return &q->groups[index];
}
static int qfq_calc_index(uint32_t inv_w, unsigned int maxlen)
{
uint64_t slot_size = (uint64_t)maxlen *inv_w;
unsigned long size_map;
int index = 0;
size_map = (unsigned long)(slot_size >> QFQ_MIN_SLOT_SHIFT);
if (!size_map)
goto out;
index = __fls(size_map) + 1;
index -= !(slot_size - (1ULL << (index + QFQ_MIN_SLOT_SHIFT - 1)));
if (index < 0)
index = 0;
out:
ND("W = %d, L = %d, I = %d\n", ONE_FP/inv_w, maxlen, index);
return index;
}
static int
qfq_new_queue(struct dn_queue *_q)
{
struct qfq_sched *q = (struct qfq_sched *)(_q->_si + 1);
struct qfq_class *cl = (struct qfq_class *)_q;
int i;
uint32_t w;
w = _q->fs->fs.par[0];
cl->lmax = _q->fs->fs.par[1];
if (!w || w > QFQ_MAX_WEIGHT) {
w = 1;
D("rounding weight to 1");
}
cl->inv_w = ONE_FP/w;
w = ONE_FP/cl->inv_w;
if (q->wsum + w > QFQ_MAX_WSUM)
return EINVAL;
i = qfq_calc_index(cl->inv_w, cl->lmax);
cl->grp = &q->groups[i];
q->wsum += w;
q->iwsum = ONE_FP / q->wsum;
return 0;
}
static int
qfq_free_queue(struct dn_queue *_q)
{
struct qfq_sched *q = (struct qfq_sched *)(_q->_si + 1);
struct qfq_class *cl = (struct qfq_class *)_q;
if (cl->inv_w) {
q->wsum -= ONE_FP/cl->inv_w;
if (q->wsum != 0)
q->iwsum = ONE_FP / q->wsum;
cl->inv_w = 0;
}
return 0;
}
static inline unsigned long
mask_from(unsigned long bitmap, int from)
{
return bitmap & ~((1UL << from) - 1);
}
static inline unsigned int
qfq_calc_state(struct qfq_sched *q, struct qfq_group *grp)
{
unsigned int state = qfq_gt(grp->S, q->V);
unsigned long mask = mask_from(q->bitmaps[ER], grp->index);
struct qfq_group *next;
if (mask) {
next = qfq_ffs(q, mask);
if (qfq_gt(grp->F, next->F))
state |= EB;
}
return state;
}
static inline void
qfq_move_groups(struct qfq_sched *q, unsigned long mask, int src, int dst)
{
q->bitmaps[dst] |= q->bitmaps[src] & mask;
q->bitmaps[src] &= ~mask;
}
static inline void
qfq_unblock_groups(struct qfq_sched *q, int index, uint64_t old_finish)
{
unsigned long mask = mask_from(q->bitmaps[ER], index + 1);
struct qfq_group *next;
if (mask) {
next = qfq_ffs(q, mask);
if (!qfq_gt(next->F, old_finish))
return;
}
mask = (1UL << index) - 1;
qfq_move_groups(q, mask, EB, ER);
qfq_move_groups(q, mask, IB, IR);
}
static inline void
qfq_make_eligible(struct qfq_sched *q, uint64_t old_V)
{
unsigned long mask, vslot, old_vslot;
vslot = q->V >> QFQ_MIN_SLOT_SHIFT;
old_vslot = old_V >> QFQ_MIN_SLOT_SHIFT;
if (vslot != old_vslot) {
mask = (2ULL << (__fls(vslot ^ old_vslot))) - 1;
qfq_move_groups(q, mask, IR, ER);
qfq_move_groups(q, mask, IB, EB);
}
}
static inline void
qfq_slot_insert(struct qfq_group *grp, struct qfq_class *cl, uint64_t roundedS)
{
uint64_t slot = (roundedS - grp->S) >> grp->slot_shift;
unsigned int i = (grp->front + slot) % QFQ_MAX_SLOTS;
cl->next = grp->slots[i];
grp->slots[i] = cl;
__set_bit(slot, &grp->full_slots);
}
static inline void
qfq_front_slot_remove(struct qfq_group *grp)
{
struct qfq_class **h = &grp->slots[grp->front];
*h = (*h)->next;
if (!*h)
__clear_bit(0, &grp->full_slots);
}
static inline struct qfq_class *
qfq_slot_scan(struct qfq_group *grp)
{
int i;
ND("grp %d full %x", grp->index, grp->full_slots);
if (!grp->full_slots)
return NULL;
i = ffs(grp->full_slots) - 1;
if (i > 0) {
grp->front = (grp->front + i) % QFQ_MAX_SLOTS;
grp->full_slots >>= i;
}
return grp->slots[grp->front];
}
static inline void
qfq_slot_rotate(struct qfq_sched *q, struct qfq_group *grp, uint64_t roundedS)
{
unsigned int i = (grp->S - roundedS) >> grp->slot_shift;
(void)q;
grp->full_slots <<= i;
grp->front = (grp->front - i) % QFQ_MAX_SLOTS;
}
static inline void
qfq_update_eligible(struct qfq_sched *q, uint64_t old_V)
{
bitmap ineligible;
ineligible = q->bitmaps[IR] | q->bitmaps[IB];
if (ineligible) {
if (!q->bitmaps[ER]) {
struct qfq_group *grp;
grp = qfq_ffs(q, ineligible);
if (qfq_gt(grp->S, q->V))
q->V = grp->S;
}
qfq_make_eligible(q, old_V);
}
}
static inline int
qfq_update_class(struct qfq_sched *q, struct qfq_group *grp,
struct qfq_class *cl)
{
(void)q;
cl->S = cl->F;
if (cl->_q.mq.head == NULL) {
qfq_front_slot_remove(grp);
} else {
unsigned int len;
uint64_t roundedS;
len = cl->_q.mq.head->m_pkthdr.len;
cl->F = cl->S + (uint64_t)len * cl->inv_w;
roundedS = qfq_round_down(cl->S, grp->slot_shift);
if (roundedS == grp->S)
return 0;
qfq_front_slot_remove(grp);
qfq_slot_insert(grp, cl, roundedS);
}
return 1;
}
static struct mbuf *
qfq_dequeue(struct dn_sch_inst *si)
{
struct qfq_sched *q = (struct qfq_sched *)(si + 1);
struct qfq_group *grp;
struct qfq_class *cl;
struct mbuf *m;
uint64_t old_V;
NO(q->loops++;)
if (!q->bitmaps[ER]) {
NO(if (q->queued)
dump_sched(q, "start dequeue");)
return NULL;
}
grp = qfq_ffs(q, q->bitmaps[ER]);
cl = grp->slots[grp->front];
m = dn_dequeue(&cl->_q);
if (!m) {
D("BUG/* non-workconserving leaf */");
return NULL;
}
NO(q->queued--;)
old_V = q->V;
q->V += (uint64_t)m->m_pkthdr.len * q->iwsum;
ND("m is %p F 0x%llx V now 0x%llx", m, cl->F, q->V);
if (qfq_update_class(q, grp, cl)) {
uint64_t old_F = grp->F;
cl = qfq_slot_scan(grp);
if (!cl) {
__clear_bit(grp->index, &q->bitmaps[ER]);
} else {
uint64_t roundedS = qfq_round_down(cl->S, grp->slot_shift);
unsigned int s;
if (grp->S == roundedS)
goto skip_unblock;
grp->S = roundedS;
grp->F = roundedS + (2ULL << grp->slot_shift);
__clear_bit(grp->index, &q->bitmaps[ER]);
s = qfq_calc_state(q, grp);
__set_bit(grp->index, &q->bitmaps[s]);
}
qfq_unblock_groups(q, grp->index, old_F);
}
skip_unblock:
qfq_update_eligible(q, old_V);
NO(if (!q->bitmaps[ER] && q->queued)
dump_sched(q, "end dequeue");)
return m;
}
static inline void
qfq_update_start(struct qfq_sched *q, struct qfq_class *cl)
{
unsigned long mask;
uint64_t limit, roundedF;
int slot_shift = cl->grp->slot_shift;
roundedF = qfq_round_down(cl->F, slot_shift);
limit = qfq_round_down(q->V, slot_shift) + (1ULL << slot_shift);
if (!qfq_gt(cl->F, q->V) || qfq_gt(roundedF, limit)) {
mask = mask_from(q->bitmaps[ER], cl->grp->index);
if (mask) {
struct qfq_group *next = qfq_ffs(q, mask);
if (qfq_gt(roundedF, next->F)) {
if (qfq_gt(limit, next->F))
cl->S = next->F;
else
cl->S = limit;
return;
}
}
cl->S = q->V;
} else {
cl->S = cl->F;
}
}
static int
qfq_enqueue(struct dn_sch_inst *si, struct dn_queue *_q, struct mbuf *m)
{
struct qfq_sched *q = (struct qfq_sched *)(si + 1);
struct qfq_group *grp;
struct qfq_class *cl = (struct qfq_class *)_q;
uint64_t roundedS;
int s;
NO(q->loops++;)
DX(4, "len %d flow %p inv_w 0x%x grp %d", m->m_pkthdr.len,
_q, cl->inv_w, cl->grp->index);
if (m != _q->mq.head) {
if (dn_enqueue(_q, m, 0))
return 1;
NO(q->queued++;)
if (m != _q->mq.head)
return 0;
}
grp = cl->grp;
qfq_update_start(q, cl);
cl->F = cl->S + (uint64_t)(m->m_pkthdr.len) * cl->inv_w;
roundedS = qfq_round_down(cl->S, grp->slot_shift);
if (grp->full_slots) {
if (!qfq_gt(grp->S, cl->S))
goto skip_update;
qfq_slot_rotate(q, grp, roundedS);
__clear_bit(grp->index, &q->bitmaps[IR]);
__clear_bit(grp->index, &q->bitmaps[IB]);
} else if (!q->bitmaps[ER] && qfq_gt(roundedS, q->V))
q->V = roundedS;
grp->S = roundedS;
grp->F = roundedS + (2ULL << grp->slot_shift);
s = qfq_calc_state(q, grp);
__set_bit(grp->index, &q->bitmaps[s]);
ND("new state %d 0x%x", s, q->bitmaps[s]);
ND("S %llx F %llx V %llx", cl->S, cl->F, q->V);
skip_update:
qfq_slot_insert(grp, cl, roundedS);
return 0;
}
#if 0
static inline void
qfq_slot_remove(struct qfq_sched *q, struct qfq_group *grp,
struct qfq_class *cl, struct qfq_class **pprev)
{
unsigned int i, offset;
uint64_t roundedS;
roundedS = qfq_round_down(cl->S, grp->slot_shift);
offset = (roundedS - grp->S) >> grp->slot_shift;
i = (grp->front + offset) % QFQ_MAX_SLOTS;
#ifdef notyet
if (!pprev) {
pprev = &grp->slots[i];
while (*pprev && *pprev != cl)
pprev = &(*pprev)->next;
}
#endif
*pprev = cl->next;
if (!grp->slots[i])
__clear_bit(offset, &grp->full_slots);
}
static void
qfq_deactivate_class(struct qfq_sched *q, struct qfq_class *cl,
struct qfq_class **pprev)
{
struct qfq_group *grp = &q->groups[cl->index];
unsigned long mask;
uint64_t roundedS;
int s;
cl->F = cl->S;
qfq_slot_remove(q, grp, cl, pprev);
if (!grp->full_slots) {
__clear_bit(grp->index, &q->bitmaps[IR]);
__clear_bit(grp->index, &q->bitmaps[EB]);
__clear_bit(grp->index, &q->bitmaps[IB]);
if (test_bit(grp->index, &q->bitmaps[ER]) &&
!(q->bitmaps[ER] & ~((1UL << grp->index) - 1))) {
mask = q->bitmaps[ER] & ((1UL << grp->index) - 1);
if (mask)
mask = ~((1UL << __fls(mask)) - 1);
else
mask = ~0UL;
qfq_move_groups(q, mask, EB, ER);
qfq_move_groups(q, mask, IB, IR);
}
__clear_bit(grp->index, &q->bitmaps[ER]);
} else if (!grp->slots[grp->front]) {
cl = qfq_slot_scan(grp);
roundedS = qfq_round_down(cl->S, grp->slot_shift);
if (grp->S != roundedS) {
__clear_bit(grp->index, &q->bitmaps[ER]);
__clear_bit(grp->index, &q->bitmaps[IR]);
__clear_bit(grp->index, &q->bitmaps[EB]);
__clear_bit(grp->index, &q->bitmaps[IB]);
grp->S = roundedS;
grp->F = roundedS + (2ULL << grp->slot_shift);
s = qfq_calc_state(q, grp);
__set_bit(grp->index, &q->bitmaps[s]);
}
}
qfq_update_eligible(q, q->V);
}
#endif
static int
qfq_new_fsk(struct dn_fsk *f)
{
ipdn_bound_var(&f->fs.par[0], 1, 1, QFQ_MAX_WEIGHT, "qfq weight");
ipdn_bound_var(&f->fs.par[1], 1500, 1, 2000, "qfq maxlen");
ND("weight %d len %d\n", f->fs.par[0], f->fs.par[1]);
return 0;
}
static int
qfq_new_sched(struct dn_sch_inst *si)
{
struct qfq_sched *q = (struct qfq_sched *)(si + 1);
struct qfq_group *grp;
int i;
for (i = 0; i <= QFQ_MAX_INDEX; i++) {
grp = &q->groups[i];
grp->index = i;
grp->slot_shift = QFQ_MTU_SHIFT + FRAC_BITS -
(QFQ_MAX_INDEX - i);
}
return 0;
}
static struct dn_alg qfq_desc = {
_SI( .type = ) DN_SCHED_QFQ,
_SI( .name = ) "QFQ",
_SI( .flags = ) DN_MULTIQUEUE,
_SI( .schk_datalen = ) 0,
_SI( .si_datalen = ) sizeof(struct qfq_sched),
_SI( .q_datalen = ) sizeof(struct qfq_class) - sizeof(struct dn_queue),
_SI( .enqueue = ) qfq_enqueue,
_SI( .dequeue = ) qfq_dequeue,
_SI( .config = ) NULL,
_SI( .destroy = ) NULL,
_SI( .new_sched = ) qfq_new_sched,
_SI( .free_sched = ) NULL,
_SI( .new_fsk = ) qfq_new_fsk,
_SI( .free_fsk = ) NULL,
_SI( .new_queue = ) qfq_new_queue,
_SI( .free_queue = ) qfq_free_queue,
#ifdef NEW_AQM
_SI( .getconfig = ) NULL,
#endif
};
DECLARE_DNSCHED_MODULE(dn_qfq, &qfq_desc);
#ifdef QFQ_DEBUG
static void
dump_groups(struct qfq_sched *q, uint32_t mask)
{
int i, j;
for (i = 0; i < QFQ_MAX_INDEX + 1; i++) {
struct qfq_group *g = &q->groups[i];
if (0 == (mask & (1<<i)))
continue;
for (j = 0; j < QFQ_MAX_SLOTS; j++) {
if (g->slots[j])
D(" bucket %d %p", j, g->slots[j]);
}
D("full_slots 0x%llx", (_P64)g->full_slots);
D(" %2d S 0x%20llx F 0x%llx %c", i,
(_P64)g->S, (_P64)g->F,
mask & (1<<i) ? '1' : '0');
}
}
static void
dump_sched(struct qfq_sched *q, const char *msg)
{
D("--- in %s: ---", msg);
D("loops %d queued %d V 0x%llx", q->loops, q->queued, (_P64)q->V);
D(" ER 0x%08x", (unsigned)q->bitmaps[ER]);
D(" EB 0x%08x", (unsigned)q->bitmaps[EB]);
D(" IR 0x%08x", (unsigned)q->bitmaps[IR]);
D(" IB 0x%08x", (unsigned)q->bitmaps[IB]);
dump_groups(q, 0xffffffff);
};
#endif