/*-
 * Copyright (c) 2007-2008
 *	Swinburne University of Technology, Melbourne, Australia.
 * Copyright (c) 2009-2010 Lawrence Stewart <[email protected]>
 * Copyright (c) 2010 The FreeBSD Foundation
 * All rights reserved.
 *
 * This software was developed at the Centre for Advanced Internet
 * Architectures, Swinburne University of Technology, by Lawrence Stewart and
 * James Healy, made possible in part by a grant from the Cisco University
 * Research Program Fund at Community Foundation Silicon Valley.
 *
 * Portions of this software were developed at the Centre for Advanced
 * Internet Architectures, Swinburne University of Technology, Melbourne,
 * Australia by David Hayes under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * This software was first released in 2007 by James Healy and Lawrence Stewart
 * whilst working on the NewTCP research project at Swinburne University of
 * Technology's Centre for Advanced Internet Architectures, Melbourne,
 * Australia, which was made possible in part by a grant from the Cisco
 * University Research Program Fund at Community Foundation Silicon Valley.
 * More details are available at:
 *   http://caia.swin.edu.au/urp/newtcp/
 */

#ifndef _NETINET_CC_CC_H_
#define _NETINET_CC_CC_H_

#ifdef _KERNEL

MALLOC_DECLARE(M_CC_MEM);

/* Global CC vars. */
extern STAILQ_HEAD(cc_head, cc_algo) cc_list;
extern const int tcprexmtthresh;

/* Per-netstack bits. */
VNET_DECLARE(struct cc_algo *, default_cc_ptr);
#define	V_default_cc_ptr VNET(default_cc_ptr)

VNET_DECLARE(int, cc_do_abe);
#define	V_cc_do_abe VNET(cc_do_abe)

VNET_DECLARE(int, cc_abe_frlossreduce);
#define	V_cc_abe_frlossreduce VNET(cc_abe_frlossreduce)

/* Define the new net.inet.tcp.cc sysctl tree. */
#ifdef _SYS_SYSCTL_H_
SYSCTL_DECL(_net_inet_tcp_cc);
#endif

/* For CC modules that use hystart++ */
extern uint32_t hystart_lowcwnd;
extern uint32_t hystart_minrtt_thresh;
extern uint32_t hystart_maxrtt_thresh;
extern uint32_t hystart_n_rttsamples;
extern uint32_t hystart_css_growth_div;
extern uint32_t hystart_css_rounds;
extern uint32_t hystart_bblogs;

/* CC housekeeping functions. */
int	cc_register_algo(struct cc_algo *add_cc);
int	cc_deregister_algo(struct cc_algo *remove_cc);
#endif /* _KERNEL */

#if defined(_KERNEL) || defined(_WANT_TCPCB)
/*
 * Per-connection congestion control state; a pointer to this structure is
 * the first argument of every per-connection hook in struct cc_algo.
 */
struct cc_var {
	void		*cc_data; /* Per-connection private CC algorithm data. */
	int		bytes_this_ack; /* # bytes acked by the current ACK. */
	tcp_seq		curack; /* Most recent ACK. */
	uint32_t	flags; /* Flags for cc_var (see below) */
	struct tcpcb	*tp; /* Pointer to tcpcb */
	uint16_t	nsegs; /* # segments coalesced into current chain. */
	uint8_t		labc;  /* Don't use system abc; use the passed-in value */
};

/* cc_var flags. */
#define	CCF_ABC_SENTAWND	0x0001	/* ABC counted cwnd worth of bytes? */
#define	CCF_CWND_LIMITED	0x0002	/* Are we currently cwnd limited? */
#define	CCF_USE_LOCAL_ABC	0x0004	/* Don't use the system l_abc val */
#define	CCF_ACKNOW		0x0008	/* Will this ack be sent now? */
#define	CCF_IPHDR_CE		0x0010	/* Does this packet set CE bit? */
#define	CCF_TCPHDR_CWR		0x0020	/* Does this packet set CWR bit? */
#define	CCF_UNUSED1		0x0040
#define	CCF_UNUSED2		0x0080
#define	CCF_UNUSED3		0x0100
#define	CCF_UNUSED4		0x0200
#define	CCF_HYSTART_ALLOWED	0x0400	/* If the CC supports it Hystart is allowed */
#define	CCF_HYSTART_CAN_SH_CWND	0x0800	/* Can hystart when going CSS -> CA slam the cwnd */
#define	CCF_HYSTART_CONS_SSTH	0x1000	/* Should hystart use the more conservative ssthresh */

#endif /* defined(_KERNEL) || defined(_WANT_TCPCB) */

/*
 * Signal types handed to the ack_received() and cong_signal() hooks.  The
 * ACK types are only visible when _KERNEL or _WANT_TCPCB is defined; the
 * congestion signal types are always visible.
 */
typedef enum {
#if defined(_KERNEL) || defined(_WANT_TCPCB)
	/* ACK types passed to the ack_received() hook. */
	CC_ACK =	0x0001,	/* Regular in sequence ACK. */
	CC_DUPACK =	0x0002,	/* Duplicate ACK. */
	CC_PARTIALACK =	0x0004,	/* Not yet. */
	CC_SACK =	0x0008,	/* Not yet. */
#endif /* defined(_KERNEL) || defined(_WANT_TCPCB) */
	/* Congestion signal types passed to the cong_signal() hook. */
	CC_ECN =	0x0100,	/* ECN marked packet received. */
	CC_RTO =	0x0200,	/* RTO fired. */
	CC_RTO_ERR =	0x0400,	/* RTO fired in error. */
	CC_NDUPACK =	0x0800,	/* Threshold of dupacks reached. */
	/*
	 * The highest order 8 bits (0x01000000 - 0x80000000) are reserved
	 * for CC algos to declare their own congestion signal types.
	 */
	CC_SIGPRIVMASK = 0xFF000000	/* Mask to check if sig is private. */
} ccsignal_t;

#ifdef _KERNEL
/*
 * Structure to hold data and function pointers that together represent a
 * congestion control algorithm.
 */
struct cc_algo {
	char	name[TCP_CA_NAME_MAX];

	/* Init global module state on kldload. */
	int	(*mod_init)(void);

	/* Cleanup global module state on kldunload. */
	int	(*mod_destroy)(void);

	/* Return the size of the void pointer the CC needs for state */
	size_t  (*cc_data_sz)(void);

	/*
	 * Init CC state for a new control block. The CC
	 * module may be passed a NULL ptr indicating that
	 * it must allocate the memory. If it is passed a
	 * non-null pointer it is pre-allocated memory by
	 * the caller and the cb_init is expected to use that memory.
	 * It is not expected to fail if memory is passed in and
	 * all currently defined modules do not.
	 */
	int	(*cb_init)(struct cc_var *ccv, void *ptr);

	/* Cleanup CC state for a terminating control block. */
	void	(*cb_destroy)(struct cc_var *ccv);

	/* Init variables for a newly established connection. */
	void	(*conn_init)(struct cc_var *ccv);

	/* Called on receipt of an ack. */
	void	(*ack_received)(struct cc_var *ccv, ccsignal_t type);

	/* Called on detection of a congestion signal. */
	void	(*cong_signal)(struct cc_var *ccv, ccsignal_t type);

	/* Called after exiting congestion recovery. */
	void	(*post_recovery)(struct cc_var *ccv);

	/* Called when data transfer resumes after an idle period. */
	void	(*after_idle)(struct cc_var *ccv);

	/* Called for an additional ECN processing apart from RFC3168. */
	void	(*ecnpkt_handler)(struct cc_var *ccv);

	/* Called when a new "round" begins, if the transport is tracking rounds. */
	void	(*newround)(struct cc_var *ccv, uint32_t round_cnt);

	/*
	 * Called when a RTT sample is made (fas = flight at send, if you
	 * don't have it send the cwnd in).
	 */
	void	(*rttsample)(struct cc_var *ccv, uint32_t usec_rtt,
		    uint32_t rxtcnt, uint32_t fas);

	/* Called for {get|set}sockopt() on a TCP socket with TCP_CCALGOOPT. */
	int     (*ctl_output)(struct cc_var *, struct sockopt *, void *);

	/* Linkage for a cc_algo STAILQ (cc_list is declared with this type). */
	STAILQ_ENTRY (cc_algo) entries;
	/*
	 * NOTE(review): presumably adjusted by cc_refer()/cc_release() and
	 * attach/detach; confirm against the implementation.
	 */
	u_int   cc_refcount;
	/* NOTE(review): presumably holds CC_MODULE_BEING_REMOVED; confirm. */
	uint8_t flags;
};

#define	CC_MODULE_BEING_REMOVED	0x01	/* The module is being removed */

/* Macro to obtain the CC algo's struct ptr. */
#define	CC_ALGO(tp)	((tp)->t_cc)

/* Macro to obtain the CC algo's data ptr. */
#define	CC_DATA(tp)	((tp)->t_ccv.cc_data)

/* Macro to obtain the system default CC algo's struct ptr. */
#define	CC_DEFAULT_ALGO()	V_default_cc_ptr

extern struct rwlock cc_list_lock;
#define	CC_LIST_LOCK_INIT()	rw_init(&cc_list_lock, "cc_list")
#define	CC_LIST_LOCK_DESTROY()	rw_destroy(&cc_list_lock)
#define	CC_LIST_RLOCK()		rw_rlock(&cc_list_lock)
#define	CC_LIST_RUNLOCK()	rw_runlock(&cc_list_lock)
#define	CC_LIST_WLOCK()		rw_wlock(&cc_list_lock)
#define	CC_LIST_WUNLOCK()	rw_wunlock(&cc_list_lock)
#define	CC_LIST_LOCK_ASSERT()	rw_assert(&cc_list_lock, RA_LOCKED)

#define	CC_ALGOOPT_LIMIT	2048

/*
 * These routines give NewReno behavior to the caller
 * they require no state and can be used by any other CC
 * module that wishes to use NewReno type behaviour (along
 * with anything else they may add on, pre or post call).
 */
void	newreno_cc_post_recovery(struct cc_var *);
void	newreno_cc_after_idle(struct cc_var *);
void	newreno_cc_cong_signal(struct cc_var *, ccsignal_t);
void	newreno_cc_ack_received(struct cc_var *, ccsignal_t);
u_int	newreno_cc_cwnd_on_multiplicative_decrease(struct cc_var *ccv,
	    uint32_t mss);
u_int	newreno_cc_cwnd_in_cong_avoid(struct cc_var *ccv);
u_int	newreno_cc_cwnd_in_slow_start(struct cc_var *ccv);

/* Called to temporarily keep an algo from going away during change */
void	cc_refer(struct cc_algo *algo);
/* Called to release the temporary hold */
void	cc_release(struct cc_algo *algo);

/* Called to attach a CC algorithm to a tcpcb */
void	cc_attach(struct tcpcb *, struct cc_algo *);
/* Called to detach a CC algorithm from a tcpcb */
void	cc_detach(struct tcpcb *);

#endif /* _KERNEL */
#endif /* _NETINET_CC_CC_H_ */