/* SCTP kernel implementation
 * Copyright (c) 1999-2000 Cisco, Inc.
 * Copyright (c) 1999-2001 Motorola, Inc.
 * Copyright (c) 2001-2003 International Business Machines Corp.
 * Copyright (c) 2001 Intel Corp.
 * Copyright (c) 2001 La Monte H.P. Yarroll
 *
 * This file is part of the SCTP kernel implementation
 *
 * This module provides the abstraction for an SCTP transport representing
 * a remote transport address.  For local transport addresses, we just use
 * union sctp_addr.
 *
 * This SCTP implementation is free software;
 * you can redistribute it and/or modify it under the terms of
 * the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * This SCTP implementation is distributed in the hope that it
 * will be useful, but WITHOUT ANY WARRANTY; without even the implied
 *                 ************************
 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with GNU CC; see the file COPYING.  If not, write to
 * the Free Software Foundation, 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 *
 * Please send any bug reports or fixes you make to the
 * email address(es):
 *    lksctp developers <[email protected]>
 *
 * Or submit a bug report through the following website:
 *    http://www.sf.net/projects/lksctp
 *
 * Written or modified by:
 *    La Monte H.P. Yarroll <[email protected]>
 *    Karl Knutson          <[email protected]>
 *    Jon Grimm             <[email protected]>
 *    Xingang Guo           <[email protected]>
 *    Hui Huang             <[email protected]>
 *    Sridhar Samudrala     <[email protected]>
 *    Ardelle Fan           <[email protected]>
 *
 * Any bugs reported given to us we will try to fix...
any fixes shared will47* be incorporated into the next SCTP release.48*/4950#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt5152#include <linux/slab.h>53#include <linux/types.h>54#include <linux/random.h>55#include <net/sctp/sctp.h>56#include <net/sctp/sm.h>5758/* 1st Level Abstractions. */5960/* Initialize a new transport from provided memory. */61static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer,62const union sctp_addr *addr,63gfp_t gfp)64{65/* Copy in the address. */66peer->ipaddr = *addr;67peer->af_specific = sctp_get_af_specific(addr->sa.sa_family);68memset(&peer->saddr, 0, sizeof(union sctp_addr));6970/* From 6.3.1 RTO Calculation:71*72* C1) Until an RTT measurement has been made for a packet sent to the73* given destination transport address, set RTO to the protocol74* parameter 'RTO.Initial'.75*/76peer->rto = msecs_to_jiffies(sctp_rto_initial);7778peer->last_time_heard = jiffies;79peer->last_time_ecne_reduced = jiffies;8081peer->param_flags = SPP_HB_DISABLE |82SPP_PMTUD_ENABLE |83SPP_SACKDELAY_ENABLE;8485/* Initialize the default path max_retrans. */86peer->pathmaxrxt = sctp_max_retrans_path;8788INIT_LIST_HEAD(&peer->transmitted);89INIT_LIST_HEAD(&peer->send_ready);90INIT_LIST_HEAD(&peer->transports);9192setup_timer(&peer->T3_rtx_timer, sctp_generate_t3_rtx_event,93(unsigned long)peer);94setup_timer(&peer->hb_timer, sctp_generate_heartbeat_event,95(unsigned long)peer);96setup_timer(&peer->proto_unreach_timer,97sctp_generate_proto_unreach_event, (unsigned long)peer);9899/* Initialize the 64-bit random nonce sent with heartbeat. */100get_random_bytes(&peer->hb_nonce, sizeof(peer->hb_nonce));101102atomic_set(&peer->refcnt, 1);103104return peer;105}106107/* Allocate and initialize a new transport. 
*/108struct sctp_transport *sctp_transport_new(const union sctp_addr *addr,109gfp_t gfp)110{111struct sctp_transport *transport;112113transport = t_new(struct sctp_transport, gfp);114if (!transport)115goto fail;116117if (!sctp_transport_init(transport, addr, gfp))118goto fail_init;119120transport->malloced = 1;121SCTP_DBG_OBJCNT_INC(transport);122123return transport;124125fail_init:126kfree(transport);127128fail:129return NULL;130}131132/* This transport is no longer needed. Free up if possible, or133* delay until it last reference count.134*/135void sctp_transport_free(struct sctp_transport *transport)136{137transport->dead = 1;138139/* Try to delete the heartbeat timer. */140if (del_timer(&transport->hb_timer))141sctp_transport_put(transport);142143/* Delete the T3_rtx timer if it's active.144* There is no point in not doing this now and letting145* structure hang around in memory since we know146* the tranport is going away.147*/148if (timer_pending(&transport->T3_rtx_timer) &&149del_timer(&transport->T3_rtx_timer))150sctp_transport_put(transport);151152/* Delete the ICMP proto unreachable timer if it's active. */153if (timer_pending(&transport->proto_unreach_timer) &&154del_timer(&transport->proto_unreach_timer))155sctp_association_put(transport->asoc);156157sctp_transport_put(transport);158}159160/* Destroy the transport data structure.161* Assumes there are no more users of this structure.162*/163static void sctp_transport_destroy(struct sctp_transport *transport)164{165SCTP_ASSERT(transport->dead, "Transport is not dead", return);166167if (transport->asoc)168sctp_association_put(transport->asoc);169170sctp_packet_free(&transport->packet);171172dst_release(transport->dst);173kfree(transport);174SCTP_DBG_OBJCNT_DEC(transport);175}176177/* Start T3_rtx timer if it is not already running and update the heartbeat178* timer. 
This routine is called every time a DATA chunk is sent.179*/180void sctp_transport_reset_timers(struct sctp_transport *transport)181{182/* RFC 2960 6.3.2 Retransmission Timer Rules183*184* R1) Every time a DATA chunk is sent to any address(including a185* retransmission), if the T3-rtx timer of that address is not running186* start it running so that it will expire after the RTO of that187* address.188*/189190if (!timer_pending(&transport->T3_rtx_timer))191if (!mod_timer(&transport->T3_rtx_timer,192jiffies + transport->rto))193sctp_transport_hold(transport);194195/* When a data chunk is sent, reset the heartbeat interval. */196if (!mod_timer(&transport->hb_timer,197sctp_transport_timeout(transport)))198sctp_transport_hold(transport);199}200201/* This transport has been assigned to an association.202* Initialize fields from the association or from the sock itself.203* Register the reference count in the association.204*/205void sctp_transport_set_owner(struct sctp_transport *transport,206struct sctp_association *asoc)207{208transport->asoc = asoc;209sctp_association_hold(asoc);210}211212/* Initialize the pmtu of a transport. 
*/213void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk)214{215/* If we don't have a fresh route, look one up */216if (!transport->dst || transport->dst->obsolete > 1) {217dst_release(transport->dst);218transport->af_specific->get_dst(transport, &transport->saddr,219&transport->fl, sk);220}221222if (transport->dst) {223transport->pathmtu = dst_mtu(transport->dst);224} else225transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT;226}227228/* this is a complete rip-off from __sk_dst_check229* the cookie is always 0 since this is how it's used in the230* pmtu code231*/232static struct dst_entry *sctp_transport_dst_check(struct sctp_transport *t)233{234struct dst_entry *dst = t->dst;235236if (dst && dst->obsolete && dst->ops->check(dst, 0) == NULL) {237dst_release(t->dst);238t->dst = NULL;239return NULL;240}241242return dst;243}244245void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu)246{247struct dst_entry *dst;248249if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) {250pr_warn("%s: Reported pmtu %d too low, using default minimum of %d\n",251__func__, pmtu,252SCTP_DEFAULT_MINSEGMENT);253/* Use default minimum segment size and disable254* pmtu discovery on this transport.255*/256t->pathmtu = SCTP_DEFAULT_MINSEGMENT;257} else {258t->pathmtu = pmtu;259}260261dst = sctp_transport_dst_check(t);262if (dst)263dst->ops->update_pmtu(dst, pmtu);264}265266/* Caches the dst entry and source address for a transport's destination267* address.268*/269void sctp_transport_route(struct sctp_transport *transport,270union sctp_addr *saddr, struct sctp_sock *opt)271{272struct sctp_association *asoc = transport->asoc;273struct sctp_af *af = transport->af_specific;274275af->get_dst(transport, saddr, &transport->fl, sctp_opt2sk(opt));276277if (saddr)278memcpy(&transport->saddr, saddr, sizeof(union sctp_addr));279else280af->get_saddr(opt, transport, &transport->fl);281282if ((transport->param_flags & SPP_PMTUD_DISABLE) && transport->pathmtu) {283return;284}285if 
(transport->dst) {286transport->pathmtu = dst_mtu(transport->dst);287288/* Initialize sk->sk_rcv_saddr, if the transport is the289* association's active path for getsockname().290*/291if (asoc && (!asoc->peer.primary_path ||292(transport == asoc->peer.active_path)))293opt->pf->af->to_sk_saddr(&transport->saddr,294asoc->base.sk);295} else296transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT;297}298299/* Hold a reference to a transport. */300void sctp_transport_hold(struct sctp_transport *transport)301{302atomic_inc(&transport->refcnt);303}304305/* Release a reference to a transport and clean up306* if there are no more references.307*/308void sctp_transport_put(struct sctp_transport *transport)309{310if (atomic_dec_and_test(&transport->refcnt))311sctp_transport_destroy(transport);312}313314/* Update transport's RTO based on the newly calculated RTT. */315void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt)316{317/* Check for valid transport. */318SCTP_ASSERT(tp, "NULL transport", return);319320/* We should not be doing any RTO updates unless rto_pending is set. 
*/321SCTP_ASSERT(tp->rto_pending, "rto_pending not set", return);322323if (tp->rttvar || tp->srtt) {324/* 6.3.1 C3) When a new RTT measurement R' is made, set325* RTTVAR <- (1 - RTO.Beta) * RTTVAR + RTO.Beta * |SRTT - R'|326* SRTT <- (1 - RTO.Alpha) * SRTT + RTO.Alpha * R'327*/328329/* Note: The above algorithm has been rewritten to330* express rto_beta and rto_alpha as inverse powers331* of two.332* For example, assuming the default value of RTO.Alpha of333* 1/8, rto_alpha would be expressed as 3.334*/335tp->rttvar = tp->rttvar - (tp->rttvar >> sctp_rto_beta)336+ ((abs(tp->srtt - rtt)) >> sctp_rto_beta);337tp->srtt = tp->srtt - (tp->srtt >> sctp_rto_alpha)338+ (rtt >> sctp_rto_alpha);339} else {340/* 6.3.1 C2) When the first RTT measurement R is made, set341* SRTT <- R, RTTVAR <- R/2.342*/343tp->srtt = rtt;344tp->rttvar = rtt >> 1;345}346347/* 6.3.1 G1) Whenever RTTVAR is computed, if RTTVAR = 0, then348* adjust RTTVAR <- G, where G is the CLOCK GRANULARITY.349*/350if (tp->rttvar == 0)351tp->rttvar = SCTP_CLOCK_GRANULARITY;352353/* 6.3.1 C3) After the computation, update RTO <- SRTT + 4 * RTTVAR. 
*/354tp->rto = tp->srtt + (tp->rttvar << 2);355356/* 6.3.1 C6) Whenever RTO is computed, if it is less than RTO.Min357* seconds then it is rounded up to RTO.Min seconds.358*/359if (tp->rto < tp->asoc->rto_min)360tp->rto = tp->asoc->rto_min;361362/* 6.3.1 C7) A maximum value may be placed on RTO provided it is363* at least RTO.max seconds.364*/365if (tp->rto > tp->asoc->rto_max)366tp->rto = tp->asoc->rto_max;367368tp->rtt = rtt;369370/* Reset rto_pending so that a new RTT measurement is started when a371* new data chunk is sent.372*/373tp->rto_pending = 0;374375SCTP_DEBUG_PRINTK("%s: transport: %p, rtt: %d, srtt: %d "376"rttvar: %d, rto: %ld\n", __func__,377tp, rtt, tp->srtt, tp->rttvar, tp->rto);378}379380/* This routine updates the transport's cwnd and partial_bytes_acked381* parameters based on the bytes acked in the received SACK.382*/383void sctp_transport_raise_cwnd(struct sctp_transport *transport,384__u32 sack_ctsn, __u32 bytes_acked)385{386struct sctp_association *asoc = transport->asoc;387__u32 cwnd, ssthresh, flight_size, pba, pmtu;388389cwnd = transport->cwnd;390flight_size = transport->flight_size;391392/* See if we need to exit Fast Recovery first */393if (asoc->fast_recovery &&394TSN_lte(asoc->fast_recovery_exit, sack_ctsn))395asoc->fast_recovery = 0;396397/* The appropriate cwnd increase algorithm is performed if, and only398* if the cumulative TSN whould advanced and the congestion window is399* being fully utilized.400*/401if (TSN_lte(sack_ctsn, transport->asoc->ctsn_ack_point) ||402(flight_size < cwnd))403return;404405ssthresh = transport->ssthresh;406pba = transport->partial_bytes_acked;407pmtu = transport->asoc->pathmtu;408409if (cwnd <= ssthresh) {410/* RFC 4960 7.2.1411* o When cwnd is less than or equal to ssthresh, an SCTP412* endpoint MUST use the slow-start algorithm to increase413* cwnd only if the current congestion window is being fully414* utilized, an incoming SACK advances the Cumulative TSN415* Ack Point, and the data sender is not 
in Fast Recovery.416* Only when these three conditions are met can the cwnd be417* increased; otherwise, the cwnd MUST not be increased.418* If these conditions are met, then cwnd MUST be increased419* by, at most, the lesser of 1) the total size of the420* previously outstanding DATA chunk(s) acknowledged, and421* 2) the destination's path MTU. This upper bound protects422* against the ACK-Splitting attack outlined in [SAVAGE99].423*/424if (asoc->fast_recovery)425return;426427if (bytes_acked > pmtu)428cwnd += pmtu;429else430cwnd += bytes_acked;431SCTP_DEBUG_PRINTK("%s: SLOW START: transport: %p, "432"bytes_acked: %d, cwnd: %d, ssthresh: %d, "433"flight_size: %d, pba: %d\n",434__func__,435transport, bytes_acked, cwnd,436ssthresh, flight_size, pba);437} else {438/* RFC 2960 7.2.2 Whenever cwnd is greater than ssthresh,439* upon each SACK arrival that advances the Cumulative TSN Ack440* Point, increase partial_bytes_acked by the total number of441* bytes of all new chunks acknowledged in that SACK including442* chunks acknowledged by the new Cumulative TSN Ack and by443* Gap Ack Blocks.444*445* When partial_bytes_acked is equal to or greater than cwnd446* and before the arrival of the SACK the sender had cwnd or447* more bytes of data outstanding (i.e., before arrival of the448* SACK, flightsize was greater than or equal to cwnd),449* increase cwnd by MTU, and reset partial_bytes_acked to450* (partial_bytes_acked - cwnd).451*/452pba += bytes_acked;453if (pba >= cwnd) {454cwnd += pmtu;455pba = ((cwnd < pba) ? 
(pba - cwnd) : 0);456}457SCTP_DEBUG_PRINTK("%s: CONGESTION AVOIDANCE: "458"transport: %p, bytes_acked: %d, cwnd: %d, "459"ssthresh: %d, flight_size: %d, pba: %d\n",460__func__,461transport, bytes_acked, cwnd,462ssthresh, flight_size, pba);463}464465transport->cwnd = cwnd;466transport->partial_bytes_acked = pba;467}468469/* This routine is used to lower the transport's cwnd when congestion is470* detected.471*/472void sctp_transport_lower_cwnd(struct sctp_transport *transport,473sctp_lower_cwnd_t reason)474{475struct sctp_association *asoc = transport->asoc;476477switch (reason) {478case SCTP_LOWER_CWND_T3_RTX:479/* RFC 2960 Section 7.2.3, sctpimpguide480* When the T3-rtx timer expires on an address, SCTP should481* perform slow start by:482* ssthresh = max(cwnd/2, 4*MTU)483* cwnd = 1*MTU484* partial_bytes_acked = 0485*/486transport->ssthresh = max(transport->cwnd/2,4874*asoc->pathmtu);488transport->cwnd = asoc->pathmtu;489490/* T3-rtx also clears fast recovery */491asoc->fast_recovery = 0;492break;493494case SCTP_LOWER_CWND_FAST_RTX:495/* RFC 2960 7.2.4 Adjust the ssthresh and cwnd of the496* destination address(es) to which the missing DATA chunks497* were last sent, according to the formula described in498* Section 7.2.3.499*500* RFC 2960 7.2.3, sctpimpguide Upon detection of packet501* losses from SACK (see Section 7.2.4), An endpoint502* should do the following:503* ssthresh = max(cwnd/2, 4*MTU)504* cwnd = ssthresh505* partial_bytes_acked = 0506*/507if (asoc->fast_recovery)508return;509510/* Mark Fast recovery */511asoc->fast_recovery = 1;512asoc->fast_recovery_exit = asoc->next_tsn - 1;513514transport->ssthresh = max(transport->cwnd/2,5154*asoc->pathmtu);516transport->cwnd = transport->ssthresh;517break;518519case SCTP_LOWER_CWND_ECNE:520/* RFC 2481 Section 6.1.2.521* If the sender receives an ECN-Echo ACK packet522* then the sender knows that congestion was encountered in the523* network on the path from the sender to the receiver. 
The524* indication of congestion should be treated just as a525* congestion loss in non-ECN Capable TCP. That is, the TCP526* source halves the congestion window "cwnd" and reduces the527* slow start threshold "ssthresh".528* A critical condition is that TCP does not react to529* congestion indications more than once every window of530* data (or more loosely more than once every round-trip time).531*/532if (time_after(jiffies, transport->last_time_ecne_reduced +533transport->rtt)) {534transport->ssthresh = max(transport->cwnd/2,5354*asoc->pathmtu);536transport->cwnd = transport->ssthresh;537transport->last_time_ecne_reduced = jiffies;538}539break;540541case SCTP_LOWER_CWND_INACTIVE:542/* RFC 2960 Section 7.2.1, sctpimpguide543* When the endpoint does not transmit data on a given544* transport address, the cwnd of the transport address545* should be adjusted to max(cwnd/2, 4*MTU) per RTO.546* NOTE: Although the draft recommends that this check needs547* to be done every RTO interval, we do it every hearbeat548* interval.549*/550transport->cwnd = max(transport->cwnd/2,5514*asoc->pathmtu);552break;553}554555transport->partial_bytes_acked = 0;556SCTP_DEBUG_PRINTK("%s: transport: %p reason: %d cwnd: "557"%d ssthresh: %d\n", __func__,558transport, reason,559transport->cwnd, transport->ssthresh);560}561562/* Apply Max.Burst limit to the congestion window:563* sctpimpguide-05 2.14.2564* D) When the time comes for the sender to565* transmit new DATA chunks, the protocol parameter Max.Burst MUST566* first be applied to limit how many new DATA chunks may be sent.567* The limit is applied by adjusting cwnd as follows:568* if ((flightsize+ Max.Burst * MTU) < cwnd)569* cwnd = flightsize + Max.Burst * MTU570*/571572void sctp_transport_burst_limited(struct sctp_transport *t)573{574struct sctp_association *asoc = t->asoc;575u32 old_cwnd = t->cwnd;576u32 max_burst_bytes;577578if (t->burst_limited)579return;580581max_burst_bytes = t->flight_size + (asoc->max_burst * 
asoc->pathmtu);582if (max_burst_bytes < old_cwnd) {583t->cwnd = max_burst_bytes;584t->burst_limited = old_cwnd;585}586}587588/* Restore the old cwnd congestion window, after the burst had it's589* desired effect.590*/591void sctp_transport_burst_reset(struct sctp_transport *t)592{593if (t->burst_limited) {594t->cwnd = t->burst_limited;595t->burst_limited = 0;596}597}598599/* What is the next timeout value for this transport? */600unsigned long sctp_transport_timeout(struct sctp_transport *t)601{602unsigned long timeout;603timeout = t->rto + sctp_jitter(t->rto);604if (t->state != SCTP_UNCONFIRMED)605timeout += t->hbinterval;606timeout += jiffies;607return timeout;608}609610/* Reset transport variables to their initial values */611void sctp_transport_reset(struct sctp_transport *t)612{613struct sctp_association *asoc = t->asoc;614615/* RFC 2960 (bis), Section 5.2.4616* All the congestion control parameters (e.g., cwnd, ssthresh)617* related to this peer MUST be reset to their initial values618* (see Section 6.2.1)619*/620t->cwnd = min(4*asoc->pathmtu, max_t(__u32, 2*asoc->pathmtu, 4380));621t->burst_limited = 0;622t->ssthresh = asoc->peer.i.a_rwnd;623t->rto = asoc->rto_initial;624t->rtt = 0;625t->srtt = 0;626t->rttvar = 0;627628/* Reset these additional varibles so that we have a clean629* slate.630*/631t->partial_bytes_acked = 0;632t->flight_size = 0;633t->error_count = 0;634t->rto_pending = 0;635t->hb_sent = 0;636637/* Initialize the state information for SFR-CACC */638t->cacc.changeover_active = 0;639t->cacc.cycling_changeover = 0;640t->cacc.next_tsn_at_change = 0;641t->cacc.cacc_saw_newack = 0;642}643644645