/*
 * Copyright (c) 2006 Oracle. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */
#include <linux/kernel.h>
#include <linux/random.h>

#include "rds.h"

/*
 * All of connection management is simplified by serializing it through
 * work queues that execute in a connection managing thread.
 *
 * TCP wants to send acks through sendpage() in response to data_ready(),
 * but it needs a process context to do so.
 *
 * The receive paths need to allocate but can't drop packets (!) so we have
 * a thread around to block allocating if the receive fast path sees an
 * allocation failure.
 */

/* Grand Unified Theory of connection life cycle:
 * At any point in time, the connection can be in one of these states:
 * DOWN, CONNECTING, UP, DISCONNECTING, ERROR
 *
 * The following transitions are possible:
 *  ANY           -> ERROR
 *  UP            -> DISCONNECTING
 *  ERROR         -> DISCONNECTING
 *  DISCONNECTING -> DOWN
 *  DOWN          -> CONNECTING
 *  CONNECTING    -> UP
 *
 * Transition to state DISCONNECTING/DOWN:
 *  - Inside the shutdown worker; synchronizes with the xmit path
 *    through RDS_IN_XMIT, and with connection management callbacks
 *    via c_cm_lock.
 *
 *    For receive callbacks, we rely on the underlying transport
 *    (TCP, IB/RDMA) to provide the necessary synchronisation.
 */
struct workqueue_struct *rds_wq;
EXPORT_SYMBOL_GPL(rds_wq);

void rds_connect_complete(struct rds_connection *conn)
{
	if (!rds_conn_transition(conn, RDS_CONN_CONNECTING, RDS_CONN_UP)) {
		printk(KERN_WARNING "%s: Cannot transition to state UP, "
				"current state is %d\n",
				__func__,
				atomic_read(&conn->c_state));
		atomic_set(&conn->c_state, RDS_CONN_ERROR);
		queue_work(rds_wq, &conn->c_down_w);
		return;
	}

	rdsdebug("conn %p for %pI4 to %pI4 complete\n",
		 conn, &conn->c_laddr, &conn->c_faddr);

	conn->c_reconnect_jiffies = 0;
	set_bit(0, &conn->c_map_queued);
	queue_delayed_work(rds_wq, &conn->c_send_w, 0);
	queue_delayed_work(rds_wq, &conn->c_recv_w, 0);
}
EXPORT_SYMBOL_GPL(rds_connect_complete);
/*
 * This random exponential backoff is relied on to eventually resolve racing
 * connects.
 *
 * If connect attempts race then both parties drop both connections and come
 * here to wait for a random amount of time before trying again. Eventually
 * the backoff range will be so much greater than the time it takes to
 * establish a connection that one of the pair will establish the connection
 * before the other's random delay fires.
 *
 * Connection attempts that arrive while a connection is already established
 * are also considered to be racing connects. This lets a connection from
 * a rebooted machine replace an existing stale connection before the transport
 * notices that the connection has failed.
 *
 * We should *always* start with a random backoff; otherwise a broken
 * connection will always take several iterations to be re-established.
 */
void rds_queue_reconnect(struct rds_connection *conn)
{
	unsigned long rand;

	rdsdebug("conn %p for %pI4 to %pI4 reconnect jiffies %lu\n",
		 conn, &conn->c_laddr, &conn->c_faddr,
		 conn->c_reconnect_jiffies);

	set_bit(RDS_RECONNECT_PENDING, &conn->c_flags);
	if (conn->c_reconnect_jiffies == 0) {
		conn->c_reconnect_jiffies = rds_sysctl_reconnect_min_jiffies;
		queue_delayed_work(rds_wq, &conn->c_conn_w, 0);
		return;
	}

	get_random_bytes(&rand, sizeof(rand));
	rdsdebug("%lu delay %lu ceil conn %p for %pI4 -> %pI4\n",
		 rand % conn->c_reconnect_jiffies, conn->c_reconnect_jiffies,
		 conn, &conn->c_laddr, &conn->c_faddr);
	queue_delayed_work(rds_wq, &conn->c_conn_w,
			   rand % conn->c_reconnect_jiffies);

	conn->c_reconnect_jiffies = min(conn->c_reconnect_jiffies * 2,
					rds_sysctl_reconnect_max_jiffies);
}

void rds_connect_worker(struct work_struct *work)
{
	struct rds_connection *conn = container_of(work, struct rds_connection, c_conn_w.work);
	int ret;

	clear_bit(RDS_RECONNECT_PENDING, &conn->c_flags);
	if (rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING)) {
		ret = conn->c_trans->conn_connect(conn);
		rdsdebug("conn %p for %pI4 to %pI4 dispatched, ret %d\n",
			 conn, &conn->c_laddr, &conn->c_faddr, ret);

		if (ret) {
			if (rds_conn_transition(conn, RDS_CONN_CONNECTING, RDS_CONN_DOWN))
				rds_queue_reconnect(conn);
			else
				rds_conn_error(conn, "RDS: connect failed\n");
		}
	}
}

void rds_send_worker(struct work_struct *work)
{
	struct rds_connection *conn = container_of(work, struct rds_connection, c_send_w.work);
	int ret;

	if (rds_conn_state(conn) == RDS_CONN_UP) {
		ret = rds_send_xmit(conn);
		rdsdebug("conn %p ret %d\n", conn, ret);
		switch (ret) {
		case -EAGAIN:
			rds_stats_inc(s_send_immediate_retry);
			queue_delayed_work(rds_wq, &conn->c_send_w, 0);
			break;
		case -ENOMEM:
			rds_stats_inc(s_send_delayed_retry);
			queue_delayed_work(rds_wq, &conn->c_send_w, 2);
			/* fall through */
		default:
			break;
		}
	}
}

void rds_recv_worker(struct work_struct *work)
{
	struct rds_connection *conn = container_of(work, struct rds_connection, c_recv_w.work);
	int ret;

	if (rds_conn_state(conn) == RDS_CONN_UP) {
		ret = conn->c_trans->recv(conn);
		rdsdebug("conn %p ret %d\n", conn, ret);
		switch (ret) {
		case -EAGAIN:
			rds_stats_inc(s_recv_immediate_retry);
			queue_delayed_work(rds_wq, &conn->c_recv_w, 0);
			break;
		case -ENOMEM:
			rds_stats_inc(s_recv_delayed_retry);
			queue_delayed_work(rds_wq, &conn->c_recv_w, 2);
			/* fall through */
		default:
			break;
		}
	}
}

void rds_shutdown_worker(struct work_struct *work)
{
	struct rds_connection *conn = container_of(work, struct rds_connection, c_down_w);

	rds_conn_shutdown(conn);
}

void rds_threads_exit(void)
{
	destroy_workqueue(rds_wq);
}

int rds_threads_init(void)
{
	rds_wq = create_singlethread_workqueue("krdsd");
	if (!rds_wq)
		return -ENOMEM;

	return 0;
}