diff --git a/src/ucp/core/ucp_context.c b/src/ucp/core/ucp_context.c index b9a261871af..d7ca3278b80 100644 --- a/src/ucp/core/ucp_context.c +++ b/src/ucp/core/ucp_context.c @@ -291,6 +291,12 @@ static ucs_config_field_t ucp_config_table[] = { "require out of band synchronization before destroying UCP resources.", ucs_offsetof(ucp_config_t, ctx.sockaddr_cm_enable), UCS_CONFIG_TYPE_TERNARY}, + {"LISTENER_BACKLOG", "auto", + "'auto' means that each transport would use its maximal allowed value.\n" + "If a value larger than what a transport supports is set, the backlog value\n" + "would be cut to that maximal value.", + ucs_offsetof(ucp_config_t, ctx.listener_backlog), UCS_CONFIG_TYPE_ULUNITS}, + {"PROTO_ENABLE", "n", "Experimental: enable new protocol selection logic", ucs_offsetof(ucp_config_t, ctx.proto_enable), UCS_CONFIG_TYPE_BOOL}, diff --git a/src/ucp/core/ucp_context.h b/src/ucp/core/ucp_context.h index 2bed2008d10..9614236f4db 100644 --- a/src/ucp/core/ucp_context.h +++ b/src/ucp/core/ucp_context.h @@ -100,6 +100,8 @@ typedef struct ucp_context_config { int unified_mode; /** Enable cm wireup-and-close protocol for client-server connections */ ucs_ternary_value_t sockaddr_cm_enable; + /** Maximal number of pending connection requests for a listener */ + size_t listener_backlog; /** Enable new protocol selection logic */ int proto_enable; } ucp_context_config_t; diff --git a/src/ucp/core/ucp_listener.c b/src/ucp/core/ucp_listener.c index b8c442bb2a6..971fd8095fb 100644 --- a/src/ucp/core/ucp_listener.c +++ b/src/ucp/core/ucp_listener.c @@ -242,9 +242,12 @@ ucp_listen_on_cm(ucp_listener_h listener, const ucp_listener_params_t *params) ucs_assert_always(num_cms > 0); uct_params.field_mask = UCT_LISTENER_PARAM_FIELD_CONN_REQUEST_CB | - UCT_LISTENER_PARAM_FIELD_USER_DATA; + UCT_LISTENER_PARAM_FIELD_USER_DATA | + UCT_LISTENER_PARAM_FIELD_BACKLOG; uct_params.conn_request_cb = ucp_cm_server_conn_request_cb; uct_params.user_data = listener; + uct_params.backlog = ucs_min((size_t)INT_MAX, + worker->context->config.ext.listener_backlog); listener->num_rscs = 0; uct_listeners = ucs_calloc(num_cms, sizeof(*uct_listeners), diff --git a/src/uct/base/uct_cm.c b/src/uct/base/uct_cm.c index 4c862a39ff1..c730f18d391 100644 --- a/src/uct/base/uct_cm.c +++ b/src/uct/base/uct_cm.c @@ -223,6 +223,29 @@ ucs_status_t uct_listener_reject(uct_listener_h listener, return listener->cm->ops->listener_reject(listener, conn_request); } +ucs_status_t uct_listener_backlog_adjust(const uct_listener_params_t *params, + int max_value, int *backlog) +{ + if (params->field_mask & UCT_LISTENER_PARAM_FIELD_BACKLOG) { + if (params->backlog > max_value) { + ucs_diag("configure value %d is greater than the max_value %d. " + "using max_value", params->backlog, max_value); + *backlog = max_value; + } else { + *backlog = params->backlog; + } + } else { + *backlog = max_value; + } + + if (*backlog <= 0) { + ucs_error("the backlog value cannot be zero or negative"); + return UCS_ERR_INVALID_PARAM; + } + + return UCS_OK; +} + #ifdef ENABLE_STATS static ucs_stats_class_t uct_cm_stats_class = { diff --git a/src/uct/base/uct_cm.h b/src/uct/base/uct_cm.h index 9544597465d..691cc06cc17 100644 --- a/src/uct/base/uct_cm.h +++ b/src/uct/base/uct_cm.h @@ -108,6 +108,9 @@ extern ucs_config_field_t uct_cm_config_table[]; UCS_CLASS_DECLARE(uct_cm_t, uct_cm_ops_t*, uct_iface_ops_t*, uct_worker_h, uct_component_h); +ucs_status_t uct_listener_backlog_adjust(const uct_listener_params_t *params, + int max_value, int *backlog); + ucs_status_t uct_cm_set_common_data(uct_cm_base_ep_t *ep, const uct_ep_params_t *params); diff --git a/src/uct/ib/rdmacm/rdmacm_listener.c b/src/uct/ib/rdmacm/rdmacm_listener.c index f785e0cf369..6832fd56979 100644 --- a/src/uct/ib/rdmacm/rdmacm_listener.c +++ b/src/uct/ib/rdmacm/rdmacm_listener.c @@ -11,6 +11,25 @@ #include "rdmacm_listener.h" +#define UCS_RDMACM_MAX_BACKLOG_PATH "/proc/sys/net/rdma_ucm/max_backlog" + + +static long ucs_rdmacm_max_backlog() +{ + static long max_backlog = 0; + + if ((max_backlog != 0) || + (ucs_read_file_number(&max_backlog, 1, UCS_RDMACM_MAX_BACKLOG_PATH) == UCS_OK)) { + ucs_assert(max_backlog <= INT_MAX); + } else { + ucs_diag("unable to read max_backlog value from %s file", + UCS_RDMACM_MAX_BACKLOG_PATH); + max_backlog = 1024; + } + + return max_backlog; +} + UCS_CLASS_INIT_FUNC(uct_rdmacm_listener_t, uct_cm_h cm, const struct sockaddr *saddr, socklen_t socklen, const uct_listener_params_t *params) @@ -41,8 +60,12 @@ UCS_CLASS_INIT_FUNC(uct_rdmacm_listener_t, uct_cm_h cm, goto err_destroy_id; } - backlog = (params->field_mask & UCT_LISTENER_PARAM_FIELD_BACKLOG) ? - params->backlog : SOMAXCONN; + status = uct_listener_backlog_adjust(params, ucs_rdmacm_max_backlog(), + &backlog); + if (status != UCS_OK) { + goto err_destroy_id; + } + if (rdma_listen(self->id, backlog)) { ucs_error("rdma_listen(id:=%p addr=%s backlog=%d) failed: %m", self->id, ucs_sockaddr_str(saddr, ip_port_str, diff --git a/src/uct/tcp/tcp_listener.c b/src/uct/tcp/tcp_listener.c index 3ad4c427132..347efd6d501 100644 --- a/src/uct/tcp/tcp_listener.c +++ b/src/uct/tcp/tcp_listener.c @@ -106,8 +106,11 @@ UCS_CLASS_INIT_FUNC(uct_tcp_listener_t, uct_cm_h cm, self->conn_request_cb = params->conn_request_cb; self->user_data = (params->field_mask & UCT_LISTENER_PARAM_FIELD_USER_DATA) ? params->user_data : NULL; - backlog = (params->field_mask & UCT_LISTENER_PARAM_FIELD_BACKLOG) ? - params->backlog : ucs_socket_max_conn(); + + status = uct_listener_backlog_adjust(params, ucs_socket_max_conn(), &backlog); + if (status != UCS_OK) { + goto err; + } status = ucs_socket_server_init(saddr, socklen, backlog, 0, self->sockcm->allow_addr_inuse,