diff --git a/src/cart/README.env b/src/cart/README.env index 00f270d7a413..b90939c8c720 100644 --- a/src/cart/README.env +++ b/src/cart/README.env @@ -1,13 +1,10 @@ This file lists the environment variables used in CaRT. . D_PROVIDER (Deprecated: CRT_PHY_ADDR_STR) - It determines which mercury NA plugin to be used: + It determines which mercury NA plugin and transport are used: - set it as "ofi+verbs;ofi_rxm" to use OFI verbs;ofi_rxm provider - - set it as "ofi+gni" to use OFI gni provider - set it as "sm" to use SM plugin which only works within single node - - set it as "ofi+tcp;ofi_rxm" to use OFI tcp;ofi_rxm provider. - - set it as "ofi+sockets" to use OFI sockets provider - NOTE: This provider is deprecated in favor of "ofi+tcp;ofi_rxm" + - set it as "ofi+tcp" to use OFI tcp provider. - by default (not set or set as any other value) it will use ofi tcp provider. @@ -205,3 +202,8 @@ This file lists the environment variables used in CaRT. start copying data in an effort to release multi-recv buffers. Copy will occur when at most D_MRECV_BUF_COPY buffers remain. + SWIM_TRAFFIC_CLASS + (server only) Select the traffic class used by the SWIM protocol, to help prevent potential + traffic congestion. Available options are: "unspec" (default), "best_effort", + "low_latency", "bulk_data". Unrecognized values fall back to "unspec". + diff --git a/src/cart/crt_hg.c b/src/cart/crt_hg.c index 33ef9a6c9b4e..26b54b52ec3d 100644 --- a/src/cart/crt_hg.c +++ b/src/cart/crt_hg.c @@ -863,6 +863,9 @@ crt_hg_class_init(crt_provider_t provider, int ctx_idx, bool primary, int iface_ init_info.request_post_incr = crt_gdata.cg_post_incr; init_info.multi_recv_op_max = crt_gdata.cg_mrecv_buf; init_info.multi_recv_copy_threshold = crt_gdata.cg_mrecv_buf_copy; + /* Separate SWIM traffic in an effort to prevent potential congestion. 
/**
 * Traffic classes that can be requested through the SWIM_TRAFFIC_CLASS
 * environment variable.
 *
 * Each X(id, name) row pairs an enum constant with the string accepted from the
 * environment. CRT_TC_UNKNOWN must stay the LAST row: it is the sentinel that
 * bounds the lookup in crt_str_to_tc() and is never returned to callers.
 *
 * The enum value is cast directly to enum na_traffic_class when the SWIM
 * context is initialized, so the order of the rows must stay aligned with that
 * enum (NOTE(review): confirm against mercury's NA headers when adding rows).
 */
#define CRT_TRAFFIC_CLASSES                                                        \
	X(CRT_TC_UNSPEC, "unspec")           /* Leave it up to the plugin to choose */ \
	X(CRT_TC_BEST_EFFORT, "best_effort") /* Best effort */                         \
	X(CRT_TC_LOW_LATENCY, "low_latency") /* Low latency */                         \
	X(CRT_TC_BULK_DATA, "bulk_data")     /* Bulk data */                           \
	X(CRT_TC_UNKNOWN, "unknown")         /* Unknown (sentinel, keep last) */

#define X(a, b) a,
enum crt_traffic_class { CRT_TRAFFIC_CLASSES };
#undef X

/** Environment-variable spelling of each traffic class, indexed by enum value. */
#define X(a, b) b,
static const char *const crt_tc_name[] = {CRT_TRAFFIC_CLASSES};
#undef X

/**
 * Convert the textual form of a traffic class into its enum value.
 *
 * \param[in] str	Traffic class name (case-sensitive); may be NULL when the
 *			environment variable is not set.
 *
 * \return		The matching traffic class, or CRT_TC_UNSPEC when \a str
 *			is NULL, is "unknown", or does not match any selectable
 *			class.
 */
static enum crt_traffic_class
crt_str_to_tc(const char *str)
{
	int i;

	if (str == NULL)
		return CRT_TC_UNSPEC;

	/*
	 * The bound is tested before the table is indexed, so the lookup does
	 * not depend on the sentinel row for staying inside crt_tc_name[].
	 */
	for (i = 0; i < CRT_TC_UNKNOWN; i++) {
		if (strcmp(crt_tc_name[i], str) == 0)
			return (enum crt_traffic_class)i;
	}

	/* No selectable class matched: let the plugin choose. */
	return CRT_TC_UNSPEC;
}
struct crt_gdata { /** global swim index for all servers */ int32_t cg_swim_crt_idx; + /** traffic class used by SWIM */ + enum crt_traffic_class cg_swim_tc; + /** credits limitation for #in-flight RPCs per target EP CTX */ uint32_t cg_credit_ep_ctx; @@ -220,6 +234,7 @@ struct crt_event_cb_priv { ENV(SWIM_PING_TIMEOUT) \ ENV(SWIM_PROTOCOL_PERIOD_LEN) \ ENV(SWIM_SUSPECT_TIMEOUT) \ + ENV_STR(SWIM_TRAFFIC_CLASS) \ ENV_STR(UCX_IB_FORK_INIT) /* uint env */