/* run-time CPU detection */ static UCS_F_NOOPTIMIZE void ucs_check_cpu_flags(void) { char str[256]; char *p_str; int cpu_flags; struct { const char* flag; ucs_cpu_flag_t value; } *p_flags, cpu_flags_array[] = { { "cmov", UCS_CPU_FLAG_CMOV }, { "mmx", UCS_CPU_FLAG_MMX }, { "mmx2", UCS_CPU_FLAG_MMX2 }, { "sse", UCS_CPU_FLAG_SSE }, { "sse2", UCS_CPU_FLAG_SSE2 }, { "sse3", UCS_CPU_FLAG_SSE3 }, { "ssse3", UCS_CPU_FLAG_SSSE3 }, { "sse41", UCS_CPU_FLAG_SSE41 }, { "sse42", UCS_CPU_FLAG_SSE42 }, { "avx", UCS_CPU_FLAG_AVX }, { "avx2", UCS_CPU_FLAG_AVX2 }, { NULL, UCS_CPU_FLAG_UNKNOWN }, }; cpu_flags = ucs_arch_get_cpu_flag(); if (UCS_CPU_FLAG_UNKNOWN == cpu_flags) { return ; } strncpy(str, UCS_PP_MAKE_STRING(CPU_FLAGS), sizeof(str) - 1); p_str = strtok(str, " |\t\n\r"); while (p_str) { p_flags = cpu_flags_array; while (p_flags && p_flags->flag) { if (!strcmp(p_str, p_flags->flag)) { if (!(cpu_flags & p_flags->value)) { fprintf(stderr, "[%s:%d] FATAL: UCX library was compiled with %s" " but CPU does not support it.\n", ucs_get_host_name(), getpid(), p_flags->flag); exit(1); } break; } p_flags++; } if (NULL == p_flags->flag) { fprintf(stderr, "[%s:%d] FATAL: UCX library was compiled with %s" " but CPU does not support it.\n", ucs_get_host_name(), getpid(), p_str); exit(1); } p_str = strtok(NULL, " |\t\n\r"); } }
{"ATOMIC_MODE", "guess", "Atomic operations synchronization mode.\n" " cpu - atomic operations are consistent with respect to the CPU.\n" " device - atomic operations are performed on one of the transport devices,\n" " and there is guarantee of consistency with respect to the CPU." " guess - atomic operations mode is configured based on underlying\n" " transport capabilities. If one of active transports supports\n" " the DEVICE atomic mode, the DEVICE mode is selected.\n" " Otherwise the CPU mode is selected.", ucs_offsetof(ucp_config_t, ctx.atomic_mode), UCS_CONFIG_TYPE_ENUM(ucp_atomic_modes)}, {"LOG_DATA", "0", "Size of packet data that is dumped to the log system in debug mode (0 - nothing).", ucs_offsetof(ucp_config_t, ctx.log_data_size), UCS_CONFIG_TYPE_MEMUNITS}, {"MAX_WORKER_NAME", UCS_PP_MAKE_STRING(UCP_WORKER_NAME_MAX), "Maximal length of worker name. Affects the size of worker address in debug builds.", ucs_offsetof(ucp_config_t, ctx.max_worker_name), UCS_CONFIG_TYPE_UINT}, {NULL} }; static ucp_tl_alias_t ucp_tl_aliases[] = { { "sm", { "mm", "knem", "sysv", "posix", "cma", "xpmem", NULL } }, { "shm", { "mm", "knem", "sysv", "posix", "cma", "xpmem", NULL } }, { "ib", { "rc", "ud", "rc_mlx5", "ud_mlx5", NULL } }, { "rc", { "rc", "ud", NULL } }, { "rc_x", { "rc_mlx5", "ud_mlx5", NULL } }, { "ud_x", { "ud_mlx5", NULL } }, { "dc_x", { "dc_mlx5", NULL } }, { "ugni", { "ugni_smsg", "ugni_udt", "ugni_rdma", NULL } },
#include <ucs/sys/sys.h>
#include <ucs/debug/memtrack.h>
#include <ucs/type/class.h>
#include <cuda_runtime.h>
#include <cuda.h>

/* Value stringified into the "RCACHE_ADDR_ALIGN=" default of the rcache
 * sub-table entry below. */
#define UCT_GDR_COPY_MD_RCACHE_DEFAULT_ALIGN 65536

/*
 * Configuration fields of the gdr_copy memory domain, stored in
 * uct_gdr_copy_md_config_t.
 * NOTE(review): each initializer appears to be
 * { name, default value, description, field offset, field type }, and the
 * list is closed by a {NULL} entry - confirm against the definition of
 * ucs_config_field_t.
 */
static ucs_config_field_t uct_gdr_copy_md_config_table[] = {
    /* Nested table: fields of uct_md_config_table, stored in 'super' */
    {"", "", NULL,
     ucs_offsetof(uct_gdr_copy_md_config_t, super), UCS_CONFIG_TYPE_TABLE(uct_md_config_table)},

    /* Ternary option, default "try" */
    {"RCACHE", "try", "Enable using memory registration cache",
     ucs_offsetof(uct_gdr_copy_md_config_t, enable_rcache), UCS_CONFIG_TYPE_TERNARY},

    /* Nested table: fields of uct_md_config_rcache_table, stored in 'rcache';
     * the default string sets RCACHE_ADDR_ALIGN to
     * UCT_GDR_COPY_MD_RCACHE_DEFAULT_ALIGN */
    {"", "RCACHE_ADDR_ALIGN=" UCS_PP_MAKE_STRING(UCT_GDR_COPY_MD_RCACHE_DEFAULT_ALIGN), NULL,
     ucs_offsetof(uct_gdr_copy_md_config_t, rcache), UCS_CONFIG_TYPE_TABLE(uct_md_config_rcache_table)},

    /* Time-typed registration cost parameters, stored in 'uc_reg_cost' */
    {"MEM_REG_OVERHEAD", "16us", "Memory registration overhead", /* TODO take default from device */
     ucs_offsetof(uct_gdr_copy_md_config_t, uc_reg_cost.overhead), UCS_CONFIG_TYPE_TIME},

    {"MEM_REG_GROWTH", "0.06ns", "Memory registration growth rate", /* TODO take default from device */
     ucs_offsetof(uct_gdr_copy_md_config_t, uc_reg_cost.growth), UCS_CONFIG_TYPE_TIME},

    {NULL}
};

static ucs_status_t uct_gdr_copy_md_query(uct_md_h md, uct_md_attr_t *md_attr)
{
    md_attr->cap.flags = UCT_MD_FLAG_REG |