Beispiel #1
0
/**
 * Run-time CPU detection.
 *
 * Compares the CPU features named in the build-time CPU_FLAGS string against
 * the feature mask returned by ucs_arch_get_cpu_flag(). If the mask is
 * UCS_CPU_FLAG_UNKNOWN the check is skipped. Otherwise every token of
 * CPU_FLAGS (separated by spaces, '|', tabs or line breaks) must match an
 * entry of the local table and have its bit set in the mask; the first token
 * that does not is reported on stderr and the process exits with status 1.
 */
static UCS_F_NOOPTIMIZE void ucs_check_cpu_flags(void)
{
    /* Separators accepted between flag names in CPU_FLAGS */
    static const char delims[] = " |\t\n\r";
    char str[256];
    char *p_str;
    int cpu_flags;
    struct {
        const char* flag;
        ucs_cpu_flag_t value;
    } *p_flags,
    cpu_flags_array[] = {
        { "cmov", UCS_CPU_FLAG_CMOV },
        { "mmx", UCS_CPU_FLAG_MMX },
        { "mmx2", UCS_CPU_FLAG_MMX2 },
        { "sse", UCS_CPU_FLAG_SSE },
        { "sse2", UCS_CPU_FLAG_SSE2 },
        { "sse3", UCS_CPU_FLAG_SSE3 },
        { "ssse3", UCS_CPU_FLAG_SSSE3 },
        { "sse41", UCS_CPU_FLAG_SSE41 },
        { "sse42", UCS_CPU_FLAG_SSE42 },
        { "avx", UCS_CPU_FLAG_AVX },
        { "avx2", UCS_CPU_FLAG_AVX2 },
        { NULL, UCS_CPU_FLAG_UNKNOWN },  /* sentinel: end of table */
    };

    cpu_flags = ucs_arch_get_cpu_flag();
    if (UCS_CPU_FLAG_UNKNOWN == cpu_flags) {
        return;
    }

    /* strtok() modifies its input, so tokenize a private copy. strncpy() does
     * not write a terminator when the source fills the whole copy length, and
     * 'str' is uninitialized stack memory, so terminate it explicitly.
     * A CPU_FLAGS string longer than the buffer is silently truncated. */
    strncpy(str, UCS_PP_MAKE_STRING(CPU_FLAGS), sizeof(str) - 1);
    str[sizeof(str) - 1] = '\0';

    for (p_str = strtok(str, delims); p_str != NULL;
         p_str = strtok(NULL, delims)) {
        /* Look the token up; the loop stops on the match or on the sentinel */
        for (p_flags = cpu_flags_array; p_flags->flag != NULL; p_flags++) {
            if (!strcmp(p_str, p_flags->flag)) {
                if (!(cpu_flags & p_flags->value)) {
                    fprintf(stderr, "[%s:%d] FATAL: UCX library was compiled with %s"
                            " but CPU does not support it.\n",
                            ucs_get_host_name(), getpid(), p_flags->flag);
                    exit(1);
                }
                break;
            }
        }

        /* Reached the sentinel: the token is not in the table at all, which
         * is treated the same way as an unsupported feature */
        if (NULL == p_flags->flag) {
            fprintf(stderr, "[%s:%d] FATAL: UCX library was compiled with %s"
                    " but CPU does not support it.\n",
                    ucs_get_host_name(), getpid(), p_str);
            exit(1);
        }
    }
}
Beispiel #2
0
  {"ATOMIC_MODE", "guess",
   "Atomic operations synchronization mode.\n"
   " cpu    - atomic operations are consistent with respect to the CPU.\n"
   " device - atomic operations are performed on one of the transport devices,\n"
   "          and there is guarantee of consistency with respect to the CPU."
   " guess  - atomic operations mode is configured based on underlying\n"
   "          transport capabilities. If one of active transports supports\n"
   "          the DEVICE atomic mode, the DEVICE mode is selected.\n"
   "          Otherwise the CPU mode is selected.",
   ucs_offsetof(ucp_config_t, ctx.atomic_mode), UCS_CONFIG_TYPE_ENUM(ucp_atomic_modes)},

  {"LOG_DATA", "0",
   "Size of packet data that is dumped to the log system in debug mode (0 - nothing).",
   ucs_offsetof(ucp_config_t, ctx.log_data_size), UCS_CONFIG_TYPE_MEMUNITS},

  {"MAX_WORKER_NAME", UCS_PP_MAKE_STRING(UCP_WORKER_NAME_MAX),
   "Maximal length of worker name. Affects the size of worker address in debug builds.",
   ucs_offsetof(ucp_config_t, ctx.max_worker_name), UCS_CONFIG_TYPE_UINT},

  {NULL}
};

static ucp_tl_alias_t ucp_tl_aliases[] = {
  { "sm",    { "mm", "knem", "sysv", "posix", "cma", "xpmem", NULL } },
  { "shm",   { "mm", "knem", "sysv", "posix", "cma", "xpmem", NULL } },
  { "ib",    { "rc", "ud", "rc_mlx5", "ud_mlx5", NULL } },
  { "rc",    { "rc", "ud", NULL } },
  { "rc_x",  { "rc_mlx5", "ud_mlx5", NULL } },
  { "ud_x",  { "ud_mlx5", NULL } },
  { "dc_x",  { "dc_mlx5", NULL } },
  { "ugni",  { "ugni_smsg", "ugni_udt", "ugni_rdma", NULL } },
Beispiel #3
0
#include <ucs/sys/sys.h>
#include <ucs/debug/memtrack.h>
#include <ucs/type/class.h>
#include <cuda_runtime.h>
#include <cuda.h>

/* Default for the RCACHE_ADDR_ALIGN setting; it is stringified into the
 * default-value string of the rcache sub-table entry in
 * uct_gdr_copy_md_config_table.
 * NOTE(review): presumably an address alignment in bytes (64 KiB) - confirm. */
#define UCT_GDR_COPY_MD_RCACHE_DEFAULT_ALIGN 65536

/*
 * Configuration field table for uct_gdr_copy_md_config_t. Each entry names the
 * struct member it fills through ucs_offsetof().
 * NOTE(review): entries appear to be laid out as
 * {name, default value, description, member offset, value type} - confirm
 * against the declaration of ucs_config_field_t.
 */
static ucs_config_field_t uct_gdr_copy_md_config_table[] = {
    /* Unnamed entry: nests uct_md_config_table at the 'super' member */
    {"", "", NULL,
     ucs_offsetof(uct_gdr_copy_md_config_t, super), UCS_CONFIG_TYPE_TABLE(uct_md_config_table)},

    /* Ternary switch stored in 'enable_rcache'; the default is "try" */
    {"RCACHE", "try", "Enable using memory registration cache",
     ucs_offsetof(uct_gdr_copy_md_config_t, enable_rcache), UCS_CONFIG_TYPE_TERNARY},

    /* Unnamed entry: nests uct_md_config_rcache_table at the 'rcache' member,
     * with RCACHE_ADDR_ALIGN defaulting to UCT_GDR_COPY_MD_RCACHE_DEFAULT_ALIGN */
    {"", "RCACHE_ADDR_ALIGN=" UCS_PP_MAKE_STRING(UCT_GDR_COPY_MD_RCACHE_DEFAULT_ALIGN), NULL,
     ucs_offsetof(uct_gdr_copy_md_config_t, rcache),
     UCS_CONFIG_TYPE_TABLE(uct_md_config_rcache_table)},

    /* Time value stored in 'uc_reg_cost.overhead'; the default is "16us" */
    {"MEM_REG_OVERHEAD", "16us", "Memory registration overhead", /* TODO take default from device */
     ucs_offsetof(uct_gdr_copy_md_config_t, uc_reg_cost.overhead), UCS_CONFIG_TYPE_TIME},

    /* Time value stored in 'uc_reg_cost.growth'; the default is "0.06ns" */
    {"MEM_REG_GROWTH", "0.06ns", "Memory registration growth rate", /* TODO take default from device */
     ucs_offsetof(uct_gdr_copy_md_config_t, uc_reg_cost.growth), UCS_CONFIG_TYPE_TIME},

    /* End-of-table marker */
    {NULL}
};

static ucs_status_t uct_gdr_copy_md_query(uct_md_h md, uct_md_attr_t *md_attr)
{
    md_attr->cap.flags         = UCT_MD_FLAG_REG |