/*
 * Name server thread entry point.
 *
 * Binds a listening stream socket to ns->ns_port on the first usable local
 * address, initializes the name map, and then serves one command per
 * accepted connection until the thread is cancelled.
 *
 * args: pointer to the struct util_ns this server belongs to.
 * Always returns NULL.
 */
static void *util_ns_name_server_func(void *args)
{
	struct util_ns *ns = (struct util_ns *)args;
	struct addrinfo hints = {
		.ai_flags = AI_PASSIVE,
		.ai_family = AF_UNSPEC,
		.ai_socktype = SOCK_STREAM
	};
	struct addrinfo *res, *p;
	void *cleanup_args[2];
	char *service;
	SOCKET listenfd = INVALID_SOCKET, connfd;
	int n, ret;
	struct util_ns_cmd cmd = (const struct util_ns_cmd){ 0 };

	if (asprintf(&service, "%d", ns->ns_port) < 0)
		return NULL;

	/*
	 * getaddrinfo() reports failure with any non-zero value; the sign of
	 * the EAI_* codes is platform specific, so do not test for "< 0".
	 */
	n = getaddrinfo(NULL, service, &hints, &res);
	if (n) {
		free(service);
		return NULL;
	}

	/* Use the first address on which both socket() and bind() succeed. */
	for (p = res; p; p = p->ai_next) {
		listenfd = ofi_socket(p->ai_family, p->ai_socktype,
				      p->ai_protocol);
		if (listenfd == INVALID_SOCKET)
			continue;

		n = 1;
		(void) setsockopt(listenfd, SOL_SOCKET, SO_REUSEADDR,
				  &n, sizeof(n));
		if (!bind(listenfd, p->ai_addr, p->ai_addrlen))
			break;

		ofi_close_socket(listenfd);
		listenfd = INVALID_SOCKET;
	}

	freeaddrinfo(res);
	free(service);

	if (listenfd == INVALID_SOCKET)
		return NULL;

	if (util_ns_map_init(ns))
		goto done;

	ret = listen(listenfd, 256);
	if (ret)
		goto done;

	/* The cleanup handler is handed the listening socket and the ns. */
	cleanup_args[0] = (void *)(uintptr_t)listenfd;
	cleanup_args[1] = (void *)ns;
	pthread_cleanup_push(util_ns_name_server_cleanup,
			     (void *)cleanup_args);

	while (1) {
		connfd = accept(listenfd, NULL, 0);
		if (connfd == INVALID_SOCKET)
			continue;

		/* Read service data; dispatch only on a complete header. */
		ret = ofi_read_socket(connfd, &cmd, cmd_len);
		if (ret == cmd_len)
			(void) util_ns_op_dispatcher(ns, &cmd, connfd);

		ofi_close_socket(connfd);
	}

	/* Not reached: required to pair with pthread_cleanup_push(). */
	pthread_cleanup_pop(1);

done:
	ofi_close_socket(listenfd);
	return NULL;
}

/*
 * Name server API: client side
 */

/*
 * Open a stream connection to the name server at host `server`, port
 * ns->ns_port, trying each resolved address in turn.
 *
 * Returns the connected socket, INVALID_SOCKET if no address could be
 * connected, or -1 if the port string could not be built or the name
 * could not be resolved.
 */
static int util_ns_connect_server(struct util_ns *ns, const char *server)
{
	struct addrinfo hints = {
		.ai_family = AF_UNSPEC,
		.ai_socktype = SOCK_STREAM
	};
	struct addrinfo *res, *p;
	char *service;
	SOCKET sockfd = INVALID_SOCKET;
	int n;

	if (asprintf(&service, "%d", ns->ns_port) < 0)
		return -1;

	/* Any non-zero return is a failure; EAI_* sign varies by platform. */
	n = getaddrinfo(server, service, &hints, &res);
	if (n) {
		free(service);
		return -1;
	}

	for (p = res; p; p = p->ai_next) {
		sockfd = ofi_socket(p->ai_family, p->ai_socktype,
				    p->ai_protocol);
		if (sockfd == INVALID_SOCKET)
			continue;

		if (!connect(sockfd, p->ai_addr, p->ai_addrlen))
			break;

		ofi_close_socket(sockfd);
		sockfd = INVALID_SOCKET;
	}

	freeaddrinfo(res);
	free(service);

	return sockfd;
}

/*
 * Send a single <command header, service, name> message to the name server
 * configured in ns (ns->ns_hostname, or OFI_NS_DEFAULT_HOSTNAME when unset).
 * Shared implementation of the add and delete requests, which differ only
 * in the opcode.
 *
 * Returns FI_SUCCESS when the whole message was written, -FI_ENOMEM if the
 * message buffer could not be allocated, and -FI_ENODATA if the server
 * could not be reached or the write was incomplete.
 */
static int util_ns_send_local_name(struct util_ns *ns, int op,
				   void *service, void *name)
{
	const char *server = (ns->ns_hostname ?
		ns->ns_hostname : OFI_NS_DEFAULT_HOSTNAME);
	struct util_ns_cmd cmd = {
		.op = op,
		.status = 0,
	};
	size_t write_len = 0;
	void *write_buf;
	ssize_t written;
	SOCKET sockfd;
	int ret;

	write_buf = calloc(cmd_len + ns->service_len + ns->name_len, 1);
	if (!write_buf)
		return -FI_ENOMEM;

	/* Message layout: command header, then service, then name. */
	memcpy(write_buf, &cmd, cmd_len);
	write_len += cmd_len;
	memcpy((char *)write_buf + write_len, service, ns->service_len);
	write_len += ns->service_len;
	memcpy((char *)write_buf + write_len, name, ns->name_len);
	write_len += ns->name_len;

	sockfd = util_ns_connect_server(ns, server);
	if (sockfd == INVALID_SOCKET) {
		ret = -FI_ENODATA;
		goto out;
	}

	written = util_ns_write_socket_op(sockfd, write_buf, write_len);
	ret = (written >= 0 && (size_t)written == write_len) ?
		FI_SUCCESS : -FI_ENODATA;

	ofi_close_socket(sockfd);
out:
	free(write_buf);
	return ret;
}

/*
 * Register the <service, name> pair with the name server.
 * `service` must point to ns->service_len bytes and `name` to
 * ns->name_len bytes. Returns FI_SUCCESS, -FI_ENOMEM or -FI_ENODATA.
 */
int ofi_ns_add_local_name(struct util_ns *ns, void *service, void *name)
{
	return util_ns_send_local_name(ns, OFI_UTIL_NS_ADD, service, name);
}

/*
 * Ask the name server to remove the <service, name> pair.
 * `service` must point to ns->service_len bytes and `name` to
 * ns->name_len bytes. Returns FI_SUCCESS, -FI_ENOMEM or -FI_ENODATA.
 */
int ofi_ns_del_local_name(struct util_ns *ns, void *service, void *name)
{
	return util_ns_send_local_name(ns, OFI_UTIL_NS_DEL, service, name);
}

/*
 * Query the name server on `server_hostname` for `service`.
 *
 * On success returns a newly calloc'ed buffer of ns->name_len bytes holding
 * the name from the reply, and overwrites `service` (ns->service_len bytes)
 * with the service bytes from the reply. Returns NULL on any failure.
 * NOTE(review): the returned buffer is presumably freed by the caller;
 * nothing in this function releases it.
 */
void *ofi_ns_resolve_name(struct util_ns *ns, const char *server_hostname,
			  void *service)
{
	void *dest_addr = NULL, *io_buf;
	size_t io_len = 0;
	SOCKET sockfd;
	ssize_t ret = 0;
	struct util_ns_cmd cmd = {
		.op = OFI_UTIL_NS_QUERY,
		.status = 0,
	};

	sockfd = util_ns_connect_server(ns, server_hostname);
	if (sockfd == INVALID_SOCKET)
		goto err1;

	/* Request: command header followed by the service. */
	io_buf = calloc(cmd_len + ns->service_len, 1);
	if (!io_buf)
		goto err2;

	memcpy(io_buf, &cmd, cmd_len);
	io_len += cmd_len;
	memcpy((char *)io_buf + io_len, service, ns->service_len);
	io_len += ns->service_len;

	ret = util_ns_write_socket_op(sockfd, io_buf, io_len);
	if (ret < 0)
		goto err3;

	free(io_buf);

	/* Reply: command header, then service followed by name. */
	io_len = ns->service_len + ns->name_len;
	io_buf = calloc(io_len, 1);
	if (!io_buf)
		goto err2;

	ret = util_ns_read_socket_op(sockfd, &cmd, cmd_len);
	if (ret < 0 || cmd.status)
		goto err3;

	ret = util_ns_read_socket_op(sockfd, io_buf, io_len);
	if (ret == io_len) {
		dest_addr = calloc(ns->name_len, 1);
		if (!dest_addr)
			goto err3;

		io_len = 0;
		memcpy(service, (char *)io_buf + io_len, ns->service_len);
		io_len += ns->service_len;
		memcpy(dest_addr, (char *)io_buf + io_len, ns->name_len);
	}

err3:
	free(io_buf);
err2:
	ofi_close_socket(sockfd);
err1:
	return dest_addr;
}

/*
 * Name server API: server side
 */

/*
 * Start the name server thread for ns and wait briefly for a name server
 * to accept connections on ns->ns_hostname (or OFI_NS_DEFAULT_HOSTNAME).
 */
void ofi_ns_start_server(struct util_ns *ns)
{
	int ret;
	SOCKET sockfd;
	int sleep_usec = 1000;
	const char *server_hostname = (ns->ns_hostname ?
		ns->ns_hostname : OFI_NS_DEFAULT_HOSTNAME);

	ofi_osd_init();

	ret = pthread_create(&ns->ns_thread, NULL,
			     util_ns_name_server_func, (void *)ns);
	if (ret) {
		/*
		 * use the main thread's ID as invalid
		 * value for the new thread
		 */
		ns->ns_thread = pthread_self();
	}

	/*
	 * Wait for the local name server to come up. It could be the thread
	 * created above, or the thread created by another process on the same
	 * node. The delay starts at 1 ms and doubles while it is below 10 ms,
	 * so the total wait time is about (1+2+4+8)ms = 15 ms.
	 * NOTE(review): this comment previously claimed (1+2+4+...+8192)ms =
	 * 16 seconds, which the loop bound does not implement -- confirm
	 * which of the two was intended.
	 */
	while (sleep_usec < 10000) {
		sockfd = util_ns_connect_server(ns, server_hostname);
		if (sockfd != INVALID_SOCKET) {
			ofi_close_socket(sockfd);
			return;
		}
		usleep(sleep_usec);
		sleep_usec *= 2;
	}
}
void fi_ini(void) { char *param_val = NULL; pthread_mutex_lock(&ofi_ini_lock); if (ofi_init) goto unlock; fi_param_init(); fi_log_init(); fi_util_init(); ofi_osd_init(); fi_param_define(NULL, "provider", FI_PARAM_STRING, "Only use specified provider (default: all available)"); fi_param_define(NULL, "fork_unsafe", FI_PARAM_BOOL, "Whether use of fork() may be unsafe for some providers" " (default: no). Setting this to yes could improve" " performance at the expense of making fork() potentially" " unsafe"); fi_param_get_str(NULL, "provider", ¶m_val); fi_create_filter(&prov_filter, param_val); #ifdef HAVE_LIBDL int n = 0; char **dirs; char *provdir = NULL; void *dlhandle; /* If dlopen fails, assume static linking and just return without error */ dlhandle = dlopen(NULL, RTLD_NOW); if (dlhandle == NULL) { goto libdl_done; } dlclose(dlhandle); fi_param_define(NULL, "provider_path", FI_PARAM_STRING, "Search for providers in specific path (default: " PROVDLDIR ")"); fi_param_get_str(NULL, "provider_path", &provdir); if (!provdir) provdir = PROVDLDIR; dirs = split_and_alloc(provdir, ":"); if (dirs) { for (n = 0; dirs[n]; ++n) { fi_ini_dir(dirs[n]); } free_string_array(dirs); } libdl_done: #endif fi_register_provider(PSM2_INIT, NULL); fi_register_provider(PSM_INIT, NULL); fi_register_provider(USNIC_INIT, NULL); fi_register_provider(MLX_INIT, NULL); fi_register_provider(VERBS_INIT, NULL); fi_register_provider(GNI_INIT, NULL); fi_register_provider(RXM_INIT, NULL); fi_register_provider(BGQ_INIT, NULL); /* Initialize the socket(s) provider last. This will result in it being the least preferred provider. */ fi_register_provider(UDP_INIT, NULL); fi_register_provider(SOCKETS_INIT, NULL); /* Before you add ANYTHING here, read the comment above!!! */ /* Seriously, read it! */ ofi_init = 1; unlock: pthread_mutex_unlock(&ofi_ini_lock); }
void fi_ini(void) { char *param_val = NULL; pthread_mutex_lock(&common_locks.ini_lock); if (ofi_init) goto unlock; ofi_ordered_provs_init(); fi_param_init(); fi_log_init(); ofi_osd_init(); ofi_pmem_init(); ofi_perf_init(); ofi_hook_init(); fi_param_define(NULL, "provider", FI_PARAM_STRING, "Only use specified provider (default: all available)"); fi_param_define(NULL, "fork_unsafe", FI_PARAM_BOOL, "Whether use of fork() may be unsafe for some providers" " (default: no). Setting this to yes could improve" " performance at the expense of making fork() potentially" " unsafe"); fi_param_define(NULL, "universe_size", FI_PARAM_SIZE_T, "Defines the maximum number of processes that will be" " used by distribute OFI application. The provider uses" " this to optimize resource allocations" " (default: OFI service specific)"); fi_param_get_str(NULL, "provider", ¶m_val); ofi_create_filter(&prov_filter, param_val); #ifdef HAVE_LIBDL int n = 0; char **dirs; char *provdir = NULL; void *dlhandle; /* If dlopen fails, assume static linking and just return without error */ dlhandle = dlopen(NULL, RTLD_NOW); if (dlhandle == NULL) { goto libdl_done; } dlclose(dlhandle); fi_param_define(NULL, "provider_path", FI_PARAM_STRING, "Search for providers in specific path (default: " PROVDLDIR ")"); fi_param_get_str(NULL, "provider_path", &provdir); if (!provdir) provdir = PROVDLDIR; dirs = ofi_split_and_alloc(provdir, ":", NULL); if (dirs) { for (n = 0; dirs[n]; ++n) { ofi_ini_dir(dirs[n]); } ofi_free_string_array(dirs); } libdl_done: #endif ofi_register_provider(PSM2_INIT, NULL); ofi_register_provider(PSM_INIT, NULL); ofi_register_provider(USNIC_INIT, NULL); ofi_register_provider(MLX_INIT, NULL); ofi_register_provider(GNI_INIT, NULL); ofi_register_provider(BGQ_INIT, NULL); ofi_register_provider(NETDIR_INIT, NULL); ofi_register_provider(SHM_INIT, NULL); ofi_register_provider(RXM_INIT, NULL); ofi_register_provider(VERBS_INIT, NULL); //ofi_register_provider(MRAIL_INIT, NULL); 
ofi_register_provider(RSTREAM_INIT, NULL); { /* TODO: RXD is not stable for now. Disable it by default */ int enable_rxd = 0; fi_param_define(NULL, "rxd_enable", FI_PARAM_BOOL, "Enable RXD provider (default: no)"); fi_param_get_bool(NULL, "rxd_enable", &enable_rxd); if (enable_rxd) ofi_register_provider(RXD_INIT, NULL); } ofi_register_provider(UDP_INIT, NULL); ofi_register_provider(SOCKETS_INIT, NULL); ofi_register_provider(TCP_INIT, NULL); ofi_init = 1; unlock: pthread_mutex_unlock(&common_locks.ini_lock); }