/**
 * Find or create the openib proc structure for a peer and return it with its
 * proc_lock held.
 *
 * If ibproc_lookup_and_lock() already knows the peer, its result is returned
 * as-is (presumably already locked, going by the helper's name). Otherwise a
 * new structure is built from the peer's modex message and published on
 * mca_btl_openib_component.ib_procs under ib_lock. If another thread has
 * published the same peer in the meantime, the local copy is released and the
 * published one is locked and returned instead.
 *
 * @param proc  OPAL process descriptor of the peer.
 *
 * @return The proc structure with proc_lock held, or NULL when the object
 *         cannot be created, the modex receive fails, the modex message is
 *         empty, the peer advertises zero ports, or an allocation fails.
 */
mca_btl_openib_proc_t* mca_btl_openib_proc_get_locked(opal_proc_t* proc)
{
    mca_btl_openib_proc_t *ib_proc = NULL, *ib_proc_ret = NULL;
    size_t msg_size;
    uint32_t size;
    int rc, i, j;
    void *message;
    char *offset;
    int modex_message_size;
    mca_btl_openib_modex_message_t dummy;
    bool is_new = false;

    /* Check if we have already created an IB proc
     * structure for this process */
    ib_proc = ibproc_lookup_and_lock(proc);
    if (NULL != ib_proc) {
        /* Gotcha! */
        return ib_proc;
    }

    /* Initialization as a whole has to look atomic. The scheme is:
     *  - every concurrent thread is allowed to build its own copy;
     *  - when a thread is done it takes ib_lock and checks whether the
     *    process is still missing from the list;
     *  - if so the new proc is added, otherwise the freshly built
     *    structure is released and the published one is used.
     */

    /* First time, gotta create a new IB proc
     * out of the opal_proc ... */
    ib_proc = OBJ_NEW(mca_btl_openib_proc_t);
    if (NULL == ib_proc) {
        return NULL;
    }

    /* Initialize number of peer */
    ib_proc->proc_endpoint_count = 0;
    ib_proc->proc_opal = proc;

    /* query for the peer address info */
    /* NOTE(review): `message` is never freed in this function; confirm
     * whether OPAL_MODEX_RECV hands ownership of the buffer to the caller. */
    OPAL_MODEX_RECV(rc, &mca_btl_openib_component.super.btl_version,
                    &proc->proc_name, &message, &msg_size);
    if (OPAL_SUCCESS != rc) {
        BTL_VERBOSE(("[%s:%d] opal_modex_recv failed for peer %s",
                     __FILE__, __LINE__,
                     OPAL_NAME_PRINT(proc->proc_name)));
        goto no_err_exit;
    }
    if (0 == msg_size) {
        goto no_err_exit;
    }

    /* Message was packed in btl_openib_component.c; the format is
       listed in a comment in that file */

    /* Only the bytes that precede the `end` marker member travel on the wire */
    modex_message_size = ((char *) &(dummy.end)) - ((char*) &dummy);

    /* Unpack the number of modules in the message */
    offset = (char *) message;
    unpack8(&offset, &(ib_proc->proc_port_count));
    BTL_VERBOSE(("unpack: %d btls", ib_proc->proc_port_count));
    if (ib_proc->proc_port_count > 0) {
        /* Zero-initialised so that entries which have not been unpacked yet
         * hold NULL pointers and zero counts if the object is released on
         * an error path below. */
        ib_proc->proc_ports = (mca_btl_openib_proc_modex_t *)
            calloc(ib_proc->proc_port_count,
                   sizeof(mca_btl_openib_proc_modex_t));
        if (NULL == ib_proc->proc_ports) {
            /* No array: do not keep advertising ports that do not exist */
            ib_proc->proc_port_count = 0;
            goto err_exit;
        }
    } else {
        ib_proc->proc_ports = NULL;
    }

    /* Loop over unpacking all the ports */
    for (i = 0; i < ib_proc->proc_port_count; i++) {

        /* Unpack the modex comment message struct */
        size = modex_message_size;
        memcpy(&(ib_proc->proc_ports[i].pm_port_info), offset, size);
#if !defined(WORDS_BIGENDIAN) && OPAL_ENABLE_HETEROGENEOUS_SUPPORT
        MCA_BTL_OPENIB_MODEX_MSG_NTOH(ib_proc->proc_ports[i].pm_port_info);
#endif
        offset += size;
        BTL_VERBOSE(("unpacked btl %d: modex message, offset now %d",
                     i, (int)(offset-((char*)message))));

        /* Unpack the number of CPCs that follow */
        unpack8(&offset, &(ib_proc->proc_ports[i].pm_cpc_data_count));
        BTL_VERBOSE(("unpacked btl %d: number of cpcs to follow %d (offset now %d)",
                     i, ib_proc->proc_ports[i].pm_cpc_data_count,
                     (int)(offset-((char*)message))));
        /* NOTE(review): calloc(0, ...) may legally return NULL, in which
         * case a port advertising zero CPCs is treated as an error here. */
        ib_proc->proc_ports[i].pm_cpc_data =
            (opal_btl_openib_connect_base_module_data_t *)
            calloc(ib_proc->proc_ports[i].pm_cpc_data_count,
                   sizeof(opal_btl_openib_connect_base_module_data_t));
        if (NULL == ib_proc->proc_ports[i].pm_cpc_data) {
            /* Keep the count consistent with the missing array */
            ib_proc->proc_ports[i].pm_cpc_data_count = 0;
            goto err_exit;
        }

        /* Unpack the CPCs */
        for (j = 0; j < ib_proc->proc_ports[i].pm_cpc_data_count; ++j) {
            uint8_t u8;
            opal_btl_openib_connect_base_module_data_t *cpcd;
            cpcd = ib_proc->proc_ports[i].pm_cpc_data + j;
            unpack8(&offset, &u8);
            BTL_VERBOSE(("unpacked btl %d: cpc %d: index %d (offset now %d)",
                         i, j, u8, (int)(offset-(char*)message)));
            cpcd->cbm_component =
                opal_btl_openib_connect_base_get_cpc_byindex(u8);
            BTL_VERBOSE(("unpacked btl %d: cpc %d: component %s",
                         i, j, cpcd->cbm_component->cbc_name));

            unpack8(&offset, &cpcd->cbm_priority);
            unpack8(&offset, &cpcd->cbm_modex_message_len);
            BTL_VERBOSE(("unpacked btl %d: cpc %d: priority %d, msg len %d (offset now %d)",
                         i, j, cpcd->cbm_priority,
                         cpcd->cbm_modex_message_len,
                         (int)(offset-(char*)message)));
            if (cpcd->cbm_modex_message_len > 0) {
                cpcd->cbm_modex_message = malloc(cpcd->cbm_modex_message_len);
                if (NULL == cpcd->cbm_modex_message) {
                    BTL_ERROR(("Failed to malloc"));
                    goto err_exit;
                }
                memcpy(cpcd->cbm_modex_message, offset,
                       cpcd->cbm_modex_message_len);
                offset += cpcd->cbm_modex_message_len;
                BTL_VERBOSE(("unpacked btl %d: cpc %d: blob unpacked %d %x (offset now %d)",
                             i, j,
                             ((uint32_t*)cpcd->cbm_modex_message)[0],
                             ((uint32_t*)cpcd->cbm_modex_message)[1],
                             (int)(offset-((char*)message))));
            }
        }
    }

    if (0 == ib_proc->proc_port_count) {
        ib_proc->proc_endpoints = NULL;
    } else {
        ib_proc->proc_endpoints = (volatile mca_btl_base_endpoint_t**)
            malloc(ib_proc->proc_port_count * sizeof(mca_btl_base_endpoint_t*));
    }
    /* Covers both an allocation failure and a peer with zero ports */
    if (NULL == ib_proc->proc_endpoints) {
        goto err_exit;
    }

    BTL_VERBOSE(("unpacking done!"));

    /* Finally add this process to the initialized procs list */
    opal_mutex_lock(&mca_btl_openib_component.ib_lock);

    ib_proc_ret = ibproc_lookup_no_lock(proc);
    if (NULL == ib_proc_ret) {
        /* if the process can't be found in the list - insert it locked.
         * It is safe to lock ib_proc here because this thread is
         * the only one who knows about it so far */
        opal_mutex_lock(&ib_proc->proc_lock);
        opal_list_append(&mca_btl_openib_component.ib_procs, &ib_proc->super);
        ib_proc_ret = ib_proc;
        is_new = true;
    } else {
        /* otherwise another thread won the race - drop our copy */
        OBJ_RELEASE(ib_proc);
    }
    opal_mutex_unlock(&mca_btl_openib_component.ib_lock);

    /* if we did not insert the process, lock the published one only after
     * dropping mca_btl_openib_component.ib_lock, so proc_lock is never
     * waited on while ib_lock is held */
    if (!is_new) {
        opal_mutex_lock(&ib_proc_ret->proc_lock);
    }

    return ib_proc_ret;

err_exit:
    BTL_ERROR(("%d: error exit from mca_btl_openib_proc_get_locked",
               OPAL_PROC_MY_NAME.vpid));

no_err_exit:
    OBJ_RELEASE(ib_proc);
    return NULL;
}
/**
 * Return the openib proc structure for a peer, building it from the peer's
 * modex message when mca_btl_openib_proc_lookup_proc() does not find one.
 *
 * The function takes no lock itself and does not add the new structure to
 * any list.
 *
 * @param proc  OPAL process descriptor of the peer.
 *
 * @return The existing or newly built proc structure, or NULL when the
 *         object cannot be created, the modex receive fails, the modex
 *         message is empty, the peer advertises zero ports, or an allocation
 *         fails. On every failure path the partially built object is
 *         released before returning.
 */
mca_btl_openib_proc_t* mca_btl_openib_proc_create(opal_proc_t* proc)
{
    mca_btl_openib_proc_t* module_proc = NULL;
    size_t msg_size;
    uint32_t size;
    int rc, i, j;
    void *message;
    char *offset;
    int modex_message_size;
    mca_btl_openib_modex_message_t dummy;

    /* Check if we have already created an IB proc
     * structure for this process */
    module_proc = mca_btl_openib_proc_lookup_proc(proc);
    if (NULL != module_proc) {
        /* Gotcha! */
        return module_proc;
    }

    /* Oops! First time, gotta create a new IB proc
     * out of the opal_proc ... */
    module_proc = OBJ_NEW(mca_btl_openib_proc_t);
    if (NULL == module_proc) {
        return NULL;
    }

    /* Initialize number of peer */
    module_proc->proc_endpoint_count = 0;
    module_proc->proc_opal = proc;

    /* query for the peer address info */
    /* NOTE(review): mca_btl_openib_proc_get_locked() passes &proc->proc_name
     * as the third argument while this call passes `proc`; confirm which one
     * the macro expects. `message` is also never freed here - confirm who
     * owns the buffer. */
    OPAL_MODEX_RECV(rc, &mca_btl_openib_component.super.btl_version,
                    proc, &message, &msg_size);
    if (OPAL_SUCCESS != rc) {
        BTL_ERROR(("[%s:%d] opal_modex_recv failed for peer %s",
                   __FILE__, __LINE__,
                   OPAL_NAME_PRINT(proc->proc_name)));
        goto fail;
    }
    if (0 == msg_size) {
        goto fail;
    }

    /* Message was packed in btl_openib_component.c; the format is
       listed in a comment in that file */

    /* Only the bytes that precede the `end` marker member travel on the wire */
    modex_message_size = ((char *) &(dummy.end)) - ((char*) &dummy);

    /* Unpack the number of modules in the message */
    offset = (char *) message;
    unpack8(&offset, &(module_proc->proc_port_count));
    BTL_VERBOSE(("unpack: %d btls", module_proc->proc_port_count));
    if (module_proc->proc_port_count > 0) {
        /* Zero-initialised so that entries which have not been unpacked yet
         * hold NULL pointers and zero counts if the object is released on
         * an error path below. */
        module_proc->proc_ports = (mca_btl_openib_proc_modex_t *)
            calloc(module_proc->proc_port_count,
                   sizeof(mca_btl_openib_proc_modex_t));
        if (NULL == module_proc->proc_ports) {
            /* No array: do not keep advertising ports that do not exist */
            module_proc->proc_port_count = 0;
            goto fail;
        }
    } else {
        module_proc->proc_ports = NULL;
    }

    /* Loop over unpacking all the ports */
    for (i = 0; i < module_proc->proc_port_count; i++) {

        /* Unpack the modex comment message struct */
        size = modex_message_size;
        memcpy(&(module_proc->proc_ports[i].pm_port_info), offset, size);
#if !defined(WORDS_BIGENDIAN) && OPAL_ENABLE_HETEROGENEOUS_SUPPORT
        MCA_BTL_OPENIB_MODEX_MSG_NTOH(module_proc->proc_ports[i].pm_port_info);
#endif
        offset += size;
        BTL_VERBOSE(("unpacked btl %d: modex message, offset now %d",
                     i, (int)(offset-((char*)message))));

        /* Unpack the number of CPCs that follow */
        unpack8(&offset, &(module_proc->proc_ports[i].pm_cpc_data_count));
        BTL_VERBOSE(("unpacked btl %d: number of cpcs to follow %d (offset now %d)",
                     i, module_proc->proc_ports[i].pm_cpc_data_count,
                     (int)(offset-((char*)message))));
        /* NOTE(review): calloc(0, ...) may legally return NULL, in which
         * case a port advertising zero CPCs is treated as an error here. */
        module_proc->proc_ports[i].pm_cpc_data =
            (opal_btl_openib_connect_base_module_data_t *)
            calloc(module_proc->proc_ports[i].pm_cpc_data_count,
                   sizeof(opal_btl_openib_connect_base_module_data_t));
        if (NULL == module_proc->proc_ports[i].pm_cpc_data) {
            /* Keep the count consistent with the missing array */
            module_proc->proc_ports[i].pm_cpc_data_count = 0;
            goto fail;
        }

        /* Unpack the CPCs */
        for (j = 0; j < module_proc->proc_ports[i].pm_cpc_data_count; ++j) {
            uint8_t u8;
            opal_btl_openib_connect_base_module_data_t *cpcd;
            cpcd = module_proc->proc_ports[i].pm_cpc_data + j;
            unpack8(&offset, &u8);
            BTL_VERBOSE(("unpacked btl %d: cpc %d: index %d (offset now %d)",
                         i, j, u8, (int)(offset-(char*)message)));
            cpcd->cbm_component =
                opal_btl_openib_connect_base_get_cpc_byindex(u8);
            BTL_VERBOSE(("unpacked btl %d: cpc %d: component %s",
                         i, j, cpcd->cbm_component->cbc_name));

            unpack8(&offset, &cpcd->cbm_priority);
            unpack8(&offset, &cpcd->cbm_modex_message_len);
            BTL_VERBOSE(("unpacked btl %d: cpc %d: priority %d, msg len %d (offset now %d)",
                         i, j, cpcd->cbm_priority,
                         cpcd->cbm_modex_message_len,
                         (int)(offset-(char*)message)));
            if (cpcd->cbm_modex_message_len > 0) {
                cpcd->cbm_modex_message = malloc(cpcd->cbm_modex_message_len);
                if (NULL == cpcd->cbm_modex_message) {
                    BTL_ERROR(("Failed to malloc"));
                    goto fail;
                }
                memcpy(cpcd->cbm_modex_message, offset,
                       cpcd->cbm_modex_message_len);
                offset += cpcd->cbm_modex_message_len;
                BTL_VERBOSE(("unpacked btl %d: cpc %d: blob unpacked %d %x (offset now %d)",
                             i, j,
                             ((uint32_t*)cpcd->cbm_modex_message)[0],
                             ((uint32_t*)cpcd->cbm_modex_message)[1],
                             (int)(offset-((char*)message))));
            }
        }
    }

    if (0 == module_proc->proc_port_count) {
        module_proc->proc_endpoints = NULL;
    } else {
        module_proc->proc_endpoints = (mca_btl_base_endpoint_t**)
            malloc(module_proc->proc_port_count * sizeof(mca_btl_base_endpoint_t*));
    }
    /* Covers both an allocation failure and a peer with zero ports */
    if (NULL == module_proc->proc_endpoints) {
        goto fail;
    }

    BTL_VERBOSE(("unpacking done!"));
    return module_proc;

fail:
    /* Single cleanup point: drop the partially built object so that no
     * failure path leaks it. */
    OBJ_RELEASE(module_proc);
    return NULL;
}