/*
 * Handle one command sent by the main thread to the async event thread.
 *
 * A single mca_btl_openib_async_cmd_t is read from the descriptor stored in
 * devices_poll->async_pollfd[0] and dispatched on cmd.a_cmd:
 *
 *   OPENIB_ASYNC_CMD_FD_ADD     set O_NONBLOCK on cmd.fd and append it to the
 *                               poll array, growing the array when it is full.
 *   OPENIB_ASYNC_CMD_FD_REMOVE  remove cmd.fd from the poll array by moving
 *                               the last active entry into its slot.
 *   OPENIB_ASYNC_IGNORE_QP_ERR  append cmd.qp to ignore_qp_err_list.
 *   OPENIB_ASYNC_THREAD_EXIT    free the poll array, release every item of
 *                               ignore_qp_err_list, destruct the list and
 *                               terminate the calling thread via
 *                               pthread_exit() (this branch does not return).
 *
 * The first three commands are acknowledged with send_command_comp().
 * Any other command value is ignored and reported as success.
 *
 * devices_poll        poll bookkeeping (array, active count, capacity)
 * ignore_qp_err_list  list receiving the QPs named by IGNORE_QP_ERR commands
 *
 * Returns OPAL_SUCCESS on success and OPAL_ERROR on any failure.
 */
static int btl_openib_async_commandh(struct mca_btl_openib_async_poll *devices_poll,
                                     opal_list_t *ignore_qp_err_list)
{
    mca_btl_openib_async_cmd_t cmd;
    ssize_t nread;
    int fd, flags, j;

    /* Got command from main thread */
    nread = read(devices_poll->async_pollfd[0].fd, &cmd, sizeof(cmd));
    if (nread < 0) {
        BTL_ERROR(("Read failed [%d]",errno));
        return OPAL_ERROR;
    }
    if ((size_t)nread != sizeof(cmd)) {
        /* EOF or a truncated command: cmd is not fully initialized, so it
         * must not be interpreted. */
        BTL_ERROR(("Read failed: got %d of %d command bytes",
                   (int)nread, (int)sizeof(cmd)));
        return OPAL_ERROR;
    }

    BTL_VERBOSE(("Got cmd %d", cmd.a_cmd));
    if (OPENIB_ASYNC_CMD_FD_ADD == cmd.a_cmd) {
        fd = cmd.fd;
        BTL_VERBOSE(("Got fd %d", fd));
        BTL_VERBOSE(("Adding device [%d] to async event poll[%d]",
                     fd, devices_poll->active_poll_size));
        flags = fcntl(fd, F_GETFL);
        if (flags < 0 || fcntl(fd, F_SETFL, flags | O_NONBLOCK) < 0) {
            BTL_ERROR(("Failed to change file descriptor of async event"));
            return OPAL_ERROR;
        }
        if ((devices_poll->active_poll_size + 1) > devices_poll->poll_size) {
            struct pollfd *async_pollfd_tmp;
            /* Double the capacity, but never end up with less room than
             * the entry about to be stored needs (covers poll_size == 0). */
            int new_size = 2 * devices_poll->poll_size;

            if (new_size < devices_poll->active_poll_size + 1) {
                new_size = devices_poll->active_poll_size + 1;
            }
            async_pollfd_tmp = realloc(devices_poll->async_pollfd,
                                       sizeof(struct pollfd) * (size_t)new_size);
            if (NULL == async_pollfd_tmp) {
                /* The old array and poll_size are left untouched. */
                BTL_ERROR(("Failed malloc: %s:%d. "
                           "Fatal error, stoping asynch event thread"
                           , __FILE__, __LINE__));
                return OPAL_ERROR;
            }
            /* Commit the new capacity only once the allocation succeeded. */
            devices_poll->async_pollfd = async_pollfd_tmp;
            devices_poll->poll_size = new_size;
        }
        devices_poll->async_pollfd[devices_poll->active_poll_size].fd = fd;
        devices_poll->async_pollfd[devices_poll->active_poll_size].events = POLLIN;
        devices_poll->async_pollfd[devices_poll->active_poll_size].revents = 0;
        devices_poll->active_poll_size++;
        if (OPAL_SUCCESS != send_command_comp(fd)) {
            return OPAL_ERROR;
        }
    } else if (OPENIB_ASYNC_CMD_FD_REMOVE == cmd.a_cmd) {
        bool fd_found = false;

        fd = cmd.fd;
        BTL_VERBOSE(("Got fd %d", fd));

        /* Removing device from poll */
        BTL_VERBOSE(("Removing device [%d] from async event poll [%d]",
                     fd, devices_poll->active_poll_size));
        /* NOTE(review): when at most one entry is active no lookup happens
         * and the count is still decremented below - confirm that callers
         * never request a removal in that state. */
        if (devices_poll->active_poll_size > 1) {
            int last = devices_poll->active_poll_size - 1;

            /* Stop at the first match and never scan past the active
             * entries, even when the descriptor is not present. */
            for (j = 0; j < devices_poll->active_poll_size && !fd_found; j++) {
                if (devices_poll->async_pollfd[j].fd == fd) {
                    /* Fill the hole with the last active entry. */
                    devices_poll->async_pollfd[j] = devices_poll->async_pollfd[last];
                    fd_found = true;
                }
            }
            if (!fd_found) {
                BTL_ERROR(("Requested FD[%d] was not found in poll array",fd));
                return OPAL_ERROR;
            }
        }
        devices_poll->active_poll_size--;
        if (OPAL_SUCCESS != send_command_comp(fd)) {
            return OPAL_ERROR;
        }
    } else if (OPENIB_ASYNC_IGNORE_QP_ERR == cmd.a_cmd) {
        mca_btl_openib_qp_list *new_qp;

        new_qp = OBJ_NEW(mca_btl_openib_qp_list);
        if (NULL == new_qp) {
            BTL_ERROR(("Failed to allocate ignore-QP list item"));
            return OPAL_ERROR;
        }
        BTL_VERBOSE(("Ignore errors on QP %p", (void *)cmd.qp));
        new_qp->qp = cmd.qp;
        opal_list_append(ignore_qp_err_list, (opal_list_item_t *)new_qp);
        /* Report a failed acknowledgement like the add/remove branches do. */
        if (OPAL_SUCCESS != send_command_comp(OPENIB_ASYNC_IGNORE_QP_ERR)) {
            return OPAL_ERROR;
        }
    } else if (OPENIB_ASYNC_THREAD_EXIT == cmd.a_cmd) {
        /* Got 0 - command to close the thread */
        opal_list_item_t *item;

        BTL_VERBOSE(("Async event thread exit"));
        free(devices_poll->async_pollfd);
        return_status = OPAL_SUCCESS;

        while ((item = opal_list_remove_first(ignore_qp_err_list))) {
            OBJ_RELEASE(item);
        }
        OBJ_DESTRUCT(ignore_qp_err_list);

        pthread_exit(&return_status);
    }
    return OPAL_SUCCESS;
}
/* Function handle async thread commands */ static int btl_openib_async_commandh(struct mca_btl_openib_async_poll *devices_poll) { struct pollfd *async_pollfd_tmp; int fd,flags,j; /* Got command from main thread */ if (read(devices_poll->async_pollfd[0].fd, &fd, sizeof(int)) < 0) { BTL_ERROR(("Read failed [%d]",errno)); return OMPI_ERROR; } BTL_VERBOSE(("GOT event from -> %d",fd)); if (fd > 0) { BTL_VERBOSE(("Adding device [%d] to async event poll[%d]", fd, devices_poll->active_poll_size)); flags = fcntl(fd, F_GETFL); if (fcntl(fd, F_SETFL, flags | O_NONBLOCK) < 0) { BTL_ERROR(("Failed to change file descriptor of async event")); return OMPI_ERROR; } if ((devices_poll->active_poll_size + 1) > devices_poll->poll_size) { devices_poll->poll_size+=devices_poll->poll_size; async_pollfd_tmp = malloc(sizeof(struct pollfd) * devices_poll->poll_size); if (NULL == async_pollfd_tmp) { BTL_ERROR(("Failed malloc: %s:%d. " "Fatal error, stoping asynch event thread" , __FILE__, __LINE__)); return OMPI_ERROR; } memcpy (async_pollfd_tmp,devices_poll->async_pollfd, sizeof(struct pollfd) * (devices_poll->active_poll_size)); free(devices_poll->async_pollfd); devices_poll->async_pollfd = async_pollfd_tmp; } devices_poll->async_pollfd[devices_poll->active_poll_size].fd = fd; devices_poll->async_pollfd[devices_poll->active_poll_size].events = POLLIN; devices_poll->async_pollfd[devices_poll->active_poll_size].revents = 0; devices_poll->active_poll_size++; if (OMPI_SUCCESS != send_command_comp(fd)) { return OMPI_ERROR; } } else if (fd < 0) { bool fd_found = false; /* Removing device from poll */ fd = -(fd); BTL_VERBOSE(("Removing device [%d] from async event poll [%d]", fd, devices_poll->active_poll_size)); if (devices_poll->active_poll_size > 1) { for (j=0; (j < devices_poll->active_poll_size || !fd_found); j++) { if (devices_poll->async_pollfd[j].fd == fd) { devices_poll->async_pollfd[j].fd = devices_poll->async_pollfd[devices_poll->active_poll_size-1].fd; 
devices_poll->async_pollfd[j].events = devices_poll->async_pollfd[devices_poll->active_poll_size-1].events; devices_poll->async_pollfd[j].revents = devices_poll->async_pollfd[devices_poll->active_poll_size-1].revents; fd_found = true; } } if (!fd_found) { BTL_ERROR(("Requested FD[%d] was not found in poll array",fd)); return OMPI_ERROR; } } devices_poll->active_poll_size--; if (OMPI_SUCCESS != send_command_comp(-(fd))) { return OMPI_ERROR; } } else { /* Got 0 - command to close the thread */ BTL_VERBOSE(("Async event thread exit")); free(devices_poll->async_pollfd); return_status = OMPI_SUCCESS; pthread_exit(&return_status); } return OMPI_SUCCESS; }