/*
 * Initialize the SCIF netmod for this process.
 *
 * Records the process-group size and this process's rank, opens a listening
 * SCIF endpoint (for every rank except the highest-numbered one), builds the
 * business card, and allocates the connection table and the temporary
 * receive buffer.
 *
 * pg_p          - process group; only its size is read here
 * pg_rank       - rank of this process within pg_p
 * bc_val_p      - passed through to MPID_nem_scif_get_business_card()
 * val_max_sz_p  - passed through to MPID_nem_scif_get_business_card()
 *
 * Returns MPI_SUCCESS or an MPI error code.  On failure the tracked
 * allocations are released and a listener opened by this call is closed.
 */
int MPID_nem_scif_init(MPIDI_PG_t * pg_p, int pg_rank, char **bc_val_p, int *val_max_sz_p)
{
    int mpi_errno = MPI_SUCCESS;
    int ret;
    int i;
    int listener_opened = 0;    /* set once scif_open() succeeded in this call */
    MPIU_CHKPMEM_DECL(2);
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_SCIF_INIT);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_SCIF_INIT);

    /* first make sure that our private fields in the vc fit into the
     * area provided */
    MPIU_Assert(sizeof(MPID_nem_scif_vc_area) <= MPID_NEM_VC_NETMOD_AREA_LEN);

    MPID_nem_scif_nranks = pg_p->size;
    MPID_nem_scif_myrank = pg_rank;

    /* set up listener socket; the highest rank does not open one
     * (NOTE(review): presumably because it only initiates connections --
     * confirm against the connect path) */
    if (MPID_nem_scif_myrank < MPID_nem_scif_nranks - 1) {
        listen_fd = scif_open();
        MPIU_ERR_CHKANDJUMP1(listen_fd == -1, mpi_errno, MPI_ERR_OTHER,
                             "**scif_open", "**scif_open %s", MPIU_Strerror(errno));
        listener_opened = 1;

        listen_port = scif_bind(listen_fd, 0);
        MPIU_ERR_CHKANDJUMP1(listen_port == -1, mpi_errno, MPI_ERR_OTHER,
                             "**scif_bind", "**scif_bind %s", MPIU_Strerror(errno));

        ret = scif_listen(listen_fd, MPID_nem_scif_nranks);
        MPIU_ERR_CHKANDJUMP1(ret == -1, mpi_errno, MPI_ERR_OTHER,
                             "**scif_listen", "**scif_listen %s", MPIU_Strerror(errno));
    }

    /* create business card */
    mpi_errno = MPID_nem_scif_get_business_card(pg_rank, bc_val_p, val_max_sz_p);
    if (mpi_errno)
        MPIU_ERR_POP(mpi_errno);

    /* one connection slot per rank, zeroed, with every fd marked unused */
    MPIU_CHKPMEM_MALLOC(MPID_nem_scif_conns, scifconn_t *,
                        MPID_nem_scif_nranks * sizeof(scifconn_t), mpi_errno,
                        "connection table");
    memset(MPID_nem_scif_conns, 0, MPID_nem_scif_nranks * sizeof(scifconn_t));
    for (i = 0; i < MPID_nem_scif_nranks; ++i)
        MPID_nem_scif_conns[i].fd = -1;

    MPIU_CHKPMEM_MALLOC(MPID_nem_scif_recv_buf, char *,
                        MPID_NEM_SCIF_RECV_MAX_PKT_LEN, mpi_errno, "SCIF temporary buffer");
    MPIU_CHKPMEM_COMMIT();

  fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_SCIF_INIT);
    return mpi_errno;

  fn_fail:
    MPIU_CHKPMEM_REAP();
    /* do not leak the listening endpoint when a later step failed */
    if (listener_opened) {
        scif_close(listen_fd);
        listen_fd = -1;
    }
    goto fn_exit;
}
int mca_btl_scif_module_init (void) { int rc; /* create an endpoint to listen for connections */ mca_btl_scif_module.scif_fd = scif_open (); if (-1 == mca_btl_scif_module.scif_fd) { BTL_VERBOSE(("scif_open failed. errno = %d", errno)); return OPAL_ERROR; } /* bind the endpoint to a port */ mca_btl_scif_module.port_id.port = scif_bind (mca_btl_scif_module.scif_fd, 0); if (-1 == mca_btl_scif_module.port_id.port) { BTL_VERBOSE(("scif_bind failed. errno = %d", errno)); scif_close (mca_btl_scif_module.scif_fd); mca_btl_scif_module.scif_fd = -1; return OPAL_ERROR; } /* determine this processes node id */ rc = scif_get_nodeIDs (NULL, 0, &mca_btl_scif_module.port_id.node); if (-1 == rc) { BTL_VERBOSE(("btl/scif error getting node id of this node")); return OPAL_ERROR; } /* Listen for connections */ /* TODO - base the maximum backlog off something */ rc = scif_listen (mca_btl_scif_module.scif_fd, 64); if (-1 == rc) { BTL_VERBOSE(("scif_listen failed. errno = %d", errno)); scif_close (mca_btl_scif_module.scif_fd); mca_btl_scif_module.scif_fd = -1; return OPAL_ERROR; } BTL_VERBOSE(("btl/scif: listening @ port %u on node %u\n", mca_btl_scif_module.port_id.port, mca_btl_scif_module.port_id.node)); OBJ_CONSTRUCT(&mca_btl_scif_module.dma_frags, opal_free_list_t); OBJ_CONSTRUCT(&mca_btl_scif_module.eager_frags, opal_free_list_t); return OPAL_SUCCESS; }
void * mic_credentials(void *arg) { struct mic_info *mic; struct mpssd_info *mpssdi; struct jobs *job; struct jobs *jlist; struct scif_portID portID; struct passwd *pass; char *username = NULL; char cookie[MPSS_COOKIE_SIZE]; int len; unsigned int proto; scif_epd_t lep; scif_epd_t dep; uid_t uid; int err; if ((lep = scif_open()) < 0) { mpsslog(PINFO, "Cannot open mpssd credentials SCIF listen port: %s\n", strerror(errno)); pthread_exit((void *)1); } if (scif_bind(lep, MPSSD_CRED) < 0) { mpsslog(PINFO, "Cannot bind to mpssd credentials SCIF PORT: %s\n", strerror(errno)); pthread_exit((void *)1); } if (scif_listen(lep, 16) < 0) { mpsslog(PINFO, "Set Listen on mpssd credentials SCIF PORT fail: %s\n", strerror(errno)); pthread_exit((void *)1); } while (1) { if (scif_accept(lep, &portID, &dep, SCIF_ACCEPT_SYNC)) { if (errno != EINTR) { mpsslog(PINFO, "Wait for credentials request fail: %s\n", strerror(errno)); scif_close(dep); } continue; } if ((err = scif_recv(dep, &uid, sizeof(uid), SCIF_RECV_BLOCK)) != sizeof(uid)) { mpsslog(PINFO, "Credential connect recieve error %s\n", strerror(errno)); scif_close(dep); continue; } username = NULL; while ((pass = getpwent()) != NULL) { if (uid == pass->pw_uid) { username = pass->pw_name; break; } } endpwent(); if (username == NULL) { mpsslog(PERROR, "User request unknown UID %d\n", uid); proto = CRED_FAIL_UNKNOWNUID; scif_send(dep, &proto, sizeof(proto), 0); scif_close(dep); continue; }; if (get_cookie(pass, cookie) < 0) { proto = CRED_FAIL_READCOOKIE; scif_send(dep, &proto, sizeof(proto), 0); scif_close(dep); continue; } if ((job = malloc(sizeof(struct jobs))) == NULL) { proto = CRED_FAIL_MALLOC; scif_send(dep, &proto, sizeof(proto), 0); scif_close(dep); continue; } job->jobid = nextjobid++; job->dep = dep; job->cnt = 0; len = strlen(username); while (pthread_mutex_lock(&jobs_lock) != 0); for (mic = miclist; mic != NULL; mic = mic->next) { mpssdi = (struct mpssd_info *)mic->data; if (mpssdi->send_ep != -1) { job->cnt++; proto = 
REQ_CREDENTIAL; if ((scif_send(mpssdi->send_ep, &proto, sizeof(proto), 0)) < 0) { if (errno == ECONNRESET) { job->cnt--; continue; } } scif_send(mpssdi->send_ep, &job->jobid, sizeof(job->jobid), 0); scif_send(mpssdi->send_ep, &len, sizeof(len), 0); scif_send(mpssdi->send_ep, username, len, 0); len = sizeof(cookie); scif_send(mpssdi->send_ep, &len, sizeof(len), 0); scif_send(mpssdi->send_ep, cookie, len, SCIF_SEND_BLOCK); } } if (job->cnt == 0) { proto = CRED_SUCCESS; scif_send(job->dep, &proto, sizeof(proto), 0); scif_close(job->dep); } else { jlist = &gjobs; while (jlist->next) jlist = jlist->next; jlist->next = job; job->next = NULL; } while (pthread_mutex_unlock(&jobs_lock) != 0); } }
void * mic_monitor(void *arg) { struct mic_info *mic; struct mpssd_info *mpssdi; pthread_attr_t attr; struct scif_portID sendID = {0, MPSSD_MONSEND}; struct scif_portID recvID; scif_epd_t lep; scif_epd_t recv_ep; scif_epd_t send_ep; unsigned int proto; uint16_t send_port; uint16_t remote_port = 0; int err; if ((lep = scif_open()) < 0) { mpsslog(PINFO, "Cannot open mpssd monitor SCIF listen port: %s\n", strerror(errno)); pthread_exit((void *)1); } if (scif_bind(lep, MPSSD_MONRECV) < 0) { mpsslog(PINFO, "Cannot bind to mpssd monitor SCIF PORT: %s\n", strerror(errno)); pthread_exit((void *)1); } if (scif_listen(lep, 16) < 0) { mpsslog(PINFO, "Set Listen on mpssd monitor SCIF PORT fail: %s\n", strerror(errno)); pthread_exit((void *)1); } while (1) { if (scif_accept(lep, &recvID, &recv_ep, SCIF_ACCEPT_SYNC)) { if (errno != EINTR) mpsslog(PINFO, "Wait for card connect failed: %s\n", strerror(errno)); sleep(1); continue; } if ((mic = mpss_find_micid_inlist(miclist, recvID.node - 1)) == NULL) { mpsslog(PINFO, "Cannot configure - node %d does not seem to exist\n", recvID.node - 1); scif_close(recv_ep); continue; } mpssdi = (struct mpssd_info *)mic->data; if ((send_ep = scif_open()) < 0) { fprintf(logfp, "Failed to open SCIF: %s\n", strerror(errno)); scif_close(recv_ep); pthread_exit((void *)1); } mpssdi->send_ep = send_ep; if ((err = scif_recv(recv_ep, &proto, sizeof(proto), SCIF_RECV_BLOCK)) != sizeof(proto)) { mpsslog(PINFO, "%s: MIC card mpssd daemon startup connection error %s\n", mic->name, strerror(errno)); scif_close(recv_ep); mpssdi->recv_ep = -1; continue; } switch (proto) { case MONITOR_START: sendID.node = mic->id + 1; while ((send_port = scif_connect(send_ep, &sendID)) < 0) { fprintf(logfp, "Failed to connect to monitor thread on card: %s\n", strerror(errno)); sleep(1); } // Over reliable connection, mpssd tells us which port number it uses // to talk back to us. 
If this port matches actual recv_ep remote port // then we know that recv_ep and send_ep reference the same client. // We also know that send_ep, references mpssd on mic, as port we // connect to on that endpoint requires privliges to listen on. if (scif_recv(send_ep, &remote_port, sizeof(remote_port), SCIF_RECV_BLOCK) < 0) { mpsslog(PINFO, "%s: MIC card mpssd daemon handshake error %s\n", mic->name, strerror(errno)); scif_close(send_ep); scif_close(recv_ep); continue; // go back to next iteration of while(1), we cannot break the while loop because hosts mpssd can connect with multiple mic cards } if (remote_port != recvID.port || sendID.node != recvID.node) { mpsslog(PINFO, "%s: Failed to authenticate connection with mic mpssd\n", mic->name); scif_close(send_ep); scif_close(recv_ep); continue; // go back to next iteration of while(1), we cannot break the while loop because hosts mpssd can connect with multiple mic cards } // Similarily, provide info for the client, so that he can also verify // that both connections send_ep & recv_ep belong to us. if (scif_send(recv_ep, &send_port, sizeof(send_port), SCIF_SEND_BLOCK) < 0) { mpsslog(PINFO, "%s: MIC card mpssd daemon handshake error %s\n", mic->name, strerror(errno)); scif_close(send_ep); scif_close(recv_ep); continue; // go back to next iteration of while(1), we cannot break the while loop because hosts mpssd can connect with multiple mic cards } mpssdi->recv_ep = recv_ep; pthread_attr_init(&attr); pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); pthread_create(&mpssdi->monitor_pth, &attr, monitor, mic); proto = MONITOR_START_ACK; scif_send(send_ep, &proto, sizeof(proto), SCIF_RECV_BLOCK); mpsslog(PINFO, "%s: Monitor connection established\n", mic->name); break; } } }
int acptboot_init(void) { int err, ret; acptboot_data = (acptboot_data_t *)kzalloc(sizeof(*acptboot_data), GFP_KERNEL); if (!acptboot_data) { printk(KERN_ERR "ACPTBOOT: memory allocation failure\n"); return -ENOMEM; } acptboot_data->listen_epd = scif_open(); if (!acptboot_data->listen_epd) { printk(KERN_ERR "ACPTBOOT: scif_open() failed!\n"); err = -ENOMEM; goto error; } err = scif_bind(acptboot_data->listen_epd, MIC_NOTIFY); if (err < 0) { pr_debug("ACPTBOOT: scif_bind() failed! %d\n", err); goto error; } acptboot_data->acptboot_pn = err; err = scif_listen(acptboot_data->listen_epd, ACPT_BACKLOG); if (err < 0) { pr_debug("scif_listen() failed! %d\n", err); goto error; } pr_debug("ACPT endpoint listening port %d\n", acptboot_data->acptboot_pn); // Create workqueue acptboot_data->acptbootwq = __mic_create_singlethread_workqueue( "ACPTBOOT_WQ"); if (!acptboot_data->acptbootwq) { printk(KERN_ERR "%s %d wq creation failed!\n", __func__, __LINE__); goto error; } INIT_WORK(&acptboot_data->acptbootwork, acptboot_getconn); queue_work(acptboot_data->acptbootwq, &acptboot_data->acptbootwork); return 0; error: if (acptboot_data->listen_epd) if ((ret = scif_close(acptboot_data->listen_epd)) < 0) pr_debug("ACPTBOOT: scif_close() failed! %d\n", ret); kfree(acptboot_data); return err; }