예제 #1
0
/*
 * acptboot_getconn - work handler servicing one connection on the ACPTBOOT
 * listening endpoint.
 *
 * Accepts a single connection, reads a version word followed by a protocol
 * id, and answers the request:
 *   ACPT_BOOTED       - sets the sending node's context to MIC_ONLINE,
 *                       increments its boot_count and replies ACPT_BOOT_ACK.
 *   ACPT_REQUEST_TIME - replies ACPT_TIME_DATA followed by the current time
 *                       of day.
 * Afterwards the connection is closed and the work item is queued again.
 *
 * @work: not used by this handler; all state comes from acptboot_data.
 */
void acptboot_getconn(struct work_struct *work)
{
	mic_ctx_t *node_ctx;
	struct scif_portID data;
	scif_epd_t conn_epd;
	struct timespec tod;
	int proto;
	int version;
	int err;

	if ((err = scif_accept(acptboot_data->listen_epd, &data, &conn_epd,
						SCIF_ACCEPT_SYNC))) {
		pr_debug("ACPTBOOT: scif_accept_failed %d\n", err);
		/* Note: the work item is not requeued on this path. */
		return;
	}

	/* Node id 0 is rejected as a sender. */
	if (!data.node) {
		printk(KERN_ERR "ACPTBOOT: connect received from invalid dev %d\n", 
								-EINVAL);
		goto close_epd;
	}

	/* The version word is consumed from the stream but not examined. */
	if ((err = scif_recv(conn_epd, &version, sizeof(version), SCIF_RECV_BLOCK)) != sizeof(version)) {
		printk(KERN_ERR "ACPTBOOT: failed to recieve version number err %d\n", err);
		goto close_epd;
	}

	if ((err = scif_recv(conn_epd, &proto, sizeof(proto), SCIF_RECV_BLOCK)) != sizeof(proto)) {
		printk(KERN_ERR "ACPTBOOT: failed to recieve proto id %d\n", err);
		goto close_epd;
	}

	switch (proto) {
	case ACPT_BOOTED:
		/*
		 * NOTE(review): data.node - 1 is used as the device index without
		 * an upper-bound check here - confirm get_per_dev_ctx() tolerates
		 * any node id a peer can present.
		 */
		node_ctx = get_per_dev_ctx(data.node - 1);
		mic_setstate(node_ctx, MIC_ONLINE);
		node_ctx->boot_count++;

		proto = ACPT_BOOT_ACK;
		err = scif_send(conn_epd, &proto, sizeof(proto), SCIF_SEND_BLOCK);
		if (err != sizeof(proto))
			printk(KERN_ERR "ACPTBOOT: failed to send boot ack err %d\n", err);
		break;

	case ACPT_REQUEST_TIME:
		getnstimeofday(&tod);
		proto = ACPT_TIME_DATA;
		err = scif_send(conn_epd, &proto, sizeof(proto), SCIF_SEND_BLOCK);
		if (err != sizeof(proto)) {
			printk(KERN_ERR "ACPTBOOT: failed to send time header err %d\n", err);
			/* No point sending the payload if the header did not go out. */
			break;
		}
		err = scif_send(conn_epd, &tod, sizeof(tod), SCIF_SEND_BLOCK);
		if (err != sizeof(tod))
			printk(KERN_ERR "ACPTBOOT: failed to send time data err %d\n", err);
		break;

	default:
		printk(KERN_ERR "ACPTBOOT: unknown proto id %d from node %d\n",
				proto, data.node);
		break;
	}

close_epd:
	if ((err = scif_close(conn_epd)))
		printk(KERN_ERR "ACPTBOOT: scif_close failed %d\n", err);

	/* Re-arm the handler so the next connection gets serviced. */
	queue_work(acptboot_data->acptbootwq, &acptboot_data->acptbootwork);
}
예제 #2
0
/**
 * Listener loop: waits for connection requests on the module's SCIF endpoint
 * and hands each one to mca_btl_scif_ep_connect_start_passive().
 *
 * The loop ends when scif_poll() returns anything other than 1, when the
 * reported events are not exactly SCIF_POLLIN, or when a request arrives
 * while the module's exiting flag is set.
 *
 * @param arg  unused
 * @return always NULL
 */
static void *mca_btl_scif_connect_accept (void *arg)
{
    struct scif_pollepd pollepd = {.epd = mca_btl_scif_module.scif_fd, .events = SCIF_POLLIN, .revents = 0};
    int rc;

    (void) arg;

    BTL_VERBOSE(("btl/scif: listening for new connections"));

    /* listen for connections */
    while (1) {
        pollepd.revents = 0;

        rc = scif_poll (&pollepd, 1, -1);
        if (1 != rc || SCIF_POLLIN != pollepd.revents) {
            break;
        }

        if (mca_btl_scif_module.exiting) {
            /* accept the connection so scif_connect() does not timeout */
            struct scif_portID peer;
            scif_epd_t newepd;

            /* newepd is only valid (and only needs closing) if the accept
             * succeeded; closing it unconditionally would pass an
             * uninitialized descriptor to scif_close() */
            if (0 <= scif_accept (mca_btl_scif_module.scif_fd, &peer, &newepd, SCIF_ACCEPT_SYNC)) {
                scif_close (newepd);
            }
            break;
        }

        rc = mca_btl_scif_ep_connect_start_passive ();
        if (OPAL_SUCCESS != rc) {
            /* a failed request does not stop the listener */
            BTL_VERBOSE(("btl/scif: error accepting scif connection"));
        }
    }

    BTL_VERBOSE(("btl/scif: stopped listening for new connections"));

    return NULL;
}

/**
 * Process-removal hook. Currently a no-op: none of the arguments are
 * examined and OPAL_SUCCESS is returned unconditionally.
 */
int mca_btl_scif_del_procs (struct mca_btl_base_module_t *btl,
                            size_t nprocs, struct opal_proc_t **procs,
                            struct mca_btl_base_endpoint_t **peers)
{
    /* nothing to tear down yet; the parameters are intentionally unused */
    (void) btl;
    (void) nprocs;
    (void) procs;
    (void) peers;

    return OPAL_SUCCESS;
}

/**
 * Tear down a memory registration: unregister the region from every endpoint
 * that is currently connected and holds a valid offset for it, then release
 * the per-endpoint handle array.
 *
 * @param reg_data  unused
 * @param reg       registration to tear down (accessed as mca_btl_scif_reg_t)
 * @return OPAL_SUCCESS always; scif_unregister() results are ignored
 */
static int scif_dereg_mem (void *reg_data, mca_mpool_base_registration_t *reg)
{
    mca_btl_scif_reg_t *scif_reg = (mca_btl_scif_reg_t *)reg;
    size_t size = (size_t)((uintptr_t) reg->bound - (uintptr_t) reg->base);
    int i;

    (void) reg_data;

    /* unregister the fragment from all connected endpoints */
    for (i = 0 ; i < (int) mca_btl_scif_module.endpoint_count ; ++i) {
        /* an offset of (off_t)-1 marks "not registered with this endpoint" */
        if ((off_t)-1 != scif_reg->handles[i].btl_handle.scif_offset &&
            MCA_BTL_SCIF_EP_STATE_CONNECTED == mca_btl_scif_module.endpoints[i].state) {
            (void) scif_unregister(mca_btl_scif_module.endpoints[i].scif_epd,
                                   scif_reg->handles[i].btl_handle.scif_offset, size);
        }
    }

    free (scif_reg->handles);
    /* clear the pointer so a later access or repeated dereg cannot touch
     * freed memory */
    scif_reg->handles = NULL;

    return OPAL_SUCCESS;
}
예제 #3
0
파일: mpssd.c 프로젝트: CIRCL/mpss-daemon
/*
 * mic_credentials - thread body serving credential requests on the
 * MPSSD_CRED SCIF port.
 *
 * Setup: opens a SCIF endpoint, binds it to MPSSD_CRED and listens with a
 * backlog of 16. Any setup failure is logged and ends the thread with exit
 * value 1.
 *
 * Per accepted connection:
 *   1. read the requesting uid;
 *   2. find the matching passwd entry by scanning getpwent();
 *   3. obtain the user's cookie via get_cookie();
 *   4. allocate a job and, holding jobs_lock, send REQ_CREDENTIAL followed by
 *      job id, username length, username, cookie length and cookie to every
 *      card whose send_ep is not -1;
 *   5. if no card was contacted, answer CRED_SUCCESS and close the
 *      connection; otherwise append the job to the gjobs list (the reply to
 *      the requester is not sent from this function in that case).
 * Failures in steps 2-4 are reported to the requester with a CRED_FAIL_*
 * code before the connection is closed.
 *
 * arg is unused. The function loops forever and never returns normally.
 */
void *
mic_credentials(void *arg)
{
	struct mic_info *mic;
	struct mpssd_info *mpssdi;
	struct jobs *job;
	struct jobs *jlist;
	struct scif_portID portID;
	struct passwd *pass;
	char *username = NULL;
	char cookie[MPSS_COOKIE_SIZE];
	/* Kept separate: both lengths go on the wire once per card. */
	int name_len;
	int cookie_len = sizeof(cookie);
	unsigned int proto;
	scif_epd_t lep;
	scif_epd_t dep;
	uid_t uid;

	(void)arg;

	if ((lep = scif_open()) < 0) {
		mpsslog(PINFO, "Cannot open mpssd credentials SCIF listen port: %s\n",
			       strerror(errno));
		pthread_exit((void *)1);
	}

	if (scif_bind(lep, MPSSD_CRED) < 0) {
		mpsslog(PINFO, "Cannot bind to mpssd credentials SCIF PORT: %s\n", strerror(errno));
		scif_close(lep);
		pthread_exit((void *)1);
	}

	if (scif_listen(lep, 16) < 0) {
		mpsslog(PINFO, "Set Listen on mpssd credentials SCIF PORT fail: %s\n", strerror(errno));
		scif_close(lep);
		pthread_exit((void *)1);
	}

	while (1) {
		if (scif_accept(lep, &portID, &dep, SCIF_ACCEPT_SYNC)) {
			/*
			 * dep is not a new endpoint here: it is either uninitialized
			 * or still holds the descriptor of an earlier connection
			 * (possibly owned by a queued job), so it must not be closed.
			 */
			if (errno != EINTR)
				mpsslog(PINFO, "Wait for credentials request fail: %s\n", strerror(errno));
			continue;
		}

		if (scif_recv(dep, &uid, sizeof(uid), SCIF_RECV_BLOCK) != sizeof(uid)) {
			mpsslog(PINFO, "Credential connect recieve error %s\n", strerror(errno));
			scif_close(dep);
			continue;
		}

		/* Linear scan of the password database for the requesting uid. */
		username = NULL;
		while ((pass = getpwent()) != NULL) {
			if (uid == pass->pw_uid) {
				username = pass->pw_name;
				break;
			}
		}

		/*
		 * NOTE(review): pass/username point into storage owned by the
		 * getpwent() family and are still used after endpwent() - confirm
		 * this is safe on the target libc.
		 */
		endpwent();

		if (username == NULL) {
			mpsslog(PERROR, "User request unknown UID %d\n", uid);
			proto = CRED_FAIL_UNKNOWNUID;
			scif_send(dep, &proto, sizeof(proto), 0);
			scif_close(dep);
			continue;
		}

		if (get_cookie(pass, cookie) < 0) {
			proto = CRED_FAIL_READCOOKIE;
			scif_send(dep, &proto, sizeof(proto), 0);
			scif_close(dep);
			continue;
		}

		if ((job = malloc(sizeof(*job))) == NULL) {
			proto = CRED_FAIL_MALLOC;
			scif_send(dep, &proto, sizeof(proto), 0);
			scif_close(dep);
			continue;
		}

		job->jobid = nextjobid++;
		job->dep = dep;
		job->cnt = 0;
		name_len = (int)strlen(username);

		while (pthread_mutex_lock(&jobs_lock) != 0);

		for (mic = miclist; mic != NULL; mic = mic->next) {
			mpssdi = (struct mpssd_info *)mic->data;

			/* Cards without a send endpoint are skipped. */
			if (mpssdi->send_ep == -1)
				continue;

			job->cnt++;
			proto = REQ_CREDENTIAL;
			if ((scif_send(mpssdi->send_ep, &proto, sizeof(proto), 0)) < 0) {
				/* A reset connection means this card will not answer. */
				if (errno == ECONNRESET) {
					job->cnt--;
					continue;
				}
			}

			scif_send(mpssdi->send_ep, &job->jobid, sizeof(job->jobid), 0);
			scif_send(mpssdi->send_ep, &name_len, sizeof(name_len), 0);
			scif_send(mpssdi->send_ep, username, name_len, 0);
			scif_send(mpssdi->send_ep, &cookie_len, sizeof(cookie_len), 0);
			scif_send(mpssdi->send_ep, cookie, cookie_len, SCIF_SEND_BLOCK);
		}

		if (job->cnt == 0) {
			/* Nobody to wait for: answer now and release the job. */
			proto = CRED_SUCCESS;
			scif_send(job->dep, &proto, sizeof(proto), 0);
			scif_close(job->dep);
			free(job);
		} else {
			/* Append to the tail of the global job list. */
			jlist = &gjobs;
			while (jlist->next)
				jlist = jlist->next;

			jlist->next = job;
			job->next = NULL;
		}
		while (pthread_mutex_unlock(&jobs_lock) != 0);
	}
}
예제 #4
0
파일: mpssd.c 프로젝트: CIRCL/mpss-daemon
/*
 * Close both endpoints of a failed card handshake and mark the card as
 * having no send endpoint, so other code testing send_ep against -1 does not
 * use a closed descriptor.
 */
static void
mic_monitor_drop_eps(struct mpssd_info *mpssdi, scif_epd_t send_ep, scif_epd_t recv_ep)
{
	scif_close(send_ep);
	mpssdi->send_ep = -1;
	scif_close(recv_ep);
}

/*
 * mic_monitor - thread body accepting monitor connections from cards on the
 * MPSSD_MONRECV SCIF port.
 *
 * Setup: opens a SCIF endpoint, binds it to MPSSD_MONRECV and listens with a
 * backlog of 16. Any setup failure is logged and ends the thread with exit
 * value 1.
 *
 * Per accepted connection (recv_ep): the card is looked up from the peer
 * node id, a second endpoint (send_ep) is opened, and a protocol id is read.
 * For MONITOR_START the host connects send_ep to MPSSD_MONSEND on the card,
 * cross-checks the port numbers of the two connections in both directions,
 * starts a detached monitor thread for the card and sends MONITOR_START_ACK.
 * Any failure closes both endpoints and goes back to accepting, because
 * connections from other cards must still be served.
 *
 * arg is unused. The function loops forever and never returns normally.
 */
void *
mic_monitor(void *arg)
{
	struct mic_info *mic;
	struct mpssd_info *mpssdi;
	pthread_attr_t attr;
	struct scif_portID sendID = {0, MPSSD_MONSEND};
	struct scif_portID recvID;
	scif_epd_t lep;
	scif_epd_t recv_ep;
	scif_epd_t send_ep;
	unsigned int proto;
	int conn_port;
	uint16_t send_port;
	uint16_t remote_port = 0;

	(void)arg;

	if ((lep = scif_open()) < 0) {
		mpsslog(PINFO, "Cannot open mpssd monitor SCIF listen port: %s\n", strerror(errno));
		pthread_exit((void *)1);
	}

	if (scif_bind(lep, MPSSD_MONRECV) < 0) {
		mpsslog(PINFO, "Cannot bind to mpssd monitor SCIF PORT: %s\n", strerror(errno));
		scif_close(lep);
		pthread_exit((void *)1);
	}

	if (scif_listen(lep, 16) < 0) {
		mpsslog(PINFO, "Set Listen on mpssd monitor SCIF PORT fail: %s\n", strerror(errno));
		scif_close(lep);
		pthread_exit((void *)1);
	}

	while (1) {
		if (scif_accept(lep, &recvID, &recv_ep, SCIF_ACCEPT_SYNC)) {
			if (errno != EINTR)
				mpsslog(PINFO, "Wait for card connect failed: %s\n", strerror(errno));
			sleep(1);
			continue;
		}

		/* Card ids are the SCIF node id minus one. */
		if ((mic = mpss_find_micid_inlist(miclist, recvID.node - 1)) == NULL) {
			mpsslog(PINFO, "Cannot configure - node %d does not seem to exist\n",
				       recvID.node - 1);
			scif_close(recv_ep);
			continue;
		}

		mpssdi = (struct mpssd_info *)mic->data;

		if ((send_ep = scif_open()) < 0) {
			fprintf(logfp, "Failed to open SCIF: %s\n", strerror(errno));
			scif_close(recv_ep);
			pthread_exit((void *)1);
		}
		mpssdi->send_ep = send_ep;

		if (scif_recv(recv_ep, &proto, sizeof(proto), SCIF_RECV_BLOCK) != sizeof(proto)) {
			mpsslog(PINFO, "%s: MIC card mpssd daemon startup connection error %s\n",
					mic->name, strerror(errno));
			/* send_ep was opened above and must not leak either. */
			mic_monitor_drop_eps(mpssdi, send_ep, recv_ep);
			mpssdi->recv_ep = -1;
			continue;
		}

		switch (proto) {
		case MONITOR_START:
			sendID.node = mic->id + 1;
			/*
			 * The result is kept in an int: storing it straight into the
			 * uint16_t send_port would make the "< 0" failure test always
			 * false. Retries once a second until the card side answers.
			 */
			while ((conn_port = scif_connect(send_ep, &sendID)) < 0) {
				fprintf(logfp, "Failed to connect to monitor thread on card: %s\n",
					strerror(errno));
				sleep(1);
			}
			send_port = (uint16_t)conn_port;

			// Over reliable connection, mpssd tells us which port number it uses
			// to talk back to us. If this port matches actual recv_ep remote port
			// then we know that recv_ep and send_ep reference the same client.
			// We also know that send_ep references mpssd on mic, as the port we
			// connect to on that endpoint requires privileges to listen on.
			if (scif_recv(send_ep, &remote_port, sizeof(remote_port), SCIF_RECV_BLOCK)
					!= sizeof(remote_port)) {
				mpsslog(PINFO, "%s: MIC card mpssd daemon handshake error %s\n",
					mic->name, strerror(errno));
				mic_monitor_drop_eps(mpssdi, send_ep, recv_ep);
				continue; // back to accept: other cards may still connect
			}

			if (remote_port != recvID.port || sendID.node != recvID.node) {
				mpsslog(PINFO, "%s: Failed to authenticate connection with mic mpssd\n",
					mic->name);
				mic_monitor_drop_eps(mpssdi, send_ep, recv_ep);
				continue; // back to accept: other cards may still connect
			}

			// Similarly, provide info for the client, so that it can also verify
			// that both connections send_ep & recv_ep belong to us.
			if (scif_send(recv_ep, &send_port, sizeof(send_port), SCIF_SEND_BLOCK) < 0) {
				mpsslog(PINFO, "%s: MIC card mpssd daemon handshake error %s\n",
					mic->name, strerror(errno));
				mic_monitor_drop_eps(mpssdi, send_ep, recv_ep);
				continue; // back to accept: other cards may still connect
			}

			mpssdi->recv_ep = recv_ep;
			pthread_attr_init(&attr);
			pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
			pthread_create(&mpssdi->monitor_pth, &attr, monitor, mic);
			pthread_attr_destroy(&attr);
			proto = MONITOR_START_ACK;
			scif_send(send_ep, &proto, sizeof(proto), SCIF_SEND_BLOCK);
			mpsslog(PINFO, "%s: Monitor connection established\n", mic->name);
			break;

		default:
			/* Nothing will ever use these endpoints; do not leak them. */
			mpsslog(PINFO, "%s: Unexpected monitor request %u\n", mic->name, proto);
			mic_monitor_drop_eps(mpssdi, send_ep, recv_ep);
			break;
		}
	}
}
예제 #5
0
파일: scif_init.c 프로젝트: dbrowneup/pmap
/*
 * MPID_nem_scif_vc_init - set up the SCIF side of a virtual connection.
 *
 * Installs the SCIF send functions on the VC, resets its queue/link fields
 * and binds it to the per-rank connection slot MPID_nem_scif_conns[pg_rank].
 * The connection itself is then established: when the peer's rank is lower
 * than MPID_nem_scif_myrank this process opens an endpoint and connects to
 * the address returned by get_addr(); otherwise it accepts a connection on
 * listen_fd. Finally the send channel is initialised, the two sides exchange
 * their send-channel offsets, and the receive channel is initialised with
 * the peer's offset.
 *
 * Returns MPI_SUCCESS, or the MPI error code set by the failing step.
 */
int MPID_nem_scif_vc_init(MPIDI_VC_t * vc)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_CH3I_VC *vc_ch = &vc->ch;
    MPID_nem_scif_vc_area *vc_scif = VC_SCIF(vc);
    int ret;
    size_t s;
    scifconn_t *sc;
    off_t offset;
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_SCIF_VC_INIT);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_SCIF_VC_INIT);

    vc->sendNoncontig_fn = MPID_nem_scif_SendNoncontig;
    vc_ch->iStartContigMsg = MPID_nem_scif_iStartContigMsg;
    vc_ch->iSendContig = MPID_nem_scif_iSendContig;

    vc_ch->next = NULL;
    vc_ch->prev = NULL;

    ASSIGN_SC_TO_VC(vc_scif, NULL);
    vc_scif->send_queue.head = vc_scif->send_queue.tail = NULL;
    vc_scif->sc = sc = &MPID_nem_scif_conns[vc->pg_rank];
    vc_scif->terminate = 0;
    sc->vc = vc;

    /* do the connection: connect to lower ranks, accept from the others */
    if (vc->pg_rank < MPID_nem_scif_myrank) {
        sc->fd = scif_open();
        MPIU_ERR_CHKANDJUMP1(sc->fd == -1, mpi_errno, MPI_ERR_OTHER,
                             "**scif_open", "**scif_open %s", MPIU_Strerror(errno));
        mpi_errno = get_addr(vc, &sc->addr);
        if (mpi_errno)
            MPIU_ERR_POP(mpi_errno);
        ret = scif_connect(sc->fd, &sc->addr);
        MPIU_ERR_CHKANDJUMP1(ret == -1, mpi_errno, MPI_ERR_OTHER,
                             "**scif_connect", "**scif_connect %s", MPIU_Strerror(errno));
    }
    else {
        ret = scif_accept(listen_fd, &sc->addr, &sc->fd, SCIF_ACCEPT_SYNC);
        MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER,
                             "**scif_accept", "**scif_accept %s", MPIU_Strerror(errno));
    }
    MPIDI_CHANGE_VC_STATE(vc, ACTIVE);
    ret = MPID_nem_scif_init_shmsend(&sc->csend, sc->fd, vc->pg_rank);
    MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER,
                         "**scif_init_shmsend", "**scif_init_shmsend %s",
                         MPIU_Strerror(errno));

    /* Exchange offsets: send ours first, then read the peer's.  A failed
     * call (-1) converts to a huge size_t and so also trips the check. */
    s = scif_send(sc->fd, &sc->csend.offset, sizeof(off_t), SCIF_SEND_BLOCK);
    MPIU_ERR_CHKANDJUMP1(s != sizeof(off_t), mpi_errno, MPI_ERR_OTHER,
                         "**scif_send", "**scif_send %s", MPIU_Strerror(errno));
    s = scif_recv(sc->fd, &offset, sizeof(off_t), SCIF_RECV_BLOCK);
    MPIU_ERR_CHKANDJUMP1(s != sizeof(off_t), mpi_errno, MPI_ERR_OTHER,
                         "**scif_recv", "**scif_recv %s", MPIU_Strerror(errno));

    ret = MPID_nem_scif_init_shmrecv(&sc->crecv, sc->fd, offset, vc->pg_rank);
    MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER,
                         "**scif_init_shmrecv", "**scif_init_shmrecv %s",
                         MPIU_Strerror(errno));

  fn_exit:
    /* placed after the label so the exit hook also runs on error paths */
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_SCIF_VC_INIT);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
예제 #6
0
int mca_btl_scif_ep_connect_start_passive (void) {
    mca_btl_base_endpoint_t *ep = NULL;
    opal_process_name_t remote_name;
    struct scif_portID port_id;
    unsigned int i;
    scif_epd_t epd;
    int rc;

    /* accept the connection request. if the endpoint is already connecting we
     * may close this endpoint and alloc mca_btl_scif_ep_connect_start_active
     * to finish the connection. */
    rc = scif_accept (mca_btl_scif_module.scif_fd, &port_id, &epd, SCIF_ACCEPT_SYNC);
    if (OPAL_UNLIKELY(0 > rc)) {
        BTL_VERBOSE(("error accepting connecton from scif peer. %d", errno));
        return OPAL_ERROR;
    }

    /* determine which peer sent the connection request */
    rc = scif_recv (epd, &remote_name, sizeof (remote_name), SCIF_RECV_BLOCK);
    if (OPAL_UNLIKELY(-1 == rc)) {
        BTL_VERBOSE(("error in scif_recv"));
        scif_close (epd);
        return OPAL_ERROR;
    }

    BTL_VERBOSE(("got connection request from vpid %d on port %u on node %u",
                 remote_name.vpid, port_id.port, port_id.node));

    for (i = 0 ; i < mca_btl_scif_module.endpoint_count ; ++i) {
        if (mca_btl_scif_module.endpoints[i].peer_proc->proc_name.vpid ==
            remote_name.vpid) {
            ep = mca_btl_scif_module.endpoints + i;
            break;
        }
    }

    /* peer not found */
    if (i == mca_btl_scif_module.endpoint_count) {
        BTL_VERBOSE(("remote peer %d unknown", remote_name.vpid));
        scif_close (epd);
        return OPAL_ERROR;
    }

    /* similtaneous connections (active side) */
    if ((MCA_BTL_SCIF_EP_STATE_CONNECTING == ep->state &&
         ep->port_id.port < mca_btl_scif_module.port_id.port) ||
        MCA_BTL_SCIF_EP_STATE_CONNECTED == ep->state) {
        BTL_VERBOSE(("active connection in progress. connection request from peer %d rejected", remote_name.vpid));
        scif_close (epd);
        return OPAL_SUCCESS;
    }

    opal_mutex_lock (&ep->lock);

    if (MCA_BTL_SCIF_EP_STATE_CONNECTED == ep->state) {
        opal_mutex_unlock (&ep->lock);
        scif_close (epd);
        return OPAL_SUCCESS;
    }

    BTL_VERBOSE(("accepted connection from port %d", ep->port_id.port));

    ep->state    = MCA_BTL_SCIF_EP_STATE_CONNECTING;
    ep->scif_epd = epd;

    rc = mca_btl_scif_ep_connect_finish (ep, true);
    if (OPAL_SUCCESS != rc) {
        scif_close (ep->scif_epd);
        ep->scif_epd = -1;
        ep->state = MCA_BTL_SCIF_EP_STATE_INIT;
    }

    opal_mutex_unlock (&ep->lock);

    return rc;
}