Exemple #1
0
int MPID_nem_scif_init(MPIDI_PG_t * pg_p, int pg_rank, char **bc_val_p, int *val_max_sz_p)
{
    int mpi_errno = MPI_SUCCESS;
    int ret;
    int i;
    MPIU_CHKPMEM_DECL(2);
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_SCIF_INIT);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_SCIF_INIT);

    /* first make sure that our private fields in the vc fit into the
     * area provided  */
    MPIU_Assert(sizeof(MPID_nem_scif_vc_area) <= MPID_NEM_VC_NETMOD_AREA_LEN);

    MPID_nem_scif_nranks = pg_p->size;
    MPID_nem_scif_myrank = pg_rank;

    /* set up listener socket */
    if (MPID_nem_scif_myrank < MPID_nem_scif_nranks - 1) {
        listen_fd = scif_open();
        MPIU_ERR_CHKANDJUMP1(listen_fd == -1, mpi_errno, MPI_ERR_OTHER,
                             "**scif_open", "**scif_open %s", MPIU_Strerror(errno));

        listen_port = scif_bind(listen_fd, 0);
        MPIU_ERR_CHKANDJUMP1(listen_port == -1, mpi_errno, MPI_ERR_OTHER,
                             "**scif_bind", "**scif_bind %s", MPIU_Strerror(errno));

        ret = scif_listen(listen_fd, MPID_nem_scif_nranks);
        MPIU_ERR_CHKANDJUMP1(ret == -1, mpi_errno, MPI_ERR_OTHER,
                             "**scif_listen", "**scif_listen %s", MPIU_Strerror(errno));
    }

    /* create business card */
    mpi_errno = MPID_nem_scif_get_business_card(pg_rank, bc_val_p, val_max_sz_p);
    if (mpi_errno)
        MPIU_ERR_POP(mpi_errno);

    MPIU_CHKPMEM_MALLOC(MPID_nem_scif_conns, scifconn_t *,
                        MPID_nem_scif_nranks * sizeof(scifconn_t), mpi_errno,
                        "connection table");
    memset(MPID_nem_scif_conns, 0, MPID_nem_scif_nranks * sizeof(scifconn_t));
    for (i = 0; i < MPID_nem_scif_nranks; ++i)
        MPID_nem_scif_conns[i].fd = -1;

    MPIU_CHKPMEM_MALLOC(MPID_nem_scif_recv_buf, char *,
                        MPID_NEM_SCIF_RECV_MAX_PKT_LEN, mpi_errno, "SCIF temporary buffer");
    MPIU_CHKPMEM_COMMIT();

  fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_SCIF_INIT);
    return mpi_errno;
  fn_fail:
    MPIU_CHKPMEM_REAP();
    goto fn_exit;
}
Exemple #2
0
int mca_btl_scif_module_init (void)
{
    int rc;

    /* create an endpoint to listen for connections */
    mca_btl_scif_module.scif_fd = scif_open ();
    if (-1 == mca_btl_scif_module.scif_fd) {
        BTL_VERBOSE(("scif_open failed. errno = %d", errno));
        return OPAL_ERROR;
    }

    /* bind the endpoint to a port */
    mca_btl_scif_module.port_id.port = scif_bind (mca_btl_scif_module.scif_fd, 0);
    if (-1 == mca_btl_scif_module.port_id.port) {
        BTL_VERBOSE(("scif_bind failed. errno = %d", errno));
        scif_close (mca_btl_scif_module.scif_fd);
        mca_btl_scif_module.scif_fd = -1;
        return OPAL_ERROR;
    }

    /* determine this processes node id */
    rc = scif_get_nodeIDs (NULL, 0, &mca_btl_scif_module.port_id.node);
    if (-1 == rc) {
        BTL_VERBOSE(("btl/scif error getting node id of this node"));
        return OPAL_ERROR;
    }

    /* Listen for connections */
    /* TODO - base the maximum backlog off something */
    rc = scif_listen (mca_btl_scif_module.scif_fd, 64);
    if (-1 == rc) {
        BTL_VERBOSE(("scif_listen failed. errno = %d", errno));
        scif_close (mca_btl_scif_module.scif_fd);
        mca_btl_scif_module.scif_fd = -1;
        return OPAL_ERROR;
    }

    BTL_VERBOSE(("btl/scif: listening @ port %u on node %u\n",
                 mca_btl_scif_module.port_id.port, mca_btl_scif_module.port_id.node));

    OBJ_CONSTRUCT(&mca_btl_scif_module.dma_frags, opal_free_list_t);
    OBJ_CONSTRUCT(&mca_btl_scif_module.eager_frags, opal_free_list_t);

    return OPAL_SUCCESS;
}
Exemple #3
0
void *
mic_credentials(void *arg)
{
	struct mic_info *mic;
	struct mpssd_info *mpssdi;
	struct jobs *job;
	struct jobs *jlist;
	struct scif_portID portID;
	struct passwd *pass;
	char *username = NULL;
	char cookie[MPSS_COOKIE_SIZE];
	int len;
	unsigned int proto;
	scif_epd_t lep;
	scif_epd_t dep;
	uid_t uid;
	int err;

	if ((lep = scif_open()) < 0) {
		mpsslog(PINFO, "Cannot open mpssd credentials SCIF listen port: %s\n",
			       strerror(errno));
		pthread_exit((void *)1);
	}

	if (scif_bind(lep, MPSSD_CRED) < 0) {
		mpsslog(PINFO, "Cannot bind to mpssd credentials SCIF PORT: %s\n", strerror(errno));
		pthread_exit((void *)1);
	}

	if (scif_listen(lep, 16) < 0) {
		mpsslog(PINFO, "Set Listen on mpssd credentials SCIF PORT fail: %s\n", strerror(errno));
		pthread_exit((void *)1);
	}

	while (1) {
		if (scif_accept(lep, &portID, &dep, SCIF_ACCEPT_SYNC)) {
			if (errno != EINTR) {
				mpsslog(PINFO, "Wait for credentials request fail: %s\n", strerror(errno));
				scif_close(dep);
			}
			continue;
		}

		if ((err = scif_recv(dep, &uid, sizeof(uid), SCIF_RECV_BLOCK)) != sizeof(uid)) {
			mpsslog(PINFO, "Credential connect recieve error %s\n", strerror(errno));
			scif_close(dep);
			continue;
		}

		username = NULL;
		while ((pass = getpwent()) != NULL) {
			if (uid == pass->pw_uid) {
				username = pass->pw_name;
				break;
			}
		}

		endpwent();

		if (username == NULL) {
			mpsslog(PERROR, "User request unknown UID %d\n", uid);
			proto = CRED_FAIL_UNKNOWNUID;
			scif_send(dep, &proto, sizeof(proto), 0);
			scif_close(dep);
			continue;
		};

		if (get_cookie(pass, cookie) < 0) {
			proto = CRED_FAIL_READCOOKIE;
			scif_send(dep, &proto, sizeof(proto), 0);
			scif_close(dep);
			continue;
		}

		if ((job = malloc(sizeof(struct jobs))) == NULL) {
			proto = CRED_FAIL_MALLOC;
			scif_send(dep, &proto, sizeof(proto), 0);
			scif_close(dep);
			continue;
		}

		job->jobid = nextjobid++;
		job->dep = dep;
		job->cnt = 0;
		len = strlen(username);

		while (pthread_mutex_lock(&jobs_lock) != 0);

		for (mic = miclist; mic != NULL; mic = mic->next) {
			mpssdi = (struct mpssd_info *)mic->data;

			if (mpssdi->send_ep != -1) {
				job->cnt++;
				proto = REQ_CREDENTIAL;
				if ((scif_send(mpssdi->send_ep, &proto, sizeof(proto), 0)) < 0) {
					if (errno == ECONNRESET) {
						job->cnt--;
						continue;
					}
				}

				scif_send(mpssdi->send_ep, &job->jobid, sizeof(job->jobid), 0);
				scif_send(mpssdi->send_ep, &len, sizeof(len), 0);
				scif_send(mpssdi->send_ep, username, len, 0);
				len = sizeof(cookie);
				scif_send(mpssdi->send_ep, &len, sizeof(len), 0);
				scif_send(mpssdi->send_ep, cookie, len, SCIF_SEND_BLOCK);
			}
		}

		if (job->cnt == 0) {
			proto = CRED_SUCCESS;
			scif_send(job->dep, &proto, sizeof(proto), 0);
			scif_close(job->dep);
		} else {
			jlist = &gjobs;
			while (jlist->next)
				jlist = jlist->next;

			jlist->next = job;
			job->next = NULL;
		}
		while (pthread_mutex_unlock(&jobs_lock) != 0);
	}
}
Exemple #4
0
void *
mic_monitor(void *arg)
{
	struct mic_info *mic;
	struct mpssd_info *mpssdi;
	pthread_attr_t attr;
	struct scif_portID sendID = {0, MPSSD_MONSEND};
	struct scif_portID recvID;
	scif_epd_t lep;
	scif_epd_t recv_ep;
	scif_epd_t send_ep;
	unsigned int proto;
	uint16_t send_port;
	uint16_t remote_port = 0;
	int err;

	if ((lep = scif_open()) < 0) {
		mpsslog(PINFO, "Cannot open mpssd monitor SCIF listen port: %s\n", strerror(errno));
		pthread_exit((void *)1);
	}

	if (scif_bind(lep, MPSSD_MONRECV) < 0) {
		mpsslog(PINFO, "Cannot bind to mpssd monitor SCIF PORT: %s\n", strerror(errno));
		pthread_exit((void *)1);
	}

	if (scif_listen(lep, 16) < 0) {
		mpsslog(PINFO, "Set Listen on mpssd monitor SCIF PORT fail: %s\n", strerror(errno));
		pthread_exit((void *)1);
	}

	while (1) {
		if (scif_accept(lep, &recvID, &recv_ep, SCIF_ACCEPT_SYNC)) {
			if (errno != EINTR)
				mpsslog(PINFO, "Wait for card connect failed: %s\n", strerror(errno));
			sleep(1);
			continue;
		}

		if ((mic = mpss_find_micid_inlist(miclist, recvID.node - 1)) == NULL) {
			mpsslog(PINFO, "Cannot configure - node %d does not seem to exist\n",
				       recvID.node - 1);
			scif_close(recv_ep);
			continue;
		}

		mpssdi = (struct mpssd_info *)mic->data;

		if ((send_ep = scif_open()) < 0) {
			fprintf(logfp, "Failed to open SCIF: %s\n", strerror(errno));
			scif_close(recv_ep);
			pthread_exit((void *)1);
		}
		mpssdi->send_ep = send_ep;

		if ((err = scif_recv(recv_ep, &proto, sizeof(proto), SCIF_RECV_BLOCK)) != sizeof(proto)) {
			mpsslog(PINFO, "%s: MIC card mpssd daemon startup connection error %s\n",
					mic->name, strerror(errno));
			scif_close(recv_ep);
			mpssdi->recv_ep = -1;
			continue;
		}

		switch (proto) {
		case MONITOR_START:
			sendID.node = mic->id + 1;
			while ((send_port = scif_connect(send_ep, &sendID)) < 0) {
				fprintf(logfp, "Failed to connect to monitor thread on card: %s\n",
					strerror(errno));
				sleep(1);
			}

			// Over reliable connection, mpssd tells us which port number it uses
			// to talk back to us. If this port matches actual recv_ep remote port
			// then we know that recv_ep and send_ep reference the same client.
			// We also know that send_ep, references mpssd on mic, as port we
			// connect to on that endpoint requires privliges to listen on.
			if (scif_recv(send_ep, &remote_port, sizeof(remote_port), SCIF_RECV_BLOCK) < 0) {
				mpsslog(PINFO, "%s: MIC card mpssd daemon handshake error %s\n",
					mic->name, strerror(errno));
				scif_close(send_ep);
				scif_close(recv_ep);
				continue; // go back to next iteration of while(1), we cannot break the while loop because hosts mpssd can connect with multiple mic cards
			}

			if (remote_port != recvID.port || sendID.node != recvID.node) {
				mpsslog(PINFO, "%s: Failed to authenticate connection with mic mpssd\n",
					mic->name);
				scif_close(send_ep);
				scif_close(recv_ep);
				continue; // go back to next iteration of while(1), we cannot break the while loop because hosts mpssd can connect with multiple mic cards

			}

			// Similarily, provide info for the client, so that he can also verify
			// that both connections send_ep & recv_ep belong to us.
			if (scif_send(recv_ep, &send_port, sizeof(send_port), SCIF_SEND_BLOCK) < 0) {
				mpsslog(PINFO, "%s: MIC card mpssd daemon handshake error %s\n",
					mic->name, strerror(errno));
				scif_close(send_ep);
				scif_close(recv_ep);
				continue; // go back to next iteration of while(1), we cannot break the while loop because hosts mpssd can connect with multiple mic cards

			}

			mpssdi->recv_ep = recv_ep;
			pthread_attr_init(&attr);
			pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
			pthread_create(&mpssdi->monitor_pth, &attr, monitor, mic);
			proto = MONITOR_START_ACK;
			scif_send(send_ep, &proto, sizeof(proto), SCIF_RECV_BLOCK);
			mpsslog(PINFO, "%s: Monitor connection established\n", mic->name);
			break;
		}
	}
}
Exemple #5
0
int
acptboot_init(void)
{
	int err, ret;

	acptboot_data = (acptboot_data_t *)kzalloc(sizeof(*acptboot_data), GFP_KERNEL);

	if (!acptboot_data) {
		printk(KERN_ERR "ACPTBOOT: memory allocation failure\n");
		return -ENOMEM;
	}

	acptboot_data->listen_epd = scif_open();

	if (!acptboot_data->listen_epd) {
		printk(KERN_ERR "ACPTBOOT: scif_open() failed!\n");
		err = -ENOMEM;
		goto error;
	}

	err = scif_bind(acptboot_data->listen_epd, MIC_NOTIFY);
	if (err < 0) {
		pr_debug("ACPTBOOT: scif_bind() failed! %d\n", err);
		goto error;
	}

	acptboot_data->acptboot_pn = err;

	err = scif_listen(acptboot_data->listen_epd, ACPT_BACKLOG);
	if (err < 0) {
		pr_debug("scif_listen() failed! %d\n", err);
		goto error;

	}

	pr_debug("ACPT endpoint listening port %d\n", 
						acptboot_data->acptboot_pn);

	// Create workqueue
	acptboot_data->acptbootwq = __mic_create_singlethread_workqueue(
							"ACPTBOOT_WQ");

	if (!acptboot_data->acptbootwq) {
		printk(KERN_ERR "%s %d wq creation failed!\n", __func__, __LINE__);
		goto error;
	}

	INIT_WORK(&acptboot_data->acptbootwork, acptboot_getconn);
	queue_work(acptboot_data->acptbootwq, 
					&acptboot_data->acptbootwork);
	return 0;

error:

	if (acptboot_data->listen_epd)
		if ((ret = scif_close(acptboot_data->listen_epd)) < 0)
			pr_debug("ACPTBOOT: scif_close() failed! %d\n", ret);

	kfree(acptboot_data);

	return err;
}