Exemple #1
0
/*
 * main loop of agent thread
 */
static void *
_agent(void * unused)
{
	eio_handle_t *pmi2_handle;
	eio_obj_t *tree_listen_obj, *task_obj;
	int i;
	
	pmi2_handle = eio_handle_create();

	//fd_set_nonblocking(tree_sock);
	tree_listen_obj = eio_obj_create(tree_sock, &tree_listen_ops,
					 (void *)(-1));
	eio_new_initial_obj(pmi2_handle, tree_listen_obj);
	
	/* for stepd, add the sockets to tasks */
	if (in_stepd()) {
		for (i = 0; i < job_info.ltasks; i ++) {
			task_obj = eio_obj_create(STEPD_PMI_SOCK(i), &task_ops,
						  (void*)(long)(i));
			eio_new_initial_obj(pmi2_handle, task_obj);
		}
		initialized = xmalloc(job_info.ltasks * sizeof(int));
		finalized = xmalloc(job_info.ltasks * sizeof(int));
	}
	
	eio_handle_mainloop(pmi2_handle);

	debug("mpi/pmi2: agent thread exit");

	eio_handle_destroy(pmi2_handle);
	return NULL;
}
Exemple #2
0
/*
 * TODO: we need to keep track of the "me"
 * structures created here, because we need to
 * free them in "pmixp_stepd_finalize"
 */
void pmixp_server_slurm_conn(int fd)
{
	eio_obj_t *obj;
	pmixp_conn_t *conn = NULL;

	PMIXP_DEBUG("Request from fd = %d", fd);
	pmixp_debug_hang(0);

	/* Set nonblocking */
	fd_set_nonblocking(fd);
	fd_set_close_on_exec(fd);
	conn = pmixp_conn_new_temp(PMIXP_PROTO_SLURM, fd, _slurm_new_msg);

	/* try to process right here */
	pmixp_conn_progress_rcv(conn);
	if (!pmixp_conn_is_alive(conn)) {
		/* success, don't need this connection anymore */
		pmixp_conn_return(conn);
		return;
	}

	/* If it is a blocking operation: create AIO object to
	 * handle it */
	obj = eio_obj_create(fd, &slurm_peer_ops, (void *)conn);
	eio_new_obj(pmixp_info_io(), obj);
}
Exemple #3
0
void pmixp_server_direct_conn(int fd)
{
	eio_obj_t *obj;
	pmixp_conn_t *conn;
	PMIXP_DEBUG("Request from fd = %d", fd);

	/* Set nonblocking */
	fd_set_nonblocking(fd);
	fd_set_close_on_exec(fd);
	pmixp_fd_set_nodelay(fd);
	conn = pmixp_conn_new_temp(PMIXP_PROTO_DIRECT, fd,
				   _direct_conn_establish);

	/* try to process right here */
	pmixp_conn_progress_rcv(conn);
	if (!pmixp_conn_is_alive(conn)) {
		/* success, don't need this connection anymore */
		pmixp_conn_return(conn);
		return;
	}

	/* If it is a blocking operation: create AIO object to
	 * handle it */
	obj = eio_obj_create(fd, &direct_peer_ops, (void *)conn);
	eio_new_obj(pmixp_info_io(), obj);
	/* wakeup this connection to get processed */
	eio_signal_wakeup(pmixp_info_io());
}
Exemple #4
0
/*
 * Receive the first message identifying initiator
 */
static void
_direct_conn_establish(pmixp_conn_t *conn, void *_hdr, void *msg)
{
	pmixp_io_engine_t *eng = pmixp_conn_get_eng(conn);
	pmixp_base_hdr_t *hdr = (pmixp_base_hdr_t *)_hdr;
	pmixp_dconn_t *dconn = NULL;
	pmixp_conn_t *new_conn;
	eio_obj_t *obj;
	int fd;

	fd = pmixp_io_detach(eng);

	dconn = pmixp_dconn_accept(hdr->nodeid, fd);
	if (!dconn) {
		/* connection was refused because we already
		 * have established connection
		 * It seems that some sort of race condition occured
		 */
		char *nodename = pmixp_info_job_host(hdr->nodeid);
		close(fd);
		PMIXP_ERROR("Failed to accept direct connection from %s",
			    nodename);
		xfree(nodename);
		return;
	}
	new_conn = pmixp_conn_new_persist(PMIXP_PROTO_DIRECT,
					  pmixp_dconn_engine(dconn),
					  _direct_new_msg_conn,
					  _direct_return_connection, dconn);
	pmixp_dconn_unlock(dconn);
	obj = eio_obj_create(fd, &direct_peer_ops, (void *)new_conn);
	eio_new_obj(pmixp_info_io(), obj);
	/* wakeup this connection to get processed */
	eio_signal_wakeup(pmixp_info_io());
}
/*
 * TODO: we need to keep track of the "me"
 * structures created here, because we need to
 * free them in "pmixp_stepd_finalize"
 */
void pmix_server_new_conn(int fd)
{
	eio_obj_t *obj;
	PMIXP_DEBUG("Request from fd = %d", fd);

	/* Set nonblocking */
	fd_set_nonblocking(fd);
	fd_set_close_on_exec(fd);

	pmixp_io_engine_t *me = xmalloc(sizeof(pmixp_io_engine_t));
	pmix_io_init(me, fd, srv_rcvd_header);
	/* We use slurm_forward_data to send message to stepd's
	 * SLURM will put user ID there. We need to skip it.
	 */
	pmix_io_rcvd_padding(me, sizeof(uint32_t));

	if( 2 == _process_message(me) ){
		/* connection was fully processed here */
		xfree(me);
		return;
	}

	/* If it is a blocking operation: create AIO object to
	 * handle it */
	obj = eio_obj_create(fd, &peer_ops, (void *)me);
	eio_new_obj(pmixp_info_io(), obj);
}
Exemple #6
0
Fichier : req.c Projet : VURM/slurm
int
msg_thr_create(slurmd_job_t *job)
{
	int fd;
	eio_obj_t *eio_obj;
	pthread_attr_t attr;
	int rc = SLURM_SUCCESS, retries = 0;
	errno = 0;
	fd = _domain_socket_create(conf->spooldir, conf->node_name,
				   job->jobid, job->stepid);
	if (fd == -1)
		return SLURM_ERROR;

	fd_set_nonblocking(fd);

	eio_obj = eio_obj_create(fd, &msg_socket_ops, (void *)job);
	job->msg_handle = eio_handle_create();
	eio_new_initial_obj(job->msg_handle, eio_obj);

	slurm_attr_init(&attr);

	while (pthread_create(&job->msgid, &attr,
			      &_msg_thr_internal, (void *)job)) {
		error("msg_thr_create: pthread_create error %m");
		if (++retries > MAX_RETRIES) {
			error("msg_thr_create: Can't create pthread");
			rc = SLURM_ERROR;
			break;
		}
		usleep(10);	/* sleep and again */
	}

	slurm_attr_destroy(&attr);

	return rc;
}
Exemple #7
0
static int _process_extended_hdr(pmixp_base_hdr_t *hdr, Buf buf)
{
	char nhdr[PMIXP_BASE_HDR_MAX];
	bool send_init = false;
	size_t dsize = 0, hsize = 0;
	pmixp_dconn_t *dconn;
	_direct_proto_message_t *init_msg = NULL;
	int rc = SLURM_SUCCESS;
	char *ep_data = NULL;
	uint32_t ep_len = 0;

	dconn = pmixp_dconn_lock(hdr->nodeid);
	if (!dconn) {
		/* Should not happen */
		xassert( dconn );
		abort();
	}

	/* Retrieve endpoint information */
	_base_hdr_unpack_ext(buf, &ep_data, &ep_len);

	/* Check if init message is required to establish
	 * the connection
	 */
	if (!pmixp_dconn_require_connect(dconn, &send_init)) {
		goto unlock;
	}

	if (send_init) {
		Buf buf_init = pmixp_server_buf_new();
		pmixp_base_hdr_t bhdr;
		init_msg = xmalloc(sizeof(*init_msg));

		PMIXP_BASE_HDR_SETUP(bhdr, PMIXP_MSG_INIT_DIRECT, 0, buf_init);
		bhdr.ext_flag = 1;
		hsize = _direct_hdr_pack(&bhdr, nhdr);

		init_msg->sent_cb = pmixp_server_sent_buf_cb;
		init_msg->cbdata = buf_init;
		init_msg->hdr = bhdr;
		init_msg->buffer = _buf_finalize(buf_init, nhdr, hsize,
						 &dsize);
		init_msg->buf_ptr = buf_init;
	}

	rc = pmixp_dconn_connect(dconn, ep_data, ep_len, init_msg);
	if (rc) {
		PMIXP_ERROR("Unable to connect to %d", dconn->nodeid);
		if (init_msg) {
			/* need to release `init_msg` here */
			free_buf(init_msg->buf_ptr);
			xfree(init_msg);
		}
		goto unlock;
	}

	switch (pmixp_dconn_progress_type(dconn)) {
	case PMIXP_DCONN_PROGRESS_SW:{
		/* this direct connection has fd that needs to be
		 * polled to progress, use connection interface for that
		 */
		pmixp_io_engine_t *eng = pmixp_dconn_engine(dconn);
		pmixp_conn_t *conn;
		conn = pmixp_conn_new_persist(PMIXP_PROTO_DIRECT, eng,
					      _direct_new_msg_conn,
					      _direct_return_connection,
					      dconn);
		if (conn) {
			eio_obj_t *obj;
			obj = eio_obj_create(pmixp_io_fd(eng),
					     &direct_peer_ops,
					     (void *)conn);
			eio_new_obj(pmixp_info_io(), obj);
			eio_signal_wakeup(pmixp_info_io());
		} else {
			/* TODO: handle this error */
			rc = SLURM_ERROR;
			goto unlock;
		}
		break;
	}
	case PMIXP_DCONN_PROGRESS_HW: {
		break;
	}
	default:
		/* Should not happen */
		xassert(0 && pmixp_dconn_progress_type(dconn));
		/* TODO: handle this error */
	}
unlock:
	pmixp_dconn_unlock(dconn);
	return rc;
}