Exemple #1
0
static int _signal_batch_script_step(const resource_allocation_response_msg_t
				     *allocation, uint32_t signal)
{
	slurm_msg_t msg;
	kill_tasks_msg_t rpc;
	int rc = SLURM_SUCCESS;
	char *name = nodelist_nth_host(allocation->node_list, 0);
	if (!name) {
		error("_signal_batch_script_step: "
		      "can't get the first name out of %s",
		      allocation->node_list);
		return -1;
	}
	rpc.job_id = allocation->job_id;
	rpc.job_step_id = SLURM_BATCH_SCRIPT;
	rpc.signal = signal;

	slurm_msg_t_init(&msg);
	msg.msg_type = REQUEST_SIGNAL_TASKS;
	msg.data = &rpc;
	if (slurm_conf_get_addr(name, &msg.address) == SLURM_ERROR) {
		error("_signal_batch_script_step: "
		      "can't find address for host %s, check slurm.conf",
		      name);
		free(name);
		return -1;
	}
	free(name);
	if (slurm_send_recv_rc_msg_only_one(&msg, &rc, 0) < 0) {
		error("_signal_batch_script_step: %m");
		rc = -1;
	}
	return rc;
}
Exemple #2
0
/* Ping primary ControlMachine
 * RET 0 if no error */
static int _ping_controller(void)
{
	int rc;
	slurm_msg_t req;
	/* Locks: Read configuration */
	slurmctld_lock_t config_read_lock = {
		READ_LOCK, NO_LOCK, NO_LOCK, NO_LOCK };

	/*
	 *  Set address of controller to ping
	 */
	slurm_msg_t_init(&req);
	lock_slurmctld(config_read_lock);
	debug3("pinging slurmctld at %s", slurmctld_conf.control_addr);
	slurm_set_addr(&req.address, slurmctld_conf.slurmctld_port,
	               slurmctld_conf.control_addr);
	unlock_slurmctld(config_read_lock);

	req.msg_type = REQUEST_PING;

	if (slurm_send_recv_rc_msg_only_one(&req, &rc, 0) < 0) {
		error("_ping_controller/slurm_send_node_msg error: %m");
		return SLURM_ERROR;
	}

	if (rc) {
		error("_ping_controller/response error %d", rc);
		return SLURM_PROTOCOL_ERROR;
	}

	return SLURM_PROTOCOL_SUCCESS;
}
Exemple #3
0
/* Forward keypair info to other tasks as required.
 * Clear message forward structure upon completion.
 * The messages are forwarded sequentially. */
static int _forward_comm_set(struct kvs_comm_set *kvs_set_ptr)
{
	int i, rc = SLURM_SUCCESS;
	int tmp_host_cnt = kvs_set_ptr->host_cnt;
	slurm_msg_t msg_send;
	int msg_rc;

	kvs_set_ptr->host_cnt = 0;
	for (i=0; i<tmp_host_cnt; i++) {
		if (kvs_set_ptr->kvs_host_ptr[i].port == 0)
			continue;	/* empty */
		slurm_msg_t_init(&msg_send);
		msg_send.msg_type = PMI_KVS_GET_RESP;
		msg_send.data = (void *) kvs_set_ptr;
		slurm_set_addr(&msg_send.address,
			kvs_set_ptr->kvs_host_ptr[i].port,
			kvs_set_ptr->kvs_host_ptr[i].hostname);
		if (slurm_send_recv_rc_msg_only_one(&msg_send,
				&msg_rc, 0) < 0) {
			error("Could not forward msg to %s",
				kvs_set_ptr->kvs_host_ptr[i].hostname);
			msg_rc = 1;
		}
		rc = MAX(rc, msg_rc);
		xfree(kvs_set_ptr->kvs_host_ptr[i].hostname);
	}
	xfree(kvs_set_ptr->kvs_host_ptr);
	return rc;
}
Exemple #4
0
static void *_msg_thread(void *x)
{
	struct msg_arg *msg_arg_ptr = (struct msg_arg *) x;
	int rc, success = 0, timeout;
	slurm_msg_t msg_send;

	slurm_msg_t_init(&msg_send);

	debug2("KVS_Barrier msg to %s:%u",
		msg_arg_ptr->bar_ptr->hostname,
		msg_arg_ptr->bar_ptr->port);
	msg_send.msg_type = PMI_KVS_GET_RESP;
	msg_send.data = (void *) msg_arg_ptr->kvs_ptr;
	slurm_set_addr(&msg_send.address,
		msg_arg_ptr->bar_ptr->port,
		msg_arg_ptr->bar_ptr->hostname);

	timeout = slurm_get_msg_timeout() * 10000;
	if (slurm_send_recv_rc_msg_only_one(&msg_send, &rc, timeout) < 0) {
		error("slurm_send_recv_rc_msg_only_one: %m");
	} else if (rc != SLURM_SUCCESS) {
		error("KVS_Barrier confirm from %s, rc=%d",
			msg_arg_ptr->bar_ptr->hostname, rc);
	} else {
		/* successfully transmitted KVS keypairs */
		success = 1;
	}

	slurm_mutex_lock(&agent_mutex);
	agent_cnt--;
	pthread_cond_signal(&agent_cond);
	slurm_mutex_unlock(&agent_mutex);
	xfree(x);
	return NULL;
}
Exemple #5
0
/* Transmit PMI Keyval space data */
int slurm_send_kvs_comm_set(struct kvs_comm_set *kvs_set_ptr,
		int pmi_rank, int pmi_size)
{
	slurm_msg_t msg_send;
	int rc, retries = 0, timeout = 0;

	if (kvs_set_ptr == NULL)
		return EINVAL;

	if ((rc = _get_addr()) != SLURM_SUCCESS)
		return rc;
	_set_pmi_time();

	slurm_msg_t_init(&msg_send);
	msg_send.address = srun_addr;
	msg_send.msg_type = PMI_KVS_PUT_REQ;
	msg_send.data = (void *) kvs_set_ptr;

	/* Send the RPC to the local srun communcation manager.
	 * Since the srun can be sent thousands of messages at
	 * the same time and refuse some connections, retry as
	 * needed. Spread out messages by task's rank. Also
	 * increase the timeout if many tasks since the srun
	 * command is very overloaded.
	 * We also increase the timeout (default timeout is
	 * 10 secs). */
	_delay_rpc(pmi_rank, pmi_size);
	if      (pmi_size > 4000)	/* 240 secs */
		timeout = slurm_get_msg_timeout() * 24000;
	else if (pmi_size > 1000)	/* 120 secs */
		timeout = slurm_get_msg_timeout() * 12000;
	else if (pmi_size > 100)	/* 50 secs */
		timeout = slurm_get_msg_timeout() * 5000;
	else if (pmi_size > 10)		/* 20 secs */
		timeout = slurm_get_msg_timeout() * 2000;

	while (slurm_send_recv_rc_msg_only_one(&msg_send, &rc, timeout) < 0) {
		if (retries++ > MAX_RETRIES) {
			error("slurm_send_kvs_comm_set: %m");
			return SLURM_ERROR;
		} else
			debug("send_kvs retry %d", retries);
		_delay_rpc(pmi_rank, pmi_size);
	}

	return rc;
}
Exemple #6
0
/*
 * Tell the primary_controller to relinquish control, primary control_machine
 *	has to suspend operation
 * Based on _shutdown_backup_controller from controller.c
 * wait_time - How long to wait for primary controller to write state, seconds.
 * RET 0 or an error code
 * NOTE: READ lock_slurmctld config before entry (or be single-threaded)
 */
static int _shutdown_primary_controller(int wait_time)
{
	int rc;
	slurm_msg_t req;

	slurm_msg_t_init(&req);
	if ((slurmctld_conf.control_addr == NULL) ||
	    (slurmctld_conf.control_addr[0] == '\0')) {
		error("_shutdown_primary_controller: "
		      "no primary controller to shutdown");
		return SLURM_ERROR;
	}

	slurm_set_addr(&req.address, slurmctld_conf.slurmctld_port,
		       slurmctld_conf.control_addr);

	/* send request message */
	req.msg_type = REQUEST_CONTROL;

	if (slurm_send_recv_rc_msg_only_one(&req, &rc,
				(CONTROL_TIMEOUT * 1000)) < 0) {
		error("_shutdown_primary_controller:send/recv: %m");
		return SLURM_ERROR;
	}
	if (rc == ESLURM_DISABLED)
		debug("primary controller responding");
	else if (rc == 0) {
		debug("primary controller has relinquished control");
	} else {
		error("_shutdown_primary_controller: %s", slurm_strerror(rc));
		return SLURM_ERROR;
	}

	/* FIXME: Ideally the REQUEST_CONTROL RPC does not return until all
	 * other activity has ceased and the state has been saved. That is
	 * not presently the case (it returns when no other work is pending,
	 * so the state save should occur right away). We sleep for a while
	 * here and give the primary controller time to shutdown */
	if (wait_time)
		sleep(wait_time);

	return SLURM_SUCCESS;
}
Exemple #7
0
static int _terminate_batch_script_step(const resource_allocation_response_msg_t
					* allocation)
{
	slurm_msg_t msg;
	kill_tasks_msg_t rpc;
	int rc = SLURM_SUCCESS;
	int i;
	char *name = nodelist_nth_host(allocation->node_list, 0);
	if (!name) {
		error("_terminate_batch_script_step: "
		      "can't get the first name out of %s",
		      allocation->node_list);
		return -1;
	}

	rpc.job_id = allocation->job_id;
	rpc.job_step_id = SLURM_BATCH_SCRIPT;
	rpc.signal = (uint32_t)-1; /* not used by slurmd */

	slurm_msg_t_init(&msg);
	msg.msg_type = REQUEST_TERMINATE_TASKS;
	msg.data = &rpc;

	if (slurm_conf_get_addr(name, &msg.address) == SLURM_ERROR) {
		error("_terminate_batch_script_step: "
		      "can't find address for host %s, check slurm.conf",
		      name);
		free(name);
		return -1;
	}
	free(name);
	i = slurm_send_recv_rc_msg_only_one(&msg, &rc, 0);
	if (i != 0)
		rc = i;

	return rc;
}
Exemple #8
0
/* Wait for barrier and get full PMI Keyval space data */
int  slurm_get_kvs_comm_set(struct kvs_comm_set **kvs_set_ptr,
		int pmi_rank, int pmi_size)
{
	int rc, srun_fd, retries = 0, timeout = 0;
	slurm_msg_t msg_send, msg_rcv;
	slurm_addr_t slurm_addr, srun_reply_addr;
	char hostname[64];
	uint16_t port;
	kvs_get_msg_t data;
	char *env_pmi_ifhn;

	if (kvs_set_ptr == NULL)
		return EINVAL;
	*kvs_set_ptr = NULL;	/* initialization */

	if ((rc = _get_addr()) != SLURM_SUCCESS) {
		error("_get_addr: %m");
		return rc;
	}

	_set_pmi_time();

	if (pmi_fd < 0) {
		if ((pmi_fd = slurm_init_msg_engine_port(0)) < 0) {
			error("slurm_init_msg_engine_port: %m");
			return SLURM_ERROR;
		}
		fd_set_blocking(pmi_fd);
	}
	if (slurm_get_stream_addr(pmi_fd, &slurm_addr) < 0) {
		error("slurm_get_stream_addr: %m");
		return SLURM_ERROR;
	}
	/* hostname is not set here, so slurm_get_addr fails
	slurm_get_addr(&slurm_addr, &port, hostname, sizeof(hostname)); */
	port = ntohs(slurm_addr.sin_port);
	if ((env_pmi_ifhn = getenv("SLURM_PMI_RESP_IFHN"))) {
		strncpy(hostname, env_pmi_ifhn, sizeof(hostname));
		hostname[sizeof(hostname)-1] = 0;
	} else
		gethostname_short(hostname, sizeof(hostname));

	data.task_id = pmi_rank;
	data.size = pmi_size;
	data.port = port;
	data.hostname = hostname;
	slurm_msg_t_init(&msg_send);
	slurm_msg_t_init(&msg_rcv);
	msg_send.address = srun_addr;
	msg_send.msg_type = PMI_KVS_GET_REQ;
	msg_send.data = &data;

	/* Send the RPC to the local srun communcation manager.
	 * Since the srun can be sent thousands of messages at
	 * the same time and refuse some connections, retry as
	 * needed. Wait until all key-pairs have been sent by
	 * all tasks then spread out messages by task's rank.
	 * Also increase the message timeout if many tasks
	 * since the srun command can get very overloaded (the
	 * default timeout is 10 secs).
	 */
	_delay_rpc(pmi_rank, pmi_size);
	if      (pmi_size > 4000)	/* 240 secs */
		timeout = slurm_get_msg_timeout() * 24000;
	else if (pmi_size > 1000)	/* 120 secs */
		timeout = slurm_get_msg_timeout() * 12000;
	else if (pmi_size > 100)	/* 60 secs */
		timeout = slurm_get_msg_timeout() * 6000;
	else if (pmi_size > 10)		/* 20 secs */
		timeout = slurm_get_msg_timeout() * 2000;

	while (slurm_send_recv_rc_msg_only_one(&msg_send, &rc, timeout) < 0) {
		if (retries++ > MAX_RETRIES) {
			error("slurm_get_kvs_comm_set: %m");
			return SLURM_ERROR;
		} else
			debug("get kvs retry %d", retries);
		_delay_rpc(pmi_rank, pmi_size);
	}
	if (rc != SLURM_SUCCESS) {
		error("slurm_get_kvs_comm_set error_code=%d", rc);
		return rc;
	}

	/* get the message after all tasks reach the barrier */
	srun_fd = slurm_accept_msg_conn(pmi_fd, &srun_reply_addr);
	if (srun_fd < 0) {
		error("slurm_accept_msg_conn: %m");
		return errno;
	}

	while ((rc = slurm_receive_msg(srun_fd, &msg_rcv, timeout)) != 0) {
		if (errno == EINTR)
			continue;
		error("slurm_receive_msg: %m");
		slurm_close(srun_fd);
		return errno;
	}
	if (msg_rcv.auth_cred)
		(void)g_slurm_auth_destroy(msg_rcv.auth_cred);

	if (msg_rcv.msg_type != PMI_KVS_GET_RESP) {
		error("slurm_get_kvs_comm_set msg_type=%d", msg_rcv.msg_type);
		slurm_close(srun_fd);
		return SLURM_UNEXPECTED_MSG_ERROR;
	}
	if (slurm_send_rc_msg(&msg_rcv, SLURM_SUCCESS) < 0)
		error("slurm_send_rc_msg: %m");

	slurm_close(srun_fd);
	*kvs_set_ptr = msg_rcv.data;

	rc = _forward_comm_set(*kvs_set_ptr);
	return rc;
}