Example no. 1
/*
 *  Send a msg to the next msg aggregation collector node. If the primary
 *  collector is unavailable or returns an error, try the backup collector.
 *  If the backup collector is unavailable or returns an error, send the msg
 *  directly to the controller.
 */
static int _send_to_next_collector(slurm_msg_t *msg)
{
	slurm_addr_t *next_dest = NULL;
	bool i_am_collector;
	int rc = SLURM_SUCCESS;

	if (msg_collection.debug_flags & DEBUG_FLAG_ROUTE)
		info("msg aggr: send_to_next_collector: getting primary next "
		     "collector");
	if ((next_dest = route_g_next_collector(&i_am_collector))) {
		if (msg_collection.debug_flags & DEBUG_FLAG_ROUTE) {
			char addrbuf[32];
			slurm_print_slurm_addr(next_dest, addrbuf,
					       sizeof(addrbuf));
			info("msg aggr: send_to_next_collector: *next_dest is "
			     "%s", addrbuf);
		}
		memcpy(&msg->address, next_dest, sizeof(slurm_addr_t));
		rc = slurm_send_only_node_msg(msg);
	}

	if (!next_dest || (rc != SLURM_SUCCESS))
		rc = _send_to_backup_collector(msg, rc);

	return rc;
}
Example no. 2
static int _send_to_backup_collector(slurm_msg_t *msg, int rc)
{
	slurm_addr_t *next_dest = NULL;

	if (msg_collection.debug_flags & DEBUG_FLAG_ROUTE) {
		info("_send_to_backup_collector: primary %s, "
		     "getting backup",
		     rc ? "can't be reached" : "is null");
	}

	if ((next_dest = route_g_next_collector_backup())) {
		if (msg_collection.debug_flags & DEBUG_FLAG_ROUTE) {
			char addrbuf[32];
			slurm_print_slurm_addr(next_dest, addrbuf,
					       sizeof(addrbuf));
			info("_send_to_backup_collector: *next_dest is "
			     "%s", addrbuf);
		}
		memcpy(&msg->address, next_dest, sizeof(slurm_addr_t));
		rc = slurm_send_only_node_msg(msg);
	}

	if (!next_dest || (rc != SLURM_SUCCESS)) {
		if (msg_collection.debug_flags & DEBUG_FLAG_ROUTE)
			info("_send_to_backup_collector: backup %s, "
			     "sending msg to controller",
			     rc ? "can't be reached" : "is null");
		rc = slurm_send_only_controller_msg(msg, working_cluster_rec);
	}

	return rc;
}
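
Examples 1 and 2 together implement a two-stage fallback: primary collector, then backup collector, then the controller as a last resort. Below is a minimal, self-contained sketch of that control flow; send_primary(), send_backup() and send_controller() are hypothetical stubs, not Slurm APIs, and only the fallback ordering is meant to match the functions above.

/*
 * Sketch of the fallback chain in _send_to_next_collector() and
 * _send_to_backup_collector(). The three send_*() stubs below are
 * hypothetical stand-ins for the Slurm send calls.
 */
#include <stdio.h>

#define SKETCH_SUCCESS 0
#define SKETCH_ERROR   (-1)

/* hypothetical stubs: return SKETCH_SUCCESS when delivery succeeds */
static int send_primary(const char *msg)    { (void) msg; return SKETCH_ERROR; }
static int send_backup(const char *msg)     { (void) msg; return SKETCH_ERROR; }
static int send_controller(const char *msg)
{
	printf("controller got: %s\n", msg);
	return SKETCH_SUCCESS;
}

static int send_with_fallback(const char *msg)
{
	int rc = send_primary(msg);		/* primary collector */

	if (rc != SKETCH_SUCCESS)
		rc = send_backup(msg);		/* then backup collector */
	if (rc != SKETCH_SUCCESS)
		rc = send_controller(msg);	/* last resort: controller */
	return rc;
}

int main(void)
{
	return (send_with_fallback("epilog complete") == SKETCH_SUCCESS) ? 0 : 1;
}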
Example no. 3
extern void msg_aggr_resp(slurm_msg_t *msg)
{
	slurm_msg_t *next_msg;
	composite_msg_t *comp_msg;
	msg_aggr_t *msg_aggr;
	ListIterator itr;

	comp_msg = (composite_msg_t *)msg->data;
	itr = list_iterator_create(comp_msg->msg_list);
	if (msg_collection.debug_flags & DEBUG_FLAG_ROUTE)
		info("msg_aggr_resp: processing composite msg_list...");
	while ((next_msg = list_next(itr))) {
		switch (next_msg->msg_type) {
		case RESPONSE_SLURM_RC:
			/* signal sending thread that slurmctld received this
			 * epilog complete msg */
			if (msg_collection.debug_flags & DEBUG_FLAG_ROUTE)
				info("msg_aggr_resp: rc message found for "
				     "index %u signaling sending thread",
				     next_msg->msg_index);
			slurm_mutex_lock(&msg_collection.aggr_mutex);
			if (!(msg_aggr = _handle_msg_aggr_ret(
				      next_msg->msg_index, 1))) {
				debug2("msg_aggr_resp: error: unable to "
				       "locate aggr message struct for job %u",
					next_msg->msg_index);
				slurm_mutex_unlock(&msg_collection.aggr_mutex);
				continue;
			}
			pthread_cond_signal(&msg_aggr->wait_cond);
			slurm_mutex_unlock(&msg_collection.aggr_mutex);
			break;
		case RESPONSE_MESSAGE_COMPOSITE:
			comp_msg = (composite_msg_t *)next_msg->data;
			/* set up the address here for the next node */
			memcpy(&next_msg->address, &comp_msg->sender,
			       sizeof(slurm_addr_t));

			if (msg_collection.debug_flags & DEBUG_FLAG_ROUTE) {
				char addrbuf[32];
				slurm_print_slurm_addr(&next_msg->address,
						       addrbuf,
						       sizeof(addrbuf));
				info("msg_aggr_resp: composite response msg "
				     "found for %s", addrbuf);
			}

			slurm_send_only_node_msg(next_msg);

			break;
		default:
			error("_rpc_composite_resp: invalid msg type in "
			      "composite msg_list");
			break;
		}
	}
	list_iterator_destroy(itr);
	if (msg_collection.debug_flags & DEBUG_FLAG_ROUTE)
		info("msg aggr: _rpc_composite_resp: finished processing "
		     "composite msg_list...");
}
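
The RESPONSE_SLURM_RC branch above wakes the sending thread that is blocked on a per-message condition variable, located by msg_index under aggr_mutex. Below is a minimal sketch of that handshake; pending_msg_t, pending_list, ack_pending() and wait_for_ack() are hypothetical names, and only the mutex/condvar pattern mirrors msg_aggr_resp().

/*
 * Sketch of the RESPONSE_SLURM_RC handshake: the response handler finds
 * the in-flight record by index and signals the sender waiting on it.
 * All names here are hypothetical.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdint.h>

typedef struct pending_msg {
	uint32_t index;			/* matches msg_index in the response */
	bool acked;			/* set once the RC arrives */
	pthread_cond_t wait_cond;
	struct pending_msg *next;
} pending_msg_t;

static pthread_mutex_t aggr_mutex = PTHREAD_MUTEX_INITIALIZER;
static pending_msg_t *pending_list;	/* in-flight messages */

/* Response side: called when an RC arrives for 'index'. */
static void ack_pending(uint32_t index)
{
	pthread_mutex_lock(&aggr_mutex);
	for (pending_msg_t *p = pending_list; p; p = p->next) {
		if (p->index == index) {
			p->acked = true;
			pthread_cond_signal(&p->wait_cond);
			break;
		}
	}
	pthread_mutex_unlock(&aggr_mutex);
}

/* Sender side: block until the message has been acknowledged. */
static void wait_for_ack(pending_msg_t *p)
{
	pthread_mutex_lock(&aggr_mutex);
	while (!p->acked)
		pthread_cond_wait(&p->wait_cond, &aggr_mutex);
	pthread_mutex_unlock(&aggr_mutex);
}

static void *responder(void *arg)
{
	ack_pending(*(uint32_t *) arg);
	return NULL;
}

int main(void)
{
	pending_msg_t p = { 42, false, PTHREAD_COND_INITIALIZER, NULL };
	pthread_t tid;

	pending_list = &p;
	pthread_create(&tid, NULL, responder, &p.index);
	wait_for_ack(&p);		/* returns once responder acks 42 */
	pthread_join(tid, NULL);
	return 0;
}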
Example no. 4
/*
 * _thread_per_group_rpc - thread to issue an RPC for a group of nodes,
 *                         sending the message to one node and forwarding
 *                         it to the others if necessary.
 * IN/OUT args - pointer to task_info_t, xfree'd on completion
 */
static void *_thread_per_group_rpc(void *args)
{
	int rc = SLURM_SUCCESS;
	slurm_msg_t msg;
	task_info_t *task_ptr = (task_info_t *) args;
	/* Cache some pointers from task_info_t because args must be
	 * xfree'd before we are finished with its contents. Freeing
	 * args early matters for timely termination of this pthread:
	 * an xfree at the very end could block on the allocator lock
	 * and delay thread_exit. */
	pthread_mutex_t *thread_mutex_ptr   = task_ptr->thread_mutex_ptr;
	pthread_cond_t  *thread_cond_ptr    = task_ptr->thread_cond_ptr;
	uint32_t        *threads_active_ptr = task_ptr->threads_active_ptr;
	thd_t           *thread_ptr         = task_ptr->thread_struct_ptr;
	state_t thread_state = DSH_NO_RESP;
	slurm_msg_type_t msg_type = task_ptr->msg_type;
	bool is_kill_msg, srun_agent;
	List ret_list = NULL;
	ListIterator itr;
	ret_data_info_t *ret_data_info = NULL;
	int found = 0;
	int sig_array[2] = {SIGUSR1, 0};
	/* Locks: Write job, write node */
	slurmctld_lock_t job_write_lock = {
		NO_LOCK, WRITE_LOCK, WRITE_LOCK, NO_LOCK };

	xassert(args != NULL);
	xsignal(SIGUSR1, _sig_handler);
	xsignal_unblock(sig_array);
	is_kill_msg = (	(msg_type == REQUEST_KILL_TIMELIMIT)	||
			(msg_type == REQUEST_TERMINATE_JOB) );
	srun_agent = (	(msg_type == SRUN_PING)			||
			(msg_type == SRUN_EXEC)			||
			(msg_type == SRUN_JOB_COMPLETE)		||
			(msg_type == SRUN_STEP_MISSING)		||
			(msg_type == SRUN_TIMEOUT)		||
			(msg_type == SRUN_USER_MSG)		||
			(msg_type == RESPONSE_RESOURCE_ALLOCATION) ||
			(msg_type == SRUN_NODE_FAIL) );

	thread_ptr->start_time = time(NULL);

	slurm_mutex_lock(thread_mutex_ptr);
	thread_ptr->state = DSH_ACTIVE;
	thread_ptr->end_time = thread_ptr->start_time + COMMAND_TIMEOUT;
	slurm_mutex_unlock(thread_mutex_ptr);

	/* send request message */
	slurm_msg_t_init(&msg);
	msg.msg_type = msg_type;
	msg.data     = task_ptr->msg_args_ptr;
#if 0
 	info("sending message type %u to %s", msg_type, thread_ptr->nodelist);
#endif
	if (task_ptr->get_reply) {
		if (thread_ptr->addr) {
			msg.address = *thread_ptr->addr;

			if (!(ret_list = slurm_send_addr_recv_msgs(
				     &msg, thread_ptr->nodelist, 0))) {
				error("_thread_per_group_rpc: "
				      "no ret_list given");
				goto cleanup;
			}
		} else {
			if (!(ret_list = slurm_send_recv_msgs(
				     thread_ptr->nodelist,
				     &msg, 0, true))) {
				error("_thread_per_group_rpc: "
				      "no ret_list given");
				goto cleanup;
			}
		}
	} else {
		if (thread_ptr->addr) {
			//info("got the address");
			msg.address = *thread_ptr->addr;
		} else {
			//info("no address given");
			if (slurm_conf_get_addr(thread_ptr->nodelist,
						&msg.address) == SLURM_ERROR) {
				error("_thread_per_group_rpc: "
				      "can't find address for host %s, "
				      "check slurm.conf",
				      thread_ptr->nodelist);
				goto cleanup;
			}
		}
		//info("sending %u to %s", msg_type, thread_ptr->nodelist);
		if (slurm_send_only_node_msg(&msg) == SLURM_SUCCESS) {
			thread_state = DSH_DONE;
		} else {
			if (!srun_agent)
				_comm_err(thread_ptr->nodelist, msg_type);
		}
		goto cleanup;
	}

	//info("got %d messages back", list_count(ret_list));
	found = 0;
	itr = list_iterator_create(ret_list);
	while ((ret_data_info = list_next(itr)) != NULL) {
		rc = slurm_get_return_code(ret_data_info->type,
					   ret_data_info->data);
		/* SPECIAL CASE: Mark node as IDLE if job already
		   complete */
		if (is_kill_msg &&
		    (rc == ESLURMD_KILL_JOB_ALREADY_COMPLETE)) {
			kill_job_msg_t *kill_job;
			kill_job = (kill_job_msg_t *)
				task_ptr->msg_args_ptr;
			rc = SLURM_SUCCESS;
			lock_slurmctld(job_write_lock);
			if (job_epilog_complete(kill_job->job_id,
						ret_data_info->node_name,
						rc))
				run_scheduler = true;
			unlock_slurmctld(job_write_lock);
		}
		/* SPECIAL CASE: Kill non-startable batch job.
		 * On ESLURMD_PROLOG_FAILED the job is requeued
		 * instead, so it is skipped here. */
		if ((msg_type == REQUEST_BATCH_JOB_LAUNCH) &&
		    (rc != SLURM_SUCCESS) && (rc != ESLURMD_PROLOG_FAILED) &&
		    (ret_data_info->type != RESPONSE_FORWARD_FAILED)) {
			batch_job_launch_msg_t *launch_msg_ptr =
				task_ptr->msg_args_ptr;
			uint32_t job_id = launch_msg_ptr->job_id;
			info("Killing non-startable batch job %u: %s",
			     job_id, slurm_strerror(rc));
			thread_state = DSH_DONE;
			ret_data_info->err = thread_state;
			lock_slurmctld(job_write_lock);
			job_complete(job_id, 0, false, false, _wif_status());
			unlock_slurmctld(job_write_lock);
			continue;
		}

		if (((msg_type == REQUEST_SIGNAL_TASKS) ||
		     (msg_type == REQUEST_TERMINATE_TASKS)) &&
		     (rc == ESRCH)) {
			/* process is already dead, not a real error */
			rc = SLURM_SUCCESS;
		}

		switch (rc) {
		case SLURM_SUCCESS:
			/* debug("agent processed RPC to node %s", */
			/*       ret_data_info->node_name); */
			thread_state = DSH_DONE;
			break;
		case SLURM_UNKNOWN_FORWARD_ADDR:
			error("We were unable to forward message to '%s'.  "
			      "Make sure the slurm.conf for each slurmd "
			      "contain all other nodes in your system.",
			      ret_data_info->node_name);
			thread_state = DSH_NO_RESP;
			break;
		case ESLURMD_EPILOG_FAILED:
			error("Epilog failure on host %s, "
			      "setting DOWN",
			      ret_data_info->node_name);

			thread_state = DSH_FAILED;
			break;
		case ESLURMD_PROLOG_FAILED:
			thread_state = DSH_FAILED;
			break;
		case ESLURM_INVALID_JOB_ID:
			/* Not indicative of a real error */
		case ESLURMD_JOB_NOTRUNNING:
			/* Not indicative of a real error */
			debug2("agent processed RPC to node %s: %s",
			       ret_data_info->node_name,
			       slurm_strerror(rc));

			thread_state = DSH_DONE;
			break;
		default:
			if (!srun_agent) {
				if (ret_data_info->err)
					errno = ret_data_info->err;
				else
					errno = rc;
				rc = _comm_err(ret_data_info->node_name,
					       msg_type);
			}
			if (srun_agent)
				thread_state = DSH_FAILED;
			else if (ret_data_info->type == RESPONSE_FORWARD_FAILED)
				/* check if a forward failed */
				thread_state = DSH_NO_RESP;
			else {	/* Some failures are benign, e.g. a job
				 * termination request for a job that has
				 * already finished; treat those as done. */
				thread_state = DSH_DONE;
			}
		}
		ret_data_info->err = thread_state;
	}
	list_iterator_destroy(itr);

cleanup:
	xfree(args);

	/* handled at end of thread just in case resend is needed */
	destroy_forward(&msg.forward);
	slurm_mutex_lock(thread_mutex_ptr);
	thread_ptr->ret_list = ret_list;
	thread_ptr->state = thread_state;
	thread_ptr->end_time = (time_t) difftime(time(NULL),
						 thread_ptr->start_time);
	/* Signal completion so another thread can replace us */
	(*threads_active_ptr)--;
	pthread_cond_signal(thread_cond_ptr);
	slurm_mutex_unlock(thread_mutex_ptr);
	return (void *) NULL;
}
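
The cleanup path above frees args early (see the cached-pointer comment at the top of the function) and then, under the shared mutex, decrements the active-thread count and signals a condition variable so the dispatcher can start a replacement thread. A minimal sketch of that slot-accounting handshake follows; worker_pool_t, worker() and spawn_worker() are hypothetical names, and only the count/condvar pattern mirrors the code above.

/*
 * Sketch of the completion handshake at the end of
 * _thread_per_group_rpc(): each worker decrements a shared active
 * count under the mutex and signals a condvar so the dispatcher can
 * launch a replacement. All names here are hypothetical.
 */
#include <pthread.h>

typedef struct {
	pthread_mutex_t lock;
	pthread_cond_t  slot_free;
	unsigned        active;		/* workers currently running */
	unsigned        max_active;	/* pool size */
} worker_pool_t;

static void *worker(void *arg)
{
	worker_pool_t *pool = arg;

	/* ... per-group RPC work would happen here ... */

	pthread_mutex_lock(&pool->lock);
	pool->active--;			/* free our slot */
	pthread_cond_signal(&pool->slot_free);
	pthread_mutex_unlock(&pool->lock);
	return NULL;
}

/* Dispatcher: wait for a free slot, then launch a detached worker. */
static int spawn_worker(worker_pool_t *pool)
{
	pthread_t tid;

	pthread_mutex_lock(&pool->lock);
	while (pool->active >= pool->max_active)
		pthread_cond_wait(&pool->slot_free, &pool->lock);
	pool->active++;
	pthread_mutex_unlock(&pool->lock);

	if (pthread_create(&tid, NULL, worker, pool)) {
		pthread_mutex_lock(&pool->lock);
		pool->active--;		/* creation failed: release slot */
		pthread_cond_signal(&pool->slot_free);
		pthread_mutex_unlock(&pool->lock);
		return -1;
	}
	pthread_detach(tid);
	return 0;
}

int main(void)
{
	worker_pool_t pool = {
		PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, 0, 2 };

	for (int i = 0; i < 4; i++)
		if (spawn_worker(&pool))
			return 1;

	/* wait for all workers to finish before exiting */
	pthread_mutex_lock(&pool.lock);
	while (pool.active > 0)
		pthread_cond_wait(&pool.slot_free, &pool.lock);
	pthread_mutex_unlock(&pool.lock);
	return 0;
}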