Example #1
0
/*
 * Send a REQUEST_STEP_COMPLETION_V2 request over "fd" and read back the
 * peer's return code and errno.
 *
 * fd               - descriptor the request is written to and the reply
 *                    is read from
 * protocol_version - version used to pack the accounting data; must be at
 *                    least SLURM_MIN_PROTOCOL_VERSION
 * sent             - completion message; its range_first, range_last,
 *                    step_rc and jobacct members are transmitted
 *
 * Returns SLURM_SUCCESS if successful.  On error returns SLURM_ERROR
 * and sets errno.  If a read or write on "fd" fails, returns -1 without
 * touching errno explicitly.
 */
int
stepd_completion(int fd, uint16_t protocol_version, step_complete_msg_t *sent)
{
	int req = REQUEST_STEP_COMPLETION_V2;
	int rc;
	int errnum = 0;
	Buf buffer = NULL;
	int len = 0;

	debug("Entering stepd_completion for %u.%u, range_first = %d, range_last = %d",
	      sent->job_id, sent->job_step_id,
	      sent->range_first, sent->range_last);

	if (protocol_version >= SLURM_MIN_PROTOCOL_VERSION) {
		safe_write(fd, &req, sizeof(int));
		safe_write(fd, &sent->range_first, sizeof(int));
		safe_write(fd, &sent->range_last, sizeof(int));
		safe_write(fd, &sent->step_rc, sizeof(int));

		/*
		 * We must not use setinfo over a pipe with slurmstepd here.
		 * slurmd makes heavy use of getinfo over a pipe with
		 * slurmstepd, and doing the reverse can result in a
		 * deadlock scenario with slurmstepd:
		 * slurmd(lockforread,write)/slurmstepd(write,lockforread)
		 * Do pack/unpack instead to keep slurmd and slurmstepd
		 * independent.
		 *
		 * The buffer is only allocated here, where it is needed, so
		 * the unsupported-version branch below has nothing to leak.
		 */
		buffer = init_buf(0);
		jobacctinfo_pack(sent->jobacct, protocol_version,
				 PROTOCOL_TYPE_SLURM, buffer);
		len = get_buf_offset(buffer);
		safe_write(fd, &len, sizeof(int));
		safe_write(fd, get_buf_data(buffer), len);
		/*
		 * Clear the pointer as well as freeing it: the safe_read()
		 * calls below may still jump to rwfail, which frees "buffer"
		 * again if it is non-NULL.
		 */
		FREE_NULL_BUFFER(buffer);

		/* Receive the return code and errno */
		safe_read(fd, &rc, sizeof(int));
		safe_read(fd, &errnum, sizeof(int));
	} else {
		error("%s: bad protocol version %hu",
		      __func__, protocol_version);
		rc = SLURM_ERROR;
	}

	errno = errnum;
	return rc;

rwfail:
	/* Reached via safe_read()/safe_write() on an I/O failure. */
	FREE_NULL_BUFFER(buffer);
	return -1;
}
Example #2
0
/*
 * Release the resources held by a ring collective: the ring buffer and
 * contribution map of each of the PMIXP_COLL_RING_CTX_NUM contexts, then
 * the forward and ring buffer pools.  The ring structure itself is not
 * freed here.
 */
void pmixp_coll_ring_free(pmixp_coll_ring_t *ring)
{
	int idx = 0;

	while (idx < PMIXP_COLL_RING_CTX_NUM) {
		pmixp_coll_ring_ctx_t *ctx = &ring->ctx_array[idx];

		FREE_NULL_BUFFER(ctx->ring_buf);
		xfree(ctx->contrib_map);
		idx++;
	}

	list_destroy(ring->fwrd_buf_pool);
	list_destroy(ring->ring_buf_pool);
}
Example #3
0
/*
 * Store one item of accounting data into "jobacct", or (for
 * JOBACCT_DATA_PIPE) pack "jobacct" and write it to a file descriptor.
 *
 * jobacct          - accounting record to update or send
 * type             - selects which member is set and therefore how "data"
 *                    is interpreted
 * data             - pointer to the value; the pointed-to type depends on
 *                    "type" (see the cast in each case below)
 * protocol_version - only used for JOBACCT_DATA_PIPE; nothing is written
 *                    if it is below SLURM_MIN_PROTOCOL_VERSION
 *
 * Returns SLURM_SUCCESS, including when plugin_polling is unset (in which
 * case nothing is done) or "type" is not recognized.  Returns SLURM_ERROR
 * if writing to the pipe fails.
 */
extern int jobacctinfo_setinfo(jobacctinfo_t *jobacct,
			       enum jobacct_data_type type, void *data,
			       uint16_t protocol_version)
{
	Buf buffer = NULL;

	if (!plugin_polling)
		return SLURM_SUCCESS;

	switch (type) {
	case JOBACCT_DATA_TOTAL:
		/* "data" is a complete struct jobacctinfo to copy over */
		memcpy(jobacct, (struct jobacctinfo *) data,
		       sizeof(struct jobacctinfo));
		break;
	case JOBACCT_DATA_PIPE:
		/* "data" points at the file descriptor to write to */
		if (protocol_version >= SLURM_MIN_PROTOCOL_VERSION) {
			int *pipe_fd = (int *) data;
			int packed_len;

			buffer = init_buf(0);
			jobacctinfo_pack(jobacct, protocol_version,
					 PROTOCOL_TYPE_SLURM, buffer);
			packed_len = get_buf_offset(buffer);
			/* length prefix first, then the packed payload */
			safe_write(*pipe_fd, &packed_len, sizeof(int));
			safe_write(*pipe_fd, get_buf_data(buffer), packed_len);
			FREE_NULL_BUFFER(buffer);
		}
		break;
	case JOBACCT_DATA_RUSAGE: {
		struct rusage *usage = (struct rusage *) data;

		/*
		 * The seconds counters only ever move forward; the
		 * microsecond parts are always overwritten.
		 */
		if (usage->ru_utime.tv_sec > jobacct->user_cpu_sec)
			jobacct->user_cpu_sec = usage->ru_utime.tv_sec;
		jobacct->user_cpu_usec = usage->ru_utime.tv_usec;
		if (usage->ru_stime.tv_sec > jobacct->sys_cpu_sec)
			jobacct->sys_cpu_sec = usage->ru_stime.tv_sec;
		jobacct->sys_cpu_usec = usage->ru_stime.tv_usec;
		break;
	}
	case JOBACCT_DATA_MAX_RSS:
		jobacct->max_rss = *(uint64_t *) data;
		break;
	case JOBACCT_DATA_MAX_RSS_ID:
		jobacct->max_rss_id = *(jobacct_id_t *) data;
		break;
	case JOBACCT_DATA_TOT_RSS:
		jobacct->tot_rss = *(uint64_t *) data;
		break;
	case JOBACCT_DATA_MAX_VSIZE:
		jobacct->max_vsize = *(uint64_t *) data;
		break;
	case JOBACCT_DATA_MAX_VSIZE_ID:
		jobacct->max_vsize_id = *(jobacct_id_t *) data;
		break;
	case JOBACCT_DATA_TOT_VSIZE:
		jobacct->tot_vsize = *(uint64_t *) data;
		break;
	case JOBACCT_DATA_MAX_PAGES:
		jobacct->max_pages = *(uint64_t *) data;
		break;
	case JOBACCT_DATA_MAX_PAGES_ID:
		jobacct->max_pages_id = *(jobacct_id_t *) data;
		break;
	case JOBACCT_DATA_TOT_PAGES:
		jobacct->tot_pages = *(uint64_t *) data;
		break;
	case JOBACCT_DATA_MIN_CPU:
		jobacct->min_cpu = *(uint32_t *) data;
		break;
	case JOBACCT_DATA_MIN_CPU_ID:
		jobacct->min_cpu_id = *(jobacct_id_t *) data;
		break;
	case JOBACCT_DATA_TOT_CPU:
		jobacct->tot_cpu = *(double *) data;
		break;
	case JOBACCT_DATA_ACT_CPUFREQ:
		jobacct->act_cpufreq = *(uint32_t *) data;
		break;
	case JOBACCT_DATA_CONSUMED_ENERGY:
		jobacct->energy.consumed_energy = *(uint64_t *) data;
		break;
	case JOBACCT_DATA_MAX_DISK_READ:
		jobacct->max_disk_read = *(double *) data;
		break;
	case JOBACCT_DATA_MAX_DISK_READ_ID:
		jobacct->max_disk_read_id = *(jobacct_id_t *) data;
		break;
	case JOBACCT_DATA_TOT_DISK_READ:
		jobacct->tot_disk_read = *(double *) data;
		break;
	case JOBACCT_DATA_MAX_DISK_WRITE:
		jobacct->max_disk_write = *(double *) data;
		break;
	case JOBACCT_DATA_MAX_DISK_WRITE_ID:
		jobacct->max_disk_write_id = *(jobacct_id_t *) data;
		break;
	case JOBACCT_DATA_TOT_DISK_WRITE:
		jobacct->tot_disk_write = *(double *) data;
		break;
	default:
		debug("jobacct_g_set_setinfo data_type %d invalid", type);
		break;
	}

	return SLURM_SUCCESS;

rwfail:
	/* Reached via safe_write() when writing to the pipe fails. */
	FREE_NULL_BUFFER(buffer);
	return SLURM_ERROR;
}
Example #4
0
/*
 * Thread entry point that services one accepted connection.
 *
 * arg - heap-allocated struct request_params carrying the connection's
 *       "fd" and the "job" record; it is freed here once both have been
 *       copied out.
 *
 * Protocol, in order:
 *   1. read an int request code, which must be REQUEST_CONNECT;
 *   2. read an int length followed by that many bytes holding a packed
 *      authentication credential, then unpack and verify it;
 *   3. reply with SLURM_PROTOCOL_VERSION;
 *   4. call _handle_request() repeatedly until it returns something other
 *      than SLURM_SUCCESS.
 * On a handshake failure SLURM_FAILURE is written back instead (best
 * effort) and the descriptor is closed.
 *
 * Always returns NULL.
 */
static void *
_handle_accept(void *arg)
{
	/*struct request_params *param = (struct request_params *)arg;*/
	int fd = ((struct request_params *)arg)->fd;
	stepd_step_rec_t *job = ((struct request_params *)arg)->job;
	int req;
	int len;
	Buf buffer = NULL;
	void *auth_cred;
	int rc;
	uid_t uid;
	gid_t gid;
	char *auth_info;

	debug3("Entering _handle_accept (new thread)");
	xfree(arg);

	safe_read(fd, &req, sizeof(int));
	if (req != REQUEST_CONNECT) {
		error("First message must be REQUEST_CONNECT");
		goto fail;
	}

	/* NOTE(review): "len" comes from the peer and is not range-checked. */
	safe_read(fd, &len, sizeof(int));
	buffer = init_buf(len);
	safe_read(fd, get_buf_data(buffer), len);

	/* Unpack and verify the auth credential */
	auth_cred = g_slurm_auth_unpack(buffer);
	if (auth_cred == NULL) {
		error("Unpacking authentication credential: %s",
		      g_slurm_auth_errstr(g_slurm_auth_errno(NULL)));
		/*
		 * Must clear the pointer too: the fail/rwfail path below
		 * frees "buffer" again if it is still non-NULL.
		 */
		FREE_NULL_BUFFER(buffer);
		goto fail;
	}
	auth_info = slurm_get_auth_info();
	rc = g_slurm_auth_verify(auth_cred, auth_info);
	if (rc != SLURM_SUCCESS) {
		error("Verifying authentication credential: %s",
		      g_slurm_auth_errstr(g_slurm_auth_errno(auth_cred)));
		xfree(auth_info);
		(void) g_slurm_auth_destroy(auth_cred);
		FREE_NULL_BUFFER(buffer);
		goto fail;
	}

	/* Get the uid & gid from the credential, then destroy it. */
	uid = g_slurm_auth_get_uid(auth_cred, auth_info);
	gid = g_slurm_auth_get_gid(auth_cred, auth_info);
	xfree(auth_info);
	debug3("  Identity: uid=%d, gid=%d", uid, gid);
	g_slurm_auth_destroy(auth_cred);
	FREE_NULL_BUFFER(buffer);

	/* Handshake succeeded: tell the peer which protocol version we speak */
	rc = SLURM_PROTOCOL_VERSION;
	safe_write(fd, &rc, sizeof(int));

	/* Serve requests until one fails or the connection ends */
	while (1) {
		rc = _handle_request(fd, job, uid, gid);
		if (rc != SLURM_SUCCESS)
			break;
	}

	if (close(fd) == -1)
		error("Closing accepted fd: %m");

	slurm_mutex_lock(&message_lock);
	message_connections--;
	slurm_cond_signal(&message_cond);
	slurm_mutex_unlock(&message_lock);

	debug3("Leaving  _handle_accept");
	return NULL;

fail:
	/* Report the failure to the peer; if that write fails, fall through */
	rc = SLURM_FAILURE;
	safe_write(fd, &rc, sizeof(int));
rwfail:
	/*
	 * NOTE(review): unlike the normal exit above, this path does not
	 * decrement message_connections or signal message_cond -- confirm
	 * against the code that increments the counter.
	 */
	if (close(fd) == -1)
		error("Closing accepted fd after error: %m");
	debug("Leaving  _handle_accept on an error");
	FREE_NULL_BUFFER(buffer);
	return NULL;
}
Example #5
0
static slurmd_conf_t *read_slurmd_conf_lite(int fd)
{
	int rc;
	int len;
	Buf buffer = NULL;
	slurmd_conf_t *confl, *local_conf = NULL;
	int tmp_int = 0;

	/*  First check to see if we've already initialized the
	 *   global slurmd_conf_t in 'conf'. Allocate memory if not.
	 */
	if (conf) {
		confl = conf;
	} else {
		local_conf = xmalloc(sizeof(slurmd_conf_t));
		confl = local_conf;
	}

	safe_read(fd, &len, sizeof(int));

	buffer = init_buf(len);
	safe_read(fd, buffer->head, len);

	rc = unpack_slurmd_conf_lite_no_alloc(confl, buffer);
	if (rc == SLURM_ERROR)
		fatal("slurmstepd: problem with unpack of slurmd_conf");

	free_buf(buffer);

	confl->log_opts.prefix_level = 1;
	confl->log_opts.stderr_level = confl->debug_level;
	confl->log_opts.logfile_level = confl->debug_level;
	confl->log_opts.syslog_level = confl->debug_level;
	/*
	 * If daemonizing, turn off stderr logging -- also, if
	 * logging to a file, turn off syslog.
	 *
	 * Otherwise, if remaining in foreground, turn off logging
	 * to syslog (but keep logfile level)
	 */
	if (confl->daemonize) {
		confl->log_opts.stderr_level = LOG_LEVEL_QUIET;
		if (confl->logfile)
			confl->log_opts.syslog_level = LOG_LEVEL_QUIET;
	} else
		confl->log_opts.syslog_level  = LOG_LEVEL_QUIET;

	confl->acct_freq_task = (uint16_t)NO_VAL;
	tmp_int = acct_gather_parse_freq(PROFILE_TASK,
				       confl->job_acct_gather_freq);
	if (tmp_int != -1)
		confl->acct_freq_task = tmp_int;


	return (confl);

rwfail:
	FREE_NULL_BUFFER(buffer);
	xfree(local_conf);
	return (NULL);
}