Beispiel #1
0
/*
 *
 * Returns SLURM_SUCCESS if successful.  On error returns SLURM_ERROR
 * and sets errno.
 */
int
stepd_completion(int fd, uint16_t protocol_version, step_complete_msg_t *sent)
{
	int req = REQUEST_STEP_COMPLETION_V2;
	int rc;
	int errnum = 0;
	Buf buffer;
	int len = 0;

	buffer = init_buf(0);

	debug("Entering stepd_completion for %u.%u, range_first = %d, range_last = %d",
	      sent->job_id, sent->job_step_id,
	      sent->range_first, sent->range_last);

	if (protocol_version >= SLURM_MIN_PROTOCOL_VERSION) {
		safe_write(fd, &req, sizeof(int));
		safe_write(fd, &sent->range_first, sizeof(int));
		safe_write(fd, &sent->range_last, sizeof(int));
		safe_write(fd, &sent->step_rc, sizeof(int));

		/*
		 * We must not use setinfo over a pipe with slurmstepd here
		 * Indeed, slurmd does a large use of getinfo over a pipe
		 * with slurmstepd and doing the reverse can result in
		 * a deadlock scenario with slurmstepd :
		 * slurmd(lockforread,write)/slurmstepd(write,lockforread)
		 * Do pack/unpack instead to be sure of independances of
		 * slurmd and slurmstepd
		 */
		jobacctinfo_pack(sent->jobacct, protocol_version,
				 PROTOCOL_TYPE_SLURM, buffer);
		len = get_buf_offset(buffer);
		safe_write(fd, &len, sizeof(int));
		safe_write(fd, get_buf_data(buffer), len);
		free_buf(buffer);

		/* Receive the return code and errno */
		safe_read(fd, &rc, sizeof(int));
		safe_read(fd, &errnum, sizeof(int));
	} else {
		error("%s: bad protocol version %hu",
		      __func__, protocol_version);
		rc = SLURM_ERROR;
	}

	errno = errnum;
	return rc;

rwfail:
	FREE_NULL_BUFFER(buffer);
	return -1;
}
Beispiel #2
0
/*
 *
 * Returns SLURM_SUCCESS if successful.  On error returns SLURM_ERROR
 * and sets errno.
 */
int
stepd_completion(int fd, step_complete_msg_t *sent)
{
	int req = REQUEST_STEP_COMPLETION_V2;
	int rc;
	int errnum = 0;
	Buf buffer;
	int len = 0;
	int version = SLURM_PROTOCOL_VERSION;

	buffer = init_buf(0);

	debug("Entering stepd_completion, range_first = %d, range_last = %d",
	      sent->range_first, sent->range_last);
	safe_write(fd, &req, sizeof(int));
	safe_write(fd, &version, sizeof(int));
	safe_write(fd, &sent->range_first, sizeof(int));
	safe_write(fd, &sent->range_last, sizeof(int));
	safe_write(fd, &sent->step_rc, sizeof(int));
	/*
	 * We must not use setinfo over a pipe with slurmstepd here 
	 * Indeed, slurmd does a large use of getinfo over a pipe
	 * with slurmstepd and doing the reverse can result in a deadlock
	 * scenario with slurmstepd : 
	 * slurmd(lockforread,write)/slurmstepd(write,lockforread)
	 * Do pack/unpack instead to be sure of independances of 
	 * slurmd and slurmstepd
	 */
	jobacctinfo_pack(sent->jobacct, SLURM_PROTOCOL_VERSION,
			 PROTOCOL_TYPE_SLURM, buffer);
	len = get_buf_offset(buffer);
	safe_write(fd, &len, sizeof(int));
	safe_write(fd, get_buf_data(buffer), len);
	free_buf(buffer);

	/* Receive the return code and errno */
	safe_read(fd, &rc, sizeof(int));
	safe_read(fd, &errnum, sizeof(int));

	errno = errnum;
	return rc;
rwfail:
	return -1;
}
Beispiel #3
0
static void _pack_step_complete_msg(dbd_step_comp_msg_t *msg,
				    uint16_t rpc_version, Buf buffer)
{
	if (rpc_version >= SLURM_MIN_PROTOCOL_VERSION) {
		pack32(msg->assoc_id, buffer);
		pack64(msg->db_index, buffer);
		pack_time(msg->end_time, buffer);
		pack32(msg->exit_code, buffer);
		jobacctinfo_pack((struct jobacctinfo *)msg->jobacct,
				 rpc_version, PROTOCOL_TYPE_DBD, buffer);
		pack32(msg->job_id, buffer);
		pack_time(msg->job_submit_time, buffer);
		packstr(msg->job_tres_alloc_str, buffer);
		pack32(msg->req_uid, buffer);
		pack_time(msg->start_time, buffer);
		pack16(msg->state, buffer);
		pack32(msg->step_id, buffer);
		pack32(msg->total_tasks, buffer);
	}
}
Beispiel #4
0
extern int jobacctinfo_setinfo(jobacctinfo_t *jobacct,
			       enum jobacct_data_type type, void *data,
			       uint16_t protocol_version)
{
	int rc = SLURM_SUCCESS;
	int *fd = (int *)data;
	struct rusage *rusage = (struct rusage *)data;
	uint32_t *uint32 = (uint32_t *) data;
	uint64_t *uint64 = (uint64_t *) data;
	double *dub = (double *) data;
	jobacct_id_t *jobacct_id = (jobacct_id_t *) data;
	struct jobacctinfo *send = (struct jobacctinfo *) data;
	Buf buffer = NULL;
	if (!plugin_polling)
		return SLURM_SUCCESS;

	switch (type) {
	case JOBACCT_DATA_TOTAL:
		memcpy(jobacct, send, sizeof(struct jobacctinfo));
		break;
	case JOBACCT_DATA_PIPE:
		if (protocol_version >= SLURM_MIN_PROTOCOL_VERSION) {
			int len;
			buffer = init_buf(0);
			jobacctinfo_pack(jobacct, protocol_version,
					 PROTOCOL_TYPE_SLURM, buffer);
			len = get_buf_offset(buffer);
			safe_write(*fd, &len, sizeof(int));
			safe_write(*fd, get_buf_data(buffer), len);
			FREE_NULL_BUFFER(buffer);
		}

		break;
	case JOBACCT_DATA_RUSAGE:
		if (rusage->ru_utime.tv_sec > jobacct->user_cpu_sec)
			jobacct->user_cpu_sec = rusage->ru_utime.tv_sec;
		jobacct->user_cpu_usec = rusage->ru_utime.tv_usec;
		if (rusage->ru_stime.tv_sec > jobacct->sys_cpu_sec)
			jobacct->sys_cpu_sec = rusage->ru_stime.tv_sec;
		jobacct->sys_cpu_usec = rusage->ru_stime.tv_usec;
		break;
	case JOBACCT_DATA_MAX_RSS:
		jobacct->max_rss = *uint64;
		break;
	case JOBACCT_DATA_MAX_RSS_ID:
		jobacct->max_rss_id = *jobacct_id;
		break;
	case JOBACCT_DATA_TOT_RSS:
		jobacct->tot_rss = *uint64;
		break;
	case JOBACCT_DATA_MAX_VSIZE:
		jobacct->max_vsize = *uint64;
		break;
	case JOBACCT_DATA_MAX_VSIZE_ID:
		jobacct->max_vsize_id = *jobacct_id;
		break;
	case JOBACCT_DATA_TOT_VSIZE:
		jobacct->tot_vsize = *uint64;
		break;
	case JOBACCT_DATA_MAX_PAGES:
		jobacct->max_pages = *uint64;
		break;
	case JOBACCT_DATA_MAX_PAGES_ID:
		jobacct->max_pages_id = *jobacct_id;
		break;
	case JOBACCT_DATA_TOT_PAGES:
		jobacct->tot_pages = *uint64;
		break;
	case JOBACCT_DATA_MIN_CPU:
		jobacct->min_cpu = *uint32;
		break;
	case JOBACCT_DATA_MIN_CPU_ID:
		jobacct->min_cpu_id = *jobacct_id;
		break;
	case JOBACCT_DATA_TOT_CPU:
		jobacct->tot_cpu = *dub;
		break;
	case JOBACCT_DATA_ACT_CPUFREQ:
		jobacct->act_cpufreq = *uint32;
		break;
	case JOBACCT_DATA_CONSUMED_ENERGY:
		jobacct->energy.consumed_energy = *uint64;
		break;
	case JOBACCT_DATA_MAX_DISK_READ:
		jobacct->max_disk_read = *dub;
		break;
	case JOBACCT_DATA_MAX_DISK_READ_ID:
		jobacct->max_disk_read_id = *jobacct_id;
		break;
	case JOBACCT_DATA_TOT_DISK_READ:
		jobacct->tot_disk_read = *dub;
		break;
	case JOBACCT_DATA_MAX_DISK_WRITE:
		jobacct->max_disk_write = *dub;
		break;
	case JOBACCT_DATA_MAX_DISK_WRITE_ID:
		jobacct->max_disk_write_id = *jobacct_id;
		break;
	case JOBACCT_DATA_TOT_DISK_WRITE:
		jobacct->tot_disk_write = *dub;
		break;
	default:
		debug("jobacct_g_set_setinfo data_type %d invalid", type);
	}

	return rc;

rwfail:
	FREE_NULL_BUFFER(buffer);
	return SLURM_ERROR;
}