Esempio n. 1
0
extern int slurm_persist_conn_process_msg(slurm_persist_conn_t *persist_conn,
					  persist_msg_t *persist_msg,
					  char *msg_char, uint32_t msg_size,
					  Buf *out_buffer, bool first)
{
	int rc;
	Buf recv_buffer = NULL;
	char *comment = NULL;

	/* puts msg_char into buffer struct */
	recv_buffer = create_buf(msg_char, msg_size);

	memset(persist_msg, 0, sizeof(persist_msg_t));
	rc = slurm_persist_msg_unpack(persist_conn, persist_msg, recv_buffer);
	xfer_buf_data(recv_buffer); /* delete in_buffer struct
				     * without xfree of msg_char
				     * (done later in this
				     * function). */
	if (rc != SLURM_SUCCESS) {
		comment = xstrdup_printf("Failed to unpack %s message",
					 slurmdbd_msg_type_2_str(
						 persist_msg->msg_type, true));
		error("CONN:%u %s", persist_conn->fd, comment);
		*out_buffer = slurm_persist_make_rc_msg(
			persist_conn, rc, comment, persist_msg->msg_type);
		xfree(comment);
	} else if (first &&
		   (persist_msg->msg_type != REQUEST_PERSIST_INIT)) {
		comment = "Initial RPC not REQUEST_PERSIST_INIT";
		error("CONN:%u %s type (%d)",
		      persist_conn->fd, comment, persist_msg->msg_type);
		rc = EINVAL;
		*out_buffer = slurm_persist_make_rc_msg(
			persist_conn, rc, comment,
			REQUEST_PERSIST_INIT);
	} else if (!first &&
		   (persist_msg->msg_type == REQUEST_PERSIST_INIT)) {
		comment = "REQUEST_PERSIST_INIT sent after connection established";
		error("CONN:%u %s", persist_conn->fd, comment);
		rc = EINVAL;
		*out_buffer = slurm_persist_make_rc_msg(
			persist_conn, rc, comment, REQUEST_PERSIST_INIT);
	}

	return rc;
}
/*
 * Pack message header.
 * Returns packed size
 * Note: asymmetric to _recv_unpack_hdr because of additional SLURM header
 */
static int _send_pack_hdr(void *host, void *net)
{
	send_header_t *ptr = (send_header_t *)host;
	Buf packbuf = create_buf(net, sizeof(send_header_t));
	int size = 0;
	pack32(ptr->magic, packbuf);
	pack32(ptr->type, packbuf);
	pack32(ptr->seq, packbuf);
	pack32(ptr->nodeid, packbuf);
	pack32(ptr->msgsize, packbuf);
	size = get_buf_offset(packbuf);
	xassert(size == SEND_HDR_SIZE);
	/* free the Buf packbuf, but not the memory to which it points */
	packbuf->head = NULL;
	free_buf(packbuf);
	return size;
}
Esempio n. 3
0
sound_type snd_make_follow(sound_type sndin, double floor, double risetime, double falltime, long lookahead)
{
    register follow_susp_type susp;
    rate_type sr = sndin->sr;
    time_type t0 = sndin->t0;
    sample_type scale_factor = 1.0F;
    time_type t0_min = t0;
    falloc_generic(susp, follow_susp_node, "snd_make_follow");
    susp->lookahead = lookahead = lookahead + 1;
    susp->delaybuf = create_buf(floor, lookahead);
    susp->delayptr = susp->delaybuf;
    susp->prevptr = susp->delaybuf + lookahead - 1;
    *(susp->prevptr) = (sample_type) floor;;
    susp->endptr = susp->delaybuf + lookahead;
    susp->floor = floor; floor = log(floor);;
    susp->rise_factor = exp(- floor / (sndin->sr * risetime + 0.5));
    susp->fall_factor = exp(floor / (sndin->sr * falltime + 0.5));
    susp->value = susp->floor;
    susp->susp.fetch = follow_s_fetch;
    susp->terminate_cnt = UNKNOWN;
    /* handle unequal start times, if any */
    if (t0 < sndin->t0) sound_prepend_zeros(sndin, t0);
    /* minimum start time over all inputs: */
    t0_min = min(sndin->t0, t0);
    /* how many samples to toss before t0: */
    susp->susp.toss_cnt = (long) ((t0 - t0_min) * sr + 0.5);
    if (susp->susp.toss_cnt > 0) {
        susp->susp.keep_fetch = susp->susp.fetch;
        susp->susp.fetch = follow_toss_fetch;
    }

    /* initialize susp state */
    susp->susp.free = follow_free;
    susp->susp.sr = sr;
    susp->susp.t0 = t0;
    susp->susp.mark = follow_mark;
    susp->susp.print_tree = follow_print_tree;
    susp->susp.name = "follow";
    susp->susp.log_stop_cnt = UNKNOWN;
    susp->susp.current = 0;
    susp->sndin = sndin;
    susp->sndin_cnt = 0;
    return sound_create((snd_susp_type)susp, t0, sr, scale_factor);
}
Esempio n. 4
0
csdbparser::enResult csdbparser::setup_srcfil_read(void)
{
CSDBP_GENERAL_CHK();

long int num;

if (fseek(m_fp, m_trailer_start, SEEK_SET) != 0) {return resFILE_ACCESS_ERR;}

fscanf(m_fp, "%ld\n", &num); // number of source directories
while (num-- > 0) {fgets(m_buf, CSDBP_MINIM_BUFSIZE, m_fp);}
fscanf(m_fp, "%ld\n", &num); // number of include directories
while (num-- > 0) {fgets(m_buf, CSDBP_MINIM_BUFSIZE, m_fp);}

fscanf(m_fp, "%ld\n", &num); // number of files
fscanf(m_fp, "%ld\n", &num); // string size required
create_buf(num);

return resOK;
}
Esempio n. 5
0
void test_copy_constructor()
{
    BuffAggr b1(5);
    // test for simple non-overlapping additions
    uint32_t off = 10;
    for ( uint32_t i=1; i<5; i++) {
        uint8_t *buf = create_buf(i * 3);
        b1.set_buf(buf, off, (i*3));
        off += i*3;
    }
    // test for copy constructor
    BuffAggr b2(b1);
    std::cout << "B1 State: " << b1.ToString() << std::endl;
    std::cout << "B2 State: " << b2.ToString() << std::endl;

    // assignment operator test
    BuffAggr b3 = b1;
    std::cout << "B1 State: " << b1.ToString() << std::endl;
    std::cout << "B3 State: " << b3.ToString() << std::endl;
}
Esempio n. 6
0
static void _respond_with_error(int seq_num, char *sender_host,
				char *sender_ns, int status)
{
	Buf buf = create_buf(NULL, 0);
	char *addr;
	int rc;

	/* rank doesn't matter here, don't send it */
	_setup_header(buf, DMDX_RESPONSE, pmixp_info_namespace(), -1, status);
	/* generate namespace usocket name */
	addr = pmixp_info_nspace_usock(sender_ns);
	/* send response */
	rc = pmixp_server_send(sender_host, PMIXP_MSG_DMDX, seq_num, addr,
			get_buf_data(buf), get_buf_offset(buf), 1);
	if (SLURM_SUCCESS != rc) {
		PMIXP_ERROR("Cannot send direct modex error" " response to %s",
				sender_host);
	}
	xfree(addr);
	free_buf(buf);
}
Esempio n. 7
0
static int _unpack_buffer(void **out,
			  uint16_t rpc_version,
			  Buf buffer)
{
	Buf out_ptr = NULL;
	char *msg = NULL;
	uint32_t uint32_tmp;

	safe_unpackmem_xmalloc(&msg, &uint32_tmp, buffer);
	if (!(out_ptr = create_buf(msg, uint32_tmp)))
		goto unpack_error;
	*out = out_ptr;

	return SLURM_SUCCESS;

unpack_error:
	xfree(msg);
	slurmdbd_free_buffer(out_ptr);
	*out = NULL;
	return SLURM_ERROR;

}
Esempio n. 8
0
static int
_restore_cred_state(slurm_cred_ctx_t ctx)
{
	char *file_name = NULL, *data = NULL;
	uint32_t data_size = 0;
	int cred_fd, data_allocated, data_read = 0;
	Buf buffer = NULL;

	if ( (mkdir(conf->spooldir, 0755) < 0) && (errno != EEXIST) ) {
		fatal("mkdir(%s): %m", conf->spooldir);
		return SLURM_ERROR;
	}

	file_name = xstrdup(conf->spooldir);
	xstrcat(file_name, "/cred_state");
	cred_fd = open(file_name, O_RDONLY);
	if (cred_fd < 0)
		goto cleanup;

	data_allocated = 1024;
	data = xmalloc(sizeof(char)*data_allocated);
	while ((data_read = read(cred_fd, &data[data_size], 1024)) == 1024) {
		data_size += data_read;
		data_allocated += 1024;
		xrealloc(data, data_allocated);
	}
	data_size += data_read;
	close(cred_fd);
	buffer = create_buf(data, data_size);

	slurm_cred_ctx_unpack(ctx, buffer);

cleanup:
	xfree(file_name);
	if (buffer)
		free_buf(buffer);
	return SLURM_SUCCESS;
}
Esempio n. 9
0
csdbparser::enResult csdbparser::parse_headers(void)
{
    int slen;

    if (m_fp == NULL)
    {
        return resFILE_NOT_OPEN;
    }
    create_buf();

    m_base_path.clear();
    m_calling_func.clear();
    m_calling_macro.clear();
    m_current_srcfile.clear();

// Read out the first line i.e. the header
    if (fgets(m_buf, CSDBP_MINIM_BUFSIZE, m_fp) == NULL)
    {
        return resFILE_ACCESS_ERR;
    }
    slen = strlen(chomp(m_buf));

    std::string s(static_cast<const char*>(m_buf));
    m_trailer_start = atol(s.substr(slen - 10).c_str());

// Remove the part of the string after the base path
    if (s.compare(slen - 28, strlen(CSDBP_SUP_PARAM), CSDBP_SUP_PARAM) == 0)
        m_buf[slen - 28] = 0;
    else m_buf[slen - 14] = 0;

//slen = strlen(m_buf);

    s = static_cast<const char*>(m_buf);
    m_base_path = s.substr(strlen("cscope ") + strlen(CSDBP_SUPPORTED_VER) + 1);
    if (*(m_base_path.rbegin()) == '\"') m_base_path.erase(m_base_path.end() - 1);

    return resOK;
}
Esempio n. 10
0
static size_t _base_hdr_pack_full_samearch(pmixp_base_hdr_t *hdr, void *net)
{
	int offset = 0;

	if (hdr->ext_flag) {
		hdr->msgsize += PMIXP_BASE_HDR_EXT_SIZE(pmixp_dconn_ep_len());
	}

	WRITE_HDR_FIELD(net, offset, hdr->magic);
	WRITE_HDR_FIELD(net, offset, hdr->type);
	WRITE_HDR_FIELD(net, offset, hdr->seq);
	WRITE_HDR_FIELD(net, offset, hdr->nodeid);
	WRITE_HDR_FIELD(net, offset, hdr->msgsize);
	WRITE_HDR_FIELD(net, offset, hdr->ext_flag);
	if (hdr->ext_flag) {
		Buf buf = create_buf(net + offset, PMIXP_BASE_HDR_MAX);
		packmem(pmixp_dconn_ep_data(), pmixp_dconn_ep_len(), buf);
		offset += get_buf_offset(buf);
		buf->head = NULL;
		free_buf(buf);
	}
	return offset;
}
Esempio n. 11
0
/*
 * See process_handler_t prototype description
 * on the details of this function output values
 */
static void _direct_new_msg_conn(pmixp_conn_t *conn, void *_hdr, void *msg)
{
	pmixp_base_hdr_t *hdr = (pmixp_base_hdr_t*)_hdr;
	Buf buf = create_buf(msg, hdr->msgsize);
	_process_server_request(hdr, buf);
}
Esempio n. 12
0
File: req.c Progetto: birc-aeh/slurm
static int
_handle_completion(int fd, stepd_step_rec_t *job, uid_t uid)
{
	int rc = SLURM_SUCCESS;
	int errnum = 0;
	int first;
	int last;
	jobacctinfo_t *jobacct = NULL;
	int step_rc;
	char* buf;
	int len;
	Buf buffer;
	int version;	/* For future use */
	bool lock_set = false;

	debug("_handle_completion for job %u.%u",
	      job->jobid, job->stepid);

	debug3("  uid = %d", uid);
	if (!_slurm_authorized_user(uid)) {
		debug("step completion message from uid %ld for job %u.%u ",
		      (long)uid, job->jobid, job->stepid);
		rc = -1;
		errnum = EPERM;
		/* Send the return code and errno */
		safe_write(fd, &rc, sizeof(int));
		safe_write(fd, &errnum, sizeof(int));
		return SLURM_SUCCESS;
	}

	safe_read(fd, &version, sizeof(int));
	safe_read(fd, &first, sizeof(int));
	safe_read(fd, &last, sizeof(int));
	safe_read(fd, &step_rc, sizeof(int));

	/*
	 * We must not use getinfo over a pipe with slurmd here
	 * Indeed, slurmstepd does a large use of setinfo over a pipe
	 * with slurmd and doing the reverse can result in a deadlock
	 * scenario with slurmd :
	 * slurmd(lockforread,write)/slurmstepd(write,lockforread)
	 * Do pack/unpack instead to be sure of independances of
	 * slurmd and slurmstepd
	 */
	safe_read(fd, &len, sizeof(int));
	buf = xmalloc(len);
	safe_read(fd, buf, len);
	buffer = create_buf(buf, len);
	jobacctinfo_unpack(&jobacct, SLURM_PROTOCOL_VERSION,
			   PROTOCOL_TYPE_SLURM, buffer, 1);
	free_buf(buffer);

	/*
	 * Record the completed nodes
	 */
	pthread_mutex_lock(&step_complete.lock);
	lock_set = true;
	if (! step_complete.wait_children) {
		rc = -1;
		errnum = ETIMEDOUT; /* not used anyway */
		goto timeout;
	}

	/* SlurmUser or root can craft a launch without a valid credential
	 * ("srun --no-alloc ...") and no tree information can be built
	 *  without the hostlist from the credential. */
	if (step_complete.rank >= 0) {
#if 0
		char bits_string[128];
		debug2("Setting range %d (bit %d) through %d(bit %d)",
		       first, first-(step_complete.rank+1),
		       last, last-(step_complete.rank+1));
		bit_fmt(bits_string, sizeof(bits_string), step_complete.bits);
		debug2("  before bits: %s", bits_string);
#endif
		bit_nset(step_complete.bits,
			 first - (step_complete.rank+1),
			 last - (step_complete.rank+1));
#if 0
		bit_fmt(bits_string, sizeof(bits_string), step_complete.bits);
		debug2("  after bits: %s", bits_string);
#endif
	}
	step_complete.step_rc = MAX(step_complete.step_rc, step_rc);

	/************* acct stuff ********************/
	jobacctinfo_aggregate(step_complete.jobacct, jobacct);
timeout:
	jobacctinfo_destroy(jobacct);
	/*********************************************/

	/* Send the return code and errno, we do this within the locked
	 * region to ensure that the stepd doesn't exit before we can
	 * perform this send. */
	safe_write(fd, &rc, sizeof(int));
	safe_write(fd, &errnum, sizeof(int));
	pthread_cond_signal(&step_complete.cond);
	pthread_mutex_unlock(&step_complete.lock);

	return SLURM_SUCCESS;


rwfail:	if (lock_set) {
		pthread_cond_signal(&step_complete.cond);
		pthread_mutex_unlock(&step_complete.lock);
	}
	return SLURM_FAILURE;
}
Esempio n. 13
0
/*
 * Restore global nodeinfo from a file.
 *
 * NOTE: switch_p_libstate_restore is only called by slurmctld, and only
 * once at start-up.  We exploit this fact to spawn a pthread to
 * periodically call _switch_p_libstate_save().
 */
extern int switch_p_libstate_restore ( char * dir_name, bool recover )
{
	char *data = NULL, *file_name;
	Buf buffer = NULL;
	int error_code = SLURM_SUCCESS;
	int state_fd, data_allocated = 0, data_read = 0, data_size = 0;
	DEF_TIMERS;

	xassert(dir_name != NULL);

	if (debug_flags & DEBUG_FLAG_SWITCH) {
		START_TIMER;
		info("switch_p_libstate_restore() starting");
	}
	_spawn_state_save_thread(xstrdup(dir_name));
	if (!recover)   /* clean start, no recovery */
		return nrt_init();

	file_name = xstrdup(dir_name);
	xstrcat(file_name, "/nrt_state");
	state_fd = open (file_name, O_RDONLY);
	if (state_fd >= 0) {
		data_allocated = NRT_BUF_SIZE;
		data = xmalloc(data_allocated);
		while (1) {
			data_read = read (state_fd, &data[data_size],
					  NRT_BUF_SIZE);
			if ((data_read < 0) && (errno == EINTR))
				continue;
			if (data_read < 0) {
				error ("Read error on %s, %m", file_name);
				error_code = SLURM_ERROR;
				break;
			} else if (data_read == 0)
				break;
			data_size      += data_read;
			data_allocated += data_read;
			xrealloc(data, data_allocated);
		}
		close (state_fd);
		xfree(file_name);
	} else {
		error("No %s file for switch/nrt state recovery", file_name);
		error("Starting switch/nrt with clean state");
		xfree(file_name);
		return nrt_init();
	}

	if (error_code == SLURM_SUCCESS) {
		buffer = create_buf (data, data_size);
		data = NULL;    /* now in buffer, don't xfree() */
		if (nrt_libstate_restore(buffer) < 0)
			error_code = SLURM_ERROR;
	}

	if (buffer)
		free_buf(buffer);
	xfree(data);
	if (debug_flags & DEBUG_FLAG_SWITCH) {
		END_TIMER;
		info("switch_p_libstate_restore() ending %s", TIME_STR);
	}

	return error_code;
}
Esempio n. 14
0
// create list of blocks based on map strings
struct block * get_mapbuf_str (char * str) {
    struct block * buffer = create_buf();
    unsigned short l = strlen(str);
    unsigned short i, j;
    unsigned short is_specialkey = 0;
    char sk[MAXSC+1];
    sk[0] = '\0';

    for (i=0; i<l; i++) {

        // Agrego special keys
        if (str[i] == '<') {
           is_specialkey = 1;

        } else if (str[i] == '>') {
           is_specialkey = 0;
           if (! strcmp(sk, "CR"))                                  // CR - ENTER key
               addto_buf(buffer, OKEY_ENTER);
           else if (! strcmp(sk, "TAB"))                            // TAB
               addto_buf(buffer, OKEY_TAB);
           else if (! strcmp(sk, "LEFT"))                           // LEFT
               addto_buf(buffer, OKEY_LEFT);
           else if (! strcmp(sk, "RIGHT"))                          // RIGHT
               addto_buf(buffer, OKEY_RIGHT);
           else if (! strcmp(sk, "DOWN"))                           // DOWN
               addto_buf(buffer, OKEY_DOWN);
           else if (! strcmp(sk, "UP"))                             // UP
               addto_buf(buffer, OKEY_UP);
           else if (! strcmp(sk, "DEL"))                            // DEL
               addto_buf(buffer, OKEY_DEL);
           else if (! strcmp(sk, "BS"))                             // BS
               addto_buf(buffer, OKEY_BS);
           else if (! strcmp(sk, "HOME"))                           // HOME
               addto_buf(buffer, OKEY_HOME);
           else if (! strcmp(sk, "END"))                            // END
               addto_buf(buffer, OKEY_END);
           else if (! strcmp(sk, "PGDOWN"))                         // PGDOWN
               addto_buf(buffer, OKEY_PGDOWN);
           else if (! strcmp(sk, "PGUP"))                           // PGUP
               addto_buf(buffer, OKEY_PGUP);
           else if (! strncmp(sk, "C-", 2) && strlen(sk) == 3       // C-X
                    && ( (sk[2] > 64 && sk[2] < 91) || (sk[2] > 96 && sk[2] < 123)) )
               addto_buf(buffer, ctl(tolower(sk[2])));

           sk[0]='\0';

        } else if (is_specialkey && strlen(sk) < MAXSC-1) {
           add_char(sk, str[i], strlen(sk));

        // Agrego otros caracteres
        } else {
           addto_buf(buffer, (int) str[i]);
        }
    }

    // en caso de que se tenga en el buffer un string del tipo "<algo", sin la terminación ">", se inserta en el buffer
    if (is_specialkey && i == l) {
        j = strlen(sk);
        addto_buf(buffer, '<');
        for (i=0; i<j; i++) addto_buf(buffer, (int) str[l-j+i]);
    }
    return buffer;
}
Esempio n. 15
0
extern int jobacctinfo_getinfo(
	jobacctinfo_t *jobacct, enum jobacct_data_type type, void *data,
	uint16_t protocol_version)
{
	int rc = SLURM_SUCCESS;
	int *fd = (int *)data;
	uint32_t *uint32 = (uint32_t *) data;
	uint64_t *uint64 = (uint64_t *) data;
	double *dub = (double *) data;
	jobacct_id_t *jobacct_id = (jobacct_id_t *) data;
	struct rusage *rusage = (struct rusage *)data;
	struct jobacctinfo *send = (struct jobacctinfo *) data;
	char *buf = NULL;

	if (!plugin_polling)
		return SLURM_SUCCESS;

	/* jobacct needs to be allocated before this is called.	*/
	xassert(jobacct);

	switch (type) {
	case JOBACCT_DATA_TOTAL:
		memcpy(send, jobacct, sizeof(struct jobacctinfo));
		break;
	case JOBACCT_DATA_PIPE:
		if (protocol_version >= SLURM_MIN_PROTOCOL_VERSION) {
			int len;
			Buf buffer;

			safe_read(*fd, &len, sizeof(int));
			buf = xmalloc(len);
			safe_read(*fd, buf, len);
			buffer = create_buf(buf, len);
			jobacctinfo_unpack(&jobacct, protocol_version,
					   PROTOCOL_TYPE_SLURM, buffer, 0);
			free_buf(buffer);
		}

		break;
	case JOBACCT_DATA_RUSAGE:
		memset(rusage, 0, sizeof(struct rusage));
		rusage->ru_utime.tv_sec = jobacct->user_cpu_sec;
		rusage->ru_utime.tv_usec = jobacct->user_cpu_usec;
		rusage->ru_stime.tv_sec = jobacct->sys_cpu_sec;
		rusage->ru_stime.tv_usec = jobacct->sys_cpu_usec;
		break;
	case JOBACCT_DATA_MAX_RSS:
		*uint64 = jobacct->max_rss;
		break;
	case JOBACCT_DATA_MAX_RSS_ID:
		*jobacct_id = jobacct->max_rss_id;
		break;
	case JOBACCT_DATA_TOT_RSS:
		*uint64 = jobacct->tot_rss;
		break;
	case JOBACCT_DATA_MAX_VSIZE:
		*uint64 = jobacct->max_vsize;
		break;
	case JOBACCT_DATA_MAX_VSIZE_ID:
		*jobacct_id = jobacct->max_vsize_id;
		break;
	case JOBACCT_DATA_TOT_VSIZE:
		*uint64 = jobacct->tot_vsize;
		break;
	case JOBACCT_DATA_MAX_PAGES:
		*uint64 = jobacct->max_pages;
		break;
	case JOBACCT_DATA_MAX_PAGES_ID:
		*jobacct_id = jobacct->max_pages_id;
		break;
	case JOBACCT_DATA_TOT_PAGES:
		*uint64 = jobacct->tot_pages;
		break;
	case JOBACCT_DATA_MIN_CPU:
		*uint32 = jobacct->min_cpu;
		break;
	case JOBACCT_DATA_MIN_CPU_ID:
		*jobacct_id = jobacct->min_cpu_id;
		break;
	case JOBACCT_DATA_TOT_CPU:
		*dub = jobacct->tot_cpu;
		break;
	case JOBACCT_DATA_ACT_CPUFREQ:
		*uint32 = jobacct->act_cpufreq;
		break;
	case JOBACCT_DATA_CONSUMED_ENERGY:
		*uint64 = jobacct->energy.consumed_energy;
		break;
	case JOBACCT_DATA_MAX_DISK_READ:
		*dub = jobacct->max_disk_read;
		break;
	case JOBACCT_DATA_MAX_DISK_READ_ID:
		*jobacct_id = jobacct->max_disk_read_id;
		break;
	case JOBACCT_DATA_TOT_DISK_READ:
		*dub = jobacct->tot_disk_read;
		break;
	case JOBACCT_DATA_MAX_DISK_WRITE:
		*dub = jobacct->max_disk_write;
		break;
	case JOBACCT_DATA_MAX_DISK_WRITE_ID:
		*jobacct_id = jobacct->max_disk_write_id;
		break;
	case JOBACCT_DATA_TOT_DISK_WRITE:
		*dub = jobacct->tot_disk_write;
		break;
	default:
		debug("jobacct_g_set_getinfo data_type %d invalid", type);
	}
	return rc;

rwfail:
	xfree(buf);
	return SLURM_ERROR;
}
Esempio n. 16
0
File: sicp.c Progetto: rohgarg/slurm
static void _load_sicp_state(void)
{
    int data_allocated, data_read = 0;
    uint32_t data_size = 0;
    int state_fd, sicp_cnt = 0;
    char *data = NULL, *state_file;
    struct stat stat_buf;
    Buf buffer;
    char *ver_str = NULL;
    uint32_t ver_str_len;
    uint16_t protocol_version = (uint16_t)NO_VAL;
    uint32_t job_id = 0;
    uint32_t job_state = 0;
    sicp_job_t *sicp_ptr;
    time_t buf_time, now;

    /* read the file */
    lock_state_files();
    state_file = xstrdup(slurmctld_conf.state_save_location);
    xstrcat(state_file, "/sicp_state");
    state_fd = open(state_file, O_RDONLY);
    if (state_fd < 0) {
        error("Could not open job state file %s: %m", state_file);
        unlock_state_files();
        xfree(state_file);
        return;
    } else if (fstat(state_fd, &stat_buf) < 0) {
        error("Could not stat job state file %s: %m", state_file);
        unlock_state_files();
        (void) close(state_fd);
        xfree(state_file);
        return;
    } else if (stat_buf.st_size < 10) {
        error("Job state file %s too small", state_file);
        unlock_state_files();
        (void) close(state_fd);
        xfree(state_file);
        return;
    }

    data_allocated = BUF_SIZE;
    data = xmalloc(data_allocated);
    while (1) {
        data_read = read(state_fd, &data[data_size], BUF_SIZE);
        if (data_read < 0) {
            if (errno == EINTR)
                continue;
            else {
                error("Read error on %s: %m", state_file);
                break;
            }
        } else if (data_read == 0)	/* eof */
            break;
        data_size      += data_read;
        data_allocated += data_read;
        xrealloc(data, data_allocated);
    }
    close(state_fd);
    xfree(state_file);
    unlock_state_files();

    buffer = create_buf(data, data_size);
    safe_unpackstr_xmalloc(&ver_str, &ver_str_len, buffer);
    debug3("Version string in sicp_state header is %s", ver_str);
    if (ver_str && !strcmp(ver_str, "PROTOCOL_VERSION"))
        safe_unpack16(&protocol_version, buffer);
    xfree(ver_str);

    if (protocol_version == (uint16_t)NO_VAL) {
        error("************************************************");
        error("Can not recover SICP state, incompatible version");
        error("************************************************");
        xfree(ver_str);
        free_buf(buffer);
        return;
    }
    safe_unpack_time(&buf_time, buffer);

    now = time(NULL);
    while (remaining_buf(buffer) > 0) {
        safe_unpack32(&job_id,    buffer);
        safe_unpack32(&job_state, buffer);
        sicp_ptr = xmalloc(sizeof(sicp_job_t));
        sicp_ptr->job_id      = job_id;
        sicp_ptr->job_state   = job_state;
        sicp_ptr->update_time = now;
        list_append(sicp_job_list, sicp_ptr);
        _add_job_hash(sicp_ptr);
        sicp_cnt++;
    }

    free_buf(buffer);
    info("Recovered information about %d sicp jobs", sicp_cnt);
    if (slurm_get_debug_flags() & DEBUG_FLAG_SICP)
        _log_sicp_recs();
    return;

unpack_error:
    error("Incomplete sicp data checkpoint file");
    info("Recovered information about %d sicp jobs", sicp_cnt);
    free_buf(buffer);
    return;
}
Esempio n. 17
0
static void _read_last_decay_ran(time_t *last_ran, time_t *last_reset)
{
	int data_allocated, data_read = 0;
	uint32_t data_size = 0;
	int state_fd;
	char *data = NULL, *state_file;
	Buf buffer;

	xassert(last_ran);
	xassert(last_reset);

	(*last_ran) = 0;
	(*last_reset) = 0;

	/* read the file */
	state_file = xstrdup(slurmctld_conf.state_save_location);
	xstrcat(state_file, "/priority_last_decay_ran");
	lock_state_files();
	state_fd = open(state_file, O_RDONLY);
	if (state_fd < 0) {
		info("No last decay (%s) to recover", state_file);
		unlock_state_files();
		return;
	} else {
		data_allocated = BUF_SIZE;
		data = xmalloc(data_allocated);
		while (1) {
			data_read = read(state_fd, &data[data_size],
					 BUF_SIZE);
			if (data_read < 0) {
				if (errno == EINTR)
					continue;
				else {
					error("Read error on %s: %m",
					      state_file);
					break;
				}
			} else if (data_read == 0)	/* eof */
				break;
			data_size      += data_read;
			data_allocated += data_read;
			xrealloc(data, data_allocated);
		}
		close(state_fd);
	}
	xfree(state_file);
	unlock_state_files();

	buffer = create_buf(data, data_size);
	safe_unpack_time(last_ran, buffer);
	safe_unpack_time(last_reset, buffer);
	free_buf(buffer);
	if (priority_debug)
		info("Last ran decay on jobs at %ld", (long)*last_ran);

	return;

unpack_error:
	error("Incomplete priority last decay file returning");
	free_buf(buffer);
	return;

}
void init_interconnect (char* config_file,
                        unsigned int n_shader, 
                        unsigned int n_mem )
{
   _n_shader = n_shader;
   _n_mem = n_mem;
   if (! config_file ) {
      cout << "Interconnect Requires a configfile" << endl;
      exit (-1);
   }
   icnt_config.Parse( config_file );

   net_c = icnt_config.GetInt( "network_count" );
   if (net_c==2) {
      doub_net = true;    
   } else if (net_c<1 || net_c>2) {
      cout <<net_c<<" Network_count less than 1 or more than 2 not supported."<<endl;
      abort();
   }

   g_num_vcs = icnt_config.GetInt( "num_vcs" );
   InitializeRoutingMap( );
   InitializeTrafficMap( );
   InitializeInjectionMap( );

   RandomSeed( icnt_config.GetInt("seed") );

   Network_gpgpu **net;

   traffic = new TrafficManager *[net_c];
   net = new Network_gpgpu *[net_c];

   for (unsigned i=0;i<net_c;i++) {
      string topo;

      icnt_config.GetStr( "topology", topo );

      if ( topo == "torus" ) {
         net[i] = new KNCube( icnt_config, true ); 
      } else if (   topo =="mesh"  ) {
         net[i] = new KNCube( icnt_config, false );
      } else if ( topo == "fly" ) {
         net[i] = new KNFly( icnt_config );
      } else if ( topo == "single" ) {
         net[i] = new SingleNet( icnt_config );
      } else {
         cerr << "Unknown topology " << topo << endl;
         exit(-1);
      }

      if ( icnt_config.GetInt( "link_failures" ) ) {
         net[i]->InsertRandomFaults( icnt_config );
      }

      traffic[i] = new TrafficManager ( icnt_config, net[i], i/*id*/ );
   }

   fixed_lat_icnt = icnt_config.GetInt( "fixed_lat_per_hop" );

   if (icnt_config.GetInt( "perfect_icnt" )) {
      perfect_icnt = true;
      fixed_lat_icnt = 1;  
   }
   _flit_size = icnt_config.GetInt( "flit_size" );
   if (icnt_config.GetInt("ejection_buf_size")) {
      ejection_buffer_capacity = icnt_config.GetInt( "ejection_buf_size" ) ;
   } else {
      ejection_buffer_capacity = icnt_config.GetInt( "vc_buf_size" );
   }
   boundary_buf_capacity = icnt_config.GetInt( "boundary_buf_size" ) ;
   if (icnt_config.GetInt("input_buf_size")) {
      input_buffer_capacity = icnt_config.GetInt("input_buf_size");
   } else {
      input_buffer_capacity = 9;
   }
   create_buf(traffic[0]->_dests,input_buffer_capacity,icnt_config.GetInt( "num_vcs" )); 
   MATLAB_OUTPUT        = icnt_config.GetInt("MATLAB_OUTPUT");
   DISPLAY_LAT_DIST     = icnt_config.GetInt("DISPLAY_LAT_DIST");
   DISPLAY_HOP_DIST     = icnt_config.GetInt("DISPLAY_HOP_DIST");
   DISPLAY_PAIR_LATENCY = icnt_config.GetInt("DISPLAY_PAIR_LATENCY");
   create_node_map(n_shader,n_mem,traffic[0]->_dests, icnt_config.GetInt("use_map"));
   for (unsigned i=0;i<net_c;i++) {
      traffic[i]->_FirstStep();
   }
}
Esempio n. 19
0
int switch_p_libstate_restore(char *dir_name, bool recover)
{
#ifdef HAVE_NATIVE_CRAY
    char *data = NULL, *file_name;
    Buf buffer = NULL;
    int error_code = SLURM_SUCCESS;
    int state_fd, data_allocated = 0, data_read = 0, data_size = 0;

    xassert(dir_name != NULL);

    if (debug_flags & DEBUG_FLAG_SWITCH) {
        CRAY_INFO("restore from %s, recover %d",
                  dir_name,  (int) recover);
    }

    if (!recover)		/* clean start, no recovery */
        return SLURM_SUCCESS;

    file_name = xstrdup(dir_name);
    xstrcat(file_name, "/switch_cray_state");
    state_fd = open (file_name, O_RDONLY);
    if (state_fd >= 0) {
        data_allocated = SWITCH_BUF_SIZE;
        data = xmalloc(data_allocated);
        while (1) {
            data_read = read (state_fd, &data[data_size],
                              SWITCH_BUF_SIZE);
            if ((data_read < 0) && (errno == EINTR))
                continue;
            if (data_read < 0) {
                CRAY_ERR("Read error on %s, %m", file_name);
                error_code = SLURM_ERROR;
                break;
            } else if (data_read == 0)
                break;
            data_size      += data_read;
            data_allocated += data_read;
            xrealloc(data, data_allocated);
        }
        close (state_fd);
        (void) unlink(file_name);	/* One chance to recover */
        xfree(file_name);
    } else {
        CRAY_ERR("No %s file for switch/cray state recovery",
                 file_name);
        CRAY_ERR("Starting switch/cray with clean state");
        xfree(file_name);
        return SLURM_SUCCESS;
    }

    if (error_code == SLURM_SUCCESS) {
        buffer = create_buf (data, data_size);
        data = NULL;	/* now in buffer, don't xfree() */
        _state_read_buf(buffer);
    }

    if (buffer)
        free_buf(buffer);
    xfree(data);
#endif
    return SLURM_SUCCESS;
}
Esempio n. 20
0
static void _process_server_request(pmixp_base_hdr_t *hdr, Buf buf)
{
	int rc;

	switch (hdr->type) {
	case PMIXP_MSG_FAN_IN:
	case PMIXP_MSG_FAN_OUT: {
		pmixp_coll_t *coll;
		pmixp_proc_t *procs = NULL;
		size_t nprocs = 0;
		pmixp_coll_type_t type = 0;
		int c_nodeid;

		rc = pmixp_coll_unpack_info(buf, &type, &c_nodeid,
					    &procs, &nprocs);
		if (SLURM_SUCCESS != rc) {
			char *nodename = pmixp_info_job_host(hdr->nodeid);
			PMIXP_ERROR("Bad message header from node %s",
				    nodename);
			xfree(nodename);
			goto exit;
		}
		coll = pmixp_state_coll_get(type, procs, nprocs);
		xfree(procs);

		PMIXP_DEBUG("FENCE collective message from nodeid = %u, "
			    "type = %s, seq = %d",
			    hdr->nodeid,
			    ((PMIXP_MSG_FAN_IN == hdr->type) ?
				     "fan-in" : "fan-out"),
			    hdr->seq);
		rc = pmixp_coll_check_seq(coll, hdr->seq);
		if (PMIXP_COLL_REQ_FAILURE == rc) {
			/* this is unexepable event: either something went
			 * really wrong or the state machine is incorrect.
			 * This will 100% lead to application hang.
			 */
			char *nodename = pmixp_info_job_host(hdr->nodeid);
			PMIXP_ERROR("Bad collective seq. #%d from %s, current"
				    " is %d",
				    hdr->seq, nodename, coll->seq);
			pmixp_debug_hang(0); /* enable hang to debug this! */
			slurm_kill_job_step(pmixp_info_jobid(),
					    pmixp_info_stepid(), SIGKILL);
			xfree(nodename);
			break;
		} else if (PMIXP_COLL_REQ_SKIP == rc) {
			PMIXP_DEBUG("Wrong collective seq. #%d from"
				    " nodeid %u, current is %d, skip "
				    "this message",
				    hdr->seq, hdr->nodeid, coll->seq);
			goto exit;
		}

		if (PMIXP_MSG_FAN_IN == hdr->type) {
			pmixp_coll_contrib_child(coll, hdr->nodeid,
						 hdr->seq, buf);
		} else {
			pmixp_coll_contrib_parent(coll, hdr->nodeid,
						  hdr->seq, buf);
		}

		break;
	}
	case PMIXP_MSG_DMDX: {
		pmixp_dmdx_process(buf, hdr->nodeid, hdr->seq);
		/* buf will be free'd by the PMIx callback so
		 * protect the data by voiding the buffer.
		 * Use the statement below instead of (buf = NULL)
		 * to maintain incapsulation - in general `buf`is
		 * not a pointer, but opaque type.
		 */
		buf = create_buf(NULL, 0);
		break;
	}
	case PMIXP_MSG_INIT_DIRECT:
		PMIXP_DEBUG("Direct connection init from %d", hdr->nodeid);
		break;
#ifndef NDEBUG
	case PMIXP_MSG_PINGPONG: {
		/* if the pingpong mode was activated -
		 * node 0 sends ping requests
		 * and receiver assumed to respond back to node 0
		 */
		int msize = remaining_buf(buf);

		if (pmixp_info_nodeid()) {
			pmixp_server_pp_send(0, msize);
		} else {
			if (pmixp_server_pp_same_thread()) {
				if (pmixp_server_pp_count() ==
				    pmixp_server_pp_warmups()) {
					pmixp_server_pp_start();
				}
				if (!pmixp_server_pp_check_fini(msize)) {
					pmixp_server_pp_send(1, msize);
				}
			}
		}
		pmixp_server_pp_inc();
		break;
	}
#endif
	default:
		PMIXP_ERROR("Unknown message type %d", hdr->type);
		break;
	}

exit:
	free_buf(buf);
}
Esempio n. 21
0
static void _process_server_request(recv_header_t *_hdr, void *payload)
{
	send_header_t *hdr = &_hdr->send_hdr;
	char *nodename = pmixp_info_job_host(hdr->nodeid);
	Buf buf;
	int rc;

	buf = create_buf(payload, hdr->msgsize);

	switch (hdr->type) {
	case PMIXP_MSG_FAN_IN:
	case PMIXP_MSG_FAN_OUT: {
		pmixp_coll_t *coll;
		pmix_proc_t *procs = NULL;
		size_t nprocs = 0;
		pmixp_coll_type_t type = 0;

		rc = pmixp_coll_unpack_ranges(buf, &type, &procs, &nprocs);
		if (SLURM_SUCCESS != rc) {
			PMIXP_ERROR("Bad message header from node %s", nodename);
			return;
		}
		coll = pmixp_state_coll_get(type, procs, nprocs);
		xfree(procs);

		PMIXP_DEBUG("FENCE collective message from node \"%s\", type = %s, seq = %d",
			    nodename, (PMIXP_MSG_FAN_IN == hdr->type) ? "fan-in" : "fan-out",
			    hdr->seq);
		rc = pmixp_coll_check_seq(coll, hdr->seq, nodename);
		if (PMIXP_COLL_REQ_FAILURE == rc) {
			/* this is unexepable event: either something went
			 * really wrong or the state machine is incorrect.
			 * This will 100% lead to application hang.
			 */
			PMIXP_ERROR("Bad collective seq. #%d from %s, current is %d",
				    hdr->seq, nodename, coll->seq);
			pmixp_debug_hang(0); /* enable hang to debug this! */
			slurm_kill_job_step(pmixp_info_jobid(), pmixp_info_stepid(),
					    SIGKILL);

			break;
		} else if (PMIXP_COLL_REQ_SKIP == rc) {
			PMIXP_DEBUG("Wrong collective seq. #%d from %s, current is %d, skip this message",
				    hdr->seq, nodename, coll->seq);
			free_buf(buf);
			break;
		}

		if (PMIXP_MSG_FAN_IN == hdr->type) {
			pmixp_coll_contrib_node(coll, nodename, buf);
			/* we don't need this buffer anymore */
			free_buf(buf);
		} else {
			pmixp_coll_bcast(coll, buf);
			/* buf will be free'd by the PMIx callback */
		}

		break;
	}
	case PMIXP_MSG_DMDX: {
		pmixp_dmdx_process(buf, nodename, hdr->seq);
		break;
	}
	case PMIXP_MSG_HEALTH_CHK: {
		/* this is just health ping.
		 * TODO: can we do something more sophisticated?
		 */
		free_buf(buf);
		break;
	}
	default:
		PMIXP_ERROR("Unknown message type %d", hdr->type);
		break;
	}
	xfree(nodename);
}
Esempio n. 22
0
extern slurmdb_federation_rec_t *fed_mgr_state_load(char *state_save_location)
{
	Buf buffer = NULL;
	char *data = NULL, *state_file;
	time_t buf_time;
	uint16_t ver = 0;
	uint32_t data_size = 0;
	int state_fd;
	int data_allocated, data_read = 0, error_code = SLURM_SUCCESS;
	slurmdb_federation_rec_t *ret_fed = NULL;

	state_file = xstrdup_printf("%s/%s", state_save_location,
				    FED_MGR_STATE_FILE);
	state_fd = open(state_file, O_RDONLY);
	if (state_fd < 0) {
		error("No fed_mgr state file (%s) to recover", state_file);
		xfree(state_file);
		return NULL;
	} else {
		data_allocated = BUF_SIZE;
		data = xmalloc(data_allocated);
		while (1) {
			data_read = read(state_fd, &data[data_size],
					 BUF_SIZE);
			if (data_read < 0) {
				if (errno == EINTR)
					continue;
				else {
					error("Read error on %s: %m",
					      state_file);
					break;
				}
			} else if (data_read == 0)	/* eof */
				break;
			data_size      += data_read;
			data_allocated += data_read;
			xrealloc(data, data_allocated);
		}
		close(state_fd);
	}
	xfree(state_file);

	buffer = create_buf(data, data_size);

	safe_unpack16(&ver, buffer);

	debug3("Version in fed_mgr_state header is %u", ver);
	if (ver > SLURM_PROTOCOL_VERSION || ver < SLURM_MIN_PROTOCOL_VERSION) {
		error("***********************************************");
		error("Can not recover fed_mgr state, incompatible version, "
		      "got %u need > %u <= %u", ver,
		      SLURM_MIN_PROTOCOL_VERSION, SLURM_PROTOCOL_VERSION);
		error("***********************************************");
		free_buf(buffer);
		return NULL;
	}

	safe_unpack_time(&buf_time, buffer);

	error_code = slurmdb_unpack_federation_rec((void **)&ret_fed, ver,
						   buffer);
	if (error_code != SLURM_SUCCESS)
		goto unpack_error;
	else if (!ret_fed || !ret_fed->name ||
		 !list_count(ret_fed->cluster_list)) {
		slurmdb_destroy_federation_rec(ret_fed);
		ret_fed = NULL;
		error("No feds retrieved");
	} else {
		/* We want to free the connections here since they don't exist
		 * anymore, but they were packed when state was saved. */
		slurmdb_cluster_rec_t *cluster;
		ListIterator itr = list_iterator_create(
			ret_fed->cluster_list);
		while ((cluster = list_next(itr))) {
			slurm_persist_conn_destroy(cluster->fed.recv);
			cluster->fed.recv = NULL;
			slurm_persist_conn_destroy(cluster->fed.send);
			cluster->fed.send = NULL;
		}
		list_iterator_destroy(itr);
	}

	free_buf(buffer);

	return ret_fed;

unpack_error:
	free_buf(buffer);

	return NULL;
}
Esempio n. 23
0
/*
 * load_all_front_end_state - Load the front_end node state from file, recover
 *	on slurmctld restart. Execute this after loading the configuration
 *	file data. Data goes into common storage.
 * IN state_only - if true, overwrite only front_end node state and reason
 *	Use this to overwrite the "UNKNOWN state typically used in slurm.conf
 * RET 0 or error code
 * NOTE: READ lock_slurmctld config before entry
 */
extern int load_all_front_end_state(bool state_only)
{
#ifdef HAVE_FRONT_END
	char *node_name = NULL, *reason = NULL, *data = NULL, *state_file;
	int data_allocated, data_read = 0, error_code = 0, node_cnt = 0;
	uint16_t node_state;
	uint32_t data_size = 0, name_len;
	uint32_t reason_uid = NO_VAL;
	time_t reason_time = 0;
	front_end_record_t *front_end_ptr;
	int state_fd;
	time_t time_stamp;
	Buf buffer;
	char *ver_str = NULL;
	uint16_t protocol_version = (uint16_t) NO_VAL;

	/* read the file */
	lock_state_files ();
	state_fd = _open_front_end_state_file(&state_file);
	if (state_fd < 0) {
		info ("No node state file (%s) to recover", state_file);
		error_code = ENOENT;
	} else {
		data_allocated = BUF_SIZE;
		data = xmalloc(data_allocated);
		while (1) {
			data_read = read(state_fd, &data[data_size], BUF_SIZE);
			if (data_read < 0) {
				if (errno == EINTR)
					continue;
				else {
					error ("Read error on %s: %m",
						state_file);
					break;
				}
			} else if (data_read == 0)     /* eof */
				break;
			data_size      += data_read;
			data_allocated += data_read;
			xrealloc(data, data_allocated);
		}
		close (state_fd);
	}
	xfree (state_file);
	unlock_state_files ();

	buffer = create_buf (data, data_size);

	safe_unpackstr_xmalloc( &ver_str, &name_len, buffer);
	debug3("Version string in front_end_state header is %s", ver_str);
	if (ver_str) {
		if (!strcmp(ver_str, FRONT_END_STATE_VERSION)) {
			protocol_version = SLURM_PROTOCOL_VERSION;
		}
	}

	if (protocol_version == (uint16_t) NO_VAL) {
		error("*****************************************************");
		error("Can not recover front_end state, version incompatible");
		error("*****************************************************");
		xfree(ver_str);
		free_buf(buffer);
		return EFAULT;
	}
	xfree(ver_str);

	safe_unpack_time(&time_stamp, buffer);

	while (remaining_buf (buffer) > 0) {
		uint16_t base_state;
		if (protocol_version >= SLURM_2_5_PROTOCOL_VERSION) {
			safe_unpackstr_xmalloc (&node_name, &name_len, buffer);
			safe_unpack16 (&node_state,  buffer);
			safe_unpackstr_xmalloc (&reason,    &name_len, buffer);
			safe_unpack_time (&reason_time, buffer);
			safe_unpack32 (&reason_uid,  buffer);
			base_state = node_state & NODE_STATE_BASE;
		} else
			goto unpack_error;

		/* validity test as possible */

		/* find record and perform update */
		front_end_ptr = find_front_end_record(node_name);
		if (front_end_ptr == NULL) {
			error("Front_end node %s has vanished from "
			      "configuration", node_name);
		} else if (state_only) {
			uint16_t orig_flags;
			orig_flags = front_end_ptr->node_state &
				     NODE_STATE_FLAGS;
			node_cnt++;
			if (IS_NODE_UNKNOWN(front_end_ptr)) {
				if (base_state == NODE_STATE_DOWN) {
					orig_flags &= (~NODE_STATE_COMPLETING);
					front_end_ptr->node_state =
						NODE_STATE_DOWN | orig_flags;
				}
				if (node_state & NODE_STATE_DRAIN) {
					 front_end_ptr->node_state |=
						 NODE_STATE_DRAIN;
				}
				if (node_state & NODE_STATE_FAIL) {
					front_end_ptr->node_state |=
						NODE_STATE_FAIL;
				}
			}
			if (front_end_ptr->reason == NULL) {
				front_end_ptr->reason = reason;
				reason = NULL;	/* Nothing to free */
				front_end_ptr->reason_time = reason_time;
				front_end_ptr->reason_uid = reason_uid;
			}
		} else {
			node_cnt++;
			front_end_ptr->node_state = node_state;
			xfree(front_end_ptr->reason);
			front_end_ptr->reason	= reason;
			reason			= NULL;	/* Nothing to free */
			front_end_ptr->reason_time	= reason_time;
			front_end_ptr->reason_uid	= reason_uid;
			front_end_ptr->last_response	= (time_t) 0;
		}

		xfree(node_name);
		xfree(reason);
	}

fini:	info("Recovered state of %d front_end nodes", node_cnt);
	free_buf (buffer);
	return error_code;

unpack_error:
	error("Incomplete front_end node data checkpoint file");
	error_code = EFAULT;
	xfree (node_name);
	xfree(reason);
	goto fini;
#else
	return 0;
#endif
}
Esempio n. 24
0
extern Buf slurm_persist_recv_msg(slurm_persist_conn_t *persist_conn)
{
	uint32_t msg_size, nw_size;
	char *msg;
	ssize_t msg_read, offset;
	Buf buffer;

	xassert(persist_conn);

	if (persist_conn->fd < 0)
		return NULL;

	if (!_conn_readable(persist_conn))
		goto endit;

	msg_read = read(persist_conn->fd, &nw_size, sizeof(nw_size));
	if (msg_read != sizeof(nw_size))
		goto endit;
	msg_size = ntohl(nw_size);
	/* We don't error check for an upper limit here
	 * since size could possibly be massive */
	if (msg_size < 2) {
		error("Persistent Conn: Invalid msg_size (%u)", msg_size);
		goto endit;
	}

	msg = xmalloc(msg_size);
	offset = 0;
	while (msg_size > offset) {
		if (!_conn_readable(persist_conn))
			break;		/* problem with this socket */
		msg_read = read(persist_conn->fd, (msg + offset),
				(msg_size - offset));
		if (msg_read <= 0) {
			error("Persistent Conn: read: %m");
			break;
		}
		offset += msg_read;
	}
	if (msg_size != offset) {
		if (!(*persist_conn->shutdown)) {
			error("Persistent Conn: only read %zd of %d bytes",
			      offset, msg_size);
		}	/* else in shutdown mode */
		xfree(msg);
		goto endit;
	}

	buffer = create_buf(msg, msg_size);
	return buffer;

endit:
	/* Close it since we abandoned it.  If the connection does still exist
	 * on the other end we can't rely on it after this point since we didn't
	 * listen long enough for this response.
	 */
	if (!(*persist_conn->shutdown) &&
	    persist_conn->flags & PERSIST_FLAG_RECONNECT)
		slurm_persist_conn_reopen(persist_conn, true);

	return NULL;
}
Esempio n. 25
0
/*
 *  This function handles the initialization information from slurmd
 *  sent by _send_slurmstepd_init() in src/slurmd/slurmd/req.c.
 */
static int
_init_from_slurmd(int sock, char **argv,
		  slurm_addr_t **_cli, slurm_addr_t **_self, slurm_msg_t **_msg,
		  int *_ngids, gid_t **_gids)
{
	char *incoming_buffer = NULL;
	Buf buffer;
	int step_type;
	int len;
	slurm_addr_t *cli = NULL;
	slurm_addr_t *self = NULL;
	slurm_msg_t *msg = NULL;
	int ngids = 0;
	gid_t *gids = NULL;
	uint16_t port;
	char buf[16];
	log_options_t lopts = LOG_OPTS_INITIALIZER;

	log_init(argv[0], lopts, LOG_DAEMON, NULL);

	/* receive job type from slurmd */
	safe_read(sock, &step_type, sizeof(int));
	debug3("step_type = %d", step_type);

	/* receive reverse-tree info from slurmd */
	pthread_mutex_lock(&step_complete.lock);
	safe_read(sock, &step_complete.rank, sizeof(int));
	safe_read(sock, &step_complete.parent_rank, sizeof(int));
	safe_read(sock, &step_complete.children, sizeof(int));
	safe_read(sock, &step_complete.depth, sizeof(int));
	safe_read(sock, &step_complete.max_depth, sizeof(int));
	safe_read(sock, &step_complete.parent_addr, sizeof(slurm_addr_t));
	step_complete.bits = bit_alloc(step_complete.children);
	step_complete.jobacct = jobacct_gather_g_create(NULL);
	pthread_mutex_unlock(&step_complete.lock);

	/* receive conf from slurmd */
	if ((conf = read_slurmd_conf_lite (sock)) == NULL)
		fatal("Failed to read conf from slurmd");
	log_alter(conf->log_opts, 0, conf->logfile);

	debug2("debug level is %d.", conf->debug_level);
	/* acct info */
	jobacct_gather_g_startpoll(conf->job_acct_gather_freq);

	switch_g_slurmd_step_init();

	slurm_get_ip_str(&step_complete.parent_addr, &port, buf, 16);
	debug3("slurmstepd rank %d, parent address = %s, port = %u",
	       step_complete.rank, buf, port);

	/* receive cli from slurmd */
	safe_read(sock, &len, sizeof(int));
	incoming_buffer = xmalloc(sizeof(char) * len);
	safe_read(sock, incoming_buffer, len);
	buffer = create_buf(incoming_buffer,len);
	cli = xmalloc(sizeof(slurm_addr_t));
	if(slurm_unpack_slurm_addr_no_alloc(cli, buffer) == SLURM_ERROR)
		fatal("slurmstepd: problem with unpack of slurmd_conf");
	free_buf(buffer);

	/* receive self from slurmd */
	safe_read(sock, &len, sizeof(int));
	if (len > 0) {
		/* receive packed self from main slurmd */
		incoming_buffer = xmalloc(sizeof(char) * len);
		safe_read(sock, incoming_buffer, len);
		buffer = create_buf(incoming_buffer,len);
		self = xmalloc(sizeof(slurm_addr_t));
		if (slurm_unpack_slurm_addr_no_alloc(self, buffer)
		    == SLURM_ERROR) {
			fatal("slurmstepd: problem with unpack of "
			      "slurmd_conf");
		}
		free_buf(buffer);
	}

	/* Receive GRES information from slurmd */
	gres_plugin_recv_stepd(sock);

	/* receive req from slurmd */
	safe_read(sock, &len, sizeof(int));
	incoming_buffer = xmalloc(sizeof(char) * len);
	safe_read(sock, incoming_buffer, len);
	buffer = create_buf(incoming_buffer,len);

	msg = xmalloc(sizeof(slurm_msg_t));
	slurm_msg_t_init(msg);

	switch(step_type) {
	case LAUNCH_BATCH_JOB:
		msg->msg_type = REQUEST_BATCH_JOB_LAUNCH;
		break;
	case LAUNCH_TASKS:
		msg->msg_type = REQUEST_LAUNCH_TASKS;
		break;
	default:
		fatal("Unrecognized launch RPC");
		break;
	}
	if(unpack_msg(msg, buffer) == SLURM_ERROR)
		fatal("slurmstepd: we didn't unpack the request correctly");
	free_buf(buffer);

	/* receive cached group ids array for the relevant uid */
	safe_read(sock, &ngids, sizeof(int));
	if (ngids > 0) {
		int i;
		uint32_t tmp32;

		gids = (gid_t *)xmalloc(sizeof(gid_t) * ngids);
		for (i = 0; i < ngids; i++) {
			safe_read(sock, &tmp32, sizeof(uint32_t));
			gids[i] = (gid_t)tmp32;
			debug2("got gid %d", gids[i]);
		}
	}

	*_cli = cli;
	*_self = self;
	*_msg = msg;
	*_ngids = ngids;
	*_gids = gids;

	return 1;

rwfail:
	fatal("Error reading initialization data from slurmd");
	exit(1);
}
Esempio n. 26
0
/*
 * load_all_part_state - load the partition state from file, recover on
 *	slurmctld restart. execute this after loading the configuration
 *	file data.
 * NOTE: READ lock_slurmctld config before entry
 */
int load_all_part_state(void)
{
	char *part_name = NULL, *allow_groups = NULL, *nodes = NULL;
	char *state_file, *data = NULL;
	uint32_t max_time, default_time, max_nodes, min_nodes;
	uint32_t max_cpus_per_node = INFINITE, grace_time = 0;
	time_t time;
	uint16_t flags;
	uint16_t max_share, preempt_mode, priority, state_up, cr_type;
	struct part_record *part_ptr;
	uint32_t data_size = 0, name_len;
	int data_allocated, data_read = 0, error_code = 0, part_cnt = 0;
	int state_fd;
	Buf buffer;
	char *ver_str = NULL;
	char* allow_alloc_nodes = NULL;
	uint16_t protocol_version = (uint16_t)NO_VAL;
	char* alternate = NULL;

	/* read the file */
	lock_state_files();
	state_fd = _open_part_state_file(&state_file);
	if (state_fd < 0) {
		info("No partition state file (%s) to recover",
		     state_file);
		error_code = ENOENT;
	} else {
		data_allocated = BUF_SIZE;
		data = xmalloc(data_allocated);
		while (1) {
			data_read = read(state_fd, &data[data_size],
					 BUF_SIZE);
			if (data_read < 0) {
				if  (errno == EINTR)
					continue;
				else {
					error("Read error on %s: %m",
						state_file);
					break;
				}
			} else if (data_read == 0)     /* eof */
				break;
			data_size      += data_read;
			data_allocated += data_read;
			xrealloc(data, data_allocated);
		}
		close(state_fd);
	}
	xfree(state_file);
	unlock_state_files();

	buffer = create_buf(data, data_size);

	safe_unpackstr_xmalloc( &ver_str, &name_len, buffer);
	debug3("Version string in part_state header is %s", ver_str);
	if (ver_str) {
		if (!strcmp(ver_str, PART_STATE_VERSION)) {
			protocol_version = SLURM_PROTOCOL_VERSION;
		} else if (!strcmp(ver_str, PART_2_5_STATE_VERSION)) {
			protocol_version = SLURM_2_5_PROTOCOL_VERSION;
		}
	}

	if (protocol_version == (uint16_t)NO_VAL) {
		error("**********************************************************");
		error("Can not recover partition state, data version incompatible");
		error("**********************************************************");
		xfree(ver_str);
		free_buf(buffer);
		return EFAULT;
	}
	xfree(ver_str);
	safe_unpack_time(&time, buffer);

	while (remaining_buf(buffer) > 0) {
		if (protocol_version >= SLURM_2_6_PROTOCOL_VERSION) {
			safe_unpackstr_xmalloc(&part_name, &name_len, buffer);
			safe_unpack32(&grace_time, buffer);
			safe_unpack32(&max_time, buffer);
			safe_unpack32(&default_time, buffer);
			safe_unpack32(&max_cpus_per_node, buffer);
			safe_unpack32(&max_nodes, buffer);
			safe_unpack32(&min_nodes, buffer);

			safe_unpack16(&flags,        buffer);
			safe_unpack16(&max_share,    buffer);
			safe_unpack16(&preempt_mode, buffer);
			safe_unpack16(&priority,     buffer);

			if (priority > part_max_priority)
				part_max_priority = priority;

			safe_unpack16(&state_up, buffer);
			safe_unpack16(&cr_type, buffer);

			safe_unpackstr_xmalloc(&allow_groups,
					       &name_len, buffer);
			safe_unpackstr_xmalloc(&allow_alloc_nodes,
					       &name_len, buffer);
			safe_unpackstr_xmalloc(&alternate, &name_len, buffer);
			safe_unpackstr_xmalloc(&nodes, &name_len, buffer);
			if ((flags & PART_FLAG_DEFAULT_CLR) ||
			    (flags & PART_FLAG_HIDDEN_CLR)  ||
			    (flags & PART_FLAG_NO_ROOT_CLR) ||
			    (flags & PART_FLAG_ROOT_ONLY_CLR) ||
			    (flags & PART_FLAG_REQ_RESV_CLR)) {
				error("Invalid data for partition %s: flags=%u",
				      part_name, flags);
				error_code = EINVAL;
			}
		} else if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) {
			safe_unpackstr_xmalloc(&part_name, &name_len, buffer);
			safe_unpack32(&grace_time, buffer);
			safe_unpack32(&max_time, buffer);
			safe_unpack32(&default_time, buffer);
			safe_unpack32(&max_nodes, buffer);
			safe_unpack32(&min_nodes, buffer);

			safe_unpack16(&flags,        buffer);
			safe_unpack16(&max_share,    buffer);
			safe_unpack16(&preempt_mode, buffer);
			safe_unpack16(&priority,     buffer);

			if (priority > part_max_priority)
				part_max_priority = priority;
			cr_type = 0;	/* Default value */

			safe_unpack16(&state_up, buffer);
			safe_unpackstr_xmalloc(&allow_groups,
					       &name_len, buffer);
			safe_unpackstr_xmalloc(&allow_alloc_nodes,
					       &name_len, buffer);
			safe_unpackstr_xmalloc(&alternate, &name_len, buffer);
			safe_unpackstr_xmalloc(&nodes, &name_len, buffer);
			if ((flags & PART_FLAG_DEFAULT_CLR) ||
			    (flags & PART_FLAG_HIDDEN_CLR)  ||
			    (flags & PART_FLAG_NO_ROOT_CLR) ||
			    (flags & PART_FLAG_ROOT_ONLY_CLR) ||
			    (flags & PART_FLAG_REQ_RESV_CLR)) {
				error("Invalid data for partition %s: flags=%u",
				      part_name, flags);
				error_code = EINVAL;
			}
		} else {
			error("load_all_part_state: protocol_version "
			      "%hu not supported", protocol_version);
			goto unpack_error;
		}
		/* validity test as possible */
		if (state_up > PARTITION_UP) {
			error("Invalid data for partition %s: state_up=%u",
			      part_name, state_up);
			error_code = EINVAL;
		}
		if (error_code) {
			error("No more partition data will be processed from "
			      "the checkpoint file");
			xfree(allow_groups);
			xfree(allow_alloc_nodes);
			xfree(alternate);
			xfree(part_name);
			xfree(nodes);
			error_code = EINVAL;
			break;
		}

		/* find record and perform update */
		part_ptr = list_find_first(part_list, &list_find_part,
					   part_name);
		part_cnt++;
		if (part_ptr == NULL) {
			info("load_all_part_state: partition %s missing from "
				"configuration file", part_name);
			part_ptr = create_part_record();
			xfree(part_ptr->name);
			part_ptr->name = xstrdup(part_name);
		}

		part_ptr->flags          = flags;
		if (part_ptr->flags & PART_FLAG_DEFAULT) {
			xfree(default_part_name);
			default_part_name = xstrdup(part_name);
			default_part_loc = part_ptr;
		}
		part_ptr->max_time       = max_time;
		part_ptr->default_time   = default_time;
		part_ptr->max_cpus_per_node = max_cpus_per_node;
		part_ptr->max_nodes      = max_nodes;
		part_ptr->max_nodes_orig = max_nodes;
		part_ptr->min_nodes      = min_nodes;
		part_ptr->min_nodes_orig = min_nodes;
		part_ptr->max_share      = max_share;
		part_ptr->grace_time     = grace_time;
		if (preempt_mode != (uint16_t) NO_VAL)
			part_ptr->preempt_mode   = preempt_mode;
		part_ptr->priority       = priority;
		part_ptr->state_up       = state_up;
		part_ptr->cr_type	 = cr_type;
		xfree(part_ptr->allow_groups);
		part_ptr->allow_groups   = allow_groups;
		xfree(part_ptr->allow_alloc_nodes);
		part_ptr->allow_alloc_nodes   = allow_alloc_nodes;
		xfree(part_ptr->alternate);
		part_ptr->alternate      = alternate;
		xfree(part_ptr->nodes);
		part_ptr->nodes = nodes;

		xfree(part_name);
	}

	info("Recovered state of %d partitions", part_cnt);
	free_buf(buffer);
	return error_code;

      unpack_error:
	error("Incomplete partition data checkpoint file");
	info("Recovered state of %d partitions", part_cnt);
	free_buf(buffer);
	return EFAULT;
}
Esempio n. 27
0
int main (int argc, char *argv[])
{
	Buf buffer;
	uint16_t test16 = 1234, out16;
	uint32_t test32 = 5678, out32, byte_cnt;
	char testbytes[] = "TEST BYTES", *outbytes;
	char teststring[] = "TEST STRING",  *outstring = NULL;
	char *nullstr = NULL;
	char *data;
	int data_size;
	long double test_double = 1340664754944.2132312, test_double2;
	uint64_t test64;

	buffer = init_buf (0);
        pack16(test16, buffer);
        pack32(test32, buffer);
	pack64((uint64_t)test_double, buffer);

        packstr(testbytes, buffer);
        packstr(teststring, buffer);
	packstr(nullstr, buffer);

	packstr("literal", buffer);
	packstr("", buffer);

        data_size = get_buf_offset(buffer);
        printf("wrote %d bytes\n", data_size);

	/* Pull data off old buffer, destroy it, and create a new one */
	data = xfer_buf_data(buffer);
	buffer = create_buf(data, data_size);

        unpack16(&out16, buffer);
	TEST(out16 != test16, "un/pack16");

        unpack32(&out32, buffer);
	TEST(out32 != test32, "un/pack32");

  	unpack64(&test64, buffer);
	test_double2 = (long double)test64;
	TEST((uint64_t)test_double2 != (uint64_t)test_double, "un/pack double as a uint64");
	/* info("Original\t %Lf", test_double); */
	/* info("uint64\t %ld", test64); */
	/* info("converted LD\t %Lf", test_double2); */

	unpackstr_ptr(&outbytes, &byte_cnt, buffer);
	TEST( ( strcmp(testbytes, outbytes) != 0 ) , "un/packstr_ptr");

	unpackstr_xmalloc(&outstring, &byte_cnt, buffer);
	TEST(strcmp(teststring, outstring) != 0, "un/packstr_xmalloc");
	xfree(outstring);

	unpackstr_xmalloc(&nullstr, &byte_cnt, buffer);
	TEST(nullstr != NULL, "un/packstr of null string.");

	unpackstr_xmalloc(&outstring, &byte_cnt, buffer);
	TEST(strcmp("literal", outstring) != 0, 
			"un/packstr of string literal");
	xfree(outstring);

	unpackstr_xmalloc(&outstring, &byte_cnt, buffer);
	TEST(strcmp("", outstring) != 0, "un/packstr of string \"\" ");

	xfree(outstring);

	free_buf(buffer);
	totals();
	return failed;

}