Example #1
0
static void
_print_config(void)
{
	int days, hours, mins, secs;
	char name[128];

	gethostname_short(name, sizeof(name));
	printf("NodeName=%s ", name);

	get_cpuinfo(&conf->actual_cpus,
		    &conf->actual_boards,
	            &conf->actual_sockets,
	            &conf->actual_cores,
	            &conf->actual_threads,
	            &conf->block_map_size,
	            &conf->block_map, &conf->block_map_inv);
	printf("CPUs=%u Boards=%u Sockets=%u CoresPerSocket=%u "
	       "ThreadsPerCore=%u ",
	       conf->actual_cpus, conf->actual_boards, conf->actual_sockets,
	       conf->actual_cores, conf->actual_threads);

	get_memory(&conf->real_memory_size);
	get_tmp_disk(&conf->tmp_disk_space, "/tmp");
	printf("RealMemory=%u TmpDisk=%u\n",
	       conf->real_memory_size, conf->tmp_disk_space);

	get_up_time(&conf->up_time);
	secs  =  conf->up_time % 60;
	mins  = (conf->up_time / 60) % 60;
	hours = (conf->up_time / 3600) % 24;
	days  = (conf->up_time / 86400);
	printf("UpTime=%u-%2.2u:%2.2u:%2.2u\n", days, hours, mins, secs);
}
Example #2
0
void TimeOutTimer::OnTimer()
{
	int    timeout;
	double last;
	double curr = get_up_time();

	MutexLock lock(mutex_);

	map<uint64_t, weak_ptr<TermSession> >::iterator
		it = termsession_list_.begin();
	for (/* without initialize */; it != termsession_list_.end(); /* without condtion */) {
		shared_ptr<TermSession> sp_it(it->second.lock());
		if (sp_it) {
			{
				last = get_up_time();
				timeout = kDefaultTimeout;
				MutexLock lock(sp_it->termconn_mtx_);
				if (sp_it->term_conn_) {
					last    = sp_it->term_conn_->last_heartbeat_time_;
					timeout = sp_it->term_conn_->timeout_;
				}
			}
			if ((int)(curr - last) > timeout) {
				//超时处理...添加到工作队列里面?
				logger_.Warn("[超时计时器]删除超时的终端会话,会话id: 0x"SFMT64X"CAId: "SFMT64U, sp_it->Id(), sp_it->CAId());
				CASession *ca_session = sp_it->ca_session_;
				if (psm_ctx_->busi_pool_->DelTermSession(sp_it) > 0) {
					psm_ctx_->term_basic_func_svr_->NotifyAllTerminalStatusPChanged(ca_session, sp_it->Id());
				}
                MutexLock lock(sp_it->termconn_mtx_);
				if (sp_it->term_conn_) {
                    sp_it->term_conn_->SetDirty();
                }
				termsession_list_.erase(it++);
			} else {
				it++;
			}
		} else {
			//删除无效会话。
			termsession_list_.erase(it++);
		}
	}

	logger_.Trace("[超时计时器] 循环遍历处理时间%.3f", get_up_time() - curr);
}
Example #3
0
/* % gcc -DDEBUG_MODULE get_mach_stat.c -I../../.. -g -DUSE_CPU_SPEED	*/
int
main(int argc, char * argv[])
{
	int error_code;
	uint16_t sockets, cores, threads;
	uint16_t block_map_size;
	uint16_t *block_map, *block_map_inv;
	struct config_record this_node;
	char node_name[MAX_SLURM_NAME];
	float speed;
	uint16_t testnumproc = 0;
	uint32_t up_time = 0;
	int days, hours, mins, secs;
	char* _cpuinfo_path = "/proc/cpuinfo";

	if (argc > 1) {
	    	_cpuinfo_path = argv[1];
		testnumproc = 1024;	/* since may not match test host */
	}
	debug3("%s:", _cpuinfo_path);

	error_code = get_mach_name(node_name);
	if (error_code != 0)
		exit(1);    /* The show is all over without a node name */

	error_code += get_procs(&this_node.cpus);
	error_code += get_cpuinfo(MAX(this_node.cpus, testnumproc),
				  &this_node.sockets,
				  &this_node.cores,
				  &this_node.threads,
				  &block_map_size,
				  &block_map, &block_map_inv);
	xfree(block_map);	/* not used here */
	xfree(block_map_inv);	/* not used here */
	error_code += get_memory(&this_node.real_memory);
	error_code += get_tmp_disk(&this_node.tmp_disk, "/tmp");
	error_code += get_up_time(&up_time);
#ifdef USE_CPU_SPEED
	error_code += get_speed(&speed);
#endif

	debug3("");
	debug3("NodeName=%s CPUs=%u Sockets=%u Cores=%u Threads=%u",
		node_name, this_node.cpus,
		this_node.sockets, this_node.cores, this_node.threads);
	debug3("\tRealMemory=%u TmpDisk=%u Speed=%f",
		this_node.real_memory, this_node.tmp_disk, speed);
	secs  = up_time % 60;
	mins  = (up_time / 60) % 60;
	hours = (up_time / 3600) % 24;
	days  = (up_time / 86400);
	debug3("\tUpTime=%u=%u-%2.2u:%2.2u:%2.2u",
	       up_time, days, hours, mins, secs);
	if (error_code != 0)
		debug3("get_mach_stat error_code=%d encountered", error_code);
	exit (error_code);
}
Example #4
0
weak_ptr<TermSession> BusinessPool::GenTermSession(PtLoginRequest *msg, TermConnection *conn, LoginError *error)
{
    if (!started_) {
        logger_.Warn("PSM Business Pool have stoped, but generate terminal session"); 
        *error = LoginWorkPoolNotStarted;
        return weak_ptr<TermSession>();
    }

    shared_ptr<TermSession> sp_ts(new TermSession(logger_, msg, conn, error));  
    if (!sp_ts->valid()) {
        logger_.Warn("PSM Business Pool generate terminal session user cert failure");
        return weak_ptr<TermSession>();
    }

    // valid ok.

    // generate terminal session id.
    uint64_t ts_id = genTermSessionId(get_up_time(), global_cnt_++, psm_ctx_->ip_addr(), sp_ts->CAId());
    logger_.Trace("PSM Business Pool generate terminal session id: 0x" SFMT64X, ts_id);
    sp_ts->Id(ts_id);

    CASessionMgr *cs_mgr = pool_relation_ca_session_mgr_[getIdByTermSessionId(sp_ts->Id())];
    CASession *cs = cs_mgr->FindCASessionById(sp_ts->CAId());

    if (cs == NULL) {
        cs = cs_mgr->Create(sp_ts->CAId());
        cs_mgr->Attach(cs);
    }

    sp_ts->ca_session_ = cs;
    cs->Add(sp_ts);
    cs_mgr->AttachTermSessionInfo(sp_ts->Id(), cs);

    //设置Session描述符
    {
        ByteStream agreekey;
        for ( int i=0; i<8; i++ ) {
            agreekey.Add((uint8_t*)"\x0F\x0E\x0D\x0C\x0B\x0A\x09\x08\x07\x06\x05\x04\x03\x02\x01\x00",16);
        } 

        sp_ts->session_info_desc_.agreement_key_ = agreekey;
        sp_ts->session_info_desc_.session_id_    = sp_ts->Id();
        sp_ts->session_info_desc_.timeout_       = 15;
        sp_ts->session_info_desc_.interval_time_ = 10;
        sp_ts->session_info_desc_.valid_         = true;
    }

    //设置终端描述符
    sp_ts->terminal_info_desc_.session_id_ = sp_ts->Id();
    
    *error = LoginOK;
    return sp_ts;
}
Example #5
0
/*
 * Read the slurm configuration file (slurm.conf) and substitute some
 * values into the slurmd configuration in preference of the defaults.
 */
static void
_read_config(void)
{
	char *path_pubkey = NULL;
	slurm_ctl_conf_t *cf = NULL;
	uint16_t tmp16 = 0;

#ifndef HAVE_FRONT_END
	bool cr_flag = false, gang_flag = false;
#endif

	cf = slurm_conf_lock();

	slurm_mutex_lock(&conf->config_mutex);

	if (conf->conffile == NULL)
		conf->conffile = xstrdup(cf->slurm_conf);

	conf->slurm_user_id =  cf->slurm_user_id;

	conf->cr_type = cf->select_type_param;

	path_pubkey = xstrdup(cf->job_credential_public_certificate);

	if (!conf->logfile)
		conf->logfile = xstrdup(cf->slurmd_logfile);

#ifndef HAVE_FRONT_END
	if (!strcmp(cf->select_type, "select/cons_res"))
		cr_flag = true;
	if (cf->preempt_mode & PREEMPT_MODE_GANG)
		gang_flag = true;
#endif

	slurm_conf_unlock();
	/* node_name may already be set from a command line parameter */
	if (conf->node_name == NULL)
		conf->node_name = slurm_conf_get_nodename(conf->hostname);
	/* if we didn't match the form of the hostname already
	 * stored in conf->hostname, check to see if we match any
	 * valid aliases */
	if (conf->node_name == NULL)
		conf->node_name = slurm_conf_get_aliased_nodename();

	if (conf->node_name == NULL)
		conf->node_name = slurm_conf_get_nodename("localhost");

	if (conf->node_name == NULL)
		fatal("Unable to determine this slurmd's NodeName");

	_massage_pathname(&conf->logfile);

	/* set node_addr if relevant */
	if ((conf->node_addr == NULL) &&
	    (conf->node_addr = slurm_conf_get_nodeaddr(conf->hostname)) &&
	    (strcmp(conf->node_addr, conf->hostname) == 0)) {
		xfree(conf->node_addr);	/* Sets to NULL */
	}

	conf->port = slurm_conf_get_port(conf->node_name);
	slurm_conf_get_cpus_bsct(conf->node_name,
				 &conf->conf_cpus, &conf->conf_boards,
				 &conf->conf_sockets, &conf->conf_cores,
				 &conf->conf_threads);

	/* store hardware properties in slurmd_config */
	xfree(conf->block_map);
	xfree(conf->block_map_inv);

	_update_logging();
	_update_nice();

	get_cpuinfo(&conf->actual_cpus,
		    &conf->actual_boards,
	            &conf->actual_sockets,
	            &conf->actual_cores,
	            &conf->actual_threads,
	            &conf->block_map_size,
	            &conf->block_map, &conf->block_map_inv);
#ifdef HAVE_FRONT_END
	/*
	 * When running with multiple frontends, the slurmd S:C:T values are not
	 * relevant, hence ignored by both _register_front_ends (sets all to 1)
	 * and validate_nodes_via_front_end (uses slurm.conf values).
	 * Report actual hardware configuration, irrespective of FastSchedule.
	 */
	conf->cpus    = conf->actual_cpus;
	conf->boards  = conf->actual_boards;
	conf->sockets = conf->actual_sockets;
	conf->cores   = conf->actual_cores;
	conf->threads = conf->actual_threads;
#else
	/* If the actual resources on a node differ than what is in
	 * the configuration file and we are using
	 * cons_res or gang scheduling we have to use what is in the
	 * configuration file because the slurmctld creates bitmaps
	 * for scheduling before these nodes check in.
	 */
	if (((cf->fast_schedule == 0) && !cr_flag && !gang_flag) ||
	    ((cf->fast_schedule == 1) &&
	     (conf->actual_cpus < conf->conf_cpus))) {
		conf->cpus    = conf->actual_cpus;
		conf->boards  = conf->actual_boards;
		conf->sockets = conf->actual_sockets;
		conf->cores   = conf->actual_cores;
		conf->threads = conf->actual_threads;
	} else {
		conf->cpus    = conf->conf_cpus;
		conf->boards  = conf->conf_boards;
		conf->sockets = conf->conf_sockets;
		conf->cores   = conf->conf_cores;
		conf->threads = conf->conf_threads;
	}

	if ((conf->cpus    != conf->actual_cpus)    ||
	    (conf->sockets != conf->actual_sockets) ||
	    (conf->cores   != conf->actual_cores)   ||
	    (conf->threads != conf->actual_threads)) {
		if (cf->fast_schedule) {
			info("Node configuration differs from hardware: "
			     "CPUs=%u:%u(hw) Boards=%u:%u(hw) "
			     "SocketsPerBoard=%u:%u(hw) CoresPerSocket=%u:%u(hw) "
			     "ThreadsPerCore=%u:%u(hw)",
			     conf->cpus,    conf->actual_cpus,
			     conf->boards,  conf->actual_boards,
			     conf->sockets, conf->actual_sockets,
			     conf->cores,   conf->actual_cores,
			     conf->threads, conf->actual_threads);
		} else if ((cf->fast_schedule == 0) && (cr_flag || gang_flag)) {
			error("You are using cons_res or gang scheduling with "
			      "Fastschedule=0 and node configuration differs "
			      "from hardware.  The node configuration used "
			      "will be what is in the slurm.conf because of "
			      "the bitmaps the slurmctld must create before "
			      "the slurmd registers.\n"
			      "   CPUs=%u:%u(hw) Boards=%u:%u(hw) "
			      "SocketsPerBoard=%u:%u(hw) CoresPerSocket=%u:%u(hw) "
			      "ThreadsPerCore=%u:%u(hw)",
			      conf->cpus,    conf->actual_cpus,
			      conf->boards,  conf->actual_boards,
			      conf->sockets, conf->actual_sockets,
			      conf->cores,   conf->actual_cores,
			      conf->threads, conf->actual_threads);
		}
	}
#endif

	get_memory(&conf->real_memory_size);
	get_up_time(&conf->up_time);

	cf = slurm_conf_lock();
	get_tmp_disk(&conf->tmp_disk_space, cf->tmp_fs);
	_free_and_set(&conf->epilog,   xstrdup(cf->epilog));
	_free_and_set(&conf->prolog,   xstrdup(cf->prolog));
	_free_and_set(&conf->tmpfs,    xstrdup(cf->tmp_fs));
	_free_and_set(&conf->health_check_program,
		      xstrdup(cf->health_check_program));
	_free_and_set(&conf->spooldir, xstrdup(cf->slurmd_spooldir));
	_massage_pathname(&conf->spooldir);
	_free_and_set(&conf->pidfile,  xstrdup(cf->slurmd_pidfile));
	_massage_pathname(&conf->pidfile);
	_free_and_set(&conf->select_type, xstrdup(cf->select_type));
	_free_and_set(&conf->task_prolog, xstrdup(cf->task_prolog));
	_free_and_set(&conf->task_epilog, xstrdup(cf->task_epilog));
	_free_and_set(&conf->pubkey,   path_pubkey);

	conf->debug_flags = cf->debug_flags;
	conf->propagate_prio = cf->propagate_prio_process;

	_free_and_set(&conf->job_acct_gather_freq,
		      xstrdup(cf->job_acct_gather_freq));

	conf->acct_freq_task = (uint16_t)NO_VAL;
	tmp16 = acct_gather_parse_freq(PROFILE_TASK,
				       conf->job_acct_gather_freq);
	if (tmp16 != -1)
		conf->acct_freq_task = tmp16;

	_free_and_set(&conf->acct_gather_energy_type,
		      xstrdup(cf->acct_gather_energy_type));
	_free_and_set(&conf->acct_gather_filesystem_type,
		      xstrdup(cf->acct_gather_filesystem_type));
	_free_and_set(&conf->acct_gather_infiniband_type,
		      xstrdup(cf->acct_gather_infiniband_type));
	_free_and_set(&conf->acct_gather_profile_type,
		      xstrdup(cf->acct_gather_profile_type));
	_free_and_set(&conf->job_acct_gather_type,
		      xstrdup(cf->job_acct_gather_type));

	if ( (conf->node_name == NULL) ||
	     (conf->node_name[0] == '\0') )
		fatal("Node name lookup failure");

	if (cf->control_addr == NULL)
		fatal("Unable to establish controller machine");
	if (cf->slurmctld_port == 0)
		fatal("Unable to establish controller port");
	conf->slurmd_timeout = cf->slurmd_timeout;
	conf->use_pam = cf->use_pam;
	conf->task_plugin_param = cf->task_plugin_param;

	slurm_mutex_unlock(&conf->config_mutex);
	slurm_conf_unlock();
}
Example #6
0
static void
_fill_registration_msg(slurm_node_registration_status_msg_t *msg)
{
	List steps;
	ListIterator i;
	step_loc_t *stepd;
	int  n;
	char *arch, *os;
	struct utsname buf;
	static bool first_msg = true;
	static time_t slurmd_start_time = 0;
	Buf gres_info;

	msg->node_name   = xstrdup (conf->node_name);
	msg->cpus	 = conf->cpus;
	msg->boards	 = conf->boards;
	msg->sockets	 = conf->sockets;
	msg->cores	 = conf->cores;
	msg->threads	 = conf->threads;
	msg->real_memory = conf->real_memory_size;
	msg->tmp_disk    = conf->tmp_disk_space;
	msg->hash_val    = slurm_get_hash_val();
	get_cpu_load(&msg->cpu_load);

	gres_info = init_buf(1024);
	if (gres_plugin_node_config_pack(gres_info) != SLURM_SUCCESS)
		error("error packing gres configuration");
	else
		msg->gres_info   = gres_info;

	get_up_time(&conf->up_time);
	msg->up_time     = conf->up_time;
	if (slurmd_start_time == 0)
		slurmd_start_time = time(NULL);
	msg->slurmd_start_time = slurmd_start_time;

	if (first_msg) {
		first_msg = false;
		info("CPUs=%u Boards=%u Sockets=%u Cores=%u Threads=%u "
		     "Memory=%u TmpDisk=%u Uptime=%u",
		     msg->cpus, msg->boards, msg->sockets, msg->cores,
		     msg->threads, msg->real_memory, msg->tmp_disk,
		     msg->up_time);
	} else {
		debug3("CPUs=%u Boards=%u Sockets=%u Cores=%u Threads=%u "
		       "Memory=%u TmpDisk=%u Uptime=%u",
		       msg->cpus, msg->boards, msg->sockets, msg->cores,
		       msg->threads, msg->real_memory, msg->tmp_disk,
		       msg->up_time);
	}
	uname(&buf);
	if ((arch = getenv("SLURM_ARCH")))
		msg->arch = xstrdup(arch);
	else
		msg->arch = xstrdup(buf.machine);
	if ((os = getenv("SLURM_OS")))
		msg->os   = xstrdup(os);
	else
		msg->os = xstrdup(buf.sysname);

	if (msg->startup) {
		if (switch_g_alloc_node_info(&msg->switch_nodeinfo))
			error("switch_g_alloc_node_info: %m");
		if (switch_g_build_node_info(msg->switch_nodeinfo))
			error("switch_g_build_node_info: %m");
	}

	steps = stepd_available(conf->spooldir, conf->node_name);
	msg->job_count = list_count(steps);
	msg->job_id    = xmalloc(msg->job_count * sizeof(*msg->job_id));
	/* Note: Running batch jobs will have step_id == NO_VAL */
	msg->step_id   = xmalloc(msg->job_count * sizeof(*msg->step_id));

	i = list_iterator_create(steps);
	n = 0;
	while ((stepd = list_next(i))) {
		int fd;
		fd = stepd_connect(stepd->directory, stepd->nodename,
				   stepd->jobid, stepd->stepid);
		if (fd == -1) {
			--(msg->job_count);
			continue;
		}
		if (stepd_state(fd) == SLURMSTEPD_NOT_RUNNING) {
			debug("stale domain socket for stepd %u.%u ",
			      stepd->jobid, stepd->stepid);
			--(msg->job_count);
			close(fd);
			continue;
		}

		close(fd);
		if (stepd->stepid == NO_VAL)
			debug("found apparently running job %u", stepd->jobid);
		else
			debug("found apparently running step %u.%u",
			      stepd->jobid, stepd->stepid);
		msg->job_id[n]  = stepd->jobid;
		msg->step_id[n] = stepd->stepid;
		n++;
	}
	list_iterator_destroy(i);
	list_destroy(steps);

	if (!msg->energy)
		msg->energy = acct_gather_energy_alloc();
	acct_gather_energy_g_get_data(ENERGY_DATA_STRUCT, msg->energy);

	msg->timestamp = time(NULL);

	return;
}
Example #7
0
int SMSvcApplyWork::SendResponed( unsigned int ret_code )
{
    // 参数错误,直接给终端应答
    PbSvcApplyResponse svcapply_response(ret_code, 0);
    if ( this->pkg_->sequence_no_desc_.valid_ )   svcapply_response.Add(this->pkg_->sequence_no_desc_);
    if ( this->pkg_->test_data_desc_.valid_ )
    {
        PB_TestDataDescriptor test_data_desc(this->pkg_->test_data_desc_.request_str_, to_string("%.3lf",get_up_time()));
        svcapply_response.Add(test_data_desc);
    }

    ByteStream response_pkg = svcapply_response.Serialize();
    if ( this->conn_->Write(response_pkg.GetBuffer(), response_pkg.Size()) )
    {
        //TODO:暂时默认发送成功
        return 0;
    }

    return -1;
}