Example #1
extern int as_mysql_nonhour_rollup(mysql_conn_t *mysql_conn,
				   bool run_month,
				   char *cluster_name,
				   time_t start, time_t end,
				   uint16_t archive_data)
{
	/* We can't just add 86400 seconds per day here: daylight
	 * saving time starts and ends every once in a while, so days
	 * are not all the same length.
	 */
	int rc = SLURM_SUCCESS;
	struct tm start_tm;
	time_t curr_start = start;
	time_t curr_end;
	time_t now = time(NULL);
	char *query = NULL;
	uint16_t track_wckey = slurm_get_track_wckey();
	char *unit_name;

	if (!slurm_localtime_r(&curr_start, &start_tm)) {
		error("Couldn't get localtime from start %ld", curr_start);
		return SLURM_ERROR;
	}

	start_tm.tm_sec = 0;
	start_tm.tm_min = 0;
	start_tm.tm_hour = 0;
	start_tm.tm_isdst = -1;

	if (run_month) {
		unit_name = "month";
		start_tm.tm_mday = 1;
		start_tm.tm_mon++;
	} else {
		unit_name = "day";
		start_tm.tm_mday++;
	}

	curr_end = slurm_mktime(&start_tm);

	while (curr_start < end) {
		if (debug_flags & DEBUG_FLAG_DB_USAGE)
			DB_DEBUG(mysql_conn->conn,
				 "curr %s is now %ld-%ld",
				 unit_name, curr_start, curr_end);
/* 		info("start %s", slurm_ctime2(&curr_start)); */
/* 		info("end %s", slurm_ctime2(&curr_end)); */
		query = xstrdup_printf(
			"insert into \"%s_%s\" (creation_time, mod_time, id, "
			"id_tres, time_start, alloc_secs) "
			"select %ld, %ld, id, id_tres, "
			"%ld, @ASUM:=SUM(alloc_secs) from \"%s_%s\" where "
			"(time_start < %ld && time_start >= %ld) "
			"group by id, id_tres on duplicate key update "
			"mod_time=%ld, alloc_secs=@ASUM;",
			cluster_name,
			run_month ? assoc_month_table : assoc_day_table,
			now, now, curr_start,
			cluster_name,
			run_month ? assoc_day_table : assoc_hour_table,
			curr_end, curr_start, now);
		/* We group on "deleted" here so that if there are no
		   entries we don't get an error; nothing is simply
		   returned.  Otherwise we would get a bunch of NULLs.
		*/
		xstrfmtcat(query,
			   "insert into \"%s_%s\" (creation_time, "
			   "mod_time, time_start, id_tres, count, "
			   "alloc_secs, down_secs, pdown_secs, "
			   "idle_secs, over_secs, resv_secs) "
			   "select %ld, %ld, "
			   "%ld, id_tres, @CPU:=MAX(count), "
			   "@ASUM:=SUM(alloc_secs), "
			   "@DSUM:=SUM(down_secs), "
			   "@PDSUM:=SUM(pdown_secs), "
			   "@ISUM:=SUM(idle_secs), "
			   "@OSUM:=SUM(over_secs), "
			   "@RSUM:=SUM(resv_secs) from \"%s_%s\" where "
			   "(time_start < %ld && time_start >= %ld) "
			   "group by deleted, id_tres "
			   "on duplicate key update "
			   "mod_time=%ld, count=@CPU, "
			   "alloc_secs=@ASUM, down_secs=@DSUM, "
			   "pdown_secs=@PDSUM, idle_secs=@ISUM, "
			   "over_secs=@OSUM, resv_secs=@RSUM;",
			   cluster_name,
			   run_month ? cluster_month_table : cluster_day_table,
			   now, now, curr_start,
			   cluster_name,
			   run_month ? cluster_day_table : cluster_hour_table,
			   curr_end, curr_start, now);
		if (track_wckey) {
			xstrfmtcat(query,
				   "insert into \"%s_%s\" (creation_time, "
				   "mod_time, id, id_tres, time_start, "
				   "alloc_secs) "
				   "select %ld, %ld, "
				   "id, id_tres, %ld, @ASUM:=SUM(alloc_secs) "
				   "from \"%s_%s\" where (time_start < %ld && "
				   "time_start >= %ld) group by id, id_tres "
				   "on duplicate key update "
				   "mod_time=%ld, alloc_secs=@ASUM;",
				   cluster_name,
				   run_month ? wckey_month_table :
				   wckey_day_table,
				   now, now, curr_start,
				   cluster_name,
				   run_month ? wckey_day_table :
				   wckey_hour_table,
				   curr_end, curr_start, now);
		}
		if (debug_flags & DEBUG_FLAG_DB_USAGE)
			DB_DEBUG(mysql_conn->conn, "query\n%s", query);
		rc = mysql_db_query(mysql_conn, query);
		xfree(query);
		if (rc != SLURM_SUCCESS) {
			error("Couldn't add %s rollup", unit_name);
			return SLURM_ERROR;
		}

		curr_start = curr_end;
		if (!slurm_localtime_r(&curr_start, &start_tm)) {
			error("Couldn't get localtime from %s start %ld",
			      unit_name, curr_start);
			return SLURM_ERROR;
		}
		start_tm.tm_sec = 0;
		start_tm.tm_min = 0;
		start_tm.tm_hour = 0;
		if (run_month) {
			start_tm.tm_mday = 1;
			start_tm.tm_mon++;
		} else {
			start_tm.tm_mday++;
		}
		start_tm.tm_isdst = -1;
		curr_end = slurm_mktime(&start_tm);
	}

/* 	info("stop start %s", slurm_ctime2(&curr_start)); */
/* 	info("stop end %s", slurm_ctime2(&curr_end)); */

	/* go check to see if we archive and purge */
	rc = _process_purge(mysql_conn, cluster_name, archive_data,
			    run_month ? SLURMDB_PURGE_MONTHS :
			    SLURMDB_PURGE_DAYS);
	return rc;
}
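
The loop above steps one civil day (or month) at a time instead of adding 86400 seconds, and lets mktime() renormalize the struct tm across DST boundaries (tm_isdst = -1). A minimal standalone sketch of the same idiom in plain C; the slurm_localtime_r/slurm_mktime wrappers above behave like their libc counterparts for this purpose:

#include <stdio.h>
#include <time.h>

/* Advance a timestamp to the next local midnight, DST-safe; this
 * mirrors the start_tm manipulation in as_mysql_nonhour_rollup. */
static time_t next_local_midnight(time_t t)
{
	struct tm tm;

	if (!localtime_r(&t, &tm))
		return (time_t) -1;
	tm.tm_sec = 0;
	tm.tm_min = 0;
	tm.tm_hour = 0;
	tm.tm_mday++;		/* may overflow the month; mktime fixes it */
	tm.tm_isdst = -1;	/* let mktime decide whether DST applies */
	return mktime(&tm);
}

int main(void)
{
	time_t t = time(NULL);
	int i;

	/* Step across a week; days around a DST switch are 23 or 25
	 * hours long, which is why adding 86400 would drift. */
	for (i = 0; i < 7; i++) {
		time_t next = next_local_midnight(t);
		printf("day length: %ld seconds\n", (long)(next - t));
		t = next;
	}
	return 0;
}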
Example #2
/*
 * slurm_sprint_partition_info - output information about a specific Slurm
 *	partition based upon message as loaded using slurm_load_partitions
 * IN part_ptr - an individual partition information record pointer
 * IN one_liner - print as a single line if true
 * RET out - char * containing formatted output (must be freed after call)
 *           NULL is returned on failure.
 */
char *slurm_sprint_partition_info ( partition_info_t * part_ptr,
				    int one_liner )
{
	char tmp[16];
	char *out = NULL;
	char *allow_deny, *value;
	uint16_t force, preempt_mode, val;
	uint32_t cluster_flags = slurmdb_setup_cluster_flags();
	char *line_end = (one_liner) ? " " : "\n   ";

	/****** Line 1 ******/

	xstrfmtcat(out, "PartitionName=%s", part_ptr->name);
	xstrcat(out, line_end);

	/****** Line 2 ******/

	if ((part_ptr->allow_groups == NULL) ||
	    (part_ptr->allow_groups[0] == '\0'))
		xstrcat(out, "AllowGroups=ALL");
	else {
		xstrfmtcat(out, "AllowGroups=%s", part_ptr->allow_groups);
	}

	if (part_ptr->allow_accounts || !part_ptr->deny_accounts) {
		allow_deny = "Allow";
		if ((part_ptr->allow_accounts == NULL) ||
		    (part_ptr->allow_accounts[0] == '\0'))
			value = "ALL";
		else
			value = part_ptr->allow_accounts;
	} else {
		allow_deny = "Deny";
		value = part_ptr->deny_accounts;
	}
	xstrfmtcat(out, " %sAccounts=%s", allow_deny, value);

	if (part_ptr->allow_qos || !part_ptr->deny_qos) {
		allow_deny = "Allow";
		if ((part_ptr->allow_qos == NULL) ||
		    (part_ptr->allow_qos[0] == '\0'))
			value = "ALL";
		else
			value = part_ptr->allow_qos;
	} else {
		allow_deny = "Deny";
		value = part_ptr->deny_qos;
	}
	xstrfmtcat(out, " %sQos=%s", allow_deny, value);
	xstrcat(out, line_end);

	/****** Line 3 ******/
	if (part_ptr->allow_alloc_nodes == NULL)
		xstrcat(out, "AllocNodes=ALL");
	else
		xstrfmtcat(out, "AllocNodes=%s", part_ptr->allow_alloc_nodes);

	if (part_ptr->alternate != NULL) {
		xstrfmtcat(out, " Alternate=%s", part_ptr->alternate);
	}

	if (part_ptr->flags & PART_FLAG_DEFAULT)
		xstrcat(out, " Default=YES");
	else
		xstrcat(out, " Default=NO");

	if (part_ptr->qos_char)
		xstrfmtcat(out, " QoS=%s", part_ptr->qos_char);
	else
		xstrcat(out, " QoS=N/A");

	xstrcat(out, line_end);

	/****** Line 4 added here for BG partitions only
	 ****** to maintain alphabetized output ******/

	if (cluster_flags & CLUSTER_FLAG_BG) {
		xstrfmtcat(out, "Midplanes=%s", part_ptr->nodes);
		xstrcat(out, line_end);
	}

	/****** Line 5 ******/

	if (part_ptr->default_time == INFINITE)
		xstrcat(out, "DefaultTime=UNLIMITED");
	else if (part_ptr->default_time == NO_VAL)
		xstrcat(out, "DefaultTime=NONE");
	else {
		char time_line[32];
		secs2time_str(part_ptr->default_time * 60, time_line,
			sizeof(time_line));
		xstrfmtcat(out, "DefaultTime=%s", time_line);
	}

	if (part_ptr->flags & PART_FLAG_NO_ROOT)
		xstrcat(out, " DisableRootJobs=YES");
	else
		xstrcat(out, " DisableRootJobs=NO");

	if (part_ptr->flags & PART_FLAG_EXCLUSIVE_USER)
		xstrcat(out, " ExclusiveUser=YES");
	else
		xstrcat(out, " ExclusiveUser=NO");

	xstrfmtcat(out, " GraceTime=%u", part_ptr->grace_time);

	if (part_ptr->flags & PART_FLAG_HIDDEN)
		xstrcat(out, " Hidden=YES");
	else
		xstrcat(out, " Hidden=NO");

	xstrcat(out, line_end);

	/****** Line 6 ******/

	if (part_ptr->max_nodes == INFINITE)
		xstrcat(out, "MaxNodes=UNLIMITED");
	else {
		if (cluster_flags & CLUSTER_FLAG_BG) {
			convert_num_unit((float)part_ptr->max_nodes,
					 tmp, sizeof(tmp), UNIT_NONE,
					 CONVERT_NUM_UNIT_EXACT);
			xstrfmtcat(out, "MaxNodes=%s", tmp);
		} else
			xstrfmtcat(out, "MaxNodes=%u", part_ptr->max_nodes);

	}

	if (part_ptr->max_time == INFINITE)
		xstrcat(out, " MaxTime=UNLIMITED");
	else {
		char time_line[32];
		secs2time_str(part_ptr->max_time * 60, time_line,
			      sizeof(time_line));
		xstrfmtcat(out, " MaxTime=%s", time_line);
	}

	if (cluster_flags & CLUSTER_FLAG_BG) {
		convert_num_unit((float)part_ptr->min_nodes, tmp, sizeof(tmp),
				 UNIT_NONE,
				 CONVERT_NUM_UNIT_EXACT);
		xstrfmtcat(out, " MinNodes=%s", tmp);
	} else
		xstrfmtcat(out, " MinNodes=%u", part_ptr->min_nodes);

	if (part_ptr->flags & PART_FLAG_LLN)
		xstrcat(out, " LLN=YES");
	else
		xstrcat(out, " LLN=NO");

	if (part_ptr->max_cpus_per_node == INFINITE)
		xstrcat(out, " MaxCPUsPerNode=UNLIMITED");
	else {
		xstrfmtcat(out, " MaxCPUsPerNode=%u",
			   part_ptr->max_cpus_per_node);
	}

	xstrcat(out, line_end);

	/****** Line added here for non-BG nodes
	 ****** to keep with alphabetized output ******/

	if (!(cluster_flags & CLUSTER_FLAG_BG)) {
		xstrfmtcat(out, "Nodes=%s", part_ptr->nodes);
		xstrcat(out, line_end);
	}

	/****** Line 7 ******/

	xstrfmtcat(out, "PriorityJobFactor=%u", part_ptr->priority_job_factor);
	xstrfmtcat(out, " PriorityTier=%u", part_ptr->priority_tier);

	if (part_ptr->flags & PART_FLAG_ROOT_ONLY)
		xstrcat(out, " RootOnly=YES");
	else
		xstrcat(out, " RootOnly=NO");

	if (part_ptr->flags & PART_FLAG_REQ_RESV)
		xstrcat(out, " ReqResv=YES");
	else
		xstrcat(out, " ReqResv=NO");

	force = part_ptr->max_share & SHARED_FORCE;
	val = part_ptr->max_share & (~SHARED_FORCE);
	if (val == 0)
		xstrcat(out, " OverSubscribe=EXCLUSIVE");
	else if (force)
		xstrfmtcat(out, " OverSubscribe=FORCE:%u", val);
	else if (val == 1)
		xstrcat(out, " OverSubscribe=NO");
	else
		xstrfmtcat(out, " OverSubscribe=YES:%u", val);

	preempt_mode = part_ptr->preempt_mode;
	if (preempt_mode == (uint16_t) NO_VAL)
		preempt_mode = slurm_get_preempt_mode(); /* use cluster param */
	xstrfmtcat(out, " PreemptMode=%s", preempt_mode_string(preempt_mode));

	xstrcat(out, line_end);

	/****** Line 8 ******/

	if (part_ptr->state_up == PARTITION_UP)
		xstrcat(out, "State=UP");
	else if (part_ptr->state_up == PARTITION_DOWN)
		xstrcat(out, "State=DOWN");
	else if (part_ptr->state_up == PARTITION_INACTIVE)
		xstrcat(out, "State=INACTIVE");
	else if (part_ptr->state_up == PARTITION_DRAIN)
		xstrcat(out, "State=DRAIN");
	else
		xstrcat(out, "State=UNKNOWN");

	if (cluster_flags & CLUSTER_FLAG_BG) {
		convert_num_unit((float)part_ptr->total_cpus, tmp,
				 sizeof(tmp), UNIT_NONE,
				 CONVERT_NUM_UNIT_EXACT);
		xstrfmtcat(out, " TotalCPUs=%s", tmp);
	} else
		xstrfmtcat(out, " TotalCPUs=%u", part_ptr->total_cpus);


	if (cluster_flags & CLUSTER_FLAG_BG) {
		convert_num_unit((float)part_ptr->total_nodes, tmp,
				 sizeof(tmp), UNIT_NONE,
				 CONVERT_NUM_UNIT_EXACT);
		xstrfmtcat(out, " TotalNodes=%s", tmp);
	} else
		xstrfmtcat(out, " TotalNodes=%u", part_ptr->total_nodes);

	xstrfmtcat(out, " SelectTypeParameters=%s",
		   select_type_param_string(part_ptr->cr_type));

	xstrcat(out, line_end);

	/****** Line 9 ******/
	if (part_ptr->def_mem_per_cpu & MEM_PER_CPU) {
		if (part_ptr->def_mem_per_cpu == MEM_PER_CPU) {
			xstrcat(out, "DefMemPerCPU=UNLIMITED");
		} else {
			xstrfmtcat(out, "DefMemPerCPU=%u",
				   part_ptr->def_mem_per_cpu & (~MEM_PER_CPU));
		}
	} else if (part_ptr->def_mem_per_cpu == 0) {
		xstrcat(out, "DefMemPerNode=UNLIMITED");
	} else {
		xstrfmtcat(out, "DefMemPerNode=%u", part_ptr->def_mem_per_cpu);
	}

	if (part_ptr->max_mem_per_cpu & MEM_PER_CPU) {
		if (part_ptr->max_mem_per_cpu == MEM_PER_CPU) {
			xstrcat(out, " MaxMemPerCPU=UNLIMITED");
		} else {
			xstrfmtcat(out, " MaxMemPerCPU=%u",
				   part_ptr->max_mem_per_cpu & (~MEM_PER_CPU));
		}
	} else if (part_ptr->max_mem_per_cpu == 0) {
		xstrcat(out, " MaxMemPerNode=UNLIMITED");
	} else {
		xstrfmtcat(out, " MaxMemPerNode=%u", part_ptr->max_mem_per_cpu);
	}

	/****** Line 10 ******/
	if (part_ptr->billing_weights_str) {
		xstrcat(out, line_end);

		xstrfmtcat(out, "TRESBillingWeights=%s",
			   part_ptr->billing_weights_str);
	}

	if (one_liner)
		xstrcat(out, "\n");
	else
		xstrcat(out, "\n\n");

	return out;
}
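
As the header comment says, part_ptr records come from slurm_load_partitions() and the returned string must be freed by the caller. A hedged usage sketch against the public Slurm API (link with -lslurm; error handling kept minimal):

#include <stdio.h>
#include <stdlib.h>
#include <slurm/slurm.h>

int main(void)
{
	partition_info_msg_t *part_info = NULL;
	uint32_t i;

	if (slurm_load_partitions((time_t) 0, &part_info, SHOW_ALL)) {
		slurm_perror("slurm_load_partitions");
		return 1;
	}
	for (i = 0; i < part_info->record_count; i++) {
		/* one_liner = 1 gives the compact single-line form */
		char *out = slurm_sprint_partition_info(
			&part_info->partition_array[i], 1);
		printf("%s", out);
		free(out);	/* caller owns the formatted string */
	}
	slurm_free_partition_info_msg(part_info);
	return 0;
}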
Example #3
static int _save_allocation(char *com, List allocated_blocks)
{
	int len = strlen(com);
	int i=5, j=0;
	allocated_block_t *allocated_block = NULL;
	char filename[50];
	char *save_string = NULL;
	FILE *file_ptr = NULL;
	char *extra = NULL;

	ListIterator results_i;

	memset(filename, 0, 50);
	if (len > 5)
		while (i<len) {

			while (com[i-1]!=' ' && i<len) {
				i++;
			}
			while (i<len && com[i]!=' ') {
				filename[j] = com[i];
				i++;
				j++;
			}
		}
	if (filename[0]=='\0') {
		time_t now_time = time(NULL);
		sprintf(filename,"bluegene.conf.%ld",
			(long int) now_time);
	}

	file_ptr = fopen(filename,"w");

	if (file_ptr!=NULL) {
		char *image_dir = NULL;

		xstrcat(save_string,
			"#\n# bluegene.conf file generated by smap\n");
		xstrcat(save_string,
			"# See the bluegene.conf man page for "
			"more information\n");
		xstrcat(save_string, "#\n");
#ifdef HAVE_BGL
		image_dir = "/bgl/BlueLight/ppcfloor/bglsys/bin";
		xstrfmtcat(save_string, "BlrtsImage=%s/rts_hw.rts\n",
			   image_dir);
		xstrfmtcat(save_string, "LinuxImage=%s/zImage.elf\n",
			   image_dir);
		xstrfmtcat(save_string,
			   "MloaderImage=%s/mmcs-mloader.rts\n",
			   image_dir);
		xstrfmtcat(save_string,
			   "RamDiskImage=%s/ramdisk.elf\n",
			   image_dir);

		xstrcat(save_string, "IONodesPerMP=8 # io poor\n");
		xstrcat(save_string, "# IONodesPerMP=64 # io rich\n");
#elif defined HAVE_BGP
		image_dir = "/bgsys/drivers/ppcfloor/boot";
		xstrfmtcat(save_string, "CnloadImage=%s/cns,%s/cnk\n",
			   image_dir, image_dir);
		xstrfmtcat(save_string, "MloaderImage=%s/uloader\n",
			   image_dir);
		xstrfmtcat(save_string,
			   "IoloadImage=%s/cns,%s/linux,%s/ramdisk\n",
			   image_dir, image_dir, image_dir);
		xstrcat(save_string, "IONodesPerMP=4 # io poor\n");
		xstrcat(save_string, "# IONodesPerMP=32 # io rich\n");
#else
		image_dir = "/bgsys/drivers/ppcfloor/boot";
		xstrfmtcat(save_string, "MloaderImage=%s/firmware\n",
			   image_dir);
		xstrcat(save_string, "IONodesPerMP=4 # io semi-poor\n");
		xstrcat(save_string, "# IONodesPerMP=16 # io rich\n");
#endif

		xstrcat(save_string, "BridgeAPILogFile="
		       "/var/log/slurm/bridgeapi.log\n");

		xstrcat(save_string, "BridgeAPIVerbose=2\n");

		xstrfmtcat(save_string, "MidPlaneNodeCnt=%d\n",
			   midplane_cnode_cnt);
		xstrfmtcat(save_string, "NodeCardNodeCnt=%d\n",
			   nodecard_node_cnt);
		if (!list_count(allocated_blocks))
			xstrcat(save_string, "LayoutMode=DYNAMIC\n");
		else {
			xstrfmtcat(save_string, "LayoutMode=%s\n", layout_mode);
			xstrfmtcat(save_string, "#\n# Block Layout\n#\n");
		}
		results_i = list_iterator_create(allocated_blocks);
		while ((allocated_block = list_next(results_i)) != NULL) {
			select_ba_request_t *request = allocated_block->request;

			if (request->small16 || request->small32
			    || request->small64 || request->small128
			    || request->small256) {
#ifdef HAVE_BGL
				xstrfmtcat(extra,
					   " 32CNBlocks=%d "
					   "128CNBlocks=%d",
					   request->small32,
					   request->small128);
#elif defined HAVE_BGP
				xstrfmtcat(extra,
					   " 16CNBlocks=%d "
					   "32CNBlocks=%d "
					   "64CNBlocks=%d "
					   "128CNBlocks=%d "
					   "256CNBlocks=%d",
					   request->small16,
					   request->small32,
					   request->small64,
					   request->small128,
					   request->small256);
#else
				xstrfmtcat(extra,
					   " 32CNBlocks=%d "
					   "64CNBlocks=%d "
					   "128CNBlocks=%d "
					   "256CNBlocks=%d",
					   request->small32,
					   request->small64,
					   request->small128,
					   request->small256);
#endif
			}

			xstrfmtcat(save_string, "MPs=%s", request->save_name);

			for (i=0; i<SYSTEM_DIMENSIONS; i++) {
				if (request->conn_type[i] == (uint16_t)NO_VAL)
					break;
				if (i)
					xstrcat(save_string, ",");
				else
					xstrcat(save_string, " Type=");
				xstrfmtcat(save_string, "%s", conn_type_string(
						   request->conn_type[i]));
#ifdef HAVE_BG_L_P
				break;
#endif
			}

			if (extra) {
				xstrfmtcat(save_string, "%s\n", extra);
				xfree(extra);
			} else
				xstrcat(save_string, "\n");

		}
		list_iterator_destroy(results_i);
		fputs(save_string, file_ptr);
		xfree(save_string);
		fclose (file_ptr);
	}

	return 1;
}
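
The nested scanning loops that pull the optional filename out of com are compact but hard to follow. A sketch of the same "skip the command word, copy the next token" scan in plain C (second_token is a hypothetical helper, standard library only):

#include <ctype.h>
#include <stdio.h>
#include <string.h>

/* Copy the first whitespace-separated token after the command word
 * (e.g. the "foo.conf" in "save foo.conf") into buf; empty if none. */
static void second_token(const char *com, char *buf, size_t buflen)
{
	size_t i = 0, j = 0;

	while (com[i] && !isspace((unsigned char)com[i]))
		i++;	/* skip the command word itself */
	while (com[i] && isspace((unsigned char)com[i]))
		i++;	/* skip the separating blanks */
	while (com[i] && !isspace((unsigned char)com[i]) && j + 1 < buflen)
		buf[j++] = com[i++];
	buf[j] = '\0';
}

int main(void)
{
	char name[50];

	second_token("save bluegene.conf.new", name, sizeof(name));
	printf("filename: '%s'\n", name);	/* -> bluegene.conf.new */
	second_token("save", name, sizeof(name));
	printf("filename: '%s'\n", name);	/* -> empty, use default */
	return 0;
}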
Example #4
extern int slurm_jobcomp_log_record(struct job_record *job_ptr)
{
	char usr_str[32], grp_str[32], start_str[32], end_str[32], time_str[32];
	char *json_str = NULL, *script_str = NULL, *state_string = NULL;
	char *exit_code_str = NULL, *derived_ec_str = NULL, *script = NULL;
	enum job_states job_state;
	int i, tmp_int, tmp_int2;
	time_t elapsed_time;
	uint32_t time_limit;
	uint16_t ntasks_per_node;
	struct job_node *jnode;

	if (list_count(jobslist) > MAX_JOBS) {
		error("%s: Limit of %d enqueued jobs in memory waiting to be indexed reached. Job %lu discarded",
		      plugin_type, MAX_JOBS, (unsigned long)job_ptr->job_id);
		return SLURM_ERROR;
	}

	_get_user_name(job_ptr->user_id, usr_str, sizeof(usr_str));
	_get_group_name(job_ptr->group_id, grp_str, sizeof(grp_str));

	if ((job_ptr->time_limit == NO_VAL) && job_ptr->part_ptr)
		time_limit = job_ptr->part_ptr->max_time;
	else
		time_limit = job_ptr->time_limit;

	if (job_ptr->job_state & JOB_RESIZING) {
		time_t now = time(NULL);
		state_string = job_state_string(job_ptr->job_state);
		if (job_ptr->resize_time) {
			_make_time_str(&job_ptr->resize_time, start_str,
				       sizeof(start_str));
		} else {
			_make_time_str(&job_ptr->start_time, start_str,
				       sizeof(start_str));
		}
		_make_time_str(&now, end_str, sizeof(end_str));
	} else {
		/* Job state will typically have JOB_COMPLETING or JOB_RESIZING
		 * flag set when called. We remove the flags to get the eventual
		 * completion state: JOB_FAILED, JOB_TIMEOUT, etc. */
		job_state = job_ptr->job_state & JOB_STATE_BASE;
		state_string = job_state_string(job_state);
		if (job_ptr->resize_time) {
			_make_time_str(&job_ptr->resize_time, start_str,
				       sizeof(start_str));
		} else if (job_ptr->start_time > job_ptr->end_time) {
			/* Job cancelled while pending and
			 * expected start time is in the future. */
			snprintf(start_str, sizeof(start_str), "Unknown");
		} else {
			_make_time_str(&job_ptr->start_time, start_str,
				       sizeof(start_str));
		}
		_make_time_str(&job_ptr->end_time, end_str, sizeof(end_str));
	}

	elapsed_time = job_ptr->end_time - job_ptr->start_time;

	tmp_int = tmp_int2 = 0;
	if (job_ptr->derived_ec == NO_VAL)
		;
	else if (WIFSIGNALED(job_ptr->derived_ec))
		tmp_int2 = WTERMSIG(job_ptr->derived_ec);
	else if (WIFEXITED(job_ptr->derived_ec))
		tmp_int = WEXITSTATUS(job_ptr->derived_ec);
	xstrfmtcat(derived_ec_str, "%d:%d", tmp_int, tmp_int2);

	tmp_int = tmp_int2 = 0;
	if (job_ptr->exit_code == NO_VAL)
		;
	else if (WIFSIGNALED(job_ptr->exit_code))
		tmp_int2 = WTERMSIG(job_ptr->exit_code);
	else if (WIFEXITED(job_ptr->exit_code))
		tmp_int = WEXITSTATUS(job_ptr->exit_code);
	xstrfmtcat(exit_code_str, "%d:%d", tmp_int, tmp_int2);

	json_str = xstrdup_printf(JOBCOMP_DATA_FORMAT,
				job_ptr->job_id, usr_str,
				job_ptr->user_id, grp_str,
				job_ptr->group_id, start_str,
				end_str, elapsed_time,
				job_ptr->partition, job_ptr->alloc_node,
				job_ptr->nodes, job_ptr->total_cpus,
				job_ptr->total_nodes,
				derived_ec_str,
				exit_code_str, state_string,
				((float) elapsed_time *
				 (float) job_ptr->total_cpus) /
				 (float) 3600);

	if (job_ptr->array_task_id != NO_VAL) {
		xstrfmtcat(json_str, ",\"array_job_id\":%lu",
			   (unsigned long) job_ptr->array_job_id);
		xstrfmtcat(json_str, ",\"array_task_id\":%lu",
			   (unsigned long) job_ptr->array_task_id);
	}

	if (job_ptr->pack_job_id != NO_VAL) {
		xstrfmtcat(json_str, ",\"pack_job_id\":%lu",
			   (unsigned long) job_ptr->pack_job_id);
		xstrfmtcat(json_str, ",\"pack_job_offset\":%lu",
			   (unsigned long) job_ptr->pack_job_offset);
	}

	if (job_ptr->details && job_ptr->details->submit_time) {
		_make_time_str(&job_ptr->details->submit_time,
			       time_str, sizeof(time_str));
		xstrfmtcat(json_str, ",\"@submit\":\"%s\"", time_str);
	}

	if (job_ptr->details && job_ptr->details->begin_time) {
		_make_time_str(&job_ptr->details->begin_time,
			       time_str, sizeof(time_str));
		xstrfmtcat(json_str, ",\"@eligible\":\"%s\"", time_str);
		if (job_ptr->start_time) {
			int64_t queue_wait = (int64_t)difftime(
				job_ptr->start_time,
				job_ptr->details->begin_time);
			xstrfmtcat(json_str, ",\"queue_wait\":%"PRIi64,
				   queue_wait);
		}
	}

	if (job_ptr->details
	    && (job_ptr->details->work_dir && job_ptr->details->work_dir[0])) {
		xstrfmtcat(json_str, ",\"work_dir\":\"%s\"",
			   job_ptr->details->work_dir);
	}

	if (job_ptr->details
	    && (job_ptr->details->std_err && job_ptr->details->std_err[0])) {
		xstrfmtcat(json_str, ",\"std_err\":\"%s\"",
			   job_ptr->details->std_err);
	}

	if (job_ptr->details
	    && (job_ptr->details->std_in && job_ptr->details->std_in[0])) {
		xstrfmtcat(json_str, ",\"std_in\":\"%s\"",
			   job_ptr->details->std_in);
	}

	if (job_ptr->details
	    && (job_ptr->details->std_out && job_ptr->details->std_out[0])) {
		xstrfmtcat(json_str, ",\"std_out\":\"%s\"",
			   job_ptr->details->std_out);
	}

	if (job_ptr->assoc_ptr != NULL) {
		xstrfmtcat(json_str, ",\"cluster\":\"%s\"",
			   job_ptr->assoc_ptr->cluster);
	}

	if (job_ptr->qos_ptr != NULL) {
		xstrfmtcat(json_str, ",\"qos\":\"%s\"", job_ptr->qos_ptr->name);
	}

	if (job_ptr->details && (job_ptr->details->num_tasks != NO_VAL)) {
		xstrfmtcat(json_str, ",\"ntasks\":%u",
			   job_ptr->details->num_tasks);
	}

	if (job_ptr->details
	    && (job_ptr->details->ntasks_per_node != NO_VAL16)) {
		ntasks_per_node = job_ptr->details->ntasks_per_node;
		xstrfmtcat(json_str, ",\"ntasks_per_node\":%hu",
			   ntasks_per_node);
	}

	if (job_ptr->details
	    && (job_ptr->details->cpus_per_task != NO_VAL16)) {
		xstrfmtcat(json_str, ",\"cpus_per_task\":%hu",
			   job_ptr->details->cpus_per_task);
	}

	if (job_ptr->details
	    && (job_ptr->details->orig_dependency
		&& job_ptr->details->orig_dependency[0])) {
		xstrfmtcat(json_str, ",\"orig_dependency\":\"%s\"",
			   job_ptr->details->orig_dependency);
	}

	if (job_ptr->details
	    && (job_ptr->details->exc_nodes
		&& job_ptr->details->exc_nodes[0])) {
		xstrfmtcat(json_str, ",\"excluded_nodes\":\"%s\"",
			   job_ptr->details->exc_nodes);
	}

	if (time_limit != INFINITE) {
		xstrfmtcat(json_str, ",\"time_limit\":%lu",
			   (unsigned long) time_limit * 60);
	}

	if (job_ptr->name && job_ptr->name[0])
		xstrfmtcat(json_str, ",\"job_name\":\"%s\"", job_ptr->name);

	if (job_ptr->resv_name && job_ptr->resv_name[0]) {
		xstrfmtcat(json_str, ",\"reservation_name\":\"%s\"",
			   job_ptr->resv_name);
	}

	if (job_ptr->wckey && job_ptr->wckey[0])
		xstrfmtcat(json_str, ",\"wc_key\":\"%s\"", job_ptr->wckey);

	if (job_ptr->gres_req && job_ptr->gres_req[0]) {
		xstrfmtcat(json_str, ",\"gres_req\":\"%s\"", job_ptr->gres_req);
	}

	if (job_ptr->gres_alloc && job_ptr->gres_alloc[0]) {
		xstrfmtcat(json_str, ",\"gres_alloc\":\"%s\"",
			   job_ptr->gres_alloc);
	}

	if (job_ptr->account && job_ptr->account[0]) {
		xstrfmtcat(json_str, ",\"account\":\"%s\"", job_ptr->account);
	}

	script = get_job_script(job_ptr);
	if (script && script[0]) {
		script_str = _json_escape(script);
		xstrfmtcat(json_str, ",\"script\":\"%s\"", script_str);
		xfree(script_str);
	}
	xfree(script);

	if (job_ptr->assoc_ptr) {
		assoc_mgr_lock_t locks = { READ_LOCK, NO_LOCK, NO_LOCK, NO_LOCK,
					   NO_LOCK, NO_LOCK, NO_LOCK };
		slurmdb_assoc_rec_t *assoc_ptr = job_ptr->assoc_ptr;
		char *parent_accounts = NULL;
		char **acc_aux = NULL;
		int nparents = 0;

		assoc_mgr_lock(&locks);

		/* Start at the first parent and go up.  When studying
		 * this code it was slightly faster to do 2 loops on
		 * the association linked list and only 1 xmalloc, but
		 * we opted for cleaner-looking code and went with a
		 * realloc. */
		while (assoc_ptr) {
			if (assoc_ptr->acct) {
				acc_aux = xrealloc(acc_aux,
						   sizeof(char *) *
						   (nparents + 1));
				acc_aux[nparents++] = assoc_ptr->acct;
			}
			assoc_ptr = assoc_ptr->usage->parent_assoc_ptr;
		}

		for (i = nparents - 1; i >= 0; i--)
			xstrfmtcat(parent_accounts, "/%s", acc_aux[i]);
		xfree(acc_aux);

		xstrfmtcat(json_str, ",\"parent_accounts\":\"%s\"",
			   parent_accounts);

		xfree(parent_accounts);

		assoc_mgr_unlock(&locks);
	}

	xfree(derived_ec_str);
	xfree(exit_code_str);
	xstrcat(json_str, "}");
	jnode = xmalloc(sizeof(struct job_node));
	jnode->serialized_job = json_str;
	list_enqueue(jobslist, jnode);
	json_str = NULL;

	return SLURM_SUCCESS;
}
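
derived_ec_str and exit_code_str render a wait(2)-style status as "exitcode:signal" using the standard macros. A self-contained sketch of that decoding:

#include <stdio.h>
#include <sys/wait.h>

/* Render a raw wait() status as "exit:signal", as the jobcomp
 * plugin does for exit_code and derived_ec. */
static void status_str(int status, char *buf, size_t len)
{
	int exit_code = 0, signo = 0;

	if (WIFSIGNALED(status))
		signo = WTERMSIG(status);	 /* killed by a signal */
	else if (WIFEXITED(status))
		exit_code = WEXITSTATUS(status); /* normal exit code */
	snprintf(buf, len, "%d:%d", exit_code, signo);
}

int main(void)
{
	char buf[32];

	status_str(0x0100, buf, sizeof(buf));	/* exited with code 1 */
	printf("%s\n", buf);			/* -> 1:0 */
	status_str(0x0009, buf, sizeof(buf));	/* killed by SIGKILL */
	printf("%s\n", buf);			/* -> 0:9 */
	return 0;
}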
Example #5
/*
 * Parse a multi-prog input file line
 * line IN - line to parse
 * command_pos IN/OUT - where in opt.argv we are
 * count IN - which command we are on
 * return 0 if empty line, 1 if added
 */
static int _parse_prog_line(char *in_line, int *command_pos, int count)
{
	int i, cmd_inx;
	int first_task_inx, last_task_inx;
	hostset_t hs = NULL;
	char *tmp_str = NULL;

	xassert(opt.ntasks);

	/* Get the task ID string */
	for (i = 0; in_line[i]; i++)
		if (!isspace(in_line[i]))
			break;

	if (!in_line[i]) /* empty line */
		return 0;
	else if (in_line[i] == '#')
		return 0;
	else if (!isdigit(in_line[i]))
		goto bad_line;
	first_task_inx = i;
	for (i++; in_line[i]; i++) {
		if (isspace(in_line[i]))
			break;
	}
	if (!isspace(in_line[i]))
		goto bad_line;
	last_task_inx = i;


	/* Now transfer data to the function arguments */
	in_line[last_task_inx] = '\0';
	xstrfmtcat(tmp_str, "[%s]", in_line + first_task_inx);
	hs = hostset_create(tmp_str);
	xfree(tmp_str);
	in_line[last_task_inx] = ' ';
	if (!hs)
		goto bad_line;


	if (count) {
		opt.argc += 1;
		xrealloc(opt.argv, opt.argc * sizeof(char *));
		opt.argv[(*command_pos)++] = xstrdup(":");
	}
	opt.argc += 2;
	xrealloc(opt.argv, opt.argc * sizeof(char *));
	opt.argv[(*command_pos)++] = xstrdup("-n");
	opt.argv[(*command_pos)++] = xstrdup_printf("%d", hostset_count(hs));
	hostset_destroy(hs);

	/* Get the command */
	for (i++; in_line[i]; i++) {
		if (!isspace(in_line[i]))
			break;
	}
	if (in_line[i] == '\0')
		goto bad_line;

	cmd_inx = i;
	for ( ; in_line[i]; i++) {
		if (isspace(in_line[i])) {
			if (i > cmd_inx) {
				int diff = i - cmd_inx + 1;
				char tmp_char[diff + 1];
				snprintf(tmp_char, diff, "%s",
					 in_line + cmd_inx);
				opt.argc += 1;
				xrealloc(opt.argv, opt.argc * sizeof(char *));
				opt.argv[(*command_pos)++] = xstrdup(tmp_char);
			}
			cmd_inx = i + 1;
		} else if (in_line[i] == '\n')
			break;
	}

	return 1;

bad_line:
	error("invalid input line: %s", in_line);

	return 0;
}
Example #6
int _do_stat(uint32_t jobid, uint32_t stepid, char *nodelist,
             uint32_t req_cpufreq_min, uint32_t req_cpufreq_max,
             uint32_t req_cpufreq_gov)
{
    job_step_stat_response_msg_t *step_stat_response = NULL;
    int rc = SLURM_SUCCESS;
    ListIterator itr;
    slurmdb_stats_t temp_stats;
    job_step_stat_t *step_stat = NULL;
    int ntasks = 0;
    int tot_tasks = 0;
    hostlist_t hl = NULL;

    debug("requesting info for job %u.%u", jobid, stepid);
    if ((rc = slurm_job_step_stat(jobid, stepid, nodelist,
                                  &step_stat_response)) != SLURM_SUCCESS) {
        if (rc == ESLURM_INVALID_JOB_ID) {
            debug("job step %u.%u has already completed",
                  jobid, stepid);
        } else {
            error("problem getting step_layout for %u.%u: %s",
                  jobid, stepid, slurm_strerror(rc));
        }
        return rc;
    }

    memset(&job, 0, sizeof(slurmdb_job_rec_t));
    job.jobid = jobid;

    memset(&step, 0, sizeof(slurmdb_step_rec_t));

    memset(&temp_stats, 0, sizeof(slurmdb_stats_t));
    temp_stats.cpu_min = NO_VAL;
    memset(&step.stats, 0, sizeof(slurmdb_stats_t));
    step.stats.cpu_min = NO_VAL;

    step.job_ptr = &job;
    step.stepid = stepid;
    step.nodes = xmalloc(BUF_SIZE);
    step.req_cpufreq_min = req_cpufreq_min;
    step.req_cpufreq_max = req_cpufreq_max;
    step.req_cpufreq_gov = req_cpufreq_gov;
    step.stepname = NULL;
    step.state = JOB_RUNNING;

    hl = hostlist_create(NULL);
    itr = list_iterator_create(step_stat_response->stats_list);
    while ((step_stat = list_next(itr))) {
        if (!step_stat->step_pids || !step_stat->step_pids->node_name)
            continue;
        if (step_stat->step_pids->pid_cnt > 0) {
            int i;
            for(i=0; i<step_stat->step_pids->pid_cnt; i++) {
                if (step.pid_str)
                    xstrcat(step.pid_str, ",");
                xstrfmtcat(step.pid_str, "%u",
                           step_stat->step_pids->pid[i]);
            }
        }

        if (params.pid_format) {
            step.nodes = step_stat->step_pids->node_name;
            print_fields(&step);
            xfree(step.pid_str);
        } else {
            hostlist_push_host(hl, step_stat->step_pids->node_name);
            ntasks += step_stat->num_tasks;
            if (step_stat->jobacct) {
                jobacctinfo_2_stats(&temp_stats,
                                    step_stat->jobacct);
                aggregate_stats(&step.stats, &temp_stats);
            }
        }
    }
    list_iterator_destroy(itr);
    slurm_job_step_pids_response_msg_free(step_stat_response);
    /* we printed it out already */
    if (params.pid_format)
        return rc;

    hostlist_sort(hl);
    hostlist_ranged_string(hl, BUF_SIZE, step.nodes);
    hostlist_destroy(hl);
    tot_tasks += ntasks;

    if (tot_tasks) {
        step.stats.cpu_ave /= (double)tot_tasks;
        step.stats.rss_ave /= (double)tot_tasks;
        step.stats.vsize_ave /= (double)tot_tasks;
        step.stats.pages_ave /= (double)tot_tasks;
        step.stats.disk_read_ave /= (double)tot_tasks;
        step.stats.disk_write_ave /= (double)tot_tasks;
        step.stats.act_cpufreq /= (double)tot_tasks;
        step.ntasks = tot_tasks;
    }

    print_fields(&step);

    return rc;
}
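
In the non-pid_format path, _do_stat pushes each responding node onto a hostlist and collapses the result into a ranged string for printing. A minimal sketch with the public API; slurm_hostlist_uniq sorts (and dedups) the list, standing in for the internal hostlist_sort used above:

#include <stdio.h>
#include <slurm/slurm.h>

#define BUF_SIZE 4096

int main(void)
{
	hostlist_t hl = slurm_hostlist_create(NULL);	/* empty list */
	char nodes[BUF_SIZE];

	/* push nodes in arrival order, as the stats loop does */
	slurm_hostlist_push_host(hl, "tux2");
	slurm_hostlist_push_host(hl, "tux0");
	slurm_hostlist_push_host(hl, "tux1");

	slurm_hostlist_uniq(hl);	/* sort (and dedup) the list */
	slurm_hostlist_ranged_string(hl, sizeof(nodes), nodes);
	printf("%s\n", nodes);		/* -> tux[0-2] */

	slurm_hostlist_destroy(hl);
	return 0;
}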
Example #7
extern int as_mysql_hourly_rollup(mysql_conn_t *mysql_conn,
				  char *cluster_name,
				  time_t start, time_t end,
				  uint16_t archive_data)
{
	int rc = SLURM_SUCCESS;
	int add_sec = 3600;
	int i=0;
	time_t now = time(NULL);
	time_t curr_start = start;
	time_t curr_end = curr_start + add_sec;
	char *query = NULL;
	MYSQL_RES *result = NULL;
	MYSQL_ROW row;
	ListIterator a_itr = NULL;
	ListIterator c_itr = NULL;
	ListIterator w_itr = NULL;
	ListIterator r_itr = NULL;
	List assoc_usage_list = list_create(_destroy_local_id_usage);
	List cluster_down_list = list_create(_destroy_local_cluster_usage);
	List wckey_usage_list = list_create(_destroy_local_id_usage);
	List resv_usage_list = list_create(_destroy_local_resv_usage);
	uint16_t track_wckey = slurm_get_track_wckey();
	/* char start_char[20], end_char[20]; */

	char *job_req_inx[] = {
		"job_db_inx",
		"id_job",
		"id_assoc",
		"id_wckey",
		"time_eligible",
		"time_start",
		"time_end",
		"time_suspended",
		"cpus_alloc",
		"cpus_req",
		"id_resv"

	};
	char *job_str = NULL;
	enum {
		JOB_REQ_DB_INX,
		JOB_REQ_JOBID,
		JOB_REQ_ASSOCID,
		JOB_REQ_WCKEYID,
		JOB_REQ_ELG,
		JOB_REQ_START,
		JOB_REQ_END,
		JOB_REQ_SUSPENDED,
		JOB_REQ_ACPU,
		JOB_REQ_RCPU,
		JOB_REQ_RESVID,
		JOB_REQ_COUNT
	};

	char *suspend_req_inx[] = {
		"time_start",
		"time_end"
	};
	char *suspend_str = NULL;
	enum {
		SUSPEND_REQ_START,
		SUSPEND_REQ_END,
		SUSPEND_REQ_COUNT
	};

	char *resv_req_inx[] = {
		"id_resv",
		"assoclist",
		"cpus",
		"flags",
		"time_start",
		"time_end"
	};
	char *resv_str = NULL;
	enum {
		RESV_REQ_ID,
		RESV_REQ_ASSOCS,
		RESV_REQ_CPU,
		RESV_REQ_FLAGS,
		RESV_REQ_START,
		RESV_REQ_END,
		RESV_REQ_COUNT
	};

	i=0;
	xstrfmtcat(job_str, "%s", job_req_inx[i]);
	for(i=1; i<JOB_REQ_COUNT; i++) {
		xstrfmtcat(job_str, ", %s", job_req_inx[i]);
	}

	i=0;
	xstrfmtcat(suspend_str, "%s", suspend_req_inx[i]);
	for(i=1; i<SUSPEND_REQ_COUNT; i++) {
		xstrfmtcat(suspend_str, ", %s", suspend_req_inx[i]);
	}

	i=0;
	xstrfmtcat(resv_str, "%s", resv_req_inx[i]);
	for(i=1; i<RESV_REQ_COUNT; i++) {
		xstrfmtcat(resv_str, ", %s", resv_req_inx[i]);
	}

/* 	info("begin start %s", ctime(&curr_start)); */
/* 	info("begin end %s", ctime(&curr_end)); */
	a_itr = list_iterator_create(assoc_usage_list);
	c_itr = list_iterator_create(cluster_down_list);
	w_itr = list_iterator_create(wckey_usage_list);
	r_itr = list_iterator_create(resv_usage_list);
	while (curr_start < end) {
		int last_id = -1;
		int last_wckeyid = -1;
		int seconds = 0;
		int tot_time = 0;
		local_cluster_usage_t *loc_c_usage = NULL;
		local_cluster_usage_t *c_usage = NULL;
		local_resv_usage_t *r_usage = NULL;
		local_id_usage_t *a_usage = NULL;
		local_id_usage_t *w_usage = NULL;

		debug3("%s curr hour is now %ld-%ld",
		       cluster_name, curr_start, curr_end);
/* 		info("start %s", ctime(&curr_start)); */
/* 		info("end %s", ctime(&curr_end)); */

		c_usage = _setup_cluster_usage(mysql_conn, cluster_name,
					       curr_start, curr_end,
					       cluster_down_list);

		// now get the reservations during this time
		query = xstrdup_printf("select %s from \"%s_%s\" where "
				       "(time_start < %ld && time_end >= %ld) "
				       "order by time_start",
				       resv_str, cluster_name, resv_table,
				       curr_end, curr_start);

		debug3("%d(%s:%d) query\n%s",
		       mysql_conn->conn, THIS_FILE, __LINE__, query);
		if (!(result = mysql_db_query_ret(
			     mysql_conn, query, 0))) {
			xfree(query);
			_destroy_local_cluster_usage(c_usage);
			return SLURM_ERROR;
		}
		xfree(query);

		/* If a reservation overlaps another reservation we
		   total up everything here as if they didn't overlap,
		   but when calculating the total time for a cluster
		   we will remove the extra time received.  This may
		   produce unexpected results with association based
		   reports, since each association is credited with
		   the total amount of time of each reservation, which
		   can add up to more time than is actually available.
		   Job/Cluster/Reservation reports should be fine,
		   though, since we really don't over-allocate
		   resources.  The reason we can't handle overlapping
		   reservations here is that unless one reservation
		   completely overlaps the other we have no idea how
		   many cpus should be removed, since this could be a
		   heterogeneous system.  The same problem exists when
		   a reservation is created with the ignore_jobs
		   option, which allows jobs that aren't supposed to
		   be in the reservation to keep running there.
		*/
		while ((row = mysql_fetch_row(result))) {
			time_t row_start = slurm_atoul(row[RESV_REQ_START]);
			time_t row_end = slurm_atoul(row[RESV_REQ_END]);
			uint32_t row_cpu = slurm_atoul(row[RESV_REQ_CPU]);
			uint32_t row_flags = slurm_atoul(row[RESV_REQ_FLAGS]);

			if (row_start < curr_start)
				row_start = curr_start;

			if (!row_end || row_end > curr_end)
				row_end = curr_end;

			/* Don't worry about it if the time is less
			 * than 1 second.
			 */
			if ((row_end - row_start) < 1)
				continue;

			r_usage = xmalloc(sizeof(local_resv_usage_t));
			r_usage->id = slurm_atoul(row[RESV_REQ_ID]);

			r_usage->local_assocs = list_create(slurm_destroy_char);
			slurm_addto_char_list(r_usage->local_assocs,
					      row[RESV_REQ_ASSOCS]);

			r_usage->total_time = (row_end - row_start) * row_cpu;
			r_usage->start = row_start;
			r_usage->end = row_end;
			list_append(resv_usage_list, r_usage);

			/* Since this reservation was added to the
			   cluster and only certain people could run
			   there we will use this as allocated time on
			   the system.  If the reservation was a
			   maintenance then we add the time to planned
			   down time.
			*/


			/* only record time for the clusters that have
			   registered.  This continue should rarely if
			   ever happen.
			*/
			if (!c_usage)
				continue;
			else if (row_flags & RESERVE_FLAG_MAINT)
				c_usage->pd_cpu += r_usage->total_time;
			else
				c_usage->a_cpu += r_usage->total_time;
			/* slurm_make_time_str(&r_usage->start, start_char, */
			/* 		    sizeof(start_char)); */
			/* slurm_make_time_str(&r_usage->end, end_char, */
			/* 		    sizeof(end_char)); */
			/* info("adding this much %lld to cluster %s " */
			/*      "%d %d %s - %s", */
			/*      r_usage->total_time, c_usage->name, */
			/*      (row_flags & RESERVE_FLAG_MAINT),  */
			/*      r_usage->id, start_char, end_char); */
		}
		mysql_free_result(result);

		/* now get the jobs during this time only  */
		query = xstrdup_printf("select %s from \"%s_%s\" where "
				       "(time_eligible < %ld && "
				       "(time_end >= %ld || time_end = 0)) "
				       "order by id_assoc, time_eligible",
				       job_str, cluster_name, job_table,
				       curr_end, curr_start);

		debug3("%d(%s:%d) query\n%s",
		       mysql_conn->conn, THIS_FILE, __LINE__, query);
		if (!(result = mysql_db_query_ret(
			     mysql_conn, query, 0))) {
			xfree(query);
			_destroy_local_cluster_usage(c_usage);
			return SLURM_ERROR;
		}
		xfree(query);

		while ((row = mysql_fetch_row(result))) {
			uint32_t job_id = slurm_atoul(row[JOB_REQ_JOBID]);
			uint32_t assoc_id = slurm_atoul(row[JOB_REQ_ASSOCID]);
			uint32_t wckey_id = slurm_atoul(row[JOB_REQ_WCKEYID]);
			uint32_t resv_id = slurm_atoul(row[JOB_REQ_RESVID]);
			time_t row_eligible = slurm_atoul(row[JOB_REQ_ELG]);
			time_t row_start = slurm_atoul(row[JOB_REQ_START]);
			time_t row_end = slurm_atoul(row[JOB_REQ_END]);
			uint32_t row_acpu = slurm_atoul(row[JOB_REQ_ACPU]);
			uint32_t row_rcpu = slurm_atoul(row[JOB_REQ_RCPU]);
			seconds = 0;

			if (row_start && (row_start < curr_start))
				row_start = curr_start;

			if (!row_start && row_end)
				row_start = row_end;

			if (!row_end || row_end > curr_end)
				row_end = curr_end;

			if (!row_start || ((row_end - row_start) < 1))
				goto calc_cluster;

			seconds = (row_end - row_start);

			if (slurm_atoul(row[JOB_REQ_SUSPENDED])) {
				MYSQL_RES *result2 = NULL;
				MYSQL_ROW row2;
				/* get the suspended time for this job */
				query = xstrdup_printf(
					"select %s from \"%s_%s\" where "
					"(time_start < %ld && (time_end >= %ld "
					"|| time_end = 0)) && job_db_inx=%s "
					"order by time_start",
					suspend_str, cluster_name,
					suspend_table,
					curr_end, curr_start,
					row[JOB_REQ_DB_INX]);

				debug4("%d(%s:%d) query\n%s",
				       mysql_conn->conn, THIS_FILE,
				       __LINE__, query);
				if (!(result2 = mysql_db_query_ret(
					     mysql_conn,
					     query, 0))) {
					xfree(query);
					_destroy_local_cluster_usage(c_usage);
					return SLURM_ERROR;
				}
				xfree(query);
				while ((row2 = mysql_fetch_row(result2))) {
					time_t local_start = slurm_atoul(
						row2[SUSPEND_REQ_START]);
					time_t local_end = slurm_atoul(
						row2[SUSPEND_REQ_END]);

					if (!local_start)
						continue;

					if (row_start > local_start)
						local_start = row_start;
					if (row_end < local_end)
						local_end = row_end;
					tot_time = (local_end - local_start);
					if (tot_time < 1)
						continue;

					seconds -= tot_time;
				}
				mysql_free_result(result2);
			}
			if (seconds < 1) {
				debug4("This job (%u) was suspended "
				       "the entire hour", job_id);
				continue;
			}

			if (last_id != assoc_id) {
				a_usage = xmalloc(sizeof(local_id_usage_t));
				a_usage->id = assoc_id;
				list_append(assoc_usage_list, a_usage);
				last_id = assoc_id;
			}

			a_usage->a_cpu += seconds * row_acpu;

			if (!track_wckey)
				goto calc_cluster;

			/* do the wckey calculation */
			if (last_wckeyid != wckey_id) {
				list_iterator_reset(w_itr);
				while ((w_usage = list_next(w_itr)))
					if (w_usage->id == wckey_id)
						break;

				if (!w_usage) {
					w_usage = xmalloc(
						sizeof(local_id_usage_t));
					w_usage->id = wckey_id;
					list_append(wckey_usage_list,
						    w_usage);
				}

				last_wckeyid = wckey_id;
			}
			w_usage->a_cpu += seconds * row_acpu;
			/* do the cluster allocated calculation */
		calc_cluster:

			/* Now figure out whether there was a
			   disconnected slurmctld during this job.
			*/
			list_iterator_reset(c_itr);
			while ((loc_c_usage = list_next(c_itr))) {
				int temp_end = row_end;
				int temp_start = row_start;
				if (loc_c_usage->start > temp_start)
					temp_start = loc_c_usage->start;
				if (loc_c_usage->end < temp_end)
					temp_end = loc_c_usage->end;
				seconds = (temp_end - temp_start);
				if (seconds > 0) {
					/* info(" Job %u was running for " */
					/*      "%"PRIu64" seconds while " */
					/*      "cluster %s's slurmctld " */
					/*      "wasn't responding", */
					/*      job_id, */
					/*      (uint64_t) */
					/*      (seconds * row_acpu), */
					/*      cluster_name); */
					loc_c_usage->total_time -=
						seconds * row_acpu;
				}
			}

			/* first figure out the reservation */
			if (resv_id) {
				if (seconds <= 0)
					continue;
				/* Since we have already added the
				   entire reservation as used time on
				   the cluster, we only need to
				   calculate the used time for the
				   reservation and then divvy up the
				   unused time over the associations
				   able to run in the reservation.
				   Since the job ran, or was to run,
				   in a reservation, we don't care
				   about eligible time, since that
				   could totally skew the cluster's
				   reserved time: the job may be able
				   to run outside of the reservation. */
				list_iterator_reset(r_itr);
				while ((r_usage = list_next(r_itr))) {
					/* since the reservation could
					   have changed in some way,
					   thus making a new
					   reservation record in the
					   database, we have to make
					   sure all the reservations
					   are checked to see if such
					   a thing has happened */
					if (r_usage->id == resv_id) {
						int temp_end = row_end;
						int temp_start = row_start;
						if (r_usage->start > temp_start)
							temp_start =
								r_usage->start;
						if (r_usage->end < temp_end)
							temp_end = r_usage->end;

						if ((temp_end - temp_start)
						   > 0) {
							r_usage->a_cpu +=
								(temp_end
								 - temp_start)
								* row_acpu;
						}
					}
				}
				continue;
			}

			/* only record time for the clusters that have
			   registered.  This continue should rarely if
			   ever happen.
			*/
			if (!c_usage)
				continue;

			if (row_start && (seconds > 0)) {
				/* info("%d assoc %d adds " */
				/*      "(%d)(%d-%d) * %d = %d " */
				/*      "to %d", */
				/*      job_id, */
				/*      a_usage->id, */
				/*      seconds, */
				/*      row_end, row_start, */
				/*      row_acpu, */
				/*      seconds * row_acpu, */
				/*      row_acpu); */

				c_usage->a_cpu += seconds * row_acpu;
			}

			/* now reserved time */
			if (!row_start || (row_start >= c_usage->start)) {
				int temp_end = row_start;
				int temp_start = row_eligible;
				if (c_usage->start > temp_start)
					temp_start = c_usage->start;
				if (c_usage->end < temp_end)
					temp_end = c_usage->end;
				seconds = (temp_end - temp_start);
				if (seconds > 0) {
					/* info("%d assoc %d reserved " */
					/*      "(%d)(%d-%d) * %d = %d " */
					/*      "to %d", */
					/*      job_id, */
					/*      assoc_id, */
					/*      seconds, */
					/*      temp_end, temp_start, */
					/*      row_rcpu, */
					/*      seconds * row_rcpu, */
					/*      row_rcpu); */
					c_usage->r_cpu += seconds * row_rcpu;
				}
			}
		}
		mysql_free_result(result);

		/* now figure out how much more to add to the
		   associations that could have run in the reservation
		*/
		list_iterator_reset(r_itr);
		while ((r_usage = list_next(r_itr))) {
			int64_t idle = r_usage->total_time - r_usage->a_cpu;
			char *assoc = NULL;
			ListIterator tmp_itr = NULL;

			if (idle <= 0)
				continue;

			/* now divide that time by the number of
			   associations in the reservation and add
			   the share to each association */
			seconds = idle / list_count(r_usage->local_assocs);
/* 			info("resv %d got %d for seconds for %d assocs", */
/* 			     r_usage->id, seconds, */
/* 			     list_count(r_usage->local_assocs)); */
			tmp_itr = list_iterator_create(r_usage->local_assocs);
			while ((assoc = list_next(tmp_itr))) {
				uint32_t associd = slurm_atoul(assoc);
				if (last_id != associd) {
					list_iterator_reset(a_itr);
					while ((a_usage = list_next(a_itr))) {
						if (a_usage->id == associd) {
							last_id = a_usage->id;
							break;
						}
					}
				}

				if (!a_usage) {
					a_usage = xmalloc(
						sizeof(local_id_usage_t));
					a_usage->id = associd;
					list_append(assoc_usage_list, a_usage);
					last_id = associd;
				}

				a_usage->a_cpu += seconds;
			}
			list_iterator_destroy(tmp_itr);
		}

		/* now apply the down time from the slurmctld disconnects */
		if (c_usage) {
			list_iterator_reset(c_itr);
			while ((loc_c_usage = list_next(c_itr)))
				c_usage->d_cpu += loc_c_usage->total_time;
		}

		if ((rc = _process_cluster_usage(
			     mysql_conn, cluster_name, curr_start,
			     curr_end, now, c_usage)) != SLURM_SUCCESS) {
			_destroy_local_cluster_usage(c_usage);
			goto end_it;
		}

		list_iterator_reset(a_itr);
		while ((a_usage = list_next(a_itr))) {
/* 			info("association (%d) %d alloc %d", */
/* 			     a_usage->id, last_id, */
/* 			     a_usage->a_cpu); */
			if (query) {
				xstrfmtcat(query,
					   ", (%ld, %ld, %d, %ld, %"PRIu64")",
					   now, now,
					   a_usage->id, curr_start,
					   a_usage->a_cpu);
			} else {
				xstrfmtcat(query,
					   "insert into \"%s_%s\" "
					   "(creation_time, "
					   "mod_time, id_assoc, time_start, "
					   "alloc_cpu_secs) values "
					   "(%ld, %ld, %d, %ld, %"PRIu64")",
					   cluster_name, assoc_hour_table,
					   now, now,
					   a_usage->id, curr_start,
					   a_usage->a_cpu);
			}
		}
		if (query) {
			xstrfmtcat(query,
				   " on duplicate key update "
				   "mod_time=%ld, "
				   "alloc_cpu_secs=VALUES(alloc_cpu_secs);",
				   now);

			debug3("%d(%s:%d) query\n%s",
			       mysql_conn->conn, THIS_FILE, __LINE__, query);
			rc = mysql_db_query(mysql_conn, query);
			xfree(query);
			if (rc != SLURM_SUCCESS) {
				error("Couldn't add assoc hour rollup");
				_destroy_local_cluster_usage(c_usage);
				goto end_it;
			}
		}

		if (!track_wckey)
			goto end_loop;

		list_iterator_reset(w_itr);
		while ((w_usage = list_next(w_itr))) {
/* 			info("association (%d) %d alloc %d", */
/* 			     w_usage->id, last_id, */
/* 			     w_usage->a_cpu); */
			if (query) {
				xstrfmtcat(query,
					   ", (%ld, %ld, %d, %ld, %"PRIu64")",
					   now, now,
					   w_usage->id, curr_start,
					   w_usage->a_cpu);
			} else {
				xstrfmtcat(query,
					   "insert into \"%s_%s\" "
					   "(creation_time, "
					   "mod_time, id_wckey, time_start, "
					   "alloc_cpu_secs) values "
					   "(%ld, %ld, %d, %ld, %"PRIu64")",
					   cluster_name, wckey_hour_table,
					   now, now,
					   w_usage->id, curr_start,
					   w_usage->a_cpu);
			}
		}
		if (query) {
			xstrfmtcat(query,
				   " on duplicate key update "
				   "mod_time=%ld, "
				   "alloc_cpu_secs=VALUES(alloc_cpu_secs);",
				   now);

			debug3("%d(%s:%d) query\n%s",
			       mysql_conn->conn, THIS_FILE, __LINE__, query);
			rc = mysql_db_query(mysql_conn, query);
			xfree(query);
			if (rc != SLURM_SUCCESS) {
				error("Couldn't add wckey hour rollup");
				_destroy_local_cluster_usage(c_usage);
				goto end_it;
			}
		}

	end_loop:
		_destroy_local_cluster_usage(c_usage);
		list_flush(assoc_usage_list);
		list_flush(cluster_down_list);
		list_flush(wckey_usage_list);
		list_flush(resv_usage_list);
		curr_start = curr_end;
		curr_end = curr_start + add_sec;
	}
end_it:
	xfree(suspend_str);
	xfree(job_str);
	xfree(resv_str);
	list_iterator_destroy(a_itr);
	list_iterator_destroy(c_itr);
	list_iterator_destroy(w_itr);
	list_iterator_destroy(r_itr);

	list_destroy(assoc_usage_list);
	list_destroy(cluster_down_list);
	list_destroy(wckey_usage_list);
	list_destroy(resv_usage_list);

/* 	info("stop start %s", ctime(&curr_start)); */
/* 	info("stop end %s", ctime(&curr_end)); */

	/* go check to see if we archive and purge */

	if (rc == SLURM_SUCCESS)
		rc = _process_purge(mysql_conn, cluster_name, archive_data,
				    SLURMDB_PURGE_HOURS);

	return rc;
}
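
The job_req_inx array and JOB_REQ_* enum above keep the SELECT column list and the row indices in a single place: adding a column means touching the array and the enum together, and row[JOB_REQ_JOBID] stays valid. A standalone sketch of that idiom, with the internal xstrfmtcat appender replaced by plain snprintf:

#include <stdio.h>

static const char *job_req_inx[] = {
	"job_db_inx",
	"id_job",
	"id_assoc",
};
enum {				/* must mirror job_req_inx order */
	JOB_REQ_DB_INX,
	JOB_REQ_JOBID,
	JOB_REQ_ASSOCID,
	JOB_REQ_COUNT
};

int main(void)
{
	char cols[256] = "";
	size_t used = 0;
	int i;

	/* Build "job_db_inx, id_job, id_assoc" exactly as the
	 * xstrfmtcat loops do. */
	for (i = 0; i < JOB_REQ_COUNT; i++)
		used += snprintf(cols + used, sizeof(cols) - used,
				 "%s%s", i ? ", " : "", job_req_inx[i]);

	printf("select %s from job_table;\n", cols);
	/* row[JOB_REQ_JOBID] then indexes the fetched row safely */
	return 0;
}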
Example #8
static local_cluster_usage_t *_setup_cluster_usage(mysql_conn_t *mysql_conn,
						   char *cluster_name,
						   time_t curr_start,
						   time_t curr_end,
						   List cluster_down_list)
{
	local_cluster_usage_t *c_usage = NULL;
	char *query = NULL;
	MYSQL_RES *result = NULL;
	MYSQL_ROW row;
	int i = 0;

	char *event_req_inx[] = {
		"node_name",
		"cpu_count",
		"time_start",
		"time_end",
		"state",
	};
	char *event_str = NULL;
	enum {
		EVENT_REQ_NAME,
		EVENT_REQ_CPU,
		EVENT_REQ_START,
		EVENT_REQ_END,
		EVENT_REQ_STATE,
		EVENT_REQ_COUNT
	};

	xstrfmtcat(event_str, "%s", event_req_inx[i]);
	for(i=1; i<EVENT_REQ_COUNT; i++) {
		xstrfmtcat(event_str, ", %s", event_req_inx[i]);
	}

	/* First get the events during this time, excluding anything
	 * with the maintenance flag set in the state.  We handle
	 * those later with the reservations.
	 */
	query = xstrdup_printf("select %s from \"%s_%s\" where "
			       "!(state & %d) && (time_start < %ld "
			       "&& (time_end >= %ld "
			       "|| time_end = 0)) "
			       "order by node_name, time_start",
			       event_str, cluster_name, event_table,
			       NODE_STATE_MAINT,
			       curr_end, curr_start);
	xfree(event_str);

	debug3("%d(%s:%d) query\n%s",
	       mysql_conn->conn, THIS_FILE, __LINE__, query);
	if (!(result = mysql_db_query_ret(mysql_conn, query, 0))) {
		xfree(query);
		return NULL;
	}
	xfree(query);

	while ((row = mysql_fetch_row(result))) {
		time_t row_start = slurm_atoul(row[EVENT_REQ_START]);
		time_t row_end = slurm_atoul(row[EVENT_REQ_END]);
		uint32_t row_cpu = slurm_atoul(row[EVENT_REQ_CPU]);
		uint16_t state = slurm_atoul(row[EVENT_REQ_STATE]);
		if (row_start < curr_start)
			row_start = curr_start;

		if (!row_end || row_end > curr_end)
			row_end = curr_end;

		/* Don't worry about it if the time is less
		 * than 1 second.
		 */
		if ((row_end - row_start) < 1)
			continue;

		/* An empty node name means this is a cluster
		   registration entry */
		if (!row[EVENT_REQ_NAME][0]) {
			/* if the cpu count changes we will
			 * only care about the last cpu count but
			 * we will keep a total of the time for
			 * all cpus to get the correct cpu time
			 * for the entire period.
			 */
			if (state || !c_usage) {
				local_cluster_usage_t *loc_c_usage;

				loc_c_usage = xmalloc(
					sizeof(local_cluster_usage_t));
				loc_c_usage->cpu_count = row_cpu;
				loc_c_usage->total_time =
					(row_end - row_start) * row_cpu;
				loc_c_usage->start = row_start;
				loc_c_usage->end = row_end;
				/* If this has a state it
				   means the slurmctld went
				   down, so we put it on the
				   list; this time is later
				   subtracted from any jobs
				   that were running during it.
				*/
				if (state)
					list_append(cluster_down_list,
						    loc_c_usage);
				else
					c_usage = loc_c_usage;
				loc_c_usage = NULL;
			} else {
				c_usage->cpu_count = row_cpu;
				c_usage->total_time +=
					(row_end - row_start) * row_cpu;
				c_usage->end = row_end;
			}
			continue;
		}

		/* only record down time for the cluster we
		   are looking for.  If it was during this
		   time period we would already have it.
		*/
		if (c_usage) {
			int local_start = row_start;
			int local_end = row_end;
			int seconds;
			if (c_usage->start > local_start)
				local_start = c_usage->start;
			if (c_usage->end < local_end)
				local_end = c_usage->end;
			seconds = (local_end - local_start);
			if (seconds > 0) {
				/* info("node %s adds " */
				/*      "(%d)(%d-%d) * %d = %d " */
				/*      "to %d", */
				/*      row[EVENT_REQ_NAME], */
				/*      seconds, */
				/*      local_end, local_start, */
				/*      row_cpu, */
				/*      seconds * row_cpu, */
				/*      row_cpu); */
				c_usage->d_cpu += seconds * row_cpu;
			}
		}
	}
	mysql_free_result(result);
	return c_usage;
}
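
Every row processed by these rollups is first clamped to the window [curr_start, curr_end), with time_end = 0 meaning "still running", before its seconds are counted. The clamp is easy to miss inside the larger loops; here it is in isolation (overlap_seconds is a hypothetical helper name):

#include <stdio.h>
#include <time.h>

/* Seconds of [start,end) that fall inside the window [w_start,w_end),
 * the same clamping the rollup applies to every row. */
static long overlap_seconds(time_t start, time_t end,
			    time_t w_start, time_t w_end)
{
	if (start < w_start)
		start = w_start;	/* clip events that began earlier */
	if (!end || end > w_end)
		end = w_end;		/* 0 means "still running" */
	return (end > start) ? (long)(end - start) : 0;
}

int main(void)
{
	/* one-hour window: 1000..4600 */
	printf("%ld\n", overlap_seconds(500, 2000, 1000, 4600));  /* 1000 */
	printf("%ld\n", overlap_seconds(3000, 0, 1000, 4600));    /* 1600 */
	printf("%ld\n", overlap_seconds(5000, 6000, 1000, 4600)); /* 0 */
	return 0;
}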
Example #9
static int _process_cluster_usage(mysql_conn_t *mysql_conn,
				  char *cluster_name,
				  time_t curr_start, time_t curr_end,
				  time_t now, local_cluster_usage_t *c_usage)
{
	int rc = SLURM_SUCCESS;
	char *query = NULL;
	uint64_t total_used;
	char start_char[20], end_char[20];

	if (!c_usage)
		return rc;
	/* Now put the lists into the usage tables */

	/* sanity check to make sure we don't have more
	   allocated cpus than possible. */
	if (c_usage->total_time < c_usage->a_cpu) {
		slurm_make_time_str(&curr_start, start_char,
				    sizeof(start_char));
		slurm_make_time_str(&curr_end, end_char,
				    sizeof(end_char));
		error("We have more allocated time than is "
		      "possible (%"PRIu64" > %"PRIu64") for "
		      "cluster %s(%d) from %s - %s",
		      c_usage->a_cpu, c_usage->total_time,
		      cluster_name, c_usage->cpu_count,
		      start_char, end_char);
		c_usage->a_cpu = c_usage->total_time;
	}

	total_used = c_usage->a_cpu + c_usage->d_cpu + c_usage->pd_cpu;

	/* Make sure the total time we care about
	   doesn't go over the limit */
	if (c_usage->total_time < total_used) {
		int64_t overtime;

		slurm_make_time_str(&curr_start, start_char,
				    sizeof(start_char));
		slurm_make_time_str(&curr_end, end_char,
				    sizeof(end_char));
		error("We have more time than is "
		      "possible (%"PRIu64"+%"PRIu64"+%"
		      PRIu64")(%"PRIu64") > %"PRIu64" for "
		      "cluster %s(%d) from %s - %s",
		      c_usage->a_cpu, c_usage->d_cpu,
		      c_usage->pd_cpu, total_used,
		      c_usage->total_time,
		      cluster_name, c_usage->cpu_count,
		      start_char, end_char);

		/* First figure out how much actual down time
		   we have and then how much
		   planned down time we have. */
		overtime = (int64_t)(c_usage->total_time -
				     (c_usage->a_cpu + c_usage->d_cpu));
		if (overtime < 0) {
			c_usage->d_cpu += overtime;
			if ((int64_t)c_usage->d_cpu < 0)
				c_usage->d_cpu = 0;
		}

		overtime = (int64_t)(c_usage->total_time -
				     (c_usage->a_cpu + c_usage->d_cpu
				      + c_usage->pd_cpu));
		if (overtime < 0) {
			c_usage->pd_cpu += overtime;
			if ((int64_t)c_usage->pd_cpu < 0)
				c_usage->pd_cpu = 0;
		}

		total_used = c_usage->a_cpu +
			c_usage->d_cpu + c_usage->pd_cpu;
		/* info("We now have (%"PRIu64"+%"PRIu64"+" */
		/*      "%"PRIu64")(%"PRIu64") " */
		/*       "?= %"PRIu64"", */
		/*       c_usage->a_cpu, c_usage->d_cpu, */
		/*       c_usage->pd_cpu, total_used, */
		/*       c_usage->total_time); */
	}

	c_usage->i_cpu = c_usage->total_time - total_used - c_usage->r_cpu;
	/* Sanity check just to make sure we have a
	 * legitimate time after we calculated the
	 * idle/reserved time; put any extra into the
	 * over-commit field.
	 */
	/* info("%s got idle of %lld", c_usage->name, */
	/*      (int64_t)c_usage->i_cpu); */
	if ((int64_t)c_usage->i_cpu < 0) {
		/* info("got %d %d %d", c_usage->r_cpu, */
		/*      c_usage->i_cpu, c_usage->o_cpu); */
		c_usage->r_cpu += (int64_t)c_usage->i_cpu;
		c_usage->o_cpu -= (int64_t)c_usage->i_cpu;
		c_usage->i_cpu = 0;
		if ((int64_t)c_usage->r_cpu < 0)
			c_usage->r_cpu = 0;
	}

	/* info("cluster %s(%u) down %"PRIu64" alloc %"PRIu64" " */
	/*      "resv %"PRIu64" idle %"PRIu64" over %"PRIu64" " */
	/*      "total= %"PRIu64" ?= %"PRIu64" from %s", */
	/*      cluster_name, */
	/*      c_usage->cpu_count, c_usage->d_cpu, c_usage->a_cpu, */
	/*      c_usage->r_cpu, c_usage->i_cpu, c_usage->o_cpu, */
	/*      c_usage->d_cpu + c_usage->a_cpu + */
	/*      c_usage->r_cpu + c_usage->i_cpu, */
	/*      c_usage->total_time, */
	/*      ctime(&c_usage->start)); */
	/* info("to %s", ctime(&c_usage->end)); */
	query = xstrdup_printf("insert into \"%s_%s\" "
			       "(creation_time, "
			       "mod_time, time_start, "
			       "cpu_count, alloc_cpu_secs, "
			       "down_cpu_secs, pdown_cpu_secs, "
			       "idle_cpu_secs, over_cpu_secs, "
			       "resv_cpu_secs) "
			       "values (%ld, %ld, %ld, %d, "
			       "%"PRIu64", %"PRIu64", %"PRIu64", "
			       "%"PRIu64", %"PRIu64", %"PRIu64")",
			       cluster_name, cluster_hour_table,
			       now, now,
			       c_usage->start,
			       c_usage->cpu_count,
			       c_usage->a_cpu, c_usage->d_cpu,
			       c_usage->pd_cpu, c_usage->i_cpu,
			       c_usage->o_cpu, c_usage->r_cpu);

	/* Spacing out the inserts here, instead of doing them
	   all at once at the end, proves to be faster.  Just FYI,
	   so we don't go testing it again and again.
	*/
	if (query) {
		xstrfmtcat(query,
			   " on duplicate key update "
			   "mod_time=%ld, cpu_count=VALUES(cpu_count), "
			   "alloc_cpu_secs=VALUES(alloc_cpu_secs), "
			   "down_cpu_secs=VALUES(down_cpu_secs), "
			   "pdown_cpu_secs=VALUES(pdown_cpu_secs), "
			   "idle_cpu_secs=VALUES(idle_cpu_secs), "
			   "over_cpu_secs=VALUES(over_cpu_secs), "
			   "resv_cpu_secs=VALUES(resv_cpu_secs)",
			   now);
		debug3("%d(%s:%d) query\n%s",
		       mysql_conn->conn, THIS_FILE, __LINE__, query);
		rc = mysql_db_query(mysql_conn, query);
		xfree(query);
		if (rc != SLURM_SUCCESS)
			error("Couldn't add cluster hour rollup");
	}

	return rc;
}
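
The statement ends with MySQL's ON DUPLICATE KEY UPDATE ... VALUES(col), so the same INSERT works both for the first rollup of an hour and for a re-rollup that must overwrite earlier numbers. A stripped-down sketch of the statement shape (usage_hour and its columns are illustrative, not the real schema):

#include <stdio.h>
#include <time.h>

int main(void)
{
	char query[512];
	time_t now = time(NULL);

	/* Upsert: insert the hour's usage, or overwrite it if this
	 * hour was already rolled up (re-rollup after corrections). */
	snprintf(query, sizeof(query),
		 "insert into usage_hour "
		 "(mod_time, time_start, alloc_secs) "
		 "values (%ld, %ld, %llu) "
		 "on duplicate key update "
		 "mod_time=%ld, alloc_secs=VALUES(alloc_secs);",
		 (long)now, 1500000000L, 3600ULL, (long)now);
	puts(query);
	return 0;
}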
Example #10
extern int as_mysql_monthly_rollup(mysql_conn_t *mysql_conn,
				   char *cluster_name,
				   time_t start, time_t end,
				   uint16_t archive_data)
{
	int rc = SLURM_SUCCESS;
	struct tm start_tm;
	time_t curr_start = start;
	time_t curr_end;
	time_t now = time(NULL);
	char *query = NULL;
	uint16_t track_wckey = slurm_get_track_wckey();

	if (!localtime_r(&curr_start, &start_tm)) {
		error("Couldn't get localtime from month start %ld",
		      curr_start);
		return SLURM_ERROR;
	}
	start_tm.tm_sec = 0;
	start_tm.tm_min = 0;
	start_tm.tm_hour = 0;
	start_tm.tm_mday = 1;
	start_tm.tm_mon++;
	start_tm.tm_isdst = -1;
	curr_end = mktime(&start_tm);

	while (curr_start < end) {
		debug3("curr month is now %ld-%ld", curr_start, curr_end);
/* 		info("start %s", ctime(&curr_start)); */
/* 		info("end %s", ctime(&curr_end)); */
		query = xstrdup_printf(
			"insert into \"%s_%s\" (creation_time, "
			"mod_time, id_assoc, "
			"time_start, alloc_cpu_secs) select "
			"%ld, %ld, id_assoc, "
			"%ld, @ASUM:=SUM(alloc_cpu_secs) from \"%s_%s\" where "
			"(time_start < %ld && time_start >= %ld) "
			"group by id_assoc on duplicate key update "
			"mod_time=%ld, alloc_cpu_secs=@ASUM;",
			cluster_name, assoc_month_table, now, now, curr_start,
			cluster_name, assoc_day_table,
			curr_end, curr_start, now);
		/* We group on "deleted" here so that if there are no
		   entries we don't get an error; nothing is simply
		   returned.  Otherwise we would get a bunch of NULLs.
		*/
		xstrfmtcat(query,
			   "insert into \"%s_%s\" (creation_time, "
			   "mod_time, time_start, cpu_count, "
			   "alloc_cpu_secs, down_cpu_secs, pdown_cpu_secs, "
			   "idle_cpu_secs, over_cpu_secs, resv_cpu_secs) "
			   "select %ld, %ld, "
			   "%ld, @CPU:=MAX(cpu_count), "
			   "@ASUM:=SUM(alloc_cpu_secs), "
			   "@DSUM:=SUM(down_cpu_secs), "
			   "@PDSUM:=SUM(pdown_cpu_secs), "
			   "@ISUM:=SUM(idle_cpu_secs), "
			   "@OSUM:=SUM(over_cpu_secs), "
			   "@RSUM:=SUM(resv_cpu_secs) from \"%s_%s\" where "
			   "(time_start < %ld && time_start >= %ld) "
			   "group by deleted "
			   "on duplicate key update "
			   "mod_time=%ld, cpu_count=@CPU, "
			   "alloc_cpu_secs=@ASUM, down_cpu_secs=@DSUM, "
			   "pdown_cpu_secs=@PDSUM, idle_cpu_secs=@ISUM, "
			   "over_cpu_secs=@OSUM, resv_cpu_secs=@RSUM;",
			   cluster_name, cluster_month_table,
			   now, now, curr_start,
			   cluster_name, cluster_day_table,
			   curr_end, curr_start, now);
		if (track_wckey) {
			xstrfmtcat(query,
				   "insert into \"%s_%s\" "
				   "(creation_time, mod_time, "
				   "id_wckey, time_start, alloc_cpu_secs) "
				   "select %ld, %ld, id_wckey, %ld, "
				   "@ASUM:=SUM(alloc_cpu_secs) "
				   "from \"%s_%s\" where (time_start < %ld && "
				   "time_start >= %ld) "
				   "group by id_wckey on duplicate key update "
				   "mod_time=%ld, alloc_cpu_secs=@ASUM;",
				   cluster_name, wckey_month_table,
				   now, now, curr_start,
				   cluster_name, wckey_day_table,
				   curr_end, curr_start, now);
		}
		debug3("%d(%s:%d) query\n%s",
		       mysql_conn->conn, THIS_FILE, __LINE__, query);
		rc = mysql_db_query(mysql_conn, query);
		xfree(query);
		if (rc != SLURM_SUCCESS) {
			error("Couldn't add day rollup");
			return SLURM_ERROR;
		}

		curr_start = curr_end;
		if (!localtime_r(&curr_start, &start_tm)) {
			error("Couldn't get localtime from month start %ld",
			      curr_start);
			return SLURM_ERROR;
		}
		start_tm.tm_sec = 0;
		start_tm.tm_min = 0;
		start_tm.tm_hour = 0;
		start_tm.tm_mday = 1;
		start_tm.tm_mon++;
		start_tm.tm_isdst = -1;
		curr_end = mktime(&start_tm);
	}

	/* go check to see if we archive and purge */
	rc = _process_purge(mysql_conn, cluster_name, archive_data,
			    SLURMDB_PURGE_MONTHS);

	return rc;
}
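
As in the daily case, the month boundary is computed by bumping tm_mon and letting mktime() normalize: tm_mon == 12 simply rolls into January of the next year, so the December iteration needs no special case. A two-line demonstration of that normalization:

#include <stdio.h>
#include <time.h>

int main(void)
{
	struct tm tm = { 0 };
	char buf[64];

	/* Start of December 2023, then bump tm_mon past the end of
	 * the year; mktime() normalizes it to January 2024. */
	tm.tm_year = 2023 - 1900;
	tm.tm_mon = 11;		/* December */
	tm.tm_mday = 1;
	tm.tm_isdst = -1;
	tm.tm_mon++;		/* 12: out of range until mktime runs */
	mktime(&tm);

	strftime(buf, sizeof(buf), "%Y-%m-%d", &tm);
	printf("%s\n", buf);	/* -> 2024-01-01 */
	return 0;
}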