/* monitor() * The principal function that monitors the machines */ int monitor (machine_t machines[], sensor_t *sensor, int run_mins, mmdat_t *pshort_hist, mmdat_t *plong_hist, int *timestops, int wsize, opsum_t *summary) { int rc = -1; int i = 0; int64_t timenow = epochtime (); int64_t starttime = timenow; int64_t endtime = timenow + (run_mins * 60); int index, tmpindex; struct tm prev_tm = {0}, tm = {0}, p_starttime = {0}, p_endtime = {0}; int next_timestop, prev_timestop; llist_t **prev_short_head = &(pshort_hist->head); if (run_mins == 0) timenow = 0; /* Initial step to basically initialize time */ rc = get_sensor_readings (sensor, &prev_tm); if (rc < 0) { printf ("ERROR: Retrieving sensor readings failed\n"); return rc; } rc = find_next_long_timestop (timestops, wsize, prev_tm.tm_hour, &next_timestop, &prev_timestop, &index); if (rc < 0) { printf ("ERROR: Could not find the next timestop\n"); return -1; } printf ("Current time = %d, next_timestop = %d\n", prev_tm.tm_hour, next_timestop); p_starttime = prev_tm; p_starttime.tm_hour = prev_timestop; p_starttime.tm_min = 0; p_starttime.tm_sec = 0; while (timenow < endtime) { printf ("Starting new iteration\n"); /* Retrieve environmental data and time */ rc = get_sensor_readings (sensor, &tm); if (rc < 0) { printf ("ERROR: Retrieving sensor readings failed\n"); return rc; } /* monitor/operate on each machine */ for (i = 0; i < 243; i++) { rc = monitor_machine (&machines[i]); if (rc < 0) { printf ("ERROR: operations on machine %s failed\n", machines[i].uuid); return rc; } } /* Short update */ if (short_period_over (tm, prev_tm)) { compute_short_period_averages (machines, sensor, pshort_hist, prev_tm, tm); prev_tm = tm; print_phist_data (pshort_hist->head); } /* Long update */ #if 1 if (tm.tm_hour == next_timestop) { p_endtime = tm; p_endtime.tm_min = 0; p_endtime.tm_sec = 0; compute_long_period_averages (pshort_hist, &plong_hist[index], prev_short_head, p_starttime, p_endtime); update_operations_summary 
(&plong_hist[index], &summary[index]); prev_short_head = &(pshort_hist->head->prev); index += 1; if (index >= wsize) index = 0; next_timestop = timestops[index]; p_starttime = p_endtime; break; } #endif #if 0 /* Code for testing purpuses only */ timenow = epochtime(); if (timenow - starttime >= 300) { p_endtime = tm; p_endtime.tm_hour = next_timestop; p_endtime.tm_min = 0; p_endtime.tm_sec = 0; compute_long_period_averages (pshort_hist, &plong_hist[0], prev_short_head, p_starttime, p_endtime); print_phist_data (plong_hist[0].head); update_operations_summary (&plong_hist[0], &summary[0]); print_operations_summary (summary, 1); index += 1; if (index >= wsize) index = 0; next_timestop = timestops[index]; starttime = timenow; p_starttime = p_endtime; } #endif /* sleep for frequency seconds */ usleep (frequency * 1000000); if (run_mins == 0) timenow = 0; else timenow = epochtime (); } rc = 0; return 0; }
/*
 * test_a_resrc() - run the search / serialize / allocate / reserve /
 * release sequence against one loaded resource hierarchy, reporting each
 * step through ok().
 *
 * resrc: root resource whose physical tree is exercised.
 * rdl:   true when the resources came from the sample RDL file (a
 *        multi-node, time-based request is built); false for hwloc
 *        (a simple two-core request with now-based allocations).
 *
 * Returns the result of the last operation performed: 0 if the function
 * bails out before serialization, otherwise the code from the most
 * recent serialize/allocate/reserve/release call.
 */
static int test_a_resrc (resrc_t *resrc, bool rdl)
{
    int found = 0;
    int job = 0;
    int rc = 0;
    int64_t nowtime = epochtime ();
    /* time window for allocations: one hour from now for rdl, 0/0 otherwise */
    int64_t win_start = rdl ? nowtime : 0;
    int64_t win_end = rdl ? nowtime + 3600 : 0;
    JSON o = NULL;
    JSON req_res = NULL;
    resrc_reqst_t *resrc_reqst = NULL;
    resrc_tree_t *deserialized_tree = NULL;
    resrc_tree_t *found_tree = NULL;
    resrc_tree_t *resrc_tree = NULL;
    resrc_tree_t *selected_tree = NULL;

    resrc_tree = resrc_phys_tree (resrc);
    ok ((resrc_tree != NULL), "resource tree valid");
    if (!resrc_tree)
        goto ret;

    if (verbose) {
        printf ("Listing resource tree\n");
        resrc_tree_print (resrc_tree);
        printf ("End of resource tree\n");
    }

    /*
     *  Build a resource composite to search for.  Two variants are
     *  constructed depending on whether the loaded resources came
     *  from the sample RDL file or from the hwloc.  The hwloc request
     *  does not span multiple nodes or contain the localid property.
     */
    req_res = Jnew ();

    if (rdl) {
        JSON bandwidth = Jnew ();
        JSON child_core = Jnew ();
        JSON child_sock = Jnew ();
        JSON graph_array = Jnew_ar ();
        JSON ja = Jnew_ar ();
        JSON jpropo = Jnew (); /* json property object */
        JSON memory = Jnew ();
        JSON power = Jnew ();

        /* JSON jtago = Jnew ();  /\* json tag object *\/ */
        /* Jadd_bool (jtago, "maytag", true); */
        /* Jadd_bool (jtago, "yourtag", true); */

        Jadd_str (memory, "type", "memory");
        Jadd_int (memory, "req_qty", 1);
        Jadd_int (memory, "size", 100);
        json_object_array_add (ja, memory);

        Jadd_str (child_core, "type", "core");
        Jadd_int (child_core, "req_qty", 6);
        Jadd_bool (child_core, "exclusive", true);
        Jadd_int (jpropo, "localid", 1);
        json_object_object_add (child_core, "properties", jpropo);
        json_object_array_add (ja, child_core);

        Jadd_str (child_sock, "type", "socket");
        Jadd_int (child_sock, "req_qty", 2);
        json_object_object_add (child_sock, "req_children", ja);

        Jadd_str (bandwidth, "type", "bandwidth");
        Jadd_int (bandwidth, "size", 100);
        json_object_array_add (graph_array, bandwidth);

        Jadd_str (power, "type", "power");
        Jadd_int (power, "size", 10);
        json_object_array_add (graph_array, power);

        Jadd_str (req_res, "type", "node");
        Jadd_int (req_res, "req_qty", 2);
        Jadd_int64 (req_res, "starttime", nowtime);
        /* json_object_object_add (req_res, "tags", jtago); */
        json_object_object_add (req_res, "req_child", child_sock);
        json_object_object_add (req_res, "graphs", graph_array);
    } else {
        Jadd_str (req_res, "type", "core");
        Jadd_int (req_res, "req_qty", 2);
        Jadd_bool (req_res, "exclusive", true);
    }

    resrc_reqst = resrc_reqst_from_json (req_res, NULL);
    Jput (req_res);
    ok ((resrc_reqst != NULL), "resource request valid");
    if (!resrc_reqst)
        goto ret;

    if (verbose) {
        printf ("Listing resource request tree\n");
        resrc_reqst_print (resrc_reqst);
        printf ("End of resource request tree\n");
    }

    init_time ();
    found = resrc_tree_search (resrc, resrc_reqst, &found_tree, true);

    ok (found, "found %d requested resources in %lf", found,
        ((double)get_time ())/1000000);
    if (!found) {
        /* the request was built successfully, so release it before bailing */
        resrc_reqst_destroy (resrc_reqst);
        goto ret;
    }

    if (verbose) {
        printf ("Listing found tree\n");
        resrc_tree_print (found_tree);
        printf ("End of found tree\n");
    }

    o = Jnew ();
    init_time ();
    rc = resrc_tree_serialize (o, found_tree);
    ok (!rc, "found resource serialization took: %lf",
        ((double)get_time ())/1000000);

    if (verbose) {
        printf ("The found resources serialized: %s\n", Jtostr (o));
    }

    deserialized_tree = resrc_tree_deserialize (o, NULL);
    if (verbose) {
        printf ("Listing deserialized tree\n");
        resrc_tree_print (deserialized_tree);
        printf ("End of deserialized tree\n");
    }
    Jput (o);

    init_time ();

    /*
     * Exercise time-based allocations for the rdl case and
     * now-based allocations for the hwloc case.  Jobs 1-3 are
     * allocated; job 4 is reserved.
     */
    for (job = 1; job <= 4; job++) {
        selected_tree = test_select_resources (found_tree, NULL, job);
        if (job < 4) {
            rc = resrc_tree_allocate (selected_tree, job, win_start, win_end);
            ok (!rc, "successfully allocated resources for job %d", job);
        } else {
            rc = resrc_tree_reserve (selected_tree, job, win_start, win_end);
            ok (!rc, "successfully reserved resources for job %d", job);
        }
        resrc_tree_destroy (selected_tree, false);
        resrc_tree_unstage_resources (found_tree);
    }

    printf ("        allocate and reserve took: %lf\n",
            ((double)get_time ())/1000000);

    if (verbose) {
        printf ("Allocated and reserved resources\n");
        resrc_tree_print (resrc_tree);
    }

    init_time ();
    rc = resrc_tree_release (found_tree, 1);
    ok (!rc, "resource release of job 1 took: %lf",
        ((double)get_time ())/1000000);

    if (verbose) {
        printf ("Same resources without job 1\n");
        resrc_tree_print (resrc_tree);
    }

    init_time ();
    resrc_reqst_destroy (resrc_reqst);
    resrc_tree_destroy (deserialized_tree, true);
    resrc_tree_destroy (found_tree, false);
    printf ("        destroy took: %lf\n", ((double)get_time ())/1000000);
ret:
    return rc;
}
/* monitor_machine()
 * Fetch the current detail record for one machine, send an alert when its
 * current exceeds the alert threshold, and append the reading to the
 * machine's averaging window and period window.
 *
 * Returns 0 on success.  Returns a negative value on any failure: the
 * code from fetch_curl() when the fetch itself fails, otherwise -1
 * (allocation failure, empty response, missing "current" field, or a
 * full period window).  All memory acquired here (url, response buffer,
 * parsed JSON) is released on every path.
 */
int monitor_machine (machine_t *machine)
{
    int rc = -1;
    char *url = NULL;
    chunk_t chunk;
    json_object *jdetail = NULL;
    json_object *tmp = NULL;
    int64_t timenow;

    chunk.data = NULL;
    chunk.size = 0;

    /* create the machine url and init chunk */
    if (asprintf (&url, "%s%s", machine_detail_base_url, machine->uuid) < 0) {
        url = NULL; /* contents are undefined after a failed asprintf */
        printf ("ERROR: Could not build url for machine %s\n", machine->uuid);
        goto done;
    }
    chunk.data = malloc (1);
    if (chunk.data == NULL) {
        printf ("ERROR: Out of memory fetching machine %s\n", machine->uuid);
        goto done;
    }

    /* fetch the new machine data */
    rc = fetch_curl (url, &chunk);
    if ((rc < 0) || (chunk.size == 0)) {
        printf ("fetching machine detail for machine %s failed\n",
                machine->uuid);
        /* an empty body with a non-negative code is still a failure */
        if (rc >= 0)
            rc = -1;
        goto done;
    }
    /* from here on, any early exit is an error until proven otherwise */
    rc = -1;

    /* fetch current and current alert */
    jdetail = json_tokener_parse (chunk.data);
    json_object_object_get_ex (jdetail, "current", &tmp);
    if (tmp == NULL) {
        printf ("ERROR: Could not get current for machine %s\n",
                machine->uuid);
        goto done;
    }
    machine->current_cur = json_object_get_double (tmp);

    json_object_object_get_ex (jdetail, "current_alert", &tmp);
    if (tmp == NULL) {
        printf ("ERROR: Could not get current_alert for machine %s\n",
                machine->uuid);
        /* NOTE(review): processing deliberately continues here as in the
         * original; the threshold becomes whatever json_object_get_double()
         * yields for a NULL object -- confirm this is intended. */
    }
    machine->current_threshold = json_object_get_double (tmp);

    /* Implementation with timestamp for each window entry */
    /* send alert if current is greater than threshold */
    timenow = epochtime ();
    if (machine->current_cur > machine->current_threshold) {
        double sum = 0, avg = 0;
        int count = 0;
        /* walk backwards from the most recently written slot */
        int head_dup = machine->head - 1;
        if (head_dup < 0)
            head_dup = window_size - 1;

        /* accumulate entries that are populated and newer than
         * seconds_history, stopping once we wrap around to head */
        while ((machine->current_avgwindow[head_dup].timestamp != 0)
               && (machine->current_avgwindow[head_dup].timestamp
                   > timenow - seconds_history)
               && (head_dup != machine->head)) {
            sum += machine->current_avgwindow[head_dup].current;
            count++;
            head_dup -= 1;
            if (head_dup < 0)
                head_dup = window_size - 1;
        }

        if (count > 0)
            avg = sum / count;
        else
            avg = machine->current_avgwindow[head_dup].current;

        send_alert (machine, avg);
    }

    /* update the average window */
    machine->current_avgwindow[machine->head].current = machine->current_cur;
    machine->current_avgwindow[machine->head].timestamp = timenow;
    machine->head = (machine->head == window_size - 1) ? 0 : machine->head + 1;

    /* update the period window */
    if (machine->phead == pwindow_size) {
        printf ("ERROR: phead on window_size. buffer needs clear up\n");
        goto done;
    }
    machine->current_periodwindow[machine->phead].current =
        machine->current_cur;
    machine->phead++;

    rc = 0;

done:
    /* free memory */
    if (jdetail != NULL)
        json_object_put (jdetail);
    free (chunk.data);
    free (url);
    return rc;
}