Exemple #1
0
/* monitor()
 * The principal function that monitors the machines.
 *
 * Each pass of the loop reads the sensor, polls every machine, and then
 * performs two kinds of bookkeeping:
 *   - short update: when short_period_over() reports that the short period
 *     has elapsed, the short-period averages are computed and printed;
 *   - long update: when the sensor hour equals the next long timestop, the
 *     long-period averages and the operations summary for the current
 *     timestop slot are computed.
 *
 * run_mins bounds the run in minutes.  With run_mins == 0 the clock value
 * used for the loop test is pinned to 0, so the time bound is effectively
 * disabled (assuming epochtime() returned a positive start time).
 *
 * Returns 0 when the loop ends normally, the negative code from
 * get_sensor_readings()/monitor_machine() when one of them fails, or -1
 * when the next timestop cannot be determined.
 */
int monitor (machine_t machines[], sensor_t *sensor, int run_mins, mmdat_t *pshort_hist, mmdat_t *plong_hist, int *timestops, int wsize, opsum_t *summary) 
{
    struct tm prev_tm = {0}, tm = {0}, p_starttime = {0}, p_endtime = {0};
    int next_timestop, prev_timestop;
    int index;
    int status = -1;
    int m = 0;

    int64_t timenow = epochtime ();
    int64_t starttime = timenow;    /* only read by the disabled test block below */
    int64_t endtime = timenow + (run_mins * 60);

    /* Where the next long-period computation should start in the short history */
    llist_t **prev_short_head = &(pshort_hist->head);

    if (run_mins == 0)
        timenow = 0;

    /* Prime prev_tm with the sensor's notion of the current time */
    status = get_sensor_readings (sensor, &prev_tm);
    if (status < 0) {
        printf ("ERROR: Retrieving sensor readings failed\n");
        return status;
    }

    /* Locate the long timestop window that contains the current hour */
    status = find_next_long_timestop (timestops, wsize, prev_tm.tm_hour, &next_timestop, &prev_timestop, &index);
    if (status < 0) {
        printf ("ERROR: Could not find the next timestop\n");
        return -1;
    }
    printf ("Current time = %d, next_timestop = %d\n", prev_tm.tm_hour, next_timestop);

    /* The current long period started on the hour of the previous timestop */
    p_starttime = prev_tm;
    p_starttime.tm_sec = 0;
    p_starttime.tm_min = 0;
    p_starttime.tm_hour = prev_timestop;

    /* The step expression refreshes the clock after every completed pass */
    for (; timenow < endtime; timenow = (run_mins == 0) ? 0 : epochtime ()) {

        printf ("Starting new iteration\n");

        /* Retrieve environmental data and time */
        status = get_sensor_readings (sensor, &tm);
        if (status < 0) {
            printf ("ERROR: Retrieving sensor readings failed\n");
            return status;
        }

        /* monitor/operate on each machine */
        /* NOTE(review): the machine count is hard-coded to 243 -- confirm it
         * matches the size of the array every caller passes in. */
        for (m = 0; m < 243; m++) {
            machine_t *mach = &machines[m];

            status = monitor_machine (mach);
            if (status < 0) {
                printf ("ERROR: operations on machine %s failed\n", mach->uuid);
                return status;
            }
        }

        /* Short update */
        if (short_period_over (tm, prev_tm)) {
            compute_short_period_averages (machines, sensor, pshort_hist, prev_tm, tm);
            prev_tm = tm;
            print_phist_data (pshort_hist->head);
        }

        /* Long update */
        if (tm.tm_hour == next_timestop) {
            p_endtime = tm;
            p_endtime.tm_sec = 0;
            p_endtime.tm_min = 0;

            compute_long_period_averages (pshort_hist, &plong_hist[index], prev_short_head, p_starttime, p_endtime);
            update_operations_summary (&plong_hist[index], &summary[index]);
            prev_short_head = &(pshort_hist->head->prev);

            /* Advance to the next timestop slot, wrapping around at wsize */
            index = (index + 1 >= wsize) ? 0 : index + 1;
            next_timestop = timestops[index];
            p_starttime = p_endtime;

            /* NOTE(review): monitoring stops after the first long update;
             * confirm this is intended and not a testing leftover. */
            break;
        }

#if 0 
        /* Code for testing purposes only */
        timenow = epochtime();
        if (timenow - starttime >= 300) {
            p_endtime = tm;
            p_endtime.tm_hour = next_timestop;
            p_endtime.tm_min = 0;
            p_endtime.tm_sec = 0;
            compute_long_period_averages (pshort_hist, &plong_hist[0], prev_short_head, p_starttime, p_endtime);
            print_phist_data (plong_hist[0].head);
            update_operations_summary (&plong_hist[0], &summary[0]);
            print_operations_summary (summary, 1);
            index += 1;
            if (index >= wsize)
                index = 0;
            next_timestop = timestops[index];
            starttime = timenow;
            p_starttime = p_endtime; 
        }
#endif

        /* sleep for frequency seconds */
        usleep (frequency * 1000000);
    }

    return 0;
}
Exemple #2
0
/*
 * test_a_resrc()
 * Exercise the resource-tree API end to end against one loaded resource:
 * build a JSON request, search the tree for it, serialize/deserialize the
 * result, allocate jobs 1-3 and reserve job 4 on the found resources,
 * release job 1, and destroy everything that was created.
 *
 *   resrc  root resource to test against
 *   rdl    true when the resources came from the sample RDL file (multi-node
 *          request with time-based allocations); false for the hwloc case
 *          (simple two-core request with now-based allocations)
 *
 * Test results are reported through ok().  Returns the status of the last
 * resrc call that set rc (the job 1 release on a complete run), or 0 when
 * the function bails out early because the tree, the request or the search
 * result is missing.
 */
static int test_a_resrc (resrc_t *resrc, bool rdl)
{
    int found = 0;
    int rc = 0;
    int64_t nowtime = epochtime ();
    JSON o = NULL;
    JSON req_res = NULL;
    resrc_reqst_t *resrc_reqst = NULL;
    resrc_tree_t *deserialized_tree = NULL;
    resrc_tree_t *found_tree = NULL;
    resrc_tree_t *resrc_tree = NULL;
    resrc_tree_t *selected_tree = NULL;

    resrc_tree = resrc_phys_tree (resrc);
    ok ((resrc_tree != NULL), "resource tree valid");
    if (!resrc_tree)
        goto ret;

    if (verbose) {
        printf ("Listing resource tree\n");
        resrc_tree_print (resrc_tree);
        printf ("End of resource tree\n");
    }

    /*
     *  Build a resource composite to search for.  Two variants are
     *  constructed depending on whether the loaded resources came
     *  from the sample RDL file or from the hwloc.  The hwloc request
     *  does not span multiple nodes or contain the localid property.
     */
    req_res = Jnew ();

    if (rdl) {
        JSON bandwidth = Jnew ();
        JSON child_core = Jnew ();
        JSON child_sock = Jnew ();
        JSON graph_array = Jnew_ar ();
        JSON ja = Jnew_ar ();
        JSON jpropo = Jnew (); /* json property object */
        JSON memory = Jnew ();
        JSON power = Jnew ();

        /* JSON jtago = Jnew ();  /\* json tag object *\/ */
        /* Jadd_bool (jtago, "maytag", true); */
        /* Jadd_bool (jtago, "yourtag", true); */

        /* Every object created above is attached to a parent below, so the
         * single Jput (req_res) further down is the only release in this
         * function for the whole request. */
        Jadd_str (memory, "type", "memory");
        Jadd_int (memory, "req_qty", 1);
        Jadd_int (memory, "size", 100);
        json_object_array_add (ja, memory);

        Jadd_str (child_core, "type", "core");
        Jadd_int (child_core, "req_qty", 6);
        Jadd_bool (child_core, "exclusive", true);
        Jadd_int (jpropo, "localid", 1);
        json_object_object_add (child_core, "properties", jpropo);
        json_object_array_add (ja, child_core);

        Jadd_str (child_sock, "type", "socket");
        Jadd_int (child_sock, "req_qty", 2);
        json_object_object_add (child_sock, "req_children", ja);

        Jadd_str (bandwidth, "type", "bandwidth");
        Jadd_int (bandwidth, "size", 100);
        json_object_array_add (graph_array, bandwidth);

        Jadd_str (power, "type", "power");
        Jadd_int (power, "size", 10);
        json_object_array_add (graph_array, power);

        Jadd_str (req_res, "type", "node");
        Jadd_int (req_res, "req_qty", 2);
        Jadd_int64 (req_res, "starttime", nowtime);
        /* json_object_object_add (req_res, "tags", jtago); */
        json_object_object_add (req_res, "req_child", child_sock);
        json_object_object_add (req_res, "graphs", graph_array);
    } else {
        Jadd_str (req_res, "type", "core");
        Jadd_int (req_res, "req_qty", 2);
        Jadd_bool (req_res, "exclusive", true);
    }

    /* The JSON form is no longer needed once the request has been built */
    resrc_reqst = resrc_reqst_from_json (req_res, NULL);
    Jput (req_res);
    ok ((resrc_reqst != NULL), "resource request valid");
    if (!resrc_reqst)
        goto ret;

    if (verbose) {
        printf ("Listing resource request tree\n");
        resrc_reqst_print (resrc_reqst);
        printf ("End of resource request tree\n");
    }

    init_time ();
    found = resrc_tree_search (resrc, resrc_reqst, &found_tree, true);

    ok (found, "found %d requested resources in %lf", found,
        ((double)get_time ())/1000000);
    if (!found) {
        /* Nothing else was created yet, but the request must not leak */
        resrc_reqst_destroy (resrc_reqst);
        goto ret;
    }

    if (verbose) {
        printf ("Listing found tree\n");
        resrc_tree_print (found_tree);
        printf ("End of found tree\n");
    }

    /* Round-trip the found tree through its serialized JSON form */
    o = Jnew ();
    init_time ();
    rc = resrc_tree_serialize (o, found_tree);
    ok (!rc, "found resource serialization took: %lf",
        ((double)get_time ())/1000000);

    if (verbose) {
        printf ("The found resources serialized: %s\n", Jtostr (o));
    }

    deserialized_tree = resrc_tree_deserialize (o, NULL);
    if (verbose) {
        printf ("Listing deserialized tree\n");
        resrc_tree_print (deserialized_tree);
        printf ("End of deserialized tree\n");
    }
    Jput (o);

    init_time ();

    /*
     * Exercise time-based allocations for the rdl case and
     * now-based allocations for the hwloc case.  Each job selects
     * from the found tree, allocates (or reserves) the selection,
     * drops the selection tree and unstages the found tree so the
     * next job starts from a clean staging state.
     */
    selected_tree = test_select_resources (found_tree, NULL, 1);
    if (rdl)
        rc = resrc_tree_allocate (selected_tree, 1, nowtime, nowtime + 3600);
    else
        rc = resrc_tree_allocate (selected_tree, 1, 0, 0);
    ok (!rc, "successfully allocated resources for job 1");
    resrc_tree_destroy (selected_tree, false);
    resrc_tree_unstage_resources (found_tree);

    selected_tree = test_select_resources (found_tree, NULL, 2);
    if (rdl)
        rc = resrc_tree_allocate (selected_tree, 2, nowtime, nowtime + 3600);
    else
        rc = resrc_tree_allocate (selected_tree, 2, 0, 0);
    ok (!rc, "successfully allocated resources for job 2");
    resrc_tree_destroy (selected_tree, false);
    resrc_tree_unstage_resources (found_tree);

    selected_tree = test_select_resources (found_tree, NULL, 3);
    if (rdl)
        rc = resrc_tree_allocate (selected_tree, 3, nowtime, nowtime + 3600);
    else
        rc = resrc_tree_allocate (selected_tree, 3, 0, 0);
    ok (!rc, "successfully allocated resources for job 3");
    resrc_tree_destroy (selected_tree, false);
    resrc_tree_unstage_resources (found_tree);

    /* Job 4 is reserved rather than allocated */
    selected_tree = test_select_resources (found_tree, NULL, 4);
    if (rdl)
        rc = resrc_tree_reserve (selected_tree, 4, nowtime, nowtime + 3600);
    else
        rc = resrc_tree_reserve (selected_tree, 4, 0, 0);
    ok (!rc, "successfully reserved resources for job 4");
    resrc_tree_destroy (selected_tree, false);
    resrc_tree_unstage_resources (found_tree);

    printf ("        allocate and reserve took: %lf\n",
            ((double)get_time ())/1000000);

    if (verbose) {
        printf ("Allocated and reserved resources\n");
        resrc_tree_print (resrc_tree);
    }

    init_time ();
    rc = resrc_tree_release (found_tree, 1);
    ok (!rc, "resource release of job 1 took: %lf",
        ((double)get_time ())/1000000);

    if (verbose) {
        printf ("Same resources without job 1\n");
        resrc_tree_print (resrc_tree);
    }

    /* Tear down everything this test created, timing the teardown */
    init_time ();
    resrc_reqst_destroy (resrc_reqst);
    resrc_tree_destroy (deserialized_tree, true);
    resrc_tree_destroy (found_tree, false);
    printf ("        destroy took: %lf\n", ((double)get_time ())/1000000);
ret:
    return rc;
}
Exemple #3
0
/* monitor_machine()
 * Fetch the latest detail record for one machine, send an alert when its
 * current exceeds the alert threshold, and record the reading in the
 * machine's averaging window and period window.
 *
 *   machine  record to refresh; its uuid selects the detail URL, and its
 *            current_cur, current_threshold, head, phead and both windows
 *            are updated in place.
 *
 * Returns 0 on success.  On failure returns a negative value: the code
 * reported by fetch_curl() when the fetch itself failed, otherwise -1.
 * The URL, the download buffer and the parsed JSON document are released
 * on every exit path.
 */
int monitor_machine (machine_t *machine)
{
    int rc = -1;
    int fetch_rc = 0;
    char *url = NULL;
    chunk_t chunk;
    json_object *jdetail = NULL;
    json_object *tmp = NULL;
    int64_t timenow = 0;

    chunk.data = NULL;
    chunk.size = 0;

    /* create the machine url and init chunk */
    if (asprintf (&url, "%s%s", machine_detail_base_url, machine->uuid) < 0) {
        url = NULL;     /* contents are undefined after a failed asprintf */
        printf ("ERROR: Could not build detail url for machine %s\n", machine->uuid);
        goto done;
    }
    chunk.data = malloc (1);
    if (chunk.data == NULL) {
        printf ("ERROR: Out of memory fetching machine %s\n", machine->uuid);
        goto done;
    }

    /* fetch the new machine data */
    fetch_rc = fetch_curl (url, &chunk);
    if ((fetch_rc < 0) || (chunk.size == 0)) {
        printf ("fetching machine detail for machine %s failed\n", machine->uuid);
        /* An empty body with a non-negative fetch code is still a failure */
        if (fetch_rc < 0)
            rc = fetch_rc;
        goto done;
    }

    /* fetch current and current alert */
    jdetail = json_tokener_parse (chunk.data);
    if (jdetail == NULL) {
        printf ("ERROR: Could not parse detail for machine %s\n", machine->uuid);
        goto done;
    }

    if (!json_object_object_get_ex (jdetail, "current", &tmp) || (tmp == NULL)) {
        printf ("ERROR: Could not get current for machine %s\n", machine->uuid);
        goto done;
    }
    machine->current_cur = json_object_get_double (tmp);

    /* Without a threshold the comparison below would run against a value
     * read from a missing object, so treat this like a missing "current". */
    tmp = NULL;
    if (!json_object_object_get_ex (jdetail, "current_alert", &tmp) || (tmp == NULL)) {
        printf ("ERROR: Could not get current_alert for machine %s\n", machine->uuid);
        goto done;
    }
    machine->current_threshold = json_object_get_double (tmp);

    /* Implementation with timestamp for each window entry */
    /* send alert if current is greater than threshold */
    timenow = epochtime ();
    if (machine->current_cur > machine->current_threshold) {
        double sum = 0, avg = 0;
        int count = 0;
        int head_dup = machine->head - 1;
        if (head_dup < 0)
            head_dup = window_size - 1;

        /* Walk backwards from the newest entry, wrapping around the ring,
         * while entries are populated and newer than seconds_history; stop
         * once the walk comes back around to the write position. */
        while ((machine->current_avgwindow[head_dup].timestamp != 0) && (machine->current_avgwindow[head_dup].timestamp > timenow - seconds_history) && (head_dup != machine->head)) {
            sum += machine->current_avgwindow[head_dup].current;
            count++;
            head_dup -= 1;
            if (head_dup < 0)
                head_dup = window_size - 1;
        }

        if (count > 0)
            avg = sum / count;
        else
            /* no recent entries: report the value stored in the newest slot */
            avg = machine->current_avgwindow[head_dup].current;

        send_alert (machine, avg);
    }

    /* update the average window */
    machine->current_avgwindow[machine->head].current = machine->current_cur;
    machine->current_avgwindow[machine->head].timestamp = timenow;
    machine->head = (machine->head == window_size - 1) ? 0 : machine->head + 1;

    /* update the period window */
    if (machine->phead == pwindow_size) {
        printf ("ERROR: phead on window_size. buffer needs clear up\n");
        goto done;
    }
    machine->current_periodwindow[machine->phead].current = machine->current_cur;
    machine->phead++;

    rc = 0;

done:
    /* free memory */
    if (jdetail != NULL)
        json_object_put (jdetail);
    free (chunk.data);
    free (url);
    return rc;
}