/* Determine which descendants are direct children of this node, i.e.
 * descendants not already accounted for through another child's subtree. */
void dag_node_footprint_determine_children(struct dag_node *n)
{
	struct dag_node *c;

	if(!n->footprint)
		n->footprint = dag_node_footprint_create();

	/* Have un-updated children calculate their direct children. */
	set_first_element(n->descendants);
	while((c = set_next_element(n->descendants))){
		if(!(c->footprint && c->footprint->children_updated)){
			dag_node_footprint_determine_children(c);
		}
		set_insert_set(n->footprint->accounted, c->footprint->accounted);
	}

	set_first_element(n->descendants);
	while((c = set_next_element(n->descendants))){
		if(!set_lookup(n->footprint->accounted, c)){
			set_insert(n->footprint->direct_children, c);
			set_insert(n->footprint->accounted, c);
		}
	}

	n->footprint->children_updated = 1;
}
/* This finds the intersect of all of the children's residual node lists.
 * This intersect forms the basis for the parent's residual nodes, as all
 * sub-branches will culminate in the listed nodes. */
void dag_node_footprint_determine_desc_residual_intersect(struct dag_node *n)
{
	struct dag_node *node1, *node2;

	int comp = 1;
	int index = 0;

	while(comp){
		index++;

		node1 = set_next_element(n->footprint->direct_children); // Get first child
		node1 = list_peek_current(node1->footprint->residual_nodes); // Grab next node in its list

		while((node2 = set_next_element(n->footprint->direct_children))){ // Loop over remaining children
			node2 = list_peek_current(node2->footprint->residual_nodes);
			/* We mark when the nodes are no longer comparable, but do not
			 * break as we need all of the lists to be in the first
			 * non-shared location for future use. */
			if(!node1 || !node2 || (node1 != node2))
				comp = 0;
		}
		set_first_element(n->footprint->direct_children);

		/* Only add the node if it occurred in all of the branch lists. */
		if(comp){
			list_push_tail(n->footprint->residual_nodes, node1);
			//res_node = node1;
			/* Advance all direct_children forward one residual. */
			while((node1 = set_next_element(n->footprint->direct_children))){
				list_next_item(node1->footprint->residual_nodes);
			}
			set_first_element(n->footprint->direct_children);
		}
	}
}
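/* A minimal, self-contained sketch (hypothetical data, not part of the
 * dag_node API) of the idea above: the parent's residual chain is the
 * longest common prefix of its children's residual-node lists.  Plain int
 * arrays stand in here for the lists of residual-node pointers. */
#include <stdio.h>

static int common_prefix_length(int lists[][4], int nlists, int len)
{
	int prefix = 0;
	for(int i = 0; i < len; i++){
		for(int j = 1; j < nlists; j++){
			if(lists[j][i] != lists[0][i])
				return prefix;
		}
		prefix++;
	}
	return prefix;
}

int main(void)
{
	/* Two branches that share residual nodes 7 and 3, then diverge. */
	int branches[2][4] = { {7, 3, 5, 9}, {7, 3, 8, 2} };
	printf("shared residual prefix: %d nodes\n",
	       common_prefix_length(branches, 2, 4)); /* prints 2 */
	return 0;
}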
void dag_node_footprint_prepare_node_terminal_files(struct dag_node *n)
{
	struct dag_file *f;
	list_first_item(n->target_files);
	while((f = list_next_item(n->target_files))){
		if(f->type == DAG_FILE_TYPE_OUTPUT){
			set_push(n->footprint->terminal_files, f);
		}
		set_push(n->footprint->coexist_files, f);
	}

	struct dag_node *node1;
	set_first_element(n->ancestors);
	while((node1 = set_next_element(n->ancestors))){
		set_insert_set(n->footprint->terminal_files, node1->footprint->terminal_files);

		set_first_element(node1->footprint->coexist_files);
		while((f = set_next_element(node1->footprint->coexist_files))){
			if(dag_file_coexist_files(n->footprint->accounted, f))
				set_push(n->footprint->coexist_files, f);
		}
	}

	set_first_element(n->descendants);
	while((node1 = set_next_element(n->descendants))){
		node1->footprint->terminal_updated--;
		if(node1->footprint->terminal_updated <= 0)
			dag_node_footprint_prepare_node_terminal_files(node1);
	}
}
void dag_node_footprint_set_desc_res_wgt_diff(struct dag_node *n)
{
	struct dag_node *node1, *node2;

	set_first_element(n->footprint->direct_children);
	while((node1 = set_next_element(n->footprint->direct_children))){
		node2 = list_peek_current(node1->footprint->residual_nodes);

		/* Add the last residual's residual and terminal files in the branch
		 * to the current residual files. */
		set_insert_set(n->footprint->residual_files, node2->footprint->residual_files);
		set_insert_set(n->footprint->residual_files, node2->footprint->terminal_files);

		/* Add the last residual's residual and terminal files in the branch
		 * to the branch's first node residual files. */
		set_insert_set(node1->footprint->res_files, node2->footprint->residual_files);
		set_insert_set(node1->footprint->res_files, node2->footprint->terminal_files);

		/* Set branch head's res size. */
		node1->footprint->res = dag_file_set_size(node1->footprint->res_files);

		set_insert_set(node1->footprint->wgt_files, node2->footprint->footprint_min_files);
		node1->footprint->wgt = node2->footprint->footprint_min_size;

		set_insert_set(node1->footprint->max_wgt_files, node2->footprint->footprint_max_files);
		node1->footprint->max_wgt = node2->footprint->footprint_max_size;

		list_next_item(node1->footprint->residual_nodes);
		while((node2 = list_peek_current(node1->footprint->residual_nodes))){
			if(node2->footprint->footprint_min_size >= node1->footprint->wgt){
				set_delete(node1->footprint->wgt_files);
				node1->footprint->wgt_files = set_duplicate(node2->footprint->footprint_min_files);
				node1->footprint->wgt = node2->footprint->footprint_min_size;
			}
			if(node2->footprint->footprint_max_size >= node1->footprint->max_wgt){
				set_delete(node1->footprint->max_wgt_files);
				node1->footprint->max_wgt_files = set_duplicate(node2->footprint->footprint_max_files);
				node1->footprint->max_wgt = node2->footprint->footprint_max_size;
			}
			list_next_item(node1->footprint->residual_nodes);
		}
	}

	n->footprint->residual_size = dag_file_set_size(n->footprint->residual_files);

	set_first_element(n->footprint->direct_children);
	while((node1 = set_next_element(n->footprint->direct_children))){
		node1->footprint->diff = node1->footprint->wgt - node1->footprint->res;
	}
}
void dag_node_print_file_set(struct set *s, FILE *out, char *t)
{
	if(!s){
		fprintf(out, "\\{\\}%s", t);
		return;
	}

	set_first_element(s);
	struct dag_file *f;
	if(set_size(s) == 0){
		fprintf(out, "\\{\\}%s", t);
	} else {
		fprintf(out, "\\{");
		const char *files[set_size(s)];
		int index = 0;
		while((f = set_next_element(s))){
			files[index] = f->filename;
			index++;
		}
		qsort(files, index, sizeof(char *), cstring_cmp);
		for(int i = 0; i < index; i++){
			fprintf(out, "%s", files[i]);
		}
		fprintf(out, "\\}%s", t);
	}
}
struct catalog_query *catalog_query_create(const char *hosts, struct jx *filter_expr, time_t stoptime)
{
	struct catalog_query *q = NULL;
	char *n;
	struct catalog_host *h;
	struct list *sorted_hosts = catalog_query_sort_hostlist(hosts);
	int backoff_interval = 1;

	list_first_item(sorted_hosts);
	while(time(NULL) < stoptime) {
		if(!(h = list_next_item(sorted_hosts))) {
			/* Every host failed this pass; wait, then retry with a capped exponential backoff. */
			list_first_item(sorted_hosts);
			sleep(backoff_interval);

			int max_backoff_interval = MAX(0, stoptime - time(NULL));
			backoff_interval = MIN(backoff_interval * 2, max_backoff_interval);

			continue;
		}

		struct jx *j = catalog_query_send_query(h->url, time(NULL) + 5);
		if(j) {
			q = xxmalloc(sizeof(*q));
			q->data = j;
			q->current = j->u.items;
			q->filter_expr = filter_expr;

			if(h->down) {
				debug(D_DEBUG, "catalog server at %s is back up", h->host);
				set_first_element(down_hosts);
				while((n = set_next_element(down_hosts))) {
					if(!strcmp(n, h->host)) {
						/* Remove the entry from the set before freeing it. */
						set_remove(down_hosts, n);
						free(n);
						break;
					}
				}
			}
			break;
		} else {
			if(!h->down) {
				debug(D_DEBUG, "catalog server at %s seems to be down", h->host);
				set_insert(down_hosts, xxstrdup(h->host));
			}
		}
	}

	list_first_item(sorted_hosts);
	while((h = list_next_item(sorted_hosts))) {
		free(h->host);
		free(h->url);
		free(h);
	}
	list_delete(sorted_hosts);

	return q;
}
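/* A minimal standalone sketch (hypothetical timings, not part of the catalog
 * API) of the retry policy above: after each failed pass over the host list
 * the wait doubles, but it is capped so the loop never sleeps past stoptime. */
#include <stdio.h>
#include <time.h>
#include <unistd.h>

#define MIN(a,b) ((a) < (b) ? (a) : (b))
#define MAX(a,b) ((a) > (b) ? (a) : (b))

int main(void)
{
	time_t stoptime = time(NULL) + 10;	/* give up after ten seconds */
	int backoff_interval = 1;

	while(time(NULL) < stoptime) {
		/* ... imagine every catalog host failed on this pass ... */
		printf("all hosts failed, sleeping %d second(s)\n", backoff_interval);
		sleep(backoff_interval);

		int max_backoff_interval = MAX(0, (int)(stoptime - time(NULL)));
		backoff_interval = MIN(backoff_interval * 2, max_backoff_interval);
		if(backoff_interval <= 0)
			break;	/* out of time; the real loop simply re-tests its condition */
	}
	return 0;
}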
void makeflow_gc_prepare( struct dag *d )
{
	/* Files to be collected:
	 * (((all_files \minus sink_files) \union collect_list) \minus preserve_list) \minus source_files
	 */

	/* Parse GC_*_LIST and record which target files should be
	 * garbage collected. */
	char *collect_list  = dag_variable_lookup_global_string("GC_COLLECT_LIST", d);
	char *preserve_list = dag_variable_lookup_global_string("GC_PRESERVE_LIST", d);

	struct dag_file *f;
	char *filename;

	/* Add all files except sink_files. */
	hash_table_firstkey(d->files);
	while((hash_table_nextkey(d->files, &filename, (void **) &f)))
		if(!dag_file_is_sink(f)) {
			set_insert(d->collect_table, f);
		}

	int i, argc;
	char **argv;

	/* Add collect_list, for sink_files that should be removed. */
	string_split_quotes(collect_list, &argc, &argv);
	for(i = 0; i < argc; i++) {
		f = dag_file_lookup_or_create(d, argv[i]);
		set_insert(d->collect_table, f);
		debug(D_MAKEFLOW_RUN, "Added %s to garbage collection list", f->filename);
	}
	free(argv);

	/* Remove files named in preserve_list. */
	string_split_quotes(preserve_list, &argc, &argv);
	for(i = 0; i < argc; i++) {
		f = dag_file_lookup_or_create(d, argv[i]);
		set_remove(d->collect_table, f);
		debug(D_MAKEFLOW_RUN, "Removed %s from garbage collection list", f->filename);
	}
	free(argv);

	/* Remove source_files from collect_table. */
	hash_table_firstkey(d->files);
	while((hash_table_nextkey(d->files, &filename, (void **) &f)))
		if(dag_file_is_source(f)) {
			set_remove(d->collect_table, f);
			debug(D_MAKEFLOW_RUN, "Removed %s from garbage collection list", f->filename);
		}

	/* Print reference counts of files to be collected. */
	set_first_element(d->collect_table);
	while((f = set_next_element(d->collect_table)))
		debug(D_MAKEFLOW_RUN, "Added %s to garbage collection list (%d)", f->filename, f->ref_count);
}
void dag_node_print_node_set(struct set *s, FILE *out, char *t)
{
	if(!s){
		fprintf(out, "\\{\\}%s", t);
		return;
	}

	set_first_element(s);
	struct dag_node *n;
	if(set_size(s) == 0){
		fprintf(out, "\\{\\}%s", t);
	} else {
		n = set_next_element(s);
		fprintf(out, "\\{%d", n->nodeid);
		while((n = set_next_element(s))){
			fprintf(out, ",%d", n->nodeid);
		}
		fprintf(out, "\\}%s", t);
	}
}
int set_insert_set(struct set *s, struct set *s2)
{
	set_first_element(s2);
	int additions = 0;
	const void *element;
	while((element = set_next_element(s2))){
		additions += set_insert(s, element);
	}
	return additions;
}
/* After a node has been completed, mark that it and its
 * children are in need of being updated. */
void dag_node_footprint_reset(struct dag_node *n)
{
	struct dag_node *n1;
	set_first_element(n->footprint->direct_children);
	while((n1 = set_next_element(n->footprint->direct_children))){
		if(n1->footprint->footprint_updated || !n->footprint->terminal_updated)
			dag_node_footprint_reset(n1);
	}
	n->footprint->size_updated = 0;
	n->footprint->footprint_updated = 0;
	n->footprint->terminal_updated = set_size(n->ancestors);
}
struct set *set_duplicate(struct set *s)
{
	struct set *s2;
	s2 = set_create(0);
	set_first_element(s);
	const void *element;
	while((element = set_next_element(s)))
		set_insert(s2, element);

	return s2;
}
struct set *set_union(struct set *s1, struct set *s2)
{
	struct set *s = set_duplicate(s1);

	set_first_element(s2);
	const void *element;
	while((element = set_next_element(s2)))
		set_insert(s, element);

	return s;
}
void *set_pop(struct set *s)
{
	if( set_size(s) < 1 )
		return 0;

	void *element;
	set_first_element(s);
	element = set_next_element(s);

	if(!set_remove(s, element))
		return 0;
	else
		return element;
}
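/* A minimal usage sketch of the helpers above (set_insert_set, set_duplicate,
 * set_union, set_pop), assuming the pointer-set API declared in set.h
 * (set_create, set_insert, set_size, set_delete).  The elements here are
 * just static strings used as opaque pointers. */
#include <stdio.h>
#include "set.h"

int main(void)
{
	static char *a = "a", *b = "b", *c = "c";

	struct set *s1 = set_create(0);
	struct set *s2 = set_create(0);
	set_insert(s1, a);
	set_insert(s1, b);
	set_insert(s2, b);
	set_insert(s2, c);

	/* Merge s2 into s1 in place; only c is new, so this returns 1. */
	printf("added %d element(s)\n", set_insert_set(s1, s2));

	/* Build a new set holding the union without touching s1 or s2. */
	struct set *u = set_union(s1, s2);
	printf("union has %d element(s)\n", set_size(u));	/* 3 */

	/* Drain the union one arbitrary element at a time. */
	void *e;
	while((e = set_pop(u)))
		printf("popped %s\n", (char *) e);

	set_delete(s1);
	set_delete(s2);
	set_delete(u);
	return 0;
}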
void dag_close_over_environment(struct dag *d)
{
	/* For each exported and special variable, if the variable does not have a
	 * value assigned yet, we look for its value in the running environment. */
	char *name;
	struct dag_variable_value *v;

	set_first_element(d->special_vars);
	while((name = set_next_element(d->special_vars)))
	{
		v = dag_get_variable_value(name, d->variables, d->nodeid_counter);
		if(!v)
		{
			char *value_env = getenv(name);
			if(value_env)
			{
				dag_variable_add_value(name, d->variables, 0, value_env);
			}
		}
	}

	set_first_element(d->export_vars);
	while((name = set_next_element(d->export_vars)))
	{
		v = dag_get_variable_value(name, d->variables, d->nodeid_counter);
		if(!v)
		{
			char *value_env = getenv(name);
			if(value_env)
			{
				dag_variable_add_value(name, d->variables, 0, value_env);
			}
		}
	}
}
int dag_node_footprint_dependencies_active(struct dag_node *n)
{
	/* Note: the early return below disables the dependency check, so every
	 * node currently reports its dependencies as active and the code after
	 * it is unreachable. */
	return 1;

	if(!n->footprint->dependencies)
		return 1;

	struct dag_node *n1;
	set_first_element(n->footprint->dependencies);
	while((n1 = set_next_element(n->footprint->dependencies))){
		if(!(n1->state == DAG_NODE_STATE_RUNNING || n1->state == DAG_NODE_STATE_COMPLETE)){
			return 0;
		}
	}

	return 1;
}
struct nvpair * dag_node_env_create( struct dag *d, struct dag_node *n )
{
	struct dag_variable_lookup_set s = { d, n->category, n, NULL };
	char *key;

	struct nvpair *nv = 0;

	set_first_element(d->export_vars);
	while((key = set_next_element(d->export_vars))) {
		char *value = dag_variable_lookup_string(key, &s);
		if(value) {
			if(!nv) nv = nvpair_create();
			nvpair_insert_string(nv, key, value);
			debug(D_MAKEFLOW_RUN, "export %s=%s", key, value);
		}
	}

	return nv;
}
struct list *catalog_query_sort_hostlist(const char *hosts)
{
	const char *next_host;
	char *n;
	struct catalog_host *h;
	struct list *previously_up = list_create();
	struct list *previously_down = list_create();

	if(string_null_or_empty(hosts)) {
		next_host = CATALOG_HOST;
	} else {
		next_host = hosts;
	}

	if(!down_hosts) {
		down_hosts = set_create(0);
	}

	do {
		int port;
		char host[DOMAIN_NAME_MAX];

		h = xxmalloc(sizeof(*h));
		next_host = parse_hostlist(next_host, host, &port);

		h->host = xxstrdup(host);
		h->url = string_format("http://%s:%d/query.json", host, port);
		h->down = 0;

		set_first_element(down_hosts);
		while((n = set_next_element(down_hosts))) {
			if(!strcmp(n, host)) {
				h->down = 1;
			}
		}

		if(h->down) {
			list_push_tail(previously_down, h);
		} else {
			list_push_tail(previously_up, h);
		}
	} while (next_host);

	return list_splice(previously_up, previously_down);
}
void dag_node_footprint_prepare_node_size(struct dag_node *n)
{
	struct dag_node *s;

	/* Determine source size based on either the actual inputs or the
	 * estimated size of the inputs and store in source_size. */
	n->footprint->source_size = dag_file_list_size(n->source_files);

	/* Determine target size based on either the actual outputs or the
	 * estimated size of the outputs and store in target_size. */
	n->footprint->target_size = dag_file_list_size(n->target_files);

	/* Recursively update children that have not yet been updated. */
	set_first_element(n->footprint->direct_children);
	while((s = set_next_element(n->footprint->direct_children))){
		if(!s->footprint->size_updated)
			dag_node_footprint_prepare_node_size(s);
	}

	/* Mark this node as having been updated for size. */
	n->footprint->size_updated = 1;
}
static int set_double_buckets(struct set *s)
{
	struct set *sn = set_create(2 * s->bucket_count);

	if(!sn)
		return 0;

	/* Move elements to the new set. */
	void *element;
	set_first_element(s);
	while( (element = set_next_element(s)) )
		if(!set_insert(sn, element))
		{
			set_delete(sn);
			return 0;
		}

	/* Delete all elements of the old set. */
	struct entry *e, *f;
	int i;
	for(i = 0; i < s->bucket_count; i++)
	{
		e = s->buckets[i];
		while(e)
		{
			f = e->next;
			free(e);
			e = f;
		}
	}

	/* Make the old set point to the new buckets. */
	free(s->buckets);
	s->buckets      = sn->buckets;
	s->bucket_count = sn->bucket_count;
	s->size         = sn->size;

	/* Delete only the reference to the new set, so the old set remains valid. */
	free(sn);

	return 1;
}
struct jx * dag_node_env_create( struct dag *d, struct dag_node *n )
{
	struct dag_variable_lookup_set s = { d, n->category, n, NULL };
	char *key;

	struct jx *object = jx_object(0);

	char *num_cores = dag_variable_lookup_string(RESOURCES_CORES, &s);
	char *num_omp_threads = dag_variable_lookup_string("OMP_NUM_THREADS", &s);

	if (num_cores && !num_omp_threads) {
		// if the number of cores is set but the number of OpenMP threads is not,
		// set the number of OpenMP threads to the number of cores
		jx_insert(object, jx_string("OMP_NUM_THREADS"), jx_string(num_cores));
	} else if (num_omp_threads) {
		// if the number of OpenMP threads is set, set the number of cores
		// to the number of OpenMP threads
		jx_insert(object, jx_string(RESOURCES_CORES), jx_string(num_omp_threads));
	} else {
		// if neither is set, default both to 1
		jx_insert(object, jx_string("OMP_NUM_THREADS"), jx_string("1"));
		jx_insert(object, jx_string(RESOURCES_CORES), jx_string("1"));
	}

	set_first_element(d->export_vars);
	while((key = set_next_element(d->export_vars))) {
		char *value = dag_variable_lookup_string(key, &s);
		if(value) {
			jx_insert(object, jx_string(key), jx_string(value));
			debug(D_MAKEFLOW_RUN, "export %s=%s", key, value);
		}
	}

	free(num_cores);
	free(num_omp_threads);

	return object;
}
static void makeflow_gc_all( struct dag *d, int maxfiles )
{
	int collected = 0;
	struct dag_file *f;
	timestamp_t start_time, stop_time;

	/* Walk the table of files to collect and remove any whose reference
	 * count has dropped to zero, up to maxfiles per pass. */
	start_time = timestamp_get();
	set_first_element(d->collect_table);
	while((f = set_next_element(d->collect_table)) && collected < maxfiles)
	{
		if(f->ref_count < 1 && makeflow_gc_file(d, f))
			collected++;
	}
	stop_time = timestamp_get();

	/* Record the total number of files collected to the Makeflow log. */
	if(collected > 0)
	{
		makeflow_gc_collected += collected;
		makeflow_log_gc_event(d, collected, stop_time - start_time, makeflow_gc_collected);
	}
}
int get_ancestor_depth(struct dag_node *n)
{
	int group_number = -1;
	struct dag_node *ancestor = NULL;

	debug(D_DEBUG, "n->ancestor_depth: %d", n->ancestor_depth);

	if(n->ancestor_depth >= 0) {
		return n->ancestor_depth;
	}

	set_first_element(n->ancestors);
	while((ancestor = set_next_element(n->ancestors))) {
		group_number = get_ancestor_depth(ancestor);
		debug(D_DEBUG, "group: %d, n->ancestor_depth: %d", group_number, n->ancestor_depth);
		if(group_number > n->ancestor_depth) {
			n->ancestor_depth = group_number;
		}
	}

	n->ancestor_depth++;

	return n->ancestor_depth;
}
/* Function that calculates the three different footprint values and
 * stores the largest as the key footprint of the node. */
void dag_node_footprint_measure(struct dag_node *n)
{
	struct dag_node *c;

	dag_node_footprint_determine_run_footprint(n);

	/* Have un-updated children calculate their current footprint. */
	set_first_element(n->footprint->direct_children);
	while((c = set_next_element(n->footprint->direct_children))){
		if(!c->footprint->footprint_updated)
			dag_node_footprint_measure(c);
	}

	dag_node_footprint_determine_descendant(n);

	/* Finds the min of the three different weights. */
	dag_node_footprint_min(n);

	/* Finds the max of the three different weights. */
	dag_node_footprint_max(n);

	/* Mark node as having been updated. */
	n->footprint->footprint_updated = 1;
}
/* The descendant footprint of a node is defined as a balance between
 * the widest point of the children branches, while still maintaining
 * the existence of the sibling branches. The assumption is that by
 * knowing the largest size needed, all other branches can be executed
 * within that designated size, so we only need to add the residual
 * size of a branch to hold onto it while the heavier weights are
 * computed. */
void dag_node_footprint_determine_descendant(struct dag_node *n)
{
	struct dag_node *node1, *node2; //, *res_node;
	struct list *tmp_direct_children = list_create();
	struct set *footprint = set_create(0);
	uint64_t footprint_size = 0;

	/* Create a second list of direct children that allows us to sort on
	 * footprint properties. This is used when we compare footprints and
	 * the residual nodes. */
	set_first_element(n->footprint->direct_children);
	while((node1 = set_next_element(n->footprint->direct_children))){
		list_push_tail(tmp_direct_children, node1);
		list_first_item(node1->footprint->residual_nodes);
	}

	/* There are two cases for descendant nodes:
	 *   1. Multiple direct_children, indicating that multiple branches will
	 *      need to be maintained concurrently and we need to account for that.
	 *   2. One descendant, indicating we want to continue the chain of
	 *      residuals and footprints that our child holds. Empty lists are
	 *      created for this case. */
	set_first_element(n->footprint->direct_children);
	if(set_size(n->footprint->direct_children) > 1){
		dag_node_footprint_determine_desc_residual_intersect(n);

		dag_node_footprint_set_desc_res_wgt_diff(n);

		set_insert_list(footprint, n->target_files);

		list_sort(tmp_direct_children, dag_node_footprint_comp_diff);

		list_first_item(tmp_direct_children);
		/* Loop over each child, giving it the chance to be the largest footprint. */
		while((node1 = list_next_item(tmp_direct_children))){
			footprint_size = dag_file_set_size(footprint);
			if((footprint_size + node1->footprint->wgt) > n->footprint->delete_footprint){
				set_delete(n->footprint->delete_files);
				n->footprint->delete_files = set_duplicate(footprint);
				set_insert_set(n->footprint->delete_files, node1->footprint->wgt_files);
				n->footprint->delete_footprint = dag_file_set_size(n->footprint->delete_files);
			}
			// This is where we would remove an input file if it wasn't needed for other branches
			set_insert_set(footprint, node1->footprint->res_files);
			list_push_tail(n->footprint->delete_run_order, node1);
		}

		list_sort(tmp_direct_children, dag_node_footprint_comp_wgt_rev);

		list_first_item(tmp_direct_children);
		node1 = list_next_item(tmp_direct_children);
		set_insert_set(n->footprint->prog_max_files, node1->footprint->max_wgt_files);
		set_insert_set(n->footprint->prog_min_files, node1->footprint->wgt_files);
		list_push_tail(n->footprint->prog_run_order, node1);

		/* Find the total space that is needed to hold all residuals and the
		 * largest footprint branch concurrently. */
		while((node2 = list_next_item(tmp_direct_children))){
			set_insert_set(n->footprint->prog_max_files, node2->footprint->max_wgt_files);
			set_insert_set(n->footprint->prog_min_files, node2->footprint->res_files);
			list_push_tail(n->footprint->prog_run_order, node2);
		}

		n->footprint->prog_max_footprint = dag_file_set_size(n->footprint->prog_max_files);
		n->footprint->prog_min_footprint = dag_file_set_size(n->footprint->prog_min_files);
	} else {
		if(set_size(n->footprint->direct_children) == 1){
			node1 = set_next_element(n->footprint->direct_children);
			list_delete(n->footprint->residual_nodes);
			n->footprint->residual_nodes = list_duplicate(node1->footprint->residual_nodes);
		}

		set_insert_list(n->footprint->residual_files, n->target_files);
		set_insert_set(n->footprint->residual_files, n->footprint->terminal_files);
		n->footprint->residual_size = dag_file_set_size(n->footprint->residual_files);
	}

	/* Add the current node's list so parents can quickly access these decisions. */
	list_push_tail(n->footprint->residual_nodes, n);

	list_delete(tmp_direct_children);
	set_delete(footprint);
}
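/* A minimal, self-contained sketch (hypothetical sizes, no dag_node
 * structures) of the accounting performed above for the multi-child case:
 * branches are visited in run order, each one's full weight (wgt) is charged
 * on top of the residuals (res) of the branches already visited, and the
 * largest such sum becomes the parent's delete_footprint.  The real code
 * unions sets of files rather than adding sizes, so shared files are not
 * double-counted; plain sums are used here only for illustration. */
#include <stdio.h>
#include <inttypes.h>

struct branch { uint64_t res; uint64_t wgt; };

int main(void)
{
	/* The parent's own target files plus three child branches,
	 * already ordered as the run order would be. */
	uint64_t parent_targets = 10;
	struct branch branches[3] = { {5, 40}, {8, 25}, {2, 15} };

	uint64_t residuals = parent_targets;
	uint64_t delete_footprint = 0;

	for(int i = 0; i < 3; i++){
		uint64_t candidate = residuals + branches[i].wgt;	/* run branch i in full */
		if(candidate > delete_footprint)
			delete_footprint = candidate;
		residuals += branches[i].res;	/* keep only its residual afterwards */
	}

	/* Candidates are 50, 40, and 38, so the result is 50. */
	printf("delete_footprint = %" PRIu64 "\n", delete_footprint);
	return 0;
}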