void makeflow_gc_prepare( struct dag *d ) { /* Files to be collected: * ((all_files \minus sink_files)) \union collect_list) \minus preserve_list) \minus source_files */ /* Parse GC_*_LIST and record which target files should be * garbage collected. */ char *collect_list = dag_variable_lookup_global_string("GC_COLLECT_LIST", d); char *preserve_list = dag_variable_lookup_global_string("GC_PRESERVE_LIST", d); struct dag_file *f; char *filename; /* add all files, but sink_files */ hash_table_firstkey(d->files); while((hash_table_nextkey(d->files, &filename, (void **) &f))) if(!dag_file_is_sink(f)) { set_insert(d->collect_table, f); } int i, argc; char **argv; /* add collect_list, for sink_files that should be removed */ string_split_quotes(collect_list, &argc, &argv); for(i = 0; i < argc; i++) { f = dag_file_lookup_or_create(d, argv[i]); set_insert(d->collect_table, f); debug(D_MAKEFLOW_RUN, "Added %s to garbage collection list", f->filename); } free(argv); /* remove files from preserve_list */ string_split_quotes(preserve_list, &argc, &argv); for(i = 0; i < argc; i++) { /* Must initialize to non-zero for hash_table functions to work properly. */ f = dag_file_lookup_or_create(d, argv[i]); set_remove(d->collect_table, f); debug(D_MAKEFLOW_RUN, "Removed %s from garbage collection list", f->filename); } free(argv); /* remove source_files from collect_table */ hash_table_firstkey(d->files); while((hash_table_nextkey(d->files, &filename, (void **) &f))) if(dag_file_is_source(f)) { set_remove(d->collect_table, f); debug(D_MAKEFLOW_RUN, "Removed %s from garbage collection list", f->filename); } /* Print reference counts of files to be collected */ set_first_element(d->collect_table); while((f = set_next_element(d->collect_table))) debug(D_MAKEFLOW_RUN, "Added %s to garbage collection list (%d)", f->filename, f->ref_count); }
struct list *makeflow_wrapper_generate_files( struct list *result, struct list *input, struct dag_node *n, struct makeflow_wrapper *w) { char *f; char *nodeid = string_format("%d",n->nodeid); struct list *files = list_create(); list_first_item(input); while((f = list_next_item(input))) { char *filename = string_replace_percents(f, nodeid); char *f = xxstrdup(filename); free(filename); char *remote, *p; struct dag_file *file; p = strchr(f, '='); if(p) { *p = 0; file = dag_file_lookup_or_create(n->d, f); if(!n->local_job && !itable_lookup(w->remote_names, (uintptr_t) file)){ remote = xxstrdup(p+1); itable_insert(w->remote_names, (uintptr_t) file, (void *)remote); hash_table_insert(w->remote_names_inv, remote, (void *)file); } *p = '='; } else { file = dag_file_lookup_or_create(n->d, f); } free(f); list_push_tail(files, file); } free(nodeid); result = list_splice(result, files); return result; }
/* Record the file named by filename as a source (input) of node n, and
 * record n as a consumer of that file. When remotename is non-NULL it is
 * also registered in the node's remote namespace. */
void dag_node_add_source_file(struct dag_node *n, const char *filename, char *remotename)
{
	struct dag_file *f = dag_file_lookup_or_create(n->d, filename);

	if(remotename) {
		dag_node_add_remote_name(n, filename, remotename);
	}

	/* The node consumes this file... */
	list_push_head(n->source_files, f);

	/* ...and the file records which nodes still need it. */
	list_push_head(f->needed_by, n);
	f->ref_count++;
}
/* Record the file named by filename as a target (output) of node n, and
 * record n as its producer. When remotename is non-NULL it is also
 * registered in the node's remote namespace. It is a fatal error for two
 * different nodes to declare the same target file. */
void dag_node_add_target_file(struct dag_node *n, const char *filename, char *remotename)
{
	struct dag_file *f = dag_file_lookup_or_create(n->d, filename);

	/* A file may have at most one producer. */
	if(f->target_of && f->target_of != n) {
		fatal("%s is defined multiple times at %s:%d and %s:%d\n", filename, filename, f->target_of->linenum, filename, n->linenum);
	}

	if(remotename) {
		dag_node_add_remote_name(n, filename, remotename);
	}

	/* The node produces this file... */
	list_push_head(n->target_files, f);

	/* ...and the file remembers its producer. */
	f->target_of = n;
}
void makeflow_parse_input_outputs( struct dag *d ) { /* Check if GC_*_LIST is specified and warn user about deprecated usage */ char *collect_list = dag_variable_lookup_global_string("GC_COLLECT_LIST" , d); if(collect_list) debug(D_NOTICE, "GC_COLLECT_LIST is specified: Please refer to manual about MAKEFLOW_INPUTS/OUTPUTS"); char *preserve_list = dag_variable_lookup_global_string("GC_PRESERVE_LIST", d); if(preserve_list) debug(D_NOTICE, "GC_PRESERVE_LIST is specified: Please refer to manual about MAKEFLOW_INPUTS/OUTPUTS"); /* Parse INPUT and OUTPUT lists */ struct dag_file *f; char *filename; int i, argc; char **argv; char *input_list = dag_variable_lookup_global_string("MAKEFLOW_INPUTS" , d); char *output_list = dag_variable_lookup_global_string("MAKEFLOW_OUTPUTS", d); if(input_list) { /* add collect_list, for sink_files that should be removed */ string_split_quotes(input_list, &argc, &argv); for(i = 0; i < argc; i++) { d->completed_files += 1; f = dag_file_lookup_or_create(d, argv[i]); set_insert(d->inputs, f); debug(D_MAKEFLOW_RUN, "Added %s to input list", f->filename); } free(argv); } else { debug(D_NOTICE, "MAKEFLOW_INPUTS is not specified"); } /* add all source files */ hash_table_firstkey(d->files); while((hash_table_nextkey(d->files, &filename, (void **) &f))) if(dag_file_is_source(f)) { set_insert(d->inputs, f); debug(D_MAKEFLOW_RUN, "Added %s to input list", f->filename); } if(output_list) { /* remove files from preserve_list */ string_split_quotes(output_list, &argc, &argv); for(i = 0; i < argc; i++) { /* Must initialize to non-zero for hash_table functions to work properly. 
*/ f = dag_file_lookup_or_create(d, argv[i]); set_remove(d->outputs, f); debug(D_MAKEFLOW_RUN, "Added %s to output list", f->filename); } free(argv); } else { debug(D_NOTICE, "MAKEFLOW_OUTPUTS is not specified"); /* add all sink if OUTPUTS not specified */ hash_table_firstkey(d->files); while((hash_table_nextkey(d->files, &filename, (void **) &f))) if(dag_file_is_sink(f)) { set_insert(d->outputs, f); debug(D_MAKEFLOW_RUN, "Added %s to output list", f->filename); } } }
/** Recover workflow state from an existing makeflow log file (if any),
 * then reopen the log for appending and decide which nodes must rerun.
 * The clean_mode variable was added so that we could better print out
 * error messages appropriate to the situation. Currently only used to
 * silence node rerun checking. */
void makeflow_log_recover(struct dag *d, const char *filename, int verbose_mode, struct batch_queue *queue, makeflow_clean_depth clean_mode)
{
	char *line, *name, file[MAX_BUFFER_SIZE];
	int nodeid, state, jobid, file_state;
	int first_run = 1;
	struct dag_node *n;
	struct dag_file *f;
	struct stat buf;
	timestamp_t previous_completion_time;

	d->logfile = fopen(filename, "r");
	if(d->logfile) {
		int linenum = 0;
		first_run = 0;

		printf("recovering from log file %s...\n",filename);

		while((line = get_line(d->logfile))) {
			linenum++;

			/* File-state lines have the form "# <state> <filename> <timestamp>". */
			if(sscanf(line, "# %d %s %" SCNu64 "", &file_state, file, &previous_completion_time) == 3) {
				f = dag_file_lookup_or_create(d, file);
				f->state = file_state;
				if(file_state == DAG_FILE_STATE_EXISTS){
					d->completed_files += 1;
					/* Log timestamps are microseconds; store seconds. */
					f->creation_logged = (time_t) (previous_completion_time / 1000000);
				} else if(file_state == DAG_FILE_STATE_DELETE){
					d->deleted_files += 1;
				}
				continue;
			}
			/* Skip any other comment/header lines. */
			if(line[0] == '#')
				continue;
			/* Node-state lines have the form "<timestamp> <nodeid> <state> <jobid>". */
			if(sscanf(line, "%" SCNu64 " %d %d %d", &previous_completion_time, &nodeid, &state, &jobid) == 4) {
				n = itable_lookup(d->node_table, nodeid);
				if(n) {
					n->state = state;
					n->jobid = jobid;
					/* Log timestamp is in microseconds, we need seconds for diff. */
					n->previous_completion = (time_t) (previous_completion_time / 1000000);
					continue;
				}
			}

			/* Any line that matches neither pattern means the log is corrupt. */
			fprintf(stderr, "makeflow: %s appears to be corrupted on line %d\n", filename, linenum);
			exit(1);
		}
		fclose(d->logfile);
	}

	/* Reopen the log for appending; all further state changes are recorded. */
	d->logfile = fopen(filename, "a");
	if(!d->logfile) {
		fprintf(stderr, "makeflow: couldn't open logfile %s: %s\n", filename, strerror(errno));
		exit(1);
	}
	/* Line-buffer the log so each record hits disk promptly. */
	if(setvbuf(d->logfile, NULL, _IOLBF, BUFSIZ) != 0) {
		fprintf(stderr, "makeflow: couldn't set line buffer on logfile %s: %s\n", filename, strerror(errno));
		exit(1);
	}

	/* On a brand-new log, optionally dump the full DAG structure as comments. */
	if(first_run && verbose_mode) {
		struct dag_file *f;
		struct dag_node *p;

		for(n = d->nodes; n; n = n->next) {
			/* Record node information to log */
			fprintf(d->logfile, "# NODE\t%d\t%s\n", n->nodeid, n->command);

			/* Record the node category to the log */
			fprintf(d->logfile, "# CATEGORY\t%d\t%s\n", n->nodeid, n->category->label);
			/* also write the SYMBOL as alias of CATEGORY, deprecated. */
			fprintf(d->logfile, "# SYMBOL\t%d\t%s\n", n->nodeid, n->category->label);

			/* Record node parents to log */
			fprintf(d->logfile, "# PARENTS\t%d", n->nodeid);
			list_first_item(n->source_files);
			while( (f = list_next_item(n->source_files)) ) {
				p = f->created_by;
				if(p)
					fprintf(d->logfile, "\t%d", p->nodeid);
			}
			fputc('\n', d->logfile);

			/* Record node inputs to log */
			fprintf(d->logfile, "# SOURCES\t%d", n->nodeid);
			list_first_item(n->source_files);
			while( (f = list_next_item(n->source_files)) ) {
				fprintf(d->logfile, "\t%s", f->filename);
			}
			fputc('\n', d->logfile);

			/* Record node outputs to log */
			fprintf(d->logfile, "# TARGETS\t%d", n->nodeid);
			list_first_item(n->target_files);
			while( (f = list_next_item(n->target_files)) ) {
				fprintf(d->logfile, "\t%s", f->filename);
			}
			fputc('\n', d->logfile);

			/* Record translated command to log */
			fprintf(d->logfile, "# COMMAND\t%d\t%s\n", n->nodeid, n->command);
		}
	}

	dag_count_states(d);

	// Check for log consistency: files the log claims exist must still exist
	// on the batch filesystem and must not have been modified since logged.
	if(!first_run) {
		hash_table_firstkey(d->files);
		while(hash_table_nextkey(d->files, &name, (void **) &f)) {
			/* NOTE(review): buf is only filled in when the batch_fs_stat()
			 * call in the first condition is actually evaluated; for files
			 * that are sources or not expected to exist, the S_ISDIR check
			 * below reads an uninitialized or stale buf — confirm whether
			 * this is intended. */
			if(dag_file_should_exist(f) && !dag_file_is_source(f) && !(batch_fs_stat(queue, f->filename, &buf) >= 0)){
				fprintf(stderr, "makeflow: %s is reported as existing, but does not exist.\n", f->filename);
				makeflow_log_file_state_change(d, f, DAG_FILE_STATE_UNKNOWN);
				continue;
			}
			/* Directories are not checked for modification. */
			if(S_ISDIR(buf.st_mode))
				continue;
			if(dag_file_should_exist(f) && !dag_file_is_source(f) && difftime(buf.st_mtime, f->creation_logged) > 0) {
				fprintf(stderr, "makeflow: %s is reported as existing, but has been modified (%" SCNu64 " ,%" SCNu64 ").\n", f->filename, (uint64_t)buf.st_mtime, (uint64_t)f->creation_logged);
				makeflow_clean_file(d, queue, f, 0);
				makeflow_log_file_state_change(d, f, DAG_FILE_STATE_UNKNOWN);
			}
		}
	}

	/* When cleaning, suppress per-node rerun diagnostics. */
	int silent = 0;
	if(clean_mode != MAKEFLOW_CLEAN_NONE)
		silent = 1;
	// Decide rerun tasks
	if(!first_run) {
		struct itable *rerun_table = itable_create(0);
		for(n = d->nodes; n; n = n->next) {
			makeflow_node_decide_rerun(rerun_table, d, n, silent);
		}
		itable_delete(rerun_table);
	}

	//Update file reference counts from nodes in log:
	//sources of already-completed nodes are no longer needed by them.
	for(n = d->nodes; n; n = n->next) {
		if(n->state == DAG_NODE_STATE_COMPLETE)
		{
			struct dag_file *f;
			list_first_item(n->source_files);
			while((f = list_next_item(n->source_files)))
				f->ref_count += -1;
		}
	}
}
void makeflow_wrapper_generate_files( struct batch_task *task, struct list *input, struct list *output, struct dag_node *n, struct makeflow_wrapper *w) { char *f; char *nodeid = string_format("%d",n->nodeid); list_first_item(input); while((f = list_next_item(input))) { char *filename = string_replace_percents(f, nodeid); char *f = xxstrdup(filename); free(filename); char *remote, *p; struct dag_file *file; p = strchr(f, '='); if(p) { *p = 0; file = dag_file_lookup_or_create(n->d, f); if(!n->local_job && !itable_lookup(w->remote_names, (uintptr_t) file)){ remote = xxstrdup(p+1); itable_insert(w->remote_names, (uintptr_t) file, (void *)remote); hash_table_insert(w->remote_names_inv, remote, (void *)file); makeflow_hook_add_input_file(n->d, task, f, remote, file->type); } else { makeflow_hook_add_output_file(n->d, task, f, NULL, file->type); } *p = '='; } else { file = dag_file_lookup_or_create(n->d, f); makeflow_hook_add_input_file(n->d, task, f, NULL, file->type); } free(f); } list_first_item(output); while((f = list_next_item(output))) { char *filename = string_replace_percents(f, nodeid); char *f = xxstrdup(filename); free(filename); char *remote, *p; struct dag_file *file; p = strchr(f, '='); if(p) { *p = 0; file = dag_file_lookup_or_create(n->d, f); if(!n->local_job && !itable_lookup(w->remote_names, (uintptr_t) file)){ remote = xxstrdup(p+1); itable_insert(w->remote_names, (uintptr_t) file, (void *)remote); hash_table_insert(w->remote_names_inv, remote, (void *)file); makeflow_hook_add_output_file(n->d, task, f, remote, file->type); } else { makeflow_hook_add_output_file(n->d, task, f, NULL, file->type); } *p = '='; } else { file = dag_file_lookup_or_create(n->d, f); makeflow_hook_add_output_file(n->d, task, f, NULL, file->type); } free(f); } free(nodeid); }