void makeflow_gc_prepare( struct dag *d ) { /* Files to be collected: * ((all_files \minus sink_files)) \union collect_list) \minus preserve_list) \minus source_files */ /* Parse GC_*_LIST and record which target files should be * garbage collected. */ char *collect_list = dag_variable_lookup_global_string("GC_COLLECT_LIST", d); char *preserve_list = dag_variable_lookup_global_string("GC_PRESERVE_LIST", d); struct dag_file *f; char *filename; /* add all files, but sink_files */ hash_table_firstkey(d->files); while((hash_table_nextkey(d->files, &filename, (void **) &f))) if(!dag_file_is_sink(f)) { set_insert(d->collect_table, f); } int i, argc; char **argv; /* add collect_list, for sink_files that should be removed */ string_split_quotes(collect_list, &argc, &argv); for(i = 0; i < argc; i++) { f = dag_file_lookup_or_create(d, argv[i]); set_insert(d->collect_table, f); debug(D_MAKEFLOW_RUN, "Added %s to garbage collection list", f->filename); } free(argv); /* remove files from preserve_list */ string_split_quotes(preserve_list, &argc, &argv); for(i = 0; i < argc; i++) { /* Must initialize to non-zero for hash_table functions to work properly. */ f = dag_file_lookup_or_create(d, argv[i]); set_remove(d->collect_table, f); debug(D_MAKEFLOW_RUN, "Removed %s from garbage collection list", f->filename); } free(argv); /* remove source_files from collect_table */ hash_table_firstkey(d->files); while((hash_table_nextkey(d->files, &filename, (void **) &f))) if(dag_file_is_source(f)) { set_remove(d->collect_table, f); debug(D_MAKEFLOW_RUN, "Removed %s from garbage collection list", f->filename); } /* Print reference counts of files to be collected */ set_first_element(d->collect_table); while((f = set_next_element(d->collect_table))) debug(D_MAKEFLOW_RUN, "Added %s to garbage collection list (%d)", f->filename, f->ref_count); }
void makeflow_parse_input_outputs( struct dag *d ) { /* Check if GC_*_LIST is specified and warn user about deprecated usage */ char *collect_list = dag_variable_lookup_global_string("GC_COLLECT_LIST" , d); if(collect_list) debug(D_NOTICE, "GC_COLLECT_LIST is specified: Please refer to manual about MAKEFLOW_INPUTS/OUTPUTS"); char *preserve_list = dag_variable_lookup_global_string("GC_PRESERVE_LIST", d); if(preserve_list) debug(D_NOTICE, "GC_PRESERVE_LIST is specified: Please refer to manual about MAKEFLOW_INPUTS/OUTPUTS"); /* Parse INPUT and OUTPUT lists */ struct dag_file *f; char *filename; int i, argc; char **argv; char *input_list = dag_variable_lookup_global_string("MAKEFLOW_INPUTS" , d); char *output_list = dag_variable_lookup_global_string("MAKEFLOW_OUTPUTS", d); if(input_list) { /* add collect_list, for sink_files that should be removed */ string_split_quotes(input_list, &argc, &argv); for(i = 0; i < argc; i++) { d->completed_files += 1; f = dag_file_lookup_or_create(d, argv[i]); set_insert(d->inputs, f); debug(D_MAKEFLOW_RUN, "Added %s to input list", f->filename); } free(argv); } else { debug(D_NOTICE, "MAKEFLOW_INPUTS is not specified"); } /* add all source files */ hash_table_firstkey(d->files); while((hash_table_nextkey(d->files, &filename, (void **) &f))) if(dag_file_is_source(f)) { set_insert(d->inputs, f); debug(D_MAKEFLOW_RUN, "Added %s to input list", f->filename); } if(output_list) { /* remove files from preserve_list */ string_split_quotes(output_list, &argc, &argv); for(i = 0; i < argc; i++) { /* Must initialize to non-zero for hash_table functions to work properly. 
*/ f = dag_file_lookup_or_create(d, argv[i]); set_remove(d->outputs, f); debug(D_MAKEFLOW_RUN, "Added %s to output list", f->filename); } free(argv); } else { debug(D_NOTICE, "MAKEFLOW_OUTPUTS is not specified"); /* add all sink if OUTPUTS not specified */ hash_table_firstkey(d->files); while((hash_table_nextkey(d->files, &filename, (void **) &f))) if(dag_file_is_sink(f)) { set_insert(d->outputs, f); debug(D_MAKEFLOW_RUN, "Added %s to output list", f->filename); } } }
/*
 * Remove files produced by a workflow, as selected by clean_depth.
 *
 *   MAKEFLOW_CLEAN_ALL            every non-source file, mount targets, cache dir
 *   MAKEFLOW_CLEAN_OUTPUTS        non-source files that are in d->outputs
 *   MAKEFLOW_CLEAN_INTERMEDIATES  non-source files that are not in d->outputs
 *   MAKEFLOW_CLEAN_CACHE          mount targets and the cache dir
 *
 * Afterwards every nested Makeflow job is cleaned by running its command with
 * " -c" appended.
 *
 * Returns 0 on success, or -1 if a mount target or the cache directory could
 * not be removed.
 */
int makeflow_clean(struct dag *d, struct batch_queue *queue, makeflow_clean_depth clean_depth)
{
	const int wants_cache = (clean_depth == MAKEFLOW_CLEAN_CACHE || clean_depth == MAKEFLOW_CLEAN_ALL);

	struct dag_file *file;
	char *key;

	hash_table_firstkey(d->files);
	while(hash_table_nextkey(d->files, &key, (void **) &file)) {
		/* Only complain about removal problems for files that should exist. */
		int silent = dag_file_should_exist(file) ? 0 : 1;

		/* We have a record of the file, but it is no longer created or used so delete */
		if(dag_file_is_source(file) && dag_file_is_sink(file) && !set_lookup(d->inputs, file)) {
			makeflow_clean_file(d, queue, file, silent);
		}

		if(dag_file_is_source(file)) {
			/* A source with f->source set is specified in the mountfile;
			 * its local target is removed only at cache/all depth. */
			if(file->source && wants_cache) {
				if(makeflow_clean_mount_target(file->filename)) {
					fprintf(stderr, "Failed to remove %s!\n", file->filename);
					return -1;
				}
			}
			/* Source files are never deleted by the depth rules below. */
			continue;
		}

		int remove_it;
		if(clean_depth == MAKEFLOW_CLEAN_ALL) {
			remove_it = 1;
		} else {
			int is_output = set_lookup(d->outputs, file) ? 1 : 0;
			remove_it = (is_output && clean_depth == MAKEFLOW_CLEAN_OUTPUTS)
				|| (!is_output && clean_depth == MAKEFLOW_CLEAN_INTERMEDIATES);
		}

		if(remove_it) {
			makeflow_clean_file(d, queue, file, silent);
		}
	}

	/* clean up the cache dir created due to the usage of mountfile */
	if(wants_cache) {
		int failed = (d->cache_dir && unlink_recursive(d->cache_dir)) ? 1 : 0;

		if(failed) {
			fprintf(stderr, "Failed to clean up the cache dir (%s) created due to the usage of the mountfile!\n", d->cache_dir);
		}

		/* The mount bookkeeping is cleaned whether or not the unlink worked. */
		dag_mount_clean(d);

		if(failed) {
			return -1;
		}
	}

	/* If a node is itself a Makeflow job, recursively invoke the clean
	 * operation on it. */
	struct dag_node *node = d->nodes;
	while(node) {
		if(node->nested_job) {
			/* Room for the command, the 3 characters of " -c", and the NUL. */
			size_t length = strlen(node->command) + 4;
			char *clean_command = xxmalloc(sizeof(char) * length);
			sprintf(clean_command, "%s -c", node->command);

			/* XXX this should use the batch job interface for consistency */
			makeflow_node_export_variables(d, node);
			system(clean_command);
			free(clean_command);
		}
		node = node->next;
	}

	return 0;
}