/*
 * Walk every slot of the table (number_buckets * bucket_size entries) and
 * call f(element, args[0]) on each slot for which
 * element_check_for_flag_ALL_OFF() returns false.
 *
 * Only the first entry of args is forwarded to the callback.
 *
 * Progress is reported through log_progress_bar(): 0 before the walk, an
 * intermediate value every (total slots / 100) slots, and 100 at the end.
 * A newline is printed on stdout before and after the traversal.
 */
void hash_table_traverse_with_args(void (*f)(Element *, void *), void **args, HashTable *hash_table)
{
    long long slot;
    long long progress_step;
    int progress = 0;

    printf("\n");
    progress_step = (hash_table->number_buckets * hash_table->bucket_size) / 100;
    log_progress_bar(0);

    slot = 0;
    while (slot < hash_table->number_buckets * hash_table->bucket_size) {
        if (!element_check_for_flag_ALL_OFF(&hash_table->table[slot])) {
            f(&hash_table->table[slot], args[0]);
        }

        /* progress_step is 0 for tables with fewer than 100 slots: no intermediate updates then. */
        if (progress_step > 0 && slot % progress_step == 0) {
            progress = ((double)slot / (double)(hash_table->number_buckets * hash_table->bucket_size)) * 100;
            log_progress_bar(progress);
        }

        slot++;
    }

    log_progress_bar(100);
    printf("\n");
}
/*
 * Traverse one block of consecutive buckets of the hash table.
 *
 * The table's buckets are split into number_of_blocks equally sized blocks;
 * this call visits the buckets of block number `block` (0-based), i.e. bucket
 * indices [block * n, (block + 1) * n) with n = number_buckets / number_of_blocks,
 * and calls hash_table_traverse_bucket_with_args(bucket, f, args, hash_table)
 * for each of them.
 *
 * Preconditions (checked with assert):
 *   - number_of_blocks > 0
 *   - 0 <= block < number_of_blocks
 *   - number_buckets is an exact multiple of number_of_blocks
 *
 * Progress is reported with log_progress_bar(), each intermediate update being
 * prefixed on stdout with "<block>:". The percentage is measured relative to
 * this block, so it runs from 0 to 100 whichever block is being processed.
 */
void hash_table_n_buckets_traverse_with_args(int block, int number_of_blocks, void (*f)(Element *, void *), void *args, HashTable *hash_table)
{
    long long i;

    printf("\n");

    /* Validate before dividing: number_of_blocks == 0 would be a division by zero. */
    assert(number_of_blocks > 0);
    assert(block >= 0);
    assert(block < number_of_blocks);

    long long buckets_to_iterate = hash_table->number_buckets / number_of_blocks;

    /* The buckets must split evenly, otherwise the remainder would never be visited. */
    assert(buckets_to_iterate * number_of_blocks == hash_table->number_buckets);

    long long one_percent = buckets_to_iterate / 100;
    long long first_bucket = buckets_to_iterate * block;
    long long last_bucket = first_bucket + buckets_to_iterate;

    log_progress_bar(0);

    for (i = first_bucket; i < last_bucket; i++) {
        hash_table_traverse_bucket_with_args(i, f, args, hash_table);

        /*
         * Progress must be based on the offset inside this block, not on the
         * absolute bucket index; otherwise every block but the first would
         * report values of 100% and above.
         */
        long long done = i - first_bucket;

        /* one_percent is 0 for blocks of fewer than 100 buckets: no intermediate updates then. */
        if (one_percent > 0 && done % one_percent == 0) {
            int percent = (int)(((double)done / (double)buckets_to_iterate) * 100);
            printf("%d:", block);
            log_progress_bar(percent);
        }
    }

    log_progress_bar(100);
    printf("\n");
}
/*
 * Split the graph into subgraphs and write one consensus contig for every
 * subgraph that is large enough.
 *
 * Phase 1 - subgraph discovery:
 *   Every node that carries neither the PRUNED nor the VISITED flag is handed
 *   to grow_graph_from_node(), which returns the number of nodes in the grown
 *   subgraph and stores a seed node for it. For each subgraph with a non-NULL
 *   seed, one tab-separated line is written to
 *   "<consensus_contigs_filename>.analysis":
 *       seed index, nodes in subgraph, running node total,
 *       kmer of the starting node, kmer of the seed node.
 *
 * Phase 2 - contig output:
 *   Flags are reset with db_graph_reset_flags(). For each subgraph holding at
 *   least min_subgraph_kmers nodes, coverage_walk_get_path() is run forward and
 *   in reverse from the seed; the reversed forward path with the reverse path
 *   appended is written to consensus_contigs_filename via path_to_fasta().
 *
 * Parameters:
 *   graph                      - graph to analyse.
 *   consensus_contigs_filename - output FASTA file; also the base name of the
 *                                analysis file.
 *   min_subgraph_kmers         - minimum subgraph size (in nodes) for a contig
 *                                to be output.
 *
 * The process is terminated with exit(-1) when the subgraph table cannot be
 * allocated, an output file cannot be opened, a NULL node is traversed, or
 * MAX_SEEDS subgraphs have been recorded.
 *
 * Note: explore_node() is a nested function (GNU C extension) so that it can
 * share this function's locals with the hash table traversal callback.
 */
void metacortex_find_subgraphs(dBGraph* graph, char* consensus_contigs_filename, int min_subgraph_kmers)
{
    SubGraphInfo *sub_graphs;
    FILE *fp;
    Path *path_fwd = path_new(MAX_EXPLORE_PATH_LENGTH, graph->kmer_size);
    Path *path_rev = path_new(MAX_EXPLORE_PATH_LENGTH, graph->kmer_size);
    Path *final_path = path_new(MAX_EXPLORE_PATH_LENGTH, graph->kmer_size);
    /* NOTE(review): assumes binary_kmer_to_seq() needs at most 256 bytes, i.e. kmer_size < 256 - confirm. */
    char seq[256];
    /* ".analysis" is 9 characters; the 10th byte holds the terminating NUL. */
    char analysis_filename[strlen(consensus_contigs_filename) + 10];
    long int total_nodes = 0;
    int n_seeds = 0;
    int i;

    /* Bounded formatting: the buffer is sized exactly, snprintf guards against future edits. */
    snprintf(analysis_filename, sizeof(analysis_filename), "%s.analysis", consensus_contigs_filename);

    log_and_screen_printf("Running metacortex subgraph analysis...\n");
    log_and_screen_printf(" Contig file: %s\n", consensus_contigs_filename);
    log_and_screen_printf(" Analysis file: %s\n", analysis_filename);
    log_and_screen_printf("Minimum subgraph size: %i\n", min_subgraph_kmers);

    /* Initialise temporary path array buffers */
    path_array_initialise_buffers(graph->kmer_size);

    /* Create a list of subgraphs */
    /* The size expression has type size_t; convert explicitly so the arguments match the %d conversions. */
    log_and_screen_printf("Allocating %d Mb to store subgraph information (max %d seeds)...\n",
                          (int)(((MAX_SEEDS * sizeof(SubGraphInfo)) / 1024) / 1024), (int)MAX_SEEDS);
    sub_graphs = calloc(MAX_SEEDS, sizeof(SubGraphInfo));
    if (!sub_graphs) {
        log_and_screen_printf("ERROR: Can't get memory for subgraphs\n");
        exit(-1);
    }

    /* Open the analysis file */
    fp = fopen(analysis_filename, "w");
    if (!fp) {
        log_and_screen_printf("ERROR: Can't open analysis file.\n");
        exit(-1);
    }

    /* For each node, if it's not pruned or visited, try and grow a graph */
    void explore_node(dBNode * node) {
        if (node == NULL) {
            log_and_screen_printf("Error: NULL node passed to explore_node.\n");
            exit(-1);
        }

        if (db_node_check_for_any_flag(node, PRUNED | VISITED) == false) {
            int nodes_in_graph;

            /* Grow graph from this node, returning the 'best' (highest coverage) node to store as seed point */
            nodes_in_graph = grow_graph_from_node(node, &(sub_graphs[n_seeds].seed_node), graph);
            total_nodes += nodes_in_graph;

            if (sub_graphs[n_seeds].seed_node == NULL) {
                printf("ERROR: Seed node is NULL, nodes in graph is %d\n", nodes_in_graph);
            } else {
                /* Write data to analysis file */
                binary_kmer_to_seq(&(node->kmer), graph->kmer_size, seq);
                fprintf(fp, "%i\t%i\t%ld\t%s\t", n_seeds, nodes_in_graph, total_nodes, seq);
                binary_kmer_to_seq(&(sub_graphs[n_seeds].seed_node->kmer), graph->kmer_size, seq);
                fprintf(fp, "%s\n", seq);

                /* Store nodes in this subgraph */
                sub_graphs[n_seeds].graph_size = nodes_in_graph;
                n_seeds++;

                /* Check we've not run out of seed storage - in future, this should dynamically allocate */
                if (n_seeds == MAX_SEEDS) {
                    log_and_screen_printf("Error: MAX_SEEDS exceeded. Quitting.\n");
                    exit(-1);
                }
            }
        }
    }

    /* Traverse each node... */
    log_and_screen_printf("Finding subgraphs...\n");
    hash_table_traverse(&explore_node, graph);
    log_and_screen_printf("Finished. Total: %ld\n", total_nodes);

    /* fclose flushes buffered output, so a failure here means the analysis file may be incomplete. */
    if (fclose(fp) != 0) {
        log_and_screen_printf("ERROR: Can't close analysis file.\n");
    }

    /* Open consensus contigs file */
    fp = fopen(consensus_contigs_filename, "w");
    if (!fp) {
        log_and_screen_printf("ERROR: Can't open contig file.\n");
        exit(-1);
    }

    /* Now go through all the seed points and generate the consensus contigs by walking forward and backward from the seed */
    db_graph_reset_flags(graph);

    log_and_screen_printf("Outputting contigs...\n");
    log_progress_bar(0);

    long long one_percent = n_seeds / 100;
    int percent;

    /* Fewer than 100 seeds: update the progress bar on every seed (also avoids a modulo by zero). */
    if (one_percent < 1) {
        one_percent = 1;
    }

    for (i = 0; i < n_seeds; i++) {
        if (i % one_percent == 0) {
            /* Widen before multiplying: 100 * i overflows int once i exceeds INT_MAX / 100. */
            percent = (int)((100LL * i) / n_seeds);
            log_progress_bar(percent);
        }

        //log_printf("Graph %i\n", i);
        if (sub_graphs[i].graph_size >= min_subgraph_kmers) {
            /* seq is only consumed by the disabled debug log below. */
            binary_kmer_to_seq(&(sub_graphs[i].seed_node->kmer), graph->kmer_size, seq);

            coverage_walk_get_path(sub_graphs[i].seed_node, forward, NULL, graph, path_fwd);
            coverage_walk_get_path(sub_graphs[i].seed_node, reverse, NULL, graph, path_rev);

            /* Contig = reversed forward path followed by the reverse path. */
            path_reverse(path_fwd, final_path);
            path_append(final_path, path_rev);
            final_path->id = i;
            path_to_fasta(final_path, fp);
            //log_printf(" Seed %s\tFwd path length %i\tRev path length %i\tFinal path length %i\n", seq, path_fwd->length, path_rev->length, final_path->length);

            path_reset(path_fwd);
            /* NOTE(review): this result is only used by the disabled debug log; call kept in case it has side effects - confirm. */
            perfect_path_get_path(sub_graphs[i].seed_node, forward, &db_node_action_do_nothing, graph, path_fwd);
            //log_printf("\t\tPerfect path fwd length %i\n", path_fwd->length);

            path_reset(path_rev);
            path_reset(final_path);
        } else {
            log_printf(" Number of nodes (%i} too small. Not outputting contig.\n", sub_graphs[i].graph_size);
        }
    }

    log_progress_bar(100);
    printf("\n");
    log_and_screen_printf("Finished contig output.\n");

    /* fclose flushes buffered output, so a failure here means the contig file may be incomplete. */
    if (fclose(fp) != 0) {
        log_and_screen_printf("ERROR: Can't close contig file.\n");
    }

    free(sub_graphs);

    /*
     * NOTE(review): path_fwd, path_rev and final_path are obtained from path_new()
     * and never released in this function - release them with the Path module's
     * destructor if it provides one.
     */
}