/* COMPARE_CHROMA Perform Dynamic Programming to find optimal alignment */ int Scorealign::compare_chroma() { float *path; /* Allocate the distance matrix */ path = (float *) calloc(file0_frames * file1_frames, sizeof(float)); /* skip over initial silence in signals */ if (ignore_silence) { first_x = frames_of_init_silence(chrom_energy0, file0_frames); last_x = last_non_silent_frame(chrom_energy0, file0_frames); first_y = frames_of_init_silence(chrom_energy1, file1_frames); last_y = last_non_silent_frame(chrom_energy1, file1_frames); } else { first_x = 0; last_x = file0_frames - 1; first_y = 0; last_y = file1_frames - 1; } if (last_x - first_x <= 0 || last_y - first_y <= 0) { return SA_TOOSHORT; } /* Initialize first row and column */ if (verbose) printf("Performing DP\n"); PATH(first_x, first_y) = gen_dist(first_x, first_y); for (int x = first_x + 1; x <= last_x; x++) PATH(x, first_y) = gen_dist(x, first_y) + PATH(x - 1, first_y); for (int y = 1; y <= last_y; y++) PATH(first_x, y) = gen_dist(first_x, y) + PATH(first_x, y - 1); #if DEBUG_LOG fprintf(dbf, "DISTANCE MATRIX ***************************\n"); #endif /* Perform DP for the rest of the matrix */ for (int x = first_x + 1; x <= last_x; x++) { for (int y = first_y + 1; y <= last_y; y++) { PATH(x, y) = gen_dist(x, y) + float(min3(PATH(x-1, y-1), PATH(x-1, y), PATH(x, y-1))); #if DEBUG_LOG fprintf(dbf, "(%d %d %g) ", x, y, gen_dist(x, y), PATH(x, y)); #endif } #if DEBUG_LOG fprintf(dbf, "\n"); #endif // report progress for each file0_frame (column) // This is not quite right if we are ignoring silence because // then only a sub-matrix is computed. 
if (progress && !progress->set_matrix_progress(file1_frames)) return SA_CANCEL; } #if DEBUG_LOG fprintf(dbf, "END OF DISTANCE MATRIX ********************\n"); #endif if (verbose) printf("Completed Dynamic Programming.\n"); //x and y are the ending points, it can end at either the end of midi, // or end of audio or both pathx = ALLOC(short, (file0_frames + file1_frames)); pathy = ALLOC(short, (file0_frames + file1_frames)); assert(pathx != NULL); assert(pathy != NULL); // map from file0 time to file1 time time_map = ALLOC(float, file0_frames); smooth_time_map = ALLOC(float, file0_frames); int x = last_x; int y = last_y; if (!force_final_alignment) { #if DEBUG_LOG fprintf(dbf, "\nOptimal Path: "); #endif // find end point, the lowest cost matrix value at one of the // sequence endings float min_cost = 1.0E10; for (int i = first_x; i <= last_x; i++) { if (PATH(i, last_y) <= min_cost) { min_cost = PATH(i, last_y); x = i; y = last_y; } } for (int j = first_y; j <= last_y; j++) { if (PATH(last_x, j) <= min_cost) { min_cost = PATH(last_x, j); x = last_x; y = j; } } #if DEBUG_LOG fprintf(dbf, "Min cost at %d %d\n\nPATH:\n", x, y); #endif } while ((x != first_x) || (y != first_y)) { path_step(x, y); /* Check for the optimal path backwards*/ if (x > first_x && y > first_y && PATH(x-1, y-1) <= PATH(x-1, y) && PATH(x-1, y-1) <= PATH(x, y-1)) { x--; y--; } else if (x > first_x && y > first_y && PATH(x-1, y) <= PATH(x, y-1)) { x--; } else if (y > first_y) { y--; } else if (x > first_x) { x--; } } path_step(x, y); path_reverse(); free(path); return SA_SUCCESS; // success }
/*
 * metacortex_find_subgraphs
 *
 * Two passes over the graph:
 *   1. Traverse every hash table node; from each node that is neither
 *      PRUNED nor VISITED, grow a subgraph with grow_graph_from_node(),
 *      remember the seed node it reports, and write one line per subgraph
 *      to "<consensus_contigs_filename>.analysis".
 *   2. After resetting the graph flags, for every recorded subgraph with
 *      at least min_subgraph_kmers nodes, walk from the seed in both
 *      directions and write the joined path to consensus_contigs_filename
 *      via path_to_fasta().
 *
 * Parameters:
 *   graph                      - graph to analyse
 *   consensus_contigs_filename - output contig file; also the stem of the
 *                                analysis file name
 *   min_subgraph_kmers         - subgraphs with fewer nodes are logged and
 *                                skipped
 *
 * Any failure (allocation, file open, NULL node, MAX_SEEDS reached)
 * terminates the process with exit(-1).
 *
 * NOTE(review): explore_node below is a GCC nested function; this file
 * needs a compiler that supports that extension.
 * NOTE(review): path_fwd, path_rev and final_path are never released
 * here - confirm whether a matching destroy call should be added.
 */
void metacortex_find_subgraphs(dBGraph* graph, char* consensus_contigs_filename, int min_subgraph_kmers)
{
    SubGraphInfo* sub_graphs;
    FILE* fp;
    Path *path_fwd = path_new(MAX_EXPLORE_PATH_LENGTH, graph->kmer_size);
    Path *path_rev = path_new(MAX_EXPLORE_PATH_LENGTH, graph->kmer_size);
    Path *final_path = path_new(MAX_EXPLORE_PATH_LENGTH, graph->kmer_size);
    /* NOTE(review): assumes a kmer string plus terminator fits in 256 bytes - confirm */
    char seq[256];
    /* ".analysis" is 9 characters, plus 1 for the terminating NUL */
    char analysis_filename[strlen(consensus_contigs_filename) + 10];
    long int total_nodes = 0;
    int n_seeds = 0;
    int i;

    sprintf(analysis_filename, "%s.analysis", consensus_contigs_filename);

    log_and_screen_printf("Running metacortex subgraph analysis...\n");
    log_and_screen_printf(" Contig file: %s\n", consensus_contigs_filename);
    log_and_screen_printf(" Analysis file: %s\n", analysis_filename);
    log_and_screen_printf("Minimum subgraph size: %i\n", min_subgraph_kmers);

    /* Initialise temporary path array buffers */
    path_array_initialise_buffers(graph->kmer_size);

    /* Create a list of subgraphs */
    /* sizeof yields size_t, so the megabyte figure is cast to match %d */
    log_and_screen_printf("Allocating %d Mb to store subgraph information (max %d seeds)...\n",
                          (int) (((MAX_SEEDS * sizeof(SubGraphInfo)) / 1024) / 1024),
                          MAX_SEEDS);
    sub_graphs = calloc(MAX_SEEDS, sizeof(SubGraphInfo));
    if (!sub_graphs) {
        log_and_screen_printf("ERROR: Can't get memory for subgraphs\n");
        exit(-1);
    }

    /* Open the analysis file */
    fp = fopen(analysis_filename, "w");
    if (!fp) {
        log_and_screen_printf("ERROR: Can't open analysis file.\n");
        exit(-1);
    }

    /* For each node, if it's not pruned or visited, try and grow a graph.
     * Nested function: it reads and updates sub_graphs, n_seeds,
     * total_nodes, seq and fp from the enclosing scope. */
    void explore_node(dBNode * node) {
        if (node == NULL) {
            log_and_screen_printf("Error: NULL node passed to explore_node.\n");
            exit(-1);
        }

        if (db_node_check_for_any_flag(node, PRUNED | VISITED) == false) {
            int nodes_in_graph;

            /* Grow graph from this node, returning the 'best' (highest coverage) node to store as seed point */
            nodes_in_graph = grow_graph_from_node(node, &(sub_graphs[n_seeds].seed_node), graph);
            total_nodes += nodes_in_graph;

            if (sub_graphs[n_seeds].seed_node == NULL) {
                /* Slot n_seeds is not consumed, so the next subgraph reuses it */
                printf("ERROR: Seed node is NULL, nodes in graph is %d\n", nodes_in_graph);
            } else {
                /* Write data to analysis file: index, size, running total,
                 * kmer of the start node, kmer of the chosen seed node */
                binary_kmer_to_seq(&(node->kmer), graph->kmer_size, seq);
                fprintf(fp, "%i\t%i\t%ld\t%s\t", n_seeds, nodes_in_graph, total_nodes, seq);
                binary_kmer_to_seq(&(sub_graphs[n_seeds].seed_node->kmer), graph->kmer_size, seq);
                fprintf(fp, "%s\n", seq);

                /* Store nodes in this subgraph */
                sub_graphs[n_seeds].graph_size = nodes_in_graph;
                n_seeds++;

                /* Check we've not run out of seed storage - in future, this should dynamically allocate */
                if (n_seeds == MAX_SEEDS) {
                    log_and_screen_printf("Error: MAX_SEEDS exceeded. Quitting.\n");
                    exit(-1);
                }
            }
        }
    }

    /* Traverse each node... */
    log_and_screen_printf("Finding subgraphs...\n");
    hash_table_traverse(&explore_node, graph);
    log_and_screen_printf("Finished. Total: %ld\n", total_nodes);
    fclose(fp);

    /* Open consensus contigs file */
    fp = fopen(consensus_contigs_filename, "w");
    if (!fp) {
        log_and_screen_printf("ERROR: Can't open contig file.\n");
        exit(-1);
    }

    /* Now go through all the seed points and generate the consensus contigs by walking forward and backward from the seed */
    db_graph_reset_flags(graph);

    log_and_screen_printf("Outputting contigs...\n");
    log_progress_bar(0);
    long long one_percent = n_seeds/100;
    int percent;

    /* Fewer than 100 seeds: update the bar on every iteration and avoid a modulo by zero */
    if (one_percent < 1) {
        one_percent = 1;
    }

    for (i=0; i<n_seeds; i++) {
        if (i % one_percent == 0) {
            /* 64-bit intermediate: 100 * i overflows int for large seed indices */
            percent = (int) ((100LL * i) / n_seeds);
            log_progress_bar(percent);
        }

        //log_printf("Graph %i\n", i);
        if (sub_graphs[i].graph_size >= min_subgraph_kmers) {
            binary_kmer_to_seq(&(sub_graphs[i].seed_node->kmer), graph->kmer_size, seq);
            coverage_walk_get_path(sub_graphs[i].seed_node, forward, NULL, graph, path_fwd);
            coverage_walk_get_path(sub_graphs[i].seed_node, reverse, NULL, graph, path_rev);

            /* Join the two walks into final_path: path_fwd passed through
             * path_reverse(), then path_rev appended */
            path_reverse(path_fwd, final_path);
            path_append(final_path, path_rev);
            final_path->id = i;
            path_to_fasta(final_path, fp);
            //log_printf(" Seed %s\tFwd path length %i\tRev path length %i\tFinal path length %i\n", seq, path_fwd->length, path_rev->length, final_path->length);

            /* NOTE(review): this result is only consumed by the disabled
             * log line below - confirm the call is still needed */
            path_reset(path_fwd);
            perfect_path_get_path(sub_graphs[i].seed_node, forward, &db_node_action_do_nothing, graph, path_fwd);
            //log_printf("\t\tPerfect path fwd length %i\n", path_fwd->length);

            /* Clear the buffers for the next seed */
            path_reset(path_rev);
            path_reset(final_path);
        } else {
            log_printf(" Number of nodes (%i} too small. Not outputting contig.\n", sub_graphs[i].graph_size);
        }
    }
    log_progress_bar(100);
    printf("\n");
    log_and_screen_printf("Finished contig output.\n");

    fclose(fp);
    free(sub_graphs);
}