/*
 * Splits the reads covering a set of SNPs into groups by building a graph
 * from the SNPs and repeatedly merging the nodes joined by the best edge.
 *
 * io, snp, nsnps, c_offset  Passed straight to graph_from_snps().
 * verbose     Copied into the file-level 'verbosity' setting. Non-zero
 *             prints the "Merging graph nodes" banner, >= 1 prints a
 *             progress dot per merge, >= 3 dumps the initial matrix and
 *             graph, >= 4 dumps them after every merge in the first pass.
 * min_score   Merging in passes 1 and 2 continues only while the best
 *             edge has linkage_score strictly greater than this.
 * two_pass    Non-zero: add zero-score edges and run a second merge pass.
 * fast_mode   Non-zero: link scores are recomputed with mode 0 after each
 *             merge, otherwise with mode 1.
 * max_sets    Non-zero: keep merging on the best edge, regardless of
 *             min_score, until at most this many groups remain or no edge
 *             is left.
 *
 * Returns the result of list_groups() on the final graph; the graph itself
 * is destroyed before returning.
 */
dstring_t *haplo_split(GapIO *io, snp_t *snp, int nsnps, int verbose,
                       double min_score, int two_pass, int fast_mode,
                       double c_offset, int max_sets)
{
    /* Mode handed to graph_calc_link_scores() after every merge. */
    const int rescore_mode = fast_mode ? 0 : 1;
    graph *gr;
    edge *top;
    dstring_t *groups;
    int remaining;

    verbosity = verbose;

    gr = graph_from_snps(io, snp, nsnps, c_offset);
    if (verbosity >= 3)
        print_matrix(gr);

    graph_add_edges(gr);
    graph_calc_chimeric_scores(gr);
    graph_calc_link_scores(gr, 1);
    if (verbosity >= 3)
        graph_print(gr, 0);

    /* Pass 1: merge while the best edge beats min_score. */
    if (verbosity)
        puts("Merging graph nodes");

    for (;;) {
        top = best_edge(gr);
        if (!top || !(top->linkage_score > min_score))
            break;

        if (verbosity >= 1) {
            putchar('.');
            fflush(stdout);
        }

        merge_node(gr, top);
        graph_calc_link_scores(gr, rescore_mode);

        if (verbosity >= 4) {
            print_matrix(gr);
            graph_print(gr, 1);
        }
    }

    if (verbosity >= 1)
        puts("");

    /* Pass 2: add fake zero-score edges if we want just 2 haplotypes. */
    if (two_pass) {
        add_zero_edges(gr);
        graph_calc_link_scores(gr, 1);
        if (verbosity >= 4)
            graph_print(gr, 1);

        /* NOTE(review): this banner is printed at every verbosity level. */
        puts("===pass 2===");

        for (;;) {
            top = best_edge(gr);
            if (!top || !(top->linkage_score > min_score))
                break;

            merge_node(gr, top);
            graph_calc_link_scores(gr, rescore_mode);
        }
    }

    /* Optionally force the number of groups down to max_sets. */
    if (max_sets) {
        remaining = count_groups(gr);

        /*
         * NOTE(review): link scores are not recomputed between adding the
         * zero edges and the first best_edge() call below - confirm that
         * this is intended.
         */
        add_zero_edges(gr);

        while (remaining > max_sets) {
            top = best_edge(gr);
            if (!top) {
                printf("Bailed out as no edge connecting groups\n");
                break;
            }

            merge_node(gr, top);
            graph_calc_link_scores(gr, rescore_mode);
            remaining--;
        }
    }

    groups = list_groups(gr);
    graph_destroy(gr);

    return groups;
}
struct cb_alignment cb_align_nw(struct cb_align_nw_memory *mem, char *rseq, int dp_len1, int i1, int dir1, char *oseq, int dp_len2, int i2, int dir2, bool *matches, int *matches_index) { struct cb_alignment align; int matches_count = 0, i = 0; struct cb_nw_tables tables = make_nw_tables(rseq, dp_len1, i1, dir1, oseq, dp_len2, i2, dir2); int *best = best_edge(tables.dp_score, dp_len1, dp_len2); int cur_j1, cur_j2; int dir_prod; int **dp_score, **dp_from; bool *matches_to_add; char *subs1_dp, *subs2_dp; int num_steps; best = backtrack_to_clump(tables, best); if (best[0] <= 0) { align.ref = "\0"; align.org = "\0"; align.length = -1; free(best); for (i = 0; i <= dp_len1; i++) { free(tables.dp_score[i]); free(tables.dp_from[i]); } free(tables.dp_score); free(tables.dp_from); return align; } cur_j1 = best[0]; cur_j2 = best[1]; dir_prod = dir1 * dir2; dp_score = tables.dp_score; dp_from = tables.dp_from; matches_to_add = malloc((cur_j1 + cur_j2)*sizeof(*matches_to_add)); assert(matches_to_add); subs1_dp = malloc((cur_j1 + cur_j2)*sizeof(*subs1_dp)); assert(subs1_dp); subs2_dp = malloc((cur_j1 + cur_j2)*sizeof(*subs2_dp)); assert(subs2_dp); num_steps = 0; align.ref = "\0"; align.org = "\0"; align.length = -1; while (!(cur_j1 == 0 && cur_j2 == 0)) { int prev_j1, prev_j2; switch (dp_from[cur_j1][cur_j2]) { char c1, c2; case 0: prev_j1 = cur_j1-1; prev_j2 = cur_j2-1; /*match or substitution*/ c1 = rseq[i1+dir1*prev_j1]; /*comp if antisense*/ c2 = oseq[i2+dir2*prev_j2]; if (dir_prod == -1) c2 = base_complement(c2); subs1_dp[num_steps] = c1; subs2_dp[num_steps] = c2; break; case 2: prev_j1 = cur_j1; prev_j2 = cur_j2-1; /*advance 2; gap in 1*/ c2 = oseq[i2+dir2*prev_j2]; if (dir_prod == -1) c2 = base_complement(c2); /*comp if antisense*/ subs1_dp[num_steps] = '-'; subs2_dp[num_steps] = c2; break; default: prev_j1 = cur_j1-1; prev_j2 = cur_j2; /*advance 1; gap in 2*/ c1 = rseq[i1+dir1*prev_j1]; subs1_dp[num_steps] = c1; subs2_dp[num_steps] = '-'; } matches_to_add[num_steps] = 
dp_score[cur_j1][cur_j2] > dp_score[prev_j1][prev_j2]; num_steps++; cur_j1 = prev_j1; cur_j2 = prev_j2; } for (i = 0; i < num_steps/2; i++) { /* flip order */ bool temp = matches_to_add[num_steps-i-1]; matches_to_add[num_steps-1-i] = matches_to_add[i]; matches_to_add[i] = temp; } /*note: need to flip order*/ if (dp_len1 < compress_flags.min_match_len && dp_len2 < compress_flags.min_match_len) for (i = *matches_index - 100; i < *matches_index; i++) if (matches[i]) matches_count++; /*Make sure we don't have a bad window unless we are running Needleman-Wunsch alignment on a match. If we have a bad window, then throw out this alignment. Otherwise, copy the alignment into align.org and align.ref.*/ if (dp_len1 < compress_flags.min_match_len && dp_len2 < compress_flags.min_match_len && check_and_update(matches, matches_index, &matches_count, matches_to_add, num_steps) != num_steps) align.length = -1; else { align.length = num_steps; align.org = malloc((align.length+1)*sizeof(*(align.org))); assert(align.org); align.ref = malloc((align.length+1)*sizeof(*(align.ref))); assert(align.ref); for (i = 0; i < align.length; i++) { /*Don't update the matches array if we are running Needleman-Wunsch alignment on a match.*/ if (dp_len1 < compress_flags.min_match_len && dp_len2 < compress_flags.min_match_len) matches[(*matches_index)+i] = matches_to_add[i]; align.ref[i] = subs1_dp[align.length-i-1]; align.org[i] = subs2_dp[align.length-i-1]; } align.org[align.length] = '\0'; align.ref[align.length] = '\0'; } free(best); for (i = 0; i <= dp_len1; i++) { free(tables.dp_score[i]); free(tables.dp_from[i]); } free(tables.dp_score); free(tables.dp_from); free(subs1_dp); free(subs2_dp); free(matches_to_add); return align; }