Ejemplo n.º 1
0
/*
 * Builds a graph from the supplied SNPs, repeatedly merges the nodes joined
 * by the best-scoring edge, and returns the resulting groups.
 *
 * io, snp, nsnps, c_offset
 *	Passed straight through to graph_from_snps().
 * verbose
 *	Copied into the file-level 'verbosity'. Non-zero prints the
 *	"Merging graph nodes" banner; >= 1 prints one '.' per first-pass
 *	merge; >= 3 dumps the matrix and graph before merging; >= 4 dumps
 *	them after every first-pass merge and before the second pass.
 * min_score
 *	Merging in pass one and pass two continues only while the best edge
 *	has linkage_score strictly greater than this value.
 * two_pass
 *	When non-zero, zero-score edges are added and the merge loop is run
 *	a second time.
 * fast_mode
 *	Selects the second argument given to graph_calc_link_scores() after
 *	each merge: 0 when fast_mode is non-zero, otherwise 1.
 * max_sets
 *	When non-zero, merging continues (ignoring min_score) until the group
 *	count is no larger than max_sets or best_edge() finds nothing.
 *
 * Returns whatever list_groups() produces; the graph itself is destroyed
 * before returning.
 */
dstring_t *haplo_split(GapIO *io, snp_t *snp, int nsnps, int verbose,
		       double min_score, int two_pass, int fast_mode,
		       double c_offset, int max_sets) {
    graph *g;
    edge *best;
    dstring_t *groups;
    int relink_mode = fast_mode ? 0 : 1;
    int remaining;

    verbosity = verbose;

    /* Construct the graph and compute the initial scores */
    g = graph_from_snps(io, snp, nsnps, c_offset);
    if (verbosity >= 3)
	print_matrix(g);

    graph_add_edges(g);
    graph_calc_chimeric_scores(g);
    graph_calc_link_scores(g, 1);
    if (verbosity >= 3)
	graph_print(g, 0);

    if (verbosity != 0)
	puts("Merging graph nodes");

    /* Pass 1: merge along the best edge until it no longer beats min_score */
    for (;;) {
	best = best_edge(g);
	if (!best || !(best->linkage_score > min_score))
	    break;

	if (verbosity >= 1) {
	    putchar('.');
	    fflush(stdout);
	}

	merge_node(g, best);
	graph_calc_link_scores(g, relink_mode);

	if (verbosity >= 4) {
	    print_matrix(g);
	    graph_print(g, 1);
	}
    }
    if (verbosity >= 1)
	puts("");

    /* Pass 2 (optional): add fake zero-score edges, then merge again */
    if (two_pass) {
	add_zero_edges(g);
	graph_calc_link_scores(g, 1);
	if (verbosity >= 4)
	    graph_print(g, 1);

	puts("===pass 2===");
	for (;;) {
	    best = best_edge(g);
	    if (!best || !(best->linkage_score > min_score))
		break;

	    merge_node(g, best);
	    graph_calc_link_scores(g, relink_mode);
	}
    }

    /* Optionally cap the number of groups at max_sets */
    if (max_sets) {
	remaining = count_groups(g);
	add_zero_edges(g);

	while (remaining > max_sets) {
	    best = best_edge(g);
	    if (!best) {
		printf("Bailed out as no edge connecting groups\n");
		break;
	    }

	    merge_node(g, best);
	    graph_calc_link_scores(g, relink_mode);
	    remaining--;
	}
    }

    /* Collect the result before tearing the graph down */
    groups = list_groups(g);
    graph_destroy(g);

    return groups;
}
Ejemplo n.º 2
0
/*
 * Aligns a stretch of rseq against a stretch of oseq using the dynamic
 * programming tables built by make_nw_tables(), then traces the chosen end
 * cell back to (0, 0) to produce the two aligned strings.
 *
 * mem            Not referenced anywhere in this function body.
 * rseq, i1, dir1 First sequence, its start index and step direction;
 *                characters are read as rseq[i1 + dir1*j].
 * oseq, i2, dir2 Second sequence, read the same way. When dir1*dir2 == -1
 *                each character taken from oseq is passed through
 *                base_complement().
 * dp_len1/2      Table dimensions handed to make_nw_tables()/best_edge();
 *                rows 0..dp_len1 of both tables are freed here.
 * matches,
 * matches_index  Only touched when both dp_len1 and dp_len2 are below
 *                compress_flags.min_match_len: the 100 entries before
 *                *matches_index are counted, check_and_update() is called,
 *                and on success the per-step flags are written starting at
 *                matches[*matches_index].
 *
 * Returns a struct cb_alignment. On success length is the number of
 * traceback steps and ref/org are malloc'd, NUL-terminated strings. When
 * best[0] <= 0 after backtrack_to_clump(), or when check_and_update() does
 * not return the step count, length is -1 and ref/org point at string
 * literals rather than heap memory.
 */
struct cb_alignment
cb_align_nw(struct cb_align_nw_memory *mem,
             char *rseq, int dp_len1, int i1, int dir1,
             char *oseq, int dp_len2, int i2, int dir2,
             bool *matches, int *matches_index)
{
    struct cb_alignment align;
    struct cb_nw_tables tables = make_nw_tables(rseq, dp_len1, i1, dir1,
                                                 oseq, dp_len2, i2, dir2);
    int *best = best_edge(tables.dp_score, dp_len1, dp_len2);
    bool *step_is_match = NULL;
    char *ref_rev = NULL, *org_rev = NULL;
    bool short_window;
    int antisense = (dir1 * dir2 == -1);
    int row, col, max_steps;
    int num_steps = 0, matches_count = 0;
    int lo, hi, k;

    /* Result returned unchanged on every path that yields no alignment. */
    align.ref = "\0";
    align.org = "\0";
    align.length = -1;

    best = backtrack_to_clump(tables, best);
    if (best[0] <= 0)
        goto cleanup;

    row = best[0];
    col = best[1];

    /* A traceback from (row, col) to (0, 0) takes at most row+col steps. */
    max_steps = row + col;
    step_is_match = malloc(max_steps * sizeof *step_is_match);
    assert(step_is_match);
    ref_rev = malloc(max_steps * sizeof *ref_rev);
    assert(ref_rev);
    org_rev = malloc(max_steps * sizeof *org_rev);
    assert(org_rev);

    /* Walk back through dp_from, recording each column end-to-start.
       dp_from == 0: step in both sequences (match or substitution);
       dp_from == 2: step in oseq only (gap in the first sequence);
       anything else: step in rseq only (gap in the second sequence). */
    while (row != 0 || col != 0) {
        int from = tables.dp_from[row][col];
        int moves_ref = (from != 2);
        int moves_org = (from == 0 || from == 2);
        int prev_row = row - moves_ref;
        int prev_col = col - moves_org;
        char ref_ch = '-', org_ch = '-';

        if (moves_ref)
            ref_ch = rseq[i1 + dir1*prev_row];
        if (moves_org) {
            org_ch = oseq[i2 + dir2*prev_col];
            if (antisense)
                org_ch = base_complement(org_ch);
        }

        ref_rev[num_steps] = ref_ch;
        org_rev[num_steps] = org_ch;
        step_is_match[num_steps] = tables.dp_score[row][col] >
                                   tables.dp_score[prev_row][prev_col];
        num_steps++;

        row = prev_row;
        col = prev_col;
    }

    /* The flags were collected end-to-start; put them in forward order. */
    for (lo = 0, hi = num_steps - 1; lo < hi; lo++, hi--) {
        bool held = step_is_match[hi];
        step_is_match[hi] = step_is_match[lo];
        step_is_match[lo] = held;
    }

    /* The bad-window check is skipped when either length reaches
       min_match_len (i.e. when aligning an already accepted match). */
    short_window = dp_len1 < compress_flags.min_match_len &&
                   dp_len2 < compress_flags.min_match_len;
    if (short_window) {
        for (k = *matches_index - 100; k < *matches_index; k++)
            if (matches[k])
                matches_count++;

        /* Bad window: discard this alignment, leaving length at -1. */
        if (check_and_update(matches, matches_index, &matches_count,
                             step_is_match, num_steps) != num_steps)
            goto cleanup;
    }

    align.length = num_steps;
    align.org = malloc((align.length+1) * sizeof *align.org);
    assert(align.org);
    align.ref = malloc((align.length+1) * sizeof *align.ref);
    assert(align.ref);

    /* The matches array is only updated for short windows. */
    if (short_window)
        for (k = 0; k < align.length; k++)
            matches[*matches_index + k] = step_is_match[k];

    /* Copy the reversed traceback columns out in forward order. */
    for (k = 0; k < align.length; k++) {
        align.ref[k] = ref_rev[align.length - k - 1];
        align.org[k] = org_rev[align.length - k - 1];
    }
    align.org[align.length] = '\0';
    align.ref[align.length] = '\0';

cleanup:
    /* The scratch buffers are still NULL on the early exit; free(NULL)
       is a no-op. */
    free(best);
    for (k = 0; k <= dp_len1; k++) {
        free(tables.dp_score[k]);
        free(tables.dp_from[k]);
    }
    free(tables.dp_score);
    free(tables.dp_from);
    free(ref_rev);
    free(org_rev);
    free(step_is_match);
    return align;
}