示例#1
0
文件: comptool.cpp 项目: stomk/dop
// Receive an alignment file, run chaining and output a chain file
void CompTool::run_chaining(const string file, ofstream& ofs, const int near_dist, const bool forward){
    // Count alignments
    ifstream ifs;
    ifs.open(file.c_str());
    if(!ifs.is_open()) {cout << "File not found\n> " << file << endl; exit(1);}
    string line;
    int num_alignment = 0;
    getline(ifs, line);  // Skip header
    while(getline(ifs, line)) num_alignment++;
    ifs.close();

    // Load alignments
    ifs.open(file.c_str());
    getline(ifs, line);  // Skip header
    Alignment* alignments = new Alignment[num_alignment];

    // TODO: Parse kmer-size from header and calculate end positions of matches
    int* buf = new int[5];
    for(int i = 0; i < num_alignment; i++){
        for(int j = 0; j < 5; j++){
            ifs >> buf[j];
        }
        alignments[i].set(buf);
    }
    delete[] buf;

    // Run chaining
    Chaining chaining(alignments, num_alignment, near_dist);
    chaining.run();
    chaining.output_major_chains(ofs);
}
示例#2
0
// Chain the hits of both strands, flag chains that are dominated by a chain
// sorted ahead of them, and remove the hits belonging to flagged chains.
//
//   opt - options; passed to chaining(), and opt->t_seeds scales the
//         dominance threshold used when flagging chains.
//   len - value used to mirror the query interval of second-strand chains
//         (qbeg/qend become len - qend / len - qbeg).
//   b   - hit sets for the two strands; b[k]->hits is compacted in place and
//         b[k]->n is updated to the number of surviving hits.
//
// NOTE(review): chaining() and the ordering used by ks_introsort(hsaip, ...)
// are defined elsewhere. The code below relies on every seed's `chain` field
// being a valid index into the flag array after chaining() - confirm.
void bsw2_chain_filter(const bsw2opt_t *opt, int len, bwtsw2_t *b[2])
{
	hsaip_t *seeds[2], *chains[2];
	int n_hits[2], n_chains[2];
	int strand, s, c, prev, kept, total_hits, total_chains;
	char *dropped;

	// Build one seed record per hit. Both strands share a single allocation,
	// with the second strand's seeds placed right after the first strand's.
	n_hits[0] = b[0]->n;
	n_hits[1] = b[1]->n;
	total_hits = n_hits[0] + n_hits[1];
	seeds[0] = (hsaip_t*)calloc(total_hits, sizeof(hsaip_t));
	seeds[1] = seeds[0] + n_hits[0];
	chains[0] = (hsaip_t*)calloc(total_hits, sizeof(hsaip_t));
	for (strand = 0; strand < 2; ++strand) {
		for (s = 0; s < b[strand]->n; ++s) {
			const bsw2hit_t *hit = &b[strand]->hits[s];
			hsaip_t *seed = &seeds[strand][s];
			seed->idx = s;        // position of the hit within its strand
			seed->flag = strand;  // which of the two hit sets it came from
			seed->chain = -1;     // no chain assigned yet
			seed->tbeg = hit->k;
			seed->tend = hit->k + hit->len;
			seed->qbeg = hit->beg;
			seed->qend = hit->end;
		}
	}

	// Chain each strand separately; the second strand's chain records are
	// stored directly after the first strand's in the same buffer.
	n_chains[0] = chaining(opt, 0, n_hits[0], seeds[0], chains[0]);
	chains[1] = chains[0] + n_chains[0];
	n_chains[1] = chaining(opt, n_chains[0], n_hits[1], seeds[1], chains[1]);
	total_chains = n_chains[0] + n_chains[1];

	// Mirror the query coordinates of the second strand's chains.
	for (c = 0; c < n_chains[1]; ++c) {
		hsaip_t *rc = &chains[1][c];
		int old_beg = rc->qbeg;
		rc->qbeg = len - rc->qend;
		rc->qend = len - old_beg;
	}

	// Flag chains: after sorting, a chain is flagged when some earlier,
	// still-unflagged chain ends at or beyond it on the query and has a
	// `chain` value more than 2 * t_seeds times larger. The flag array holds
	// one byte per chain record and is indexed by a chain record's idx.
	dropped = (char*)calloc(total_chains, 1);
	ks_introsort(hsaip, total_chains, chains[0]);
	for (c = 1; c < total_chains; ++c) {
		hsaip_t *cur = &chains[0][c];
		for (prev = 0; prev < c; ++prev) {
			const hsaip_t *other = &chains[0][prev];
			if (!dropped[other->idx]
				&& other->qend >= cur->qend
				&& other->chain > cur->chain * opt->t_seeds * 2) {
				dropped[cur->idx] = 1;
				break;
			}
		}
	}

	// Zero the G field of every hit whose seed points at a flagged chain.
	for (s = 0; s < total_hits; ++s) {
		const hsaip_t *seed = &seeds[0][s];
		if (dropped[seed->chain])
			b[seed->flag]->hits[seed->idx].G = 0;
	}
	free(dropped);

	// Compact each hit array in place, keeping only hits with non-zero G.
	for (strand = 0; strand < 2; ++strand) {
		bsw2hit_t *hits = b[strand]->hits;
		kept = 0;
		for (s = 0; s < n_hits[strand]; ++s) {
			if (!hits[s].G) continue;
			if (kept != s) hits[kept] = hits[s];
			++kept;
		}
		b[strand]->n = kept;
	}

	// Release the scratch buffers (seeds[1] and chains[1] alias into these).
	free(seeds[0]);
	free(chains[0]);
}