Пример #1
0
extern "C" void bwa_seed2genome_pos(uint64_t sa_pos, uint64_t *contig_id, uint64_t *contig_pos, bwa_seq_t *seq)
{
	bwa_seq_t *p=seq ;

	p->sa = sa_pos ;
	p->c1 = 1 ;
	p->type=BWA_TYPE_UNIQUE ;
	p->cigar=NULL ;
	p->strand=0 ;
	
	mybwa_cal_pac_pos_core(bwt_bwt[0], bwt_bwt[1], p, 0, 0); 
				
	uint64_t len = pos_end(p) - p->pos; 
	int seq_id=-1 ;
	
	bns_coor_pac2real(bwt_bns, p->pos, len, &seq_id) ;
	uint64_t pos = (int)(p->pos - bwt_bns->anns[seq_id].offset) ;
	
	if (false && sa_pos==461542)
	{
		fprintf(stdout, "seq_id=%i, pos=%lu, n_aln=%i, multi=%i, strand=%i\n", seq_id, pos, p->n_aln, p->n_multi, p->strand) ; 

		p->sa = 461542;//461970 ;
		p->c1 = 1 ;
		p->type=BWA_TYPE_UNIQUE ;
		p->cigar=NULL ;
		p->strand=1 ;
		
		mybwa_cal_pac_pos_core(bwt_bwt[0], bwt_bwt[1], p, 0, 0); 
		
		uint64_t len = pos_end(p) - p->pos; 
		int seq_id=-1 ;
		
		bns_coor_pac2real(bwt_bns, p->pos, len, &seq_id) ;
		uint64_t pos = (int)(p->pos - bwt_bns->anns[seq_id].offset) ;

		fprintf(stdout, "+++ seq_id=%i, pos=%lu, n_aln=%i, multi=%i, strand=%i\n", seq_id, pos, p->n_aln, p->n_multi, p->strand) ;
		//fprintf(stdout, "bwt->seq_len=%lld", (long long int)bwt_bwt[0]->seq_len) ;
		//fprintf(stdout, "reverse_bwt->seq_len=%lld", (long long int)bwt_bwt[1]->seq_len) ;
		
		//bwa_seq_t *a=NULL ;
		//fprintf(stdout, "error%lld", (long long int)a->sa) ;
	}

	*contig_id=seq_id ;
	*contig_pos=pos ;
}
Пример #2
0
/**
 * Finalises the alignment of a single read and copies the result into an
 * Alignment value.
 *
 * Runs bwa_cal_pac_pos_core and bwa_refine_gapped on the read, then copies
 * the counters, position, strand, mapping quality, cigar and MD string out
 * of it.
 *
 * @param sequence  read to finalise; it is updated by the two BWA calls, and
 *                  its md buffer is released and reset to NULL before return
 * @return          Alignment holding its own copies of the cigar (new[]) and
 *                  MD string (strdup); either may be NULL when the read has
 *                  none
 */
Alignment BWA::generate_final_alignment_from_sequence(bwa_seq_t* sequence) {
  // Calculate the local coordinate and local alignment.
  bwa_cal_pac_pos_core(bwts[0],bwts[1],sequence,options.max_diff,options.fnr);
  bwa_refine_gapped(bns, 1, sequence, reference, NULL);

  // Copy the local alignment data into the alignment object.
  Alignment alignment;

  // Populate basic path info
  alignment.edit_distance = sequence->nm;
  alignment.num_mismatches = sequence->n_mm;
  alignment.num_gap_opens = sequence->n_gapo;
  alignment.num_gap_extensions = sequence->n_gape;
  alignment.num_best = sequence->c1;
  alignment.num_second_best = sequence->c2;

  // Final alignment position (contig index plus 1-based offset inside it).
  alignment.type = sequence->type;
  bns_coor_pac2real(bns, sequence->pos, pos_end(sequence) - sequence->pos, &alignment.contig);
  alignment.pos = sequence->pos - bns->anns[alignment.contig].offset + 1;
  alignment.negative_strand = sequence->strand;
  alignment.mapping_quality = sequence->mapQ;

  // Cigar step: deep-copy so the Alignment does not alias the read's buffer.
  alignment.cigar = NULL;
  if(sequence->cigar) {
    alignment.cigar = new uint16_t[sequence->n_cigar];
    memcpy(alignment.cigar,sequence->cigar,sequence->n_cigar*sizeof(uint16_t));
  }
  alignment.n_cigar = sequence->n_cigar;

  // MD tag with a better breakdown of differences in the cigar.
  // strdup(NULL) is undefined, so only copy when the read carries an MD string.
  alignment.md = sequence->md ? strdup(sequence->md) : NULL;
  // NOTE(review): the source buffer is released with delete[] as before; if
  // bwa_refine_gapped allocates md with malloc/strdup this should be free()
  // instead -- confirm against the allocator.
  delete[] sequence->md;
  sequence->md = NULL;

  return alignment;
}
Пример #3
0
/**
 * Reads a Gmsh ASCII mesh file and registers its nodes and triangles with
 * the given geometry container under the name fname.
 *
 * Layout consumed: three header lines, a nodes tag, the node count, one
 * "id x y z" line per node, an end tag, an elements tag, the element count
 * and one "id type n_tags tags... node-ids..." line per element. Only
 * elements of type 2 (3-node triangles) are used; all others are skipped.
 *
 * @param fname  path of the mesh file; also used as the name of the point
 *               and surface vectors added to geo
 * @param geo    container receiving the point vector and the surface vector
 *
 * If the file cannot be opened a message is written to std::cout and
 * nothing is added. Triangles that reference a node id outside the range of
 * the id map are reported on std::cout and skipped.
 */
void Gmsh2GeoIO::loadMeshAsGeometry (std::string & fname, GeoLib::GEOObjects* geo)
{
	// open file
	std::ifstream ins (fname.c_str());
	if (!ins)
	{
		std::cout << "could not open file " << fname << std::endl;
		return;
	}

	std::string line;
	// read gmsh header
	getline (ins, line); // $MeshFormat
	getline (ins, line);
	getline (ins, line); // $EndMeshFormat

	// read nodes tag
	getline (ins, line);
	// read number of nodes
	getline (ins, line);
	const size_t n_pnts (str2number<size_t>(line));
	std::vector<GeoLib::Point*>* pnts (new std::vector<GeoLib::Point*>);
	for (size_t k(0); k < n_pnts; k++)
	{
		getline (ins, line);
		// parse id
		size_t pos_beg(0);
		size_t pos_end (line.find(" "));
		// the sub string line.substr(pos_beg, pos_end-pos_beg) represents the id
		// parse x coordinate
		pos_beg = pos_end + 1;
		pos_end = line.find(" ", pos_beg);
		double x (str2number<double>(line.substr(pos_beg, pos_end - pos_beg)));
		// parse y coordinate
		pos_beg = pos_end + 1;
		pos_end = line.find(" ", pos_beg);
		double y (str2number<double>(line.substr(pos_beg, pos_end - pos_beg)));
		// parse z coordinate
		pos_beg = pos_end + 1;
		pos_end = line.find("\n", pos_beg);
		double z (str2number<double>(line.substr(pos_beg, pos_end - pos_beg)));

		pnts->push_back (new GeoLib::Point (x,y,z));
	}
	// read end nodes tag
	getline (ins, line);

	geo->addPointVec (pnts, fname);

	// Map from node index in the file to point index in the stored vector
	// (presumably needed because addPointVec may merge or reorder points).
	std::vector<size_t> const& pnt_id_map (geo->getPointVecObj(fname)->getIDMap());
	const size_t n_mapped_pnts (pnt_id_map.size());
	// read element tag
	getline (ins, line);
	// read number of elements
	getline (ins, line);
	const size_t n_elements (str2number<size_t>(line));
	GeoLib::Surface* sfc (new GeoLib::Surface (*pnts));
	for (size_t k(0); k < n_elements; k++)
	{
		getline (ins, line);
		// parse id
		size_t pos_beg(0);
		size_t pos_end (line.find(" "));
		// the sub string line.substr(pos_beg, pos_end-pos_beg) represents the id
		// parse element type
		pos_beg = pos_end + 1;
		pos_end = line.find(" ", pos_beg);
		size_t ele_type (str2number<size_t>(line.substr(pos_beg, pos_end - pos_beg)));
		if (ele_type == 2) // read 3 node triangle
		{ // parse number of tags
			pos_beg = pos_end + 1;
			pos_end = line.find(" ", pos_beg);
			const size_t n_tags (str2number<size_t>(line.substr(pos_beg,
			                                                    pos_end - pos_beg)));
			// (over) read tags
			for (size_t j(0); j < n_tags; j++)
			{
				pos_beg = pos_end + 1;
				pos_end = line.find(" ", pos_beg);
			}
			// parse first id of triangle
			pos_beg = pos_end + 1;
			pos_end = line.find(" ", pos_beg);
			const size_t id0 (str2number<size_t>(line.substr(pos_beg,
			                                                 pos_end - pos_beg)) - 1); // shift -1!
			// parse second id of triangle
			pos_beg = pos_end + 1;
			pos_end = line.find(" ", pos_beg);
			const size_t id1 (str2number<size_t>(line.substr(pos_beg,
			                                                 pos_end - pos_beg)) - 1); // shift -1!
			// parse third id of triangle
			pos_beg = pos_end + 1;
			pos_end = line.find(" ", pos_beg);
			const size_t id2 (str2number<size_t>(line.substr(pos_beg,
			                                                 pos_end - pos_beg)) - 1); // shift -1!

			// A node id of 0 wraps to SIZE_MAX after the -1 shift and an id
			// beyond the node count would read past the end of the map, so
			// reject both instead of indexing out of bounds.
			if (id0 >= n_mapped_pnts || id1 >= n_mapped_pnts || id2 >= n_mapped_pnts)
			{
				std::cout << "skipping triangle with node id out of range: " << line << std::endl;
				continue;
			}
			sfc->addTriangle (pnt_id_map[id0], pnt_id_map[id1], pnt_id_map[id2]);
		}
	}
	// read end element tag
	getline (ins, line);

	std::vector<GeoLib::Surface*>* sfcs (new std::vector<GeoLib::Surface*>);
	sfcs->push_back(sfc);
	geo->addSurfaceVec (sfcs, fname);
}
Пример #4
0
// Writes one tab-separated SAM record for read `p` to stdout.
//
// Parameters:
//   bns        reference annotations; anns[] supplies contig name, offset and len
//   p          read to print (NOT const: see side effects below)
//   mate       paired read, or NULL when there is none
//   mode       only the BWA_MODE_COMPREAD bit is inspected here; it selects
//              "NM" instead of "CM" as the name of the tag carrying p->nm
//   max_top2   the X1 tag is printed only when p->c1 <= max_top2
//   bwa_rg_id  when non-NULL, an RG:Z tag with this value is appended
//              (only on the branch where p or its mate has a match)
//
// Side effects on `p`:
//   - if p has no match but mate does, p->pos and p->strand are overwritten
//     with the mate's values;
//   - if p->strand is set and p->qual is non-NULL, seq_reverse() is applied
//     to p->qual before it is printed.
void bwa_print_sam1(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, int mode, int max_top2, const char *bwa_rg_id)
{
	int j;
	//if (strcmp (p->name, "HWUSI-EAS1600:WT2_250_read_1:11_30_09:3:1:83:1066#0") == 0)
	//{
	//	fprintf (stderr, "found %s\n", p->name);
	//}
	// Branch 1: the read itself or its mate has a match, so coordinates are printed.
	if (p->type != BWA_TYPE_NO_MATCH || (mate && mate->type != BWA_TYPE_NO_MATCH)) {
		int seqid, nn, am = 0, flag = p->extra_flag;
		char XT;

		if (p->type == BWA_TYPE_NO_MATCH) {
			// Unmatched read with a matched mate: borrow the mate's coordinate
			// and strand, and mark the read itself with SAM_FSU.
			p->pos = mate->pos;
			p->strand = mate->strand;
			flag |= SAM_FSU;
			j = 1;
		} else j = pos_end(p) - p->pos; // j is the length of the reference in the alignment

		// get seqid
		// nn keeps the return value of bns_coor_pac2real; it is later printed
		// as XN and forces XT to 'N' when it exceeds 10.
		nn = bns_coor_pac2real(bns, p->pos, j, &seqid);
		if (p->type != BWA_TYPE_NO_MATCH && p->pos + j - bns->anns[seqid].offset > bns->anns[seqid].len)
			flag |= SAM_FSU; // flag UNMAP as this alignment bridges two adjacent reference sequences

		// update flag and print it
		if (p->strand) flag |= SAM_FSR;
		if (mate) {
			if (mate->type != BWA_TYPE_NO_MATCH) {
				if (mate->strand) flag |= SAM_FMR;
			} else flag |= SAM_FMU;
		}
		// Columns: name, flag, contig name, 1-based position within the contig, mapQ.
		printf("%s\t%d\t%s\t", p->name, flag, bns->anns[seqid].name);
		printf("%d\t%d\t", (int)(p->pos - bns->anns[seqid].offset + 1), p->mapQ);

		// print CIGAR
		// Without an explicit cigar: "*" for an unmatched read, otherwise a
		// single "<len>M" operation.
		if (p->cigar) {
			for (j = 0; j != p->n_cigar; ++j)
				printf("%d%c", __cigar_len(p->cigar[j]), "MIDSN"[__cigar_op(p->cigar[j])]);
		} else if (p->type == BWA_TYPE_NO_MATCH) printf("*");
		else printf("%dM", p->len);

		// print mate coordinate
		if (mate && mate->type != BWA_TYPE_NO_MATCH) {
			int m_seqid, m_is_N;
			long long isize;
			am = mate->seQ < p->seQ? mate->seQ : p->seQ; // smaller single-end mapping quality
			// redundant calculation here, but should not matter too much
			// NOTE(review): m_is_N is assigned but never read afterwards.
			m_is_N = bns_coor_pac2real(bns, mate->pos, mate->len, &m_seqid);
			printf("\t%s\t", (seqid == m_seqid)? "=" : bns->anns[m_seqid].name);
			// The insert size is only reported when both reads are on the same
			// contig and p itself has a match; otherwise it is 0.
			isize = (seqid == m_seqid)? pos_5(mate) - pos_5(p) : 0;
			if (p->type == BWA_TYPE_NO_MATCH) isize = 0;
			printf("%d\t%lld\t", (int)(mate->pos - bns->anns[m_seqid].offset + 1), isize);
		} else if (mate) printf("\t=\t%d\t0\t", (int)(p->pos - bns->anns[seqid].offset + 1));
		else printf("\t*\t0\t0\t");

		// print sequence and quality
		// Forward strand: codes are mapped through "ACGTN" in order. Reverse
		// strand: the sequence is walked backwards and mapped through "TGCAN".
		if (p->strand == 0)
			for (j = 0; j != p->full_len; ++j) putchar("ACGTN"[(int)p->seq[j]]);
		else for (j = 0; j != p->full_len; ++j) putchar("TGCAN"[p->seq[p->full_len - 1 - j]]);
		putchar('\t');
		if (p->qual) {
			if (p->strand) seq_reverse(p->len, p->qual, 0); // reverse quality
			printf("%s", p->qual);
		} else printf("*");

		// Optional tags follow.
		if (bwa_rg_id) printf("\tRG:Z:%s", bwa_rg_id);
		if (p->clip_len < p->full_len) printf("\tXC:i:%d", p->clip_len);
		if (p->type != BWA_TYPE_NO_MATCH) {
			int i;
			// calculate XT tag
			XT = "NURM"[p->type];
			if (nn > 10) XT = 'N';
			// print tags
			printf("\tXT:A:%c\t%s:i:%d", XT, (mode & BWA_MODE_COMPREAD)? "NM" : "CM", p->nm);
			// print XS tag, to be compatible with Cufflinks
			// sense_strand == 2 is printed as '.', any other non-zero value as
			// '-', and zero as '+'.
			if (p->sense_strand != 2 ) printf("\tXS:A:%c", p->sense_strand ? '-':'+' );
			else printf("\tXS:A:.");
			if (nn) printf("\tXN:i:%d", nn);
			if (mate) printf("\tSM:i:%lu\tAM:i:%d", p->seQ, am);
			if (p->type != BWA_TYPE_MATESW) { // X0 and X1 are not available for this type of alignment
				printf("\tX0:i:%lu", p->c1);
				if (p->c1 <= max_top2) printf("\tX1:i:%lu", p->c2);
			}
			printf("\tXM:i:%d\tXO:i:%d\tXG:i:%d", p->n_mm, p->n_gapo_t + p->n_gapo_q, p->n_gapo_t+p->n_gape_t+p->n_gapo_q+p->n_gape_q);
			if (p->md) printf("\tMD:Z:%s", p->md);
			// print multiple hits
			// Each hit is emitted as "contig,<strand><pos>,<cigar>,<nm>,<sense>;".
			// The "XA:Z:" prefix is written lazily, only once a hit survives
			// the bridging filter below.
			if (p->n_multi) {
				bool header_printed = 0;
				for (i = 0; i < p->n_multi; ++i) {
					bwt_multi1_t *q = p->multi + i;
					// Note: j, nn and seqid are reused (overwritten) for each hit.
					j = pos_end_multi(q, p->len) - q->pos;
					nn = bns_coor_pac2real(bns, q->pos, j, &seqid);
					if(pos_end_multi(q, p->len) - bns->anns[seqid].offset > bns->anns[seqid].len) continue; //the alignment bridges adjacent sequences (chroms)
//TODO: need to avoid this at the first place in the junction discovery step, but this should be rare for mm or human
					if (! header_printed) {
						header_printed = 1;
						printf("\tXA:Z:");
					}
					int k;
					printf("%s,%c%d,", bns->anns[seqid].name, q->strand? '-' : '+',
						   (int)(q->pos - bns->anns[seqid].offset + 1));
					if (q->cigar) {
						for (k = 0; k < q->n_cigar; ++k)
							printf("%d%c", __cigar_len(q->cigar[k]), "MIDSN"[__cigar_op(q->cigar[k])]);
					} else printf("%dM", p->len);
					printf(",%d", q->nm); //q->gap_t + q->gap_q + q->mm);
					if (q->sense_strand != 2) printf(",%c;", q->sense_strand? '-' : '+' );
					else printf(",.;");
				}
			}
		}
		putchar('\n');
	} else { // this read has no match
		// Branch 2: neither the read nor (if present) its mate has a match.
		// All coordinate columns are placeholders; the sequence comes from
		// p->rseq when p->strand is set, otherwise from p->seq.
		ubyte_t *s = p->strand? p->rseq : p->seq;
		int flag = p->extra_flag | SAM_FSU;
		if (mate && mate->type == BWA_TYPE_NO_MATCH) flag |= SAM_FMU;
		printf("%s\t%d\t*\t0\t0\t*\t*\t0\t0\t", p->name, flag);
		for (j = 0; j != p->len; ++j) putchar("ACGTN"[(int)s[j]]);
		putchar('\t');
		if (p->qual) {
			if (p->strand) seq_reverse(p->len, p->qual, 0); // reverse quality
			printf("%s", p->qual);
		} else printf("*");
		if (p->clip_len < p->full_len) printf("\tXC:i:%d", p->clip_len);
		putchar('\n');
	}
}
Пример #5
0
// Returns the coordinate of a matched read that depends on its strand:
// pos_end(p) when p->strand is set, p->pos otherwise (presumably the 5' end
// of the alignment, going by the function name). Reads of type
// BWA_TYPE_NO_MATCH yield -1.
static int64_t pos_5(const bwa_seq_t *p)
{
	// Unmatched reads have no coordinate to report.
	if (p->type == BWA_TYPE_NO_MATCH)
		return -1;

	return p->strand? pos_end(p) : p->pos;
}