extern "C" void bwa_seed2genome_pos(uint64_t sa_pos, uint64_t *contig_id, uint64_t *contig_pos, bwa_seq_t *seq) { bwa_seq_t *p=seq ; p->sa = sa_pos ; p->c1 = 1 ; p->type=BWA_TYPE_UNIQUE ; p->cigar=NULL ; p->strand=0 ; mybwa_cal_pac_pos_core(bwt_bwt[0], bwt_bwt[1], p, 0, 0); uint64_t len = pos_end(p) - p->pos; int seq_id=-1 ; bns_coor_pac2real(bwt_bns, p->pos, len, &seq_id) ; uint64_t pos = (int)(p->pos - bwt_bns->anns[seq_id].offset) ; if (false && sa_pos==461542) { fprintf(stdout, "seq_id=%i, pos=%lu, n_aln=%i, multi=%i, strand=%i\n", seq_id, pos, p->n_aln, p->n_multi, p->strand) ; p->sa = 461542;//461970 ; p->c1 = 1 ; p->type=BWA_TYPE_UNIQUE ; p->cigar=NULL ; p->strand=1 ; mybwa_cal_pac_pos_core(bwt_bwt[0], bwt_bwt[1], p, 0, 0); uint64_t len = pos_end(p) - p->pos; int seq_id=-1 ; bns_coor_pac2real(bwt_bns, p->pos, len, &seq_id) ; uint64_t pos = (int)(p->pos - bwt_bns->anns[seq_id].offset) ; fprintf(stdout, "+++ seq_id=%i, pos=%lu, n_aln=%i, multi=%i, strand=%i\n", seq_id, pos, p->n_aln, p->n_multi, p->strand) ; //fprintf(stdout, "bwt->seq_len=%lld", (long long int)bwt_bwt[0]->seq_len) ; //fprintf(stdout, "reverse_bwt->seq_len=%lld", (long long int)bwt_bwt[1]->seq_len) ; //bwa_seq_t *a=NULL ; //fprintf(stdout, "error%lld", (long long int)a->sa) ; } *contig_id=seq_id ; *contig_pos=pos ; }
/**
 * Finalizes one bwa sequence record and copies the result into an Alignment.
 *
 * Runs bwa_cal_pac_pos_core() and bwa_refine_gapped() on `sequence`, then
 * copies the edit statistics, hit counts, position, strand, mapping quality,
 * CIGAR and MD string into the returned object.
 *
 * The returned Alignment carries freshly allocated copies: `cigar` comes
 * from new[] and `md` from strdup(). sequence->md is released and reset to
 * NULL before returning.
 *
 * @param sequence  record to finalize; modified by the bwa calls and by the
 *                  release of its md field
 * @return the populated Alignment (cigar and/or md are NULL when the
 *         sequence has none)
 */
Alignment BWA::generate_final_alignment_from_sequence(bwa_seq_t* sequence) {
  // Calculate the local coordinate and local alignment.
  bwa_cal_pac_pos_core(bwts[0],bwts[1],sequence,options.max_diff,options.fnr);
  bwa_refine_gapped(bns, 1, sequence, reference, NULL);

  // Copy the local alignment data into the alignment object.
  Alignment alignment;

  // Populate basic path info
  alignment.edit_distance = sequence->nm;
  alignment.num_mismatches = sequence->n_mm;
  alignment.num_gap_opens = sequence->n_gapo;
  alignment.num_gap_extensions = sequence->n_gape;
  alignment.num_best = sequence->c1;
  alignment.num_second_best = sequence->c2;

  // Final alignment position.
  alignment.type = sequence->type;
  bns_coor_pac2real(bns, sequence->pos, pos_end(sequence) - sequence->pos, &alignment.contig);
  // +1 on top of the contig-relative offset.
  alignment.pos = sequence->pos - bns->anns[alignment.contig].offset + 1;
  alignment.negative_strand = sequence->strand;
  alignment.mapping_quality = sequence->mapQ;

  // Cigar step.
  alignment.cigar = NULL;
  if(sequence->cigar) {
    alignment.cigar = new uint16_t[sequence->n_cigar];
    memcpy(alignment.cigar,sequence->cigar,sequence->n_cigar*sizeof(uint16_t));
  }
  alignment.n_cigar = sequence->n_cigar;

  // MD tag with a better breakdown of differences in the cigar.
  // strdup() must not be handed a null pointer, so a missing MD string is
  // propagated as NULL instead of being duplicated.
  alignment.md = sequence->md ? strdup(sequence->md) : NULL;
  // NOTE(review): md is released with delete[] exactly as before; confirm
  // this matches the allocator bwa_refine_gapped() uses for the field.
  delete[] sequence->md;
  sequence->md = NULL;

  return alignment;
}
void Gmsh2GeoIO::loadMeshAsGeometry (std::string & fname, GeoLib::GEOObjects* geo) { // open file std::ifstream ins (fname.c_str()); if (!ins) { std::cout << "could not open file " << fname << std::endl; return; } std::string line; // read gmsh header getline (ins, line); // $MeshFormat getline (ins, line); getline (ins, line); // $EndMeshFormat // read nodes tag getline (ins, line); // read number of nodes getline (ins, line); const size_t n_pnts (str2number<size_t>(line)); std::vector<GeoLib::Point*>* pnts (new std::vector<GeoLib::Point*>); for (size_t k(0); k < n_pnts; k++) { getline (ins, line); // parse id size_t pos_beg(0); size_t pos_end (line.find(" ")); // the sub string line.substr(pos_beg, pos_end-pos_beg) represents the id // parse x coordinate pos_beg = pos_end + 1; pos_end = line.find(" ", pos_beg); double x (str2number<double>(line.substr(pos_beg, pos_end - pos_beg))); // parse y coordinate pos_beg = pos_end + 1; pos_end = line.find(" ", pos_beg); double y (str2number<double>(line.substr(pos_beg, pos_end - pos_beg))); // parse z coordinate pos_beg = pos_end + 1; pos_end = line.find("\n", pos_beg); double z (str2number<double>(line.substr(pos_beg, pos_end - pos_beg))); pnts->push_back (new GeoLib::Point (x,y,z)); } // read end nodes tag getline (ins, line); geo->addPointVec (pnts, fname); std::vector<size_t> const& pnt_id_map (geo->getPointVecObj(fname)->getIDMap()); // read element tag getline (ins, line); // read number of elements getline (ins, line); const size_t n_elements (str2number<size_t>(line)); GeoLib::Surface* sfc (new GeoLib::Surface (*pnts)); for (size_t k(0); k < n_elements; k++) { getline (ins, line); // parse id size_t pos_beg(0); size_t pos_end (line.find(" ")); // the sub string line.substr(pos_beg, pos_end-pos_beg) represents the id // parse element type pos_beg = pos_end + 1; pos_end = line.find(" ", pos_beg); size_t ele_type (str2number<size_t>(line.substr(pos_beg, pos_end - pos_beg))); if (ele_type == 2) // read 3 node triangle 
{ // parse number of tags pos_beg = pos_end + 1; pos_end = line.find(" ", pos_beg); const size_t n_tags (str2number<size_t>(line.substr(pos_beg, pos_end - pos_beg))); // (over) read tags for (size_t j(0); j < n_tags; j++) { pos_beg = pos_end + 1; pos_end = line.find(" ", pos_beg); } // parse first id of triangle pos_beg = pos_end + 1; pos_end = line.find(" ", pos_beg); const size_t id0 (str2number<size_t>(line.substr(pos_beg, pos_end - pos_beg)) - 1); // shift -1! // parse second id of triangle pos_beg = pos_end + 1; pos_end = line.find(" ", pos_beg); const size_t id1 (str2number<size_t>(line.substr(pos_beg, pos_end - pos_beg)) - 1); // shift -1! // parse third id of triangle pos_beg = pos_end + 1; pos_end = line.find(" ", pos_beg); const size_t id2 (str2number<size_t>(line.substr(pos_beg, pos_end - pos_beg)) - 1); // shift -1! sfc->addTriangle (pnt_id_map[id0], pnt_id_map[id1], pnt_id_map[id2]); } } // read end element tag getline (ins, line); std::vector<GeoLib::Surface*>* sfcs (new std::vector<GeoLib::Surface*>); sfcs->push_back(sfc); geo->addSurfaceVec (sfcs, fname); }
void bwa_print_sam1(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, int mode, int max_top2, const char *bwa_rg_id) { int j; //if (strcmp (p->name, "HWUSI-EAS1600:WT2_250_read_1:11_30_09:3:1:83:1066#0") == 0) //{ // fprintf (stderr, "found %s\n", p->name); //} if (p->type != BWA_TYPE_NO_MATCH || (mate && mate->type != BWA_TYPE_NO_MATCH)) { int seqid, nn, am = 0, flag = p->extra_flag; char XT; if (p->type == BWA_TYPE_NO_MATCH) { p->pos = mate->pos; p->strand = mate->strand; flag |= SAM_FSU; j = 1; } else j = pos_end(p) - p->pos; // j is the length of the reference in the alignment // get seqid nn = bns_coor_pac2real(bns, p->pos, j, &seqid); if (p->type != BWA_TYPE_NO_MATCH && p->pos + j - bns->anns[seqid].offset > bns->anns[seqid].len) flag |= SAM_FSU; // flag UNMAP as this alignment bridges two adjacent reference sequences // update flag and print it if (p->strand) flag |= SAM_FSR; if (mate) { if (mate->type != BWA_TYPE_NO_MATCH) { if (mate->strand) flag |= SAM_FMR; } else flag |= SAM_FMU; } printf("%s\t%d\t%s\t", p->name, flag, bns->anns[seqid].name); printf("%d\t%d\t", (int)(p->pos - bns->anns[seqid].offset + 1), p->mapQ); // print CIGAR if (p->cigar) { for (j = 0; j != p->n_cigar; ++j) printf("%d%c", __cigar_len(p->cigar[j]), "MIDSN"[__cigar_op(p->cigar[j])]); } else if (p->type == BWA_TYPE_NO_MATCH) printf("*"); else printf("%dM", p->len); // print mate coordinate if (mate && mate->type != BWA_TYPE_NO_MATCH) { int m_seqid, m_is_N; long long isize; am = mate->seQ < p->seQ? mate->seQ : p->seQ; // smaller single-end mapping quality // redundant calculation here, but should not matter too much m_is_N = bns_coor_pac2real(bns, mate->pos, mate->len, &m_seqid); printf("\t%s\t", (seqid == m_seqid)? "=" : bns->anns[m_seqid].name); isize = (seqid == m_seqid)? 
pos_5(mate) - pos_5(p) : 0; if (p->type == BWA_TYPE_NO_MATCH) isize = 0; printf("%d\t%lld\t", (int)(mate->pos - bns->anns[m_seqid].offset + 1), isize); } else if (mate) printf("\t=\t%d\t0\t", (int)(p->pos - bns->anns[seqid].offset + 1)); else printf("\t*\t0\t0\t"); // print sequence and quality if (p->strand == 0) for (j = 0; j != p->full_len; ++j) putchar("ACGTN"[(int)p->seq[j]]); else for (j = 0; j != p->full_len; ++j) putchar("TGCAN"[p->seq[p->full_len - 1 - j]]); putchar('\t'); if (p->qual) { if (p->strand) seq_reverse(p->len, p->qual, 0); // reverse quality printf("%s", p->qual); } else printf("*"); if (bwa_rg_id) printf("\tRG:Z:%s", bwa_rg_id); if (p->clip_len < p->full_len) printf("\tXC:i:%d", p->clip_len); if (p->type != BWA_TYPE_NO_MATCH) { int i; // calculate XT tag XT = "NURM"[p->type]; if (nn > 10) XT = 'N'; // print tags printf("\tXT:A:%c\t%s:i:%d", XT, (mode & BWA_MODE_COMPREAD)? "NM" : "CM", p->nm); // print XS tag, to be compatible with Cufflinks if (p->sense_strand != 2 ) printf("\tXS:A:%c", p->sense_strand ? '-':'+' ); else printf("\tXS:A:."); if (nn) printf("\tXN:i:%d", nn); if (mate) printf("\tSM:i:%lu\tAM:i:%d", p->seQ, am); if (p->type != BWA_TYPE_MATESW) { // X0 and X1 are not available for this type of alignment printf("\tX0:i:%lu", p->c1); if (p->c1 <= max_top2) printf("\tX1:i:%lu", p->c2); } printf("\tXM:i:%d\tXO:i:%d\tXG:i:%d", p->n_mm, p->n_gapo_t + p->n_gapo_q, p->n_gapo_t+p->n_gape_t+p->n_gapo_q+p->n_gape_q); if (p->md) printf("\tMD:Z:%s", p->md); // print multiple hits if (p->n_multi) { bool header_printed = 0; for (i = 0; i < p->n_multi; ++i) { bwt_multi1_t *q = p->multi + i; j = pos_end_multi(q, p->len) - q->pos; nn = bns_coor_pac2real(bns, q->pos, j, &seqid); if(pos_end_multi(q, p->len) - bns->anns[seqid].offset > bns->anns[seqid].len) continue; //the alignment bridges adjacent sequences (chroms) //TODO: need to avoid this at the first place in the junction discovery step, but this should be rare for mm or human if (! 
header_printed) { header_printed = 1; printf("\tXA:Z:"); } int k; printf("%s,%c%d,", bns->anns[seqid].name, q->strand? '-' : '+', (int)(q->pos - bns->anns[seqid].offset + 1)); if (q->cigar) { for (k = 0; k < q->n_cigar; ++k) printf("%d%c", __cigar_len(q->cigar[k]), "MIDSN"[__cigar_op(q->cigar[k])]); } else printf("%dM", p->len); printf(",%d", q->nm); //q->gap_t + q->gap_q + q->mm); if (q->sense_strand != 2) printf(",%c;", q->sense_strand? '-' : '+' ); else printf(",.;"); } } } putchar('\n'); } else { // this read has no match ubyte_t *s = p->strand? p->rseq : p->seq; int flag = p->extra_flag | SAM_FSU; if (mate && mate->type == BWA_TYPE_NO_MATCH) flag |= SAM_FMU; printf("%s\t%d\t*\t0\t0\t*\t*\t0\t0\t", p->name, flag); for (j = 0; j != p->len; ++j) putchar("ACGTN"[(int)s[j]]); putchar('\t'); if (p->qual) { if (p->strand) seq_reverse(p->len, p->qual, 0); // reverse quality printf("%s", p->qual); } else printf("*"); if (p->clip_len < p->full_len) printf("\tXC:i:%d", p->clip_len); putchar('\n'); } }
/**
 * Returns the coordinate of the read's 5' end.
 *
 * @param p  read to inspect
 * @return -1 when the read has no match; otherwise pos_end(p) if the strand
 *         flag is set and p->pos if it is not
 */
static int64_t pos_5(const bwa_seq_t *p)
{
	// Reads without a match have no coordinate to report.
	if (p->type == BWA_TYPE_NO_MATCH)
		return -1;

	return p->strand ? pos_end(p) : p->pos;
}