예제 #1
0
파일: bamFilter.c 프로젝트: wwood/BamM
/*
 * Sum the lengths of every CIGAR operation whose bam_cigar_type() equals 3,
 * i.e. operations that have both the query bit (1) and the reference bit (2)
 * set.
 *
 * n_cigar  number of elements in cigar
 * cigar    packed CIGAR elements
 *
 * Returns the accumulated length (0 when n_cigar <= 0).
 */
int bam_cigar2matches(int n_cigar, const uint32_t *cigar)
{
    int total = 0;
    int idx = 0;

    while (idx < n_cigar) {
        const uint32_t elem = cigar[idx];
        if (bam_cigar_type(bam_cigar_op(elem)) == 3) {
            total += bam_cigar_oplen(elem);
        }
        ++idx;
    }
    return total;
}
예제 #2
0
/*
 * Sum the lengths of every CIGAR operation that either has the query bit (1)
 * set in its bam_cigar_type(), or is a hard clip (BAM_CHARD_CLIP).
 *
 * n_cigar  number of elements in cigar
 * cigar    packed CIGAR elements
 *
 * Returns the accumulated length (0 when n_cigar <= 0).
 */
int bam_cigar2ulen(int n_cigar, const uint32_t *cigar)
{
    int total = 0;
    int idx;

    for (idx = 0; idx < n_cigar; ++idx) {
        const int op = bam_cigar_op(cigar[idx]);
        const int uses_query = bam_cigar_type(op) & 1;

        if (uses_query || op == BAM_CHARD_CLIP) {
            total += bam_cigar_oplen(cigar[idx]);
        }
    }
    return total;
}
예제 #3
0
// Step to the next CIGAR operation.
//
// The reference and read positions are first moved past the operation being
// left, according to which sequences that operation consumes. The operation
// index is then incremented (it must still be inside the CIGAR, which is
// enforced with an assert) and the cached op code and length are refreshed.
void CigarParser::advance() {
    const int consumed = bam_cigar_type(currentOp_);

    if (consumed & BAM_CONSUME_QUERY)
        readPos_ += currentOpLen_;
    if (consumed & BAM_CONSUME_REFERENCE)
        refPos_ += currentOpLen_;

    currentOpIdx_ += 1;
    assert(currentOpIdx_ < len_);

    currentOpLen_ = bam_cigar_oplen(cigar_[currentOpIdx_]);
    currentOp_ = bam_cigar_op(cigar_[currentOpIdx_]);
}
예제 #4
0
파일: realign_c_util.c 프로젝트: Lingrui/TS
/*
 * Compute the query and reference lengths described by a binary CIGAR.
 *
 * cigar_bin     first packed CIGAR element
 * cigar_bin_sz  number of elements
 * q_len         out: sum of lengths of operations with CONSUME_QRY set
 * r_len         out: sum of lengths of operations with CONSUME_REF set
 *
 * Returns the sum of the lengths of all operations, regardless of type.
 */
unsigned seq_lens_from_bin_cigar (uint32_t* cigar_bin, unsigned cigar_bin_sz, unsigned* q_len, unsigned* r_len)
{
    unsigned oplen, constype;
    uint32_t *sent;
    unsigned allen = 0;
    *q_len = *r_len = 0;
    for (sent = cigar_bin + cigar_bin_sz; cigar_bin != sent; ++cigar_bin)
    {
        oplen = bam_cigar_oplen (*cigar_bin);
        /* bam_cigar_type() takes the operation code, not the packed element:
           feeding it the whole word (length bits included) yields an
           out-of-range shift count. */
        constype = bam_cigar_type (bam_cigar_op (*cigar_bin));
        if (constype & CONSUME_QRY) *q_len += oplen;
        if (constype & CONSUME_REF) *r_len += oplen;
        allen += oplen;
    }
    return allen;
}
예제 #5
0
// Run one alignment record through the configured filters.
//
// Steps, in order:
//   1. optionally strip B operations (bam_remove_B);
//   2. minimum query length, counted over query-consuming and hard-clip ops;
//   3. mapping quality and required / forbidden flag bits;
//   4. overlap with the BED regions (records with tid < 0 are rejected);
//   5. subsampling: hash of the query name XOR seed, low 24 bits vs. fraction;
//   6. read group: rejected when an RG tag is present but not in rghash
//      (records without an RG tag are not rejected by this step);
//   7. library name must be known and equal to settings->library;
//   8. for records that passed, delete every aux tag listed in remove_aux.
//
// Returns 0 to indicate read should be output, 1 otherwise.
static int process_aln(const bam_hdr_t *h, bam1_t *b, samview_settings_t* settings)
{
    if (settings->remove_B) {
        bam_remove_B(b);
    }

    if (settings->min_qlen > 0) {
        uint32_t *ops = bam_get_cigar(b);
        int total_len = 0;
        int idx;

        for (idx = 0; idx < b->core.n_cigar; ++idx) {
            int consumes_query = bam_cigar_type(bam_cigar_op(ops[idx])) & 1;
            if (consumes_query || bam_cigar_op(ops[idx]) == BAM_CHARD_CLIP) {
                total_len += bam_cigar_oplen(ops[idx]);
            }
        }
        if (total_len < settings->min_qlen) {
            return 1;
        }
    }

    if (b->core.qual < settings->min_mapQ) {
        return 1;
    }
    if ((b->core.flag & settings->flag_on) != settings->flag_on) {
        return 1;
    }
    if (b->core.flag & settings->flag_off) {
        return 1;
    }

    if (settings->bed) {
        if (b->core.tid < 0) {
            return 1;
        }
        if (!bed_overlap(settings->bed, h->target_name[b->core.tid], b->core.pos, bam_endpos(b))) {
            return 1;
        }
    }

    if (settings->subsam_frac > 0.) {
        uint32_t hashed = __ac_Wang_hash(__ac_X31_hash_string(bam_get_qname(b)) ^ settings->subsam_seed);
        if ((double)(hashed&0xffffff) / 0x1000000 >= settings->subsam_frac) {
            return 1;
        }
    }

    if (settings->rghash) {
        uint8_t *rg_tag = bam_aux_get(b, "RG");
        if (rg_tag) {
            khint_t slot = kh_get(rg, settings->rghash, (char*)(rg_tag + 1));
            if (slot == kh_end(settings->rghash)) {
                return 1;
            }
        }
    }

    if (settings->library) {
        const char *lib = bam_get_library((bam_hdr_t*)h, b);
        if (!lib) {
            return 1;
        }
        if (strcmp(lib, settings->library) != 0) {
            return 1;
        }
    }

    if (settings->remove_aux_len) {
        size_t t;
        for (t = 0; t < settings->remove_aux_len; ++t) {
            uint8_t *tag = bam_aux_get(b, settings->remove_aux[t]);
            if (tag) {
                bam_aux_del(b, tag);
            }
        }
    }

    return 0;
}
예제 #6
0
파일: wiggle.cpp 프로젝트: jmarshall/RSEM
// Add one alignment's weight to wiggle.read_depth.
//
// The weight is 1.0 when no_fractional_weight is set; otherwise it is read
// from the record's "ZW" aux tag, and records without that tag are skipped.
// Walking the CIGAR from b->core.pos, every position covered by a BAM_CMATCH
// operation receives the weight; other operations only move the position
// when their type has the reference bit (2) set.
void add_bam_record_to_wiggle(const bam1_t *b, Wiggle& wiggle) {
    double weight = 1.0;

    if (!no_fractional_weight) {
        uint8_t *zw_tag = bam_aux_get(b, "ZW");
        if (zw_tag == NULL) return;
        weight = bam_aux2f(zw_tag);
    }

    int ref_pos = b->core.pos;
    uint32_t *ops = bam_get_cigar(b);
    const int n_ops = (int)b->core.n_cigar;

    for (int idx = 0; idx < n_ops; ++idx) {
        char op = bam_cigar_op(ops[idx]);
        int op_len = bam_cigar_oplen(ops[idx]);

        if (op == BAM_CMATCH) {
            for (int done = 0; done < op_len; ++done) {
                wiggle.read_depth[ref_pos] += weight;
                ++ref_pos;
            }
        } else if (bam_cigar_type(op) & 2) {
            ref_pos += op_len;
        }
    }
}
예제 #7
0
파일: realign_c_util.c 프로젝트: Lingrui/TS
/*
 * Derive the query and reference bounds of the aligned zone from a binary
 * CIGAR.
 *
 * Operations that precede the first non-clip operation advance *q_beg and
 * *r_beg (according to what they consume); every processed operation advances
 * *q_end and *r_end. Scanning stops at the first hard or soft clip found after
 * a non-clip operation has been seen.
 *
 * cigar_bin     first packed CIGAR element
 * cigar_bin_sz  number of elements
 * forward       when zero, the query bounds are mirrored within qry_len:
 *               q_beg = qry_len - q_end, q_end = qry_len - (old q_beg)
 * qry_len       query length used for that mirroring
 * q_beg, q_end  out: query bounds
 * r_beg, r_end  out: reference bounds
 *
 * Returns the sum of the lengths of all operations processed before stopping.
 */
unsigned alignment_bounds_from_bin_cigar (uint32_t* cigar_bin, unsigned cigar_bin_sz, uint8_t forward, unsigned qry_len, unsigned* q_beg, unsigned* q_end, unsigned* r_beg, unsigned* r_end)
{
    unsigned oplen, op, constype;
    uint32_t *sent;
    unsigned allen = 0;
    uint32_t tail = 0;
    *q_beg = *q_end = *r_beg = *r_end = 0;
    for (sent = cigar_bin + cigar_bin_sz; cigar_bin != sent; ++cigar_bin)
    {
        oplen = bam_cigar_oplen (*cigar_bin);
        op = bam_cigar_op (*cigar_bin);
        /* bam_cigar_type() takes the operation code, not the packed element:
           feeding it the whole word (length bits included) yields an
           out-of-range shift count. */
        constype = bam_cigar_type (op);

        if (tail && (op == BAM_CHARD_CLIP || op == BAM_CSOFT_CLIP)) // the aligned zone ended, clip started. Note that tail indels are not valid, so we do not assume they are possible..
            break;

        if (op != BAM_CHARD_CLIP && op != BAM_CSOFT_CLIP)
            tail = 1;

        if (constype & CONSUME_QRY)
        {
            if (!tail) *q_beg += oplen;
            *q_end += oplen;
        }
        if (constype & CONSUME_REF)
        {
            if (!tail) *r_beg += oplen;
            *r_end += oplen;
        }
        allen += oplen;
    }
    if (!forward)
    {
        /* mirror the query interval so it is expressed from the other end */
        unsigned tmp = qry_len - *q_beg;
        *q_beg = qry_len - *q_end;
        *q_end = tmp;
    }
    return allen;
}
예제 #8
0
/* Make a node containing an InDel
 *
 * b     The input read
 * cigar_op_num The operation number of the Insertion/Deletion
 *
 * The node's start is the position reached after the reference-consuming
 * operations that precede cigar_op_num; its end is the furthest start+length
 * over the run of consecutive I/D operations beginning at cigar_op_num.
 *
 * returns a node, that must be either inserted into the linked list or
 * destroyed with destroyNode(), or NULL if the node could not be allocated
 */
InDel *makeNode(bam1_t *b, int cigar_op_num) {
    int i, op, oplen, type;
    int32_t start = b->core.pos-1;
    int32_t end;
    uint32_t *cigar = bam_get_cigar(b);
    InDel *node;

    //Advance over everything that consumes the reference before the InDel
    for(i=0; i<cigar_op_num; i++) {
        oplen = bam_cigar_oplen(cigar[i]);
        type = bam_cigar_type(bam_cigar_op(cigar[i]));
        if(type & 2) start += oplen;
    }

    end = ++start;
    //Extend over the run of adjacent I (1) / D (2) operations
    for(i=cigar_op_num; i<b->core.n_cigar; i++) {
        op = bam_cigar_op(cigar[i]);
        if(op != 1 && op != 2) break;
        oplen = bam_cigar_oplen(cigar[i]);
        if(start+oplen > end) end = start+oplen;
    }

    //Make the node
    node = calloc(1, sizeof(*node));
    if(!node) return NULL; //Previously a failed allocation was dereferenced
    node->tid = b->core.tid;
    node->start = start;
    node->end = end;
    node->count = 1;

    return node;
}
예제 #9
0
파일: sw_align.c 프로젝트: Annak17/partis
/*
 * Align one read against every target sequence.
 *
 * read     read to align
 * targets  vector of target sequences
 * conf     scoring matrix, gap penalties and the character encoding table
 *
 * For each target the read is aligned with ksw_align(), the aligned region is
 * re-aligned with ksw_global() to obtain a CIGAR, and aln.nm is computed as:
 * mismatching positions inside operations that consume both sequences, plus
 * the full length of every other operation.
 *
 * Returns a vector with one aln_t per target, ordered by ks_introsort with
 * the dec_score comparison. Each aln.cigar is the buffer produced by
 * ksw_global() and is handed to the caller with the result.
 */
static aln_v align_read(const kseq_t *read,
                        const kseq_v targets,
                        const align_config_t *conf)
{
    kseq_t *r;
    const int32_t read_len = read->seq.l;

    aln_v result;
    kv_init(result);
    kv_resize(aln_t, result, kv_size(targets));

    uint8_t *read_num = calloc(read_len, sizeof(uint8_t));

    for(size_t k = 0; k < (size_t)read_len; ++k)
        read_num[k] = conf->table[(int)read->seq.s[k]];

    // Align to each target
    kswq_t *qry = NULL;
    for(size_t j = 0; j < kv_size(targets); j++) {
        // Encode target
        r = &kv_A(targets, j);
        uint8_t *ref_num = calloc(r->seq.l, sizeof(uint8_t));
        for(size_t k = 0; k < r->seq.l; ++k)
            ref_num[k] = conf->table[(int)r->seq.s[k]];

        aln_t aln;
        aln.target_idx = j;
        aln.loc = ksw_align(read_len, read_num,
                            r->seq.l, ref_num,
                            conf->m,
                            conf->mat,
                            conf->gap_o,
                            conf->gap_e,
                            KSW_XSTART,
                            &qry);
        ksw_global(aln.loc.qe - aln.loc.qb + 1,
                   &read_num[aln.loc.qb],
                   aln.loc.te - aln.loc.tb + 1,
                   &ref_num[aln.loc.tb],
                   conf->m,
                   conf->mat,
                   conf->gap_o,
                   conf->gap_e,
                   50, /* TODO: Magic number - band width */
                   &aln.n_cigar,
                   &aln.cigar);

        aln.nm = 0;
        size_t qi = aln.loc.qb, ri = aln.loc.tb;
        for(size_t k = 0; k < (size_t)aln.n_cigar; k++) {
            // bam_cigar_type() takes the op code, not the packed element
            const int32_t oplen = bam_cigar_oplen(aln.cigar[k]),
                          optype = bam_cigar_type(bam_cigar_op(aln.cigar[k]));

            // Only type 3 consumes both sequences; testing "optype & 3" also
            // matched insertions and deletions, which were then compared
            // base-by-base instead of being counted in full.
            if(optype == 3) { // consumes both - check for mismatches
                for(size_t m = 0; m < (size_t)oplen; m++) {
                    if(UNLIKELY(read_num[qi + m] != ref_num[ri + m]))
                        aln.nm++;
                }
            } else {
                aln.nm += oplen;
            }
            if(optype & 1) qi += oplen;
            if(optype & 2) ri += oplen;
        }

        kv_push(aln_t, result, aln);
        free(ref_num);
    }
    free(qry);
    free(read_num);
    ks_introsort(dec_score, kv_size(result), result.a);

    return result;
}
예제 #10
0
/*
 * Hard-clip adapter bases from an alignment.
 *
 * The clip length is the larger of the numeric "as" and "a3" aux fields
 * (a missing field counts as 0). Nothing is changed when that length is 0 or
 * when fewer than two bases would remain after clipping.
 *
 * For mapped reads the CIGAR is rewritten: operations are consumed from the
 * end (after reversing the operation list for reverse-strand reads) until the
 * clip length is covered, and a hard clip of (clip length + lengths of the
 * type-0 operations that were passed) is put in their place. For
 * reverse-strand reads the list is reversed back and the position is moved
 * forward by the reference length that was removed.
 *
 * The sequence and qualities are then shortened (from the end for forward
 * reads, from the start for reverse reads) and the removed bases and
 * qualities are stored in the "qs" and "qq" aux fields.
 *
 * algn    alignment to modify in place
 * R, Q    scratch buffers receiving the decoded read and qualities
 * seqenc  sequence encode table passed to replaceSequence
 * cigop   scratch buffer for the CIGAR operations
 * T       scratch buffer passed to the replace* calls
 *
 * Always returns true.
 */
bool clipAdapters(
	libmaus2::bambam::BamAlignment & algn,
	libmaus2::autoarray::AutoArray<char> & R,
	libmaus2::autoarray::AutoArray<char> & Q,
	libmaus2::bambam::BamSeqEncodeTable const & seqenc,
	libmaus2::autoarray::AutoArray<libmaus2::bambam::cigar_operation> & cigop,
	libmaus2::bambam::BamAlignment::D_array_type & T
)
{
	// a3,as
	uint64_t const asclip = algn.hasAux("as") ? algn.getAuxAsNumber<int>("as") : 0;
	uint64_t const a3clip = algn.hasAux("a3") ? algn.getAuxAsNumber<int>("a3") : 0;
	uint64_t const aclip = std::max(asclip,a3clip);
	bool     const reverse = algn.isReverse();

	if ( aclip )
	{
		uint64_t const len = algn.decodeRead(R);
		algn.decodeQual(Q);

		// len and aclip are unsigned: check len > aclip first, otherwise
		// len - aclip wraps around to a huge value when the clip length
		// exceeds the read length and the clipping below runs out of range
		if ( len > aclip && (len - aclip) > 1 )
		{
			if ( algn.isMapped() )
			{
				uint32_t const numcigop = algn.getCigarOperations(cigop);

				// make room for one extra operation (the hard clip)
				if ( numcigop == cigop.size() )
					cigop.resize(numcigop+1);

				// work from the end of the list regardless of strand
				if ( reverse )
				{
					std::reverse(cigop.begin(),cigop.begin()+numcigop);
				}


				// can't just add a HC to the cigar
				uint32_t index;
				uint32_t hardclip = 0;
				uint32_t cig_type;
				int32_t  left     = aclip;
				int32_t  repos    = 0;

				// walk backwards until the operation that contains the clip point
				for ( index = numcigop - 1; index > 0; index-- )
				{
					cig_type = bam_cigar_type(cigop[index].first);

					if ( cig_type == 0 )
					{
						// consumes neither sequence: fold its length into the new hard clip
						hardclip += cigop[index].second;
					}
					else
					{
						if ( cig_type & 1 )
						{
							if ( cigop[index].second < left )
							{
								left -= cigop[index].second;
							}
							else
							{
								break;
							}
						}

						if ( cig_type & 2 )
						{
							// move pos if reversed
							repos += cigop[index].second;
						}
					}
				}

				cig_type = bam_cigar_type(cigop[index].first);

				// keep the unclipped remainder of this operation, if any,
				// and place the hard clip after it
				if ( cigop[index].second != left )
				{
					cigop[index++].second -= left;
				}

				cigop[index] = libmaus2::bambam::cigar_operation(libmaus2::bambam::BamFlagBase::LIBMAUS2_BAMBAM_CHARD_CLIP, aclip + hardclip);

				if ( numcigop > index + 1 )
					cigop.resize(index + 1);

				if ( reverse )
				{
					std::reverse(cigop.begin(),cigop.begin() + index + 1);

					// account for the last possible pos move
					if ( cig_type & 2 )
						repos += left;

					if ( repos )
					{
						// clipping has moved the pos point
						algn.putPos(algn.getPos() + repos);
					}

				}

				algn.replaceCigarString(cigop.begin(),index + 1,T);
			}


			if ( !reverse )
			{
				// forward: drop the last aclip bases and keep them in qs/qq
				algn.replaceSequence(seqenc,R.begin(),Q.begin(),len-aclip,T);
				algn.putAuxString("qs",std::string(R.begin()+(len-aclip),R.begin()+len));
				algn.putAuxString("qq",std::string(Q.begin()+(len-aclip),Q.begin()+len));
			}
			else
			{
				// reverse: drop the first aclip bases and keep them in qs/qq
				algn.replaceSequence(seqenc, (R.begin() + aclip), (Q.begin() + aclip), len - aclip, T);
				algn.putAuxString("qs", std::string(R.begin(), R.begin() + aclip));
				algn.putAuxString("qq", std::string(Q.begin(), Q.begin() + aclip));
			}
		}
	}

	return true;
}
예제 #11
0
/*
 * Align an already-encoded read against a single target.
 *
 * target     target sequence (encoded here through conf->table)
 * read_len   length of read_num
 * read_num   encoded read
 * qry        query profile handed to ksw_align(), passed through unchanged
 * conf       scoring matrix, gap penalties, bandwidth and encoding table
 * min_score  alignments scoring below this are returned without a CIGAR
 *
 * Returns an aln_t whose loc and target_name are always set. When the score
 * is below min_score, cigar is NULL and n_cigar / nm are 0. Otherwise cigar
 * and n_cigar come from ksw_global() and nm is the number of mismatching
 * positions inside operations that consume both sequences plus the full
 * length of every other operation.
 */
static aln_t align_read_against_one(kseq_t *target, const int read_len,
                                    uint8_t *read_num, kswq_t **qry,
                                    const align_config_t *conf,
                                    const int min_score) {
  uint8_t *ref_num = calloc(target->seq.l, sizeof(uint8_t));
  for (size_t k = 0; k < target->seq.l; ++k)
    ref_num[k] = conf->table[(int)target->seq.s[k]];

  aln_t aln;
  aln.cigar = NULL;
  /* Give these defined values so the early return below does not hand back
     indeterminate fields. */
  aln.n_cigar = 0;
  aln.nm = 0;
  aln.loc = ksw_align(read_len, read_num, target->seq.l, ref_num, conf->m,
                      conf->mat, conf->gap_o, conf->gap_e, KSW_XSTART, qry);

  aln.target_name = target->name.s;

  if (aln.loc.score < min_score) {
    free(ref_num);
    return aln;
  }

  ksw_global(aln.loc.qe - aln.loc.qb + 1, &read_num[aln.loc.qb],
             aln.loc.te - aln.loc.tb + 1, &ref_num[aln.loc.tb], conf->m,
             conf->mat, conf->gap_o, conf->gap_e, conf->bandwidth, &aln.n_cigar,
             &aln.cigar);

  aln.nm = 0;
  size_t qi = aln.loc.qb, ri = aln.loc.tb;
  for (int k = 0; k < aln.n_cigar; k++) {
    /* bam_cigar_type() takes the op code, not the packed element */
    const int32_t oplen = bam_cigar_oplen(aln.cigar[k]),
                  optype = bam_cigar_type(bam_cigar_op(aln.cigar[k]));

    /* Only type 3 consumes both sequences; testing "optype & 3" also matched
       insertions and deletions, which were then compared base-by-base instead
       of being counted in full. */
    if (optype == 3) { // consumes both - check for mismatches
      for (int j = 0; j < oplen; j++) {
        if (UNLIKELY(read_num[qi + j] != ref_num[ri + j]))
          aln.nm++;
      }
    } else {
      aln.nm += oplen;
    }
    if (optype & 1)
      qi += oplen;
    if (optype & 2)
      ri += oplen;
  }

  free(ref_num);

  /* size_t cigar_len = aln.loc.qb; */
  /* for (int c = 0; c < aln.n_cigar; c++) { */
  /*   int32_t length = (0xfffffff0 & *(aln.cigar + c)) >> 4; */
  /*   cigar_len += length; */
  /* } */
  /* cigar_len += read_len - aln.loc.qe - 1; */
  /* if(cigar_len != (size_t)read_len) { */
  /*   /\* printf("[ig_align] Error: cigar length (score %d) not equal to read length for XXX (target %s): %zu vs %d\n", aln.loc.score, target->name.s, cigar_len, read_len); *\/ */
  /*   // NOTE: */
  /*   //   It is *really* *f*****g* *scary* that it's spitting out cigars that are not the same length as the query sequence. */
  /*   //   Nonetheless, fixing it seems to involve delving into the depths of ksw_align() and ksw_global(), which would be very time consuming, and the length discrepancy seems to only appear in very poor matches. */
  /*   //   I.e., poor enough that we will subsequently ignore them in partis/python/waterer.py, so it seems to not screw anything up downstream to just set the length-discrepant matches' scores to zero, such that ig-sw doesn't write them to its sam output. */
  /*   //   Note also that it is not always the lowest- or highest-scoring matches that have discrepant lengths (i.e. setting their scores to zero promotes matches with poorer scores, but which do not have discrepant lengths. */
  /*   /\* aln.loc.score = 0; *\/ */
  /*   aln.cigar = NULL; */
  /* } */

  return aln;
}