示例#1
0
void bam_template_cigar(bam1_t *b1, bam1_t *b2, kstring_t *str)
{
	bam1_t *swap;
	int i, end;
	uint32_t *cigar;
	str->l = 0;
	if (b1->core.tid != b2->core.tid || b1->core.tid < 0) return; // coordinateless or not on the same chr; skip
	if (b1->core.pos > b2->core.pos) swap = b1, b1 = b2, b2 = swap; // make sure b1 has a smaller coordinate
	kputc((b1->core.flag & BAM_FREAD1)? '1' : '2', str); // segment index
	kputc((b1->core.flag & BAM_FREVERSE)? 'R' : 'F', str); // strand
	for (i = 0, cigar = bam1_cigar(b1); i < b1->core.n_cigar; ++i) {
		kputw(bam_cigar_oplen(cigar[i]), str);
		kputc(bam_cigar_opchr(cigar[i]), str);
	}
	end = bam_calend(&b1->core, cigar);
	kputw(b2->core.pos - end, str);
	kputc('T', str);
	kputc((b2->core.flag & BAM_FREAD1)? '1' : '2', str); // segment index
	kputc((b2->core.flag & BAM_FREVERSE)? 'R' : 'F', str); // strand
	for (i = 0, cigar = bam1_cigar(b2); i < b2->core.n_cigar; ++i) {
		kputw(bam_cigar_oplen(cigar[i]), str);
		kputc(bam_cigar_opchr(cigar[i]), str);
	}
	bam_aux_append(b1, "CT", 'Z', str->l+1, (uint8_t*)str->s); 
}
示例#2
0
/*
 * This function calculates ct tag for two bams, it assumes they are from the same template and
 * writes the tag to the first read in position terms.
 */
static void bam_template_cigar(bam1_t *b1, bam1_t *b2, kstring_t *str)
{
    bam1_t *swap;
    int i, end;
    uint32_t *cigar;
    str->l = 0;
    if (b1->core.tid != b2->core.tid || b1->core.tid < 0 || b1->core.pos < 0 || b2->core.pos < 0 || b1->core.flag&BAM_FUNMAP || b2->core.flag&BAM_FUNMAP) return; // coordinateless or not on the same chr; skip
    if (b1->core.pos > b2->core.pos) swap = b1, b1 = b2, b2 = swap; // make sure b1 has a smaller coordinate
    kputc((b1->core.flag & BAM_FREAD1)? '1' : '2', str); // segment index
    kputc((b1->core.flag & BAM_FREVERSE)? 'R' : 'F', str); // strand
    for (i = 0, cigar = bam_get_cigar(b1); i < b1->core.n_cigar; ++i) {
        kputw(bam_cigar_oplen(cigar[i]), str);
        kputc(bam_cigar_opchr(cigar[i]), str);
    }
    end = bam_endpos(b1);
    kputw(b2->core.pos - end, str);
    kputc('T', str);
    kputc((b2->core.flag & BAM_FREAD1)? '1' : '2', str); // segment index
    kputc((b2->core.flag & BAM_FREVERSE)? 'R' : 'F', str); // strand
    for (i = 0, cigar = bam_get_cigar(b2); i < b2->core.n_cigar; ++i) {
        kputw(bam_cigar_oplen(cigar[i]), str);
        kputc(bam_cigar_opchr(cigar[i]), str);
    }

    uint8_t* data;
    if ((data = bam_aux_get(b1,"ct")) != NULL) bam_aux_del(b1, data);
    if ((data = bam_aux_get(b2,"ct")) != NULL) bam_aux_del(b2, data);

    bam_aux_append(b1, "ct", 'Z', str->l+1, (uint8_t*)str->s);
}
示例#3
0
文件: filter.c 项目: ernfrid/lumpy-sv
void calcAlnOffsets(uint32_t *cigar,
                    uint32_t n_cigar,
                    uint32_t sa_pos,
                    char sa_strand,
                    struct line *l)
{
    l->raLen = 0;
    l->qaLen = 0;
    l->sclip = 0;
    l->eclip = 0;
    l->SQO = 0;
    l->EQO = 0;
    bool first = true;

    uint32_t k;
    for (k = 0; k < n_cigar; ++k) 
    {
        uint32_t opLen = bam_cigar_oplen(cigar[k]);
        char opCode = bam_cigar_opchr(cigar[k]);
        if      (opCode == 'M' || opCode == '=' || opCode == 'X')
        {
            l->raLen += opLen;
            l->qaLen += opLen;
            first = false;
        }
        else if (opCode == 'S' || opCode == 'H')
        {
            if (first) l->sclip += opLen;
            else       l->eclip += opLen;
        }
        else if (opCode == 'D' || opCode == 'N')
        {
            l->raLen += opLen;
        }
        else if (opCode == 'I')
        {
            l->qaLen += opLen;
        }
    }
    //*rapos = str2pos(line->fields[POS]);
    l->rapos = sa_pos;
    if (sa_strand == '+')
    {
        l->pos = l->rapos - l->sclip;
        l->SQO = l->sclip;
        l->EQO = l->sclip + l->qaLen - 1;
    }
    else
    {
        l->pos = l->rapos + l->raLen + l->eclip - 1;
        l->SQO = l->eclip;
        l->EQO = l->eclip + l->qaLen - 1;
    }
}
示例#4
0
// Adapted from samtools/bam.c
int32_t b2g_bam_clippedlength(bam1_t *b) {
  const bam1_core_t c = b->core;
  const uint32_t *cigar = bam1_cigar(b);
  uint32_t k;
  int32_t l = 0;
  for (k = 0; k < c.n_cigar; ++k) {
    if ('S' == bam_cigar_opchr(bam_cigar_op(cigar[k]))) {
      l += bam_cigar_oplen(cigar[k]);
    }
  }
  return l;
}
示例#5
0
文件: hts_utils.cpp 项目: pkrusche/vt
/**
 * Gets the cigar from a BAM record
 */
void bam_get_cigar_string(bam1_t *s, kstring_t *cigar_string)
{
    cigar_string->l=0;
    int32_t n_cigar_op = bam_get_n_cigar_op(s);
    if (n_cigar_op)
    {
        uint32_t *cigar = bam_get_cigar(s);
        for (int32_t i = 0; i < n_cigar_op; ++i)
        {
            kputw(bam_cigar_oplen(cigar[i]), cigar_string);
            kputc(bam_cigar_opchr(cigar[i]), cigar_string);
        }
    }
}
示例#6
0
static int32_t unclipped_start(bam1_t *b) {
    uint32_t *cigar = bam_get_cigar(b);
    int32_t clipped = 0;
    uint32_t i;

    for (i = 0; i < b->core.n_cigar; i++) {
        char c = bam_cigar_opchr(cigar[i]);

        if (c == 'S' || c == 'H') { // clips
            clipped += bam_cigar_oplen(cigar[i]);
        } else {
            break;
        }
    }

    return b->core.pos - clipped + 1;
}
示例#7
0
// Returns 0 on success, -1 on failure.
static int bam_format_cigar(const bam1_t* b, kstring_t* str)
{
    // An empty cigar is a special case return "*" rather than ""
    if (b->core.n_cigar == 0) {
        return (kputc('*', str) == EOF) ? -1 : 0;
    }

    const uint32_t *cigar = bam_get_cigar(b);
    uint32_t i;

    for (i = 0; i < b->core.n_cigar; ++i) {
        if (kputw(bam_cigar_oplen(cigar[i]), str) == EOF) return -1;
        if (kputc(bam_cigar_opchr(cigar[i]), str) == EOF) return -1;
    }

    return 0;
}
示例#8
0
static int32_t unclipped_end(bam1_t *b) {
    uint32_t *cigar = bam_get_cigar(b);
    int32_t end_pos, clipped = 0;
    int32_t i;

    end_pos = bam_endpos(b);

    // now get the clipped end bases (if any)
    // if we get to the beginning of the cigar string
    // without hitting a non-clip then the results are meaningless
    for (i = b->core.n_cigar - 1; i >= 0; i--) {
        char c = bam_cigar_opchr(cigar[i]);

        if (c == 'S' || c == 'H') { // clips
            clipped += bam_cigar_oplen(cigar[i]);
        } else {
            break;
        }
    }

    return end_pos + clipped;
}
示例#9
0
string bam_cigarString (bam1_t *b) {//output CIGAR string
//    kstring_t strK;
//    kstring_t *str=&strK;
    const bam1_core_t *c = &b->core;

    string cigarString("");
    if ( c->n_cigar > 0 ) {
      uint32_t *cigar = bam_get_cigar(b);
      for (int i = 0; i < c->n_cigar; ++i) {
        cigarString+=to_string((uint)bam_cigar_oplen(cigar[i]))+bam_cigar_opchr(cigar[i]);
      };
    };

    
//	if (c->n_cigar) { // cigar
//		for (int i = 0; i < c->n_cigar; ++i) {
//			kputw(bam_cigar_oplen(cigar[i]), str);
//			kputc(bam_cigar_opchr(cigar[i]), str);
//		}
//	} else kputc('*', str);
//    
//    string cigarString (str->s,str->l);
    return cigarString;
};
示例#10
0
int do_grep() {
#ifdef DEBUGa
	printf("[!]do_grep\n");
#endif
	BamInfo_t *pbam;
	kh_cstr_t BamID;
	khiter_t ki, bami;
	kstring_t ks1 = { 0, 0, NULL };
	kstring_t ks2 = { 0, 0, NULL };
	kstring_t ks3 = { 0, 0, NULL };

	samFile *in;
	bam_hdr_t *h;
	hts_idx_t *idx;
	bam1_t *b, *d, *d2, *bR1, *bR2, *bR3;
	bR1 = bam_init1(); bR2 = bam_init1(); bR3 = bam_init1();
	//htsFile *out;
	//hts_opt *in_opts = NULL, *out_opts = NULL;
	int r = 0, exit_code = 0;

	kvec_t(bam1_t) R1, R2, RV;
	pierCluster_t *pierCluster;
	//samdat_t tmp_samdat;
	FILE *fs = fopen("./test.txt","w");

	for (bami = kh_begin(bamNFOp); bami != kh_end(bamNFOp); ++bami) {
		//printf(">[%d]:\n",bami);
		if (kh_exist(bamNFOp, bami)) {
			kv_init(R1); kv_init(R2); kv_init(RV);
			//tmp_samdat = (const samdat_t){ 0 };
			//memset(&tmp_samdat,0,sizeof(samdat_t));
			//printf("-[%d]:\n",bami);
			BamID = kh_key(bamNFOp, bami);
			pbam = &kh_value(bamNFOp, bami);
			fprintf(stderr, "%u [%s]=%s\t%u %u\n",bami,BamID,pbam->fileName,pbam->insertSize,pbam->SD);

			in = sam_open(pbam->fileName, "r");
			if (in == NULL) {
				fprintf(stderr, "[x]Error opening \"%s\"\n", pbam->fileName);
				return EXIT_FAILURE;
			}
			h = sam_hdr_read(in);
/*			out = hts_open("-", "w");
			if (out == NULL) {
				fprintf(stderr, "[x]Error opening standard output\n");
				return EXIT_FAILURE;
			}
			if (sam_hdr_write(out, h) < 0) {
				fprintf(stderr, "[!]Error writing output header.\n");
				exit_code = 1;
			}
*/
			int8_t *ChrIsHum;
			if (h == NULL) {
				fprintf(stderr, "[x]Couldn't read header for \"%s\"\n", pbam->fileName);
				return EXIT_FAILURE;
			} else {
				ChrIsHum = malloc(h->n_targets * sizeof(int8_t));
				for (int32_t i=0; i < h->n_targets; ++i) {
					//ChrIsHum[i] = -1;
					ki = kh_get(chrNFO, chrNFOp, h->target_name[i]);
					if (ki == kh_end(chrNFOp)) {
						errx(4,"[x]Cannot find ChrID for [%s] !",h->target_name[i]);
					} else {
						ChrInfo_t * tmp = &kh_value(chrNFOp, ki);
						ChrIsHum[i] = tmp->isHum;
						//printf(">>> %d Chr:%s %d\n",i,h->target_name[i],ChrIsHum[i]);
					}
				}
			}
			h->ignore_sam_err = 0;
			b = bam_init1();
			d = bam_init1();
			d2 = bam_init1();
			if ((idx = sam_index_load(in, pbam->fileName)) == 0) {
				fprintf(stderr, "[E::%s] fail to load the BAM index\n", __func__);
				return 1;
			}
			pierCluster = sam_plp_init();
			while ((r = sam_read1(in, h, b)) >= 0) {
				int8_t flag = false;
				const bam1_core_t *c = &b->core;
				if (c->flag & BAM_FSECONDARY) continue;
				if (c->n_cigar) {
					uint32_t *cigar = bam_get_cigar(b);
					for (int i = 0; i < c->n_cigar; ++i) {
						if (bam_cigar_opchr(cigar[i])=='S') {	// soft clipping
							if ( bam_cigar_oplen(cigar[i]) >= myConfig.minGrepSlen ) {
								flag = true;
							}
						}
					}
				}
				if (flag && ChrIsHum[c->tid]) {	// Now, skip Virus items.
					//bam_copy1(bR1, b);
					flag = 0;	// recycle
					//int enoughMapQ = 0;
					//kstring_t ks = { 0, 0, NULL };
					/*if (sam_format1(h, b, &ks1) < 0) {
						fprintf(stderr, "Error writing output.\n");
						exit_code = 1;
						break;
					} else*/ if ((c->mtid == c->tid && ChrIsHum[c->tid]) || (ChrIsHum[c->tid] ^ ChrIsHum[c->mtid])) {	// Only grep those mapped on same Human ChrID, or diff species/一方在病毒的情况.
						//printf(">[%s]\n",ks_str(&ks1));
						flag |= 1;
						//tmp_samdat.b = bam_dup1(b);
						//kv_push(samdat_t,R1,tmp_samdat);
						/*if (checkMapQ(ChrIsHum, b, true)) {
							++enoughMapQ;
						}*/
					}
					if (getPairedSam(in, idx, b, d) != 0) {
						flag &= ~1;
						continue;
					} else {
						flag |= 2;
						/*if (checkMapQ(ChrIsHum, d, false)) {
							++enoughMapQ;
						}*/
						/*if (c->flag & BAM_FSECONDARY) {
							if (getPairedSam(in, idx, d, d2) == 0) {
								//sam_format1(h, d2, &ks3);
								flag |= 4;
								if (checkMapQ(ChrIsHum, d2, false)) {
									++enoughMapQ;
								}
							}
						}*/
					}
/*
对于 BAM_FSECONDARY(256) 的 Read,跳两次 与 读 SA 项,效果一样。
>[sf95_Ref_48245009_48245108_48245208_Vir_-_2000_2044_R_100_90	353	chr2	13996555	0	50S40M	chr18	48245109	0ACACAACAATGTTCCGGAGACTCTAAGGCCTCCCGATACAGAGCAGAGGCCACACACACACACACCATGGAATACTATTCAGCCAAAAAA	CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC	NM:i:0	MD:Z:40	AS:i:40	XS:i:40	RG:Z:Fsimout_mB	SA:Z:rgi|59585|emb|X04615.1|,2000,-,40S46M4S,60,0;	YC:Z:CT	YD:Z:f]
-[sf95_Ref_48245009_48245108_48245208_Vir_-_2000_2044_R_100_90	177	chr18	48245109	9	40S50M	gi|59585|emb|X04615.1|2000	0	GTTCCGGAGACTCTAAGGCCTCCCGATACAGAGCAGAGGCCACACACACACACACCATGGAATACTATTCAGCCAAAAAAAGGAATTCAA	CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC	NM:i:0	MD:Z:50	AS:i:50	XS:i:46	RG:Z:Fsimout_mB	SA:Z:rgi|59585|emb|X04615.1|,2000,+,50S40M,9,0;	YC:Z:GA	YD:Z:f]
+[sf95_Ref_48245009_48245108_48245208_Vir_-_2000_2044_R_100_90	113	gi|59585|emb|X04615.1|	2000	60	40S46M4S	chr18	48245109	0	TTTTTTGGCTGAATAGTATTCCATGGTGTGTGTGTGTGTGGCCTCTGCTCTGTATCGGGAGGCCTTAGAGTCTCCGGAACATTGTTGTGT	CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC	NM:i:0	MD:Z:46	AS:i:46	XS:i:27	RG:Z:Fsimout_mB	SA:Z:fchr2,13996555,+,50S40M,0,0;	YC:Z:CT	YD:Z:r]
*/
					/*if (sam_format1(h, d, &ks2) < 0) {
						fprintf(stderr, "Error writing output.\n");
						exit_code = 1;
						break;
					}*/
					if (((flag & 3) == 3) /*&& enoughMapQ >= myConfig.samples*/) {
						/*printf(">%d[%s]\n",checkMapQ(ChrIsHum, b, true),ks_str(&ks1));
						printf("-%d[%s]\n",checkMapQ(ChrIsHum, d, false),ks_str(&ks2));
						if (flag & 4) {
							printf("+%d[%s]\n",checkMapQ(ChrIsHum, d2, false),ks_str(&ks3));
						}
						printf("<--%d\n",enoughMapQ);*/
						if (sam_plp_push(ChrIsHum, pierCluster, b) == 0) {
							//printf("--HumRange=%s:%d-%d\n", h->target_name[(pierCluster->HumanRange).tid], (pierCluster->HumanRange).pos, (pierCluster->HumanRange).endpos);
							if ((!ChrIsHum[(d->core).tid]) && (flag & 2)) sam_plp_push(ChrIsHum, pierCluster, d);
							//if ((!ChrIsHum[(d2->core).tid]) && (flag & 4)) sam_plp_push(ChrIsHum, pierCluster, d2);
						} else {
							//print
							fprintf(fs,"[%s]\nHumRange=%s:%d-%d\n", BamID, h->target_name[(pierCluster->HumanRange).tid], (pierCluster->HumanRange).pos, (pierCluster->HumanRange).endpos);
							fprintf(fs,"VirRange=%s:%d-%d\n", h->target_name[(pierCluster->VirusRange).tid], (pierCluster->VirusRange).pos, (pierCluster->VirusRange).endpos);
							for (size_t i=0; i<kv_size(pierCluster->Reads);++i) {
								bam1_t *bi = kv_A(pierCluster->Reads, i);
								if (sam_format1(h, bi, &ks1) < 0) {
									fprintf(stderr, "Error writing output.\n");
									exit_code = 1;
									break;
								} else {
									fprintf(fs,"%s\n",ks1.s);
								}
							}
							fprintf(fs,"\n");
							//printf("HumRange=%s:%d-%d\n", h->target_name[(pierCluster->HumanRange).tid], (pierCluster->HumanRange).pos, (pierCluster->HumanRange).endpos);
							//fflush(fs);
							sam_plp_dectroy(pierCluster);
							pierCluster = sam_plp_init();
						}
					}
				}
				/*char *qname = bam_get_qname(b);
				if (sam_write1(out, h, b) < 0) {
					fprintf(stderr, "[x]Error writing output.\n");
					exit_code = 1;
					break;
				}*/
			}
/*			r = sam_close(out);   // stdout can only be closed once
			if (r < 0) {
				fprintf(stderr, "Error closing output.\n");
				exit_code = 1;
			}
*/
			hts_idx_destroy(idx);
			bam_destroy1(b);
			bam_destroy1(d);
			bam_destroy1(d2);
			bam_hdr_destroy(h);
			r = sam_close(in);
			free(ChrIsHum);
#ifdef DEBUGa
			fflush(NULL);
			//pressAnyKey();
#endif
			sam_plp_dectroy(pierCluster);
			//printf("<[%d]:\n",bami);
		}
	}
	fclose(fs);
	getPairedSam(NULL, NULL, NULL, NULL);	// sam_close(fp2);
	//printf("---[%d]---\n",exit_code);
	bam_destroy1(bR1); bam_destroy1(bR2); bam_destroy1(bR3);
	ks_release(&ks1);
	ks_release(&ks2);
	ks_release(&ks3);
	return exit_code;
}
示例#11
0
char CigarOperation::TypeToChar(const CigarOperationType type)
{ return bam_cigar_opchr(static_cast<int>(type)); }
示例#12
0
文件: hts_utils.cpp 项目: pkrusche/vt
/**
 * Gets the base in the read that is mapped to a genomic position.
 * Extracts the read sequence and aualities too.
 */
void bam_get_base_and_qual_and_read_and_qual(bam1_t *srec, uint32_t pos, char& base, char& qual, int32_t& rpos, kstring_t* readseq, kstring_t* readqual)
{
    bam1_core_t *c = &srec->core;
    int32_t rlen = c->l_qseq;
    uint32_t cpos = c->pos; //reference coordinates of the first mapped base
    rpos = 0; //read coordinates

    kstring_t str;
    str.l = str.m = 0, str.s = 0;
    base = 'N';
    qual = 0;

    if (c->n_cigar)
    {
        uint32_t *cigar = bam_get_cigar(srec);
        for (uint32_t i = 0; i < c->n_cigar; ++i)
        {
            char op = bam_cigar_opchr(cigar[i]);
            str.l = 0;
            kputw(bam_cigar_oplen(cigar[i]), &str);
            char* stop;
            uint32_t len = strtol(str.s, &stop, 10);
            assert(stop);

            if (op=='M')
            {
                if (pos>=cpos && pos<=cpos+len-1)
                {
                    rpos += pos-cpos;
                    break;
                }

                cpos += len;
                rpos += len;
            }
            else if (op=='D')
            {
                if (pos>=cpos && pos<=cpos+len-1)
                {
                    rpos = -1;
                    break;
                }

                cpos += len;
            }
            else if (op=='S' || op=='I')
            {
                rpos += len;
            }
        }

        //std::cout << "bpos " << bpos << "\n";
        if (rpos>=0 && rpos<=rlen)
        {
            //sequence
            bam_get_seq_string(srec, readseq);
            base = readseq->s[rpos];

            //qual
            bam_get_qual_string(srec, readqual);
            qual = readqual->s[rpos];
        }
        else
        {
            rpos = BAM_READ_INDEX_NA;
        }
    }
//    std::cout << "b: " << base << "\n";
//    std::cout << "q: " << s[bpos-1] << " " << q << "\n";
//    for (uint32_t i = 0; i < c->l_qseq; ++i) std::cerr << ((char)(s[i] + 33));
};