Exemple #1
0
int bcf_gt_type(bcf_fmt_t *fmt_ptr, int isample, int *_ial, int *_jal)
{
    int i, nals = 0, has_ref = 0, has_alt = 0, ial = 0, jal = 0;
    #define BRANCH_INT(type_t,missing,vector_end) { \
        type_t *p = (type_t*) (fmt_ptr->p + isample*fmt_ptr->size); \
        for (i=0; i<fmt_ptr->n; i++) \
        { \
            if ( p[i] == vector_end ) break; /* smaller ploidy */ \
            if ( !p[i] || p[i] == missing ) continue; /* missing allele */ \
            int tmp = p[i]>>1; \
            if ( tmp>1 ) \
            { \
                if ( !ial ) { ial = tmp; has_alt = 1; } \
                else if ( tmp!=ial ) \
                { \
                    if ( tmp<ial ) \
                    { \
                        jal = ial; \
                        ial = tmp; \
                    } \
                    else \
                    { \
                        jal = tmp; \
                    } \
                    has_alt = 2; \
                } \
            } \
            else has_ref = 1; \
            nals++; \
        } \
    }
    switch (fmt_ptr->type) {
        case BCF_BT_INT8:  BRANCH_INT(int8_t,  bcf_int8_missing, bcf_int8_vector_end); break;
        case BCF_BT_INT16: BRANCH_INT(int16_t, bcf_int16_missing, bcf_int16_vector_end); break;
        case BCF_BT_INT32: BRANCH_INT(int32_t, bcf_int32_missing, bcf_int32_vector_end); break;
        default: fprintf(stderr, "[E::%s] todo: fmt_type %d\n", __func__, fmt_ptr->type); exit(1); break;
    }
    #undef BRANCH_INT

    if ( _ial ) *_ial = ial>0 ? ial-1 : ial;
    if ( _jal ) *_jal = jal>0 ? jal-1 : jal;
    if ( !nals ) return GT_UNKN;
    if ( nals==1 )
        return has_ref ? GT_HAPL_R : GT_HAPL_A;
    if ( !has_ref ) 
        return has_alt==1 ? GT_HOM_AA : GT_HET_AA;
    if ( !has_alt )
        return GT_HOM_RR;
    return GT_HET_RA;
}
Exemple #2
0
// true if all samples are phased.
// haploid genotypes are considered phased
// ./. => not phased, .|. => phased
int bcf_all_phased(const bcf_hdr_t *header, bcf1_t *line)
{
    bcf_unpack(line, BCF_UN_FMT);
    bcf_fmt_t *fmt_ptr = bcf_get_fmt(header, line, "GT");
    int all_phased = 1;
    if ( fmt_ptr )
    {
        int i, isample;
        for (isample=0; isample<line->n_sample; isample++)
        {
            int sample_phased = 0;
            #define BRANCH_INT(type_t,vector_end) { \
                type_t *p = (type_t*) (fmt_ptr->p + isample*fmt_ptr->size); \
                for (i=0; i<fmt_ptr->n; i++) \
                { \
                    if (fmt_ptr->n == 1 || (p[i] == vector_end && i == 1)) { sample_phased = 1; break; } /* haploid phased by definition */ \
                    if ( p[i] == vector_end ) { break; }; /* smaller ploidy */ \
                    if ( bcf_gt_is_missing(p[i]) ) continue; /* missing allele */ \
                    if ((p[i])&1) { \
                        sample_phased = 1; \
                        break; \
                    } \
                } \
            }
            switch (fmt_ptr->type) {
                case BCF_BT_INT8:  BRANCH_INT(int8_t,  bcf_int8_vector_end); break;
                case BCF_BT_INT16: BRANCH_INT(int16_t, bcf_int16_vector_end); break;
                case BCF_BT_INT32: BRANCH_INT(int32_t, bcf_int32_vector_end); break;
                default: fprintf(stderr, "[E::%s] todo: fmt_type %d\n", __func__, fmt_ptr->type); exit(1); break;
            }
            #undef BRANCH_INT
            if (!sample_phased) {
                all_phased = 0;
                break;
            }
        }
    }
    return all_phased;
}
Exemple #3
0
int calc_ac(const bcf_hdr_t *header, bcf1_t *line, int *ac, int which)
{
	int i;
	for (i=0; i<line->n_allele; i++) ac[i]=0;

	// Use INFO/AC,AN field only when asked
	if ( which&BCF_UN_INFO )
	{
		bcf_unpack(line, BCF_UN_INFO);
		int an_id = bcf_id2int(header, BCF_DT_ID, "AN");
		int ac_id = bcf_id2int(header, BCF_DT_ID, "AC");
		if ( an_id>=0 && ac_id>=0 )
		{
			int i, an=0, ac_len=0, ac_type=0;
			uint8_t *ac_ptr=NULL;
			for (i=0; i<line->n_info; i++)
			{
				bcf_info_t *z = &line->d.info[i];
				if ( z->key == an_id ) an = z->v1.i;
				else if ( z->key == ac_id ) { ac_ptr = z->vptr; ac_len = z->len; ac_type = z->type; }
			}
			int nac = 0;
            #define BRANCH_INT(type_t) {        \
                type_t *p = (type_t *) ac_ptr;  \
                for (i=0; i<ac_len; i++)        \
                {                               \
                    ac[i+1] = p[i];             \
                    nac += p[i];                \
                }                               \
            }
            if ( ac_type==BCF_BT_INT8 ) { BRANCH_INT(uint8_t) }
            else if ( ac_type==BCF_BT_INT16 ) { BRANCH_INT(uint16_t) }
            else if ( ac_type==BCF_BT_INT32 ) { BRANCH_INT(uint32_t) }
            #undef BRANCH_INT
			ac[0] = an - nac;
			return 1;
		}
Exemple #4
0
int bcf_calc_ac(const bcf_hdr_t *header, bcf1_t *line, int *ac, int which)
{
	int i;
	for (i=0; i<line->n_allele; i++) ac[i]=0;

	// Use INFO/AC,AN field only when asked
	if ( which&BCF_UN_INFO )
	{
		bcf_unpack(line, BCF_UN_INFO);
		int an_id = bcf_hdr_id2int(header, BCF_DT_ID, "AN");
		int ac_id = bcf_hdr_id2int(header, BCF_DT_ID, "AC");
        int i, an=-1, ac_len=0, ac_type=0;
        uint8_t *ac_ptr=NULL;
		if ( an_id>=0 && ac_id>=0 )
		{
			for (i=0; i<line->n_info; i++)
			{
				bcf_info_t *z = &line->d.info[i];
				if ( z->key == an_id ) an = z->v1.i;
				else if ( z->key == ac_id ) { ac_ptr = z->vptr; ac_len = z->len; ac_type = z->type; }
			}
        }
        if ( an>=0 && ac_ptr )
        {
			int nac = 0;
            #define BRANCH_INT(type_t) {        \
                type_t *p = (type_t *) ac_ptr;  \
                for (i=0; i<ac_len; i++)        \
                {                               \
                    ac[i+1] = p[i];             \
                    nac += p[i];                \
                }                               \
            }
            switch (ac_type) {
                case BCF_BT_INT8:  BRANCH_INT(int8_t); break;
                case BCF_BT_INT16: BRANCH_INT(int16_t); break;
                case BCF_BT_INT32: BRANCH_INT(int32_t); break;
                default: fprintf(stderr, "[E::%s] todo: %d at %s:%d\n", __func__, ac_type, header->id[BCF_DT_CTG][line->rid].key, line->pos+1); exit(1); break;
            }
            #undef BRANCH_INT
            assert( an>=nac );  // sanity check for missing values
			ac[0] = an - nac;
			return 1;
        }
	}

	// Split genotype fields only when asked
	if ( which&BCF_UN_FMT )
	{
		int i, gt_id = bcf_hdr_id2int(header,BCF_DT_ID,"GT");
		if ( gt_id<0 ) return 0;
		bcf_unpack(line, BCF_UN_FMT);
		bcf_fmt_t *fmt_gt = NULL;
		for (i=0; i<(int)line->n_fmt; i++) 
			if ( line->d.fmt[i].id==gt_id ) { fmt_gt = &line->d.fmt[i]; break; }
		if ( !fmt_gt ) return 0;
        #define BRANCH_INT(type_t,missing,vector_end) { \
		    for (i=0; i<line->n_sample; i++) \
		    { \
                type_t *p = (type_t*) (fmt_gt->p + i*fmt_gt->size); \
		    	int ial; \
		    	for (ial=0; ial<fmt_gt->n; ial++) \
		    	{ \
                    if ( p[ial]==vector_end ) break; /* smaller ploidy */ \
                    if ( !(p[ial]>>1) || p[ial]==missing ) continue; /* missing allele */ \
		    		ac[(p[ial]>>1)-1]++; \
		    	} \
		    } \
        }
        switch (fmt_gt->type) {
            case BCF_BT_INT8:  BRANCH_INT(int8_t,  bcf_int8_missing, bcf_int8_vector_end); break;
            case BCF_BT_INT16: BRANCH_INT(int16_t, bcf_int16_missing, bcf_int16_vector_end); break;
            case BCF_BT_INT32: BRANCH_INT(int32_t, bcf_int32_missing, bcf_int32_vector_end); break;
            default: fprintf(stderr, "[E::%s] todo: %d at %s:%d\n", __func__, fmt_gt->type, header->id[BCF_DT_CTG][line->rid].key, line->pos+1); exit(1); break;
        }
        #undef BRANCH_INT
		return 1;
	}
	return 0;
}
Exemple #5
0
bcf1_t *process(bcf1_t *rec)
{
    int i, ns = 0;

    bcf_unpack(rec, BCF_UN_FMT);
    bcf_fmt_t *fmt_gt = NULL;
    for (i=0; i<rec->n_fmt; i++)
        if ( rec->d.fmt[i].id==args.gt_id ) { fmt_gt = &rec->d.fmt[i]; break; }
    if ( !fmt_gt ) return rec;    // no GT tag

    hts_expand(int32_t,rec->n_allele,args.marr,args.arr);
    hts_expand(float,rec->n_allele,args.mfarr,args.farr);
    hts_expand(counts_t,rec->n_allele,args.mcounts,args.counts);
    memset(args.arr,0,sizeof(*args.arr)*rec->n_allele);
    memset(args.counts,0,sizeof(*args.counts)*rec->n_allele);

    #define BRANCH_INT(type_t,vector_end) { \
        for (i=0; i<rec->n_sample; i++) \
        { \
            type_t *p = (type_t*) (fmt_gt->p + i*fmt_gt->size); \
            int ial, als = 0; \
            for (ial=0; ial<fmt_gt->n; ial++) \
            { \
                if ( p[ial]==vector_end ) break; /* smaller ploidy */ \
                if ( bcf_gt_is_missing(p[ial]) ) break; /* missing allele */ \
                int idx = bcf_gt_allele(p[ial]); \
                \
                if ( idx >= rec->n_allele ) \
                    error("Incorrect allele (\"%d\") in %s at %s:%d\n",idx,args.in_hdr->samples[i],bcf_seqname(args.in_hdr,rec),rec->pos+1); \
                als |= (1<<idx);  /* this breaks with too many alleles */ \
            } \
            if ( ial==0 ) continue; /* missing alleles */ \
            ns++; \
            int is_hom  = als && !(als & (als-1)); /* only one bit is set */ \
            int is_hemi = ial==1; \
            for (ial=0; als; ial++) \
            { \
                if ( als&1 ) \
                { \
                    if ( !is_hom ) \
                        args.counts[ial].nhet++; \
                    else if ( !is_hemi ) \
                        args.counts[ial].nhom += 2; \
                    else \
                        args.counts[ial].nhemi++; \
                } \
                als >>= 1; \
            } \
        } \
    }
    switch (fmt_gt->type) {
        case BCF_BT_INT8:  BRANCH_INT(int8_t,  bcf_int8_vector_end); break;
        case BCF_BT_INT16: BRANCH_INT(int16_t, bcf_int16_vector_end); break;
        case BCF_BT_INT32: BRANCH_INT(int32_t, bcf_int32_vector_end); break;
        default: error("The GT type is not recognised: %d at %s:%d\n",fmt_gt->type, bcf_seqname(args.in_hdr,rec),rec->pos+1); break;
    }
    #undef BRANCH_INT
    if ( args.tags&SET_NS )
    {
        if ( bcf_update_info_int32(args.out_hdr,rec,"NS",&ns,1)!=0 )
            error("Error occurred while updating NS at %s:%d\n", bcf_seqname(args.in_hdr,rec),rec->pos+1);
    }
    if ( args.tags&SET_AN )
    {
        args.arr[0] = 0;
        for (i=0; i<rec->n_allele; i++)
            args.arr[0] += args.counts[i].nhet + args.counts[i].nhom + args.counts[i].nhemi;
        if ( bcf_update_info_int32(args.out_hdr,rec,"AN",args.arr,1)!=0 )
            error("Error occurred while updating AN at %s:%d\n", bcf_seqname(args.in_hdr,rec),rec->pos+1);
    }
    if ( args.tags&SET_AF )
    {
        int n = rec->n_allele-1;
        if ( n>0 )
        {
            args.arr[0] = 0;
            for (i=0; i<rec->n_allele; i++)
                args.arr[0] += args.counts[i].nhet + args.counts[i].nhom + args.counts[i].nhemi;
            for (i=1; i<rec->n_allele; i++)
                args.farr[i] = (args.counts[i].nhet + args.counts[i].nhom + args.counts[i].nhemi)*1.0/args.arr[0];
        }
        if ( args.arr[0] )
        {
            if ( bcf_update_info_float(args.out_hdr,rec,"AF",args.farr+1,n)!=0 )
                error("Error occurred while updating AF at %s:%d\n", bcf_seqname(args.in_hdr,rec),rec->pos+1);
        }
    }
    if ( args.tags&SET_AC )
    {
        int n = rec->n_allele-1;
        if ( n>0 )
        {
            memset(args.arr,0,sizeof(*args.arr)*rec->n_allele);
            for (i=1; i<rec->n_allele; i++)
                args.arr[i] = args.counts[i].nhet + args.counts[i].nhom + args.counts[i].nhemi;
        }
        if ( bcf_update_info_int32(args.out_hdr,rec,"AC",args.arr+1,n)!=0 )
            error("Error occurred while updating AC at %s:%d\n", bcf_seqname(args.in_hdr,rec),rec->pos+1);
    }
    if ( args.tags&SET_AC_Het )
    {
        int n = rec->n_allele-1;
        if ( n>0 )
        {
            memset(args.arr,0,sizeof(*args.arr)*rec->n_allele);
            for (i=1; i<rec->n_allele; i++)
                args.arr[i] += args.counts[i].nhet;
        }
        if ( bcf_update_info_int32(args.out_hdr,rec,"AC_Het",args.arr+1,n)!=0 )
            error("Error occurred while updating AC_Het at %s:%d\n", bcf_seqname(args.in_hdr,rec),rec->pos+1);
    }
    if ( args.tags&SET_AC_Hom )
    {
        int n = rec->n_allele-1;
        if ( n>0 )
        {
            memset(args.arr,0,sizeof(*args.arr)*rec->n_allele);
            for (i=1; i<rec->n_allele; i++)
                args.arr[i] += args.counts[i].nhom;
        }
        if ( bcf_update_info_int32(args.out_hdr,rec,"AC_Hom",args.arr+1,n)!=0 )
            error("Error occurred while updating AC_Hom at %s:%d\n", bcf_seqname(args.in_hdr,rec),rec->pos+1);
    }
    if ( args.tags&SET_AC_Hemi )
    {
        int n = rec->n_allele-1;
        if ( n>0 )
        {
            memset(args.arr,0,sizeof(*args.arr)*rec->n_allele);
            for (i=1; i<rec->n_allele; i++)
                args.arr[i] += args.counts[i].nhemi;
        }
        if ( bcf_update_info_int32(args.out_hdr,rec,"AC_Hemi",args.arr+1,n)!=0 )
            error("Error occurred while updating AC_Hemi at %s:%d\n", bcf_seqname(args.in_hdr,rec),rec->pos+1);
    }
    return rec;
}