Example #1
0
/*
** def_tag_args
**
** Read the words following a tag name in a tag definition from
** hp->inpf and apply them to tag.
**
** First, as long as the current word is "/" or a linefeed, the word
** after it is fetched and (unless it is a linefeed itself) passed to
** parse_tag_option(). After that, every word up to ">" that is not a
** linefeed is pushed back and handed to parse_tag_var(). If a word is
** still available at the end, it is pushed back and parse_gt() is
** called.
**
** params: hp...hsc process; words are read from hp->inpf
**         tag..tag to apply options/attributes to (may be NULL)
** result: FALSE if tag is NULL;
**         the result of parse_gt() if it was called (this replaces
**         the results collected from the option/attribute parsers);
**         otherwise the AND-ed results of the option/attribute
**         parsers
*/
BOOL def_tag_args( HSCPRC *hp, HSCTAG *tag )
{
    INFILE *input = hp->inpf;
    STRPTR  word;
    BOOL    result;

    /* nothing to do without a tag */
    if ( !tag )
        return( FALSE );

    result = TRUE;

    /* fetch first word after the tag name */
    word = infgetw( input );

    /*
    ** option section
    */
    while ( word
            && ( ( strcmp( word, "/" ) == 0 )
                 || ( strcmp( word, "\n" ) == 0 ) ) )
    {
        /* word following the "/" (or linefeed) */
        word = infgetw( input );
        if ( !word )
            break;

        if ( strcmp( word, "\n" ) != 0 )
            result &= parse_tag_option( hp, word, tag );

        word = infgetw( input );
    }

    /*
    ** attribute section
    */
    for ( ;
          word && ( ( strcmp( word, ">" ) != 0 )
                    || ( strcmp( word, "\n" ) == 0 ) );
          word = infgetw( input ) )
    {
        /* linefeeds between attributes are ignored */
        if ( strcmp( word, "\n" ) == 0 )
            continue;

        /* let parse_tag_var() re-read the word itself */
        inungetcw( input );
        result &= parse_tag_var( hp, tag );
    }

    /* skip linefeeds */
    while ( word && ( strcmp( word, "\n" ) == 0 ) )
        word = infgetw( input );

    /* check for ">" at end */
    if ( word ) {

        inungetcw( input );
        result = parse_gt( hp );

    }

    return( result );
}
Example #2
0
/*
**-------------------------------------
** <$LET> set a new global attribute
**        or overwrite a defined one
**-------------------------------------
**
** Reads the attribute name, expects a ":" after it, lets
** define_var() parse the definition into the global list `vars`
** and finally expects the closing ">".
**
** params: inpf..input file to read from
**         tag...not used by this handler
** result: TRUE if the whole tag was parsed successfully;
**         on failure the rest of the tag is skipped and FALSE
**         is returned
*/
BOOL handle_hsc_let( INFILE *inpf, HSCTAG *tag )
{
    STRPTR word    = infgetw( inpf );  /* name as found in input */
    STRPTR varname = NULL;             /* private copy of name */
    BOOL   ok      = FALSE;

    if ( !word ) {

        /* end of file instead of a name; do not additionally */
        /* report this as an out-of-memory condition */
        err_eof( inpf, "missing attribute name" );

    } else {

        /* create copy of varname */
        varname = strclone( word );

        if ( !varname )
            err_mem( inpf );
        else {

            ok = parse_wd( inpf, ":" );

            /* a failed definition must be reflected in the result */
            if ( ok )
                ok = ( define_var( varname, vars, inpf, 0 ) ? TRUE : FALSE );

            if ( ok )
                ok = parse_gt( inpf );

        }
    }

    /* release mem */
    ufreestr( varname );

    /* if error occurred, skip rest of tag */
    if ( !ok )
        skip_until_eot( inpf );

    return ( ok );
}
    // Rewrite a two-entry GT sample value as a single-entry one.
    //
    // Nothing happens when the record has no GT value or when parse_gt()
    // does not yield exactly two entries. If the two entries are equal,
    // GT becomes that single entry ("." when it is negative) and any PL
    // value is moved to the tag named by _shopt.orig_pl_tag. If they
    // differ, _shopt.haploid_conflict_label is appended as a filter and
    // the sample values are left alone.
    void
    make_record_haploid(VcfRecord& vcfr) const {
        const char* const gtStr = vcfr.GetSampleVal("GT");
        if (gtStr == NULL) return;

        parse_gt(gtStr, _gti);

        // only two-entry genotypes are rewritten
        if (_gti.size() != 2) return;

        // conflicting entries: flag the record and stop
        if (_gti[0] != _gti[1]) {
            vcfr.AppendFilter(_shopt.haploid_conflict_label.c_str());
            return;
        }

        // change GT:
        const char* haploidGt = ".";
        if (_gti[0] >= 0) {
            haploidGt = _intstr.get32(_gti[0]);
        }
        vcfr.SetSampleVal("GT", haploidGt);

        // move PL field to 'backup' OPL field:
        const char* const plStr = vcfr.GetSampleVal("PL");
        if (plStr == NULL) return;

        vcfr.SetSampleVal(_shopt.orig_pl_tag.c_str(), plStr);
        vcfr.DeleteSampleKeyVal("PL");
    }
// Fill digt_code from the "GT" entry located by
// get_format_string_nocopy().
//
// When no GT entry is found, digt_code is set to the single value -1;
// otherwise parse_gt() populates it (called with its third argument set
// to true).
//
// Returns true only when digt_code ends up holding exactly two entries
// and both are non-negative.
static
bool
get_digt_code(const char* const* word,
              std::vector<int>& digt_code) {

    const char* const gt = get_format_string_nocopy(word, "GT");

    if (gt != NULL) {
        parse_gt(gt, digt_code, true);
    } else {
        digt_code.assign(1, -1);
    }

    if (digt_code.size() != 2) return false;
    return (digt_code[0] >= 0) && (digt_code[1] >= 0);
}
Example #5
0
/*
 * hsc_parse_tag
 *
 * parse tag (after "<")
 *
 * Handles both start-tags and end-tags:
 * - looks the tag id up in hp->deftag (an unknown start-tag gets a
 *   temporary HSCTAG that is deleted again before returning)
 * - emits the option-related messages (obsolete, jerk, only-once, ...)
 * - for start-tags not marked HT_SPECIAL, parses the attributes via
 *   set_tag_args()
 * - calls the tag's open/close handle, if any
 * - passes the collected tag text (hp->tag_name_str, hp->tag_attr_str,
 *   hp->tag_close_str) to hp->CB_start_tag or hp->CB_end_tag, unless
 *   the tag is stripped, marked HT_NOCOPY, or the handle returned FALSE
 *
 * If hp->smart_ent is set, hp->whtspc is non-empty and the char after
 * "<" is a white space, the "<" is not parsed as a tag at all but
 * written as "&lt;".
 *
 * params: hp...hsc process; input is read from hp->inpf
 * result: TRUE, if hp->fatal is not set on return
 */
BOOL hsc_parse_tag(HSCPRC * hp)
{
    INFILE *inpf = hp->inpf;
    STRPTR nxtwd = NULL;
    DLNODE *nd = NULL;
    HSCTAG *tag = NULL;
    ULONG tci = 0;              /* tag_call_id returned by set_tag_args() */
    BOOL(*hnd) (HSCPRC * hp, HSCTAG * tag) = NULL;
    BOOL open_tag;              /* assigned in both the start-tag and the
                                 * end-tag branch before it is read */
    DLLIST *taglist = hp->deftag;
    BOOL rplc_lt = FALSE;       /* TRUE, if replace spc. char "<" */
    BOOL hnd_result = TRUE;     /* result returned by handle */
    BOOL unknown_tag = FALSE;   /* TRUE, if tag has not been defined before */
    BOOL preceeding_whtspc = estrlen(hp->whtspc);

    /* init strings used inside tag-handles */
    set_estr(hp->tag_name_str, infgetcw(inpf));
    clr_estr(hp->tag_attr_str);
    clr_estr(hp->tag_close_str);

    if (hp->smart_ent && preceeding_whtspc)
    {
        /*
         * check for special char "<"
         */
        int ch = infgetc(inpf);

        /* check if next char is a white space */
        if (hsc_whtspc(ch))
        {
            rplc_lt = TRUE;

            /* write "&lt;" and white spaces */
            message_rplc(hp, "<", "&lt;");
            hsc_output_text(hp, "", "&lt;");
        }
        /* the char was only peeked at; put it back in either case */
        inungetc(ch, inpf);
    }

    if (!rplc_lt)
    {
        /* get tag id */
        nxtwd = infget_tagid(hp);

        if (!hp->fatal)
        {
            /* append tag-name to tag_name_str */
            app_estr(hp->tag_name_str, infgetcw(inpf));

            /* check for hsctag; if not, enable output */
            if (hp->suppress_output
                && upstrncmp(nxtwd, HSC_TAGID, strlen(HSC_TAGID))
                && strcmp(nxtwd, HSC_COMMENT_STR)
                && strcmp(nxtwd, HSC_ONLYCOPY_STR)
                )
            {
                hp_enable_output(hp, "non-hsctag occured");
            }

            if (!hp->suppress_output)
            {
                D(fprintf(stderr, DHL "tag <"));
            }
        }
    }

    if (!hp->fatal && !rplc_lt)
    {
        BOOL write_tag = FALSE; /* flag: write tag text & attrs to output? */

        /* NOTE(review): nxtwd is used without a NULL check here; this
         * assumes infget_tagid() never returns NULL while hp->fatal is
         * clear -- confirm */
        if (strcmp("/", nxtwd)) /* is it a closing tag? */
        {
            /*
             *
             * process start-tag
             *
             */
            open_tag = TRUE;
            if (!hp->suppress_output)
            {
                D(fprintf(stderr, "%s>\n", nxtwd));
            }
            /* search for tag in list */
            nd = find_dlnode(taglist->first, (APTR) nxtwd, cmp_strtag);
            if (nd == NULL)
            {
                hsc_message(hp, MSG_UNKN_TAG,   /* tag not found */
                            "unknown %t", nxtwd);
                /* create a temporary tag so the code below can treat
                 * known and unknown tags alike; it is removed again at
                 * the end of this function.
                 * NOTE(review): the result of new_hsctag() is
                 * dereferenced unchecked -- confirm it cannot be NULL */
                tag = new_hsctag(nxtwd);
                tag->option |= HT_UNKNOWN;
                unknown_tag = TRUE;
#if 0 /* TODO: remove */
                /* NOTE: This one's a bit perverted, because
                 * the closing ">" is appended to the
                 * attribute string, and the closing string
                 * is left empty; as there is nearly no code
                 * between setting and writing the strings,
                 * I think this is more reasonable than doing
                 * some tricky string-manipulation...
                 */
                skip_until_eot(hp, hp->tag_attr_str);
                clr_estr(hp->tag_close_str);
#endif
            }
            else
            {
                tag = (HSCTAG *) nd->data;
            }

            /* set handle-function */
            hnd = tag->o_handle;

            /*
             * handle options
             */

            /* check for obsolete tag */
            if (tag->option & HT_OBSOLETE)
            {
                hsc_message(hp, MSG_TAG_OBSOLETE,
                            "%T is obsolete", tag);
            }

            /* check for jerk-tag */
            if (tag->option & HT_JERK)
            {
                hsc_message(hp, MSG_TAG_JERK,
                            "%T is only used by %j", tag);
            }

            /* only-once-tag occurred twice? */
            if ((tag->option & HT_ONLYONCE) && (tag->occured))
            {
                hsc_message(hp, MSG_TAG_TOO_OFTEN,
                            "%T occured too often", tag);
            }

            /* remember the occurrence (only tracked for only-once and
             * required tags) */
            if (tag->option & (HT_ONLYONCE | HT_REQUIRED))
                tag->occured = TRUE;

            /* check for "must be inside"/"not allowed within"-tags;
             * on failure the handle is not called */
            if (!check_mbinaw(hp, tag))
                hnd = NULL;

            /* clear (reset to default) attribute values of tag */
            clr_varlist(tag->attr);

            /* set attributes or check for ">" */
            if (!(tag->option & HT_SPECIAL))
            {
                tci = set_tag_args(hp, tag);
                if (tci == MCI_ERROR)
                {
                    /* attributes could not be parsed: skip the rest of
                     * the tag and do not call the handle */
                    skip_until_eot(hp, NULL);
                    hnd = NULL;
                }

                if (!hp->fatal)
                {
                    /* set ">" in string that contains closing text */
                    if (!hp->compact)
                    {
                        set_estr(hp->tag_close_str, infgetcws(inpf));
                    }
                    else
                    {
                        clr_estr(hp->tag_close_str);
                    }
                    app_estr(hp->tag_close_str, infgetcw(inpf));

                    /* check for succeeding white-space */
                    if ((tag->option & HT_WHTSPC) && !infeof(inpf))
                    {
                        int ch = infgetc(inpf);

                        if (hsc_whtspc(ch))
                        {
                            if (hp->strip_badws)
                            {
                                hp->strip_next2_whtspc = TRUE;
                            }
                            else
                            {
                                hsc_message(hp, MSG_SUCC_WHTSPC,
                                            "succeeding white-space for %T",
                                            tag);
                            }
                        }
                        /* the char was only peeked at; put it back */
                        inungetc(ch, inpf);
                    }
                }
            }

            /* end-tag required? */
            if (tag->option & HT_CLOSE)
                app_ctag(hp, tag);
        }
        else
        {
            /*
             *
             * process end-tag
             *
             */

            /* get tag id */
            nxtwd = infget_tagid(hp);   /* get tag id */
            open_tag = FALSE;

            /* append tag-name to tag_name_str */
            if (!hp->compact)
            {
                app_estr(hp->tag_name_str, infgetcws(inpf));
            }
            app_estr(hp->tag_name_str, infgetcw(inpf));

            if (!hp->suppress_output)
            {
                D(fprintf(stderr, "/%s>\n", nxtwd));
            }
            /* search for tag in taglist */
            /* (see if it exists at all) */
            nd = find_dlnode(taglist->first, (APTR) nxtwd, cmp_strtag);
            if (nd == NULL)
            {
                /* closing tag is absolutely unknown; tag stays NULL, so
                 * the tag text collected here is still written out below */
                hsc_message(hp, MSG_UNKN_TAG,   /* tag not found */
                            "unknown %c", nxtwd);
                skip_until_eot(hp, hp->tag_attr_str);
            }
            else
            {
                tag = (HSCTAG *) nd->data;      /* fitting tag in taglist */

                /* check for preceding white-spaces */
                if ((tag->option & HT_WHTSPC) && anyWhtspc(hp))
                {
                    if (hp->strip_badws)
                    {
                        hp->strip_next_whtspc = TRUE;
                    }
                    else
                    {
                        hsc_message(hp, MSG_PREC_WHTSPC,
                                    "preceding white space for %C", tag);
                    }
                }

                if (tag->option & (HT_CLOSE | HT_AUTOCLOSE))
                {
                    /* set closing handle */
                    hnd = tag->c_handle;

                    /* check for no args */
                    if (!parse_wd(hp, ">"))
                    {
                        hsc_message(hp, MSG_CL_TAG_ARG,
                                    "no attributes allowed for end-tags");
                    }
                    else
                    {
                        /* set ">" in string that contains closing text */
                        if (!hp->compact)
                        {
                            set_estr(hp->tag_close_str, infgetcws(inpf));
                        }
                        app_estr(hp->tag_close_str, infgetcw(inpf));
                    }

                    /* set values of attributes stored
                     * in end-tag,
                     * remove end-tag from stack
                     */
                    remove_ctag(hp, tag);
                }
                else
                {
                    /* illegal closing tag: the tag exists, but takes
                     * no end-tag */
                    hsc_message(hp, MSG_ILLG_CTAG,      /* tag not found */
                                "illegal %c", nxtwd);
                    parse_gt(hp);
                    tag = NULL;
                }
            }
        }

        /*
         * processed for opening AND closing tag
         */

        /* tags without an HSCTAG are always copied; known ones unless
         * marked HT_NOCOPY */
        write_tag = (!(tag) || !(tag->option & HT_NOCOPY));

        if (tag)
        {
            /*
             * check if tag should be stripped
             */
            if (!postprocess_tagattr(hp, tag, open_tag))
            {
                /* stripped tag with external reference */
                if (open_tag)
                    hsc_msg_stripped_tag(hp, tag, "external reference");
                hnd = NULL;     /* don't call handle */
                write_tag = FALSE;      /* don't output tag */
            }
            else if (hp->strip_tags
                     && strenum(tag->name, hp->strip_tags, '|', STEN_NOCASE))
            {
                /* strip tag requested by user */
                if (!(tag->option & HT_SPECIAL))
                {
                    if (open_tag)
                        hsc_msg_stripped_tag(hp, tag, "as requested");
                    hnd = NULL; /* don't call handle */
                    write_tag = FALSE;  /* don't output tag */
                }
                else
                {
                    hsc_message(hp, MSG_TAG_CANT_STRIP,
                                "can not strip special tag %T", tag);
                }

                /*
                 * (refers to the branch below:)
                 * get values for size from reference
                 */
            }
            else if (tag->uri_size && get_vartext(tag->uri_size))
                get_attr_size(hp, tag);
        }

        /* call handle if available */
        if (hnd && !hp->fatal)
            hnd_result = (*hnd) (hp, tag);

        /* write whole tag out */
        if (write_tag && hnd_result)
        {
            VOID(*tag_callback) (struct hscprocess * hp,
                                 HSCTAG * tag,
                 STRPTR tag_name, STRPTR tag_attr, STRPTR tag_close) = NULL;

            if (open_tag)
                tag_callback = hp->CB_start_tag;
            else
                tag_callback = hp->CB_end_tag;

            /* write white spaces */
            hsc_output_text(hp, "", "");

            if (tag_callback)
            {
                (*tag_callback) (hp, tag,
                                 estr2str(hp->tag_name_str),
                                 estr2str(hp->tag_attr_str),
                                 estr2str(hp->tag_close_str));
            }
        }

        /* skip LF if requested */
        if (tag && (tag->option & HT_SKIPLF))
        {
            skip_next_lf(hp);   /* TODO: really skip single lf */
        }

        /* remove temporary created tag */
        if (unknown_tag)
            del_hsctag(tag);


#if (defined MSDOS && (!defined HSC_TRIGGER))
#define UNLIKELY (10*1024)
        /* crash randomly
         *
         * NOTE(review): on MSDOS builds without HSC_TRIGGER this calls
         * enforcerHit() whenever rand() % UNLIKELY happens to equal
         * UNLIKELY / 2; the reason is not evident from this function --
         * confirm that this is still wanted */
        if ((rand() % UNLIKELY) == (UNLIKELY / 2))
        {
            enforcerHit();
        }
#endif
    }

    return (BOOL) (!hp->fatal);
}
// modify overlapping site and indel records to be self-consistent:
void
VcfRecordBlocker::
GroomRecordBuffer() {

    const unsigned n_records(_recordBuffer.size());

#ifdef VDEBUG
    if (true) {
        std::cerr << "VDEBUG input: indel count: " << _indelIndex.size() << "\n";
        for (unsigned i(0); i<n_records; ++i) {
            _recordBuffer[i].WriteUnaltered(std::cerr);
        }
    }
#endif

    // create a map of 'covered' ploidy through the indel region based
    // on the first indel, any additional inside of the first must be
    // conflict:
    region_info rinfo;

    if (_indelIndex.size() > 1) {
        rinfo.filters.push_back(_opt.indel_conflict_label);
    } else {
        // set additional indel filters:
        const GatkVcfRecord& record(_recordBuffer[_indelIndex[0]]);
        const std::vector<std::string>& filters(record.GetFilter());
        const unsigned n_filt(filters.size());
        if ((n_filt!=1) || filters[0] != "PASS") {
            rinfo.filters = filters;
        }

        // set additional minq:
        rinfo.qual.str=record.GetQual().c_str();
        rinfo.qual.is_valid=checked_double_parse(rinfo.qual.str,rinfo.qual.val);
        rinfo.gq.str=record.GetSampleVal("GQ");
        rinfo.gq.is_valid=checked_double_parse(rinfo.gq.str,rinfo.gq.val);

        _gti.clear();
        if (! record.GetGT().empty()) {
            parse_gt(record.GetGT().c_str(),_gti);
        }
        if (_gti.size() == 2) {
            if ((_gti[0]==0 && _gti[1]>0) || (_gti[1]==0 && _gti[0]>0)) { rinfo.copyn=1; }
        }
    }


    // 2) modify site records according to overlapping filter status (or mark all as IndelConflict)
    //
    bool is_edit(true);
    std::vector<refedit> edits;
    for (unsigned record_index(0); record_index<n_records; ++record_index) {
        GatkVcfRecord& record(_recordBuffer[record_index]);
        const int pos(record.GetPos());
        const bool is_in_indel((pos>=_bufferStartPos) && (pos<=_bufferEndPos));
        if (! is_in_indel) continue;
        const unsigned offset(pos-_bufferStartPos);
        adjust_overlap_record(_opt,rinfo,offset,record,is_edit,edits);
        // regroom record to account for quality value changes, etc:
        GroomInputRecord(record);
    }

    // 3) modify indel records according to any site conflicts or hemizygous snps present:

    // this is 90% done, but no easy way to make the per-allele tag adjustment reliable w/o parsing
    // header for all cases first. not worth pursuing for now...
#if 0
    if (is_edit && (! edits.empty())) {
        // we should only get here for simple het deletions:
        GatkVcfRecord& record(_recordBuffer[_indelIndex[0]]);
        std::string allele(record.GetRef());
        bool is_diff(false);
        for (unsigned i(0); i<edits.size(); ++i) {
            if (allele[edits[i].first+1] != edits[i].second) {
                allele[edits[i].first+1] = edits[i].second;
                is_diff=true;
            }
        }
        if (is_diff) {
            // 1) insert new alternate allele
            // 2) update GT
            // 3) modify or delete all other allele dependent tags (this might just be AD in practice)
            std::vector<std::string>& alt(record.GetAlt());
            alt.insert(alt.begin(),allele);
            record.SetSampleVal("GT","1/2");
        }
    }
#endif


#ifdef VDEBUG
    if (true) {
        std::cerr << "VDEBUG output: indel count: " << _indelIndex.size() << "\n";
        for (unsigned i(0); i<n_records; ++i) {
            _recordBuffer[i].WriteUnaltered(std::cerr);
        }
    }
#endif
}
// Apply the region-wide settings in rinfo to one overlapping record.
//
// - every entry of rinfo.filters is appended to the record's filters
// - QUAL / the GQ sample value are replaced by the region's value when
//   both parse as doubles and the region's value is the lower one
// - the GT sample value is adjusted according to rinfo.copyn, which
//   must be below 2 (asserted)
//
// The three unnamed parameters are accepted but not used.
static
void
adjust_overlap_record(const BlockerOptions& opt,
                      const region_info& rinfo,
                      const unsigned,
                      GatkVcfRecord& record,
                      bool&,
                      std::vector<refedit>&) {

    // apply filters:
    const unsigned filterCount = rinfo.filters.size();
    for (unsigned i = 0; i < filterCount; ++i) {
        record.AppendFilter(rinfo.filters[i].c_str());
    }

    // apply quality minimums:
    if (rinfo.qual.is_valid) {
        double qual = 0.;
        if (checked_double_parse(record.GetQual().c_str(), qual)
            && (rinfo.qual.val < qual)) {
            record.SetQual(rinfo.qual.str);
        }
    }

    if (rinfo.gq.is_valid) {
        double gq = 0.;
        if (checked_double_parse(record.GetSampleVal("GQ"), gq)
            && (rinfo.gq.val < gq)) {
            record.SetSampleVal("GQ", rinfo.gq.str);
        }
    }

    // change gt conflict status based on region_copyn
    assert(rinfo.copyn < 2);

    if (rinfo.copyn != 1) {
        set_record_to_unknown_gt(record);
        return;
    }

    std::vector<int> alleles;
    if (! record.GetGT().empty()) {
        parse_gt(record.GetGT().c_str(), alleles);
    }

    // a single-entry GT is left untouched
    if (alleles.size() == 1) return;

    // anything that is not a two-entry GT cannot be interpreted
    if (alleles.size() != 2) {
        set_record_to_unknown_gt(record);
        return;
    }

    // two different entries conflict with a copy number of one
    if (alleles[0] != alleles[1]) {
        set_record_to_unknown_gt(record);
        record.AppendFilter(opt.site_conflict_label.c_str());
        return;
    }

    if (alleles[0] < 0) {
        set_record_to_unknown_gt(record);
        return;
    }

    // two equal, non-negative entries: collapse GT to one entry, drop PL
    std::ostringstream oss;
    oss << alleles[0];
    record.SetSampleVal("GT", oss.str().c_str());
    record.DeleteSampleKeyVal("PL");
}
Example #8
0
/*
** parse_vararg: read & check an attribute value
**
** Skips white spaces, reads the quote char (or the first char of an
** unquoted value) and then the value itself. A value starting with
** "<" is a reference to another attribute in the global list `vars`
** and is followed by ">". The input log is disabled while reading
** and, unless fatal_error is set, updated with the value and
** enabled again before returning.
**
** For VT_ENUM attributes the value is passed to check_enumstr(),
** for VT_URI attributes without VF_MACRO it is replaced by the
** result of parse_uri().
**
** params: var...attribute to read the value for; var->quote is
**               updated; must not be NULL
**         inpf..input file
** result: the value read, or NULL if none could be read. The
**         pointer is whatever refvar->text, estr2str( vararg ) or
**         parse_uri() supplied, or an empty string literal for
**         unknown/empty references.
**
** NOTE(review): if EOF is hit while reading the quote char,
** var->quote keeps its previous value, although the test
** "var->quote != EOF" below looks like it expects EOF to be stored
** there -- confirm the intended behaviour.
*/
STRPTR parse_vararg( HSCVAR *var, INFILE *inpf )
{
    STRPTR str_vararg = NULL;          /* return value */
    int    ch;                         /* char read from input */

    /* TODO: handle "<>" (reset var->text to NULL) */

    infskip_ws( inpf );

    /* disable log */
    inflog_disable( inpf );

    /* read var->quote char */
    ch = infgetc( inpf );
    if ( strchr( VQ_STR_QUOTE, ch ) ) {

        var->quote = ch;

    } else if ( ch != EOF ) {

        var->quote = VQ_NO_QUOTE;

    } else {

        err_eof( inpf, "reading attribute" );

    }

    /* warning if no quote */
    if ( ( var->quote == VQ_NO_QUOTE )
         && !( var->varflag & VF_NOQUOTE ) )
    {

        message( MSG_ARG_NO_QUOTE, inpf );
        errstr( "Argument without quote\n" );

    }

    /* read arg string */
    if ( var->quote == '<' ) {

        /*
        ** get arg from other var
        */
        STRPTR nw = infgetw( inpf );

        if ( nw ) {

            HSCVAR *refvar = find_varname( vars, nw );

            if ( refvar ) {

                /* TODO: type checking */
                var->quote = refvar->quote;
                str_vararg = refvar->text;

                /* check empty/circular reference */
                if ( !str_vararg ) {

                    message( MSG_EMPTY_SYMB_REF, inpf );
                    errstr( "Empty reference to" );
                    errsym( var->name );
                    errlf();

                }

                /* debugging message */
                DDA( fprintf( stderr, "**    %s refers to <%s>\n",
                              var->name, refvar->name ) );

            } else {

                /* reference to unknown var */
                message( MSG_UNKN_SYMB_REF, inpf );
                errstr( "reference to unknown" );
                errsym( nw );
                errlf();

            }

            if ( (!refvar) || (!str_vararg ) ) {

                /* return empty var */
                var->quote = '"';
                str_vararg = "";
            }

            parse_gt( inpf );

        } else {

            err_eof( inpf, "reading attribute" );

        }

    } else if ( var->quote != EOF ) {

        /*
        ** get arg from input file
        */
        BOOL   end = FALSE;

        /* clear vararg or set with first char read */
        /* (an unquoted value has already lost its first */
        /* char to the quote check above) */
        if ( var->quote == VQ_NO_QUOTE )
            end = !set_estr( vararg, ch2str( ch ) );
        else
            end = !clr_estr( vararg );
        if ( end )
            err_mem( inpf );

        /*
        ** read next char from input file until a
        ** closing quote is found.
        ** if the arg had no quote, a white space
        ** or a '>' is used to detect end of arg.
        ** a LF always ends the arg; for an arg without
        ** quote, err_streol() is called in this case.
        */
        while ( !end ) {

            ch = infgetc( inpf );

            /* leave loop unless a char was appended */
            end = TRUE;

            if ( ch == EOF ) {

                err_eof( inpf, "reading attribute" );

            } else if ( (ch==var->quote)
                        || ( ch==CH_LF )
                        || ( (var->quote==VQ_NO_QUOTE)
                             && ( inf_isws(ch,inpf) || ( ch=='>' ) ) )
                      )
            {

                /* end of arg reached */
                str_vararg = estr2str( vararg );
                if ( var->quote == VQ_NO_QUOTE ) {

                    if ( ch==CH_LF )
                        err_streol( inpf );

                    /* terminating char is not part of the arg */
                    inungetc( ch, inpf );

                }

            } else {

                /* append next char to vararg */
                if ( !app_estrch( vararg, ch ) )
                    err_mem( inpf );
                else
                    end = FALSE; /* continue loop */

            }
        }
    }

    /* var has already been dereferenced above, */
    /* so only the value needs to be checked here */
    if ( str_vararg ) {

        if ( var->vartype == VT_ENUM ) {

            /*
            ** check enum type
            */
            check_enumstr( var, str_vararg, inpf );

        } else if ( var->vartype == VT_URI ) {

            /*
            ** parse uri (only if no macro-attr)
            ** (convert abs.uris, check existence)
            */
            if ( !(var->varflag & VF_MACRO) )
                str_vararg = parse_uri( str_vararg, inpf );
            else {

                DDA( fprintf( stderr, "**    didn't parse uri \"%s\"\n",
                              str_vararg ) );

            }

        }

    }

    /* update and enable log */
    if ( !fatal_error ) {

        BOOL ok = TRUE;

        if ( var->quote != VQ_NO_QUOTE )
            ok &= inflog_app( inpf, ch2str( var->quote ) );/* append quote */

        /* append arg; a failure here counts like a failure */
        /* of the quote appends, and a missing value is not */
        /* passed on as NULL */
        if ( str_vararg )
            ok &= inflog_app( inpf, str_vararg );

        if ( var->quote != VQ_NO_QUOTE )
            ok &= inflog_app( inpf, ch2str( var->quote ) );/* append quote */
        inflog_enable( inpf );                             /* enable log */

        if ( !ok )
            err_mem( NULL );
    }

    return ( str_vararg );
}