/*
** def_tag_args
**
** Read the remainder of a tag definition from hp->inpf: first the
** tag options, then the attribute declarations, then the closing ">".
**
** params: hp....hsc process; words are read from hp->inpf
**         tag...tag that receives the options and attributes;
**               if NULL, nothing is read at all
** result: TRUE, if no parse step reported a failure;
**         FALSE, if tag is NULL or if parse_tag_option(),
**         parse_tag_var() or parse_gt() returned FALSE.
**         If the input runs out before a ">" shows up, parse_gt()
**         is not called and the state accumulated so far is returned.
*/
BOOL def_tag_args( HSCPRC *hp, HSCTAG *tag )
{
    BOOL    ok   = FALSE;
    STRPTR  nw;
    INFILE *inpf = hp->inpf;

    if ( tag ) {

        ok = TRUE;

        /* read first word of the arguments */
        nw = infgetw( inpf );

        /*
        ** set tag options:
        ** as long as the current word is "/" or a linefeed, the word
        ** following it is handed to parse_tag_option() (unless that
        ** word is a linefeed itself)
        */
        while ( nw && ( !strcmp( nw, "/" ) || !strcmp( nw, "\n" ) ) ) {

            nw = infgetw( inpf );
            if ( nw ) {

                if ( strcmp( nw, "\n" ) )
                    ok &= parse_tag_option( hp, nw, tag );
                nw = infgetw( inpf );

            }

        }

        /*
        ** set tag attributes:
        ** every word up to ">" that is not a linefeed is pushed back
        ** and parsed as an attribute declaration.
        ** (a linefeed never equals ">", so no separate test for it is
        ** needed in the loop condition)
        */
        while ( nw && strcmp( nw, ">" ) ) {

            if ( strcmp( nw, "\n" ) ) {

                inungetcw( inpf );
                ok &= parse_tag_var( hp, tag );

            }
            nw = infgetw( inpf );

        }

        /*
        ** at this point nw is either NULL or ">"; linefeeds have
        ** already been consumed by the loop above.
        **
        ** check for ">" at end; the result is AND-ed into ok so a
        ** failure reported for an option or attribute above is not
        ** lost when the closing ">" itself parses fine.
        */
        if ( nw ) {

            inungetcw( inpf );
            ok &= parse_gt( hp );

        }

    }

    return( ok );
}
/*
**-------------------------------------
** <$LET> set a new global attribute
**        or overwrite a defined one
**-------------------------------------
**
** Expected input after the tag name:  varname ":" definition ">"
**
** params: inpf...input file to read the tag from
**         tag....unused by this handler
** result: TRUE, if the attribute name, the ":", the definition and
**         the closing ">" were all parsed successfully.
**         On any failure the rest of the tag is skipped with
**         skip_until_eot() and FALSE is returned.
*/
BOOL handle_hsc_let( INFILE *inpf, HSCTAG *tag )
{
    STRPTR varname = infgetw( inpf );
    BOOL   ok      = FALSE;

    if ( varname ) {

        /* create copy of varname */
        varname = strclone( varname );
        if ( !varname )
            err_mem( inpf );

    } else {

        /* no word left in input: report this as end-of-file only,
        ** not additionally as an out-of-memory condition */
        err_eof( inpf, "missing attribute name" );

    }

    if ( varname ) {

        ok = parse_wd( inpf, ":" );

        /* a failed definition must make the whole tag fail */
        if ( ok )
            ok = ( define_var( varname, vars, inpf, 0 ) ? TRUE : FALSE );

        if ( ok )
            ok = parse_gt( inpf );

    }

    /* release mem */
    ufreestr( varname );

    /* if error occurred, skip rest of tag */
    if ( !ok )
        skip_until_eot( inpf );

    return ( ok );
}
// Rewrite a diploid sample genotype as a haploid one.
//
// Records without a GT sample value, and records whose parsed GT does
// not have exactly two alleles, are left untouched. For a two-allele GT:
//  - equal alleles:   GT becomes the single allele index (or "." when
//                     the index is negative) and any PL value is moved
//                     to the tag named by _shopt.orig_pl_tag
//  - unequal alleles: the _shopt.haploid_conflict_label filter is
//                     appended; GT and PL are not modified
void
make_record_haploid(VcfRecord& vcfr) const
{
    const char* const gt_field = vcfr.GetSampleVal("GT");
    if (gt_field == NULL) return;

    parse_gt(gt_field, _gti);

    // nothing to do unless the record is diploid
    if (_gti.size() != 2) return;

    if (_gti[0] != _gti[1])
    {
        vcfr.AppendFilter(_shopt.haploid_conflict_label.c_str());
        return;
    }

    // change GT:
    const char* haploid_gt = ".";
    if (_gti[0] >= 0)
    {
        haploid_gt = _intstr.get32(_gti[0]);
    }
    vcfr.SetSampleVal("GT", haploid_gt);

    // move PL field to 'backup' OPL field:
    const char* const orig_pl = vcfr.GetSampleVal("PL");
    if (orig_pl != NULL)
    {
        vcfr.SetSampleVal(_shopt.orig_pl_tag.c_str(), orig_pl);
        vcfr.DeleteSampleKeyVal("PL");
    }
}
// Extract the GT value from the fields in 'word' and parse it into
// 'digt_code'. When no GT value is found, 'digt_code' is set to the
// single element {-1}.
//
// Returns true only if 'digt_code' ends up with exactly two entries
// and both are non-negative.
static
bool
get_digt_code(const char* const* word,
              std::vector<int>& digt_code)
{
    const char* const gt_field = get_format_string_nocopy(word, "GT");

    if (gt_field != NULL)
    {
        parse_gt(gt_field, digt_code, true);
    }
    else
    {
        digt_code.assign(1, -1);
    }

    if (digt_code.size() != 2) return false;
    return (digt_code[0] >= 0) && (digt_code[1] >= 0);
}
/*
 * hsc_parse_tag
 *
 * parse tag (after "<")
 *
 * Steps, in order:
 *  - if hp->smart_ent is set, white space preceded the "<" and the
 *    next input char is white space too, the "<" is treated as plain
 *    text and written out as the entity "&lt;"; no tag is parsed
 *  - otherwise the tag id is read; "/" as first word selects the
 *    end-tag path, anything else the start-tag path
 *  - start-tag: look up (or temporarily create) the tag, emit the
 *    option-related messages, reset and parse its attributes, and
 *    register a pending end-tag if HT_CLOSE is set
 *  - end-tag: look up the tag, check that no attributes are passed
 *    and remove the pending end-tag
 *  - both: decide whether the tag is stripped, call the tag handle
 *    and pass the tag text to the start-/end-tag callback
 *
 * params: hp...hsc process; input is read from hp->inpf
 * result: TRUE, unless hp->fatal is set on return
 */
BOOL hsc_parse_tag(HSCPRC * hp)
{
    INFILE *inpf = hp->inpf;
    STRPTR nxtwd = NULL;
    DLNODE *nd = NULL;
    HSCTAG *tag = NULL;
    ULONG tci = 0;              /* tag_call_id returned by set_tag_args() */
    BOOL(*hnd) (HSCPRC * hp, HSCTAG * tag) = NULL;
    BOOL open_tag;
    DLLIST *taglist = hp->deftag;
    BOOL rplc_lt = FALSE;       /* TRUE, if replace spc. char "<" */
    BOOL hnd_result = TRUE;     /* result returned by handle */
    BOOL unknown_tag = FALSE;   /* TRUE, if tag has not been defined before */
    BOOL preceeding_whtspc = estrlen(hp->whtspc);

    /* init strings used inside tag-handles */
    set_estr(hp->tag_name_str, infgetcw(inpf));
    clr_estr(hp->tag_attr_str);
    clr_estr(hp->tag_close_str);

    if (hp->smart_ent && preceeding_whtspc)
    {
        /*
         * check for special char "<"
         */
        int ch = infgetc(inpf);

        /* check if next char is a white space */
        if (hsc_whtspc(ch))
        {
            rplc_lt = TRUE;

            /* write "&lt;" and white spaces; the replacement has to be
             * the entity, as writing a raw "<" would replace nothing */
            message_rplc(hp, "<", "&lt;");
            hsc_output_text(hp, "", "&lt;");
        }
        inungetc(ch, inpf);
    }

    if (!rplc_lt)
    {
        /* get tag id */
        nxtwd = infget_tagid(hp);

        if (!hp->fatal)
        {
            /* append tag-name to tag_name_str */
            app_estr(hp->tag_name_str, infgetcw(inpf));

            /* check for hsctag; if not, enable output */
            if (hp->suppress_output
                && upstrncmp(nxtwd, HSC_TAGID, strlen(HSC_TAGID))
                && strcmp(nxtwd, HSC_COMMENT_STR)
                && strcmp(nxtwd, HSC_ONLYCOPY_STR)
                )
            {
                hp_enable_output(hp, "non-hsctag occured");
            }

            if (!hp->suppress_output)
            {
                D(fprintf(stderr, DHL "tag <"));
            }
        }
    }

    if (!hp->fatal && !rplc_lt)
    {
        BOOL write_tag = FALSE; /* flag: write tag text & attrs to output? */

        if (strcmp("/", nxtwd)) /* is it a closing tag? */
        {
            /*
             *
             * process start-tag
             *
             */
            open_tag = TRUE;
            if (!hp->suppress_output)
            {
                D(fprintf(stderr, "%s>\n", nxtwd));
            }

            /* search for tag in list */
            nd = find_dlnode(taglist->first, (APTR) nxtwd, cmp_strtag);
            if (nd == NULL)
            {
                hsc_message(hp, MSG_UNKN_TAG,   /* tag not found */
                            "unknown %t", nxtwd);

                /* create a temporary tag; it is released again at the
                 * end of this function (see unknown_tag) */
                tag = new_hsctag(nxtwd);
                tag->option |= HT_UNKNOWN;
                unknown_tag = TRUE;
#if 0                           /* TODO: remove */
                /* NOTE: This one's a bit perverted, because
                 * the closing ">" is appended to the
                 * attribute string, and the closing string
                 * is left empty; as there is nearly no code
                 * between setting and writing the strings,
                 * I think this is more reasonable than doing
                 * some tricky string-manipulation...
                 */
                skip_until_eot(hp, hp->tag_attr_str);
                clr_estr(hp->tag_close_str);
#endif
            }
            else
            {
                tag = (HSCTAG *) nd->data;
            }

            /* set handle-function */
            hnd = tag->o_handle;

            /*
             * handle options
             */

            /* check for obsolete tag */
            if (tag->option & HT_OBSOLETE)
            {
                hsc_message(hp, MSG_TAG_OBSOLETE,
                            "%T is obsolete", tag);
            }

            /* check for jerk-tag */
            if (tag->option & HT_JERK)
            {
                hsc_message(hp, MSG_TAG_JERK,
                            "%T is only used by %j", tag);
            }

            /* only-once-tag occurred twice? */
            if ((tag->option & HT_ONLYONCE) && (tag->occured))
            {
                hsc_message(hp, MSG_TAG_TOO_OFTEN,
                            "%T occured too often", tag);
            }

            /* set occurred-flag */
            if (tag->option & (HT_ONLYONCE | HT_REQUIRED))
                tag->occured = TRUE;

            /* check for "must be inside"/"not allowed within"-tags */
            if (!check_mbinaw(hp, tag))
                hnd = NULL;

            /* clear (reset to default) attribute values of tag */
            clr_varlist(tag->attr);

            /* set attributes or check for ">" */
            if (!(tag->option & HT_SPECIAL))
            {
                tci = set_tag_args(hp, tag);
                if (tci == MCI_ERROR)
                {
                    skip_until_eot(hp, NULL);
                    hnd = NULL;
                }

                if (!hp->fatal)
                {
                    /* set ">" in string that contains closing text */
                    if (!hp->compact)
                    {
                        set_estr(hp->tag_close_str, infgetcws(inpf));
                    }
                    else
                    {
                        clr_estr(hp->tag_close_str);
                    }
                    app_estr(hp->tag_close_str, infgetcw(inpf));

                    /* check for succeeding white-space */
                    if ((tag->option & HT_WHTSPC) && !infeof(inpf))
                    {
                        int ch = infgetc(inpf);

                        if (hsc_whtspc(ch))
                        {
                            if (hp->strip_badws)
                            {
                                hp->strip_next2_whtspc = TRUE;
                            }
                            else
                            {
                                hsc_message(hp, MSG_SUCC_WHTSPC,
                                            "succeeding white-space for %T",
                                            tag);
                            }
                        }
                        inungetc(ch, inpf);
                    }
                }
            }

            /* end-tag required? */
            if (tag->option & HT_CLOSE)
                app_ctag(hp, tag);
        }
        else
        {
            /*
             *
             * process end-tag
             *
             */

            /* get tag id */
            nxtwd = infget_tagid(hp);
            open_tag = FALSE;

            /* append tag-name to tag_name_str */
            if (!hp->compact)
            {
                app_estr(hp->tag_name_str, infgetcws(inpf));
            }
            app_estr(hp->tag_name_str, infgetcw(inpf));

            if (!hp->suppress_output)
            {
                D(fprintf(stderr, "/%s>\n", nxtwd));
            }

            /* search for tag in taglist */
            /* (see if it exists at all) */
            nd = find_dlnode(taglist->first, (APTR) nxtwd, cmp_strtag);
            if (nd == NULL)
            {
                /* closing tag is absolutely unknown */
                hsc_message(hp, MSG_UNKN_TAG,   /* tag not found */
                            "unknown %c", nxtwd);
                skip_until_eot(hp, hp->tag_attr_str);
            }
            else
            {
                tag = (HSCTAG *) nd->data;  /* fitting tag in taglist */

                /* check for preceding white-spaces */
                if ((tag->option & HT_WHTSPC) && anyWhtspc(hp))
                {
                    if (hp->strip_badws)
                    {
                        hp->strip_next_whtspc = TRUE;
                    }
                    else
                    {
                        hsc_message(hp, MSG_PREC_WHTSPC,
                                    "preceding white space for %C", tag);
                    }
                }

                if (tag->option & (HT_CLOSE | HT_AUTOCLOSE))
                {
                    /* set closing handle */
                    hnd = tag->c_handle;

                    /* check for no args */
                    if (!parse_wd(hp, ">"))
                    {
                        hsc_message(hp, MSG_CL_TAG_ARG,
                                    "no attributes allowed for end-tags");
                    }
                    else
                    {
                        /* set ">" in string that contains closing text */
                        if (!hp->compact)
                        {
                            set_estr(hp->tag_close_str, infgetcws(inpf));
                        }
                        app_estr(hp->tag_close_str, infgetcw(inpf));
                    }

                    /* set values of attributes stored
                     * in end-tag,
                     * remove end-tag from stack */
                    remove_ctag(hp, tag);
                }
                else
                {
                    /* illegal closing tag */
                    hsc_message(hp, MSG_ILLG_CTAG,  /* tag not found */
                                "illegal %c", nxtwd);
                    parse_gt(hp);
                    tag = NULL;
                }
            }
        }

        /*
         * processed for opening AND closing tag
         */
        write_tag = (!(tag) || !(tag->option & HT_NOCOPY));

        if (tag)
        {
            /*
             * check if tag should be stripped
             */
            if (!postprocess_tagattr(hp, tag, open_tag))
            {
                /* stripped tag with external reference */
                if (open_tag)
                    hsc_msg_stripped_tag(hp, tag, "external reference");
                hnd = NULL;         /* don't call handle */
                write_tag = FALSE;  /* don't output tag */
            }
            else if (hp->strip_tags
                     && strenum(tag->name, hp->strip_tags, '|', STEN_NOCASE))
            {
                /* strip tag requested by user */
                if (!(tag->option & HT_SPECIAL))
                {
                    if (open_tag)
                        hsc_msg_stripped_tag(hp, tag, "as requested");
                    hnd = NULL;         /* don't call handle */
                    write_tag = FALSE;  /* don't output tag */
                }
                else
                {
                    hsc_message(hp, MSG_TAG_CANT_STRIP,
                                "can not strip special tag %T", tag);
                }
            }
            /*
             * get values for size from reference
             */
            else if (tag->uri_size && get_vartext(tag->uri_size))
                get_attr_size(hp, tag);
        }

        /* call handle if available */
        if (hnd && !hp->fatal)
            hnd_result = (*hnd) (hp, tag);

        /* write whole tag out */
        if (write_tag && hnd_result)
        {
            VOID(*tag_callback) (struct hscprocess * hp,
                                 HSCTAG * tag,
                                 STRPTR tag_name, STRPTR tag_attr,
                                 STRPTR tag_close) = NULL;

            if (open_tag)
                tag_callback = hp->CB_start_tag;
            else
                tag_callback = hp->CB_end_tag;

            /* write white spaces */
            hsc_output_text(hp, "", "");

            if (tag_callback)
            {
                (*tag_callback) (hp, tag,
                                 estr2str(hp->tag_name_str),
                                 estr2str(hp->tag_attr_str),
                                 estr2str(hp->tag_close_str));
            }
        }

        /* skip LF if requested */
        if (tag && (tag->option & HT_SKIPLF))
        {
            skip_next_lf(hp);   /* TODO: really skip single lf */
        }

        /* remove temporary created tag */
        if (unknown_tag)
            del_hsctag(tag);

#if (defined MSDOS && (!defined HSC_TRIGGER))
#define UNLIKELY (10*1024)
        /* crash randomly */
        if ((rand() % UNLIKELY) == (UNLIKELY / 2))
        {
            enforcerHit();
        }
#endif
    }

    return (BOOL) (!hp->fatal);
}
// modify overlapping site and indel records to be self-consistent: void VcfRecordBlocker:: GroomRecordBuffer() { const unsigned n_records(_recordBuffer.size()); #ifdef VDEBUG if (true) { std::cerr << "VDEBUG input: indel count: " << _indelIndex.size() << "\n"; for (unsigned i(0); i<n_records; ++i) { _recordBuffer[i].WriteUnaltered(std::cerr); } } #endif // create a map of 'covered' ploidy through the indel region based // on the first indel, any additional inside of the first must be // conflict: region_info rinfo; if (_indelIndex.size() > 1) { rinfo.filters.push_back(_opt.indel_conflict_label); } else { // set additional indel filters: const GatkVcfRecord& record(_recordBuffer[_indelIndex[0]]); const std::vector<std::string>& filters(record.GetFilter()); const unsigned n_filt(filters.size()); if ((n_filt!=1) || filters[0] != "PASS") { rinfo.filters = filters; } // set additional minq: rinfo.qual.str=record.GetQual().c_str(); rinfo.qual.is_valid=checked_double_parse(rinfo.qual.str,rinfo.qual.val); rinfo.gq.str=record.GetSampleVal("GQ"); rinfo.gq.is_valid=checked_double_parse(rinfo.gq.str,rinfo.gq.val); _gti.clear(); if (! record.GetGT().empty()) { parse_gt(record.GetGT().c_str(),_gti); } if (_gti.size() == 2) { if ((_gti[0]==0 && _gti[1]>0) || (_gti[1]==0 && _gti[0]>0)) { rinfo.copyn=1; } } } // 2) modify site records according to overlapping filter status (or mark all as IndelConflict) // bool is_edit(true); std::vector<refedit> edits; for (unsigned record_index(0); record_index<n_records; ++record_index) { GatkVcfRecord& record(_recordBuffer[record_index]); const int pos(record.GetPos()); const bool is_in_indel((pos>=_bufferStartPos) && (pos<=_bufferEndPos)); if (! 
is_in_indel) continue; const unsigned offset(pos-_bufferStartPos); adjust_overlap_record(_opt,rinfo,offset,record,is_edit,edits); // regroom record to account for quality value changes, etc: GroomInputRecord(record); } // 3) modify indel records according to any site conflicts or hemizygous snps present: // this is 90% done, but no easy way to make the per-allele tag adjustment reliable w/o parsing // header for all cases first. not worth pursuing for now... #if 0 if (is_edit && (! edits.empty())) { // we should only get here for simple het deletions: GatkVcfRecord& record(_recordBuffer[_indelIndex[0]]); std::string allele(record.GetRef()); bool is_diff(false); for (unsigned i(0); i<edits.size(); ++i) { if (allele[edits[i].first+1] != edits[i].second) { allele[edits[i].first+1] = edits[i].second; is_diff=true; } } if (is_diff) { // 1) insert new alternate allele // 2) update GT // 3) modify or delete all other allele dependent tags (this might just be AD in practice) std::vector<std::string>& alt(record.GetAlt()); alt.insert(alt.begin(),allele); record.SetSampleVal("GT","1/2"); } } #endif #ifdef VDEBUG if (true) { std::cerr << "VDEBUG output: indel count: " << _indelIndex.size() << "\n"; for (unsigned i(0); i<n_records; ++i) { _recordBuffer[i].WriteUnaltered(std::cerr); } } #endif }
static void adjust_overlap_record(const BlockerOptions& opt, const region_info& rinfo, const unsigned, GatkVcfRecord& record, bool&, std::vector<refedit>&) { // apply filters: const unsigned n_filt(rinfo.filters.size()); for (unsigned filt_index(0); filt_index<n_filt; ++filt_index) { record.AppendFilter(rinfo.filters[filt_index].c_str()); } //apply quality minimums: if (rinfo.qual.is_valid) { double record_qual(0.); const bool is_valid(checked_double_parse(record.GetQual().c_str(),record_qual)); if (is_valid && (rinfo.qual.val<record_qual)) { record.SetQual(rinfo.qual.str); } } if (rinfo.gq.is_valid) { double record_gq(0.); const bool is_valid(checked_double_parse(record.GetSampleVal("GQ"),record_gq)); if (is_valid && (rinfo.gq.val<record_gq)) { record.SetSampleVal("GQ",rinfo.gq.str); } } // change gt conflict status based on region_copyn assert(rinfo.copyn<2); if (rinfo.copyn==1) { std::vector<int> gti; if (! record.GetGT().empty()) { parse_gt(record.GetGT().c_str(),gti); } if (gti.size() == 2) { if (gti[0]==gti[1]) { if (gti[0]>=0) { std::ostringstream oss; oss << gti[0]; record.SetSampleVal("GT",oss.str().c_str()); record.DeleteSampleKeyVal("PL"); } else { set_record_to_unknown_gt(record); } } else { set_record_to_unknown_gt(record); record.AppendFilter(opt.site_conflict_label.c_str()); } } else if (gti.size() != 1) { set_record_to_unknown_gt(record); } } else { set_record_to_unknown_gt(record); } }
/*
** parse_vararg: read & check an attribute value
**
** The value is read from inpf in one of three forms, selected by the
** first non-white-space char:
**  - a char contained in VQ_STR_QUOTE: value runs up to the matching
**    quote char (or up to a linefeed)
**  - '<': the next word names another attribute whose text and quote
**    are taken over; the closing ">" of the reference is parsed
**  - anything else: unquoted value, ended by white space, ">" or a
**    linefeed; the terminating char is pushed back
**
** While reading, the input log is disabled; unless a fatal error is
** flagged, the value (with its quotes) is appended to the log
** afterwards and the log is enabled again.
**
** params: var....attribute the value is read for; var->quote is
**                updated with the kind of quote found
**         inpf...input file
** result: ptr to the value read (not a copy owned by the caller:
**         it is the text of the shared string `vararg', the text of
**         the referenced attribute, "" or whatever parse_uri()
**         returns), or NULL if no value could be read
**
** NOTE(review): if EOF is hit while reading the quote char,
** var->quote is left at its previous value -- confirm that callers
** initialise it.
*/
STRPTR parse_vararg( HSCVAR *var, INFILE *inpf )
{
    STRPTR str_vararg = NULL;          /* return value */
    int    ch;                         /* char read from input */

    /* TODO: handle "<>" (reset var->text to NULL) */

    infskip_ws( inpf );

    /* disable log */
    inflog_disable( inpf );

    /* read var->quote char */
    ch = infgetc( inpf );
    if ( !strchr( VQ_STR_QUOTE, ch ) ) {

        if ( ch != EOF )
            var->quote = VQ_NO_QUOTE;
        else
            err_eof( inpf, "reading attribute" );

    } else {

        var->quote = ch;

    }

    /* warning if no quote */
    if ( ( var->quote == VQ_NO_QUOTE )
         && !( var->varflag & VF_NOQUOTE ) )
    {

        message( MSG_ARG_NO_QUOTE, inpf );
        errstr( "Argument without quote\n" );

    }

    /* read arg string */
    if ( var->quote == '<' ) {

        /*
        ** get arg from other var
        */
        STRPTR nw = infgetw( inpf );

        if ( nw ) {

            HSCVAR *refvar = find_varname( vars, nw );

            if ( refvar ) {

                /* TODO: type checking */
                var->quote = refvar->quote;
                str_vararg = refvar->text;

                /* check empty/circular reference */
                if ( !str_vararg ) {

                    message( MSG_EMPTY_SYMB_REF, inpf );
                    errstr( "Empty reference to" );
                    errsym( var->name );
                    errlf();

                }

                /* debugging message */
                DDA( fprintf( stderr, "**    %s refers to <%s>\n",
                              var->name, refvar->name ) );

            } else {

                /* reference to unknown var */
                message( MSG_UNKN_SYMB_REF, inpf );
                errstr( "reference to unknown" );
                errsym( nw );
                errlf();

            }

            if ( (!refvar) || (!str_vararg ) ) {

                /* return empty var */
                var->quote = '"';
                str_vararg = "";

            }

            parse_gt( inpf );

        } else {

            err_eof( inpf, "reading attribute" );

        }

    } else if ( var->quote != EOF ) {

        /*
        ** get arg from input file
        */
        BOOL end = FALSE;

        /* clear vararg or set with first char read */
        if ( var->quote == VQ_NO_QUOTE )
            end = !set_estr( vararg, ch2str( ch ) );
        else
            end = !clr_estr( vararg );
        if ( end )
            err_mem( inpf );

        /*
        ** read next char from input file until a
        ** closing quote is found.
        ** if the arg had no quote, a white space
        ** or a '>' is used to detect end of arg.
        ** if a LF is found, view error message
        */
        while ( !end ) {

            ch = infgetc( inpf );

            /* assume this was the last char; only the
            ** "append" branch below continues the loop */
            end = TRUE;

            if ( ch == EOF ) {

                err_eof( inpf, "reading attribute" );

            } else if ( (ch==var->quote)
                        || ( ch==CH_LF )
                        || ( (var->quote==VQ_NO_QUOTE)
                             && ( inf_isws(ch,inpf) || ( ch=='>' ) ) ) )
            {

                /* end of arg reached */
                str_vararg = estr2str( vararg );
                if ( var->quote == VQ_NO_QUOTE ) {

                    if ( ch==CH_LF )
                        err_streol( inpf );
                    inungetc( ch, inpf );

                }

            } else {

                /* append next char to vararg */
                if ( !app_estrch( vararg, ch ) )
                    err_mem( inpf );
                else
                    end = FALSE;       /* continue loop */

            }

        }

    }

    if ( str_vararg && var ) {

        if ( var->vartype == VT_ENUM ) {

            /*
            ** check enum type
            */
            check_enumstr( var, str_vararg, inpf );

        } else if ( var->vartype == VT_URI ) {

            /*
            ** parse uri (only if no macro-attr)
            ** (convert abs.uris, check existence)
            */
            if ( !(var->varflag & VF_MACRO) ) {

                str_vararg = parse_uri( str_vararg, inpf );

            } else {

                DDA( fprintf( stderr, "**    didn't parse uri \"%s\"\n",
                              str_vararg ) );

            }

        }

    }

    /* update and enable log */
    if ( !fatal_error ) {

        BOOL ok = TRUE;

        if ( var->quote != VQ_NO_QUOTE )
            ok &= inflog_app( inpf, ch2str( var->quote ) ); /* append quote */

        /* append arg; a failure here counts like a failure to
        ** append one of the quotes, and a missing value is
        ** not passed on */
        if ( str_vararg )
            ok &= inflog_app( inpf, str_vararg );

        if ( var->quote != VQ_NO_QUOTE )
            ok &= inflog_app( inpf, ch2str( var->quote ) ); /* append quote */

        inflog_enable( inpf );                              /* enable log */

        if ( !ok )
            err_mem( NULL );

    }

    return ( str_vararg );
}