IterationReport VarDBase::vcf_iterate( void (*f)(Variant&, void *) , void * data , Mask & mask ) { IterationReport irep( true , mask.any_grouping() , mask.variant_limit() ); // VCF file name is kept in the Mask, by the 'ex-vcf' attribute std::string filename = mask.external_vcf_filename(); if ( filename != "-" ) Helper::checkFileExists( filename ); // Use VCFReader, into a temporary :memory: database // Load, parse VCF file; store variant and genotype information, and // meta-information, in vardb File vcffile( filename , VCF ); VCFReader v( &vcffile , "" , &(GP->vardb) , NULL ); // hack if ( filename == "-" ) { v.observed_header( true ); v.set_number_individuals( GP->indmap.size() ); } if ( mask.fixxy() ) { v.set_fixxy( &mask , &(GP->locdb), &(GP->inddb) ); } // Selectively filter in/out meta-information? // or, add a region filter? // We might not want to load the entire VCF into memory; & thus allow // the includes/excludes and filters to bring regions/tags into view // Respect 'reg', and 'loc' from command line. // But not loc.subset; ereg, loc.req, loc.ex, etc std::set<Region> filter; std::string locinc = mask.loc_include_string(); int l2 = 0; if ( locinc != "" && locinc != "." ) { if ( ! Helper::str2int( locinc , l2 ) ) Helper::halt("could not parse 'loc' mask"); if ( l2 > 0 ) filter = GP->locdb.get_regions( l2 ); } std::set<Region> reginc = mask.included_reg(); std::set<Region>::iterator ii = reginc.begin(); while ( ii != reginc.end() ) { filter.insert( *ii ); ++ii; } // Add other "reg" from mask? if ( filter.size() > 0 ) v.set_region_mask( &filter ); if ( mask.id() ) v.add_id_filter( mask.included_id() ); // Misc. settings. downcode_mode = mask.downcode(); // Work through VCF GP->vardb.begin(); int inserted = 0; v.return_variant( true ); Variant * pv = NULL; while ( 1 ) { VCFReader::line_t l = v.parseLine( &pv ); if ( l == VCFReader::VCF_EOF ) break; if ( l == VCFReader::VCF_INVALID ) { continue; } // If a variant line has been processed and meets criteria, pv // will be non-NULL which also implies that a Variant has been // created and we are reponsible for cleaning up afterwards if ( l == VCFReader::VCF_VARIANT && pv ) { // bad line, or failed a loc mask filter if ( ! pv->valid() ) { irep.rejected_variant(); delete pv; continue; } // So that the Variant functions know not to look for data // in a BLOB; also, they they know how to parse it downstream pv->set_vcf_buffer( v.gt_field , &v.formats ); // Apply all mask filters, and decide whether to call function if ( eval_and_call( mask, &(GP->indmap) , *pv , f , data ) ) { if ( ! irep.accepted_variant() ) break; } else { irep.rejected_variant(); } // and now clean up delete pv; } } // Wrap up GP->vardb.commit(); // If we had to use any positional filters or grouping, now run the // actual iterate on the temporary database we've created return irep; }