// URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] // ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))? // 12 3 4 5 6 7 8 9 static IteratorT parse(IteratorT begin, IteratorT end, Components & components) { // (([^:/?#]+):)? IteratorT scheme_begin = begin; IteratorT scheme_end = parse_scheme(scheme_begin, end); if (scheme_end != scheme_begin) { components.scheme_begin = scheme_begin; components.scheme_end = scheme_end - 1; } IteratorT hierarchy_begin = parse_constant(":", scheme_end, end); IteratorT hierarchy_end = parse_hierarchy(hierarchy_begin, end, components.hierarchy); if (hierarchy_end == hierarchy_begin) { return hierarchy_begin; } components.hierarchy_begin = hierarchy_begin; components.hierarchy_end = hierarchy_end; IteratorT query_begin = hierarchy_end; IteratorT query_end = parse_meta("?", query_begin, end); if (query_end != query_begin) { components.query_begin = query_begin; components.query_end = query_end; } IteratorT fragment_begin = query_end; IteratorT fragment_end = parse_meta("#", fragment_begin, end); if (fragment_end != fragment_begin) { components.fragment_begin = fragment_begin; components.fragment_end = fragment_end; } components.complete = true; return fragment_end; }
void header::reparse() { unsigned int index = 0; has_idx = false; contig_index = 0; vector<string> old_lines(lines.size(),""); copy(lines.begin(), lines.end(), old_lines.begin()); lines.resize(0); INFO_map.clear(); INFO_reverse_map.clear(); FILTER_map.clear(); FILTER_reverse_map.clear(); FORMAT_map.clear(); FORMAT_reverse_map.clear(); CONTIG_map.clear(); CONTIG_reverse_map.clear(); index += add_FILTER_descriptor("ID=PASS,Description=PASS", index); for (unsigned int ui=0; ui<old_lines.size(); ui++) parse_meta(old_lines[ui],index); }
bool load_map(std::string filename) { Json::Value root; // will contains the root value after parsing. Json::Reader reader; std::ifstream file( filename.c_str(), std::ifstream::binary); bool parsingSuccessful = reader.parse( file, root ); if ( !parsingSuccessful ) { // report to the user the failure and their locations in the document. std::cout << "Failed to parse configuration\n" << reader.getFormattedErrorMessages(); return 0; } parse_meta(root); parse_layers(root); return 1; }
void lt_XMLParser::Impl::parse(const lt_XMLTags &tags, GURL *pdjvufile) { const GPList<lt_XMLTags> Body(tags.get_Tags(bodytag)); GPosition pos=Body; if(!pos || (pos != Body.lastpos())) { G_THROW( ERR_MSG("XMLAnno.extra_body") ); } const GP<lt_XMLTags> GBody(Body[pos]); if(!GBody) { G_THROW( ERR_MSG("XMLAnno.no_body") ); } GMap<GUTF8String,GP<lt_XMLTags> > Maps; lt_XMLTags::get_Maps(maptag,"name",Body,Maps); const GPList<lt_XMLTags> Objects(GBody->get_Tags(objecttag)); lt_XMLTags::get_Maps(maptag,"name",Objects,Maps); for(GPosition Objpos=Objects;Objpos;++Objpos) { lt_XMLTags &GObject=*Objects[Objpos]; // Map of attributes to value (e.g. "width" --> "500") const GMap<GUTF8String,GUTF8String> &args=GObject.get_args(); GURL codebase; { DEBUG_MSG("Setting up codebase... m_codebase = " << m_codebase << "\n"); GPosition codebasePos=args.contains("codebase"); // If user specified a codebase attribute, assume it is correct (absolute URL): // the GURL constructor will throw an exception if it isn't if(codebasePos) { codebase=GURL::UTF8(args[codebasePos]); }else if (m_codebase.is_dir()) { codebase=m_codebase; }else { codebase=GURL::Filename::UTF8(GOS::cwd()); } DEBUG_MSG("codebase = " << codebase << "\n"); } // the data attribute specifies the input file. This can be // either an absolute URL (starts with file:/) or a relative // URL (for now, just a path and file name). If it's absolute, // our GURL will adequately wrap it. If it's relative, we need // to use the codebase attribute to form an absolute URL first. GPosition datapos=args.contains("data"); if(datapos) { bool isDjVuType=false; GPosition typePos(args.contains("type")); if(typePos) { if(args[typePos] != mimetype) { // DjVuPrintErrorUTF8("Ignoring %s Object tag\n",mimetype); continue; } isDjVuType=true; } const GURL url = (pdjvufile) ? *pdjvufile : GURL::UTF8(args[datapos], (args[datapos][0] == '/') ? codebase.base() : codebase); int width; { GPosition widthPos=args.contains("width"); width=(widthPos)?args[widthPos].toInt():0; } int height; { GPosition heightPos=args.contains("height"); height=(heightPos)?args[heightPos].toInt():0; } GUTF8String gamma; GUTF8String dpi; GUTF8String page; GUTF8String do_ocr; { GPosition paramPos(GObject.contains(paramtag)); if(paramPos) { const GPList<lt_XMLTags> Params(GObject[paramPos]); for(GPosition loc=Params;loc;++loc) { const GMap<GUTF8String,GUTF8String> &pargs=Params[loc]->get_args(); GPosition namepos=pargs.contains("name"); if(namepos) { GPosition valuepos=pargs.contains("value"); if(valuepos) { const GUTF8String name=pargs[namepos].downcase(); const GUTF8String &value=pargs[valuepos]; if(name == "flags") { GMap<GUTF8String,GUTF8String> args; lt_XMLTags::ParseValues(value,args,true); if(args.contains("page")) { page=args["page"]; } if(args.contains("dpi")) { dpi=args["dpi"]; } if(args.contains("gamma")) { gamma=args["gamma"]; } if(args.contains("ocr")) { do_ocr=args["ocr"]; } }else if(name == "page") { page=value; }else if(name == "dpi") { dpi=value; }else if(name == "gamma") { gamma=value; }else if(name == "ocr") { do_ocr=value; } } } } } } const GP<DjVuFile> dfile(get_file(url,page)); if(dpi.is_int() || gamma.is_float()) { int pos=0; ChangeInfo(*dfile,dpi.toInt(),gamma.toDouble(pos,pos)); } parse_anno(width,height,GObject,Maps,*dfile); parse_meta(GObject,*dfile); parse_text(width,height,GObject,*dfile); ChangeTextOCR(do_ocr,width,height,dfile); } } }
// Read VCF file void vcf_file::scan_file(const string &chr, const string &exclude_chr, bool force_write_index) { bool filter_by_chr = (chr != ""); bool exclude_by_chr = (exclude_chr != ""); string index_filename = filename + ".vcfidx"; bool could_read_index_file = false; if (force_write_index == false) could_read_index_file = read_index_file(index_filename); string CHROM, last_CHROM=""; int POS, last_POS = -1; if (could_read_index_file == false) { printLOG("Building new index file.\n"); string line, CHROM, last_CHROM = ""; streampos filepos; char c; N_entries=0; N_indv = 0; while (!feof()) { filepos = get_filepos(); c = peek(); if ((c == '\n') || (c == '\r')) { read_line(line); continue; } else if (c == EOF) break; if (c == '#') { read_line(line); if (line[1] == '#') { // Meta information parse_meta(line); } else { // Must be header information: #CHROM POS ID REF ALT QUAL FILTER INFO (FORMAT NA00001 NA00002 ... ) parse_header(line); } } else { // Must be a data line read_CHROM_and_POS_and_skip_remainder_of_line(CHROM, POS); if (last_CHROM != CHROM) { printLOG("\tScanning Chromosome: " + CHROM + "\n"); last_CHROM = CHROM; } if (POS == last_POS) { one_off_warning("\tWarning - file contains entries with the same position. This is not supported by vcftools, and may cause unexpected behaviour.\n"); } last_POS = POS; entry_file_locations.push_back(filepos); N_entries++; } } write_index_file(index_filename); } printLOG("File contains " + int2str(N_entries) + " entries and " + int2str(N_indv) + " individuals.\n"); vector<string> meta_lines = meta; meta.resize(0); for (unsigned int ui=0; ui<meta_lines.size(); ui++) parse_meta(meta_lines[ui]); has_genotypes = (N_indv > 0); bool already_found_required_chr = false; bool already_filtered_required_chr = false; if ((exclude_by_chr == true) || (filter_by_chr == true)) { printLOG("Filtering by chromosome.\n"); for (unsigned int ui=0; ui<N_entries; ui++) { if (already_found_required_chr == true) { printLOG("Skipping Remainder.\n"); entry_file_locations.erase(entry_file_locations.begin()+ui, entry_file_locations.end()); break; } if (already_filtered_required_chr == true) { printLOG("Skipping Remainder.\n"); break; } set_filepos(entry_file_locations[ui]); read_CHROM_only(CHROM); if (last_CHROM != CHROM) { printLOG("\tChromosome: " + CHROM + "\n"); if ((filter_by_chr == true) && (last_CHROM == chr)) already_found_required_chr = true; if ((exclude_by_chr == true) && (last_CHROM == exclude_chr)) already_filtered_required_chr = true; last_CHROM = CHROM; } if ((exclude_by_chr == true) && (CHROM == exclude_chr)) { entry_file_locations[ui] = -1; continue; } if ((filter_by_chr == true) && (CHROM != chr)) { entry_file_locations[ui] = -1; continue; } } sort(entry_file_locations.begin(), entry_file_locations.end()); while((entry_file_locations.size() > 0) && (entry_file_locations[0] < 0)) entry_file_locations.pop_front(); N_entries = entry_file_locations.size(); printLOG("Keeping " + int2str(N_entries) + " entries on specified chromosomes.\n"); } include_indv.clear(); include_indv.resize(N_indv, true); include_entry.clear(); include_entry.resize(N_entries, true); include_genotype.clear(); include_genotype.resize(N_entries, vector<bool>(N_indv, true)); }
// Read VCF file void vcf_file::scan_file(const string &chr, const string &exclude_chr) { printLOG("Scanning " + filename + " ... \n"); bool filter_by_chr = (chr != ""); bool exclude_by_chr = (exclude_chr != ""); string line, tmp; N_indv = 0; unsigned int N_read = 0; istringstream ss; string last_CHROM = ""; N_entries=0; string CHROM; bool finish = false; int last_POS = -1; int POS; streampos filepos; while(!feof()) { filepos = get_filepos(); read_line(line); if (line.length() <= 2) continue; if (line[0] == '#') { if (line[1] == '#') { // Meta information parse_meta(line); } else { // Must be header information: #CHROM POS ID REF ALT QUAL FILTER INFO (FORMAT NA00001 NA00002 ... ) parse_header(line); } } else { // Must be a data line ss.clear(); ss.str(line); ss >> CHROM; N_read++; if ((filter_by_chr == true) && (last_CHROM == chr) && (CHROM != chr)) { // Presuming the file to be sorted (it should be), we have already found the chromosome we wanted, so there's no need to continue. printLOG("\tCompleted reading required chromosome. Skipping remainder of file.\n"); finish = true; break; } if (CHROM != last_CHROM) { printLOG("Currently scanning CHROM: " + CHROM); if ((exclude_by_chr == true) && (CHROM == exclude_chr)) printLOG(" - excluded."); printLOG("\n"); last_CHROM = CHROM; last_POS = -1; } if ((exclude_by_chr == true) && (CHROM == exclude_chr)) continue; if (filter_by_chr == true) { // For speed, only parse the entry if it's needed if (CHROM == chr) { ss >> POS; if (POS < last_POS) error("VCF file is not sorted at: " + CHROM + ":" + int2str(POS)); last_POS = POS; entry_file_locations.push_back(filepos); N_entries++; } } else { ss >> POS; if (POS < last_POS) error("VCF file is not sorted at: " + CHROM + ":" + int2str(POS)); last_POS = POS; entry_file_locations.push_back(filepos); N_entries++; } }