/// Builds a record from one already-tokenized VCF line.
///
/// The columns up to and including INFO are required; FORMAT and SAMPLE are
/// read only when the line has enough words to contain them.
///
/// @param vparse  line splitter holding the words of the current line
/// @throws blt_exception  when the line has too few fields, or when the
///                        FORMAT and SAMPLE columns split into a different
///                        number of entries
VcfRecord::
VcfRecord(const istream_line_splitter& vparse)
{
    const unsigned n_fields(vparse.n_word());

    // Everything through the INFO column must be present.
    if (static_cast<int>(n_fields) <= VCFID::INFO)
    {
        std::ostringstream msg;
        msg << "Too few fields (" << n_fields << ") in vcf record input:\n";
        vparse.dump(msg);
        throw blt_exception(msg.str().c_str());
    }

    _chrom = vparse.word[VCFID::CHROM];

    // parse_unsigned is handed a named pointer variable, as in the original.
    const char* pos_ptr(vparse.word[VCFID::POS]);
    _pos = parse_unsigned(pos_ptr);

    _id = vparse.word[VCFID::ID];

    _ref = vparse.word[VCFID::REF];
    assert(_ref.size() > 0);

    // ALT is a comma separated list; no entry may be empty.
    Splitter(vparse.word[VCFID::ALT], ',', _alt);
    for (unsigned alt_index(0); alt_index < _alt.size(); ++alt_index)
    {
        assert(_alt[alt_index].size() > 0);
    }

    _qual = vparse.word[VCFID::QUAL];

    Splitter(vparse.word[VCFID::FILT], ';', _filt);
    Splitter(vparse.word[VCFID::INFO], ';', _info);

    // Optional trailing columns.
    if (n_fields > VCFID::FORMAT)
    {
        Splitter(vparse.word[VCFID::FORMAT], ':', _format);
    }
    if (n_fields > VCFID::SAMPLE)
    {
        Splitter(vparse.word[VCFID::SAMPLE], ':', _sample);
    }

    // Each FORMAT key needs exactly one SAMPLE value.
    if (_format.size() != _sample.size())
    {
        std::ostringstream msg;
        msg << "FORMAT and SAMPLE fields do not agree for vcf record:\n";
        vparse.dump(msg);
        throw blt_exception(msg.str().c_str());
    }
}
/******************************************************************************* * Event handlers ******************************************************************************* */ void Client::on_connected() { Splitter channels = Splitter(config.irc_channels, " "); if (channels.size() > 0) { for (Splitter::size_type i = 0; i < channels.size(); i++) { join(channels[i]); } } else { std::cout << "No channels to join specified." << std::endl; exit(1); } }
/**
 * Parses a config file with lines of the form
 *
 *     flag = value
 *
 * and sets flags as appropriate.
 *
 * The file name is taken from g_configFile. Empty lines are skipped. Every
 * other line is split on the first '=' (whitespace trimmed); the left side is
 * looked up in GetGlobalFlags() and the right side, with surrounding quotes
 * removed by TrimQuotes, is handed to the flag's fromString().
 *
 * Line numbers in diagnostics are 1-based, i.e. they match what a text editor
 * shows for the offending line.
 *
 * @return Status::ok() when no config file was requested or every line was
 *         applied; Status::NOT_FOUND when the file cannot be opened;
 *         otherwise whatever RET_SM yields for Status::BAD_INPUT (presumably
 *         an early return carrying the formatted message -- confirm against
 *         the macro definition) on a malformed line, unknown flag, or value
 *         the flag refuses to parse.
 */
Status ParseConfigFile() {
  Trace();

  if (!g_configFile.wasSet()) {
    Log(LL::Trace) << "No flag config file specified.";
    return Status::ok();
  }

  const std::string &filename = g_configFile.get();
  Log(LL::Trace) << "Begining parse of config file: " << filename;

  std::ifstream ifile(filename.c_str());
  if (!ifile.is_open()) {
    Log(LL::Trace) << "Config file " << filename << " not found.";
    return Status(Status::NOT_FOUND);
  }

  std::string line;
  int lineno = 0;
  while (std::getline(ifile, line)) {
    // Count the line as soon as it is read so that every diagnostic below
    // reports the 1-based number of the line currently being processed.
    ++lineno;

    if (line.empty()) {
      continue;
    }

    // At most two pieces: the flag name and everything after the first '='.
    std::vector< std::string > argstr =
        Splitter().on('=').trimWhitespace().split(line, 2);
    RET_SM(
        argstr.size() == 2,
        Status::BAD_INPUT,
        "Error in config file \"" << filename << "\" "
        << "on line " << lineno << " is not a valid flag=value pair");

    tFlagIter itr = GetGlobalFlags().find(argstr[0]);
    RET_SM(
        itr != GetGlobalFlags().end(),
        Status::BAD_INPUT,
        "Unknown flag: " << argstr[0] << " @ line " << lineno);

    iFlagBase *pFlag = itr->second;
    RET_SM(
        pFlag->fromString(TrimQuotes(argstr[1])),
        Status::BAD_INPUT,
        "Invalid flag value for " << argstr[0] << " at line " << lineno
        << ". can't parse: " << argstr[1]);
  }

  return Status::ok();
}
/**
 * Applies command line flags, then the optional config file.
 *
 * Every argument after argv[0] must start with "--". Two spellings are
 * accepted:
 *
 *     --flag=value
 *     --flag value
 *
 * A flag given with no value at all is rejected, so booleans have to be
 * written as --flag=true.
 *
 * @param argc  number of entries in argv
 * @param argv  argument vector; argv[0] is logged but not parsed
 * @return Status::BAD_ARGUMENT for a malformed argument, unknown flag, or a
 *         value the flag cannot parse; otherwise the result of
 *         ParseConfigFile().
 */
Status ParseFlags(const int argc, const char **argv) {
  Trace();

  for (int n = 0; n < argc; ++n) {
    Log(LL::Info) << "argv[" << n << "] = " << argv[n];
  }

  for (int i = 1; i < argc; ++i) {
    const char *arg = argv[i];

    // Short-circuits on the first character, so arg[1] is only inspected
    // when arg[0] is '-' (and therefore not the terminator).
    const bool has_double_dash = (arg[0] == '-') && (arg[1] == '-');
    if (!has_double_dash) {
      Log(LL::Error) << "Unknown commandline input #" << i << ": " << arg;
      return Status::BAD_ARGUMENT;
    }

    // Drop the leading "--" and split name from value at the first '='.
    const std::string body(arg + 2);
    std::vector< std::string > argstr =
        Splitter().on('=').trimWhitespace().split(body, 2);
    RET_SM(!argstr.empty(), Status::BAD_ARGUMENT, "Unknown flag: " << arg);

    tFlagIter itr = GetGlobalFlags().find(argstr[0]);
    RET_SM(
        itr != GetGlobalFlags().end(),
        Status::BAD_ARGUMENT,
        "Unknown flag: " << arg);
    iFlagBase *pFlag = itr->second;

    // Work out where the value text comes from.
    std::string value_text;
    if (argstr.size() == 2) {
      // [flag][=][value]
      value_text = argstr.at(1);
    } else if (i + 1 < argc) {
      // [flag][space][value]: the value is the next argument, which is
      // consumed here.
      ++i;
      value_text = std::string(argv[i]);
    } else {
      // [flag] alone. Booleans must be specified as [flag][=][true].
      Log(LL::Error) << "Found flag with no value: " << argstr[0];
      return Status::BAD_ARGUMENT;
    }

    RET_SM(
        pFlag->fromString(value_text),
        Status::BAD_ARGUMENT,
        "Invalid flag value for " << argstr[0]
        << " can't parse " << value_text);
  }

  return ParseConfigFile();
}
void KMerHamClusterer::cluster(const std::string &prefix, const KMerData &data, ConcurrentDSU &uf) { // First pass - split & sort the k-mers std::ostringstream tmp; tmp << prefix << ".first"; std::string fname(tmp.str()); std::ofstream ofs(fname, std::ios::out | std::ios::binary); VERIFY(ofs.good()); INFO("Serializing sub-kmers."); for (unsigned i = 0; i < tau_ + 1; ++i) { size_t from = (*Globals::subKMerPositions)[i]; size_t to = (*Globals::subKMerPositions)[i+1]; INFO("Serializing: [" << from << ", " << to << ")"); serialize(ofs, data, NULL, SubKMerPartSerializer(from, to)); } VERIFY(!ofs.fail()); ofs.close(); size_t big_blocks1 = 0; { INFO("Splitting sub-kmers, pass 1."); SubKMerSplitter Splitter(fname, fname + ".blocks"); std::pair<size_t, size_t> stat = Splitter.split(); INFO("Splitting done." " Processed " << stat.first << " blocks." " Produced " << stat.second << " blocks."); // Sanity check - there cannot be more blocks than tau + 1 times of total // kmer number. And on the first pass we have only tau + 1 input blocks! VERIFY(stat.first == tau_ + 1); VERIFY(stat.second <= (tau_ + 1) * data.size()); // Ok, now in the files we have everything grouped in blocks in the output files. std::vector<size_t> block; INFO("Merge sub-kmers, pass 1"); SubKMerBlockFile blocks(fname + ".blocks", /* unlink */ true); std::ostringstream tmp; tmp << prefix << ".second"; fname = tmp.str(); ofs.open(fname, std::ios::out | std::ios::binary); VERIFY(ofs.good()); while (blocks.get_block(block)) { unsigned block_thr = cfg::get().hamming_blocksize_quadratic_threshold; if (block.size() < block_thr) { // Merge small blocks. processBlockQuadratic(uf, block, data, tau_); } else { big_blocks1 += 1; // Otherwise - dump for next iteration. 
for (unsigned i = 0; i < tau_ + 1; ++i) { serialize(ofs, data, &block, SubKMerStridedSerializer(i, tau_ + 1)); } } } VERIFY(!ofs.fail()); ofs.close(); INFO("Merge done, total " << big_blocks1 << " new blocks generated."); } size_t big_blocks2 = 0; { INFO("Spliting sub-kmers, pass 2."); SubKMerSplitter Splitter(fname, fname + ".blocks"); std::pair<size_t, size_t> stat = Splitter.split(); INFO("Splitting done." " Processed " << stat.first << " blocks." " Produced " << stat.second << " blocks."); // Sanity check - there cannot be more blocks than tau + 1 times of total // kmer number. And there should be tau + 1 times big_blocks input blocks. VERIFY(stat.first == (tau_ + 1)*big_blocks1); VERIFY(stat.second <= (tau_ + 1) * (tau_ + 1) * data.size()); INFO("Merge sub-kmers, pass 2"); SubKMerBlockFile blocks(fname + ".blocks", /* unlink */ true); std::vector<size_t> block; size_t nblocks = 0; while (blocks.get_block(block)) { if (block.size() > 50) { big_blocks2 += 1; #if 0 for (size_t i = 0; i < block.size(); ++i) { std::string s(Globals::blob + data[block[i]], K); INFO("" << block[i] << ": " << s); } #endif } processBlockQuadratic(uf, block, data, tau_); nblocks += 1; } INFO("Merge done, saw " << big_blocks2 << " big blocks out of " << nblocks << " processed."); } }