예제 #1
0
/// Build a record from one VCF line that has already been split into words.
///
/// Every column up to and including INFO must be present; FORMAT and SAMPLE
/// are read only when the line is long enough to contain them.
///
/// \param vparse line splitter holding the words of the current VCF line
/// \throws blt_exception if the line has too few words, or if the FORMAT and
///         SAMPLE columns split into a different number of entries
VcfRecord::
VcfRecord(const istream_line_splitter& vparse)
{
    const unsigned n_fields = vparse.n_word();

    // Reject lines that stop before the INFO column.
    if (static_cast<int>(n_fields) <= VCFID::INFO) {
        std::ostringstream msg;
        msg << "Too few fields (" << n_fields << ") in vcf record input:\n";
        vparse.dump(msg);
        throw blt_exception(msg.str().c_str());
    }

    // Mandatory columns, read in file order.
    _chrom = vparse.word[VCFID::CHROM];

    // parse_unsigned is handed a named pointer rather than the word directly.
    const char* pos_cursor = vparse.word[VCFID::POS];
    _pos = parse_unsigned(pos_cursor);

    _id = vparse.word[VCFID::ID];

    _ref = vparse.word[VCFID::REF];
    assert(_ref.size() > 0);

    // ALT is a comma-separated list; no entry may be empty.
    Splitter(vparse.word[VCFID::ALT], ',', _alt);
    for (unsigned alt_index = 0; alt_index < _alt.size(); ++alt_index) {
        assert(_alt[alt_index].size() > 0);
    }

    _qual = vparse.word[VCFID::QUAL];

    // FILTER and INFO are both semicolon-separated lists.
    Splitter(vparse.word[VCFID::FILT], ';', _filt);
    Splitter(vparse.word[VCFID::INFO], ';', _info);

    // Optional columns: colon-separated keys and the matching sample values.
    if (n_fields > VCFID::FORMAT) {
        Splitter(vparse.word[VCFID::FORMAT], ':', _format);
    }
    if (n_fields > VCFID::SAMPLE) {
        Splitter(vparse.word[VCFID::SAMPLE], ':', _sample);
    }

    // The two optional columns must split into the same number of entries.
    if (_format.size() != _sample.size()) {
        std::ostringstream msg;
        msg << "FORMAT and SAMPLE fields do not agree for vcf record:\n";
        vparse.dump(msg);
        throw blt_exception(msg.str().c_str());
    }
}
예제 #2
0
파일: client.cpp 프로젝트: L4nz/cheetah
/*******************************************************************************
 * Event handlers
 *******************************************************************************
 */
void Client::on_connected() {
    Splitter channels = Splitter(config.irc_channels, " ");
    if (channels.size() > 0) {
        for (Splitter::size_type i = 0; i < channels.size(); i++) {
            join(channels[i]);
        }
    } else {
        std::cout << "No channels to join specified." << std::endl;
        exit(1);
    }
}
예제 #3
0
파일: config.cpp 프로젝트: kulseran/FISHY
/**
 * Parses the config file named by the g_configFile flag. Every non-empty line
 * must have the form
 *
 *     flag = value
 *
 * and the matching global flag is set from the value (after TrimQuotes).
 *
 * Line numbers in diagnostics are 1-based, matching what a text editor shows.
 *
 * @return Status::ok() when no config file was specified or every line was
 *         applied; Status::NOT_FOUND when the file cannot be opened. A line
 *         that is not a flag=value pair, names an unknown flag, or carries a
 *         value the flag rejects is reported through RET_SM with
 *         Status::BAD_INPUT (RET_SM is expected to return that status when
 *         its condition is false).
 */
Status ParseConfigFile() {
  Trace();
  if (!g_configFile.wasSet()) {
    Log(LL::Trace) << "No flag config file specified.";
    return Status::ok();
  }
  const std::string &filename = g_configFile.get();
  Log(LL::Trace) << "Begining parse of config file: " << filename;

  std::ifstream ifile(filename.c_str());
  if (!ifile.is_open()) {
    Log(LL::Trace) << "Config file " << filename << " not found.";
    return Status(Status::NOT_FOUND);
  }

  std::string line;
  int lineno = 0;
  while (std::getline(ifile, line)) {
    // Count the line as soon as it is read so that the first line is line 1
    // and every exit path from this iteration sees the same number.
    ++lineno;

    // Blank lines are counted but otherwise ignored.
    if (line.empty()) {
      continue;
    }

    // Split on '=' into at most two pieces: flag name and value.
    std::vector< std::string > argstr =
        Splitter().on('=').trimWhitespace().split(line, 2);

    RET_SM(
        argstr.size() == 2,
        Status::BAD_INPUT,
        "Error in config file \"" << filename << "\" "
                                  << "on line " << lineno
                                  << " is not a valid flag=value pair");

    tFlagIter itr = GetGlobalFlags().find(argstr[0]);
    RET_SM(
        itr != GetGlobalFlags().end(),
        Status::BAD_INPUT,
        "Unknown flag: " << argstr[0] << " @ line " << lineno);

    iFlagBase *pFlag = itr->second;
    RET_SM(
        pFlag->fromString(TrimQuotes(argstr[1])),
        Status::BAD_INPUT,
        "Invalid flag value for " << argstr[0] << " at line " << lineno
                                  << ". can't parse: " << argstr[1]);
  }
  return Status::ok();
}
예제 #4
0
파일: config.cpp 프로젝트: kulseran/FISHY
/**
 * Applies command line flags and then continues with ParseConfigFile().
 *
 * Every argument after argv[0] must start with "--". A flag's value is taken
 * either from the same argument ("--flag=value") or from the argument that
 * follows it ("--flag value"). A flag with no value at all is an error.
 *
 * @param argc number of entries in argv
 * @param argv argument vector; argv[0] is logged but not parsed
 * @return the result of ParseConfigFile() when every flag was applied;
 *         Status::BAD_ARGUMENT for input that is not a flag or a flag without
 *         a value. Unknown flags and rejected values are reported through
 *         RET_SM with Status::BAD_ARGUMENT.
 */
Status ParseFlags(const int argc, const char **argv) {
  Trace();

  // Echo the raw command line, argv[0] included.
  for (int logIdx = 0; logIdx < argc; ++logIdx) {
    Log(LL::Info) << "argv[" << logIdx << "] = " << argv[logIdx];
  }

  for (int pos = 1; pos < argc; ++pos) {
    const char *rawArg = argv[pos];

    // The second character is only inspected once the first is known to be
    // '-', so an empty argument is never read past its terminator.
    const bool hasFlagPrefix = (rawArg[0] == '-') && (rawArg[1] == '-');
    if (!hasFlagPrefix) {
      Log(LL::Error) << "Unknown commandline input #" << pos << ": " << rawArg;
      return Status::BAD_ARGUMENT;
    }

    // Everything after the "--" is split on '=' into at most name and value.
    const std::string flagText(rawArg + 2);
    std::vector< std::string > parts =
        Splitter().on('=').trimWhitespace().split(flagText, 2);
    RET_SM(!parts.empty(), Status::BAD_ARGUMENT, "Unknown flag: " << rawArg);

    tFlagIter found = GetGlobalFlags().find(parts[0]);
    RET_SM(
        found != GetGlobalFlags().end(),
        Status::BAD_ARGUMENT,
        "Unknown flag: " << rawArg);

    iFlagBase *flag = found->second;
    const bool hasInlineValue = (parts.size() == 2);

    // Error on case where we have [flag] alone.  Booleans must be specified
    // as [flag][=][true]
    if (!hasInlineValue && pos + 1 >= argc) {
      Log(LL::Error) << "Found flag with no value: " << parts[0];
      return Status::BAD_ARGUMENT;
    }

    if (hasInlineValue) {
      // Parse case where we have [flag][=][value]
      RET_SM(
          flag->fromString(parts[1]),
          Status::BAD_ARGUMENT,
          "Invalid flag value for " << parts[0] << " can't parse "
                                    << parts[1]);
    } else {
      // Parse case where we have [flag][space][value]: consume the next
      // argument as the value so the loop does not treat it as a flag.
      ++pos;
      RET_SM(
          flag->fromString(std::string(argv[pos])),
          Status::BAD_ARGUMENT,
          "Invalid flag value for " << parts[0] << " can't parse "
                                    << argv[pos]);
    }
  }

  return ParseConfigFile();
}
// Clusters the k-mers in `data` in two on-disk passes, merging through `uf`.
//
// Pass 1 serializes tau_ + 1 sub-kmer parts to "<prefix>.first", splits that
// file into blocks, and hands every block smaller than the configured
// threshold to processBlockQuadratic(). Larger blocks are re-serialized with
// tau_ + 1 strided serializers into "<prefix>.second".
// Pass 2 splits "<prefix>.second" into blocks and hands every block to
// processBlockQuadratic(), whatever its size.
//
// prefix - path prefix for the intermediate files created here
// data   - k-mer data that is serialized and passed to the block processor
// uf     - passed to processBlockQuadratic(); presumably the disjoint-set
//          structure that receives the merges (confirm against that function)
void KMerHamClusterer::cluster(const std::string &prefix,
                               const KMerData &data,
                               ConcurrentDSU &uf) {
  // First pass - split & sort the k-mers
  std::ostringstream tmp;
  tmp << prefix << ".first";
  std::string fname(tmp.str());
  std::ofstream ofs(fname, std::ios::out | std::ios::binary);
  VERIFY(ofs.good());

  INFO("Serializing sub-kmers.");
  // One serialization per sub-kmer part: part i covers the half-open range
  // between consecutive entries of Globals::subKMerPositions.
  for (unsigned i = 0; i < tau_ + 1; ++i) {
    size_t from = (*Globals::subKMerPositions)[i];
    size_t to = (*Globals::subKMerPositions)[i+1];

    INFO("Serializing: [" << from << ", " << to << ")");
    // NULL here, as opposed to &block further down, where only the k-mers of
    // one block are written.
    serialize(ofs, data, NULL,
              SubKMerPartSerializer(from, to));
  }
  VERIFY(!ofs.fail());
  ofs.close();

  // Number of pass-1 blocks that were too large and were deferred to pass 2.
  size_t big_blocks1 = 0;
  {
    INFO("Splitting sub-kmers, pass 1.");
    SubKMerSplitter Splitter(fname, fname + ".blocks");
    // stat.first is logged as the number of processed (input) blocks and
    // stat.second as the number of produced (output) blocks.
    std::pair<size_t, size_t> stat = Splitter.split();
    INFO("Splitting done."
            " Processed " << stat.first << " blocks."
            " Produced " << stat.second << " blocks.");

    // Sanity check - there cannot be more blocks than tau + 1 times of total
    // kmer number. And on the first pass we have only tau + 1 input blocks!
    VERIFY(stat.first == tau_ + 1);
    VERIFY(stat.second <= (tau_ + 1) * data.size());

    // Ok, now in the files we have everything grouped in blocks in the output files.

    std::vector<size_t> block;

    INFO("Merge sub-kmers, pass 1");
    SubKMerBlockFile blocks(fname + ".blocks", /* unlink */ true);

    // This `tmp` shadows the outer one. From here on `fname` names the
    // "<prefix>.second" file, which is what pass 2 reads.
    std::ostringstream tmp;
    tmp << prefix << ".second";
    fname = tmp.str();

    // The stream closed after the first serialization is reopened on the new file.
    ofs.open(fname, std::ios::out | std::ios::binary);
    VERIFY(ofs.good());
    while (blocks.get_block(block)) {
      // The threshold is read from the configuration on every iteration.
      unsigned block_thr = cfg::get().hamming_blocksize_quadratic_threshold;
      if (block.size() < block_thr) {
        // Merge small blocks.
        processBlockQuadratic(uf, block, data, tau_);
      } else {
        big_blocks1 += 1;
        // Otherwise - dump for next iteration.
        // Each big block is written tau_ + 1 times, once per stride offset,
        // which is why pass 2 expects (tau_ + 1) * big_blocks1 input blocks.
        for (unsigned i = 0; i < tau_ + 1; ++i) {
          serialize(ofs, data, &block,
                    SubKMerStridedSerializer(i, tau_ + 1));
        }
      }
    }
    VERIFY(!ofs.fail());
    ofs.close();
    INFO("Merge done, total " << big_blocks1 << " new blocks generated.");
  }

  // Number of pass-2 blocks with more than 50 entries; used only for the final log line.
  size_t big_blocks2 = 0;
  {
    INFO("Spliting sub-kmers, pass 2.");
    // `fname` is now "<prefix>.second" (reassigned inside the pass-1 scope).
    SubKMerSplitter Splitter(fname, fname + ".blocks");
    std::pair<size_t, size_t> stat = Splitter.split();
    INFO("Splitting done."
            " Processed " << stat.first << " blocks."
            " Produced " << stat.second << " blocks.");

    // Sanity check - there cannot be more blocks than tau + 1 times of total
    // kmer number. And there should be tau + 1 times big_blocks input blocks.
    VERIFY(stat.first == (tau_ + 1)*big_blocks1);
    VERIFY(stat.second <= (tau_ + 1) * (tau_ + 1) * data.size());

    INFO("Merge sub-kmers, pass 2");
    SubKMerBlockFile blocks(fname + ".blocks", /* unlink */ true);
    std::vector<size_t> block;

    size_t nblocks = 0;
    while (blocks.get_block(block)) {
      // Blocks above the hard-coded size of 50 are only counted; they are
      // still processed below like every other block.
      if (block.size() > 50) {
        big_blocks2 += 1;
        // Disabled debug dump of the k-mers in a big block.
#if 0
        for (size_t i = 0; i < block.size(); ++i) {
          std::string s(Globals::blob + data[block[i]], K);
          INFO("" << block[i] << ": " << s);
        }
#endif
      }
      // Unlike pass 1, there is no size threshold here.
      processBlockQuadratic(uf, block, data, tau_);
      nblocks += 1;
    }
    INFO("Merge done, saw " << big_blocks2 << " big blocks out of " << nblocks << " processed.");
  }
}