void PhosimParser::readSegmentation(std::istream& in_stream) {
     // Read segmentation.txt: each chip header line carries the chip ID and
     // its amplifier count, followed by one line per amplifier.
     std::string line;
     while (std::getline(in_stream, line, '\n')) {
         std::vector<std::string> tokens;
         bool goodLine = stringTokenize(line,tokens);
         if(!goodLine){
             continue;
         }
         std::string chipID(tokens[0]);
         std::string value(tokens[1]);
         for (size_t i(2); i < tokens.size(); i++) {
             value += " " + tokens[i];
         }
         std::vector<PhosimPar> values;
         values.push_back(PhosimPar(value));
         m_data.insert(std::make_pair(chipID, values));
         int numAmplifiers = std::atoi(tokens[1].c_str());
         for (int j(0); j < numAmplifiers; j++) {
             std::string line2;
             std::getline(in_stream, line2, '\n');
             std::vector<std::string> tokens2;
             bool goodLine = stringTokenize(line2, tokens2);
             if (!goodLine) {
                 // Skip blank/comment lines without consuming an amplifier slot.
                 j--;
                 continue;
             }
             std::string value2(tokens2[0]);
             for (size_t i(1); i < tokens2.size(); i++) {
                 value2 += " " + tokens2[i];
             }
             m_data[chipID].push_back(PhosimPar(value2));
         }
     }
 }
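A minimal driver sketch for readSegmentation, assuming the PhosimParser declaration and the bool-returning stringTokenize overload used above are available; the chip ID, amplifier count, and coordinates below are made up purely to illustrate the expected layout.

#include <sstream>

void exampleReadSegmentation(PhosimParser& parser) {
    // Hypothetical segmentation data: "<chipID> <numAmplifiers> ..." followed
    // by numAmplifiers amplifier description lines.
    std::istringstream in(
        "R01_S00 2 4000 4072\n"
        "R01_S00_C00 0 1999 0 4071\n"
        "R01_S00_C01 2000 3999 0 4071\n");
    parser.readSegmentation(in);
    // m_data["R01_S00"] now holds three PhosimPar entries: the chip header
    // value ("2 4000 4072") plus one entry per amplifier line.
}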
int main(int argc, char *argv[]) {
  RangeList rl;
  rl.addRangeFile("RangeList_test_input");
  std::string range;
  for (unsigned int i = 0; i < rl.size(); i++) {
    rl.obtainRange(i, &range);
    printf("%s\n", range.c_str());
  }

  std::string s = "a b\"MID\" c d";
  std::vector<std::string> result;
  unsigned int ret = stringTokenize(s, ' ', &result);
  printf("ret = %u\n", ret);
  dumpStringVector(result);

  ret = stringTokenize(s, "\" ", &result);
  printf("ret = %u\n", ret);
  dumpStringVector(result);

  s = "";
  ret = stringTokenize(s, " ", &result);
  printf("ret = %u\n", ret);
  dumpStringVector(result);

  return 0;
}
Example #3
int FormulaVector::add(const std::string& response,
                       const std::string& predictor) {
  std::vector<std::string> y;
  std::vector<std::string> z;

  stringTokenize(response, ",+", &y);
  dedup(&y);

  stringTokenize(predictor, ",+", &z);
  dedup(&z);

  return add(y, z);
}
/**
 * @return -1 if there are missing genotypes
 */
int countVariant(const std::string& indv) {
  size_t pos = indv.find(":");
  if (pos == std::string::npos) {
    pos = indv.size();
  }
  std::string gt = indv.substr(0, pos);

  // if there are missing genotypes, we will count this genotype as missing
  if (gt.find(".") != std::string::npos) {
    return -1;
  }

  std::vector<std::string> fd;
  stringTokenize(gt, "/|", &fd);
  if (fd.size() > 2) {
    fprintf(stderr, "Strange genotype: %s\n", gt.c_str());
    return -1;
  }
  int count = 0;
  int v = 0;
  for (size_t i = 0; i < fd.size(); ++i) {
    if (str2int(fd[i], &v)) {
      count += v;
    } else {
      fprintf(stderr, "Strange genotype: %s\n", gt.c_str());
      return -1;
    }
  }
  return count;
}
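A small usage sketch for countVariant; the expected values follow directly from the summing logic above (sum of the allele indices, or -1 when the genotype contains '.'). It assumes countVariant and its stringTokenize/str2int helpers are linked in.

#include <cassert>
#include <string>

int countVariant(const std::string& indv);  // defined above

void exampleCountVariant() {
  assert(countVariant("0/1:35,20:55") == 1);  // het: 0 + 1 alternate allele
  assert(countVariant("1|1") == 2);           // hom-alt; '|' separators also split
  assert(countVariant("./.:10") == -1);       // missing genotype
}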
 void PhosimParser::read_stream(std::istream & in_stream) {
     // Each line is parsed as a key-value pair assuming space-delimited
     // fields.  The first field is the key and the remaining fields are
     // re-concatenated as a single string.
     std::string line;
     while (std::getline(in_stream, line, '\n')) {
         std::vector<std::string> tokens;
         stringTokenize(line, " ", tokens);
         // Skip lines with fewer than two tokens.
         if (tokens.size() >= 2) {
             std::string key(tokens[0]);
             std::string value(tokens[1]);
             for (size_t i(2); i < tokens.size(); i++) {
                 value += " " + tokens[i];
             }
             if (has_key(key)) {
                 m_data[key].push_back(PhosimPar(value));
             } else {
                 std::vector<PhosimPar> values;
                 values.push_back(PhosimPar(value));
                 m_data.insert(std::make_pair(tokens[0], values));
             }
         }
     }
 }
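An illustrative sketch of the key/value split performed by read_stream, using made-up keys; it assumes the PhosimParser class declaration is available.

#include <sstream>

void exampleReadStream(PhosimParser& parser) {
    // Hypothetical input: repeated keys accumulate, extra fields are rejoined.
    std::istringstream in(
        "zenith 10.0\n"
        "filter 2\n"
        "zenith 12.5 extra note\n");
    parser.read_stream(in);
    // "filter" -> one value ("2");
    // "zenith" -> two values ("10.0" and "12.5 extra note").
}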
 void PhosimParser::getNameMatrix(const std::string & key,
                                  std::vector<std::vector<double> > & names) const {
     const std::vector<PhosimPar> & values(getVector(key));
     for (size_t i(0); i < values.size(); i++) {
         std::vector<std::string> tokens;
         stringTokenize(values[i], " ", tokens);
         names[std::atoi(tokens[0].c_str())][std::atoi(tokens[1].c_str())] = atof(tokens[2].c_str());
     }
 }
    void PhosimParser::readStream(std::istream& inStream)
    // ***********************************************************************
    // This method is just like read_stream except that it ignores comments.
    // ***********************************************************************
    // Each line is parsed as a key-value pair assuming space-delimited
    // fields.  The first field is the key and the remaining fields are
    // re-concatenated as a single string.
    // ************************************************************************
    // Note: comments are also ignored (anything after a '#' sign, which may
    // be a whole line).
    {
        // ***********************************************************************
        // Each line is parsed as a key-value pair assuming whitespace-delimited
        // fields.  (This allows spaces or tabs to separate values; both are used
        // in lsst files.)
        // For commands: the first field is the key and the remaining fields are
        // re-concatenated as a single string.
        // ***********************************************************************
        std::string line;

        while (std::getline(inStream, line, '\n')) {
            // ********************************************************************
            // Search the line for a '#' sign; it denotes that all following chars
            // are a comment. Delete the '#' and all following chars from the string.
            // *********************************************************************
            std::vector<std::string> tokens;
            std::string value;

            bool goodLine=stringTokenize(line,tokens);
            if(!goodLine){
                continue;
            }

            int numTokens=tokens.size();

            size_t j=1;
            std::string key = tokens[0];   // Normal single-key, single-value command
            if (numTokens > 1) {
                value=tokens[j];
            } else {
                value=tokens[0];
            }
            j++;
            for (size_t i=j; i <(size_t) numTokens; i++) {
                value += " " + tokens[i];
            }
            if (has_key(key)) {
                m_data[key].push_back(PhosimPar(value));
            } else {
                std::vector<PhosimPar> values;
                values.push_back(PhosimPar(value));
                m_data.insert(std::make_pair(key, values));
            }
        }
        return;
    }
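A sketch of readStream on commented input, assuming (as the header comment states) that the bool-returning stringTokenize overload strips anything after '#' and reports comment-only or blank lines as bad.

#include <sstream>

void exampleReadStreamComments(PhosimParser& parser) {
    std::istringstream in(
        "# whole-line comment, skipped\n"
        "zenith 10.0  # trailing comment, stripped by stringTokenize\n"
        "\n"
        "filter 2\n");
    parser.readStream(in);
    // Result: "zenith" -> "10.0" and "filter" -> "2"; comment and blank lines
    // contribute nothing.
}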
 void PhosimParser::getNameVector(const std::string & key,
                                  std::vector<int> & names) const {
     const std::vector<PhosimPar> & values(getVector(key));
     if (names.size() < values.size()) names.resize(values.size());
     for (size_t i(0); i < values.size(); i++) {
         std::vector<std::string> tokens;
         stringTokenize(values[i], " ", tokens);
         names[std::atoi(tokens[0].c_str())] = atoi(tokens[1].c_str());
     }
 }
 int PhosimParser::getNameListSize(const std::string & key,
                                   int index1, int index2) {
     const std::vector<PhosimPar> & values(getVector(key));
     int num=0;
     for (size_t i(0); i < values.size(); i++) {
         std::vector<std::string> tokens;
         stringTokenize(values[i], " ", tokens);
         if (std::atoi(tokens[0].c_str()) == index1 && std::atoi(tokens[1].c_str()) == index2) num++;
     }
     return num;
 }
Example #10
std::vector<std::string> extractCovariate(const std::vector<std::string>& v) {
  std::vector<std::string> fd;
  std::vector<std::string> ret;
  for (size_t i = 0; i != v.size(); ++i) {
    stringTokenize(v[i], ",+", &fd);
    for (size_t j = 0; j != fd.size(); ++j) {
      if (fd[j] != "1") {
        ret.push_back(fd[j]);
      }
    }
  }
  dedup(&ret);
  return ret;
}
Example #11
/**
 * input range such as:
 * 1:100-200,3:200-300
 * X:150
 * MT
 */
void RangeList::addRangeList(const char* argRangeList) {
  if (!strlen(argRangeList)) return;

  std::string rangeList = argRangeList;
  std::vector<std::string> col;
  //col.AddTokens(arg, ',');
  stringTokenize(rangeList, ',', &col);
  for (size_t i = 0; i < col.size(); i++){
    std::string c;
    unsigned int b,e;
    if (!parseRangeFormat(col[i], &c, &b, &e)) {
      this->rangeCollection.addRange(c, b, e);
    } else {
      Rprintf("This range does not conform to the 1:100-200 format -- skip %s\n", col[i].c_str());
    }
  }
}
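A usage sketch of RangeList::addRangeList mirroring the formats listed in the comment above; the chromosome and position values are made up.

void exampleAddRangeList(RangeList& rl) {
  // Comma-separated ranges in "chrom:begin-end" form.
  rl.addRangeList("1:100-200,3:200-300");
  // A single position and a bare chromosome name are also accepted.
  rl.addRangeList("X:150");
  rl.addRangeList("MT");
}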
Example #12
int ModelManager::create(const std::string& type,
                         const std::string& modelList) {
  if (modelList.empty()) {
    return 0;
  }

  std::string modelName;
  std::vector<std::string> modelParams;
  std::vector<std::string> argModelName;
  ModelParser parser;

  stringTokenize(modelList, ",", &argModelName);
  for (size_t i = 0; i < argModelName.size(); i++) {
    // TODO: check parse results
    parser.parse(argModelName[i]);
    create(type, parser);
  }
  return 0;
}
Example #13
int _loadCovariate(const std::string& fn,
                   const std::vector<std::string>& includedSample,
                   const std::string& covNameToUse,
                   DataLoader::HandleMissingCov handleMissingCov,
                   SimpleMatrix* covariate, std::vector<std::string>* colNames,
                   std::set<std::string>* sampleToDrop) {
  std::vector<std::string> fd;
  if (covNameToUse.size()) {
    stringTokenize(covNameToUse, ',', &fd);
  }
  if (!isUnique(fd)) {
    logger->error("Remove duplicated covariates in the model before continuing");
    return -1;
  }
  if (!isUnique(includedSample)) {
    logger->error("Unable to include duplicated samples");
    return -1;
  }
  return _loadCovariate(fn, includedSample, fd, handleMissingCov, covariate,
                        colNames, sampleToDrop);
}
    void simplePreprocess(
        vector<string> &allFileNames,
        const std::string &fileName,
        const vector<string> &input,
        vector<string> &output,
        const vector<string> *inputConstants,
        ostream *errorStream,
        int recursionLevel) {

        if(!errorStream) errorStream = &cout;

        if(recursionLevel > 20) {
            (*errorStream) << "Too many levels of #include recursion in shader: " << fileName << endl;
            return;
        }

        unsigned int lineNumber = 0;

        // Add header
        if(inputConstants) {
            for(unsigned int i = 0; i < (*inputConstants).size(); i++) {
                output.push_back(string("#define ") + (*inputConstants)[i]);
            }
        }

        // Correct the line number.
        output.push_back(
            getFileLineNumber(
                allFileNames, fileName, 0));

        while(lineNumber < input.size()) {

            int c = 0;

            // Read to the first non-whitespace thing in a line.
            while(input[lineNumber][c] && isWhiteSpace(input[lineNumber][c])) {
                c++;
            }

            // TODO: Extend this to understand "#pragma once" or at
            // the very least #ifdefs so we can make #include guards.

            if(input[lineNumber][c] == '#') {

                // Skip the '#'
                c++;

                // Tokenize it.
                std::vector<std::string> preProcTokens;
                stringTokenize(input[lineNumber].c_str() + c, " ", preProcTokens, false);

                if(preProcTokens.size()) {

                    if(preProcTokens[0] == "include") {

                        if(preProcTokens.size() > 1) {

                            string includeFileName = preProcTokens[1];

                            // Knock off quotes or brackets around the
                            // name. They don't really matter here.
                            if(includeFileName[0] == '"' || includeFileName[0] == '<') {

                                // Chop off the end.
                                includeFileName[includeFileName.size() - 1] = 0;

                                // Chop off the start.
                                includeFileName = includeFileName.c_str() + 1;

                                // Note, if this code changes, be
                                // aware that C++ strings will happily
                                // store the \0 in the string, and
                                // screw everything up later.
                            }

                            // Load the included file.
                            char *code = NULL;
                            int codeLength = 0;
                            code = FileSystem::loadFile(includeFileName, &codeLength, true);

                            if(code) {

                                // Process another file's contents
                                // straight into our own output list.
                                vector<string> inputLines;
                                stringTokenize(code, "\n", inputLines, true);
                                simplePreprocess(
                                    allFileNames,
                                    includeFileName,
                                    inputLines,
                                    output, NULL,
                                    errorStream,
                                    recursionLevel + 1);

                                // Get back to the correct line
                                // number.

                                // // FIXME: Apparently this doesn't work
                                // // with shaders on Intel's
                                // // drivers. Works on nVidia, need to
                                // // test ATI.
                                // ostringstream str;
                                // str << "#line " << lineNumber;
                                // output.push_back(str.str());

                                output.push_back(
                                    getFileLineNumber(
                                        allFileNames, fileName, lineNumber));

                            } else {
                                // Error: Bad #include
                                (*errorStream) << "Couldn't open " << includeFileName.c_str() << "." << endl;
                            }

                            delete[] code;

                        } else {
                            // Error: Bad #include
                        }

                    } else {
                        // If it's a directive we don't recognize
                        // here, it's probably something that GLSL
                        // already handles, so just pass it through.
                        output.push_back(input[lineNumber]);
                    }

                } else {
                    // Error: Had just a '#' on the line.
                }

            } else {

                // Normal line of code.
                output.push_back(input[lineNumber]);

            }

            lineNumber++;
        }
    }
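A hedged driver sketch for simplePreprocess, keeping the unqualified vector/string/ostream names the function itself uses and assuming the project's stringTokenize overload that splits on a delimiter set; the shader source and the constant are hypothetical.

    void examplePreprocessShader(vector<string>& allFileNames, ostream& err) {
        // Split a hypothetical fragment shader into lines, then expand any
        // #include directives and prepend #define lines for the constants.
        vector<string> inputLines;
        stringTokenize(
            "#include \"common.glsl\"\n"
            "void main() { gl_FragColor = vec4(1.0); }\n",
            "\n", inputLines, true);

        vector<string> constants;
        constants.push_back("USE_LIGHTING 1");

        vector<string> outputLines;
        simplePreprocess(allFileNames, "example.frag", inputLines, outputLines,
                         &constants, &err, 0);
        // outputLines now begins with "#define USE_LIGHTING 1", a line marker
        // from getFileLineNumber(), and then the preprocessed source.
    }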
/**
 * Create single chromosome index file
 * the file content is a 2-column matrix of int64_t type
 * line1:  num_sample  num_marker
 * line2:  0           bgzf_offset_for_#CHROM_line
 * line3:  var_1_pos   bgzf_offset_for_var_1
 * ...
 */
int SingleChromosomeBCFIndex::createIndex() {
  // const char* fn = bcfFile_.c_str();
  BGZF* fp = fBcfFile_;  // bgzf_open(fn, "rb");
  bgzf_seek(fp, 0, SEEK_SET);

  // check magic number
  char magic[5];
  if (5 != bgzf_read(fp, magic, 5)) {
    return -1; // exit(1);
  }
  if (!(magic[0] == 'B' && magic[1] == 'C' && magic[2] == 'F' &&
        magic[3] == 2 && (magic[4] == 1 || magic[4] == 2))) {
    return -1; // exit(1);
  }

  // read header
  uint32_t l_text;
  if (4 != bgzf_read(fp, &l_text, 4)) {
    return -1; // exit(1);
  }
  Rprintf("l_text = %u\n", l_text);

  std::string s;
  int64_t bgzf_offset_before_header = bgzf_tell(fp); // the beginning of header block
  s.resize(l_text);
  if (bgzf_read(fp, (void*)s.data(), l_text) != l_text) {
    REprintf( "Read failed!\n");
  }
  BCFHeader bcfHeader;
  if (bcfHeader.parseHeader(s,
                  &bcfHeader.header_contig_id,
                  &bcfHeader.header_id,
                  &bcfHeader.header_number,
                  &bcfHeader.header_type,
                  &bcfHeader.header_description)) {
    REprintf( "Parse header failed!\n");
    return -1; // exit(1);
  }

  // locate #CHROM line
  int64_t bgzf_offset_after_header = bgzf_tell(fp); // the end of header block
  size_t ptr_chrom_line = s.find("#CHROM"); // the index of "#CHROM", also the size between beginning of header to '#CHROM'
  if (ptr_chrom_line == std::string::npos) {
    REprintf( "Cannot find the \"#CHROM\" line!\n");
    return -1; // exit(1);
  }
  Rprintf("offset_header = %d\n", (int) ptr_chrom_line);

  bgzf_seek(fp, bgzf_offset_before_header, SEEK_SET); // rewind fp to the beginning of header
  s.resize(ptr_chrom_line);
  int64_t before_chrom_size = bgzf_read(fp, (void*) s.data(), ptr_chrom_line);
  int64_t bgzf_offset_before_chrom = bgzf_tell(fp); // the offset to #CHROM
  s.resize(l_text - before_chrom_size);
  int64_t after_chrom_size = bgzf_read(fp, (void*) s.data(), l_text - before_chrom_size);
  // load sample names
  while (s.back() == '\n' || s.back() == '\0') {
    s.resize(s.size() - 1);
  }
  stringTokenize(s, "\t", &bcfHeader.sample_names);
  const int64_t num_sample = (int)bcfHeader.sample_names.size() - 9; // vcf header has 9 columns CHROM...FORMAT before actual sample names
  Rprintf("sample size = %ld\n", num_sample);
  Rprintf("last character is s[after_chrom_size-1] = %d\n", s[after_chrom_size - 1]); // should be 0, the null terminator character
  // quality check
  if (bgzf_offset_after_header != bgzf_tell(fp)) {
    REprintf( "Messed up bgzf header\n");
    return -1; // exit(1);
  }

  // create index file
  FILE* fIndex = fopen(indexFile_.c_str(), "wb");
  int64_t num_marker = 0;
  int64_t pos = 0;
  fwrite(&num_sample, sizeof(int64_t), 1, fIndex);
  fwrite(&num_marker, sizeof(int64_t), 1, fIndex);
  fwrite(&pos, sizeof(int64_t), 1, fIndex);
  fwrite(&bgzf_offset_before_chrom, sizeof(int64_t), 1, fIndex);

  uint32_t l_shared;
  uint32_t l_indiv;
  std::vector<char> data;
  int64_t offset;
  do {
    offset = bgzf_tell(fp);
    if (4 != bgzf_read(fp, &l_shared, sizeof(uint32_t))) {
      break; // REprintf( "Wrong read!\n"); exit(1);
    }
    if (4 != bgzf_read(fp, &l_indiv, sizeof(uint32_t))) {
      break; // REprintf( "Wrong read!\n"); exit(1);
    }
    data.resize(l_shared + l_indiv);
    if (l_shared + l_indiv != bgzf_read(fp, data.data(), (l_shared+l_indiv) * sizeof(char))) {
      break; // REprintf( "Wrong read!\n"); exit(1);
    }
    memcpy(&pos, data.data() + 4, 4);
    fwrite(&pos, sizeof(int64_t), 1, fIndex);
    fwrite(&offset, sizeof(int64_t), 1, fIndex);

    num_marker++;
    if (num_marker % 10000 == 0) {
      Rprintf("\rprocessed %ld markers", num_marker);
    }
  } while (true);

  if (fseek(fIndex, 0, SEEK_SET)) {
    REprintf("fseek failed!\n");
  }
  fwrite(&num_sample, sizeof(int64_t), 1, fIndex);
  fwrite(&num_marker, sizeof(int64_t), 1, fIndex);
  fclose(fIndex);
  Rprintf("Indexing finished with %ld samples and %ld markers\n", num_sample, num_marker);

  return 0;
}
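A reader sketch for the index layout documented above (an int64_t num_sample/num_marker header, then one position/offset pair of int64_t per entry, the first entry pointing at the #CHROM line); the function name and error handling are illustrative only.

#include <cstdint>
#include <cstdio>
#include <utility>
#include <vector>

// Read back an index written by createIndex().
bool readChromosomeIndex(const char* indexFile, int64_t* numSample,
                         std::vector<std::pair<int64_t, int64_t> >* posOffset) {
  FILE* fp = fopen(indexFile, "rb");
  if (!fp) return false;

  int64_t numMarker = 0;
  if (fread(numSample, sizeof(int64_t), 1, fp) != 1 ||
      fread(&numMarker, sizeof(int64_t), 1, fp) != 1) {
    fclose(fp);
    return false;
  }
  // Each record is (variant position, bgzf offset); the first record stores
  // (0, offset of the #CHROM line), the rest one entry per marker.
  int64_t pos = 0, offset = 0;
  while (fread(&pos, sizeof(int64_t), 1, fp) == 1 &&
         fread(&offset, sizeof(int64_t), 1, fp) == 1) {
    posOffset->push_back(std::make_pair(pos, offset));
  }
  fclose(fp);
  return (int64_t)posOffset->size() == numMarker + 1;
}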
    void PhosimParser::readCommandStream(std::istream& inStream, int& numExtra)
    // ***********************************************************************
    // This version parses a stream that may have 2 types of commands:
    // A. Commands with a command verb and a single value.
    // B. "Control" commands that have body and zernike specs. These have
    //    multiple values. We use a concatenation of the first element on the
    //    line (e.g. M1, L1, or F1E) with the tag (e.g. psi, theta, zdis, or
    //    z11), separated by a space, as the string "key" in the m_data map.
    // ************************************************************************
    // Note: comments are also ignored (anything after a '#' sign, which may
    // be a whole line).
    {
        // ***********************************************************************
        // Each line is parsed as a key-value pair assuming whitespace-delimited
        // fields.  (This allows spaces or tabs to separate values; both are used
        // in lsst files.)
        // For commands: the first field is the key and the remaining fields are
        // re-concatenated as a single string.
        // For control statements: the first 2 fields are tags, concatenated with
        // a separating space as the key, and all the remaining fields are
        // re-concatenated as a single string.
        // ***********************************************************************
        std::string line;
 
        numExtra=0;
        while (std::getline(inStream, line, '\n')) {
            // ********************************************************************
            // Search the line for a '#' sign; it denotes that all following chars
            // are a comment. Delete the '#' and all following chars from the string.
            // *********************************************************************
            std::vector<std::string> tokens;

            bool goodLine=stringTokenize(line,tokens);
            if(!goodLine){
                continue;
            }

            int numTokens=tokens.size();
    
            size_t j=1;
            std::string key = tokens[0];     // Normal single-key, single-value command
            if (numTokens > 2) {             // If a "control" command, make the key a
                key = key + " " + tokens[1]; // concatenation (separated by a space) of
                j = 2;                       // the first 2 strings. The rest are
                numExtra++;                  // re-concatenated as a single
            }                                // space-delimited string.
    
            std::string value=tokens[j];
            j++;
            for (size_t i=j; i <(size_t) numTokens; i++) {
                value += " " + tokens[i];
            }
            if (has_key(key)) {
                m_data[key].push_back(PhosimPar(value));
            } else {
                std::vector<PhosimPar> values;
                values.push_back(PhosimPar(value));
                m_data.insert(std::make_pair(key, values));
            }
        }
        return;
    }
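A sketch of the two key shapes readCommandStream produces, with made-up command names; it assumes the same PhosimParser setup as the earlier sketches.

#include <sstream>

void exampleReadCommandStream(PhosimParser& parser) {
    int numExtra = 0;
    std::istringstream in(
        "zenith 10.0\n"            // command verb + single value
        "M1 zdis 0.001 0.002\n");  // "control" command: more than 2 tokens
    parser.readCommandStream(in, numExtra);
    // Keys stored: "zenith" -> "10.0" and "M1 zdis" -> "0.001 0.002";
    // numExtra counts the control lines, so it is now 1.
}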
int main(int argc, char** argv) {
  time_t currentTime = time(0);
  fprintf(stderr, "Analysis started at: %s", ctime(&currentTime));

  PARSE_PARAMETER(argc, argv);
  PARAMETER_STATUS();

  if (FLAG_REMAIN_ARG.size() > 0) {
    fprintf(stderr, "Unparsed arguments: ");
    for (unsigned int i = 0; i < FLAG_REMAIN_ARG.size(); i++) {
      fprintf(stderr, " %s", FLAG_REMAIN_ARG[i].c_str());
    }
    fprintf(stderr, "\n");
    abort();
  }

  REQUIRE_STRING_PARAMETER(FLAG_inVcf,
                           "Please provide input file using: --inVcf");

  const char defaultDbSnp[] =
      "/net/fantasia/home/zhanxw/amd/data/umake-resources/dbSNP/"
      "dbsnp_129_b37.rod.map";
  if (FLAG_snp == "") {
    FLAG_snp = defaultDbSnp;
    fprintf(stderr, "Use default dbsnp: [ %s ]\n", defaultDbSnp);
  }
  SiteSet snpSet;
  snpSet.loadRodFile(FLAG_snp);
  fprintf(stderr, "%zu dbSNP sites loaded.\n", snpSet.getTotalSite());

  const char defaultHM3[] =
      "/net/fantasia/home/zhanxw/amd/data/umake-resources/HapMap3/"
      "hapmap3_r3_b37_fwd.consensus.qc.poly.bim";
  if (FLAG_hapmap == "") {
    FLAG_hapmap = defaultHM3;
    fprintf(stderr, "Use default HapMap: [ %s ]\n", defaultHM3);
  }
  SiteSet hmSet;
  hmSet.loadBimFile(FLAG_hapmap);
  fprintf(stderr, "%zu Hapmap sites loaded.\n", hmSet.getTotalSite());

  const char* fn = FLAG_inVcf.c_str();
  LineReader lr(fn);

  // // set range filters here
  // // e.g.
  // // vin.setRangeList("1:69500-69600");
  // vin.setRangeList(FLAG_rangeList.c_str());
  // vin.setRangeFile(FLAG_rangeFile.c_str());

  std::map<std::string, Variant> freq;
  std::string chrom;
  int pos;
  std::string filt;
  char ref, alt;
  bool inDbSnp;
  bool inHapmap;
  int lineNo = 0;
  std::vector<std::string> fd;
  while (lr.readLineBySep(&fd, " \t")) {
    lineNo++;
    if (fd[0][0] == '#') continue;  // skip header
    chrom = fd[0];                  // chrom is on column 0 (0-based)
    pos = atoi(fd[1]);              // pos is on column 1 (0-based)
    ref = fd[3][0];                 // ref is on column 3 (0-based)
    alt = fd[4][0];                 // alt is on column 4 (0-based)
    filt = fd[6];                   // filt is on column 6 (0-based)
    inDbSnp = snpSet.isIncluded(chrom.c_str(), pos);
    inHapmap = hmSet.isIncluded(chrom.c_str(), pos);

    Variant& v = freq[filt];
    v.total++;
    if (isTs(ref, alt)) {
      v.ts++;
      if (inDbSnp) {
        v.tsInDbSnp++;
        v.dbSnp++;
      }
    } else if (isTv(ref, alt)) {
      v.tv++;
      if (inDbSnp) {
        v.tvInDbSnp++;
        v.dbSnp++;
      }
    };
    if (inHapmap) v.hapmap++;
  };
  fprintf(stdout, "Total %d VCF records have been processed successfully\n", lineNo);

  //////////////////////////////////////////////////////////////////////
  std::string title = "Summarize per combined filter";
  int pad = (170 - title.size()) / 2;
  std::string outTitle = std::string(pad, '-') + title + std::string(pad, '-');
  puts(outTitle.c_str());
  printf("%40s\t%10s\t%10s\t%10s\t%10s\t%10s\t%10s\t%10s\t%10s\n", "Filter",
         "#SNPs", "#dbSNP", "%dbSNP", "Known Ts/Tv", "Novel Ts/Tv", "Overall",
         "%TotalHM3", "%HMCalled");
  std::map<std::string, Variant> indvFreq;
  Variant pass;
  Variant fail;
  Variant total;
  std::vector<std::string> filters;  // individual filter
  for (std::map<std::string, Variant>::iterator i = freq.begin();
       i != freq.end(); ++i) {
    const std::string& filt = i->first;
    const Variant& v = i->second;
    v.print(filt, hmSet);

    // calculate indvFreq, pass, fail and total
    stringTokenize(filt, ';', &filters);
    for (unsigned int j = 0; j < filters.size(); j++) {
      const std::string& filt = filters[j];
      indvFreq[filt] += v;
    }
    if (filt == "PASS")
      pass += v;
    else
      fail += v;
    total += v;
  };
  //////////////////////////////////////////////////////////////////////
  title = "Summarize per individual filter";
  pad = (170 - title.size()) / 2;
  outTitle = std::string(pad, '-') + title + std::string(pad, '-');
  puts(outTitle.c_str());
  printf("%40s\t%10s\t%10s\t%10s\t%10s\t%10s\t%10s\t%10s\t%10s\n", "Filter",
         "#SNPs", "#dbSNP", "%dbSNP", "Known Ts/Tv", "Novel Ts/Tv", "Overall",
         "%TotalHM3", "%HMCalled");
  for (std::map<std::string, Variant>::iterator i = indvFreq.begin();
       i != indvFreq.end(); ++i) {
    const std::string& filt = i->first;
    const Variant& v = i->second;
    v.print(filt, hmSet);
  }
  //////////////////////////////////////////////////////////////////////
  title = "Summarize per pass/fail filter";
  pad = (170 - title.size()) / 2;
  outTitle = std::string(pad, '-') + title + std::string(pad, '-');
  puts(outTitle.c_str());
  printf("%40s\t%10s\t%10s\t%10s\t%10s\t%10s\t%10s\t%10s\t%10s\n", "Filter",
         "#SNPs", "#dbSNP", "%dbSNP", "Known Ts/Tv", "Novel Ts/Tv", "Overall",
         "%TotalHM3", "%HMCalled");

  pass.print("PASS", hmSet);
  fail.print("FAIL", hmSet);
  total.print("TOTAL", hmSet);

  currentTime = time(0);
  fprintf(stderr, "Analysis end at: %s", ctime(&currentTime));
  return 0;
};
Example #18
/*
 * Load operating configurations from a file
 */
void loadConfigurations(const std::string& fileName, std::map<std::string, std::vector<std::vector<double> > >& configurations, int& init_num_of_cores,
        int& min_num_of_cores) {
    //load configurations info from file
    int lineNumber = 0, i;
    std::string line;
    std::vector<std::string> tokens, deads;
    std::string config;
    std::vector<double> dagings;
    bool initNotRead = true;

    std::ifstream confFile(fileName.c_str());
    if (confFile.is_open()) {
        getline(confFile, line);
        lineNumber++;
        while (!confFile.eof()) {
            if (line != "") {
                //read current configuration
                stringTokenize(line, tokens, " ");
                config = tokens[0];
                if (tokens.size() < 2) {
                    std::cerr << "Line " << lineNumber << " - Missing data" << std::endl;
                    exit(1);
                }
                for (i = 1; i < tokens.size(); i++) {
                    dagings.push_back(atof(tokens[i].c_str()));
                }
                //REMOVED TO SUPPORT MULTIPLE PERIODIC CONFIGURATIONS
                //if (configurations.count(config) != 0) {
                //  std::cerr << "Line " << lineNumber << " - Configuration already specified: " << config << std::endl;
                //  exit(1);
                //}
                configurations[config].push_back(dagings);
                dagings.clear();

                //update min and max number of cores
                if (config == EMPTY_SET) {
                    init_num_of_cores = tokens.size() - 1; //-1 since we have to discard the configuration string
                    initNotRead = false;
                    min_num_of_cores = init_num_of_cores;
                } else {
                    if (initNotRead) {
                        std::cerr << "Line " << lineNumber << " - The first line of the file must be the initial situation" << std::endl;
                        exit(1);
                    }
                    stringTokenize(config, deads, ",");
                    //check of the current configuration
                    std::set<int> checksum;
                    for (int j = 0; j < deads.size(); j++) {
                        int currCore = atoi(deads[j].c_str());
                        if (currCore >= init_num_of_cores || currCore < 0) {
                            std::cerr << "Line " << lineNumber << " - Invalid core number: " << config << std::endl;
                            exit(1);
                        }
                        if (checksum.find(currCore) != checksum.end()) {
                            std::cerr << "Line " << lineNumber << " - Not valid sequence of deads: " << config << std::endl;
                            exit(1);
                        }
                        checksum.insert(currCore);
                    }
                    if (init_num_of_cores - deads.size() < min_num_of_cores)
                        min_num_of_cores = init_num_of_cores - deads.size();
                }
            }
            getline(confFile, line);
            lineNumber++;
        }
        confFile.close();
    } else {
        std::cerr << "Configuration file not found: " << fileName << std::endl;
        exit(1);
    }
}
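A hedged illustration of the configuration file this loader expects, based only on the parsing above: the first line must be the EMPTY_SET configuration (its value count fixes init_num_of_cores), and later lines give a comma-separated list of dead cores followed by their aging values. It assumes EMPTY_SET is a whitespace-free token that can be streamed to the file; every number and file name below is made up.

#include <fstream>
#include <map>
#include <string>
#include <vector>

void exampleLoadConfigurations() {
    // Write a tiny, hypothetical configuration file and load it back.
    std::ofstream out("configurations.txt");
    out << EMPTY_SET << " 1.00 1.00 1.00 1.00\n"  // initial situation: 4 cores
        << "0 1.20 0.00 1.10 1.05\n"              // core 0 dead
        << "0,2 1.30 0.00 1.15 0.00\n";           // cores 0 and 2 dead
    out.close();

    std::map<std::string, std::vector<std::vector<double> > > configurations;
    int initCores = 0, minCores = 0;
    loadConfigurations("configurations.txt", configurations, initCores, minCores);
    // initCores == 4; minCores == 2 (4 cores minus the two dead in "0,2").
}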
Example #19
int main(int argc, char *argv[])
{
  {
    std::string s = "a b\"MID\" c d";
    std::vector<std::string> result;
    unsigned int ret = stringTokenize(s, ' ', &result);
    assert(ret == 4);    
    assert(result.size() == 4);
    assert(result[0] == "a");
    assert(result[1] == "b\"MID\"");
    assert(result[2] == "c");
    assert(result[3] == "d");

    ret = stringTokenize(s, "\" ", &result);
    assert(result.size() == 6);
    assert(result[0] == "a");
    assert(result[1] == "b");
    assert(result[2] == "MID");
    assert(result[3] == "");
    assert(result[4] == "c");
    assert(result[5] == "d");

    s = "";
    ret = stringTokenize(s, " ", &result);
    assert(result.size() == 1);
    assert(result[0] == "");
  }
  {
    std::string s = "a b\"MID\" c d";
    std::string piece;
    std::vector <std::string> result;
    StringTokenizer st1(s, ' ');
    while (st1.next(&piece)) {
      //printf("piece = %s\n", piece.c_str());
      result.push_back(piece);
    }
    assert(result.size() == 4);
    assert(result[0] == "a");
    assert(result[1] == "b\"MID\"");
    assert(result[2] == "c");
    assert(result[3] == "d");

    result.clear();
    StringTokenizer st2(s, "\" ");
    while (st2.next(&piece)) {
      printf("piece = %s\n", piece.c_str());
      result.push_back(piece);
    }
    assert(result.size() == 6);
    assert(result[0] == "a");
    assert(result[1] == "b");
    assert(result[2] == "MID");
    assert(result[3] == "");
    assert(result[4] == "c");
    assert(result[5] == "d");

    result.clear();
    s = "";
    StringTokenizer st3(s, " ");
    while (st3.next(&piece)) {
      result.push_back(piece);
    }
    assert(result.size() == 1);
    assert(result[0] == "");
  }  
  {
      std::string s = "";
      std::string res = stringStrip(s);
      assert(res.size() == 0);

      s = "  ";
      res = stringStrip(s);
      assert(res.size() == 0);
  }

  return 0;
}