/// Adds one randomly chosen research site located on @p mapId to this player's
/// set of research sites.
///
/// A site is a candidate when the player does not already have it, its map id
/// equals @p mapId, and both CanResearchWithLevel() and
/// CanResearchWithSkillLevel() accept it. When no candidate exists the function
/// returns without changing anything.
void Player::GenerateResearchSiteInMap(uint32 mapId)
{
    // Collect the ids of every candidate site on the requested map.
    SiteSet candidates;
    for (ResearchSiteDataMap::const_iterator it = sResearchSiteDataMap.begin(); it != sResearchSiteDataMap.end(); ++it)
    {
        ResearchSiteEntry const* site = it->second.entry;

        // The checks run in the same order as a short-circuit && chain would.
        if (HasResearchSite(site->ID))
            continue;
        if (site->mapId != mapId)
            continue;
        if (!CanResearchWithLevel(site->ID))
            continue;
        if (!CanResearchWithSkillLevel(site->ID))
            continue;

        candidates.insert(site->ID);
    }

    if (candidates.empty())
        return;

    // Step to a random position in [0, size - 1]; the set is non-empty here.
    SiteSet::const_iterator chosen = candidates.begin();
    std::advance(chosen, urand(0, candidates.size() - 1));

    _researchSites.insert(*chosen);

    // Flag the archaeology data as modified, then show the updated site list.
    _archaeologyChanged = true;
    ShowResearchSites();
}
void print(const SiteSet& hapmapSites) const { printf("%10d\t%10d\t%10.2f", total, dbSnp, 100.0 * dbSnp / total); if (tvInDbSnp) { printf("\t%10.2f", 1.0 * tsInDbSnp / tvInDbSnp); } else { printf("\t%10s", "Inf"); } if (tv - tvInDbSnp) { printf("\t%10.2f", 1.0 * (ts - tsInDbSnp) / (tv - tvInDbSnp)); } else { printf("\t%10s", "Inf"); } if (tv) { printf("\t%10.2f", 1.0 * ts / tv); } else { printf("\t%10s", "Inf"); } printf("\t%10.2f", 100.0 * hapmap / hapmapSites.getTotalSite()); printf("\t%10.2f", 100.0 * hapmap / total); putchar('\n'); };
int main(int argc, char** argv){ time_t currentTime = time(0); fprintf(stderr, "Analysis started at: %s", ctime(¤tTime)); //////////////////////////////////////////////// BEGIN_PARAMETER_LIST(pl) ADD_PARAMETER_GROUP(pl, "Input/Output") ADD_STRING_PARAMETER(pl, inVcf, "--inVcf", "input VCF File") ADD_STRING_PARAMETER(pl, outVcf, "--outVcf", "output VCF File") ADD_PARAMETER_GROUP(pl, "Site Filter") ADD_STRING_PARAMETER(pl, site, "--site", "input site file (.rod file: 0-based position)") ADD_BOOL_PARAMETER(pl, inverse, "--inverse", "Inverse site") ADD_STRING_PARAMETER(pl, rangeList, "--rangeList", "Specify some ranges to use, please use chr:begin-end format.") ADD_STRING_PARAMETER(pl, rangeFile, "--rangeFile", "Specify the file containing ranges, please use chr:begin-end format.") ADD_BOOL_PARAMETER(pl, snpOnly, "--snpOnly", "Specify only extract SNP site") END_PARAMETER_LIST(pl) ; pl.Read(argc, argv); pl.Status(); if (FLAG_REMAIN_ARG.size() > 0){ fprintf(stderr, "Unparsed arguments: "); for (unsigned int i = 0; i < FLAG_REMAIN_ARG.size(); i++){ fprintf(stderr, " %s", FLAG_REMAIN_ARG[i].c_str()); } fprintf(stderr, "\n"); abort(); } REQUIRE_STRING_PARAMETER(FLAG_inVcf, "Please provide input file using: --inVcf"); REQUIRE_STRING_PARAMETER(FLAG_outVcf, "Please provide output file using: --outVcf"); const char defaultDbSnp[] = "/net/fantasia/home/zhanxw/amd/data/umake-resources/dbSNP/dbsnp_129_b37.rod.map"; if (FLAG_site == "") { FLAG_site = defaultDbSnp; fprintf(stderr, "Use default dbsnp: [ %s ]\n", defaultDbSnp); } SiteSet snpSet; snpSet.loadRodFile(FLAG_site); fprintf(stderr, "%zu dbSNP sites loaded.\n", snpSet.getTotalSite()); const char* fn = FLAG_inVcf.c_str(); VCFInputFile vin(fn); VCFOutputFile* vout = NULL; // PlinkOutputFile* pout = NULL; if (FLAG_outVcf.size() > 0) { vout = new VCFOutputFile(FLAG_outVcf.c_str()); }; if (vout) vout->writeHeader(vin.getVCFHeader()); // set range filters here // e.g. 
// vin.setRangeList("1:69500-69600"); vin.setRangeList(FLAG_rangeList.c_str()); vin.setRangeFile(FLAG_rangeFile.c_str()); std::string filt; /// char ref, alt; bool keep; int lineNo = 0; int lineOut = 0; while (vin.readRecord()){ lineNo ++; VCFRecord& r = vin.getVCFRecord(); keep = snpSet.isIncluded(r.getChrom(), r.getPos()); if (FLAG_inverse) { keep = !keep; } if (!keep) continue; if (FLAG_snpOnly) { if ( strlen(r.getRef()) != 1) continue; if ( strlen(r.getAlt()) != 1) continue; if ( r.getAlt()[0] == '.') continue; //deletion e.g. A -> . } if (vout) vout->writeRecord(& r); lineOut ++; }; delete vout; fprintf(stdout, "Total %d VCF records have converted successfully\n", lineNo); fprintf(stdout, "Total %d VCF records have outputted successfully\n", lineOut); currentTime = time(0); fprintf(stderr, "Analysis end at: %s", ctime(¤tTime)); return 0; };
int main(int argc, char** argv) { time_t currentTime = time(0); fprintf(stderr, "Analysis started at: %s", ctime(¤tTime)); PARSE_PARAMETER(argc, argv); PARAMETER_STATUS(); if (FLAG_REMAIN_ARG.size() > 0) { fprintf(stderr, "Unparsed arguments: "); for (unsigned int i = 0; i < FLAG_REMAIN_ARG.size(); i++) { fprintf(stderr, " %s", FLAG_REMAIN_ARG[i].c_str()); } fprintf(stderr, "\n"); abort(); } REQUIRE_STRING_PARAMETER(FLAG_inVcf, "Please provide input file using: --inVcf"); const char defaultDbSnp[] = "/net/fantasia/home/zhanxw/amd/data/umake-resources/dbSNP/" "dbsnp_129_b37.rod.map"; if (FLAG_snp == "") { FLAG_snp = defaultDbSnp; fprintf(stderr, "Use default dbsnp: [ %s ]\n", defaultDbSnp); } SiteSet snpSet; snpSet.loadRodFile(FLAG_snp); fprintf(stderr, "%zu dbSNP sites loaded.\n", snpSet.getTotalSite()); const char defaultHM3[] = "/net/fantasia/home/zhanxw/amd/data/umake-resources/HapMap3/" "hapmap3_r3_b37_fwd.consensus.qc.poly.bim"; if (FLAG_hapmap == "") { FLAG_hapmap = defaultHM3; fprintf(stderr, "Use default HapMap: [ %s ]\n", defaultHM3); } SiteSet hmSet; hmSet.loadBimFile(FLAG_hapmap); fprintf(stderr, "%zu Hapmap sites loaded.\n", hmSet.getTotalSite()); const char* fn = FLAG_inVcf.c_str(); LineReader lr(fn); // // set range filters here // // e.g. 
// // vin.setRangeList("1:69500-69600"); // vin.setRangeList(FLAG_rangeList.c_str()); // vin.setRangeFile(FLAG_rangeFile.c_str()); std::map<std::string, Variant> freq; std::string chrom; int pos; std::string filt; char ref, alt; bool inDbSnp; bool inHapmap; int lineNo = 0; std::vector<std::string> fd; while (lr.readLineBySep(&fd, " \t")) { lineNo++; if (fd[0][0] == '#') continue; // skip header chrom = fd[0]; // ref is on column 0 (0-based) pos = atoi(fd[1]); // ref is on column 1 (0-based) ref = fd[3][0]; // ref is on column 3 (0-based) alt = fd[4][0]; // ref is on column 4 (0-based) filt = fd[6]; // filt is on column 6 (0-based) inDbSnp = snpSet.isIncluded(chrom.c_str(), pos); inHapmap = hmSet.isIncluded(chrom.c_str(), pos); Variant& v = freq[filt]; v.total++; if (isTs(ref, alt)) { v.ts++; if (inDbSnp) { v.tsInDbSnp++; v.dbSnp++; } } else if (isTv(ref, alt)) { v.tv++; if (inDbSnp) { v.tvInDbSnp++; v.dbSnp++; } }; if (inHapmap) v.hapmap++; }; fprintf(stdout, "Total %d VCF records have converted successfully\n", lineNo); ////////////////////////////////////////////////////////////////////// std::string title = "Summarize per combined filter"; int pad = (170 - title.size()) / 2; std::string outTitle = std::string(pad, '-') + title + std::string(pad, '-'); puts(outTitle.c_str()); printf("%40s\t%10s\t%10s\t%10s\t%10s\t%10s\t%10s\t%10s\t%10s\n", "Filter", "#SNPs", "#dbSNP", "%dbSNP", "Known Ts/Tv", "Novel Ts/Tv", "Overall", "%TotalHM3", "%HMCalled"); std::map<std::string, Variant> indvFreq; Variant pass; Variant fail; Variant total; std::vector<std::string> filters; // individual filter for (std::map<std::string, Variant>::iterator i = freq.begin(); i != freq.end(); ++i) { const std::string& filt = i->first; const Variant& v = i->second; v.print(filt, hmSet); // calculate indvFreq, pass, fail and total stringTokenize(filt, ';', &filters); for (unsigned int j = 0; j < filters.size(); j++) { const std::string& filt = filters[j]; indvFreq[filt] += v; } if (filt == "PASS") 
pass += v; else fail += v; total += v; }; ////////////////////////////////////////////////////////////////////// title = "Summarize per individual filter"; pad = (170 - title.size()) / 2; outTitle = std::string(pad, '-') + title + std::string(pad, '-'); puts(outTitle.c_str()); printf("%40s\t%10s\t%10s\t%10s\t%10s\t%10s\t%10s\t%10s\t%10s\n", "Filter", "#SNPs", "#dbSNP", "%dbSNP", "Known Ts/Tv", "Novel Ts/Tv", "Overall", "%TotalHM3", "%HMCalled"); for (std::map<std::string, Variant>::iterator i = indvFreq.begin(); i != indvFreq.end(); ++i) { const std::string& filt = i->first; const Variant& v = i->second; v.print(filt, hmSet); } ////////////////////////////////////////////////////////////////////// title = "Summarize per pass/fail filter"; pad = (170 - title.size()) / 2; outTitle = std::string(pad, '-') + title + std::string(pad, '-'); puts(outTitle.c_str()); printf("%40s\t%10s\t%10s\t%10s\t%10s\t%10s\t%10s\t%10s\t%10s\n", "Filter", "#SNPs", "#dbSNP", "%dbSNP", "Known Ts/Tv", "Novel Ts/Tv", "Overall", "%TotalHM3", "%HMCalled"); pass.print("PASS", hmSet); fail.print("FAIL", hmSet); total.print("TOTAL", hmSet); currentTime = time(0); fprintf(stderr, "Analysis end at: %s", ctime(¤tTime)); return 0; };
int main(int argc, char** argv){ time_t currentTime = time(0); fprintf(stderr, "Analysis started at: %s", ctime(¤tTime)); //////////////////////////////////////////////// BEGIN_PARAMETER_LIST(pl) ADD_PARAMETER_GROUP(pl, "Input/Output") ADD_STRING_PARAMETER(pl, inVcf, "--inVcf", "input VCF File") ADD_STRING_PARAMETER(pl, snp, "--snp", "input dbSNP File (.rod)") ADD_STRING_PARAMETER(pl, hapmap, "--hapmap", "input HapMap File (.bim)") ADD_PARAMETER_GROUP(pl, "Site Filter") ADD_STRING_PARAMETER(pl, rangeList, "--rangeList", "Specify some ranges to use, please use chr:begin-end format.") ADD_STRING_PARAMETER(pl, rangeFile, "--rangeFile", "Specify the file containing ranges, please use chr:begin-end format.") END_PARAMETER_LIST(pl) ; pl.Read(argc, argv); pl.Status(); if (FLAG_REMAIN_ARG.size() > 0){ fprintf(stderr, "Unparsed arguments: "); for (unsigned int i = 0; i < FLAG_REMAIN_ARG.size(); i++){ fprintf(stderr, " %s", FLAG_REMAIN_ARG[i].c_str()); } fprintf(stderr, "\n"); abort(); } REQUIRE_STRING_PARAMETER(FLAG_inVcf, "Please provide input file using: --inVcf"); const char defaultDbSnp[] = "/net/fantasia/home/zhanxw/amd/data/umake-resources/dbSNP/dbsnp_129_b37.rod.map"; if (FLAG_snp == "") { FLAG_snp = defaultDbSnp; fprintf(stderr, "Use default dbsnp: [ %s ]\n", defaultDbSnp); } SiteSet snpSet; snpSet.loadRodFile(FLAG_snp); fprintf(stderr, "%zu dbSNP sites loaded.\n", snpSet.getTotalSite()); const char defaultHM3[] = "/net/fantasia/home/zhanxw/amd/data/umake-resources/HapMap3/hapmap3_r3_b37_fwd.consensus.qc.poly.bim"; if (FLAG_hapmap == "") { FLAG_hapmap = defaultHM3; fprintf(stderr, "Use default HapMap: [ %s ]\n", defaultHM3); } SiteSet hmSet; hmSet.loadBimFile(FLAG_hapmap); fprintf(stderr, "%zu Hapmap sites loaded.\n", hmSet.getTotalSite()); const char* fn = FLAG_inVcf.c_str(); LineReader lr(fn); // // set range filters here // // e.g. 
// // vin.setRangeList("1:69500-69600"); // vin.setRangeList(FLAG_rangeList.c_str()); // vin.setRangeFile(FLAG_rangeFile.c_str()); std::map<std::string, Variant> freq; std::string chrom; int pos; // std::string filt; std::string anno; char ref, alt; bool inDbSnp; bool inHapmap; int lineNo = 0; std::vector<std::string> fd; while(lr.readLineBySep(&fd, " \t")){ lineNo ++; if (fd[0][0] == '#') continue; // skip header chrom = fd[0]; // ref is on column 0 (0-based) pos = atoi(fd[1]); // ref is on column 1 (0-based) ref = fd[3][0]; // ref is on column 3 (0-based) alt = fd[4][0]; // ref is on column 4 (0-based) // filt = fd[6]; // filt is on column 6 (0-based) anno = extractAnno(fd[7]); // info is on column 7 (0-based), we will extract ANNO= inDbSnp = snpSet.isIncluded(chrom.c_str(), pos); inHapmap = hmSet.isIncluded(chrom.c_str(), pos); Variant& v = freq[anno]; v.total++; if ( isTs(ref, alt) ) { v.ts ++; if (inDbSnp) { v.tsInDbSnp ++; v.dbSnp ++; } } else if (isTv(ref, alt)) { v.tv ++; if (inDbSnp) { v.tvInDbSnp ++; v.dbSnp ++; } }; if (inHapmap) v.hapmap ++; if (lineNo % 10000 == 0) { fprintf(stderr, "\rProcessed %d lines...\r", lineNo); } }; fprintf(stdout, "Total %d VCF records have been read successfully\n", lineNo); ////////////////////////////////////////////////////////////////////// std::string title = "Summarize per annotation type"; int pad = (170 - title.size() ) /2 ; std::string outTitle = std::string(pad, '-') + title + std::string(pad, '-'); puts(outTitle.c_str()); printf("%40s\t%10s\t%10s\t%10s\t%10s\t%10s\t%10s\t%10s\t%10s\n", "Filter", "#SNPs", "#dbSNP", "%dbSNP", "Known Ts/Tv", "Novel Ts/Tv", "Overall", "%TotalHM3", "%HMCalled"); std::map<std::string, Variant> indvFreq; Variant total; for (std::map<std::string, Variant>::iterator i = freq.begin() ; i != freq.end(); ++i ){ i->second.print(i->first, hmSet); total += i->second; }; total.print("TOTAL", hmSet); currentTime = time(0); fprintf(stderr, "Analysis end at: %s", ctime(¤tTime)); return 0; };
int main(int argc, char** argv) { time_t currentTime = time(0); fprintf(stderr, "Analysis started at: %s", ctime(¤tTime)); PARSE_PARAMETER(argc, argv); PARAMETER_STATUS(); if (FLAG_REMAIN_ARG.size() > 0) { fprintf(stderr, "Unparsed arguments: "); for (unsigned int i = 0; i < FLAG_REMAIN_ARG.size(); i++) { fprintf(stderr, " %s", FLAG_REMAIN_ARG[i].c_str()); } fprintf(stderr, "\n"); abort(); } REQUIRE_STRING_PARAMETER(FLAG_inVcf, "Please provide input file using: --inVcf"); const char defaultDbSnp[] = "/net/fantasia/home/zhanxw/amd/data/umake-resources/dbSNP/" "dbsnp_129_b37.rod.map"; if (FLAG_snp == "") { FLAG_snp = defaultDbSnp; fprintf(stderr, "Use default dbsnp: [ %s ]\n", defaultDbSnp); } SiteSet snpSet; snpSet.loadRodFile(FLAG_snp); fprintf(stderr, "%zu dbSNP sites loaded.\n", snpSet.getTotalSite()); const char defaultHM3[] = "/net/fantasia/home/zhanxw/amd/data/umake-resources/HapMap3/" "hapmap3_r3_b37_fwd.consensus.qc.poly.bim"; if (FLAG_hapmap == "") { FLAG_hapmap = defaultHM3; fprintf(stderr, "Use default HapMap: [ %s ]\n", defaultHM3); } SiteSet hmSet; hmSet.loadBimFile(FLAG_hapmap); fprintf(stderr, "%zu Hapmap sites loaded.\n", hmSet.getTotalSite()); const char* fn = FLAG_inVcf.c_str(); LineReader lr(fn); // // set range filters here // // e.g. 
// // vin.setRangeList("1:69500-69600"); // vin.setRangeList(FLAG_rangeList.c_str()); // vin.setRangeFile(FLAG_rangeFile.c_str()); std::map<std::string, Variant> freq; std::string chrom; int pos; // std::string filt; // std::string anno; std::string numVariant; char ref, alt; bool inDbSnp; bool inHapmap; int lineNo = 0; std::vector<std::string> fd; while (lr.readLineBySep(&fd, " \t")) { lineNo++; if (fd[0][0] == '#') continue; // skip header chrom = fd[0]; // ref is on column 0 (0-based) pos = atoi(fd[1]); // ref is on column 1 (0-based) ref = fd[3][0]; // ref is on column 3 (0-based) alt = fd[4][0]; // ref is on column 4 (0-based) // filt = fd[6]; // filt is on column 6 (0-based) // anno = extractAnno(fd[7]); // info is on column 7 (0-based), we will // extract ANNO= // obtain number of variants if (fd.size() <= 9) { // first 9 columns are not individuals numVariant = toString(0); } else { int numVar = 0; for (size_t i = 9; i < fd.size(); ++i) { int varCount = countVariant(fd[i]); if (varCount > 0) numVar += varCount; } numVariant = toString(numVar); } inDbSnp = snpSet.isIncluded(chrom.c_str(), pos); inHapmap = hmSet.isIncluded(chrom.c_str(), pos); Variant& v = freq[numVariant]; v.total++; if (isTs(ref, alt)) { v.ts++; if (inDbSnp) { v.tsInDbSnp++; v.dbSnp++; } } else if (isTv(ref, alt)) { v.tv++; if (inDbSnp) { v.tvInDbSnp++; v.dbSnp++; } }; if (inHapmap) v.hapmap++; if (lineNo % 10000 == 0) { fprintf(stderr, "\rProcessed %d lines...\r", lineNo); } }; fprintf(stdout, "Total %d VCF records have been read successfully\n", lineNo); ////////////////////////////////////////////////////////////////////// std::string title = "Summarize per annotation type"; int pad = (170 - title.size()) / 2; std::string outTitle = std::string(pad, '-') + title + std::string(pad, '-'); puts(outTitle.c_str()); printf("%40s\t%10s\t%10s\t%10s\t%10s\t%10s\t%10s\t%10s\t%10s\n", "Filter", "#SNPs", "#dbSNP", "%dbSNP", "Known Ts/Tv", "Novel Ts/Tv", "Overall", "%TotalHM3", "%HMCalled"); 
std::map<std::string, Variant> indvFreq; Variant total; // to sort variants by its integer order, we use a temporary map std::map<int, Variant> tmp; for (std::map<std::string, Variant>::iterator i = freq.begin(); i != freq.end(); ++i) { tmp[atoi(i->first)] = i->second; }; for (std::map<int, Variant>::iterator i = tmp.begin(); i != tmp.end(); ++i) { i->second.print(toString(i->first), hmSet); total += i->second; }; total.print("TOTAL", hmSet); currentTime = time(0); fprintf(stderr, "Analysis end at: %s", ctime(¤tTime)); return 0; };