// specific to upgrade from VCF 3.3 to VCF 4.0 void VcfFile::upgradeMetaLines() { std::vector<int> toDelete; for(int i=0; i < asMetaKeys.Length(); ++i) { if ( asMetaKeys[i].Compare("fileformat") == 0 ) { if ( asMetaValues[i].Compare("VCFv3.3") == 0 ) { asMetaValues[i] = "VCFv4.0"; } } else if ( asMetaKeys[i].Compare("FILTER") == 0 ) { toDelete.push_back(i); } else if ( asMetaKeys[i].Compare("FORMAT") == 0 ) { StringArray tok; tok.ReplaceColumns(asMetaValues[i],','); asMetaValues[i].printf("<ID=%s,Number=%s,Type=%s,Description=%s>",tok[0].c_str(),tok[2].c_str(),tok[1].c_str(),tok[3].c_str()); } } for(int i=0; i < (int)toDelete.size(); ++i) { asMetaValues.Delete(toDelete[toDelete.size()-i-1]); asMetaKeys.Delete(toDelete[toDelete.size()-i-1]); } // add INFO and FILTER? }
bool FilterStat::appendStatVcf(const char* file) { VcfFile vcf; vcf.setSiteOnly(false); vcf.setParseValues(true); vcf.setParseGenotypes(false); vcf.setParseDosages(false); vcf.openForRead(file,1); VcfMarker* pMarker; StringArray tok; for( int i=0, j=0; vcf.iterateMarker(); ++i, ++j ) { pMarker = vcf.getLastMarker(); if ( sChrom.Compare(pMarker->sChrom) != 0 ) { Logger::gLogger->error("Chromosome name does not match - %s vs %s",sChrom.c_str(),pMarker->sChrom.c_str()); } while ( vPos[j] < pMarker->nPos ) { ++j; } if ( vPos[j] > pMarker->nPos ) { Logger::gLogger->error("Position %s:%d is not observed in the anchor VCF",sChrom.c_str(),pMarker->nPos); } std::vector<int> vAlleles; std::vector<int> vStrands; //fprintf(stderr,"%s:%d\t%s\n",pMarker->sChrom.c_str(),pMarker->nPos,pMarker->asFormatKeys[0].c_str()); for(int k=0; k < pMarker->asFormatKeys.Length(); ++k) { if ( pMarker->asFormatKeys[k].Compare("BASE") == 0 ) { tok.ReplaceColumns(pMarker->asSampleValues[k],','); for(int l=0; l < tok.Length(); ++l) { if ( tok[l].Compare(vAl1[j].c_str()) == 0 ) { vAlleles.push_back(0); } else if ( tok[l].Compare(vAl2[j].c_str()) == 0 ) { vAlleles.push_back(1); } else { vAlleles.push_back(2); } } } else if ( pMarker->asFormatKeys[k].Compare("STRAND") == 0 ) { tok.ReplaceColumns(pMarker->asSampleValues[k],','); for(int l=0; l < tok.Length(); ++l) { if ( tok[l].Compare("F") == 0 ) { vStrands.push_back(0); } else { vStrands.push_back(1); } } } } //fprintf(stderr,"%s:%d\t%d",pMarker->sChrom.c_str(),pMarker->nPos,(int)vAlleles.size()); //for(int k=0; k < (int) vAlleles.size(); ++k) { // fprintf(stderr,"\t%d",vAlleles[k]*2+vStrands[k]); //} //fprintf(stderr,"\n"); if ( vAlleles.size() != vStrands.size() ) { Logger::gLogger->error("Alleles and Strands do not match in size at %s:%d, in %s",pMarker->sChrom.c_str(), pMarker->nPos, file); } // updates the counts - needs synchronization { //boost::mutex::scoped_lock lock(mutex); for(int k=0; k < (int) vAlleles.size(); ++k) { ++(vCounts[FILTER_STAT_COUNTS*j + vAlleles[k]*2 + vStrands[k]]); } } } return true; }
int VcfMac::execute(int argc, char **argv) { String inputVcf = ""; int minAC = -1; String sampleSubset = ""; String filterList = ""; bool params = false; IntervalTree<int> regions; std::vector<int> intersection; // Read in the parameters. ParameterList inputParameters; BEGIN_LONG_PARAMETERS(longParameterList) LONG_PARAMETER_GROUP("Required Parameters") LONG_STRINGPARAMETER("in", &inputVcf) LONG_PARAMETER_GROUP("Optional Parameters") LONG_STRINGPARAMETER("sampleSubset", &sampleSubset) LONG_INTPARAMETER("minAC", &minAC) LONG_STRINGPARAMETER("filterList", &filterList) LONG_PARAMETER("params", ¶ms) LONG_PHONEHOME(VERSION) END_LONG_PARAMETERS(); inputParameters.Add(new LongParameters ("Input Parameters", longParameterList)); inputParameters.Read(argc-1, &(argv[1])); // Check that all files were specified. if(inputVcf == "") { usage(); inputParameters.Status(); std::cerr << "Missing \"--in\", a required parameter.\n\n"; return(-1); } if(params) { inputParameters.Status(); } // Open the two input files. VcfFileReader inFile; VcfHeader header; VcfRecord record; // Open the file if(sampleSubset.IsEmpty()) { inFile.open(inputVcf, header); } else { inFile.open(inputVcf, header, sampleSubset, NULL, NULL); } // Add the discard rule for minor allele count. if(minAC >= 0) { inFile.addDiscardMinMinorAlleleCount(minAC, NULL); } if(!filterList.IsEmpty()) { // Open the filter list. IFILE regionFile = ifopen(filterList, "r"); String regionLine; StringArray regionColumn; int start; int end; int intervalVal = 1; if(regionFile == NULL) { std::cerr << "Failed to open " << filterList << ", so keeping all positions\n"; filterList.Clear(); } else { while( regionFile->isOpen() && !regionFile->ifeof()) { // Read the next interval regionLine.Clear(); regionLine.ReadLine(regionFile); if(regionLine.IsEmpty()) { // Nothing on this line, continue to the next. continue; } regionColumn.ReplaceColumns(regionLine, ' '); if(regionColumn.Length() != 2) { std::cerr << "Improperly formatted region line: " << regionLine << "; skipping to the next line.\n"; continue; } // Convert the columns to integers. if(!regionColumn[0].AsInteger(start)) { // The start position (1st column) is not an integer. std::cerr << "Improperly formatted region line, start position " << "(1st column) is not an integer: " << regionColumn[0] << "; Skipping to the next line.\n"; continue; } if(!regionColumn[1].AsInteger(end)) { // The start position (1st column) is not an integer. std::cerr << "Improperly formatted region line, end position " << "(2nd column) is not an integer: " << regionColumn[1] << "; Skipping to the next line.\n"; continue; } // Add 1-based inclusive intervals. regions.add(start,end, intervalVal); } } } int numReadRecords = 0; while( inFile.readRecord(record)) { if(!filterList.IsEmpty()) { // Check if the region should be kept. intersection.clear(); regions.get_intersecting_intervals(record.get1BasedPosition(), intersection); if(intersection.empty()) { // not in the interval, so continue to the next record. continue; } } ++numReadRecords; // Loop through the number of possible alternates. unsigned int numAlts = record.getNumAlts(); int minAlleleCount = -1; int curAlleleCount = 0; int totalAlleleCount = 0; for(unsigned int i = 0; i <= numAlts; i++) { curAlleleCount = record.getAlleleCount(i); if((minAlleleCount == -1) || (curAlleleCount < minAlleleCount)) { minAlleleCount = curAlleleCount; } totalAlleleCount += curAlleleCount; } if(totalAlleleCount != 0) { double maf = (double)minAlleleCount/totalAlleleCount; std::cout << record.getIDStr() << "\t" << minAlleleCount << "\t" << maf << "\n"; } } inFile.close(); // std::cerr << "\n\t# Records: " << numReadRecords << "\n"; // return success. return(0); }