int main(int argc, char** argv) { if (argc < 3) { cerr << "usage: " << argv[0] << " <vcf file> [FIELD1] [FIELD2] ..." << endl << "outputs each record in the vcf file, removing INFO fields not listed on the command line" << endl; return 1; } string filename = argv[1]; set<string> fieldsToKeep; for (int i = 2; i < argc; ++i) { fieldsToKeep.insert(argv[i]); } VariantCallFile variantFile; if (filename == "-") { variantFile.open(std::cin); } else { variantFile.open(filename); } if (!variantFile.is_open()) { return 1; } Variant var(variantFile); vector<string> fieldsToErase; vector<string> infoIds = variantFile.infoIds(); for (vector<string>::iterator i = infoIds.begin(); i != infoIds.end(); ++i) { if (!fieldsToKeep.count(*i)) { fieldsToErase.push_back(*i); variantFile.removeInfoHeaderLine(*i); } } // write the header cout << variantFile.header << endl; // print the records, filtering is done via the setting of varA's output sample names while (variantFile.getNextVariant(var)) { for (vector<string>::iterator f = fieldsToErase.begin(); f != fieldsToErase.end(); ++f) { var.info.erase(*f); var.infoFlags.erase(*f); } cout << var << endl; } return 0; }
int main(int argc, char** argv) { if (argc > 1 && (argv[1] == "-h" || argv[1] == "--help")) { cerr << "usage: " << argv[0] << " <vcf file>" << endl << "outputs a VCF stream where AC and NS have been generated for each record using sample genotypes" << endl; return 1; } VariantCallFile variantFile; if (argc == 1 || (argc == 2 && argv[1] == "-")) { variantFile.open(std::cin); if (!variantFile.is_open()) { cerr << "vcffixup: could not open stdin" << endl; return 1; } } else { string filename = argv[1]; variantFile.open(filename); if (!variantFile.is_open()) { cerr << "vcffixup: could not open " << filename << endl; return 1; } } Variant var(variantFile); // remove header lines we're going to add variantFile.removeInfoHeaderLine("AC"); variantFile.removeInfoHeaderLine("AF"); variantFile.removeInfoHeaderLine("NS"); variantFile.removeInfoHeaderLine("AN"); // and add them back, so as not to duplicate them if they are already there variantFile.addHeaderLine("##INFO=<ID=AC,Number=A,Type=Integer,Description=\"Total number of alternate alleles in called genotypes\">"); variantFile.addHeaderLine("##INFO=<ID=AF,Number=A,Type=Float,Description=\"Estimated allele frequency in the range (0,1]\">"); variantFile.addHeaderLine("##INFO=<ID=NS,Number=1,Type=Integer,Description=\"Number of samples with data\">"); variantFile.addHeaderLine("##INFO=<ID=AN,Number=1,Type=Integer,Description=\"Total number of alleles in called genotypes\">"); // write the new header cout << variantFile.header << endl; // print the records, filtering is done via the setting of varA's output sample names while (variantFile.getNextVariant(var)) { stringstream ns; ns << var.samples.size(); var.info["NS"].clear(); var.info["NS"].push_back(ns.str()); var.info["AC"].clear(); var.info["AF"].clear(); var.info["AN"].clear(); int allelecount = countAlleles(var); stringstream an; an << allelecount; var.info["AN"].push_back(an.str()); for (vector<string>::iterator a = var.alt.begin(); a != var.alt.end(); ++a) { string& allele = *a; int altcount = countAlts(var, var.getAltAlleleIndex(allele) + 1); stringstream ac; ac << altcount; var.info["AC"].push_back(ac.str()); stringstream af; af << (double) altcount / (double) allelecount; var.info["AF"].push_back(af.str()); } cout << var << endl; } return 0; }