Пример #1
0
int main(int argc, char** argv) {

    if (argc < 3) {
        cerr << "usage: " << argv[0] << " <vcf file> [FIELD1] [FIELD2] ..." << endl
             << "outputs each record in the vcf file, removing INFO fields not listed on the command line" << endl;
        return 1;
    }

    string filename = argv[1];

    set<string> fieldsToKeep;
    for (int i = 2; i < argc; ++i) {
        fieldsToKeep.insert(argv[i]);
    }

    VariantCallFile variantFile;
    if (filename == "-") {
        variantFile.open(std::cin);
    } else {
        variantFile.open(filename);
    }

    if (!variantFile.is_open()) {
        return 1;
    }

    Variant var(variantFile);

    vector<string> fieldsToErase;
    vector<string> infoIds = variantFile.infoIds();
    for (vector<string>::iterator i = infoIds.begin(); i != infoIds.end(); ++i) {
        if (!fieldsToKeep.count(*i)) {
            fieldsToErase.push_back(*i);
            variantFile.removeInfoHeaderLine(*i);
        }
    }

    // write the header
    cout << variantFile.header << endl;
 
    // print the records, filtering is done via the setting of varA's output sample names
    while (variantFile.getNextVariant(var)) {
        for (vector<string>::iterator f = fieldsToErase.begin(); f != fieldsToErase.end(); ++f) {
            var.info.erase(*f);
            var.infoFlags.erase(*f);
        }
        cout << var << endl;
    }

    return 0;

}
Пример #2
0
int main(int argc, char** argv) {

    if (argc > 1 && (argv[1] == "-h" || argv[1] == "--help")) {
        cerr << "usage: " << argv[0] << " <vcf file>" << endl
             << "outputs a VCF stream where AC and NS have been generated for each record using sample genotypes" << endl;
        return 1;
    }

    VariantCallFile variantFile;
    if (argc == 1 || (argc == 2 && argv[1] == "-")) {
        variantFile.open(std::cin);
        if (!variantFile.is_open()) {
            cerr << "vcffixup: could not open stdin" << endl;
            return 1;
        }
    } else {
        string filename = argv[1];
        variantFile.open(filename);
        if (!variantFile.is_open()) {
            cerr << "vcffixup: could not open " << filename << endl;
            return 1;
        }
    }

    Variant var(variantFile);

    // remove header lines we're going to add
    variantFile.removeInfoHeaderLine("AC");
    variantFile.removeInfoHeaderLine("AF");
    variantFile.removeInfoHeaderLine("NS");
    variantFile.removeInfoHeaderLine("AN");

    // and add them back, so as not to duplicate them if they are already there
    variantFile.addHeaderLine("##INFO=<ID=AC,Number=A,Type=Integer,Description=\"Total number of alternate alleles in called genotypes\">");
    variantFile.addHeaderLine("##INFO=<ID=AF,Number=A,Type=Float,Description=\"Estimated allele frequency in the range (0,1]\">");
    variantFile.addHeaderLine("##INFO=<ID=NS,Number=1,Type=Integer,Description=\"Number of samples with data\">");
    variantFile.addHeaderLine("##INFO=<ID=AN,Number=1,Type=Integer,Description=\"Total number of alleles in called genotypes\">");

    // write the new header
    cout << variantFile.header << endl;
 
    // print the records, filtering is done via the setting of varA's output sample names
    while (variantFile.getNextVariant(var)) {
        stringstream ns;
        ns << var.samples.size();
        var.info["NS"].clear();
        var.info["NS"].push_back(ns.str());

        var.info["AC"].clear();
        var.info["AF"].clear();
        var.info["AN"].clear();

        int allelecount = countAlleles(var);
        stringstream an;
        an << allelecount;
        var.info["AN"].push_back(an.str());

        for (vector<string>::iterator a = var.alt.begin(); a != var.alt.end(); ++a) {
            string& allele = *a;
            int altcount = countAlts(var, var.getAltAlleleIndex(allele) + 1);
            stringstream ac;
            ac << altcount;
            var.info["AC"].push_back(ac.str());
            stringstream af;
            af << (double) altcount / (double) allelecount;
            var.info["AF"].push_back(af.str());
        }
        cout << var << endl;
    }

    return 0;

}