示例#1
0
// The VcfRecord passed in should already be set with a record.
bool findPos(bool newChrom, const char* chrom1, int pos1, VcfRecord& record2, VcfFileReader& vcf2)
{
    const char* chrom2 = record2.getChromStr();
    int pos2 = record2.get1BasedPosition();

    // Loop until the chrom/pos is found in vcf2.
    bool sameChrom = (strcmp(chrom2, chrom1) == 0);
    while(((pos2 < pos1) && sameChrom) ||
          (newChrom && !sameChrom))
    {
        if(vcf2.readRecord(record2))
        {
            chrom2 = record2.getChromStr();
            pos2 = record2.get1BasedPosition();
            sameChrom = (strcmp(chrom2, chrom1) == 0);
        }
        else
        {
            // no more records.
            chrom2 = NULL;
            pos2 = UNSET_POS;
            return(false);
        }
    }
    // If we wind up here, chrom2 is either at the correct 
    // position, or it is past the correct position.
    if((!sameChrom) || (pos2 != pos1))
    {
        // Position not found.
        return(false);
    }
    return(true);
}    
示例#2
0
int VcfCleaner::execute(int argc, char **argv)
{
    String refFile = "";
    String inputVcf = "";
    String outputVcf = "";
    bool uncompress = false;
    bool params = false;
    
    // Read in the parameters.    
    ParameterList inputParameters;
    BEGIN_LONG_PARAMETERS(longParameterList)
        LONG_PARAMETER_GROUP("Required Parameters")
        LONG_STRINGPARAMETER("in", &inputVcf)
        LONG_STRINGPARAMETER("out", &outputVcf)
        LONG_PARAMETER_GROUP("Optional Parameters")
        LONG_PARAMETER("uncompress", &uncompress)
        LONG_PARAMETER("params", &params)
        END_LONG_PARAMETERS();
   
    inputParameters.Add(new LongParameters ("Input Parameters", 
                                            longParameterList));
    
    inputParameters.Read(argc-1, &(argv[1]));
    
    // Check that all files were specified.
    if(inputVcf == "")
    {
        usage();
        inputParameters.Status();
        std::cerr << "Missing \"--in\", a required parameter.\n\n";
        return(-1);
    }
    if(outputVcf == "")
    {
        usage();
        inputParameters.Status();
        std::cerr << "Missing \"--out\", a required parameter.\n\n";
        return(-1);
    }

    if(params)
    {
        inputParameters.Status();
    }

    VcfFileReader inFile;
    VcfFileWriter outFile;
    VcfHeader header;
    VcfRecord record;

    // Open the file.
    inFile.open(inputVcf, header);
    if(uncompress)
    {
        outFile.open(outputVcf, header, InputFile::DEFAULT);
    }
    else
    {
        outFile.open(outputVcf, header);
    }

    int numReadRecords = 0;
    int numWrittenRecords = 0;
    int returnVal = 0;

    // Set to only store/write the GT field.
    VcfRecordGenotype::addStoreField("GT");
    while(inFile.readRecord(record))
    {
        ++numReadRecords;
        // Check if any samples are missing GT or if any are not phased.
        if(!record.hasAllGenotypeAlleles() || !record.allPhased())
        {
            // Missing a GT or not phased, so continue without writing.
            continue;
        }
        
        // Clear the INFO field.
        record.getInfo().clear();
        // Write the record.
        if(!outFile.writeRecord(record))
        {
            // Write error.
            std::cerr << "Failed writing a vcf record.\n";
            returnVal = -1;
        }
        ++numWrittenRecords;
    }
 
    inFile.close();   
    outFile.close();   

    std::cerr << "NumReadRecords: " << numReadRecords
              << "; NumWrittenRecords: " << numWrittenRecords << "\n";
    return(returnVal);
}
示例#3
0
int VcfConvert::execute(int argc, char **argv)
{
    String refFile = "";
    String inputVcf = "";
    String outputVcf = "";
    String refName = "";
    bool uncompress = false;
    bool params = false;
    bool noeof = false;
    
    // Read in the parameters.    
    ParameterList inputParameters;
    BEGIN_LONG_PARAMETERS(longParameterList)
        LONG_PARAMETER_GROUP("Required Parameters")
        LONG_STRINGPARAMETER("in", &inputVcf)
        LONG_STRINGPARAMETER("out", &outputVcf)
        LONG_PARAMETER_GROUP("Optional Parameters")
        LONG_PARAMETER("uncompress", &uncompress)
        LONG_STRINGPARAMETER("refName", &refName)
        LONG_PARAMETER("noeof", &noeof)
        LONG_PARAMETER("params", &params)
        LONG_PHONEHOME(VERSION)
        END_LONG_PARAMETERS();
   
    inputParameters.Add(new LongParameters ("Input Parameters", 
                                            longParameterList));
    
    inputParameters.Read(argc-1, &(argv[1]));
    
    // Check that all files were specified.
    if(inputVcf == "")
    {
        usage();
        inputParameters.Status();
        std::cerr << "Missing \"--in\", a required parameter.\n\n";
        return(-1);
    }
    if(outputVcf == "")
    {
        usage();
        inputParameters.Status();
        std::cerr << "Missing \"--out\", a required parameter.\n\n";
        return(-1);
    }

    if(params)
    {
        inputParameters.Status();
    }

    // If no eof block is required for a bgzf file, set the bgzf file type to 
    // not look for it.
    if(noeof)
    {
        // Set that the eof block is not required.
        BgzfFileType::setRequireEofBlock(false);
    }

    VcfFileReader inFile;
    VcfFileWriter outFile;
    VcfHeader header;
    
    // Open the file.
    inFile.open(inputVcf, header);

    if(refName != "")
    {
        inFile.setReadSection(refName.c_str());
    }

    if(uncompress)
    {
        outFile.open(outputVcf, header, InputFile::DEFAULT);
    }
    else
    {
        outFile.open(outputVcf, header);
    }

    VcfRecord record;
    int numRecords = 0;

    while(inFile.readRecord(record))
    {
        ++numRecords;

        outFile.writeRecord(record);
    }
 
    inFile.close();   

    std::cerr << "NumRecords: " << numRecords << "\n";
    return(0);
}
示例#4
0
int VcfSplit::execute(int argc, char **argv)
{
    String refFile = "";
    String inputVcf = "";
    String outputVcfBase = "";
    String refName = "";
    bool uncompress = false;
    bool params = false;
    bool noeof = false;
    
    // Read in the parameters.    
    ParameterList inputParameters;
    BEGIN_LONG_PARAMETERS(longParameterList)
        LONG_PARAMETER_GROUP("Required Parameters")
        LONG_STRINGPARAMETER("in", &inputVcf)
        LONG_STRINGPARAMETER("obase", &outputVcfBase)
        LONG_PARAMETER_GROUP("Optional Parameters")
        LONG_PARAMETER("uncompress", &uncompress)
        LONG_STRINGPARAMETER("refName", &refName)
        LONG_PARAMETER("noeof", &noeof)
        LONG_PARAMETER("params", &params)
        LONG_PHONEHOME(VERSION)
        END_LONG_PARAMETERS();
   
    inputParameters.Add(new LongParameters ("Input Parameters", 
                                            longParameterList));
    
    inputParameters.Read(argc-1, &(argv[1]));
    
    // Check that all files were specified.
    if(inputVcf == "")
    {
        usage();
        inputParameters.Status();
        std::cerr << "Missing \"--in\", a required parameter.\n\n";
        return(-1);
    }
    if(outputVcfBase == "")
    {
        usage();
        inputParameters.Status();
        std::cerr << "Missing \"--obase\", a required parameter.\n\n";
        return(-1);
    }
    outputVcfBase += ".";

    if(params)
    {
        inputParameters.Status();
    }

    // If no eof block is required for a bgzf file, set the bgzf file type to 
    // not look for it.
    if(noeof)
    {
        // Set that the eof block is not required.
        BgzfFileType::setRequireEofBlock(false);
    }

    VcfFileReader inFile;
    std::map<std::string, VcfFileWriter*> outFiles;
    VcfHeader header;
    
    // Open the file.
    inFile.open(inputVcf, header);

    if(refName != "")
    {
        inFile.setReadSection(refName.c_str());
    }

    VcfRecord record;
    int numRecords = 0;

    std::string prevChr = "";
    std::string chr = "";
    VcfFileWriter* outFilePtr = 0;
    std::string outName = "";
    while(inFile.readRecord(record))
    {
        ++numRecords;

        chr = record.getChromStr();

        if((outFilePtr == 0) || (chr != prevChr))
        {
            outFilePtr = outFiles[chr];
            if(outFilePtr == 0)
            {
                outFilePtr = new VcfFileWriter();
                outFiles[chr] = outFilePtr;
                outName = outputVcfBase.c_str();
                if(chr.substr(0,3) != "chr")
                {
                    outName += "chr";
                }
                outName += chr + ".vcf";
                // chr not in outFile list.
                if(uncompress)
                {
                    outFilePtr->open(outName.c_str(), header, InputFile::DEFAULT);
                }
                else
                {
                    outName += ".gz";
                    outFilePtr->open(outName.c_str(), header);
                }
            }
        }
        outFilePtr->writeRecord(record);
    }
 
    inFile.close();   

    for (std::map<std::string,VcfFileWriter*>::iterator it = outFiles.begin();
         it != outFiles.end(); ++it)
    {
        if(it->second != 0)
        {
            it->second->close();
            it->second = 0;
        }
    }
  

    std::cerr << "NumRecords: " << numRecords << "\n";
    return(0);
}
示例#5
0
int VcfMac::execute(int argc, char **argv)
{
    String inputVcf = "";
    int minAC = -1;
    String sampleSubset = "";
    String filterList = "";
    bool params = false;

    IntervalTree<int> regions;
    std::vector<int> intersection;
    
    // Read in the parameters.    
    ParameterList inputParameters;
    BEGIN_LONG_PARAMETERS(longParameterList)
        LONG_PARAMETER_GROUP("Required Parameters")
        LONG_STRINGPARAMETER("in", &inputVcf)
        LONG_PARAMETER_GROUP("Optional Parameters")
        LONG_STRINGPARAMETER("sampleSubset", &sampleSubset)
        LONG_INTPARAMETER("minAC", &minAC)
        LONG_STRINGPARAMETER("filterList", &filterList)
        LONG_PARAMETER("params", &params)
        LONG_PHONEHOME(VERSION)
        END_LONG_PARAMETERS();
   
    inputParameters.Add(new LongParameters ("Input Parameters", 
                                            longParameterList));
    
    inputParameters.Read(argc-1, &(argv[1]));
    
    // Check that all files were specified.
    if(inputVcf == "")
    {
        usage();
        inputParameters.Status();
        std::cerr << "Missing \"--in\", a required parameter.\n\n";
        return(-1);
    }

    if(params)
    {
        inputParameters.Status();
    }

    // Open the two input files.
    VcfFileReader inFile;
    VcfHeader header;
    VcfRecord record;

    // Open the file
    if(sampleSubset.IsEmpty())
    {
        inFile.open(inputVcf, header);        
    }
    else
    {
        inFile.open(inputVcf, header, sampleSubset, NULL, NULL);
    }
    
    // Add the discard rule for minor allele count.
    if(minAC >= 0)
    {
        inFile.addDiscardMinMinorAlleleCount(minAC, NULL);
    }
    
    if(!filterList.IsEmpty())
    {
        // Open the filter list.
        IFILE regionFile = ifopen(filterList, "r");
        String regionLine;
        StringArray regionColumn;
        int start;
        int end;
        int intervalVal = 1;
        if(regionFile == NULL)
        {
            std::cerr << "Failed to open " << filterList 
                      << ", so keeping all positions\n";
            filterList.Clear();
        }
        else
        {
            while( regionFile->isOpen() && !regionFile->ifeof())
            {
                // Read the next interval
                regionLine.Clear();
                regionLine.ReadLine(regionFile);
                if(regionLine.IsEmpty())
                {
                    // Nothing on this line, continue to the next.
                    continue;
                }
                regionColumn.ReplaceColumns(regionLine, ' ');
                if(regionColumn.Length() != 2)
                {
                    std::cerr << "Improperly formatted region line: " 
                              << regionLine << "; skipping to the next line.\n";
                    continue;
                }
                // Convert the columns to integers.
                if(!regionColumn[0].AsInteger(start))
                {
                    // The start position (1st column) is not an integer.
                    std::cerr << "Improperly formatted region line, start position "
                              << "(1st column) is not an integer: "
                              << regionColumn[0]
                              << "; Skipping to the next line.\n";
                    continue;
                }
                if(!regionColumn[1].AsInteger(end))
                {
                    // The start position (1st column) is not an integer.
                    std::cerr << "Improperly formatted region line, end position "
                              << "(2nd column) is not an integer: "
                              << regionColumn[1]
                              << "; Skipping to the next line.\n";
                    continue;
                }
                // Add 1-based inclusive intervals.
                regions.add(start,end, intervalVal);
            }
        }
    }


    int numReadRecords = 0;

    while( inFile.readRecord(record))
    {
        if(!filterList.IsEmpty())
        {
            // Check if the region should be kept.
            intersection.clear();
            regions.get_intersecting_intervals(record.get1BasedPosition(), intersection);
            
            if(intersection.empty())
            {
                // not in the interval, so continue to the next record.
                continue;
            }
        }

        ++numReadRecords;

        // Loop through the number of possible alternates.
        unsigned int numAlts = record.getNumAlts();
        int minAlleleCount = -1;
        int curAlleleCount = 0;
        int totalAlleleCount = 0;
        for(unsigned int i = 0; i <= numAlts; i++)
        {
            curAlleleCount = record.getAlleleCount(i);
            if((minAlleleCount == -1) ||
               (curAlleleCount < minAlleleCount))
            {
                minAlleleCount = curAlleleCount;
            }
            totalAlleleCount += curAlleleCount;
        }
        if(totalAlleleCount != 0)
        {
            double maf = (double)minAlleleCount/totalAlleleCount;
            std::cout << record.getIDStr()
                      << "\t" << minAlleleCount
                      << "\t" << maf << "\n";
        }
    }
    
    inFile.close();

    //    std::cerr << "\n\t# Records: " << numReadRecords << "\n";

    // return success.
    return(0);
}