示例#1
0
int main (int argc, char * argv[])
{
    vector<string> inputFilenames;
    string combinedOutFilename, alignmentsOutFilename;

    try
    {
        TCLAP::CmdLine cmd("Program description", ' ', VERSION);

        TCLAP::ValueArg<string> combinedOutputArg("o", "out", 
            "Combined output filename (BAM format)", true, "", "combined.bam", cmd);

        TCLAP::ValueArg<int> minInsertArg("n", "min-insert", 
            "Minimum insert size", false, DEFAULT_MIN_GAP, "min insert size", cmd);

        TCLAP::ValueArg<int> maxInsertArg("x", "max-insert", 
            "Maximum insert size", false, DEFAULT_MAX_GAP, "max insert size", cmd);

        TCLAP::MultiArg<string> inputArgs("b", "bam",
            "Input BAM file", true,
            "input.bam", cmd);

        cmd.parse(argc, argv);

        combinedOutFilename = combinedOutputArg.getValue();

        MIN_GAP = minInsertArg.getValue();
        MAX_GAP = maxInsertArg.getValue();
        inputFilenames = inputArgs.getValue();

    } catch (TCLAP::ArgException &e) {
        cerr << "Error: " << e.error() << " " << e.argId() << endl;
    }

    // TODO require that alignments are sorted by name

    BamMultiReader reader;
    reader.Open(inputFilenames);

    if (!ValidOut.Open(combinedOutFilename, reader.GetHeader(),
                       reader.GetReferenceData()))
    {
        cerr << ValidOut.GetErrorString() << endl;
        return 1;
    }

    string current, prev;
    char mateID;
    Group group;
    set<string> references;

    Alignment a;
    while (reader.GetNextAlignment(a))
    {
        parseID(a.Name, current, mateID);

        if (current.compare(prev) && prev.size() > 0)
        {
            processGroup(group, references);
            group.clear();
            references.clear();
        }

        references.insert(a.RefName);

        GroupKey key;
        key.refID = a.RefName;
        key.mateID = mateID;
        key.rev = a.IsReverseStrand();

        group.insert( std::make_pair( key, a ) );

        prev = current;
    }
    processGroup(group, references);
}
示例#2
0
int FileReader::runInternal()
{
    ogeNameThread("am_FileReader");

    if(!format_specified)
        format = deduceFileFormat();

    if(format == FORMAT_BAM)
    {
        BamMultiReader reader;
        
        if(!reader.Open(filenames)) {
            cerr << "Error opening BAM files." << endl;
            reader.Close();
            return -1;
        }
        
        header = reader.GetHeader();
        references = reader.GetReferenceData();
        open = true;
        
        BamAlignment * al;
        
        while(true)
        {
            if(load_string_data)
                al = reader.GetNextAlignment();
            else
                al = reader.GetNextAlignmentCore();

            if(!al)
                break;
            
            putOutputAlignment(al);
        }
        
        reader.Close();
    } else if(format == FORMAT_SAM) {
        
        vector<SamReader> readers;
        
        SamHeader first_header;

        // before doing any reading, open the files to
        // verify they are the right format, etc.
        for(int i = 0; i < filenames.size(); i++) {
            SamReader reader;
            
            if(!reader.Open(filenames[i])) {
                cerr << "Error opening SAM file: " << filenames[i] << endl;
                return -1;
            }

            if(filenames.size() > 1 && i == 0)
                first_header = header;
            
            // TODO: We can probably find a better way to deal with multiple SAM file headers,
            // but for now we should disallow different headers to avoid issues.
            if(i > 0 && header.ToString() != first_header.ToString())
                cerr << "Warning! SAM input files have different headers." << endl;
            
            reader.Close();
        }

        for(int i = 0; i < filenames.size(); i++) {
            SamReader reader;
            
            if(!reader.Open(filenames[i])) {
                cerr << "Error opening SAM file: " << filenames[i] << endl;
                return -1;
            }
            
            header = reader.GetHeader();
            references = reader.GetReferenceData();
            open = true;
            
            if(filenames.size() > 1 && i == 0)
                first_header = header;

            BamAlignment * al = NULL;
            while(true)
            {
                al = reader.GetNextAlignment();
                
                if(NULL == al)
                    break;
                
                putOutputAlignment(al);
            }

            reader.Close();
        }
    } else {
        cerr << "FileReader couldn't detect file format. Aborting." << endl;
        exit(-1);
        return -1;
    }

    return 0;
}