Exemple #1
0
// Given a SMRT sequence and a subread interval, make the subread.
// Input:
//   smrtRead         - a SMRT sequence
//   subreadInterval  - a subread interval
//   params           - mapping parameters
// Output:
//   subreadSequence - the constructed subread
void MakeSubreadOfInterval(SMRTSequence & subreadSequence,
                           SMRTSequence & smrtRead,
                           ReadInterval & subreadInterval,
                           MappingParameters & params)
{
    int start = subreadInterval.start;
    int end   = subreadInterval.end;

    assert(smrtRead.length >= subreadSequence.length);
    smrtRead.MakeSubreadAsMasked(subreadSequence, start, end);

    if (!params.preserveReadTitle) {
        smrtRead.SetSubreadTitle(subreadSequence,
                subreadSequence.SubreadStart(),
                subreadSequence.SubreadEnd());
    }
    else {
        subreadSequence.CopyTitle(smrtRead.title);
    }
    subreadSequence.zmwData = smrtRead.zmwData;
}
Exemple #2
0
void MakeVirtualRead(SMRTSequence & smrtRead,
                     const vector<SMRTSequence> & subreads)
{
    assert(subreads.size() > 0);
    DNALength hqStart = 0, hqEnd = 0;
    for(auto subread: subreads) {
        hqStart = min(DNALength(subread.SubreadStart()), hqStart);
        hqEnd   = max(DNALength(subread.SubreadEnd()),   hqEnd);
    }
    smrtRead.Free();
    smrtRead.Allocate(hqEnd);
    memset(smrtRead.seq, 'N', sizeof(char) * hqEnd);
    smrtRead.lowQualityPrefix = hqStart;
    smrtRead.lowQualitySuffix = smrtRead.length - hqEnd;
    smrtRead.highQualityRegionScore = subreads[0].highQualityRegionScore;
    smrtRead.HoleNumber(subreads[0].HoleNumber());
    stringstream ss;
    ss << SMRTTitle(subreads[0].GetTitle()).MovieName() << "/" << subreads[0].HoleNumber();
    smrtRead.CopyTitle(ss.str());
    for (auto subread: subreads) {
        memcpy(&smrtRead.seq[subread.SubreadStart()],
               &subread.seq[0], sizeof(char) * subread.length);
    }
}
Exemple #3
0
void SMRTSequence::SetSubreadTitle(SMRTSequence &subread, DNALength subreadStart, DNALength  subreadEnd) {
    stringstream titleStream;
    titleStream << title << "/"<< subreadStart << "_" << subreadEnd;
    subread.CopyTitle(titleStream.str());
}    
Exemple #4
0
int main(int argc, char* argv[]) {

    string inputFileName, outputFileName;

    if (argc < 2) {
        PrintUsage();
        exit(0);
    }
    vector<string> inputFileNames;
    inputFileName = argv[1];
    outputFileName = argv[2];
    int argi = 3;
    RegionTable regionTable;
    string regionsFOFNName = "";
    vector<string> regionFileNames;
    bool splitSubreads = true;
    bool useCCS = false;
    int minSubreadLength = 1;
    while (argi < argc) {
        if (strcmp(argv[argi], "-regionTable") == 0) {
            regionsFOFNName = argv[++argi];
        }
        else if (strcmp(argv[argi], "-noSplitSubreads") == 0) {
            splitSubreads = false;
        }
        else if (strcmp(argv[argi], "-minSubreadLength") == 0) {
            minSubreadLength = atoi(argv[++argi]);
        }
        else if (strcmp(argv[argi], "-useccsdenovo") == 0) {
            useCCS = true;
        }
        else {
            PrintUsage();
            cout << "ERROR! Option " << argv[argi] << " is not supported." << endl;
        }
        argi++;
    }
         
    if (FileOfFileNames::IsFOFN(inputFileName)) {
        FileOfFileNames::FOFNToList(inputFileName, inputFileNames);
    }
    else {
        inputFileNames.push_back(inputFileName);
    }
    if (regionsFOFNName == "") {
        regionFileNames = inputFileNames;
    }
    else {
        if (FileOfFileNames::IsFOFN(regionsFOFNName)) {
            FileOfFileNames::FOFNToList(regionsFOFNName, regionFileNames);
        }
        else {
            regionFileNames.push_back(regionsFOFNName);
        }
    }


    ofstream fastaOut;
    CrucialOpen(outputFileName, fastaOut);
    int plsFileIndex;
    HDFRegionTableReader hdfRegionReader;
    AfgBasWriter afgWriter;
    afgWriter.Initialize(outputFileName);

    for (plsFileIndex = 0; plsFileIndex < inputFileNames.size(); plsFileIndex++) {
        if (splitSubreads) {
            hdfRegionReader.Initialize(regionFileNames[plsFileIndex]);
            hdfRegionReader.ReadTable(regionTable);
            regionTable.SortTableByHoleNumber();
        }
        
        ReaderAgglomerate reader;
        // reader.SkipReadQuality(); // should have been taken care of by *Filter modules
        if (useCCS){
            reader.UseCCS();
        } else {
            reader.IgnoreCCS();
        }
        reader.Initialize(inputFileNames[plsFileIndex]);
        CCSSequence seq; 
        int seqIndex = 0;
        int numRecords = 0;
        vector<ReadInterval> subreadIntervals;
        while (reader.GetNext(seq)){ 
            ++seqIndex;
            if (splitSubreads == false) {
                if (seq.length >= minSubreadLength) {
                    afgWriter.Write(seq);
                }
                seq.Free();
                continue;
            }

            DNALength hqReadStart, hqReadEnd;
            int score;
            GetReadTrimCoordinates(seq, seq.zmwData, regionTable, hqReadStart, hqReadEnd, score);
            subreadIntervals.clear(); // clear old, new intervals are appended.
            CollectSubreadIntervals(seq,&regionTable, subreadIntervals);

            if (seq.length == 0 and subreadIntervals.size() > 0) {
                cout << "WARNING! A high quality interval region exists for a read of length 0." <<endl;
                cout << "  The offending ZMW number is " << seq.zmwData.holeNumber << endl;
                seq.Free();
                continue;
            }

            for (int intvIndex = 0; intvIndex < subreadIntervals.size(); intvIndex++) {
                SMRTSequence subreadSequence;
                
                int subreadStart = subreadIntervals[intvIndex].start > hqReadStart ? 
                                   subreadIntervals[intvIndex].start : hqReadStart;
                int subreadEnd   = subreadIntervals[intvIndex].end < hqReadEnd ?
                                   subreadIntervals[intvIndex].end : hqReadEnd;
                int subreadLength = subreadEnd - subreadStart;

                if (subreadLength < minSubreadLength) continue;

                subreadSequence.subreadStart = subreadStart;
                subreadSequence.subreadEnd   = subreadEnd;
                subreadSequence.ReferenceSubstring(seq, subreadStart, subreadLength);      
            
                stringstream titleStream;
                titleStream << seq.title << "/" << subreadIntervals[intvIndex].start 
                                         << "_" << subreadIntervals[intvIndex].end;
                subreadSequence.CopyTitle(titleStream.str());
                afgWriter.Write(subreadSequence);
                delete[] subreadSequence.title;
            }
            seq.Free();
        }
        reader.Close();
        hdfRegionReader.Close();
    }
}