Example #1
0
    //
    // Load the alignment stored at alignmentIndex and convert it into an
    // AlignmentCandidate.
    //
    //   alignmentIndex  index of the alignment, forwarded to the
    //                   CmpAlignment overload of ReadAlignment.
    //   alignment       output; receives the alignment built from the gapped
    //                   strings, copies of the ungapped query/target
    //                   sequences, start positions, strands, event counts,
    //                   similarity and mapQV.
    //
    // The query strand is always set to 0; the target strand comes from the
    // stored alignment.  pctSimilarity is stored as a fraction
    // (nMatch / total events), and is 0 when the alignment has no events.
    //
    void ReadAlignment(int alignmentIndex, AlignmentCandidate<FASTASequence, FASTASequence> &alignment) {
        CmpAlignment cmpAln;
        ReadAlignment(alignmentIndex, cmpAln);

        //
        // Expand the byte-encoded alignment into two gapped strings with one
        // character per alignment column.
        //
        string   refSequence;
        string   readSequence;
        readSequence.resize(cmpAln.alignmentArray.size());
        refSequence.resize(cmpAln.alignmentArray.size());

        //
        // Taking the address of element 0 of an empty array is not valid, so
        // only convert when there is at least one column.
        // NOTE(review): assumes the converters do nothing for length 0.
        //
        if (cmpAln.alignmentArray.size() > 0) {
            ByteAlignmentToQueryString(&cmpAln.alignmentArray[0], cmpAln.alignmentArray.size(), &readSequence[0]);
            ByteAlignmentToRefString(&cmpAln.alignmentArray[0], cmpAln.alignmentArray.size(), &refSequence[0]);
        }

        string ungappedRead, ungappedRef;
        RemoveGaps(readSequence, ungappedRead);
        RemoveGaps(refSequence, ungappedRef);

        GappedStringsToAlignment(readSequence, refSequence, alignment);

        //
        // Wrap the ungapped strings in temporary sequences that point at the
        // string buffers, then copy them into the output alignment.
        // NOTE(review): the strings die at the end of this function, so this
        // relies on Copy() duplicating the bases -- confirm.
        //
        FASTASequence qAlignedSeq, rAlignedSeq;
        qAlignedSeq.seq = (Nucleotide*) &ungappedRead[0];
        qAlignedSeq.length = ungappedRead.size();
        rAlignedSeq.seq = (Nucleotide*) &ungappedRef[0];
        rAlignedSeq.length = ungappedRef.size();

        alignment.tAlignedSeq.Copy(rAlignedSeq);
        alignment.qAlignedSeq.Copy(qAlignedSeq);

        alignment.tPos = cmpAln.GetRefStart();
        alignment.qPos = cmpAln.GetQueryStart();
        alignment.nIns   = cmpAln.GetNInsertions();
        alignment.nDel   = cmpAln.GetNDeletions();
        alignment.nMatch = cmpAln.GetNMatch();
        alignment.nMismatch=cmpAln.GetNMismatch();
        alignment.qStrand= 0;
        alignment.tStrand = cmpAln.GetTStrand();

        //
        // Guard the division: with no matches, mismatches, insertions or
        // deletions the ratio would be 0/0 (NaN).
        //
        if ((alignment.nMatch + alignment.nMismatch + alignment.nIns + alignment.nDel) == 0) {
            alignment.pctSimilarity = 0;
        }
        else {
            alignment.pctSimilarity = ((float)alignment.nMatch) / (alignment.nMatch + alignment.nMismatch + alignment.nIns + alignment.nDel);
        }
        alignment.mapQV  = cmpAln.GetMapQV();
    }
Example #2
0
    //
    // Read the byte-encoded alignment for the alignment at alignmentIndex
    // into alignmentArray.
    //
    //   alignmentIndex  index passed to alnInfoGroup.ReadCmpAlignment.
    //   alignmentArray  output; resized to (offsetEnd - offsetBegin) and
    //                   filled from the experiment group's alignment array.
    //
    // If the stored offsets give a negative length the function returns
    // without touching alignmentArray.  A missing read group name or
    // experiment index prints an error and hits assert(0).
    //
    void ReadAlignmentArray(int alignmentIndex, ByteAlignment &alignmentArray) {
        CmpAlignment cmpAlignment;
        alnInfoGroup.ReadCmpAlignment(alignmentIndex, cmpAlignment);

        //
        // Locate the reference group and read group this alignment
        // belongs to.
        //
        int refGroupId = cmpAlignment.GetRefGroupId();
        int alnGroupId = cmpAlignment.GetAlnGroupId();

        int refGroupIndex    = refGroupIdToArrayIndex[refGroupId];
        if (alnGroupIdToReadGroupName.find(alnGroupId) == 
                alnGroupIdToReadGroupName.end()) {
            cout << "INTERNAL ERROR! Could not find read group name for alignment "
                << "group with Id " << alnGroupId << "." << endl;
            assert(0);
        }
        string readGroupName = alnGroupIdToReadGroupName[alnGroupId]; 
        if (refAlignGroups[refGroupIndex]->experimentNameToIndex.find(readGroupName) ==
                refAlignGroups[refGroupIndex]->experimentNameToIndex.end()) {
            cout << "Internal ERROR! The read group name " << readGroupName << " is specified as part of "
                << " the path in alignment " << alignmentIndex
                << " though it does not exist in the ref align group specified for this alignment." << endl;
            assert(0);
        }
        int readGroupIndex   = refAlignGroups[refGroupIndex]->experimentNameToIndex[readGroupName];

        HDFCmpExperimentGroup* expGroup = refAlignGroups[refGroupIndex]->readGroups[readGroupIndex];

        int offsetBegin = cmpAlignment.GetOffsetBegin();
        int offsetEnd   = cmpAlignment.GetOffsetEnd();

        int alignedSequenceLength = offsetEnd - offsetBegin;

        //
        // Inconsistent offsets: nothing sensible can be read.
        //
        if (alignedSequenceLength < 0) {
            return;
        }
        alignmentArray.resize(alignedSequenceLength);

        //
        // An empty alignment has no element 0 to take the address of, so
        // skip the read.
        // NOTE(review): assumes reading an empty range is a no-op.
        //
        if (alignedSequenceLength == 0) {
            return;
        }

        //
        // Read the alignment bytes in [offsetBegin, offsetEnd) from the
        // experiment group's alignment array.
        //
        expGroup->alignmentArray.Read(offsetBegin, 
                offsetEnd, 
                &alignmentArray[0]);
    }
Example #3
0
    void ImportReadFromCmpH5(int alignmentIndex, SMRTSequence &read) {
        CmpAlignment cmpAlignment;
        alnInfoGroup.ReadCmpAlignment(alignmentIndex, cmpAlignment);

        //
        // Cache some stats about the read, and where it was aligned to.
        //
        int queryStart = cmpAlignment.GetQueryStart();
        int queryEnd   = cmpAlignment.GetQueryEnd();
        read.holeNumber = cmpAlignment.GetHoleNumber();
        int refGroupId = cmpAlignment.GetRefGroupId();
        int alnGroupId = cmpAlignment.GetAlnGroupId();
        int refGroupIndex  = refGroupIdToArrayIndex[refGroupId];
        if (alnGroupIdToReadGroupName.find(alnGroupId) == alnGroupIdToReadGroupName.end()) {
            cout << "INTERNAL ERROR! Could not find read group name for alignment "
                << "group with Id " << alnGroupId << "." << endl;
            assert(0);
        }
        string readGroupName = alnGroupIdToReadGroupName[alnGroupId];

        if (refAlignGroups[refGroupIndex]->experimentNameToIndex.find(readGroupName) ==
                refAlignGroups[refGroupIndex]->experimentNameToIndex.end()) {
            cout << "Internal ERROR! The read group name " << readGroupName << " is specified as part of "
                << " the path in alignment " << alignmentIndex
                << " though it does not exist in the ref align group specified for this alignment." << endl;
            assert(0);
        }

        int readGroupIndex = refAlignGroups[refGroupIndex]->experimentNameToIndex[readGroupName];
        HDFCmpExperimentGroup* expGroup = refAlignGroups[refGroupIndex]->readGroups[readGroupIndex];

        int offsetBegin = cmpAlignment.GetOffsetBegin();
        int offsetEnd   = cmpAlignment.GetOffsetEnd();

        int alignedSequenceLength = offsetEnd - offsetBegin;
        string   alignedSequence;
        string   readSequence;
        vector<unsigned char> byteAlignment;

        if (alignedSequenceLength >= 0) {
            alignedSequence.resize(alignedSequenceLength);
            byteAlignment.resize(alignedSequenceLength);
        }

        //
        // Read the alignment string.  All alignments 
        //
        refAlignGroups[refGroupIndex]->readGroups[readGroupIndex]->alignmentArray.Read(offsetBegin, 
                offsetEnd, 
                &byteAlignment[0]);

        //
        // Convert to something we can compare easily.
        //
        ByteAlignmentToQueryString(&byteAlignment[0], byteAlignment.size(), &alignedSequence[0]);


        // 
        // Initialize the sequence of the read.
        //
        RemoveGaps(alignedSequence, readSequence);

        //
        // Make space for the sequence and all fields.
        //
        read.length = readSequence.size();
        read.Allocate(read.length);
        memcpy(read.seq, readSequence.c_str(), readSequence.size() * sizeof(char));

        vector<int> baseToAlignmentMap;
        CreateSequenceToAlignmentMap(byteAlignment, baseToAlignmentMap);

        //
        // Read in the quality values
        //


        vector<unsigned char> storedQVArray;

        vector<UChar> qvValues;
        vector<HalfWord> frameValues;
        int length = offsetEnd - offsetBegin;
        qvValues.resize(length);
        frameValues.resize(length);
        int i;


        if (expGroup->experimentGroup.ContainsObject("QualityValue")) {
            expGroup->qualityValue.Read(offsetBegin, offsetEnd, &qvValues[0]);
            StoreQualityValueFromAlignment(qvValues, baseToAlignmentMap, &read.qual.data[0]);
            int i;
            for (i= 0; i < read.length; i++) {
                assert(read.qual[i] < 100);
            }
        }

        if (expGroup->experimentGroup.ContainsObject("InsertionQV")) {
            expGroup->insertionQV.Read(offsetBegin, offsetEnd, &qvValues[0]);
            StoreQualityValueFromAlignment(qvValues, baseToAlignmentMap, &read.insertionQV.data[0]);
        }

        if (expGroup->experimentGroup.ContainsObject("SubstitutionQV")) {
            expGroup->substitutionQV.Read(offsetBegin, offsetEnd, &qvValues[0]);
            StoreQualityValueFromAlignment(qvValues, baseToAlignmentMap, &read.substitutionQV.data[0]);
        }

        if (expGroup->experimentGroup.ContainsObject("DeletionQV")) {
            expGroup->deletionQV.Read(offsetBegin, offsetEnd, &qvValues[0]);
            StoreQualityValueFromAlignment(qvValues, baseToAlignmentMap, &read.deletionQV.data[0]);
        }

        if (expGroup->experimentGroup.ContainsObject("DeletionTag")) {
            vector<char> deletionTagValues;
            deletionTagValues.resize(offsetEnd-offsetBegin);
            expGroup->deletionTag.Read(offsetBegin, offsetEnd, &deletionTagValues[0]);
            StoreQualityValueFromAlignment(deletionTagValues, baseToAlignmentMap, read.deletionTag);
        }

        if (expGroup->experimentGroup.ContainsObject("SubstitutionTag")) {
            vector<char> substitutionTagValues;
            substitutionTagValues.resize(offsetEnd-offsetBegin);
            expGroup->substitutionTag.Read(offsetBegin, offsetEnd, &substitutionTagValues[0]);
            StoreQualityValueFromAlignment(substitutionTagValues, baseToAlignmentMap, read.substitutionTag);
        }

        if (expGroup->experimentGroup.ContainsObject("PulseIndex")) {
            vector<uint32_t> pulseIndexValues;
            pulseIndexValues.resize(offsetEnd-offsetBegin);
            expGroup->pulseIndex.Read(offsetBegin, offsetEnd, &pulseIndexValues[0]);
            StoreQualityValueFromAlignment(pulseIndexValues, baseToAlignmentMap, read.pulseIndex);
        }

        if (expGroup->experimentGroup.ContainsObject("PreBaseFrames")) {
            expGroup->preBaseFrames.Read(offsetBegin, offsetEnd, &frameValues[0]);
            StoreQualityValueFromAlignment(frameValues, baseToAlignmentMap, read.preBaseFrames);
        }

        if (expGroup->experimentGroup.ContainsObject("WidthInFrames")) {
            expGroup->widthInFrames.Read(offsetBegin, offsetEnd, &frameValues[0]);
            StoreQualityValueFromAlignment(frameValues, baseToAlignmentMap, read.widthInFrames);
        }

    }