Esempio n. 1
0
    //
    // Read the raw alignment bytes of a single alignment into alignmentArray.
    //
    // The alignment record for alignmentIndex is fetched through alnInfoGroup.
    // Its reference group id and alignment group id are then used to locate
    // the experiment group whose alignmentArray dataset is read between
    // offsetBegin and offsetEnd.
    //
    //   alignmentIndex  index handed to alnInfoGroup.ReadCmpAlignment.
    //   alignmentArray  output buffer; resized to offsetEnd - offsetBegin and
    //                   filled from the dataset.  It is left untouched when
    //                   that length is negative.
    //
    // NOTE(review): the two consistency checks below rely on assert(0).  When
    // built with NDEBUG the assert is a no-op and execution continues into the
    // operator[] lookups that follow -- confirm that is acceptable.
    //
    void ReadAlignmentArray(int alignmentIndex, ByteAlignment &alignmentArray) {
        CmpAlignment cmpAlignment;
        alnInfoGroup.ReadCmpAlignment(alignmentIndex, cmpAlignment);

        //
        // Identify the reference group and the read (experiment) group
        // that this alignment belongs to.
        //
        int refGroupId = cmpAlignment.GetRefGroupId();
        int alnGroupId = cmpAlignment.GetAlnGroupId();

        // NOTE(review): refGroupId is not validated before this lookup,
        // unlike alnGroupId below -- verify that callers guarantee it.
        int refGroupIndex = refGroupIdToArrayIndex[refGroupId];
        if (alnGroupIdToReadGroupName.find(alnGroupId) ==
                alnGroupIdToReadGroupName.end()) {
            cout << "INTERNAL ERROR! Could not find read group name for alignment "
                << "group with Id " << alnGroupId << "." << endl;
            assert(0);
        }
        string readGroupName = alnGroupIdToReadGroupName[alnGroupId];
        if (refAlignGroups[refGroupIndex]->experimentNameToIndex.find(readGroupName) ==
                refAlignGroups[refGroupIndex]->experimentNameToIndex.end()) {
            cout << "Internal ERROR! The read group name " << readGroupName << " is specified as part of "
                << " the path in alignment " << alignmentIndex
                << " though it does not exist in the ref align group specified for this alignment." << endl;
            assert(0);
        }
        int readGroupIndex = refAlignGroups[refGroupIndex]->experimentNameToIndex[readGroupName];

        HDFCmpExperimentGroup* expGroup = refAlignGroups[refGroupIndex]->readGroups[readGroupIndex];

        int offsetBegin = cmpAlignment.GetOffsetBegin();
        int offsetEnd   = cmpAlignment.GetOffsetEnd();

        int alignedSequenceLength = offsetEnd - offsetBegin;

        //
        // Reversed offsets: nothing sensible can be read, so leave the
        // output buffer as it was.
        //
        if (alignedSequenceLength < 0) {
            return;
        }

        alignmentArray.resize(alignedSequenceLength);

        //
        // An empty alignment has no bytes to fetch.  Returning here also
        // avoids taking &alignmentArray[0] on an empty container.
        //
        if (alignedSequenceLength == 0) {
            return;
        }

        //
        // Read the alignment bytes for this alignment into the buffer.
        //
        expGroup->alignmentArray.Read(offsetBegin,
                offsetEnd,
                &alignmentArray[0]);
    }
Esempio n. 2
0
    //
    // Rebuild a read (bases plus any per-base fields that are present) from
    // the alignment stored for alignmentIndex.
    //
    // The alignment bytes are read from the experiment group's alignmentArray
    // dataset, converted to the aligned query string, and stripped of gaps to
    // obtain the read sequence.  For each optional dataset found in the
    // experiment group (QualityValue, InsertionQV, SubstitutionQV, DeletionQV,
    // DeletionTag, SubstitutionTag, PulseIndex, PreBaseFrames, WidthInFrames)
    // the stored values are read over the same offsets and passed, together
    // with the sequence-to-alignment map, to StoreQualityValueFromAlignment
    // to fill the matching field of the read.
    //
    //   alignmentIndex  index handed to alnInfoGroup.ReadCmpAlignment.
    //   read            output; holeNumber is always set.  length, seq and
    //                   the per-base fields are set only when
    //                   offsetEnd - offsetBegin is non-negative.
    //
    // NOTE(review): the two consistency checks below rely on assert(0).  When
    // built with NDEBUG the assert is a no-op and execution continues into the
    // operator[] lookups that follow -- confirm that is acceptable.
    //
    void ImportReadFromCmpH5(int alignmentIndex, SMRTSequence &read) {
        CmpAlignment cmpAlignment;
        alnInfoGroup.ReadCmpAlignment(alignmentIndex, cmpAlignment);

        //
        // Record the hole number and identify the reference group and
        // read (experiment) group that this alignment belongs to.
        //
        read.holeNumber = cmpAlignment.GetHoleNumber();
        int refGroupId = cmpAlignment.GetRefGroupId();
        int alnGroupId = cmpAlignment.GetAlnGroupId();

        // NOTE(review): refGroupId is not validated before this lookup,
        // unlike alnGroupId below -- verify that callers guarantee it.
        int refGroupIndex  = refGroupIdToArrayIndex[refGroupId];
        if (alnGroupIdToReadGroupName.find(alnGroupId) == alnGroupIdToReadGroupName.end()) {
            cout << "INTERNAL ERROR! Could not find read group name for alignment "
                << "group with Id " << alnGroupId << "." << endl;
            assert(0);
        }
        string readGroupName = alnGroupIdToReadGroupName[alnGroupId];

        if (refAlignGroups[refGroupIndex]->experimentNameToIndex.find(readGroupName) ==
                refAlignGroups[refGroupIndex]->experimentNameToIndex.end()) {
            cout << "Internal ERROR! The read group name " << readGroupName << " is specified as part of "
                << " the path in alignment " << alignmentIndex
                << " though it does not exist in the ref align group specified for this alignment." << endl;
            assert(0);
        }

        int readGroupIndex = refAlignGroups[refGroupIndex]->experimentNameToIndex[readGroupName];
        HDFCmpExperimentGroup* expGroup = refAlignGroups[refGroupIndex]->readGroups[readGroupIndex];

        int offsetBegin = cmpAlignment.GetOffsetBegin();
        int offsetEnd   = cmpAlignment.GetOffsetEnd();

        int alignedSequenceLength = offsetEnd - offsetBegin;

        //
        // Reversed offsets: there is nothing that can be read.  Bail out
        // (as ReadAlignmentArray does) instead of indexing empty buffers and
        // passing a negative size to resize() further down.
        //
        if (alignedSequenceLength < 0) {
            return;
        }

        string   alignedSequence;
        string   readSequence;
        vector<unsigned char> byteAlignment;
        alignedSequence.resize(alignedSequenceLength);
        byteAlignment.resize(alignedSequenceLength);

        //
        // Read the alignment bytes and convert them to something we can
        // compare easily.  Skipped for an empty alignment so that
        // &byteAlignment[0] is never taken on an empty vector.
        //
        if (alignedSequenceLength > 0) {
            expGroup->alignmentArray.Read(offsetBegin,
                    offsetEnd,
                    &byteAlignment[0]);
            ByteAlignmentToQueryString(&byteAlignment[0], byteAlignment.size(), &alignedSequence[0]);
        }

        //
        // Initialize the sequence of the read.
        //
        RemoveGaps(alignedSequence, readSequence);

        //
        // Make space for the sequence and all fields.
        //
        read.length = readSequence.size();
        read.Allocate(read.length);
        memcpy(read.seq, readSequence.c_str(), readSequence.size() * sizeof(char));

        vector<int> baseToAlignmentMap;
        CreateSequenceToAlignmentMap(byteAlignment, baseToAlignmentMap);

        //
        // With an empty alignment there are no stored values to fetch, and
        // the scratch buffers below would be empty.
        // NOTE(review): this assumes StoreQualityValueFromAlignment only
        // copies values and has nothing to do for an empty alignment --
        // confirm against its definition.
        //
        if (alignedSequenceLength == 0) {
            return;
        }

        //
        // Read in the quality values.  qvValues and frameValues are scratch
        // buffers reused by every dataset with the same element type.
        //
        vector<UChar> qvValues;
        vector<HalfWord> frameValues;
        qvValues.resize(alignedSequenceLength);
        frameValues.resize(alignedSequenceLength);

        if (expGroup->experimentGroup.ContainsObject("QualityValue")) {
            expGroup->qualityValue.Read(offsetBegin, offsetEnd, &qvValues[0]);
            StoreQualityValueFromAlignment(qvValues, baseToAlignmentMap, &read.qual.data[0]);
            // Debug-build check that every stored quality value is below 100.
            for (int i = 0; i < read.length; i++) {
                assert(read.qual[i] < 100);
            }
        }

        if (expGroup->experimentGroup.ContainsObject("InsertionQV")) {
            expGroup->insertionQV.Read(offsetBegin, offsetEnd, &qvValues[0]);
            StoreQualityValueFromAlignment(qvValues, baseToAlignmentMap, &read.insertionQV.data[0]);
        }

        if (expGroup->experimentGroup.ContainsObject("SubstitutionQV")) {
            expGroup->substitutionQV.Read(offsetBegin, offsetEnd, &qvValues[0]);
            StoreQualityValueFromAlignment(qvValues, baseToAlignmentMap, &read.substitutionQV.data[0]);
        }

        if (expGroup->experimentGroup.ContainsObject("DeletionQV")) {
            expGroup->deletionQV.Read(offsetBegin, offsetEnd, &qvValues[0]);
            StoreQualityValueFromAlignment(qvValues, baseToAlignmentMap, &read.deletionQV.data[0]);
        }

        if (expGroup->experimentGroup.ContainsObject("DeletionTag")) {
            vector<char> deletionTagValues;
            deletionTagValues.resize(alignedSequenceLength);
            expGroup->deletionTag.Read(offsetBegin, offsetEnd, &deletionTagValues[0]);
            StoreQualityValueFromAlignment(deletionTagValues, baseToAlignmentMap, read.deletionTag);
        }

        if (expGroup->experimentGroup.ContainsObject("SubstitutionTag")) {
            vector<char> substitutionTagValues;
            substitutionTagValues.resize(alignedSequenceLength);
            expGroup->substitutionTag.Read(offsetBegin, offsetEnd, &substitutionTagValues[0]);
            StoreQualityValueFromAlignment(substitutionTagValues, baseToAlignmentMap, read.substitutionTag);
        }

        if (expGroup->experimentGroup.ContainsObject("PulseIndex")) {
            vector<uint32_t> pulseIndexValues;
            pulseIndexValues.resize(alignedSequenceLength);
            expGroup->pulseIndex.Read(offsetBegin, offsetEnd, &pulseIndexValues[0]);
            StoreQualityValueFromAlignment(pulseIndexValues, baseToAlignmentMap, read.pulseIndex);
        }

        if (expGroup->experimentGroup.ContainsObject("PreBaseFrames")) {
            expGroup->preBaseFrames.Read(offsetBegin, offsetEnd, &frameValues[0]);
            StoreQualityValueFromAlignment(frameValues, baseToAlignmentMap, read.preBaseFrames);
        }

        if (expGroup->experimentGroup.ContainsObject("WidthInFrames")) {
            expGroup->widthInFrames.Read(offsetBegin, offsetEnd, &frameValues[0]);
            StoreQualityValueFromAlignment(frameValues, baseToAlignmentMap, read.widthInFrames);
        }

    }