void ReadAlignment(int alignmentIndex, AlignmentCandidate<FASTASequence, FASTASequence> &alignment) { CmpAlignment cmpAln; ReadAlignment(alignmentIndex, cmpAln); string refSequence; string readSequence; readSequence.resize(cmpAln.alignmentArray.size()); refSequence.resize(cmpAln.alignmentArray.size()); ByteAlignmentToQueryString(&cmpAln.alignmentArray[0], cmpAln.alignmentArray.size(), &readSequence[0]); ByteAlignmentToRefString(&cmpAln.alignmentArray[0], cmpAln.alignmentArray.size(), &refSequence[0]); string ungappedRead, ungappedRef; RemoveGaps(readSequence, ungappedRead); RemoveGaps(refSequence, ungappedRef); GappedStringsToAlignment(readSequence, refSequence, alignment); FASTASequence qAlignedSeq, rAlignedSeq; qAlignedSeq.seq = (Nucleotide*) &ungappedRead[0]; qAlignedSeq.length = ungappedRead.size(); rAlignedSeq.seq = (Nucleotide*) &ungappedRef[0]; rAlignedSeq.length = ungappedRef.size(); alignment.tAlignedSeq.Copy(rAlignedSeq); alignment.qAlignedSeq.Copy(qAlignedSeq); unsigned int qStart = cmpAln.GetQueryStart(); unsigned int tStart = cmpAln.GetRefStart(); alignment.tPos = cmpAln.GetRefStart(); alignment.qPos = cmpAln.GetQueryStart(); alignment.nIns = cmpAln.GetNInsertions(); alignment.nDel = cmpAln.GetNDeletions(); alignment.nMatch = cmpAln.GetNMatch(); alignment.nMismatch=cmpAln.GetNMismatch(); alignment.qStrand= 0; alignment.tStrand = cmpAln.GetTStrand(); alignment.pctSimilarity = ((float)alignment.nMatch) / (alignment.nMatch + alignment.nMismatch + alignment.nIns + alignment.nDel); alignment.mapQV = cmpAln.GetMapQV(); }
void ReadAlignmentArray(int alignmentIndex, ByteAlignment &alignmentArray) { CmpAlignment cmpAlignment; alnInfoGroup.ReadCmpAlignment(alignmentIndex, cmpAlignment); // // Cache some stats about the read, and where it was aligned to. // int queryStart = cmpAlignment.GetQueryStart(); int queryEnd = cmpAlignment.GetQueryEnd(); int refGroupId = cmpAlignment.GetRefGroupId(); int alnGroupId = cmpAlignment.GetAlnGroupId(); int refGroupIndex = refGroupIdToArrayIndex[refGroupId]; if (alnGroupIdToReadGroupName.find(alnGroupId) == alnGroupIdToReadGroupName.end()) { cout << "INTERNAL ERROR! Could not find read group name for alignment " << "group with Id " << alnGroupId << "." << endl; assert(0); } string readGroupName = alnGroupIdToReadGroupName[alnGroupId]; if (refAlignGroups[refGroupIndex]->experimentNameToIndex.find(readGroupName) == refAlignGroups[refGroupIndex]->experimentNameToIndex.end()) { cout << "Internal ERROR! The read group name " << readGroupName << " is specified as part of " << " the path in alignment " << alignmentIndex << " though it does not exist in the ref align group specified for this alignment." << endl; assert(0); } int readGroupIndex = refAlignGroups[refGroupIndex]->experimentNameToIndex[readGroupName]; HDFCmpExperimentGroup* expGroup = refAlignGroups[refGroupIndex]->readGroups[readGroupIndex]; int offsetBegin = cmpAlignment.GetOffsetBegin(); int offsetEnd = cmpAlignment.GetOffsetEnd(); int alignedSequenceLength = offsetEnd - offsetBegin; if (alignedSequenceLength >= 0) { alignmentArray.resize(alignedSequenceLength); } else { return; } // // Read the alignment string. All alignments // refAlignGroups[refGroupIndex]->readGroups[readGroupIndex]->alignmentArray.Read(offsetBegin, offsetEnd, &alignmentArray[0]); }
void ImportReadFromCmpH5(int alignmentIndex, SMRTSequence &read) { CmpAlignment cmpAlignment; alnInfoGroup.ReadCmpAlignment(alignmentIndex, cmpAlignment); // // Cache some stats about the read, and where it was aligned to. // int queryStart = cmpAlignment.GetQueryStart(); int queryEnd = cmpAlignment.GetQueryEnd(); read.holeNumber = cmpAlignment.GetHoleNumber(); int refGroupId = cmpAlignment.GetRefGroupId(); int alnGroupId = cmpAlignment.GetAlnGroupId(); int refGroupIndex = refGroupIdToArrayIndex[refGroupId]; if (alnGroupIdToReadGroupName.find(alnGroupId) == alnGroupIdToReadGroupName.end()) { cout << "INTERNAL ERROR! Could not find read group name for alignment " << "group with Id " << alnGroupId << "." << endl; assert(0); } string readGroupName = alnGroupIdToReadGroupName[alnGroupId]; if (refAlignGroups[refGroupIndex]->experimentNameToIndex.find(readGroupName) == refAlignGroups[refGroupIndex]->experimentNameToIndex.end()) { cout << "Internal ERROR! The read group name " << readGroupName << " is specified as part of " << " the path in alignment " << alignmentIndex << " though it does not exist in the ref align group specified for this alignment." << endl; assert(0); } int readGroupIndex = refAlignGroups[refGroupIndex]->experimentNameToIndex[readGroupName]; HDFCmpExperimentGroup* expGroup = refAlignGroups[refGroupIndex]->readGroups[readGroupIndex]; int offsetBegin = cmpAlignment.GetOffsetBegin(); int offsetEnd = cmpAlignment.GetOffsetEnd(); int alignedSequenceLength = offsetEnd - offsetBegin; string alignedSequence; string readSequence; vector<unsigned char> byteAlignment; if (alignedSequenceLength >= 0) { alignedSequence.resize(alignedSequenceLength); byteAlignment.resize(alignedSequenceLength); } // // Read the alignment string. All alignments // refAlignGroups[refGroupIndex]->readGroups[readGroupIndex]->alignmentArray.Read(offsetBegin, offsetEnd, &byteAlignment[0]); // // Convert to something we can compare easily. // ByteAlignmentToQueryString(&byteAlignment[0], byteAlignment.size(), &alignedSequence[0]); // // Initialize the sequence of the read. // RemoveGaps(alignedSequence, readSequence); // // Make space for the sequence and all fields. // read.length = readSequence.size(); read.Allocate(read.length); memcpy(read.seq, readSequence.c_str(), readSequence.size() * sizeof(char)); vector<int> baseToAlignmentMap; CreateSequenceToAlignmentMap(byteAlignment, baseToAlignmentMap); // // Read in the quality values // vector<unsigned char> storedQVArray; vector<UChar> qvValues; vector<HalfWord> frameValues; int length = offsetEnd - offsetBegin; qvValues.resize(length); frameValues.resize(length); int i; if (expGroup->experimentGroup.ContainsObject("QualityValue")) { expGroup->qualityValue.Read(offsetBegin, offsetEnd, &qvValues[0]); StoreQualityValueFromAlignment(qvValues, baseToAlignmentMap, &read.qual.data[0]); int i; for (i= 0; i < read.length; i++) { assert(read.qual[i] < 100); } } if (expGroup->experimentGroup.ContainsObject("InsertionQV")) { expGroup->insertionQV.Read(offsetBegin, offsetEnd, &qvValues[0]); StoreQualityValueFromAlignment(qvValues, baseToAlignmentMap, &read.insertionQV.data[0]); } if (expGroup->experimentGroup.ContainsObject("SubstitutionQV")) { expGroup->substitutionQV.Read(offsetBegin, offsetEnd, &qvValues[0]); StoreQualityValueFromAlignment(qvValues, baseToAlignmentMap, &read.substitutionQV.data[0]); } if (expGroup->experimentGroup.ContainsObject("DeletionQV")) { expGroup->deletionQV.Read(offsetBegin, offsetEnd, &qvValues[0]); StoreQualityValueFromAlignment(qvValues, baseToAlignmentMap, &read.deletionQV.data[0]); } if (expGroup->experimentGroup.ContainsObject("DeletionTag")) { vector<char> deletionTagValues; deletionTagValues.resize(offsetEnd-offsetBegin); expGroup->deletionTag.Read(offsetBegin, offsetEnd, &deletionTagValues[0]); StoreQualityValueFromAlignment(deletionTagValues, baseToAlignmentMap, read.deletionTag); } if (expGroup->experimentGroup.ContainsObject("SubstitutionTag")) { vector<char> substitutionTagValues; substitutionTagValues.resize(offsetEnd-offsetBegin); expGroup->substitutionTag.Read(offsetBegin, offsetEnd, &substitutionTagValues[0]); StoreQualityValueFromAlignment(substitutionTagValues, baseToAlignmentMap, read.substitutionTag); } if (expGroup->experimentGroup.ContainsObject("PulseIndex")) { vector<uint32_t> pulseIndexValues; pulseIndexValues.resize(offsetEnd-offsetBegin); expGroup->pulseIndex.Read(offsetBegin, offsetEnd, &pulseIndexValues[0]); StoreQualityValueFromAlignment(pulseIndexValues, baseToAlignmentMap, read.pulseIndex); } if (expGroup->experimentGroup.ContainsObject("PreBaseFrames")) { expGroup->preBaseFrames.Read(offsetBegin, offsetEnd, &frameValues[0]); StoreQualityValueFromAlignment(frameValues, baseToAlignmentMap, read.preBaseFrames); } if (expGroup->experimentGroup.ContainsObject("WidthInFrames")) { expGroup->widthInFrames.Read(offsetBegin, offsetEnd, &frameValues[0]); StoreQualityValueFromAlignment(frameValues, baseToAlignmentMap, read.widthInFrames); } }