// Splits an allele name around its first "." into an integer repeat count
// (mRepeats, from the text before the dot) and an integer microvariant
// (mMicroVariant, from the text after the dot).  A name without a "." is
// converted as a whole into mRepeats and gets a microvariant of 0.
//
// Edge cases handled explicitly:
//   - "." is the first character: there is no text before it, so mRepeats is
//     set to 0 instead of computing (0 - 1) on an unsigned size_t.
//   - "." is the last character: there is no text after it, so mMicroVariant
//     is set to 0 instead of requesting a substring whose start lies beyond
//     its end.
STRAlleleName :: STRAlleleName (const RGString& name) {

    size_t dotPosition;

    if (!name.FindNextSubstring (0, ".", dotPosition)) {

        // No decimal point: the entire name is the repeat count.
        mRepeats = name.ConvertToInteger ();
        mMicroVariant = 0;
        return;
    }

    if (dotPosition > 0) {

        RGString repeats = name.ExtractSubstring (0, dotPosition - 1);
        mRepeats = repeats.ConvertToInteger ();
    }

    else
        mRepeats = 0;

    // A "." was found, so the name has at least one character and Length () - 1 cannot wrap.
    const size_t lastIndex = name.Length () - 1;

    if (dotPosition < lastIndex) {

        RGString variant = name.ExtractSubstring (dotPosition + 1, lastIndex);
        mMicroVariant = variant.ConvertToInteger ();
    }

    else
        mMicroVariant = 0;
}
// Derives the name of the allele whose bio ID is `id` from a nearby allele.
//
// The offset between `id` and nearAllele's bio ID is applied to the nearby
// allele's name, which is read as "<whole>.<tenths>": CoreRepeatNumber units
// of offset move the whole part by one, and the remainder moves the tenths
// digit, carrying or borrowing across CoreRepeatNumber.
//
// Returns:
//   - nearAllele's own name when the offset is zero;
//   - "$" when the resulting whole part is less than 1;
//   - "<whole>" when the resulting tenths digit is 0, otherwise "<whole>.<tenths>".
RGString STRBaseLocus :: ReconstructAlleleName (int id, Allele* nearAllele) {

    const int offset = id - nearAllele->GetBioID ();
    RGString referenceName = nearAllele->GetAlleleName ();

    if (offset == 0)
        return referenceName;

    const int repeatSize = CoreRepeatNumber;
    const double referenceValue = referenceName.ConvertToDouble ();
    int wholePart = referenceName.ConvertToInteger ();

    // Tenths digit of the reference name, rounded to the nearest integer.
    int tenths = (int) floor (10 * (referenceValue - wholePart) + 0.5);

    if (offset > 0) {

        wholePart += offset / repeatSize;
        tenths += offset % repeatSize;

        // Carry any overflow of the tenths digit into the whole part.
        wholePart += tenths / repeatSize;
        tenths %= repeatSize;
    }

    else {

        const int magnitude = -offset;
        wholePart -= magnitude / repeatSize;
        tenths -= magnitude % repeatSize;

        // Borrow from the whole part when the tenths digit went negative.
        if (tenths < 0) {

            tenths += repeatSize;
            wholePart--;
        }
    }

    if (wholePart < 1)
        return RGString ("$");

    RGString wholeText;
    wholeText.Convert (wholePart, 10);

    if (tenths == 0)
        return wholeText;

    RGString tenthsText;
    tenthsText.Convert (tenths, 10);
    return wholeText + "." + tenthsText;
}
// Constructs an allele from its XML description.  After the BaseAllele part
// has been built from the same text, the <BP>...</BP> element is read into BP.
// If that element is missing, BP is set to -1, the allele is flagged invalid
// and an explanatory line is appended to Msg.
STRBaseAllele :: STRBaseAllele (const RGString& xmlInput) : BaseAllele (xmlInput) {

    RGString xmlText (xmlInput);
    RGBracketStringSearch bpSearch ("<BP>", "</BP>", xmlText);
    RGString bpText;
    size_t searchEnd;

    if (bpSearch.FindNextBracketedString (0, searchEnd, bpText)) {

        BP = bpText.ConvertToInteger ();
        return;
    }

    // No base pair element: mark the allele unusable and record why.
    BP = -1;
    Valid = FALSE;
    Msg << "Allele invalid...could not find BP string for allele named " << AlleleName << "\n";
}
// Constructs an STR locus from its XML description.
//
// Steps, in order:
//   1. Seed the locus-specific sample and ladder thresholds from the Locus-wide defaults.
//   2. Read <NoExtension> (optional; set to true only if its text contains "true",
//      compared case-independently).
//   3. Read <MinBP> and <MaxBP> (required; a missing one invalidates the locus and
//      leaves -1 in the corresponding member).
//   4. Read the optional <MinGridLSIndex> / <MaxGridLSIndex> bounds.
//   5. Either obtain the ladder search region for the current ILS family name
//      (when PopulationCollection::UseILSFamiliesInLadderFile () is true), or read
//      <MinGridLSBasePair> / <MaxGridLSBasePair> and require that each bound was
//      given as an index or as a base pair.
//   6. Read <CoreRepeatNumber> (default 4), YLinked (default false),
//      MaxExpectedAlleles (default 2) and MinExpectedAlleles (default 1).
//   7. If the locus ended up invalid, append the XML text to Msg.
STRBaseLocus :: STRBaseLocus (const RGString& xmlInput) : BaseLocus (xmlInput),
    LowerBoundGridLSIndex (-1.0), UpperBoundGridLSIndex (-1.0), MinimumGridTime (-1.0), MaximumGridTime (-1.0),
    LowerBoundGridLSBasePair (-1.0), UpperBoundGridLSBasePair (-1.0), mNoExtension (false) {

    // Sample thresholds start out as the global sample defaults.
    mSampleLocusSpecificStutterThreshold = Locus::GetSampleStutterThreshold ();
    mSampleLocusSpecificPlusStutterThreshold = Locus::GetSamplePlusStutterThreshold ();
    mSampleLocusSpecificAdenylationThreshold = Locus::GetSampleAdenylationThreshold ();
    mSampleLocusSpecificFractionalFilter = Locus::GetSampleFractionalFilter ();
    mSampleLocusSpecificPullupFractionalFilter = Locus::GetSamplePullupFractionalFilter ();
    mSampleLocusSpecificHeterozygousImbalanceThreshold = Locus::GetHeterozygousImbalanceLimit ();
    mSampleLocusSpecificMinBoundForHomozygote = Locus::GetMinBoundForHomozygote ();

    // Ladder thresholds start out as the global grid defaults.
    mLadderLocusSpecificStutterThreshold = Locus::GetGridStutterThreshold ();
    mLadderLocusSpecificAdenylationThreshold = Locus::GetGridAdenylationThreshold ();
    mLadderLocusSpecificFractionalFilter = Locus::GetGridFractionalFilter ();
    mLadderLocusSpecificPullupFractionalFilter = Locus::GetGridPullupFractionalFilter ();

    RGString xmlText (xmlInput);
    RGBracketStringSearch minBPSearch ("<MinBP>", "</MinBP>", xmlText);
    RGBracketStringSearch maxBPSearch ("<MaxBP>", "</MaxBP>", xmlText);
    RGBracketStringSearch minGridIndexSearch ("<MinGridLSIndex>", "</MinGridLSIndex>", xmlText);
    RGBracketStringSearch maxGridIndexSearch ("<MaxGridLSIndex>", "</MaxGridLSIndex>", xmlText);
    RGBracketStringSearch repeatNumberSearch ("<CoreRepeatNumber>", "</CoreRepeatNumber>", xmlText);
    RGBracketStringSearch minGridBasePairSearch ("<MinGridLSBasePair>", "</MinGridLSBasePair>", xmlText);
    RGBracketStringSearch maxGridBasePairSearch ("<MaxGridLSBasePair>", "</MaxGridLSBasePair>", xmlText);
    RGBracketStringSearch extensionSearch ("<NoExtension>", "</NoExtension>", xmlText);
    RGXMLTagSearch yLinkedTag ("YLinked", xmlText);
    RGXMLTagSearch maxAllelesTag ("MaxExpectedAlleles", xmlText);
    RGXMLTagSearch minAllelesTag ("MinExpectedAlleles", xmlText);

    size_t tokenEnd;
    RGString tokenText;
    RGString extensionText;
    bool haveLowerBound = false;
    bool haveUpperBound = false;
    RGString trueText ("true");
    RGString ilsFamily = BaseLocus::GetILSFamilyName ();
    bool familiesEnabled = PopulationCollection::UseILSFamiliesInLadderFile ();   // false selects the older base-pair tokens
    size_t regionStart = 0;

    if (extensionSearch.FindNextBracketedString (0, tokenEnd, extensionText))
        mNoExtension = extensionText.FindNextSubstringCaseIndependent (0, trueText, tokenEnd) ? true : false;

    if (minBPSearch.FindNextBracketedString (0, tokenEnd, tokenText))
        MinimumBP = tokenText.ConvertToInteger ();

    else {

        Valid = FALSE;
        Msg << "Locus named " << LocusName << " could not find Minimum Base Pair token\n";
        MinimumBP = -1;
    }

    if (maxBPSearch.FindNextBracketedString (0, tokenEnd, tokenText))
        MaximumBP = tokenText.ConvertToInteger ();

    else {

        Valid = FALSE;
        Msg << "Locus named " << LocusName << " could not find Maximum Base Pair token\n";
        MaximumBP = -1;
    }

    if (minGridIndexSearch.FindNextBracketedString (0, tokenEnd, tokenText)) {

        LowerBoundGridLSIndex = tokenText.ConvertToDouble ();
        haveLowerBound = true;
    }

    if (maxGridIndexSearch.FindNextBracketedString (0, tokenEnd, tokenText)) {

        UpperBoundGridLSIndex = tokenText.ConvertToDouble ();
        haveUpperBound = true;
    }

    if (!familiesEnabled) {

        // Older ladder format: each bound may be given as a base pair instead of an index.
        if (minGridBasePairSearch.FindNextBracketedString (0, tokenEnd, tokenText)) {

            LowerBoundGridLSBasePair = tokenText.ConvertToDouble ();
            haveLowerBound = true;
        }

        if (maxGridBasePairSearch.FindNextBracketedString (0, tokenEnd, tokenText)) {

            UpperBoundGridLSBasePair = tokenText.ConvertToDouble ();
            haveUpperBound = true;
        }

        if (!haveLowerBound) {

            Valid = FALSE;
            Msg << "Locus named " << LocusName << " could not find Minimum Grid Internal Lane Standard token, either index or base pair\n";
        }

        if (!haveUpperBound) {

            Valid = FALSE;
            Msg << "Locus named " << LocusName << " could not find Maximum Grid Internal Lane Standard token, either index or base pair\n";
        }
    }

    else if (!GetLadderSearchRegion (regionStart, xmlText, ilsFamily)) {

        // Family-based ladder format: the search region is looked up by ILS family name.
        Valid = FALSE;
        Msg << "Locus named " << LocusName << " could not find family-based search region\n";
    }

    if (repeatNumberSearch.FindNextBracketedString (0, tokenEnd, tokenText))
        CoreRepeatNumber = tokenText.ConvertToInteger ();

    else
        CoreRepeatNumber = 4;

    // Any YLinked value other than the exact text "false" counts as true.
    if (yLinkedTag.FindNextTag (0, tokenEnd, tokenText))
        mIsYLinked = !(tokenText == "false");

    else
        mIsYLinked = false;

    if (maxAllelesTag.FindNextTag (0, tokenEnd, tokenText))
        mMaxExpectedAlleles = tokenText.ConvertToInteger ();

    else
        mMaxExpectedAlleles = 2;

    if (minAllelesTag.FindNextTag (0, tokenEnd, tokenText))
        mMinExpectedAlleles = tokenText.ConvertToInteger ();

    else
        mMinExpectedAlleles = 1;

    if (!Valid)
        Msg << "XML Input:\n" << xmlText << "\n";
}
// Constructs an STR locus from its XML description, using only the index and
// base-pair grid tokens (no ILS-family lookup and no <NoExtension> parsing).
//
// NOTE(review): another constructor with this identical signature is defined
// in this file.  Only one of the two can be compiled into the translation
// unit; presumably this one is a retained earlier version -- confirm which is
// active.
//
// Behavior:
//   - Locus-specific sample and ladder thresholds are seeded from the Locus-wide defaults.
//   - <MinBP> and <MaxBP> are required; a missing one invalidates the locus and
//     leaves -1 in the corresponding member.
//   - Each grid bound must be supplied as <Min/MaxGridLSIndex> or
//     <Min/MaxGridLSBasePair>; otherwise the locus is invalidated.
//   - <CoreRepeatNumber> defaults to 4, YLinked to false, MaxExpectedAlleles to 2
//     and MinExpectedAlleles to 1.
//   - mNoExtension is initialized to false.  This version never reads
//     <NoExtension>, and without the initializer the member would be left
//     indeterminate, unlike in the sibling constructor that sets it.
//   - If the locus ended up invalid, the XML text is appended to Msg.
STRBaseLocus :: STRBaseLocus (const RGString& xmlInput) : BaseLocus (xmlInput),
    LowerBoundGridLSIndex (-1.0), UpperBoundGridLSIndex (-1.0), MinimumGridTime (-1.0), MaximumGridTime (-1.0),
    LowerBoundGridLSBasePair (-1.0), UpperBoundGridLSBasePair (-1.0), mNoExtension (false) {

    mSampleLocusSpecificStutterThreshold = Locus::GetSampleStutterThreshold ();
    mSampleLocusSpecificPlusStutterThreshold = Locus::GetSamplePlusStutterThreshold ();
    mSampleLocusSpecificAdenylationThreshold = Locus::GetSampleAdenylationThreshold ();
    mSampleLocusSpecificFractionalFilter = Locus::GetSampleFractionalFilter ();
    mSampleLocusSpecificPullupFractionalFilter = Locus::GetSamplePullupFractionalFilter ();
    mSampleLocusSpecificHeterozygousImbalanceThreshold = Locus::GetHeterozygousImbalanceLimit ();
    mSampleLocusSpecificMinBoundForHomozygote = Locus::GetMinBoundForHomozygote ();

    mLadderLocusSpecificStutterThreshold = Locus::GetGridStutterThreshold ();
    mLadderLocusSpecificAdenylationThreshold = Locus::GetGridAdenylationThreshold ();
    mLadderLocusSpecificFractionalFilter = Locus::GetGridFractionalFilter ();
    mLadderLocusSpecificPullupFractionalFilter = Locus::GetGridPullupFractionalFilter ();

    RGString Input (xmlInput);
    RGBracketStringSearch MinToken ("<MinBP>", "</MinBP>", Input);
    RGBracketStringSearch MaxToken ("<MaxBP>", "</MaxBP>", Input);
    RGBracketStringSearch LowerGridIndex ("<MinGridLSIndex>", "</MinGridLSIndex>", Input);
    RGBracketStringSearch UpperGridIndex ("<MaxGridLSIndex>", "</MaxGridLSIndex>", Input);
    RGBracketStringSearch CoreRepeatSearch ("<CoreRepeatNumber>", "</CoreRepeatNumber>", Input);
    RGBracketStringSearch LowerGridBasePair ("<MinGridLSBasePair>", "</MinGridLSBasePair>", Input);
    RGBracketStringSearch UpperGridBasePair ("<MaxGridLSBasePair>", "</MaxGridLSBasePair>", Input);
    RGXMLTagSearch yLinkedSearch ("YLinked", Input);
    RGXMLTagSearch maxExpectedAllelesSearch ("MaxExpectedAlleles", Input);
    RGXMLTagSearch minExpectedAllelesSearch ("MinExpectedAlleles", Input);

    size_t EndPosition;
    RGString BPString;
    bool lowerBoundFound = false;
    bool upperBoundFound = false;

    // Required base pair range of the locus.
    if (!MinToken.FindNextBracketedString (0, EndPosition, BPString)) {

        Valid = FALSE;
        Msg << "Locus named " << LocusName << " could not find Minimum Base Pair token\n";
        MinimumBP = -1;
    }

    else
        MinimumBP = BPString.ConvertToInteger ();

    if (!MaxToken.FindNextBracketedString (0, EndPosition, BPString)) {

        Valid = FALSE;
        Msg << "Locus named " << LocusName << " could not find Maximum Base Pair token\n";
        MaximumBP = -1;
    }

    else
        MaximumBP = BPString.ConvertToInteger ();

    // Grid bounds: an index token or a base pair token satisfies each bound.
    if (LowerGridIndex.FindNextBracketedString (0, EndPosition, BPString)) {

        LowerBoundGridLSIndex = BPString.ConvertToDouble ();
        lowerBoundFound = true;
    }

    if (UpperGridIndex.FindNextBracketedString (0, EndPosition, BPString)) {

        UpperBoundGridLSIndex = BPString.ConvertToDouble ();
        upperBoundFound = true;
    }

    if (LowerGridBasePair.FindNextBracketedString (0, EndPosition, BPString)) {

        LowerBoundGridLSBasePair = BPString.ConvertToDouble ();
        lowerBoundFound = true;
    }

    if (UpperGridBasePair.FindNextBracketedString (0, EndPosition, BPString)) {

        UpperBoundGridLSBasePair = BPString.ConvertToDouble ();
        upperBoundFound = true;
    }

    if (!lowerBoundFound) {

        Valid = FALSE;
        Msg << "Locus named " << LocusName << " could not find Minimum Grid Internal Lane Standard token, either index or base pair\n";
    }

    if (!upperBoundFound) {

        Valid = FALSE;
        Msg << "Locus named " << LocusName << " could not find Maximum Grid Internal Lane Standard token, either index or base pair\n";
    }

    // Optional settings with defaults.
    if (!CoreRepeatSearch.FindNextBracketedString (0, EndPosition, BPString))
        CoreRepeatNumber = 4;

    else
        CoreRepeatNumber = BPString.ConvertToInteger ();

    if (!yLinkedSearch.FindNextTag (0, EndPosition, BPString))
        mIsYLinked = false;

    else {

        // Any value other than the exact text "false" counts as true.
        if (BPString == "false")
            mIsYLinked = false;

        else
            mIsYLinked = true;
    }

    if (!maxExpectedAllelesSearch.FindNextTag (0, EndPosition, BPString))
        mMaxExpectedAlleles = 2;

    else
        mMaxExpectedAlleles = BPString.ConvertToInteger ();

    if (!minExpectedAllelesSearch.FindNextTag (0, EndPosition, BPString))
        mMinExpectedAlleles = 1;

    else
        mMinExpectedAlleles = BPString.ConvertToInteger ();

    if (!Valid) {

        Msg << "XML Input:\n" << Input << "\n";
    }
}
int Ladder :: AmendLadderData (LadderInputFile* inFile, RGString& oldLadderString) { RGString newLadderString; // Parse oldLadder data into pieces for individual edits RGString locusString; RGString* newLocusString; RGDList locusStrings; size_t startPos = 0; size_t endPos; RGXMLTagSearch locusSearch ("Locus", oldLadderString); RGXMLTagSearch searchRegionsSearch ("SearchRegions", oldLadderString); RGString* ilsName = (RGString*)inFile->GetILSNameList ().First (); endPos = 0; oldLadderString.FindNextSubstring (0, "\t\t\t<Locus>", endPos); RGString insertBase; insertBase << "\t\t\t\t\t<ILSName>" << ilsName->GetData () << "</ILSName>\n"; insertBase << "\t\t\t\t</LSBases>"; RGString leadString = oldLadderString.ExtractSubstring (0, endPos - 1); //cout << "Lead string = \n" << leadString.GetData () << endl; endPos = 0; leadString.FindAndReplaceNextSubstring ("\t\t\t\t</LSBases>", insertBase, endPos); //cout << "Lead string = \n" << leadString.GetData () << endl; newLadderString << leadString; startPos = 0; while (locusSearch.FindNextTag (startPos, endPos, locusString)) { newLocusString = new RGString (locusString); locusStrings.Append (newLocusString); startPos = endPos; } if (mLocusList.size () != locusStrings.Entries ()) { cout << "Number of loci in bins file does not match number of loci in ladder file" << endl; return -152; } Locus* nextLocus; RGString locusInsert; RGString currentLocusString; RGString nameString; RGXMLTagSearch locusNameSearch ("Name", currentLocusString); RGXMLTagSearch coreRepeatSearch ("CoreRepeatNumber", currentLocusString); double minSearch; double maxSearch; RGString repeatString; int repeatNumber; while (locusStrings.Entries () > 0) { newLocusString = (RGString*) locusStrings.GetFirst (); currentLocusString = *newLocusString; locusNameSearch.ResetSearch (); coreRepeatSearch.ResetSearch (); locusNameSearch.FindNextTag (0, endPos, nameString); nextLocus = FindLocusByName (nameString); if (nextLocus == NULL) { cout << "Could not find locus named 
" << nameString.GetData () << ". Exiting..." << endl; return -155; } if (!coreRepeatSearch.FindNextTag (0, endPos, repeatString)) repeatNumber = 4; else repeatNumber = repeatString.ConvertToInteger (); locusInsert = ""; minSearch = nextLocus->GetMinSearchILSBP () - repeatNumber + 1; maxSearch = nextLocus->GetMaxSearchILSBP () + repeatNumber -1; locusInsert << "\t\t\t\t\t<Region>\n"; locusInsert << "\t\t\t\t\t\t<ILSName>" << ilsName->GetData () << "</ILSName>\n"; locusInsert << "\t\t\t\t\t\t<MinGrid>" << 0.01 * floor (100.0 * minSearch + 0.5) << "</MinGrid>\n"; locusInsert << "\t\t\t\t\t\t<MaxGrid>" << 0.01 * floor (100.0 * maxSearch + 0.5) << "</MaxGrid>\n"; locusInsert << "\t\t\t\t\t</Region>\n"; locusInsert << "\t\t\t\t</SearchRegions>"; endPos = 0; currentLocusString.FindAndReplaceNextSubstring ("\t\t\t\t</SearchRegions>", locusInsert, endPos); newLadderString << "\t\t\t<Locus>" << currentLocusString << "</Locus>\n"; delete newLocusString; } newLadderString << "\t\t</Set>\n"; newLadderString << "\t</Kits>\n"; newLadderString << "</KitData>\n"; RGString ladderPath = inFile->GetOutputConfigDirectoryPath () + "/" + inFile->GetLadderFileName (); RGTextOutput ladderOutput (ladderPath, FALSE); if (!ladderOutput.FileIsValid ()) { cout << "Could not open ladder output file: " << ladderPath.GetData () << endl; return -161; } ladderOutput << newLadderString; cout << "Ladder update completed successfully..." << endl; return 0; }