/// Strict "greater than" ordering of two loci.
///
/// The result of chr_.compare(rhs.chr()) decides the order whenever it is
/// non-zero; only when it is zero are the decimal positions compared.
bool Locus::operator>( const Locus& rhs ) const
{
    const int chromosomeOrder = chr_.compare( rhs.chr() );

    if ( chromosomeOrder != 0 )
    {
        return chromosomeOrder > 0;
    }

    // Same chromosome comparison result: fall back to the position.
    return this->decimalPos() > rhs.decimalPos();
}
void write_plink_file_gz(const std::vector<Individual> &individuals, const std::vector<Locus> &loci, bool print_header, std::string output_path, std::auto_ptr<DoseWriter> writer) { gz::ogzstream output_file( output_path.c_str( ) ); output_file << std::fixed << std::setprecision( 3 ); /* Write header. */ if( print_header ) { output_file << "SNP\tA1\tA2"; for(unsigned int i = 0; i < individuals.size( ); i++) { output_file << "\t" << individuals[ i ].get_fid( ) << " " << individuals[ i ].get_iid( ); } output_file << std::endl; } /* Write doses */ for(unsigned int i = 0; i < loci.size( ); i++) { Locus locus = loci[ i ]; output_file << locus.get_name( ) << "\t" << locus.get_a1( ) << "\t" << locus.get_a2( ); for(unsigned int j = 0; j < individuals.size( ); j++) { writer->write( output_file, individuals[ j ], i ); } output_file << std::endl; } }
/**
 * Builds a template for blanketGenomes[target] in which every constructive
 * locus has been replaced by the (recursively resolved) template of the
 * genome selected by findMetaComponentIndex() for that locus' node.
 * Genes at non-constructive loci are added unchanged.
 *
 * @param blanketGenomes  Candidate genomes; indexed by target and passed
 *                        on unchanged to the recursive calls.
 * @param usedGenomes     Passed through to findMetaComponentIndex().
 * @param target          Index into blanketGenomes of the genome to resolve.
 * @return The resolved template.
 */
GenomeTemplate BlanketResolver::resolve(
    std::vector<Genome*> blanketGenomes,
    std::vector<bool> & usedGenomes,
    unsigned int target
) {
    GenomeTemplate resolved;
    GenomeTemplate unresolved = blanketGenomes[target]->getTemplate();

    for (unsigned int position = 0; position < unresolved.genomeLength(); ++position) {
        Locus * locus = unresolved.getLocus(position);

        if (locus->isConstructive()) {
            // Constructive locus: find the component genome and splice in
            // its resolved template.
            unsigned int componentIndex = BlanketResolver::findMetaComponentIndex(
                blanketGenomes,
                usedGenomes,
                ((PopulationLocus*)locus)->getNode()
            );

            resolved.add(BlanketResolver::resolve(
                blanketGenomes,
                usedGenomes,
                componentIndex
            ));
            continue;
        }

        resolved.add(unresolved.getGene(position));
    }

    return resolved;
}
void Ladder :: OutputTo (RGTextOutput& xmlFile, LadderInputFile& inputFile) {

    //
    //  Writes the whole KitData XML document for this ladder to xmlFile:
    //  prologue, kit-level fields, the ILS list, the channel map (which
    //  needs inputFile), every locus in mLocusList, and the closing tags.
    //

    RGString schemaInstance ("http://www.w3.org/2001/XMLSchema-instance");

    // Document prologue
    xmlFile << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
    xmlFile << "<KitData xmlns:xsi=\"" << schemaInstance.GetData ()
            << "\" xsi:noNamespaceSchemaLocation=\"MarkerSet.xsd\">\n";
    xmlFile << "\t<Version>2.0</Version>\n"
            << "\t<Kits>\n"
            << "\t\t<Set>\n";

    // Kit-level fields
    xmlFile << "\t\t\t<Name>" << mMarkerSetName.GetData () << "</Name>\n";
    xmlFile << "\t\t\t<NChannels>" << mNumberOfChannels << "</NChannels>\n";

    // Lane standard section
    xmlFile << "\t\t\t<LS>\n";
    OutputILSListTo (xmlFile);
    xmlFile << "\t\t\t\t<ChannelNo>" << mChannelForILS << "</ChannelNo>\n";
    xmlFile << "\t\t\t</LS>\n";

    // The same suffix is written into three different elements
    xmlFile << "\t\t\t<FileNameSuffix>" << mSuffix << "</FileNameSuffix>\n";
    xmlFile << "\t\t\t<GenotypeSuffix>" << mSuffix << "</GenotypeSuffix>\n";
    xmlFile << "\t\t\t<DirectorySearchString>" << mSuffix << "</DirectorySearchString>\n";

    OutputChannelMapTo (xmlFile, inputFile);

    // Each locus writes its own XML
    list <Locus*>::const_iterator cursor = mLocusList.begin ();

    while (cursor != mLocusList.end ()) {

        (*cursor)->OutputTo (xmlFile);
        ++cursor;
    }

    xmlFile << "\t\t</Set>\n"
            << "\t</Kits>\n"
            << "</KitData>";
}
int Ladder :: MergeThisLadderInto (Ladder* ladder) {

    //
    //  Step 1:  hand every locus of this ladder to the target ladder.  The
    //  status returned by MergeLocusIntoLadder is not inspected here.
    //
    //  Step 2:  walk this ladder's loci in list order and, for consecutive
    //  loci on the same channel whose search regions overlap, pull the two
    //  facing boundaries back toward the padded original boundaries.
    //
    //  Always returns 0.
    //

    list <Locus*>::const_iterator cursor;

    for (cursor = mLocusList.begin (); cursor != mLocusList.end (); ++cursor)
        ladder->MergeLocusIntoLadder (*cursor);

    //  To do:  test that search regions do not overlap neighboring loci

    Locus* previous = NULL;

    for (cursor = mLocusList.begin (); cursor != mLocusList.end (); ++cursor) {

        Locus* current = *cursor;
        const bool sameChannelAsPrevious = (previous != NULL) && (current->GetChannel () == previous->GetChannel ());

        if (sameChannelAsPrevious) {

            const double previousMax = previous->GetMaxSearchILSBP ();
            const double currentMin = current->GetMinSearchILSBP ();

            // Nothing to do unless the regions actually overlap
            if (!(previousMax <= currentMin)) {

                double currentOriginal = current->GetOriginalMinSearchILSBP () - 0.55;
                double previousOriginal = previous->GetOriginalMaxSearchILSBP () + 0.55;

                // If the padded originals cross, both collapse to their midpoint
                if (previousOriginal > currentOriginal) {

                    const double midpoint = 0.5 * (previousOriginal + currentOriginal);
                    previousOriginal = midpoint;
                    currentOriginal = midpoint;
                }

                if (previousMax >= currentOriginal)
                    previous->SetMaxSearhILSBP (currentOriginal);

                if (currentMin <= previousOriginal)
                    current->SetMinSearhILSBP (previousOriginal);
            }
        }

        previous = current;
    }

    return 0;
}
static void update_dmr_name(Locus &dmr, double lod_sum, double mindiff_sum) {
  // Renames dmr to "dmr:<lod_sum / score>:<mindiff_sum / score>", where
  // score is the value currently returned by dmr.score().
  std::ostringstream label;
  label << "dmr:";
  label << lod_sum / dmr.score();
  label << ":";
  label << mindiff_sum / dmr.score();

  dmr.set_name(label.str());
}
bool Ladder :: TestAllLociMerged () { list <Locus*>::const_iterator locusIterator; Locus* nextLocus; for (locusIterator = mLocusList.begin(); locusIterator != mLocusList.end(); locusIterator++) { nextLocus = *locusIterator; if (!nextLocus->GetMerged ()) return false; } return true; }
Locus* Ladder :: FindLocusByName (const RGString& locusName) {

    //
    //  Linear search of mLocusList; returns the first locus whose name
    //  compares equal to locusName, or NULL when there is none.
    //

    list <Locus*>::const_iterator cursor = mLocusList.begin ();

    while (cursor != mLocusList.end ()) {

        Locus* candidate = *cursor;

        if (candidate->GetName () == locusName)
            return candidate;

        ++cursor;
    }

    return NULL;
}
/**
 * Produces a copy of `current` whose index has been shifted by a value
 * drawn from HierRNG::gaussian(mean, stdDev).
 *
 * Without end reflection the shifted index is mapped through the locus'
 * closestIndex(). With end reflection, an index the locus reports as out
 * of range is mirrored about the violated bound (top or bottom) until the
 * locus accepts it.
 *
 * @param current  Gene to mutate; it is only read and copied.
 * @return The result of current->copy() for the new index.
 */
Gene* GaussianMutation::newLocusValue(Gene* current) {
    const double offset = HierRNG::gaussian(this->mean, this->stdDev);
    double candidate = current->getIndex() + offset;
    Locus* geneLocus = current->getLocus();

    if (this->endReflection) {
        while (geneLocus->outOfRange(candidate)) {
            const double upper = geneLocus->topIndex();
            const double lower = geneLocus->bottomIndex();

            if (candidate > upper) {
                // Mirror the overshoot back below the top bound.
                candidate = upper - (candidate - upper);
            } else if (candidate < lower) {
                // Mirror the undershoot back above the bottom bound.
                candidate = lower - (candidate - lower);
            }
        }

        return current->copy(candidate);
    }

    return current->copy(geneLocus->closestIndex(candidate));
}
int Ladder :: AddLocus (Locus* newLocus) {

    //
    //  Appends newLocus to mLocusList unless a locus already in the list
    //  reports isEqual (newLocus).  Returns 0 when appended, -1 when
    //  rejected; a rejected locus is not stored.
    //

    list <Locus*>::const_iterator cursor = mLocusList.begin ();

    while (cursor != mLocusList.end ()) {

        Locus* existing = *cursor;

        if (existing->isEqual (newLocus))
            return -1;

        ++cursor;
    }

    mLocusList.push_back (newLocus);
    return 0;
}
Ladder* Bins :: AssembleAllLoci (RGDList& doNotExtend) {

    //
    //  Builds a new Ladder holding one Locus per locus string found by
    //  SplitFileIntoLocusStrings ().  Loci whose name is contained in
    //  doNotExtend are flagged with SetDoNotExtend ().
    //
    //  Returns the new ladder, or NULL (after printing a message and
    //  deleting the partial ladder) when:
    //    - no locus strings were found,
    //    - a locus could not be assembled from its lines, or
    //    - the ladder rejected a locus as a duplicate.
    //

    int nLoci;
    Ladder* newLadder = new Ladder ();
    Locus* nextLocus;
    nLoci = SplitFileIntoLocusStrings ();
    int i;
    int status;
    RGString name;

    if (nLoci <= 0) {

        cout << "Could not complete ladder setup" << endl;
        delete newLadder;
        return NULL;
    }

    for (i=1; i<=nLoci; i++) {

        SplitLocusStringIntoLines (i);
        nextLocus = AssembleLocusFromLineStrings ();

        // AssembleLocusFromLineStrings reports malformed loci by returning
        // NULL; it must not reach AddLocus or be dereferenced below.
        if (nextLocus == NULL) {

            cout << "Could not assemble locus number " << i << " from bins file" << endl;
            delete newLadder;
            return NULL;
        }

        status = newLadder->AddLocus (nextLocus);

        if (status < 0) {

            cout << "Locus name repeated in bins file:  " << nextLocus->GetName ().GetData () << endl;
            // A rejected locus was not stored by the ladder, so it has to
            // be released here.
            delete nextLocus;
            delete newLadder;
            return NULL;
        }

        name = nextLocus->GetName ();

        if (doNotExtend.Contains (&name))
            nextLocus->SetDoNotExtend ();
    }

    return newLadder;
}
int Ladder :: MergeLocusIntoLadder (const Locus* locus) { // This combines info and then computes the rest Locus* matchingLocus = FindLocusByName (locus->GetName ()); if (matchingLocus == NULL) { cout << "Could not find locus matching name: " << locus->GetName ().GetData () << endl; return -1; } matchingLocus->SetFirstCoreLocusBP (locus->GetFirstCoreLocusBP ()); matchingLocus->SetFirstExtendedAllele (locus->GetFirstExtendedAllele ()); matchingLocus->SetLastExtendedAllele (locus->GetLastExtendedAllele ()); matchingLocus->ComputeAllBPs (); matchingLocus->SetMinMaxSearchILSBP (locus->GetMinSearchILSBP (), locus->GetMaxSearchILSBP ()); matchingLocus->AdjustSearchRegion (); matchingLocus->SetMerged (); return 0; }
int CoreBioComponent :: InitializeSM (SampleData& fileData, PopulationCollection* collection, const RGString& markerSetName, Boolean isGrid) { // // This is ladder and sample stage 1 // mTime = fileData.GetCollectionStartTime (); mDate = fileData.GetCollectionStartDate (); mName = fileData.GetName (); mRunStart = mDate.GetOARString () + mTime.GetOARString (); mMarkerSet = collection->GetNamedPopulationMarkerSet (markerSetName); Progress = 0; smMarkerSetNameUnknown noNamedMarkerSet; smNamedILSUnknown noNamedILS; smChannelIsILS channelIsILS; if (mMarkerSet == NULL) { ErrorString = "*******COULD NOT FIND MARKER SET NAMED "; ErrorString << markerSetName << " IN POPULATION COLLECTION********\n"; SetMessageValue (noNamedMarkerSet, true); AppendDataForSmartMessage (noNamedMarkerSet, markerSetName); return -1; } mLaneStandard = mMarkerSet->GetLaneStandard (); mNumberOfChannels = mMarkerSet->GetNumberOfChannels (); if ((mLaneStandard == NULL) || !mLaneStandard->IsValid ()) { ErrorString = "Could not find named internal lane standard associated with marker set named "; ErrorString << markerSetName << "\n"; cout << "Could not find named internal lane standard associated with marker set named " << (char*)markerSetName.GetData () << endl; SetMessageValue (noNamedILS, true); AppendDataForSmartMessage (noNamedILS, markerSetName); return -1; } mDataChannels = new ChannelData* [mNumberOfChannels + 1]; int i; const int* fsaChannelMap = mMarkerSet->GetChannelMap (); for (i=0; i<=mNumberOfChannels; i++) mDataChannels [i] = NULL; mLaneStandardChannel = mMarkerSet->GetLaneStandardChannel (); for (i=1; i<=mNumberOfChannels; i++) { if (i == mLaneStandardChannel) mDataChannels [i] = GetNewLaneStandardChannel (i, mLaneStandard); else { if (isGrid) mDataChannels [i] = GetNewGridDataChannel (i, mLaneStandard); else mDataChannels [i] = GetNewDataChannel (i, mLaneStandard); } mDataChannels [i]->SetFsaChannel (fsaChannelMap [i]); } mLSData = mDataChannels [mLaneStandardChannel]; 
mLSData->SetMessageValue (channelIsILS, true); mMarkerSet->ResetLocusList (); Locus* nextLocus; while (nextLocus = mMarkerSet->GetNextLocus ()) mDataChannels [nextLocus->GetLocusChannel ()]->AddLocus (nextLocus); Progress = 1; return 0; }
void CoreBioComponent :: ReportXMLSmartSampleTableRowWithLinks (RGTextOutput& text, RGTextOutput& tempText) { RGString type; if (mIsNegativeControl) type = "-Control"; else if (mIsPositiveControl) type = "+Control"; else type = "Sample"; RGString pResult; RGString SimpleFileName (mName); size_t startPos = 0; size_t endPos; size_t length = SimpleFileName.Length (); if (SimpleFileName.FindLastSubstringCaseIndependent (DirectoryManager::GetDataFileType (), startPos, endPos)) { if (endPos == length - 1) SimpleFileName.ExtractAndRemoveLastCharacters (4); } SimpleFileName.FindAndReplaceAllSubstrings ("\\", "/"); startPos = endPos = 0; if (SimpleFileName.FindLastSubstring ("/", startPos, endPos)) { SimpleFileName.ExtractAndRemoveSubstring (0, startPos); } text << CLevel (1) << "\t\t<Sample>\n"; text << "\t\t\t<Name>" << xmlwriter::EscAscii (SimpleFileName, &pResult) << "</Name>\n"; text << "\t\t\t<SampleName>" << xmlwriter::EscAscii (mSampleName, &pResult) << "</SampleName>\n"; text << "\t\t\t<RunStart>" << mRunStart.GetData () << "</RunStart>\n"; text << "\t\t\t<Type>" << type.GetData () << "</Type>\n" << PLevel (); int trigger = Notice::GetMessageTrigger (); // int channelHighestLevel; // bool channelAlerts = false; int cbcHighestMsgLevel = GetHighestMessageLevelWithRestrictionSM (); RGDListIterator it (*mSmartMessageReporters); SmartMessageReporter* nextNotice; bool includesExportInfo = false; while (nextNotice = (SmartMessageReporter*) it ()) { if (nextNotice->HasViableExportInfo ()) { includesExportInfo = true; break; } } if (((cbcHighestMsgLevel > 0) && (cbcHighestMsgLevel <= trigger)) || includesExportInfo) { // text << CLevel (1) << "\t\t\t<SampleAlerts>\n" << PLevel (); // get message numbers and report ReportXMLSmartNoticeObjects (text, tempText, " "); // text << CLevel (1) << "\t\t\t</SampleAlerts>\n" << PLevel (); } mDataChannels [mLaneStandardChannel]->ReportXMLILSSmartNoticeObjects (text, tempText, " "); int i; //for (i=1; i<=mNumberOfChannels; i++) { // 
//if (i == mLaneStandardChannel) // // continue; // channelHighestLevel = mDataChannels [i]->GetHighestMessageLevelWithRestrictionSM (); // if ((channelHighestLevel > 0) && (channelHighestLevel <= trigger)) { // channelAlerts = true; // break; // } //} // if (channelAlerts) { text << CLevel (1) << "\t\t\t<ChannelAlerts>\n" << PLevel (); for (i=1; i<=mNumberOfChannels; i++) { if (i == mLaneStandardChannel) continue; mDataChannels [i]->ReportXMLSmartNoticeObjects (text, tempText, " "); } text << CLevel (1) << "\t\t\t</ChannelAlerts>\n" << PLevel (); // } mMarkerSet->ResetLocusList (); Locus* nextLocus; while (nextLocus = mMarkerSet->GetNextLocus ()) { nextLocus->ReportXMLSmartSampleTableRowWithLinks (text, tempText, " "); } if (mIsPositiveControl) text << CLevel (1) << "\t\t\t<PositiveControl>" << mPositiveControlName << "</PositiveControl>\n"; text << CLevel (1) << "\t\t</Sample>\n" << PLevel (); }
Locus* Bins :: AssembleLocusFromLineStrings () {

    //
    //  uses current values in mLocusLineStrings
    //
    //  Line 0 supplies the locus name (field 1).  The remaining lines are
    //  allele lines; a line whose field 4 is "virtual" is an extended
    //  allele only.  The function determines
    //    - the first extended allele (line 1 if it is virtual, otherwise
    //      the first core allele),
    //    - the first core allele (first non-virtual line) and its ILS bp,
    //    - the last extended allele (last line with at least 4 fields) and
    //    - the ILS bp of the last core allele after the first one.
    //
    //  Returns a new Locus owned by the caller, or NULL (after printing a
    //  message) when an allele line has fewer than 4 fields before the
    //  first core allele is found, when no core allele exists at all, or
    //  when no last extended allele can be located.
    //

    Locus* newLocus = new Locus ();
    int nLines = mLocusLineStrings.Length ();
    SplitLocusLine (mLocusLineStrings [0]);  // This should give us the name field
    RGString name = mLocusLineFieldString [1];
    cout << "Locus name = " << name.GetData () << endl;

    int i;
    RGString firstAlleleName;
    int firstLocusBP = 0;
    double firstILSBP = 0.0;
    double lastILSBP = 0.0;
    RGString firstExtendedAlleleName;
    RGString lastExtendedAlleleName;
    int lineForFirstAllele = 0;

    for (i=1; i<nLines; i++) {

        SplitLocusLine (mLocusLineStrings [i]);

        if (mLocusLineFieldString.Length () < 4) {

            cout << "Could not find first allele for locus " << name.GetData () << endl;
            delete newLocus;
            return NULL;
        }

        if (mLocusLineFieldString.Length () >= 5) {

            if (mLocusLineFieldString [4] == "virtual") {

                if (i == 1) {

                    firstExtendedAlleleName = mLocusLineFieldString [0];
                }

                continue;
            }
        }

        firstAlleleName = mLocusLineFieldString [0];
        firstILSBP = mLocusLineFieldString [1].ConvertToDouble ();
        firstLocusBP = (int) floor (firstILSBP + 0.5);
        lineForFirstAllele = i;
        break;
    }

    // Allele lines start at index 1, so 0 means the loop never reached a
    // non-virtual line.  Without this check the bp values below would be
    // handed to the locus without ever having been read from the file.
    if (lineForFirstAllele == 0) {

        cout << "Could not find first allele for locus " << name.GetData () << endl;
        delete newLocus;
        return NULL;
    }

    if (firstExtendedAlleleName.Length () == 0)
        firstExtendedAlleleName = firstAlleleName;

    // If every line after the first core allele is virtual, the first core
    // allele is also the last one, so its bp bounds the search region.
    lastILSBP = firstILSBP;

    bool foundLastExtendedAllele = false;

    for (i=nLines-1; i>lineForFirstAllele; i--) {

        SplitLocusLine (mLocusLineStrings [i]);

        if (mLocusLineFieldString.Length () < 4)
            continue;

        if (!foundLastExtendedAllele) {

            lastExtendedAlleleName = mLocusLineFieldString [0];
            foundLastExtendedAllele = true;
        }

        if ((mLocusLineFieldString.Length () >= 5) && (mLocusLineFieldString [4] == "virtual"))
            continue;

        lastILSBP = mLocusLineFieldString [1].ConvertToDouble ();
        break;
    }

    if (lastExtendedAlleleName.Length () == 0) {

        cout << "Could not locate last extended allele name for locus:  " << name.GetData () << endl;
        delete newLocus;
        return NULL;
    }

    newLocus->SetFirstCoreAllele (firstAlleleName);
    newLocus->SetFirstExtendedAllele (firstExtendedAlleleName);
    newLocus->SetLastExtendedAllele (lastExtendedAlleleName);
    newLocus->SetMinMaxSearchILSBP (firstILSBP, lastILSBP);  // These will have to be adjusted later when core repeat number is known
    newLocus->SetFirstCoreLocusBP (firstLocusBP);
    newLocus->SetName (name);

    // Need to add to Locus ability to compute first and last extended bp

    return newLocus;
}
/**
 * Merges runs of significant CpGs read from cpg_stream into DMRs and writes
 * them to dmr_stream, one per line.
 *
 * A DMR is extended while read_next_significant_cpg() keeps returning CpGs
 * on the same chromosome without reporting a skipped CpG; otherwise the
 * current DMR is closed and a new one is started. The DMR score counts the
 * CpGs it contains. lod and mindiff values are accumulated only for CpGs
 * whose unadjusted p-value is below cutoff, and the DMR is renamed from
 * those sums by update_dmr_name() when it is closed.
 *
 * A closed DMR is written only when its lod sum is non-zero. This filter is
 * applied to the final DMR as well, so it is treated like every other one.
 *
 * @param cpg_stream  Input stream of CpG records.
 * @param dmr_stream  Output stream receiving the merged regions.
 * @param cutoff      Significance cutoff passed to the reader and applied
 *                    to the unadjusted p-value.
 */
void merge(istream &cpg_stream, ostream &dmr_stream, double cutoff) {
  bool skipped_last_cpg = false;
  Locus dmr;

  // Nothing to do when the input holds no significant CpG at all.
  if (!read_next_significant_cpg(cpg_stream, dmr, cutoff, skipped_last_cpg))
    return;

  dmr.set_score(1);

  double lod_sum, mindiff_sum, unadjusted_pval;
  extract_scores(dmr.name(), lod_sum, mindiff_sum, unadjusted_pval);

  if (unadjusted_pval >= cutoff) {
    lod_sum = 0;
    mindiff_sum = 0;
  }

  Locus cpg;

  while (read_next_significant_cpg(cpg_stream, cpg, cutoff, skipped_last_cpg)) {
    double lod, mindiff;
    extract_scores(cpg.name(), lod, mindiff, unadjusted_pval);

    if (skipped_last_cpg || cpg.chrom() != dmr.chrom()) {
      // Close the current DMR and start a new one at this CpG.
      update_dmr_name(dmr, lod_sum, mindiff_sum);

      if (lod_sum != 0) {
        dmr_stream << dmr << std::endl;
      }

      dmr = cpg;
      dmr.set_score(1);

      if (unadjusted_pval >= cutoff) {
        lod_sum = 0;
        mindiff_sum = 0;
      } else {
        lod_sum = lod;
        mindiff_sum = mindiff;
      }
    } else {
      // Extend the current DMR to cover this CpG.
      dmr.set_end(cpg.end());
      dmr.set_score(dmr.score() + 1);

      if (unadjusted_pval < cutoff) {
        lod_sum += lod;
        mindiff_sum += mindiff;
      }
    }
  }

  // Close the last DMR under the same rule as the ones closed in the loop.
  update_dmr_name(dmr, lod_sum, mindiff_sum);

  if (lod_sum != 0) {
    dmr_stream << dmr << std::endl;
  }
}
int Ladder :: AmendLadderData (LadderInputFile* inFile, RGString& oldLadderString) { RGString newLadderString; // Parse oldLadder data into pieces for individual edits RGString locusString; RGString* newLocusString; RGDList locusStrings; size_t startPos = 0; size_t endPos; RGXMLTagSearch locusSearch ("Locus", oldLadderString); RGXMLTagSearch searchRegionsSearch ("SearchRegions", oldLadderString); RGString* ilsName = (RGString*)inFile->GetILSNameList ().First (); endPos = 0; oldLadderString.FindNextSubstring (0, "\t\t\t<Locus>", endPos); RGString insertBase; insertBase << "\t\t\t\t\t<ILSName>" << ilsName->GetData () << "</ILSName>\n"; insertBase << "\t\t\t\t</LSBases>"; RGString leadString = oldLadderString.ExtractSubstring (0, endPos - 1); //cout << "Lead string = \n" << leadString.GetData () << endl; endPos = 0; leadString.FindAndReplaceNextSubstring ("\t\t\t\t</LSBases>", insertBase, endPos); //cout << "Lead string = \n" << leadString.GetData () << endl; newLadderString << leadString; startPos = 0; while (locusSearch.FindNextTag (startPos, endPos, locusString)) { newLocusString = new RGString (locusString); locusStrings.Append (newLocusString); startPos = endPos; } if (mLocusList.size () != locusStrings.Entries ()) { cout << "Number of loci in bins file does not match number of loci in ladder file" << endl; return -152; } Locus* nextLocus; RGString locusInsert; RGString currentLocusString; RGString nameString; RGXMLTagSearch locusNameSearch ("Name", currentLocusString); RGXMLTagSearch coreRepeatSearch ("CoreRepeatNumber", currentLocusString); double minSearch; double maxSearch; RGString repeatString; int repeatNumber; while (locusStrings.Entries () > 0) { newLocusString = (RGString*) locusStrings.GetFirst (); currentLocusString = *newLocusString; locusNameSearch.ResetSearch (); coreRepeatSearch.ResetSearch (); locusNameSearch.FindNextTag (0, endPos, nameString); nextLocus = FindLocusByName (nameString); if (nextLocus == NULL) { cout << "Could not find locus named 
" << nameString.GetData () << ". Exiting..." << endl; return -155; } if (!coreRepeatSearch.FindNextTag (0, endPos, repeatString)) repeatNumber = 4; else repeatNumber = repeatString.ConvertToInteger (); locusInsert = ""; minSearch = nextLocus->GetMinSearchILSBP () - repeatNumber + 1; maxSearch = nextLocus->GetMaxSearchILSBP () + repeatNumber -1; locusInsert << "\t\t\t\t\t<Region>\n"; locusInsert << "\t\t\t\t\t\t<ILSName>" << ilsName->GetData () << "</ILSName>\n"; locusInsert << "\t\t\t\t\t\t<MinGrid>" << 0.01 * floor (100.0 * minSearch + 0.5) << "</MinGrid>\n"; locusInsert << "\t\t\t\t\t\t<MaxGrid>" << 0.01 * floor (100.0 * maxSearch + 0.5) << "</MaxGrid>\n"; locusInsert << "\t\t\t\t\t</Region>\n"; locusInsert << "\t\t\t\t</SearchRegions>"; endPos = 0; currentLocusString.FindAndReplaceNextSubstring ("\t\t\t\t</SearchRegions>", locusInsert, endPos); newLadderString << "\t\t\t<Locus>" << currentLocusString << "</Locus>\n"; delete newLocusString; } newLadderString << "\t\t</Set>\n"; newLadderString << "\t</Kits>\n"; newLadderString << "</KitData>\n"; RGString ladderPath = inFile->GetOutputConfigDirectoryPath () + "/" + inFile->GetLadderFileName (); RGTextOutput ladderOutput (ladderPath, FALSE); if (!ladderOutput.FileIsValid ()) { cout << "Could not open ladder output file: " << ladderPath.GetData () << endl; return -161; } ladderOutput << newLadderString; cout << "Ladder update completed successfully..." << endl; return 0; }