bool constructSafeLabels(const NxsTaxaBlock & tb, std::vector<std::string> * vs, std::set<std::string> & safeLabelSet) { if (vs == 0L) return false; const unsigned numTaxa = tb.GetNTaxTotal(); bool changed = false; for (unsigned i = 0; i < numTaxa; ++i) { NxsString oldName = tb.GetTaxonLabel(i); unsigned oldOffset = 0; unsigned newOffset = 0; std::string prefix; for (; newOffset < 10 && oldOffset < oldName.length(); ++oldOffset) { char c = oldName[oldOffset]; if ((isgraph(c)) && (strchr("!@#$%^&*.?|()[]{}/\\,;:=*\'\"`-+<>", c) == NULL)) { if (newOffset == 0 && isdigit(c)) { prefix.append(1, 't'); newOffset += 1; } prefix.append(1, toupper(c)); newOffset += 1; } } std::string newLabel = assignUniqueName(prefix, 10, safeLabelSet); safeLabelSet.insert(newLabel); vs->push_back(newLabel); if (newLabel != oldName) changed = true; } return changed; }
/**
 * Produce a plain NxsTaxaBlock carrying the same taxon labels as this block.
 *
 * Labels are copied in index order, from 0 up to GetNTaxTotal() - 1, using
 * AddTaxonLabel() on a default-constructed NxsTaxaBlock.
 *
 * @return the newly built taxa block, by value.
 */
NxsTaxaBlock NxsNexmlTaxa::getNexusBlock() const
{
    NxsTaxaBlock nexusBlock;
    const unsigned taxonCount = this->GetNTaxTotal();
    for ( unsigned idx = 0; idx < taxonCount; ++idx ) {
        nexusBlock.AddTaxonLabel( this->GetTaxonLabel( idx ) );
    }
    return nexusBlock;
}
/**
 * Factory hook: hands out a fresh NxsTaxaBlock for the block id "TAXA".
 *
 * @param idneeded  block id being requested; only the exact string "TAXA"
 *                  is served.
 * @param reader    requesting reader; a null reader is refused.
 * @return a block allocated with `new` (link API switched off via
 *         SetImplementsLinkAPI(false)), or NULL when the request is refused.
 *         This function does not release the returned block.
 */
NxsTaxaBlock *NxsTaxaBlockFactory::GetBlockReaderForID(const std::string & idneeded, NxsReader *reader, NxsToken *)
{
    if (reader == NULL)
        return NULL;
    if (idneeded != "TAXA")
        return NULL;

    NxsTaxaBlock * taxaReader = new NxsTaxaBlock();
    taxaReader->SetImplementsLinkAPI(false);
    return taxaReader;
}
/**
 * Construct a NeXML-backed taxa block from an existing Nexus taxa block.
 *
 * One Otu is allocated per taxon label of `nTax` (in index order) and the
 * collection is wrapped in a newly allocated Otus stored in `otus_`. The
 * block name is set to "Taxa Block" and the remaining data-model members
 * are reset to empty containers.
 *
 * @param nTax  source taxa block (taken by value); only its labels are read.
 */
NxsNexmlTaxa::NxsNexmlTaxa( NxsTaxaBlock nTax )
{
    const unsigned taxonCount = nTax.GetNTaxTotal();
    vector< Otu* > otuList;
    for ( unsigned idx = 0; idx < taxonCount; ++idx ) {
        Otu * otu = new Otu( Glib::ustring( nTax.GetTaxonLabel( idx ).c_str() ) );
        otuList.push_back( otu );
    }
    otus_ = new Otus( otuList );
    block_name_ = "Taxa Block";

    // Data model: start with no active/inactive bookkeeping and no named sets.
    inactive_taxa_ = set< unsigned int >();
    active_taxa_ = set< unsigned int >();
    partitions_ = map< string, NxsPartition >();
    index_sets_ = map< string, NxsUnsignedSet >();
}
bool substituteSafeTaxaLabels(PublicNexusReader &nexusReader, std::string filepath, bool evenTrivial) { std::ostream * outStrPtr = 0L; std::ofstream outStream; const unsigned nTaxaBlocks = nexusReader.GetNumTaxaBlocks(); bool someChanged = false; std::set<std::string> safeNameSet; try { for (unsigned t = 0; t < nTaxaBlocks; ++t) { NxsTaxaBlock * tb = nexusReader.GetTaxaBlock(t); std::vector<std::string> safeLabelsVec; bool changed = constructSafeLabels(*tb, &safeLabelsVec, safeNameSet); someChanged = someChanged | changed; if (changed || evenTrivial) { if (outStrPtr == 0L) { outStream.open(filepath.c_str()); if (!outStream.good()) { NxsString eMessage; eMessage << "Could not open the file \"" << filepath << "\""; throw NxsException(eMessage); } outStrPtr = &outStream; outStream << "#NEXUS\n"; } tb->WriteAsNexus(*outStrPtr); std::vector<NxsString> oldNameVec; const unsigned numTaxa = tb->GetNTaxTotal(); for (unsigned i = 0; i < numTaxa; ++i) { NxsString oldName = tb->GetTaxonLabel(i); oldNameVec.push_back(oldName); NxsString newName; newName << safeLabelsVec[i]; tb->ChangeTaxonLabel(i, newName); } NxsString title; title << tb->GetTitle(); NxsString nt; nt << "Renamed " << title; tb->SetTitle(nt, false); tb->WriteAsNexus(*outStrPtr); // We can't just create a TaxaAssociationBlock instance here, // and call WriteAsNexus because it first and second taxa block pointers // would refer to the same instance... NxsString taTitle; taTitle << "Renaming " << title; *outStrPtr << "BEGIN TaxaAssociation;\n Title " << NxsString::GetEscaped(taTitle) << " ;\n"; *outStrPtr << " Taxa " << NxsString::GetEscaped(title) << " , " << NxsString::GetEscaped(nt) << " ;\n"; *outStrPtr << " Associates "; for (unsigned i = 0; i < numTaxa; ++i) { if (i > 0) *outStrPtr << " ,\n "; *outStrPtr << NxsString::GetEscaped(oldNameVec[i]) << " / " << NxsString::GetEscaped(safeLabelsVec[i]); } *outStrPtr << " ;\nEND;\n" ; } } } catch (...) 
{ if (outStrPtr != 0L) outStream.close(); throw; } if (outStrPtr != 0L) outStream.close(); return someChanged; }
// Fills this matrix from the reader's characters block and taxa block.
//
// Only taxa and characters that the characters block reports as active are
// copied; they are packed into consecutive rows/columns (row i, column j),
// so matrix indices differ from the original Nexus indices whenever
// something is inactive. The matrix is sized with
// NewMatrix(numActiveTaxa, numActiveChar) before filling.
//
// Cell encoding:
//   - amino-acid model (modSpec.IsAminoAcid()): gap or missing -> 20,
//     otherwise CharToAminoAcidNumber() of the single state (asserted);
//   - otherwise: gap or missing -> 15, else the bitwise OR of
//     CharToBitwiseRepresentation() over all states of the cell.
//
// NOTE(review): assumes CharToBitwiseRepresentation returns a bit mask per
// state (the all-states code 15 suggests four one-bit states) - confirm.
// OR is used rather than addition so that states sharing a bit cannot
// carry into a wrong code.
void DataMatrix::CreateMatrixFromNCL(GarliReader &reader){
	NxsCharactersBlock *charblock = reader.GetCharactersBlock();
	NxsTaxaBlock *taxablock = reader.GetTaxaBlock();

	const int numOrigTaxa = charblock->GetNTax();
	const int numActiveTaxa = charblock->GetNumActiveTaxa();
	const int numOrigChar = charblock->GetNChar();
	const int numActiveChar = charblock->GetNumActiveChar();

	NewMatrix( numActiveTaxa, numActiveChar );

	const bool isAminoAcid = modSpec.IsAminoAcid();
	// Code stored for a gap or a missing cell (full ambiguity).
	const unsigned char missingDatum = static_cast<unsigned char>(isAminoAcid ? 20 : 15);

	// read in the data, including taxon names
	int i = 0;	// row index among active taxa
	for( int origTaxIndex = 0; origTaxIndex < numOrigTaxa; origTaxIndex++ ) {
		if(!charblock->IsActiveTaxon(origTaxIndex))
			continue;

		SetTaxonLabel( i, taxablock->GetTaxonLabel(origTaxIndex).c_str());

		int j = 0;	// column index among active characters
		for( int origIndex = 0; origIndex < numOrigChar; origIndex++ ) {
			if(!charblock->IsActiveChar(origIndex))
				continue;

			unsigned char datum = '\0';
			if(charblock->IsGapState(origTaxIndex, origIndex) == true
				|| charblock->IsMissingState(origTaxIndex, origIndex) == true){
				datum = missingDatum;
				}
			else{
				int nstates = charblock->GetNumStates(origTaxIndex, origIndex);
				if(isAminoAcid){
					assert(nstates == 1);
					datum = CharToAminoAcidNumber(charblock->GetState(origTaxIndex, origIndex, 0));
					}
				else{
					for(int s=0;s<nstates;s++){
						datum |= CharToBitwiseRepresentation(charblock->GetState(origTaxIndex, origIndex, s));
						}
					}
				}
			SetMatrix( i, j++, datum );
			}
		i++;
		}
	}
// Fills this matrix from the single protein characters block of `reader`.
//
// Steps:
//   1. Scan all characters blocks; exactly one must have datatype protein.
//      Non-protein blocks are reported and skipped.
//   2. Report excluded characters, if any (1-based numbers).
//   3. Size the matrix with NewMatrix(numActiveTaxa, numActiveChar) and copy
//      the active taxa/characters into consecutive rows/columns.
//
// Cell encoding: gap or missing -> 20; a single state -> CharToDatum(state);
// several states -> CharToDatum('?') (treated as full ambiguity), with the
// affected 1-based character numbers reported per taxon.
//
// Blanks in taxon names are replaced by underscores before being stored.
//
// Throws ErrorException when no protein block, or more than one, is found.
void AminoacidData::CreateMatrixFromNCL(GarliReader &reader){
	NxsCharactersBlock *charblock = NULL;
	int proteinBlockNum = -1;

	// Testing the block count before the first fetch means a reader holding no
	// characters blocks ends up in the "No ... blocks" error below, instead of
	// the result of GetCharactersBlock(0) being dereferenced unchecked.
	for(int num = 0; num < reader.NumCharBlocks(); num++){
		charblock = reader.GetCharactersBlock(num);
		if(charblock->GetDataType() == NxsCharactersBlock::protein){
			if(proteinBlockNum < 0) proteinBlockNum = num;
			else{
				throw ErrorException("Multiple characters/data blocks containing protein data found in Nexus datafile!\n\tEither combine the blocks or comment one out.");
				}
			}
		else outman.UserMessage("Ignoring non-protein characters block from Nexus datafile");
		}
	if(proteinBlockNum < 0) throw ErrorException("No characters/data blocks containing protein data found in Nexus datafile!");
	charblock = reader.GetCharactersBlock(proteinBlockNum);

	if(charblock->GetNumActiveChar() < charblock->GetNChar()){
		outman.UserMessageNoCR("Excluded characters:\n\t");
		for(int c=0;c<charblock->GetNCharTotal();c++)
			if(charblock->IsExcluded(c)) outman.UserMessageNoCR("%d ", c+1);
		outman.UserMessage("");
		}

	NxsTaxaBlock *taxablock = reader.GetTaxaBlock();

	const int numOrigTaxa = charblock->GetNTax();
	const int numActiveTaxa = charblock->GetNumActiveTaxa();
	const int numOrigChar = charblock->GetNChar();
	const int numActiveChar = charblock->GetNumActiveChar();

	NewMatrix( numActiveTaxa, numActiveChar );

	// read in the data, including taxon names
	int i = 0;	// row index among active taxa
	for( int origTaxIndex = 0; origTaxIndex < numOrigTaxa; origTaxIndex++ ) {
		if(!charblock->IsActiveTaxon(origTaxIndex))
			continue;

		//internally, blanks in taxon names will be stored as underscores
		NxsString tlabel = taxablock->GetTaxonLabel(origTaxIndex);
		tlabel.BlanksToUnderscores();
		SetTaxonLabel( i, tlabel.c_str());

		int j = 0;	// column index among active characters
		bool firstAmbig = true;
		for( int origIndex = 0; origIndex < numOrigChar; origIndex++ ) {
			if(!charblock->IsActiveChar(origIndex))
				continue;

			unsigned char datum = '\0';
			if(charblock->IsGapState(origTaxIndex, origIndex) == true) datum = 20;
			else if(charblock->IsMissingState(origTaxIndex, origIndex) == true) datum = 20;
			else{
				int nstates = charblock->GetNumStates(origTaxIndex, origIndex);
				//multiple states in one cell cannot be represented,
				//so they are converted to full ambiguity
				if(nstates == 1)
					datum = CharToDatum(charblock->GetState(origTaxIndex, origIndex, 0));
				else{
					if(firstAmbig){
						//labels are stored by active-taxon row i (see SetTaxonLabel above),
						//so the name must be looked up with i, not origTaxIndex
						outman.UserMessageNoCR("Partially ambiguous characters of taxon %s converted to full ambiguity:\n\t", TaxonLabel(i));
						firstAmbig = false;
						}
					outman.UserMessageNoCR("%d ", origIndex+1);
					datum = CharToDatum('?');
					}
				}
			SetMatrix( i, j++, datum );
			}
		//terminate the per-taxon list of ambiguous characters, if one was started
		if(firstAmbig == false) outman.UserMessage("");
		i++;
		}
	}
// Fills this matrix from the single nucleotide characters block of `reader`.
//
// Steps:
//   1. Scan all characters blocks; exactly one must have datatype
//      nucleotide, dna or rna. Other blocks are reported and skipped.
//   2. Report excluded characters, if any (1-based numbers).
//   3. Size the matrix with NewMatrix(numActiveTaxa, numActiveChar) and copy
//      the active taxa/characters into consecutive rows/columns.
//
// Cell encoding: gap or missing -> 15; otherwise the bitwise OR of
// CharToBitwiseRepresentation() over all states of the cell.
//
// NOTE(review): assumes CharToBitwiseRepresentation returns a bit mask per
// state (the all-states code 15 suggests four one-bit states) - confirm.
// OR is used rather than addition so that states sharing a bit cannot
// carry into a wrong code.
//
// Blanks in taxon names are replaced by underscores before being stored.
//
// Throws ErrorException when no nucleotide block, or more than one, is found.
void NucleotideData::CreateMatrixFromNCL(GarliReader &reader){
	NxsCharactersBlock *charblock = NULL;
	int nucBlockNum = -1;

	// Testing the block count before the first fetch means a reader holding no
	// characters blocks ends up in the "No ... blocks" error below, instead of
	// the result of GetCharactersBlock(0) being dereferenced unchecked.
	for(int num = 0; num < reader.NumCharBlocks(); num++){
		charblock = reader.GetCharactersBlock(num);
		const bool isNucleotide = charblock->GetDataType() == NxsCharactersBlock::nucleotide
			|| charblock->GetDataType() == NxsCharactersBlock::dna
			|| charblock->GetDataType() == NxsCharactersBlock::rna;
		if(isNucleotide){
			if(nucBlockNum < 0) nucBlockNum = num;
			else{
				throw ErrorException("Multiple characters/data blocks containing nucleotide data found in Nexus datafile!\n\tEither combine the blocks or comment one out.");
				}
			}
		else outman.UserMessage("Ignoring non-nucleotide characters block from Nexus datafile");
		}
	if(nucBlockNum < 0) throw ErrorException("No characters/data blocks containing nucleotide data found in Nexus datafile!");
	charblock = reader.GetCharactersBlock(nucBlockNum);

	if(charblock->GetNumActiveChar() < charblock->GetNChar()){
		outman.UserMessageNoCR("Excluded characters:\n\t");
		for(int c=0;c<charblock->GetNCharTotal();c++)
			if(charblock->IsExcluded(c)) outman.UserMessageNoCR("%d ", c+1);
		outman.UserMessage("");
		}

	NxsTaxaBlock *taxablock = reader.GetTaxaBlock();

	const int numOrigTaxa = charblock->GetNTax();
	const int numActiveTaxa = charblock->GetNumActiveTaxa();
	const int numOrigChar = charblock->GetNChar();
	const int numActiveChar = charblock->GetNumActiveChar();

	NewMatrix( numActiveTaxa, numActiveChar );

	// read in the data, including taxon names
	int i = 0;	// row index among active taxa
	for( int origTaxIndex = 0; origTaxIndex < numOrigTaxa; origTaxIndex++ ) {
		if(!charblock->IsActiveTaxon(origTaxIndex))
			continue;

		//internally, blanks in taxon names will be stored as underscores
		NxsString tlabel = taxablock->GetTaxonLabel(origTaxIndex);
		tlabel.BlanksToUnderscores();
		SetTaxonLabel( i, tlabel.c_str());

		int j = 0;	// column index among active characters
		for( int origIndex = 0; origIndex < numOrigChar; origIndex++ ) {
			if(!charblock->IsActiveChar(origIndex))
				continue;

			unsigned char datum = '\0';
			if(charblock->IsGapState(origTaxIndex, origIndex) == true) datum = 15;
			else if(charblock->IsMissingState(origTaxIndex, origIndex) == true) datum = 15;
			else{
				int nstates = charblock->GetNumStates(origTaxIndex, origIndex);
				for(int s=0;s<nstates;s++){
					datum |= CharToBitwiseRepresentation(charblock->GetState(origTaxIndex, origIndex, s));
					}
				}
			SetMatrix( i, j++, datum );
			}
		i++;
		}
	}