Exemple #1
0
/**
 * Builds a simplified ("safe") label for every taxon in a taxa block.
 *
 * For each taxon the original label is scanned left to right. Characters that
 * are not graphic (per isgraph) or that appear in the punctuation blacklist
 * below are dropped; the remaining characters are upper-cased and collected
 * until the candidate holds maxLabelLen characters. If the first kept
 * character is a digit, a 't' is emitted in front of it (and counts toward
 * the length limit). The candidate is then handed to assignUniqueName()
 * together with safeLabelSet; the returned name is inserted into safeLabelSet
 * and appended to *vs.
 *
 * @param tb            taxa block whose labels are read.
 * @param vs            output vector; one label is appended per taxon.
 *                      If null, nothing is done.
 * @param safeLabelSet  labels handed out so far; extended with each new label.
 * @return true if at least one generated label differs from the taxon's
 *         original label; false otherwise (including when vs is null).
 */
bool constructSafeLabels(const NxsTaxaBlock & tb, std::vector<std::string> * vs, std::set<std::string> & safeLabelSet)
{
    if (vs == NULL)
        return false;

    const unsigned maxLabelLen = 10;
    const char * const unsafeChars = "!@#$%^&*.?|()[]{}/\\,;:=*\'\"`-+<>";

    const unsigned numTaxa = tb.GetNTaxTotal();
    bool changed = false;
    for (unsigned i = 0; i < numTaxa; ++i)
    {
        NxsString oldName = tb.GetTaxonLabel(i);
        unsigned newOffset = 0;
        std::string prefix;
        for (unsigned oldOffset = 0; newOffset < maxLabelLen && oldOffset < oldName.length(); ++oldOffset)
        {
            // The <cctype> classifiers require a value representable as
            // unsigned char (or EOF); passing a negative plain char is
            // undefined behaviour, so convert before classifying.
            const unsigned char uc = static_cast<unsigned char>(oldName[oldOffset]);
            if (!isgraph(uc) || strchr(unsafeChars, uc) != NULL)
                continue;

            if (newOffset == 0 && isdigit(uc))
            {
                // Avoid a label that starts with a digit.
                prefix.append(1, 't');
                newOffset += 1;
            }
            prefix.append(1, static_cast<char>(toupper(uc)));
            newOffset += 1;
        }
        // assignUniqueName is defined elsewhere; presumably it returns a name
        // of at most maxLabelLen characters that is not yet in safeLabelSet
        // -- TODO confirm against its definition.
        std::string newLabel = assignUniqueName(prefix, maxLabelLen, safeLabelSet);
        safeLabelSet.insert(newLabel);
        vs->push_back(newLabel);
        if (newLabel != oldName)
            changed = true;
    }
    return changed;
}
/**
 * Produce a Nexus taxa block that mirrors this object's taxa.
 *
 * A fresh NxsTaxaBlock is created and the label of every taxon of this
 * object (indices 0 up to GetNTaxTotal()) is added to it in index order.
 *
 * @return the newly built NxsTaxaBlock, by value.
 */
NxsTaxaBlock NxsNexmlTaxa::getNexusBlock()const{
   NxsTaxaBlock nexusBlock = NxsTaxaBlock();
   unsigned taxonIdx = 0;
   while ( taxonIdx < this->GetNTaxTotal() ){
      nexusBlock.AddTaxonLabel( this->GetTaxonLabel( taxonIdx ) );
      ++taxonIdx;
   }
   return nexusBlock;
}
Exemple #3
0
// Returns a newly allocated NxsTaxaBlock when asked for the "TAXA" block id
// and a reader is supplied; returns NULL otherwise. The new block has
// SetImplementsLinkAPI(false) applied before it is returned. The token
// argument is not used.
NxsTaxaBlock *NxsTaxaBlockFactory::GetBlockReaderForID(const std::string & idneeded, NxsReader *reader, NxsToken *)
	{
	// Without a reader there is nothing to build a block for.
	if (reader == NULL)
		return NULL;
	// This factory only handles the TAXA block id.
	if (idneeded != "TAXA")
		return NULL;

	NxsTaxaBlock * taxaReader = new NxsTaxaBlock();
	taxaReader->SetImplementsLinkAPI(false);
	return taxaReader;
	}
/**
 * Construct a NeXML taxa block from a Nexus taxa block.
 *
 * One Otu is allocated per taxon label of nTax (in index order), the
 * resulting list is wrapped in a new Otus object stored in otus_, the block
 * name is set to "Taxa Block", and the taxa sets / partition and index-set
 * maps are reset to empty containers.
 *
 * NOTE(review): the Otu and Otus objects are allocated with new here; who
 * releases them is not visible in this constructor -- confirm ownership.
 */
NxsNexmlTaxa::NxsNexmlTaxa( NxsTaxaBlock nTax ){
  // Wrap every Nexus taxon label in a freshly allocated Otu.
  vector< Otu* > otuList;
  unsigned taxonIdx = 0;
  while ( taxonIdx < nTax.GetNTaxTotal() ){
     otuList.push_back( new Otu( Glib::ustring( nTax.GetTaxonLabel( taxonIdx ).c_str() ) ) );
     ++taxonIdx;
  }
  otus_ = new Otus( otuList );
  block_name_ = "Taxa Block";

  // Data-model members start out empty.
  inactive_taxa_ = set< unsigned int >();
  active_taxa_ = set< unsigned int >();
  partitions_ = map< string, NxsPartition >();
  index_sets_ = map< string, NxsUnsignedSet >();
}
Exemple #5
0
/**
 * Replaces the taxon labels of every taxa block in the reader with "safe"
 * labels and records the renaming in a Nexus file.
 *
 * For each taxa block, constructSafeLabels() computes the replacement labels.
 * When that call reports a change (or evenTrivial is true) the function:
 *   1. opens filepath on first use and writes the "#NEXUS" header,
 *   2. writes the block with its original labels,
 *   3. changes every label in the block and prefixes its title with
 *      "Renamed ",
 *   4. writes the renamed block, and
 *   5. writes a TaxaAssociation block pairing each old label with its
 *      replacement.
 * The taxa blocks held by the reader are modified in place.
 *
 * @param nexusReader  reader whose taxa blocks are processed.
 * @param filepath     output file; only opened if something is written.
 * @param evenTrivial  if true, blocks are written even when no label changed.
 * @return true if constructSafeLabels() reported a change for any block.
 * @throws NxsException if the output file cannot be opened. Any exception
 *         raised while processing is re-thrown after the file is closed.
 */
bool substituteSafeTaxaLabels(PublicNexusReader &nexusReader, std::string filepath, bool evenTrivial)
{
    std::ostream * out = 0L;        // null until the output file has been opened
    std::ofstream outStream;
    const unsigned blockCount = nexusReader.GetNumTaxaBlocks();
    bool anyChanged = false;
    std::set<std::string> usedSafeNames;
    try
    {
        for (unsigned blockIdx = 0; blockIdx < blockCount; ++blockIdx)
        {
            NxsTaxaBlock * taxa = nexusReader.GetTaxaBlock(blockIdx);
            std::vector<std::string> safeLabels;

            const bool blockChanged = constructSafeLabels(*taxa, &safeLabels, usedSafeNames);
            if (blockChanged)
                anyChanged = true;
            if (!blockChanged && !evenTrivial)
                continue;

            // Lazily open the output file the first time a block is written.
            if (out == 0L)
            {
                outStream.open(filepath.c_str());
                if (!outStream.good())
                {
                    NxsString eMessage;
                    eMessage << "Could not open the file \"" << filepath << "\"";
                    throw NxsException(eMessage);
                }
                out = &outStream;
                outStream << "#NEXUS\n";
            }
            std::ostream & os = *out;

            // Original block first, then rename in place and write it again.
            taxa->WriteAsNexus(os);
            std::vector<NxsString> originalNames;
            const unsigned numTaxa = taxa->GetNTaxTotal();
            for (unsigned taxon = 0; taxon < numTaxa; ++taxon)
            {
                // Copy the old label before it is overwritten.
                originalNames.push_back(taxa->GetTaxonLabel(taxon));
                NxsString replacement;
                replacement << safeLabels[taxon];
                taxa->ChangeTaxonLabel(taxon, replacement);
            }
            NxsString title;
            title << taxa->GetTitle();
            NxsString renamedTitle;
            renamedTitle << "Renamed " << title;
            taxa->SetTitle(renamedTitle, false);
            taxa->WriteAsNexus(os);

            // The association block is written by hand: a TaxaAssociationBlock
            // instance cannot be used here because its first and second taxa
            // block pointers would both refer to the same instance.
            NxsString assocTitle;
            assocTitle << "Renaming " << title;
            os << "BEGIN TaxaAssociation;\n  Title " << NxsString::GetEscaped(assocTitle) << " ;\n";
            os << "  Taxa " << NxsString::GetEscaped(title) << " , " << NxsString::GetEscaped(renamedTitle) << " ;\n";
            os << "  Associates ";
            for (unsigned taxon = 0; taxon < numTaxa; ++taxon)
            {
                if (taxon != 0)
                    os << " ,\n    ";
                os << NxsString::GetEscaped(originalNames[taxon]) << " / " << NxsString::GetEscaped(safeLabels[taxon]);
            }
            os << " ;\nEND;\n" ;
        }
    }
    catch (...)
    {
        // Close the file (if it was opened) before propagating the error.
        if (out != 0L)
            outStream.close();
        throw;
    }

    if (out != 0L)
        outStream.close();
    return anyChanged;
}
Exemple #6
0
// Fills this matrix from the reader's characters block and taxa block.
//
// The matrix is sized to (active taxa) x (active characters). Inactive taxa
// and inactive characters are skipped, so matrix rows/columns are numbered
// consecutively over the active ones only. Each row's label is copied from
// the taxa block.
//
// Cell encoding depends on modSpec.IsAminoAcid():
//   - false: gap or missing cells are stored as 15; otherwise the cell is the
//     sum of CharToBitwiseRepresentation() over all states of the cell.
//   - true:  gap or missing cells are stored as 20; otherwise the cell is
//     CharToAminoAcidNumber() of the first state (exactly one state is
//     asserted).
void DataMatrix::CreateMatrixFromNCL(GarliReader &reader){

	NxsCharactersBlock *chars = reader.GetCharactersBlock();
	NxsTaxaBlock *taxa = reader.GetTaxaBlock();

	const int nTaxTotal = chars->GetNTax();
	const int nTaxActive = chars->GetNumActiveTaxa();
	const int nCharTotal = chars->GetNChar();
	const int nCharActive = chars->GetNumActiveChar();

	NewMatrix( nTaxActive, nCharActive );

	// Decide once how cells are encoded; the loops below are shared.
	const bool bitwiseStates = (modSpec.IsAminoAcid() == false);
	const unsigned char gapOrMissing = bitwiseStates ? 15 : 20;

	// read in the data, including taxon names
	int row = 0;
	for( int taxIdx = 0; taxIdx < nTaxTotal; taxIdx++ ) {
		if(!chars->IsActiveTaxon(taxIdx)) continue;

		SetTaxonLabel( row, taxa->GetTaxonLabel(taxIdx).c_str());

		int col = 0;
		for( int charIdx = 0; charIdx < nCharTotal; charIdx++ ) {
			if(!chars->IsActiveChar(charIdx)) continue;

			unsigned char datum = '\0';
			if(chars->IsGapState(taxIdx, charIdx) == true || chars->IsMissingState(taxIdx, charIdx) == true){
				datum = gapOrMissing;
				}
			else{
				int nstates = chars->GetNumStates(taxIdx, charIdx);
				if(bitwiseStates){
					// accumulate the representation of every state in the cell
					for(int s=0;s<nstates;s++){
						datum += CharToBitwiseRepresentation(chars->GetState(taxIdx, charIdx, s));
						}
					}
				else{
					// amino acid cells are expected to hold a single state
					assert(nstates == 1);
					datum = CharToAminoAcidNumber(chars->GetState(taxIdx, charIdx, 0));
					}
				}
			SetMatrix( row, col, datum );
			++col;
			}
		++row;
		}
	}
Exemple #7
0
// Fills this matrix from the single protein characters block held by reader.
//
// All characters blocks of the reader are scanned; non-protein blocks are
// reported and ignored. Exactly one protein block must exist.
//
// The matrix is sized to (active taxa) x (active characters); inactive taxa
// and characters are skipped. Row labels come from the taxa block with blanks
// converted to underscores. Gap and missing cells are stored as 20, cells
// with one state as CharToDatum(state), and cells with several states are
// reported to the user and stored as CharToDatum('?').
//
// Throws ErrorException if the reader holds no protein characters block
// (including when it holds no characters blocks at all) or more than one.
void AminoacidData::CreateMatrixFromNCL(GarliReader &reader){
	NxsCharactersBlock *charblock = 0;
	int proteinBlockNum = -1;
	// A plain for loop (rather than do/while) so that a reader without any
	// characters block reaches the "No characters/data blocks" error below
	// instead of dereferencing a block that does not exist.
	for(int num = 0; num < reader.NumCharBlocks(); num++){
		charblock = reader.GetCharactersBlock(num);
		if(charblock->GetDataType() == NxsCharactersBlock::protein){
			if(proteinBlockNum < 0) proteinBlockNum = num;
			else{
				throw ErrorException("Multiple characters/data blocks containing protein data found in Nexus datafile!\n\tEither combine the blocks or comment one out.");
				}
			}
		else outman.UserMessage("Ignoring non-protein characters block from Nexus datafile");
		}
	if(proteinBlockNum < 0) throw ErrorException("No characters/data blocks containing protein data found in Nexus datafile!");
	charblock = reader.GetCharactersBlock(proteinBlockNum);

	// Report excluded characters using 1-based numbering.
	if(charblock->GetNumActiveChar() < charblock->GetNChar()){
		outman.UserMessageNoCR("Excluded characters:\n\t");
		for(int c=0;c<charblock->GetNCharTotal();c++)
			if(charblock->IsExcluded(c)) outman.UserMessageNoCR("%d ", c+1);
		outman.UserMessage("");
		}

	NxsTaxaBlock *taxablock = reader.GetTaxaBlock();
	
	int numOrigTaxa = charblock->GetNTax();
	int numActiveTaxa = charblock->GetNumActiveTaxa();
	int numOrigChar = charblock->GetNChar();
	int numActiveChar = charblock->GetNumActiveChar();

	NewMatrix( numActiveTaxa, numActiveChar );

	// read in the data, including taxon names
	int i=0;	// row in this matrix; advances only for active taxa
	for( int origTaxIndex = 0; origTaxIndex < numOrigTaxa; origTaxIndex++ ) {
		if(charblock->IsActiveTaxon(origTaxIndex)){
			//internally, blanks in taxon names will be stored as underscores
			NxsString tlabel = taxablock->GetTaxonLabel(origTaxIndex);
			tlabel.BlanksToUnderscores();
			SetTaxonLabel( i, tlabel.c_str());
			
			int j = 0;	// column in this matrix; advances only for active characters
			bool firstAmbig = true;
			for( int origIndex = 0; origIndex < numOrigChar; origIndex++ ) {
				if(charblock->IsActiveChar(origIndex)){	
					unsigned char datum = '\0';
					if(charblock->IsGapState(origTaxIndex, origIndex) == true) datum = 20;
					else if(charblock->IsMissingState(origTaxIndex, origIndex) == true) datum = 20;
					else{
						int nstates = charblock->GetNumStates(origTaxIndex, origIndex);
						//need to deal with the possibility of multiple states represented in matrix
						//just convert to full ambiguity
						if(nstates == 1)
							datum = CharToDatum(charblock->GetState(origTaxIndex, origIndex, 0));
						else{
							if(firstAmbig){
								// Labels are stored by matrix row (i), not by the
								// original taxon index, so look the label up by i.
								outman.UserMessageNoCR("Partially ambiguous characters of taxon %s converted to full ambiguity:\n\t", TaxonLabel(i));
								firstAmbig = false;
								}
							outman.UserMessageNoCR("%d ", origIndex+1);
							datum = CharToDatum('?');
							}
						}
					SetMatrix( i, j++, datum );
					}
				}
			// terminate the line of ambiguous-character numbers, if one was started
			if(firstAmbig == false) outman.UserMessage("");
			i++;
			}
		}
	}
Exemple #8
0
// Fills this matrix from the single nucleotide characters block held by reader.
//
// All characters blocks of the reader are scanned; blocks whose data type is
// not nucleotide, dna or rna are reported and ignored. Exactly one matching
// block must exist.
//
// The matrix is sized to (active taxa) x (active characters); inactive taxa
// and characters are skipped. Row labels come from the taxa block with blanks
// converted to underscores. Gap and missing cells are stored as 15; any other
// cell is the sum of CharToBitwiseRepresentation() over all of its states.
//
// Throws ErrorException if the reader holds no nucleotide characters block
// (including when it holds no characters blocks at all) or more than one.
void NucleotideData::CreateMatrixFromNCL(GarliReader &reader){
	NxsCharactersBlock *charblock = 0;

	int numNuc = -1;
	// A plain for loop (rather than do/while) so that a reader without any
	// characters block reaches the "No characters/data blocks" error below
	// instead of dereferencing a block that does not exist.
	for(int num = 0; num < reader.NumCharBlocks(); num++){
		charblock = reader.GetCharactersBlock(num);
		if(charblock->GetDataType() == NxsCharactersBlock::nucleotide ||
			charblock->GetDataType() == NxsCharactersBlock::dna ||
			charblock->GetDataType() == NxsCharactersBlock::rna){
			if(numNuc < 0) numNuc = num;
			else{
				throw ErrorException("Multiple characters/data blocks containing nucleotide data found in Nexus datafile!\n\tEither combine the blocks or comment one out.");
				}
			}
		else outman.UserMessage("Ignoring non-nucleotide characters block from Nexus datafile");
		}
	if(numNuc < 0) throw ErrorException("No characters/data blocks containing nucleotide data found in Nexus datafile!");
	charblock = reader.GetCharactersBlock(numNuc);

	// Report excluded characters using 1-based numbering.
	if(charblock->GetNumActiveChar() < charblock->GetNChar()){
		outman.UserMessageNoCR("Excluded characters:\n\t");
		for(int c=0;c<charblock->GetNCharTotal();c++)
			if(charblock->IsExcluded(c)) outman.UserMessageNoCR("%d ", c+1);
		outman.UserMessage("");
		}

	NxsTaxaBlock *taxablock = reader.GetTaxaBlock();
	
	int numOrigTaxa = charblock->GetNTax();
	int numActiveTaxa = charblock->GetNumActiveTaxa();
	int numOrigChar = charblock->GetNChar();
	int numActiveChar = charblock->GetNumActiveChar();

	NewMatrix( numActiveTaxa, numActiveChar );

	// read in the data, including taxon names
	int i=0;	// row in this matrix; advances only for active taxa
	for( int origTaxIndex = 0; origTaxIndex < numOrigTaxa; origTaxIndex++ ) {
		if(charblock->IsActiveTaxon(origTaxIndex)){
			//internally, blanks in taxon names will be stored as underscores
			NxsString tlabel = taxablock->GetTaxonLabel(origTaxIndex);
			tlabel.BlanksToUnderscores();
			SetTaxonLabel( i, tlabel.c_str());
			
			int j = 0;	// column in this matrix; advances only for active characters
			for( int origIndex = 0; origIndex < numOrigChar; origIndex++ ) {
				if(charblock->IsActiveChar(origIndex)){	
					unsigned char datum = '\0';
					if(charblock->IsGapState(origTaxIndex, origIndex) == true) datum = 15;
					else if(charblock->IsMissingState(origTaxIndex, origIndex) == true) datum = 15;
					else{
						// accumulate the representation of every state in the cell
						int nstates = charblock->GetNumStates(origTaxIndex, origIndex);
						for(int s=0;s<nstates;s++){
							datum += CharToBitwiseRepresentation(charblock->GetState(origTaxIndex, origIndex, s));
							}
						}
					SetMatrix( i, j++, datum );
					}
				}
			i++;
			}
		}
	}