void writeCharactersBlockToStream( const NxsCharactersBlock & cb, ostream & outf, const std::vector<std::string> & taxaNames, MultiFormatReader::DataFormatType f, long interleaveLen) { const unsigned nt = taxaNames.size(); const unsigned nc = cb.GetNChar(); unsigned nCharsToWrite; unsigned seqStartColumn = 0; if (IsRelaxedPhylipType(f)) { for (unsigned i = 0; i < nt; ++i) { const std::string & name = taxaNames[i]; if (name.length() > seqStartColumn) seqStartColumn = name.length(); } seqStartColumn += 1; } if (IsPhylipType(f) || IsRelaxedPhylipType(f)) { std::string sep; outf << nt << ' ' << nc << '\n'; if (IsInterleaveType(f) && interleaveLen > 0) { std::vector<std::string> storedSeqs; storedSeqs.reserve(nt); std::string *sp; nCharsToWrite = (nc > (unsigned) interleaveLen ? (unsigned)interleaveLen : nc); for (unsigned i = 0; i < nt; ++i) { const std::string & name = taxaNames[i]; storedSeqs.push_back(cb.GetMatrixRowAsStr(i)); sp = &(storedSeqs[i]); if (IsRelaxedPhylipType(f)) { sep.clear(); sep.append(seqStartColumn - name.length(), ' '); } outf << name << sep << sp->substr(0, nCharsToWrite) << '\n'; } for (unsigned currIndex = (unsigned)interleaveLen; currIndex < nc; currIndex += (unsigned)interleaveLen) { outf << '\n'; nCharsToWrite = ((nc - currIndex) > (unsigned)interleaveLen ? (unsigned)interleaveLen : (nc - currIndex)); for (unsigned i = 0; i < nt; ++i) { sp = &(storedSeqs[i]); outf << sp->substr(currIndex, nCharsToWrite) << '\n'; } } } else { if (interleaveLen > 0) { // not interleaved, but wrapping at interleaveLen for (unsigned i = 0; i < nt; ++i) { nCharsToWrite = (nc > (unsigned)interleaveLen ? 
(unsigned)interleaveLen : nc); const std::string & name = taxaNames[i]; std::string seq = cb.GetMatrixRowAsStr(i); if (IsRelaxedPhylipType(f)) { sep.clear(); sep.append(seqStartColumn - name.length(), ' '); } outf << name << sep << seq.substr(0, nCharsToWrite) << '\n'; for (unsigned currIndex = (unsigned)interleaveLen; currIndex < nc; currIndex += (unsigned)interleaveLen) { nCharsToWrite = ((nc - currIndex) > (unsigned)interleaveLen ? (unsigned)interleaveLen : (nc - currIndex)); outf << seq.substr(currIndex, nCharsToWrite) << '\n'; } } } else { // not interleaved, and not wrapping for (unsigned i = 0; i < nt; ++i) { const std::string & name = taxaNames[i]; std::string seq = cb.GetMatrixRowAsStr(i); if (IsRelaxedPhylipType(f)) { sep.clear(); sep.append(seqStartColumn - name.length(), ' '); } outf << name << sep << seq << '\n'; } } } } else if (IsFastaType(f)) { if (interleaveLen < 1) interleaveLen = 60; // default FASTA line length for (unsigned i = 0; i < nt; ++i) { nCharsToWrite = (nc > (unsigned)interleaveLen ? (unsigned)interleaveLen : nc); const std::string & name = taxaNames[i]; std::string seq = cb.GetMatrixRowAsStr(i); outf << '>' << name << '\n' << seq.substr(0, nCharsToWrite) << '\n'; for (unsigned currIndex = (unsigned)interleaveLen; currIndex < nc; currIndex += (unsigned)interleaveLen) { nCharsToWrite = ((nc - currIndex) > (unsigned)interleaveLen ? (unsigned)interleaveLen : (nc - currIndex)); outf << seq.substr(currIndex, nCharsToWrite) << '\n'; } } } else { throw NxsException("writeCharactersBlockToStream requested for unsupported format"); } }
/// Fills this matrix from the single protein characters block held by `reader`.
///
/// Every characters block in the reader is inspected: non-protein blocks are
/// reported and ignored, and exactly one protein block must be present.
/// Only active taxa and active characters are copied, so row/column indices
/// in this matrix are compacted relative to the indices in the Nexus block.
///
/// Cell encoding, as written by this function:
///  - gap or missing data -> 20
///  - exactly one state   -> CharToDatum(state)
///  - several states      -> CharToDatum('?'), with the affected (1-based)
///                           character numbers reported to the user
///
/// @throws ErrorException if no protein block, or more than one, is found.
void AminoacidData::CreateMatrixFromNCL(GarliReader &reader){
	// Locate the one protein block. A for loop (rather than do/while) means a
	// reader with zero characters blocks reaches the "No characters/data
	// blocks" error below instead of dereferencing a block that is not there.
	int proteinBlockIndex = -1;
	for(int num = 0; num < reader.NumCharBlocks(); num++){
		NxsCharactersBlock *candidate = reader.GetCharactersBlock(num);
		if(candidate->GetDataType() == NxsCharactersBlock::protein){
			if(proteinBlockIndex >= 0)
				throw ErrorException("Multiple characters/data blocks containing protein data found in Nexus datafile!\n\tEither combine the blocks or comment one out.");
			proteinBlockIndex = num;
		}
		else
			outman.UserMessage("Ignoring non-protein characters block from Nexus datafile");
	}
	if(proteinBlockIndex < 0)
		throw ErrorException("No characters/data blocks containing protein data found in Nexus datafile!");

	NxsCharactersBlock *charblock = reader.GetCharactersBlock(proteinBlockIndex);

	// Tell the user which characters (1-based) were excluded.
	if(charblock->GetNumActiveChar() < charblock->GetNChar()){
		outman.UserMessageNoCR("Excluded characters:\n\t");
		for(int c = 0; c < charblock->GetNCharTotal(); c++){
			if(charblock->IsExcluded(c))
				outman.UserMessageNoCR("%d ", c+1);
		}
		outman.UserMessage("");
	}

	NxsTaxaBlock *taxablock = reader.GetTaxaBlock();

	const int numOrigTaxa = charblock->GetNTax();
	const int numActiveTaxa = charblock->GetNumActiveTaxa();
	const int numOrigChar = charblock->GetNChar();
	const int numActiveChar = charblock->GetNumActiveChar();

	NewMatrix( numActiveTaxa, numActiveChar );

	// Gaps and missing data are stored with the same code.
	const unsigned char gapOrMissingCode = 20;

	// read in the data, including taxon names
	int i = 0; // row in this matrix; advances only for active taxa
	for( int origTaxIndex = 0; origTaxIndex < numOrigTaxa; origTaxIndex++ ) {
		if(!charblock->IsActiveTaxon(origTaxIndex))
			continue;

		//internally, blanks in taxon names will be stored as underscores
		NxsString tlabel = taxablock->GetTaxonLabel(origTaxIndex);
		tlabel.BlanksToUnderscores();
		SetTaxonLabel( i, tlabel.c_str());

		int j = 0; // column in this matrix; advances only for active characters
		bool ambiguityHeaderPrinted = false;
		for( int origIndex = 0; origIndex < numOrigChar; origIndex++ ) {
			if(!charblock->IsActiveChar(origIndex))
				continue;

			unsigned char datum = '\0';
			if(charblock->IsGapState(origTaxIndex, origIndex) == true
				|| charblock->IsMissingState(origTaxIndex, origIndex) == true){
				datum = gapOrMissingCode;
			}
			else if(charblock->GetNumStates(origTaxIndex, origIndex) == 1){
				datum = CharToDatum(charblock->GetState(origTaxIndex, origIndex, 0));
			}
			else{
				//need to deal with the possibility of multiple states represented in matrix
				//just convert to full ambiguity
				if(!ambiguityHeaderPrinted){
					// The label was stored at row i above; indexing by origTaxIndex
					// would name the wrong taxon whenever an earlier taxon is inactive.
					outman.UserMessageNoCR("Partially ambiguous characters of taxon %s converted to full ambiguity:\n\t", TaxonLabel(i));
					ambiguityHeaderPrinted = true;
				}
				outman.UserMessageNoCR("%d ", origIndex+1);
				datum = CharToDatum('?');
			}
			SetMatrix( i, j++, datum );
		}
		// Terminate the line of character numbers, if one was started.
		if(ambiguityHeaderPrinted)
			outman.UserMessage("");
		i++;
	}
}
/// Fills this matrix from the reader's characters block (the one returned by
/// reader.GetCharactersBlock() with no index).
///
/// Only active taxa and active characters are copied, so row/column indices
/// in this matrix are compacted relative to the indices in the Nexus block.
/// The cell encoding depends on modSpec.IsAminoAcid():
///  - not amino acid: gap/missing -> 15; otherwise the bitwise OR of
///    CharToBitwiseRepresentation() over every state listed for the cell
///  - amino acid:     gap/missing -> 20; otherwise CharToAminoAcidNumber()
///    of the single state (asserted to be exactly one)
///
/// NOTE(review): unlike the AminoacidData/NucleotideData versions, taxon
/// labels are stored here without converting blanks to underscores - confirm
/// whether that difference is intended.
void DataMatrix::CreateMatrixFromNCL(GarliReader &reader){
	NxsCharactersBlock *charblock = reader.GetCharactersBlock();
	NxsTaxaBlock *taxablock = reader.GetTaxaBlock();

	const int numOrigTaxa = charblock->GetNTax();
	const int numActiveTaxa = charblock->GetNumActiveTaxa();
	const int numOrigChar = charblock->GetNChar();
	const int numActiveChar = charblock->GetNumActiveChar();

	NewMatrix( numActiveTaxa, numActiveChar );

	// The data type is fixed for the whole matrix, so decide once.
	const bool nucleotideModel = (modSpec.IsAminoAcid() == false);
	// Gaps and missing data share one code per data type.
	const unsigned char gapOrMissingCode = static_cast<unsigned char>(nucleotideModel ? 15 : 20);

	// read in the data, including taxon names
	int i = 0; // row in this matrix; advances only for active taxa
	for( int origTaxIndex = 0; origTaxIndex < numOrigTaxa; origTaxIndex++ ) {
		if(!charblock->IsActiveTaxon(origTaxIndex))
			continue;

		SetTaxonLabel( i, taxablock->GetTaxonLabel(origTaxIndex).c_str());

		int j = 0; // column in this matrix; advances only for active characters
		for( int origIndex = 0; origIndex < numOrigChar; origIndex++ ) {
			if(!charblock->IsActiveChar(origIndex))
				continue;

			unsigned char datum = '\0';
			if(charblock->IsGapState(origTaxIndex, origIndex) == true
				|| charblock->IsMissingState(origTaxIndex, origIndex) == true){
				datum = gapOrMissingCode;
			}
			else{
				const int nstates = charblock->GetNumStates(origTaxIndex, origIndex);
				if(nucleotideModel){
					// Combine the per-state flags with OR rather than +, so a state
					// listed twice for one cell cannot carry into another bit.
					for(int s = 0; s < nstates; s++)
						datum |= CharToBitwiseRepresentation(charblock->GetState(origTaxIndex, origIndex, s));
				}
				else{
					assert(nstates == 1);
					datum = CharToAminoAcidNumber(charblock->GetState(origTaxIndex, origIndex, 0));
				}
			}
			SetMatrix( i, j++, datum );
		}
		i++;
	}
}
/// Fills this matrix from the single nucleotide characters block held by
/// `reader`.
///
/// Every characters block in the reader is inspected: blocks whose data type
/// is not nucleotide, dna or rna are reported and ignored, and exactly one
/// matching block must be present. Only active taxa and active characters
/// are copied, so row/column indices in this matrix are compacted relative to
/// the indices in the Nexus block.
///
/// Cell encoding, as written by this function:
///  - gap or missing data -> 15
///  - otherwise           -> bitwise OR of CharToBitwiseRepresentation() over
///                           every state listed for the cell
///
/// @throws ErrorException if no nucleotide block, or more than one, is found.
void NucleotideData::CreateMatrixFromNCL(GarliReader &reader){
	// Locate the one nucleotide block. A for loop (rather than do/while) means
	// a reader with zero characters blocks reaches the "No characters/data
	// blocks" error below instead of dereferencing a block that is not there.
	int nucBlockIndex = -1;
	for(int num = 0; num < reader.NumCharBlocks(); num++){
		NxsCharactersBlock *candidate = reader.GetCharactersBlock(num);
		const bool isNucleotide = candidate->GetDataType() == NxsCharactersBlock::nucleotide
			|| candidate->GetDataType() == NxsCharactersBlock::dna
			|| candidate->GetDataType() == NxsCharactersBlock::rna;
		if(isNucleotide){
			if(nucBlockIndex >= 0)
				throw ErrorException("Multiple characters/data blocks containing nucleotide data found in Nexus datafile!\n\tEither combine the blocks or comment one out.");
			nucBlockIndex = num;
		}
		else
			outman.UserMessage("Ignoring non-nucleotide characters block from Nexus datafile");
	}
	if(nucBlockIndex < 0)
		throw ErrorException("No characters/data blocks containing nucleotide data found in Nexus datafile!");

	NxsCharactersBlock *charblock = reader.GetCharactersBlock(nucBlockIndex);

	// Tell the user which characters (1-based) were excluded.
	if(charblock->GetNumActiveChar() < charblock->GetNChar()){
		outman.UserMessageNoCR("Excluded characters:\n\t");
		for(int c = 0; c < charblock->GetNCharTotal(); c++){
			if(charblock->IsExcluded(c))
				outman.UserMessageNoCR("%d ", c+1);
		}
		outman.UserMessage("");
	}

	NxsTaxaBlock *taxablock = reader.GetTaxaBlock();

	const int numOrigTaxa = charblock->GetNTax();
	const int numActiveTaxa = charblock->GetNumActiveTaxa();
	const int numOrigChar = charblock->GetNChar();
	const int numActiveChar = charblock->GetNumActiveChar();

	NewMatrix( numActiveTaxa, numActiveChar );

	// Gaps and missing data are stored with the same code.
	const unsigned char gapOrMissingCode = 15;

	// read in the data, including taxon names
	int i = 0; // row in this matrix; advances only for active taxa
	for( int origTaxIndex = 0; origTaxIndex < numOrigTaxa; origTaxIndex++ ) {
		if(!charblock->IsActiveTaxon(origTaxIndex))
			continue;

		//internally, blanks in taxon names will be stored as underscores
		NxsString tlabel = taxablock->GetTaxonLabel(origTaxIndex);
		tlabel.BlanksToUnderscores();
		SetTaxonLabel( i, tlabel.c_str());

		int j = 0; // column in this matrix; advances only for active characters
		for( int origIndex = 0; origIndex < numOrigChar; origIndex++ ) {
			if(!charblock->IsActiveChar(origIndex))
				continue;

			unsigned char datum = '\0';
			if(charblock->IsGapState(origTaxIndex, origIndex) == true
				|| charblock->IsMissingState(origTaxIndex, origIndex) == true){
				datum = gapOrMissingCode;
			}
			else{
				const int nstates = charblock->GetNumStates(origTaxIndex, origIndex);
				// Combine the per-state flags with OR rather than +, so a state
				// listed twice for one cell cannot carry into another bit.
				for(int s = 0; s < nstates; s++)
					datum |= CharToBitwiseRepresentation(charblock->GetState(origTaxIndex, origIndex, s));
			}
			SetMatrix( i, j++, datum );
		}
		i++;
	}
}