// Fill this amino-acid matrix from the single protein characters block held by `reader`.
//
// The reader's characters blocks are scanned; non-protein blocks are reported and ignored.
// Exactly one protein block must be present.  Only active taxa and active characters are
// copied, so matrix row i / column j are *compressed* indices that differ from the original
// Nexus indices whenever something has been excluded.
//
// Throws ErrorException if no protein block, or more than one protein block, is found.
void AminoacidData::CreateMatrixFromNCL(GarliReader &reader){
	// Locate the one protein block.  A for loop (rather than do/while) is used so that block 0
	// is never requested when the reader reports zero characters blocks; in that case we fall
	// through to the "No characters/data blocks" error below.
	int numNuc = -1;	// index of the protein block (variable name shared with the nucleotide loader)
	for(int num = 0; num < reader.NumCharBlocks(); num++){
		NxsCharactersBlock *candidate = reader.GetCharactersBlock(num);
		if(candidate->GetDataType() == NxsCharactersBlock::protein){
			if(numNuc < 0) numNuc = num;
			else{
				throw ErrorException("Multiple characters/data blocks containing protein data found in Nexus datafile!\n\tEither combine the blocks or comment one out.");
				}
			}
		else outman.UserMessage("Ignoring non-protein characters block from Nexus datafile");
		}
	if(numNuc < 0) throw ErrorException("No characters/data blocks containing protein data found in Nexus datafile!");

	NxsCharactersBlock *charblock = reader.GetCharactersBlock(numNuc);

	// Report (1-based) which characters have been excluded, if any.
	if(charblock->GetNumActiveChar() < charblock->GetNChar()){
		outman.UserMessageNoCR("Excluded characters:\n\t");
		for(int c = 0; c < charblock->GetNCharTotal(); c++)
			if(charblock->IsExcluded(c)) outman.UserMessageNoCR("%d ", c+1);
		outman.UserMessage("");
		}

	NxsTaxaBlock *taxablock = reader.GetTaxaBlock();

	int numOrigTaxa = charblock->GetNTax();
	int numActiveTaxa = charblock->GetNumActiveTaxa();
	int numOrigChar = charblock->GetNChar();
	int numActiveChar = charblock->GetNumActiveChar();

	NewMatrix( numActiveTaxa, numActiveChar );

	// read in the data, including taxon names
	int i = 0;	// row in our matrix == index among active taxa
	for( int origTaxIndex = 0; origTaxIndex < numOrigTaxa; origTaxIndex++ ) {
		if(!charblock->IsActiveTaxon(origTaxIndex)) continue;

		//internally, blanks in taxon names will be stored as underscores
		NxsString tlabel = taxablock->GetTaxonLabel(origTaxIndex);
		tlabel.BlanksToUnderscores();
		SetTaxonLabel( i, tlabel.c_str());

		int j = 0;	// column in our matrix == index among active characters
		bool firstAmbig = true;	// becomes false once the ambiguity header has been printed for this taxon
		for( int origIndex = 0; origIndex < numOrigChar; origIndex++ ) {
			if(!charblock->IsActiveChar(origIndex)) continue;

			unsigned char datum = '\0';
			// Gaps and missing data are both stored as 20
			// (presumably the "any amino acid" code -- confirm against CharToDatum).
			if(charblock->IsGapState(origTaxIndex, origIndex)) datum = 20;
			else if(charblock->IsMissingState(origTaxIndex, origIndex)) datum = 20;
			else{
				int nstates = charblock->GetNumStates(origTaxIndex, origIndex);
				if(nstates == 1)
					datum = CharToDatum(charblock->GetState(origTaxIndex, origIndex, 0));
				else{
					// Multiple states in one cell (partial ambiguity) are not represented;
					// just convert to full ambiguity and tell the user which characters were affected.
					if(firstAmbig){
						// The label was stored at row i above, so it must be looked up with i.
						// Indexing with origTaxIndex names the wrong taxon (or runs off the end
						// of the stored labels) as soon as any earlier taxon is inactive.
						outman.UserMessageNoCR("Partially ambiguous characters of taxon %s converted to full ambiguity:\n\t", TaxonLabel(i));
						firstAmbig = false;
						}
					outman.UserMessageNoCR("%d ", origIndex+1);
					datum = CharToDatum('?');
					}
				}
			SetMatrix( i, j++, datum );
			}
		// terminate the line of ambiguous character numbers, if one was started
		if(firstAmbig == false) outman.UserMessage("");
		i++;
		}
	}
// R entry point: read a Nexus file with NCL and return its contents as an R list.
//
// params     - R list; only the "fileName" element is read here.
// paramsVecR - R logical vector, read positionally:
//                [0] charall      return all characters (GetNCharTotal) instead of only the
//                                 included ones (GetNumIncludedChars)
//                [1] polyconvert  forwarded to stdData()
//                [2] levelsUnif   not used in this function other than being echoed in "Test"
//                [3] returnTrees  collect tree names and tree descriptions
//                [4] returnData   collect character data
//
// Returns a list with elements taxaNames, treeNames, trees, dataTypes, nbCharacters,
// charLabels, nbStates, stateLabels, dataChr and Test (the first three flags, echoed back).
extern "C" SEXP GetNCL(SEXP params, SEXP paramsVecR) {

    Rcpp::List list(params);
    Rcpp::LogicalVector paramsVec(paramsVecR);

    bool charall = paramsVec[0];
    bool polyconvert = paramsVec[1];
    bool levelsUnif = paramsVec[2];
    bool returnTrees = paramsVec[3];
    bool returnData = paramsVec[4];

    int nCharToReturn = 0;

    std::vector<std::string> dataTypes;    //vector of datatypes for each character block
    std::vector<int> nbCharacters;         //number of characters for each character block
    std::vector<std::string> dataChr;      //characters
    std::vector<std::string> charLabels;   //labels for the characters
    std::vector<std::string> stateLabels;  //labels for the states
    std::vector<int> nbStates;             //number of states for each character (for Standard datatype)
    std::vector<std::string> trees;        //vector of tree description strings
    std::vector<std::string> treeNames;    //vector of tree names
    std::vector<std::string> taxaNames;    //vector of taxa names

    // Echo the first three flags back to the caller.
    std::vector<bool> test(3);
    test[0] = charall;
    test[1] = polyconvert;
    test[2] = levelsUnif;

#   if defined(FILENAME_AS_NEXUS)
    std::string filename = "'" + list["fileName"] + "'";
#   else
    std::string filename = list["fileName"];
#   endif

    MultiFormatReader nexusReader(-1, NxsReader::WARNINGS_TO_STDERR);

    /* make NCL less strict */
    NxsTreesBlock * treesB = nexusReader.GetTreesBlockTemplate();
    treesB->SetAllowImplicitNames(true);
    nexusReader.cullIdenticalTaxaBlocks(true);
    /* End of making NCL less strict */

    nexusReader.ReadFilepath(const_cast < char* > (filename.c_str()), MultiFormatReader::NEXUS_FORMAT);

    const unsigned nTaxaBlocks = nexusReader.GetNumTaxaBlocks();
    for (unsigned t = 0; t < nTaxaBlocks; ++t) {
        /* Get blocks */
        const NxsTaxaBlock * taxaBlock = nexusReader.GetTaxaBlock(t);
        const unsigned nTreesBlocks = nexusReader.GetNumTreesBlocks(taxaBlock);
        const unsigned nCharBlocks = nexusReader.GetNumCharactersBlocks(taxaBlock);

        int nTax = taxaBlock->GetNumTaxonLabels();

        /* Get taxa names */
        for (int j = 0; j < nTax; ++j) {
            taxaNames.push_back(taxaBlock->GetTaxonLabel(j));
        }

        /* Get trees */
        if (returnTrees) {
            // No early `continue` when nTreesBlocks == 0: that would skip the data section
            // below for this taxa block, silently dropping character data whenever a taxa
            // block has characters but no trees.  The loop is simply a no-op in that case.
            for (unsigned i = 0; i < nTreesBlocks; ++i) {
                NxsTreesBlock* treeBlock = nexusReader.GetTreesBlock(taxaBlock, i);
                const unsigned nTrees = treeBlock->GetNumTrees();
                for (unsigned k = 0; k < nTrees; k++) {
                    NxsString ts = treeBlock->GetTreeDescription(k);
                    NxsString trNm = treeBlock->GetTreeName(k);
                    treeNames.push_back(trNm);
                    trees.push_back(ts);
                }
            }
        }

        /* Get data */
        if (returnData) {
            for (unsigned k = 0; k < nCharBlocks; ++k) {
                NxsCharactersBlock * charBlock = nexusReader.GetCharactersBlock(taxaBlock, k);

                NxsString dtType = charBlock->GetNameOfDatatype(charBlock->GetDataType());
                dataTypes.push_back(dtType);

                if (charall) {
                    nCharToReturn = charBlock->GetNCharTotal();
                }
                else {
                    nCharToReturn = charBlock->GetNumIncludedChars();
                }
                nbCharacters.push_back(nCharToReturn);

                // NOTE(review): when charall is false this walks indices 0..nIncluded-1, i.e. the
                // first nIncluded characters, not necessarily the included ones -- confirm intent.
                for (int eachChar = 0; eachChar < nCharToReturn; ++eachChar) { //We only pass the non-eliminated chars
                    // Labels of length <= 1 are replaced by a placeholder.
                    NxsString charLabel = charBlock->GetCharLabel(eachChar);
                    if (charLabel.length() > 1) {
                        charLabels.push_back(charLabel);
                    }
                    else {
                        charLabels.push_back("standard_char"); //FIXME: needs to fixed for sequence data
                    }

                    NxsString tmpCharString;
                    if (std::string("Continuous") == dtType) {
                        tmpCharString = contData(*charBlock, tmpCharString, eachChar, nTax);
                        nbStates.push_back(0);
                    }
                    else if (std::string("Standard") == dtType) {
                        tmpCharString = stdData(*charBlock, tmpCharString, eachChar, nTax, polyconvert);
                        unsigned int nCharStates = charBlock->GetNumObsStates(eachChar, false);
                        nbStates.push_back(nCharStates);
                        for (unsigned int l = 0; l < nCharStates; ++l) {
                            NxsString label = charBlock->GetStateLabel(eachChar, l);
                            stateLabels.push_back(label);
                        }
                    }
                    else if (std::string("DNA") == dtType) {
                        // NOTE(review): this serialises the *whole* taxon x character matrix on
                        // every pass of the outer per-character loop, and pushes nothing onto
                        // nbStates.  Kept as-is because callers may rely on the current output;
                        // the inner index is renamed only so it no longer shadows eachChar.
                        for (int taxon = 0; taxon < nTax; ++taxon) {
                            for (int seqChar = 0; seqChar < nCharToReturn; ++seqChar) {
                                unsigned int nCharStates = charBlock->GetNumStates(taxon, seqChar);
                                if (charBlock->IsGapState(taxon, seqChar)) {
                                    tmpCharString += "-";
                                }
                                else if (charBlock->IsMissingState(taxon, seqChar)) {
                                    tmpCharString += "?";
                                }
                                else if (nCharStates == 1) {
                                    tmpCharString += charBlock->GetState(taxon, seqChar, 0);
                                }
                                else {
                                    tmpCharString += "?"; //FIXME
                                }
                            }
                        }
                    }
                    else { // other type of data not yet supported
                        tmpCharString = "";
                        nbStates.push_back(0);
                        stateLabels.push_back(std::string(""));
                    }

                    // Wrap the values as "c(...);" text for the caller.
                    std::string charString = "c(" + tmpCharString + ");";
                    dataChr.push_back(charString);
                }
            }
        }
    }

    /* Prepare list to return */
    Rcpp::List res = Rcpp::List::create(Rcpp::Named("taxaNames") = taxaNames,
                                        Rcpp::Named("treeNames") = treeNames,
                                        Rcpp::Named("trees") = trees,
                                        Rcpp::Named("dataTypes") = dataTypes,
                                        Rcpp::Named("nbCharacters") = nbCharacters,
                                        Rcpp::Named("charLabels") = charLabels,
                                        Rcpp::Named("nbStates") = nbStates,
                                        Rcpp::Named("stateLabels") = stateLabels,
                                        Rcpp::Named("dataChr") = dataChr,
                                        Rcpp::Named("Test") = test);
    return res;
}
// Fill this nucleotide matrix from the single nucleotide/DNA/RNA characters block held by `reader`.
//
// The reader's characters blocks are scanned; blocks of any other datatype are reported and
// ignored.  Exactly one nucleotide-type block must be present.  Only active taxa and active
// characters are copied, so matrix row i / column j are *compressed* indices that differ from
// the original Nexus indices whenever something has been excluded.
//
// Throws ErrorException if no nucleotide block, or more than one, is found.
void NucleotideData::CreateMatrixFromNCL(GarliReader &reader){
	// Locate the one nucleotide block.  A for loop (rather than do/while) is used so that block 0
	// is never requested when the reader reports zero characters blocks; in that case we fall
	// through to the "No characters/data blocks" error below.
	int numNuc = -1;	// index of the nucleotide block, -1 until one is found
	for(int num = 0; num < reader.NumCharBlocks(); num++){
		NxsCharactersBlock *candidate = reader.GetCharactersBlock(num);
		if(candidate->GetDataType() == NxsCharactersBlock::nucleotide
			|| candidate->GetDataType() == NxsCharactersBlock::dna
			|| candidate->GetDataType() == NxsCharactersBlock::rna){
			if(numNuc < 0) numNuc = num;
			else{
				throw ErrorException("Multiple characters/data blocks containing nucleotide data found in Nexus datafile!\n\tEither combine the blocks or comment one out.");
				}
			}
		else outman.UserMessage("Ignoring non-nucleotide characters block from Nexus datafile");
		}
	if(numNuc < 0) throw ErrorException("No characters/data blocks containing nucleotide data found in Nexus datafile!");

	NxsCharactersBlock *charblock = reader.GetCharactersBlock(numNuc);

	// Report (1-based) which characters have been excluded, if any.
	if(charblock->GetNumActiveChar() < charblock->GetNChar()){
		outman.UserMessageNoCR("Excluded characters:\n\t");
		for(int c = 0; c < charblock->GetNCharTotal(); c++)
			if(charblock->IsExcluded(c)) outman.UserMessageNoCR("%d ", c+1);
		outman.UserMessage("");
		}

	NxsTaxaBlock *taxablock = reader.GetTaxaBlock();

	int numOrigTaxa = charblock->GetNTax();
	int numActiveTaxa = charblock->GetNumActiveTaxa();
	int numOrigChar = charblock->GetNChar();
	int numActiveChar = charblock->GetNumActiveChar();

	NewMatrix( numActiveTaxa, numActiveChar );

	// read in the data, including taxon names
	int i = 0;	// row in our matrix == index among active taxa
	for( int origTaxIndex = 0; origTaxIndex < numOrigTaxa; origTaxIndex++ ) {
		if(!charblock->IsActiveTaxon(origTaxIndex)) continue;

		//internally, blanks in taxon names will be stored as underscores
		NxsString tlabel = taxablock->GetTaxonLabel(origTaxIndex);
		tlabel.BlanksToUnderscores();
		SetTaxonLabel( i, tlabel.c_str());

		int j = 0;	// column in our matrix == index among active characters
		for( int origIndex = 0; origIndex < numOrigChar; origIndex++ ) {
			if(!charblock->IsActiveChar(origIndex)) continue;

			unsigned char datum = '\0';
			// Gaps and missing data are both stored as 15
			// (presumably all four base bits set -- confirm against CharToBitwiseRepresentation).
			if(charblock->IsGapState(origTaxIndex, origIndex)) datum = 15;
			else if(charblock->IsMissingState(origTaxIndex, origIndex)) datum = 15;
			else{
				// Combine the masks of every state listed for this cell.  OR is used instead of
				// addition so that a repeated or overlapping state cannot carry into an unrelated
				// bit; for distinct single-bit states the two give the same value.
				int nstates = charblock->GetNumStates(origTaxIndex, origIndex);
				for(int s = 0; s < nstates; s++){
					datum |= CharToBitwiseRepresentation(charblock->GetState(origTaxIndex, origIndex, s));
					}
				}
			SetMatrix( i, j++, datum );
			}
		i++;
		}
	}