Beispiel #1
0
/*
 * Appends one character (column eachChar) of a characters block to charString
 * as a comma-separated list with one entry per taxon (rows 0 .. nTax-1), and
 * returns a copy of the resulting string.
 *
 * The entry written for a taxon is:
 *   NA           when the cell is missing, or when it holds more than one
 *                state and polyconvert is true;
 *   "{a,b,...}"  (inside double quotes) when the cell holds more than one
 *                state and polyconvert is false; a, b, ... are the values of
 *                GetInternalRepresentation() for each state of the cell;
 *   "a"          (inside double quotes) otherwise, a being the value of
 *                GetInternalRepresentation() for state 0.
 *
 * charString is modified in place; anything it already contains is kept.
 */
NxsString stdData(NxsCharactersBlock& charBlock, NxsString& charString, const int& eachChar,
		  const int& nTax, bool polyconvert) {
    for (int row=0; row<nTax; ++row) {
	// Entries are comma separated: the separator goes in front of every
	// entry except the first one.
	if (row > 0) {
	    charString += ',';
	}

	// Fetched for every cell, before the cell is classified.
	const int firstState = charBlock.GetInternalRepresentation(row, eachChar, 0);

	if (charBlock.IsMissingState(row, eachChar)) {
	    charString += "NA";
	    continue;
	}

	if (!(charBlock.GetNumStates(row, eachChar) > 1)) {
	    // At most one state: write it between double quotes.
	    charString += '"';
	    charString += firstState;
	    charString += '"';
	    continue;
	}

	// From here on the cell holds several states.
	if (polyconvert) {
	    charString += "NA";
	    continue;
	}

	charString += "\"{";
	unsigned int s = 0;
	while (s < charBlock.GetNumStates(row, eachChar)) {
	    charString += charBlock.GetInternalRepresentation(row, eachChar, s);
	    ++s;
	    if (s < charBlock.GetNumStates(row, eachChar)) {
		charString += ',';
	    }
	}
	charString += "}\"";
    }
    return charString;
}
Beispiel #2
0
/*
 * R entry point: reads a NEXUS file with NCL and returns its taxa names,
 * trees and character data as a named R list.
 *
 * params      R list; its "fileName" element is the path of the file to read.
 * paramsVecR  R logical vector holding five flags, in this order:
 *               [0] charall      use GetNCharTotal() instead of
 *                                GetNumIncludedChars() as the number of
 *                                characters to return
 *               [1] polyconvert  forwarded to stdData()
 *               [2] levelsUnif   not used here, only echoed back in "Test"
 *               [3] returnTrees  collect tree names and tree descriptions
 *               [4] returnData   collect character data
 *
 * Returns a list with the elements taxaNames, treeNames, trees, dataTypes,
 * nbCharacters, charLabels, nbStates, stateLabels, dataChr and Test (the
 * first three flags).
 *
 * NOTE(review): nothing here catches exceptions thrown by NCL or Rcpp before
 * control returns to R -- confirm whether the caller expects that.
 */
extern "C" SEXP GetNCL(SEXP params, SEXP paramsVecR) {

    Rcpp::List list(params);
    Rcpp::LogicalVector paramsVec(paramsVecR);

    bool charall = paramsVec[0];
    bool polyconvert = paramsVec[1];
    bool levelsUnif = paramsVec[2];
    bool returnTrees = paramsVec[3];
    bool returnData = paramsVec[4];

    int nCharToReturn = 0;

    std::vector<std::string> dataTypes;      //vector of datatypes for each character block
    std::vector<int> nbCharacters;           //number of characters for each character block
    std::vector<std::string> dataChr;        //characters
    std::vector<std::string> charLabels;     //labels for the characters
    std::vector<std::string> stateLabels;    //labels for the states
    std::vector<int> nbStates;               //number of states for each character (for Standard datatype)
    std::vector<std::string> trees;          //vector of Newick strings holding the names
    std::vector<std::string> treeNames;      //vector of tree names
    std::vector<std::string> taxaNames;      //vector of taxa names

    // The first three flags are handed back to the caller unchanged.
    std::vector<bool> test(3);
    test[0] = charall;
    test[1] = polyconvert;
    test[2] = levelsUnif;

#   if defined(FILENAME_AS_NEXUS)
    std::string filename = "'" + list["fileName"] + "'";
#   else
    std::string filename = list["fileName"];
#   endif

    MultiFormatReader nexusReader(-1, NxsReader::WARNINGS_TO_STDERR);

    /* make NCL less strict */
    NxsTreesBlock * treesB = nexusReader.GetTreesBlockTemplate();
    treesB->SetAllowImplicitNames(true);
    nexusReader.cullIdenticalTaxaBlocks(true);
    /* End of making NCL less strict */

    nexusReader.ReadFilepath(const_cast < char* > (filename.c_str()), MultiFormatReader::NEXUS_FORMAT);

    const unsigned nTaxaBlocks = nexusReader.GetNumTaxaBlocks();
    for (unsigned t = 0; t < nTaxaBlocks; ++t) {
	/* Get blocks */
	const NxsTaxaBlock * taxaBlock = nexusReader.GetTaxaBlock(t);
	const unsigned nTreesBlocks = nexusReader.GetNumTreesBlocks(taxaBlock);
	const unsigned nCharBlocks = nexusReader.GetNumCharactersBlocks(taxaBlock);

	int nTax = taxaBlock->GetNumTaxonLabels();

	/* Get taxa names */
	for (int j=0; j < nTax; ++j) {
	    taxaNames.push_back (taxaBlock->GetTaxonLabel(j));
	}

	/* Get trees */
	if (returnTrees) {
	    // A taxa block without any trees block just contributes nothing
	    // here. It must not leave the taxa-block loop early, otherwise the
	    // character data of this taxa block would never be collected.
	    for (unsigned i = 0; i < nTreesBlocks; ++i) {
		NxsTreesBlock* treeBlock = nexusReader.GetTreesBlock(taxaBlock, i);
		const unsigned nTrees = treeBlock->GetNumTrees();
		for (unsigned k = 0; k < nTrees; k++) {
		    NxsString ts = treeBlock->GetTreeDescription(k);
		    NxsString trNm = treeBlock->GetTreeName(k);
		    treeNames.push_back(trNm);
		    trees.push_back (ts);
		}
	    }
	}

	/* Get data */
	if (returnData) {
	    for (unsigned k = 0; k < nCharBlocks; ++k) {
		NxsCharactersBlock * charBlock = nexusReader.GetCharactersBlock(taxaBlock, k);

		NxsString dtType = charBlock->GetNameOfDatatype(charBlock->GetDataType());
		dataTypes.push_back(dtType);

		if (charall) {
		    nCharToReturn=charBlock->GetNCharTotal();
		}
		else {
		    nCharToReturn=charBlock->GetNumIncludedChars();
		}
		nbCharacters.push_back (nCharToReturn);
		for (int eachChar=0; eachChar < nCharToReturn; ++eachChar) { //We only pass the non-eliminated chars
		    // Labels of length 0 or 1 are replaced by a placeholder.
		    NxsString charLabel=charBlock->GetCharLabel(eachChar);
		    if (charLabel.length()>1) {
			charLabels.push_back (charLabel);
		    }
		    else {
			charLabels.push_back ("standard_char"); //FIXME: needs to fixed for sequence data
		    }

		    NxsString tmpCharString;
		    if (std::string("Continuous") == dtType) {
			tmpCharString = contData(*charBlock, tmpCharString, eachChar, nTax);
			nbStates.push_back (0);
		    }
		    else if (std::string("Standard") == dtType) {
			tmpCharString = stdData(*charBlock, tmpCharString, eachChar, nTax,
						polyconvert);
			unsigned int nCharStates = charBlock->GetNumObsStates(eachChar, false);
			nbStates.push_back (nCharStates);
			for (unsigned int l=0; l < nCharStates; ++l) {
			    NxsString label = charBlock->GetStateLabel(eachChar, l);
			    stateLabels.push_back (label);
			}
		    }
		    else if (std::string("DNA") == dtType) {
			// NOTE(review): these nested loops walk every taxon and
			// every character on each pass of the enclosing character
			// loop, so every dataChr entry of a DNA block holds the
			// whole matrix, and nbStates gets no entry. Behaviour is
			// kept as it was -- confirm against the R-side consumer
			// before changing it.
			for (int taxon=0; taxon < nTax; ++taxon) {
			    for (int dnaChar=0; dnaChar < nCharToReturn; ++dnaChar) {
				unsigned int nCharStates = charBlock->GetNumStates(taxon, dnaChar);
				if (charBlock->IsGapState(taxon, dnaChar)) {
				    tmpCharString += "-";
				}
				else if (charBlock->IsMissingState(taxon, dnaChar)) {
				    tmpCharString += "?";
				}
				else if (nCharStates == 1) {
				    tmpCharString += charBlock->GetState(taxon, dnaChar, 0);
				}
				else {
				    tmpCharString += "?"; //FIXME
				}
			    }
			}
		    }
		    else { // other type of data not yet supported
			tmpCharString = "";
			nbStates.push_back (0);
			stateLabels.push_back (std::string(""));
		    }

		    // Each character is handed back wrapped as "c(...);".
		    std::string charString = "c(" + tmpCharString + ");";
		    dataChr.push_back (charString);
		}
	    }
	}
    }

    /* Prepare list to return */
    Rcpp::List res = Rcpp::List::create(Rcpp::Named("taxaNames") = taxaNames,
					Rcpp::Named("treeNames") = treeNames,
					Rcpp::Named("trees") = trees,
					Rcpp::Named("dataTypes") = dataTypes,
					Rcpp::Named("nbCharacters") = nbCharacters,
					Rcpp::Named("charLabels") = charLabels,
					Rcpp::Named("nbStates") = nbStates,
					Rcpp::Named("stateLabels") = stateLabels,
					Rcpp::Named("dataChr") = dataChr,
					Rcpp::Named("Test") = test);
    return res;
}
Beispiel #3
0
// Fills this matrix from the single protein characters block of a Nexus file
// that `reader` has already parsed.
//
// Throws ErrorException when the file holds no protein characters block, or
// more than one. Non-protein blocks are skipped with a user message.
//
// Only active taxa and active characters are copied. Each stored taxon label
// has its blanks turned into underscores. A cell is stored as
//   20                 for a gap or a missing state,
//   CharToDatum(state) when the cell has exactly one state,
//   CharToDatum('?')   when it has several (reported to the user per taxon).
void AminoacidData::CreateMatrixFromNCL(GarliReader &reader){
	NxsCharactersBlock *charblock;

	// Locate the one protein block. The count is checked before a block is
	// fetched, so a file without characters blocks reaches the
	// "No characters/data blocks" error below instead of dereferencing
	// whatever GetCharactersBlock(0) returns in that case.
	int proteinBlock = -1;
	for(int num = 0; num < reader.NumCharBlocks(); num++){
		charblock = reader.GetCharactersBlock(num);
		if(charblock->GetDataType() == NxsCharactersBlock::protein){
			if(proteinBlock < 0) proteinBlock = num;
			else{
				throw ErrorException("Multiple characters/data blocks containing protein data found in Nexus datafile!\n\tEither combine the blocks or comment one out.");
				}
			}
		else outman.UserMessage("Ignoring non-protein characters block from Nexus datafile");
		}
	if(proteinBlock < 0) throw ErrorException("No characters/data blocks containing protein data found in Nexus datafile!");
	charblock = reader.GetCharactersBlock(proteinBlock);

	// Report excluded characters using 1-based numbering.
	if(charblock->GetNumActiveChar() < charblock->GetNChar()){
		outman.UserMessageNoCR("Excluded characters:\n\t");
		for(int c=0;c<charblock->GetNCharTotal();c++)
			if(charblock->IsExcluded(c)) outman.UserMessageNoCR("%d ", c+1);
		outman.UserMessage("");
		}

	NxsTaxaBlock *taxablock = reader.GetTaxaBlock();

	int numOrigTaxa = charblock->GetNTax();
	int numActiveTaxa = charblock->GetNumActiveTaxa();
	int numOrigChar = charblock->GetNChar();
	int numActiveChar = charblock->GetNumActiveChar();

	NewMatrix( numActiveTaxa, numActiveChar );

	// read in the data, including taxon names
	// i and j index the rows/columns of this matrix (active entries only);
	// origTaxIndex and origIndex index the Nexus block.
	int i=0;
	for( int origTaxIndex = 0; origTaxIndex < numOrigTaxa; origTaxIndex++ ) {
		if(charblock->IsActiveTaxon(origTaxIndex)){
			//internally, blanks in taxon names will be stored as underscores
			NxsString tlabel = taxablock->GetTaxonLabel(origTaxIndex);
			tlabel.BlanksToUnderscores();
			SetTaxonLabel( i, tlabel.c_str());

			int j = 0;
			bool firstAmbig = true;
			for( int origIndex = 0; origIndex < numOrigChar; origIndex++ ) {
				if(charblock->IsActiveChar(origIndex)){
					unsigned char datum = '\0';
					if(charblock->IsGapState(origTaxIndex, origIndex) == true) datum = 20;
					else if(charblock->IsMissingState(origTaxIndex, origIndex) == true) datum = 20;
					else{
						int nstates = charblock->GetNumStates(origTaxIndex, origIndex);
						//need to deal with the possibility of multiple states represented in matrix
						//just convert to full ambiguity
						if(nstates == 1)
							datum = CharToDatum(charblock->GetState(origTaxIndex, origIndex, 0));
						else{
							if(firstAmbig){
								// Use the label stored just above for this row.
								// Labels are stored under the active-row index i,
								// so looking one up by origTaxIndex names the wrong
								// taxon once any taxon is inactive.
								outman.UserMessageNoCR("Partially ambiguous characters of taxon %s converted to full ambiguity:\n\t", tlabel.c_str());
								firstAmbig = false;
								}
							outman.UserMessageNoCR("%d ", origIndex+1);
							datum = CharToDatum('?');
							}
						}
					SetMatrix( i, j++, datum );
					}
				}
			// Terminate the line of ambiguous positions, if one was started.
			if(firstAmbig == false) outman.UserMessage("");
			i++;
			}
		}
	}
Beispiel #4
0
// Fills this matrix from the characters block returned by
// reader.GetCharactersBlock(), copying only active taxa and active characters.
//
// How a cell is encoded depends on modSpec.IsAminoAcid():
//   false: gap or missing -> 15; otherwise the sum of
//          CharToBitwiseRepresentation() over every state of the cell.
//   true:  gap or missing -> 20; otherwise CharToAminoAcidNumber() of state 0
//          (the cell is asserted to hold exactly one state).
void DataMatrix::CreateMatrixFromNCL(GarliReader &reader){
	NxsCharactersBlock *charblock = reader.GetCharactersBlock();
	NxsTaxaBlock *taxablock = reader.GetTaxaBlock();

	const int totalTaxa = charblock->GetNTax();
	const int activeTaxa = charblock->GetNumActiveTaxa();
	const int totalChars = charblock->GetNChar();
	const int activeChars = charblock->GetNumActiveChar();

	NewMatrix( activeTaxa, activeChars );

	// The data type is queried once; it selects both the gap/missing code and
	// the per-state conversion used for every cell.
	const bool nucleotide = (modSpec.IsAminoAcid() == false);
	const unsigned char gapOrMissing = nucleotide ? 15 : 20;

	// row/col index this matrix (active entries only); taxon/site index the
	// Nexus block.
	int row = 0;
	for( int taxon = 0; taxon < totalTaxa; taxon++ ) {
		if(!charblock->IsActiveTaxon(taxon)) continue;

		SetTaxonLabel( row, taxablock->GetTaxonLabel(taxon).c_str());

		int col = 0;
		for( int site = 0; site < totalChars; site++ ) {
			if(!charblock->IsActiveChar(site)) continue;

			unsigned char datum = '\0';
			if(charblock->IsGapState(taxon, site) == true || charblock->IsMissingState(taxon, site) == true){
				datum = gapOrMissing;
				}
			else{
				int nstates = charblock->GetNumStates(taxon, site);
				if(nucleotide){
					// Accumulate the code of every state found in the cell.
					for(int s=0;s<nstates;s++){
						datum += CharToBitwiseRepresentation(charblock->GetState(taxon, site, s));
						}
					}
				else{
					assert(nstates == 1);
					datum = CharToAminoAcidNumber(charblock->GetState(taxon, site, 0));
					}
				}
			SetMatrix( row, col++, datum );
			}
		row++;
		}
	}
Beispiel #5
0
// Fills this matrix from the single nucleotide/DNA/RNA characters block of a
// Nexus file that `reader` has already parsed.
//
// Throws ErrorException when the file holds no such characters block, or more
// than one. Blocks of any other datatype are skipped with a user message.
//
// Only active taxa and active characters are copied. Each stored taxon label
// has its blanks turned into underscores. A cell is stored as 15 for a gap or
// a missing state, and otherwise as the sum of CharToBitwiseRepresentation()
// over every state of the cell.
void NucleotideData::CreateMatrixFromNCL(GarliReader &reader){
	NxsCharactersBlock *charblock;

	// Locate the one nucleotide block. The count is checked before a block is
	// fetched, so a file without characters blocks reaches the
	// "No characters/data blocks" error below instead of dereferencing
	// whatever GetCharactersBlock(0) returns in that case.
	int numNuc = -1;
	for(int num = 0; num < reader.NumCharBlocks(); num++){
		charblock = reader.GetCharactersBlock(num);
		if(charblock->GetDataType() == NxsCharactersBlock::nucleotide ||
			charblock->GetDataType() == NxsCharactersBlock::dna ||
			charblock->GetDataType() == NxsCharactersBlock::rna){
			if(numNuc < 0) numNuc = num;
			else{
				throw ErrorException("Multiple characters/data blocks containing nucleotide data found in Nexus datafile!\n\tEither combine the blocks or comment one out.");
				}
			}
		else outman.UserMessage("Ignoring non-nucleotide characters block from Nexus datafile");
		}
	if(numNuc < 0) throw ErrorException("No characters/data blocks containing nucleotide data found in Nexus datafile!");
	charblock = reader.GetCharactersBlock(numNuc);

	// Report excluded characters using 1-based numbering.
	if(charblock->GetNumActiveChar() < charblock->GetNChar()){
		outman.UserMessageNoCR("Excluded characters:\n\t");
		for(int c=0;c<charblock->GetNCharTotal();c++)
			if(charblock->IsExcluded(c)) outman.UserMessageNoCR("%d ", c+1);
		outman.UserMessage("");
		}

	NxsTaxaBlock *taxablock = reader.GetTaxaBlock();

	int numOrigTaxa = charblock->GetNTax();
	int numActiveTaxa = charblock->GetNumActiveTaxa();
	int numOrigChar = charblock->GetNChar();
	int numActiveChar = charblock->GetNumActiveChar();

	NewMatrix( numActiveTaxa, numActiveChar );

	// read in the data, including taxon names
	// i and j index the rows/columns of this matrix (active entries only);
	// origTaxIndex and origIndex index the Nexus block.
	int i=0;
	for( int origTaxIndex = 0; origTaxIndex < numOrigTaxa; origTaxIndex++ ) {
		if(charblock->IsActiveTaxon(origTaxIndex)){
			//internally, blanks in taxon names will be stored as underscores
			NxsString tlabel = taxablock->GetTaxonLabel(origTaxIndex);
			tlabel.BlanksToUnderscores();
			SetTaxonLabel( i, tlabel.c_str());

			int j = 0;
			for( int origIndex = 0; origIndex < numOrigChar; origIndex++ ) {
				if(charblock->IsActiveChar(origIndex)){
					unsigned char datum = '\0';
					if(charblock->IsGapState(origTaxIndex, origIndex) == true) datum = 15;
					else if(charblock->IsMissingState(origTaxIndex, origIndex) == true) datum = 15;
					else{
						// Accumulate the code of every state found in the cell.
						int nstates = charblock->GetNumStates(origTaxIndex, origIndex);
						for(int s=0;s<nstates;s++){
							datum += CharToBitwiseRepresentation(charblock->GetState(origTaxIndex, origIndex, s));
							}
						}
					SetMatrix( i, j++, datum );
					}
				}
			i++;
			}
		}
	}