Example #1
0
void writeCharactersBlockToStream(
    const NxsCharactersBlock & cb,
    ostream & outf,
    const std::vector<std::string> & taxaNames,
    MultiFormatReader::DataFormatType f,
    long interleaveLen)
{
    const unsigned nt = taxaNames.size();
    const unsigned nc = cb.GetNChar();
    unsigned nCharsToWrite;
    unsigned seqStartColumn = 0;

    if (IsRelaxedPhylipType(f))
    {
        for (unsigned i = 0; i < nt; ++i)
        {
            const std::string & name = taxaNames[i];
            if (name.length() > seqStartColumn)
                seqStartColumn = name.length();
        }
        seqStartColumn += 1;
    }


    if (IsPhylipType(f) || IsRelaxedPhylipType(f))
    {
        std::string sep;

        outf << nt << ' ' << nc << '\n';

        if (IsInterleaveType(f) && interleaveLen > 0)
        {
            std::vector<std::string> storedSeqs;
            storedSeqs.reserve(nt);
            std::string *sp;
            nCharsToWrite = (nc > (unsigned) interleaveLen ? (unsigned)interleaveLen : nc);
            for (unsigned i = 0; i < nt; ++i)
            {
                const std::string & name = taxaNames[i];
                storedSeqs.push_back(cb.GetMatrixRowAsStr(i));
                sp = &(storedSeqs[i]);
                if (IsRelaxedPhylipType(f))
                {
                    sep.clear();
                    sep.append(seqStartColumn - name.length(), ' ');
                }
                outf << name << sep << sp->substr(0, nCharsToWrite) << '\n';
            }

            for (unsigned currIndex = (unsigned)interleaveLen; currIndex < nc; currIndex += (unsigned)interleaveLen)
            {
                outf << '\n';
                nCharsToWrite = ((nc - currIndex) > (unsigned)interleaveLen ? (unsigned)interleaveLen : (nc - currIndex));
                for (unsigned i = 0; i < nt; ++i)
                {
                    sp = &(storedSeqs[i]);
                    outf << sp->substr(currIndex, nCharsToWrite) << '\n';
                }
            }
        }
        else
        {
            if (interleaveLen > 0)
            {
                // not interleaved, but wrapping at interleaveLen
                for (unsigned i = 0; i < nt; ++i)
                {
                    nCharsToWrite = (nc > (unsigned)interleaveLen ? (unsigned)interleaveLen : nc);
                    const std::string & name = taxaNames[i];
                    std::string seq = cb.GetMatrixRowAsStr(i);
                    if (IsRelaxedPhylipType(f))
                    {
                        sep.clear();
                        sep.append(seqStartColumn - name.length(), ' ');
                    }
                    outf << name << sep << seq.substr(0, nCharsToWrite) << '\n';
                    for (unsigned currIndex = (unsigned)interleaveLen; currIndex < nc; currIndex += (unsigned)interleaveLen)
                    {
                        nCharsToWrite = ((nc - currIndex) > (unsigned)interleaveLen ? (unsigned)interleaveLen : (nc - currIndex));
                        outf << seq.substr(currIndex, nCharsToWrite) << '\n';
                    }
                }
            }
            else
            {
                // not interleaved, and not wrapping
                for (unsigned i = 0; i < nt; ++i)
                {
                    const std::string & name = taxaNames[i];
                    std::string seq = cb.GetMatrixRowAsStr(i);
                    if (IsRelaxedPhylipType(f))
                    {
                        sep.clear();
                        sep.append(seqStartColumn - name.length(), ' ');
                    }
                    outf << name << sep << seq << '\n';
                }
            }
        }
    }
    else if (IsFastaType(f))
    {
        if (interleaveLen < 1)
            interleaveLen = 60; // default FASTA line length
        for (unsigned i = 0; i < nt; ++i)
        {
            nCharsToWrite = (nc > (unsigned)interleaveLen ? (unsigned)interleaveLen : nc);
            const std::string & name = taxaNames[i];
            std::string seq = cb.GetMatrixRowAsStr(i);
            outf << '>' << name << '\n' << seq.substr(0, nCharsToWrite) << '\n';
            for (unsigned currIndex = (unsigned)interleaveLen; currIndex < nc; currIndex += (unsigned)interleaveLen)
            {
                nCharsToWrite = ((nc - currIndex) > (unsigned)interleaveLen ? (unsigned)interleaveLen : (nc - currIndex));
                outf << seq.substr(currIndex, nCharsToWrite) << '\n';
            }
        }
    }
    else
    {
        throw NxsException("writeCharactersBlockToStream requested for unsupported format");
    }
}
Example #2
0
// Fills this matrix from the single protein characters block held by `reader`.
//
// Steps:
//  1. Scan every characters block; exactly one may be of protein type.
//     Non-protein blocks are reported and ignored. Zero or several protein
//     blocks raise ErrorException.
//  2. Report excluded characters, if any.
//  3. Allocate the matrix (active taxa x active characters) and copy taxon
//     labels and cell values, skipping inactive taxa and characters.
//
// Cell encoding as written here: gap and missing cells are stored as 20;
// a cell with exactly one state is stored as CharToDatum(state); a cell with
// any other number of states is stored as CharToDatum('?') and reported.
void AminoacidData::CreateMatrixFromNCL(GarliReader &reader){
	// Checking the count before the first fetch means a reader with no
	// characters blocks reaches the "No characters/data blocks" error below
	// instead of dereferencing whatever GetCharactersBlock(0) returns.
	int proteinBlockIndex = -1;
	for(int num = 0; num < reader.NumCharBlocks(); num++){
		NxsCharactersBlock *candidate = reader.GetCharactersBlock(num);
		if(candidate->GetDataType() != NxsCharactersBlock::protein){
			outman.UserMessage("Ignoring non-protein characters block from Nexus datafile");
			continue;
			}
		if(proteinBlockIndex >= 0)
			throw ErrorException("Multiple characters/data blocks containing protein data found in Nexus datafile!\n\tEither combine the blocks or comment one out.");
		proteinBlockIndex = num;
		}
	if(proteinBlockIndex < 0) throw ErrorException("No characters/data blocks containing protein data found in Nexus datafile!");
	NxsCharactersBlock *charblock = reader.GetCharactersBlock(proteinBlockIndex);

	// list excluded characters using 1-based numbering
	if(charblock->GetNumActiveChar() < charblock->GetNChar()){
		outman.UserMessageNoCR("Excluded characters:\n\t");
		for(int c=0;c<charblock->GetNCharTotal();c++)
			if(charblock->IsExcluded(c)) outman.UserMessageNoCR("%d ", c+1);
		outman.UserMessage("");
		}

	NxsTaxaBlock *taxablock = reader.GetTaxaBlock();

	const int numOrigTaxa = charblock->GetNTax();
	const int numActiveTaxa = charblock->GetNumActiveTaxa();
	const int numOrigChar = charblock->GetNChar();
	const int numActiveChar = charblock->GetNumActiveChar();

	NewMatrix( numActiveTaxa, numActiveChar );

	// read in the data, including taxon names
	// i / j index the compacted matrix (active rows / columns only);
	// origTaxIndex / origIndex index the Nexus block.
	int i=0;
	for( int origTaxIndex = 0; origTaxIndex < numOrigTaxa; origTaxIndex++ ) {
		if(charblock->IsActiveTaxon(origTaxIndex) == false) continue;

		//internally, blanks in taxon names will be stored as underscores
		NxsString tlabel = taxablock->GetTaxonLabel(origTaxIndex);
		tlabel.BlanksToUnderscores();
		SetTaxonLabel( i, tlabel.c_str());

		int j = 0;
		bool firstAmbig = true;
		for( int origIndex = 0; origIndex < numOrigChar; origIndex++ ) {
			if(charblock->IsActiveChar(origIndex) == false) continue;

			unsigned char datum = '\0';
			if(charblock->IsGapState(origTaxIndex, origIndex) == true
				|| charblock->IsMissingState(origTaxIndex, origIndex) == true){
				datum = 20;
				}
			else if(charblock->GetNumStates(origTaxIndex, origIndex) == 1){
				datum = CharToDatum(charblock->GetState(origTaxIndex, origIndex, 0));
				}
			else{
				//multiple states in one cell are not represented; convert to full ambiguity
				if(firstAmbig){
					// Labels are stored by matrix row i, not by the original
					// taxon index, so look the name up with i. The two differ
					// as soon as any earlier taxon is inactive.
					outman.UserMessageNoCR("Partially ambiguous characters of taxon %s converted to full ambiguity:\n\t", TaxonLabel(i));
					firstAmbig = false;
					}
				outman.UserMessageNoCR("%d ", origIndex+1);
				datum = CharToDatum('?');
				}
			SetMatrix( i, j++, datum );
			}
		// terminate the line of ambiguous character numbers, if one was started
		if(firstAmbig == false) outman.UserMessage("");
		i++;
		}
	}
Example #3
0
// Fills this matrix from the reader's characters block and taxa block.
//
// The matrix is allocated as (active taxa x active characters); inactive taxa
// and characters are skipped, so i / j below index the compacted matrix while
// origTaxIndex / origIndex index the Nexus block.
//
// Cell encoding depends on modSpec.IsAminoAcid():
//  - not amino acid: gap/missing cells are stored as 15; otherwise the cell
//    is the bitwise OR of CharToBitwiseRepresentation() over all its states.
//  - amino acid: gap/missing cells are stored as 20; otherwise a single-state
//    cell is stored as CharToAminoAcidNumber(state).
void DataMatrix::CreateMatrixFromNCL(GarliReader &reader){

	NxsCharactersBlock *charblock = reader.GetCharactersBlock();
	NxsTaxaBlock *taxablock = reader.GetTaxaBlock();

	const int numOrigTaxa = charblock->GetNTax();
	const int numActiveTaxa = charblock->GetNumActiveTaxa();
	const int numOrigChar = charblock->GetNChar();
	const int numActiveChar = charblock->GetNumActiveChar();

	NewMatrix( numActiveTaxa, numActiveChar );

	// The data type does not change while reading, so decide it once and
	// share one pair of loops instead of duplicating them per data type.
	const bool aminoAcid = (modSpec.IsAminoAcid() != false);
	const unsigned char missingCode = (aminoAcid ? 20 : 15);

	// read in the data, including taxon names
	int i=0;
	for( int origTaxIndex = 0; origTaxIndex < numOrigTaxa; origTaxIndex++ ) {
		if(charblock->IsActiveTaxon(origTaxIndex) == false) continue;

		SetTaxonLabel( i, taxablock->GetTaxonLabel(origTaxIndex).c_str());

		int j = 0;
		for( int origIndex = 0; origIndex < numOrigChar; origIndex++ ) {
			if(charblock->IsActiveChar(origIndex) == false) continue;

			unsigned char datum = '\0';
			if(charblock->IsGapState(origTaxIndex, origIndex) == true
				|| charblock->IsMissingState(origTaxIndex, origIndex) == true){
				datum = missingCode;
				}
			else{
				const int nstates = charblock->GetNumStates(origTaxIndex, origIndex);
				if(aminoAcid){
					// Multi-state amino acid cells are not supported. Debug
					// builds still stop here; with asserts compiled out the
					// cell is stored as the gap/missing code rather than
					// silently keeping only the first listed state.
					assert(nstates == 1);
					if(nstates == 1)
						datum = CharToAminoAcidNumber(charblock->GetState(origTaxIndex, origIndex, 0));
					else
						datum = missingCode;
					}
				else{
					// OR rather than add: a state listed twice must not
					// carry into a neighbouring bit.
					for(int s=0;s<nstates;s++){
						datum |= CharToBitwiseRepresentation(charblock->GetState(origTaxIndex, origIndex, s));
						}
					}
				}
			SetMatrix( i, j++, datum );
			}
		i++;
		}
	}
Example #4
0
// Fills this matrix from the single nucleotide characters block held by
// `reader`.
//
// Steps:
//  1. Scan every characters block; exactly one may be of nucleotide, dna or
//     rna type. Other blocks are reported and ignored. Zero or several
//     matching blocks raise ErrorException.
//  2. Report excluded characters, if any.
//  3. Allocate the matrix (active taxa x active characters) and copy taxon
//     labels and cell values, skipping inactive taxa and characters.
//
// Cell encoding as written here: gap and missing cells are stored as 15;
// any other cell is the bitwise OR of CharToBitwiseRepresentation() over all
// of its states.
void NucleotideData::CreateMatrixFromNCL(GarliReader &reader){
	// Checking the count before the first fetch means a reader with no
	// characters blocks reaches the "No characters/data blocks" error below
	// instead of dereferencing whatever GetCharactersBlock(0) returns.
	int nucBlockIndex = -1;
	for(int num = 0; num < reader.NumCharBlocks(); num++){
		NxsCharactersBlock *candidate = reader.GetCharactersBlock(num);
		const bool isNucleotide =
			candidate->GetDataType() == NxsCharactersBlock::nucleotide ||
			candidate->GetDataType() == NxsCharactersBlock::dna ||
			candidate->GetDataType() == NxsCharactersBlock::rna;
		if(isNucleotide == false){
			outman.UserMessage("Ignoring non-nucleotide characters block from Nexus datafile");
			continue;
			}
		if(nucBlockIndex >= 0)
			throw ErrorException("Multiple characters/data blocks containing nucleotide data found in Nexus datafile!\n\tEither combine the blocks or comment one out.");
		nucBlockIndex = num;
		}
	if(nucBlockIndex < 0) throw ErrorException("No characters/data blocks containing nucleotide data found in Nexus datafile!");
	NxsCharactersBlock *charblock = reader.GetCharactersBlock(nucBlockIndex);

	// list excluded characters using 1-based numbering
	if(charblock->GetNumActiveChar() < charblock->GetNChar()){
		outman.UserMessageNoCR("Excluded characters:\n\t");
		for(int c=0;c<charblock->GetNCharTotal();c++)
			if(charblock->IsExcluded(c)) outman.UserMessageNoCR("%d ", c+1);
		outman.UserMessage("");
		}

	NxsTaxaBlock *taxablock = reader.GetTaxaBlock();

	const int numOrigTaxa = charblock->GetNTax();
	const int numActiveTaxa = charblock->GetNumActiveTaxa();
	const int numOrigChar = charblock->GetNChar();
	const int numActiveChar = charblock->GetNumActiveChar();

	NewMatrix( numActiveTaxa, numActiveChar );

	// read in the data, including taxon names
	// i / j index the compacted matrix (active rows / columns only);
	// origTaxIndex / origIndex index the Nexus block.
	int i=0;
	for( int origTaxIndex = 0; origTaxIndex < numOrigTaxa; origTaxIndex++ ) {
		if(charblock->IsActiveTaxon(origTaxIndex) == false) continue;

		//internally, blanks in taxon names will be stored as underscores
		NxsString tlabel = taxablock->GetTaxonLabel(origTaxIndex);
		tlabel.BlanksToUnderscores();
		SetTaxonLabel( i, tlabel.c_str());

		int j = 0;
		for( int origIndex = 0; origIndex < numOrigChar; origIndex++ ) {
			if(charblock->IsActiveChar(origIndex) == false) continue;

			unsigned char datum = '\0';
			if(charblock->IsGapState(origTaxIndex, origIndex) == true
				|| charblock->IsMissingState(origTaxIndex, origIndex) == true){
				datum = 15;
				}
			else{
				const int nstates = charblock->GetNumStates(origTaxIndex, origIndex);
				// OR rather than add: a state listed twice must not carry
				// into a neighbouring bit.
				for(int s=0;s<nstates;s++){
					datum |= CharToBitwiseRepresentation(charblock->GetState(origTaxIndex, origIndex, s));
					}
				}
			SetMatrix( i, j++, datum );
			}
		i++;
		}
	}