Exemplo n.º 1
0
/**
|	Interprets the current token of `token` as a set element and adds the
|	matching indices to `destination`.
|
|	The keyword ALL (compared case-insensitively) stands for every index from 0
|	through mapper.GetMaxIndex() inclusive. Any other token is handed to
|	mapper.GetIndicesForLabel().
|
|	`setType` and `cmdName` are only used to build error messages.
|	`destination` may be NULL in the ALL case, in which case nothing is stored
|	but the count is still returned.
|
|	Any NxsException raised while interpreting the token is rethrown as a new
|	NxsException whose message is prefixed with the set type and command name;
|	any other exception is converted into an NxsException that reports the
|	offending token.
|
|	returns the number of indices added.
*/
unsigned NxsSetReader::InterpretTokenAsIndices(NxsToken &token, 
  const NxsLabelToIndicesMapper & mapper, 
  const char * setType, 
  const char * cmdName, 
  NxsUnsignedSet * destination)
	{
	try {
		const std::string t = token.GetToken();
		if (NxsString::case_insensitive_equals(t.c_str(), "ALL"))
			{
			const unsigned m = mapper.GetMaxIndex();
			/* NOTE(review): if GetMaxIndex() can ever return UINT_MAX the loop
			   condition `i <= m` never becomes false -- confirm that the
			   mapper cannot report that value here. */
			if (destination != NULL)
				{
				for (unsigned i = 0; i <= m; ++i)
					destination->insert(i);
				}
			/* Indices 0..m were requested, i.e. m + 1 of them. */
			return m + 1;
			}
		return mapper.GetIndicesForLabel(t, destination);
		}
	catch (const NxsException & x)
		{
		NxsString errormsg = "Error in the ";
		errormsg << setType << " descriptor of a " << cmdName << " command.\n";
		errormsg += x.msg;
		throw NxsException(errormsg, token);
		}
	catch (...)
		{
		NxsString errormsg = "Expecting a ";
		errormsg << setType << " descriptor (number or label) in the " << cmdName << ".  Encountered ";
		errormsg <<  token.GetToken();
		throw NxsException(errormsg, token);
		}
	}
Exemplo n.º 2
0
std::vector<unsigned> NxsSetReader::GetSetAsVector(const NxsUnsignedSet &s)
	{
	std::vector<unsigned> u;
	u.reserve(s.size());
	for (NxsUnsignedSet::const_iterator sIt = s.begin(); sIt != s.end(); ++sIt)
		u.push_back(*sIt);
	return u;
	}
Exemplo n.º 3
0
/**
 * Writes an ASSUMPTIONS block, linked to the characters block titled
 * `newTitle`, that carries the excluded-character set and the default weights
 * of `cb`.
 *
 * Nothing is written when `cb` has no excluded characters and no default
 * weights. Integer weights take precedence: the floating point weights are
 * only consulted when there are no integer weights.
 *
 * @param out       stream that receives the NEXUS text
 * @param cb        characters block to describe (dereferenced unconditionally)
 * @param newTitle  title used in the LINK command and as prefix of the exset name
 */
void writeCoreAssumptions(ostream &out, NxsCharactersBlock * cb, const char * newTitle)
{
	NxsUnsignedSet excludedChars = cb->GetExcludedIndexSet();

	const NxsTransformationManager &defaultsSource = cb->GetNxsTransformationManagerRef();
	std::vector<int> intWeights;
	std::vector<double> realWeights;
	intWeights = defaultsSource.GetDefaultIntWeights();
	if (intWeights.empty())
		realWeights = defaultsSource.GetDefaultDoubleWeights();

	const bool haveExclusions = !excludedChars.empty();
	const bool haveWeights = !intWeights.empty() || !realWeights.empty();
	if (!haveExclusions && !haveWeights)
		return;

	out << "BEGIN ASSUMPTIONS; \n    LINK CHARACTERS = ";
	out << NxsString::GetEscaped(newTitle) << " ;\n";

	if (haveExclusions)
		{
		// The exset is named after the block title with an "ExSet" suffix.
		NxsString exsetName;
		exsetName << newTitle;
		exsetName.append("ExSet");
		NxsUnsignedSetMap exsets;
		exsets[exsetName] = excludedChars;
		NxsWriteSetCommand("EXSET", exsets, out, exsetName.c_str());
		}

	if (haveWeights)
		{
		NxsTransformationManager &blockManager = cb->GetNxsTransformationManagerRef();
		const std::string &wtSetName =  blockManager.GetDefaultWeightSetName();
		// A scratch manager holding only this one weight set is used to emit it.
		NxsTransformationManager scratchManager;
		if (intWeights.empty())
			{
			NxsTransformationManager::ListOfDblWeights realList;
			vecToListOfDblWeights(realWeights, realList);
			scratchManager.AddRealWeightSet(wtSetName, realList, true);
			}
		else
			{
			NxsTransformationManager::ListOfIntWeights intList;
			vecToListOfIntWeights(intWeights, intList);
			scratchManager.AddIntWeightSet(wtSetName, intList, true);
			}
		scratchManager.WriteWtSet(out);
		}

	out << "END;\n";
}
Exemplo n.º 4
0
void writeCharactersAsGapped(ostream &out, NxsCharactersBlock * cb)
{
	if (!cb )
		return;
	NxsTaxaBlockAPI *tb = cb->GetTaxaBlockPtr(NULL);
	if (tb == NULL)
		return;
	NxsCharactersBlock::DataTypesEnum dt = cb->GetDataType();
	const char g = cb->GetGapSymbol();
	const std::string baseTitle = cb->GetTitle();
	if (tb == NULL
		|| dt == NxsCharactersBlock::standard
		|| dt == NxsCharactersBlock::continuous
		|| dt == NxsCharactersBlock::mixed
		|| (g == '\0' &&  !gGapCodeAllPresentSites))
		{
		cb->WriteAsNexus(out);
		writeCoreAssumptions(out, cb, baseTitle.c_str());
		return;
		}
	bool isAutogen = cb->IsAutoGeneratedTitle();
	std::string newTitle = baseTitle;
	newTitle.append("GapsAsMissing");
	cb->SetTitle(newTitle, isAutogen);

	std::set<unsigned> gappedColumns;
	std::set<unsigned> residueColumns;

	out << "BEGIN CHARACTERS;\n";
	cb->WriteBasicBlockCommands(out);

	const unsigned ntaxTotal = tb->GetNTax();
	out << "    DIMENSIONS";
	if (tb)
		{
		const unsigned wod = cb->GetNTaxWithData();
		if (wod > 0 && wod != ntaxTotal)
			out << " NTax=" << wod;
		}
	const unsigned nc = cb->GetNCharTotal();
	out << " NChar=" << nc << ";\n";
	cb->WriteEliminateCommand(out);
	cb->SetGapSymbol('\0');
	cb->WriteFormatCommand(out);
	cb->SetGapSymbol(g);

	cb->WriteCharStateLabelsCommand(out);

	std::vector<int>			intWts;
	std::vector<double>			dblWts;
	NxsUnsignedSet activeExSet = cb->GetExcludedIndexSet();

	const NxsTransformationManager &tm = cb->GetNxsTransformationManagerRef();
	intWts = tm.GetDefaultIntWeights();
	if (intWts.empty())
		dblWts = tm.GetDefaultDoubleWeights();

	unsigned width = tb->GetMaxTaxonLabelLength();
	out << "Matrix\n";
	unsigned begCharInd = 0;
	unsigned endCharInd = nc;
	for (unsigned i = 0; i < ntaxTotal; i++)
		{
		if (cb->TaxonIndHasData(i))
			{
			const std::string currTaxonLabel = NxsString::GetEscaped(tb->GetTaxonLabel(i));
			out << currTaxonLabel;
			unsigned currTaxonLabelLen = (unsigned)currTaxonLabel.size();
			unsigned diff = width - currTaxonLabelLen;
			for (unsigned k = 0; k < diff + 5; k++)
				out << ' ';

			const NxsDiscreteStateRow & row = cb->GetDiscreteMatrixRow(i);
			if (!row.empty())
				{
				const NxsDiscreteDatatypeMapper * dm = cb->GetDatatypeMapperForChar(0);
				if (dm == NULL)
					throw NxsNCLAPIException("No DatatypeMapper in WriteStatesForTaxonAsNexus");
				if (cb->IsMixedType())
					throw NxsNCLAPIException("Mixed datatypes are not supported by the NEXUSgapcode program.");
				if (cb->IsTokens())
					{
					for (unsigned charInd = begCharInd; charInd < endCharInd; ++charInd)
						{
						int sc = row[charInd];
						out << ' ';
						if (sc == NXS_GAP_STATE_CODE)
							out << dm->GetMissingSymbol();
						else
							{
							NxsString sl = cb->GetStateLabel(charInd, sc); /*v2.1to2.2 2 */
							if (sl == " ")
								{
								NxsString errormsg = "Writing character state ";
								errormsg << 1 + sc << " for character " << 1+charInd << ", but no appropriate chararcter label or symbol was found.";
								throw NxsNCLAPIException(errormsg);
								}
							else
								out  << NxsString::GetEscaped(sl);
							}
						}
					}
				else
					{
					std::vector<NxsDiscreteStateCell>::const_iterator endIt = row.begin() + begCharInd;
					std::vector<NxsDiscreteStateCell>::const_iterator begIt = endIt;
					if (endCharInd == row.size())
						endIt = row.end();
					else
						endIt += endCharInd - begCharInd;
					unsigned j = begCharInd;
					for (; begIt != endIt; ++begIt, ++j)
						{
						NxsDiscreteStateCell c;
						if (*begIt == NXS_GAP_STATE_CODE)
							{
							c = NXS_MISSING_CODE;
							gappedColumns.insert(j);
							}
						else
							{
							c = *begIt;
							if (gGapCodeAllPresentSites)
								gappedColumns.insert(j);
							residueColumns.insert(j);
							}
						dm->WriteStateCodeAsNexusString(out, c, true);
						}
					}
				}
			out << '\n';
			}
		}
	out << ";\n";
	cb->WriteSkippedCommands(out);
	out << "END;\n";


	writeCoreAssumptions(out, cb, newTitle.c_str());
	const unsigned nGappedCols = gappedColumns.size();
	if (nGappedCols > 0)
		{
		newTitle = baseTitle;
		newTitle.append("GapsAsBinary");
		cb->SetTitle(newTitle, isAutogen);
			out << "BEGIN CHARACTERS;\n";
		cb->WriteBasicBlockCommands(out);

		out << "    DIMENSIONS";
		if (tb)
			{
			const unsigned wod = cb->GetNTaxWithData();
			if (wod > 0 && wod != ntaxTotal)
				out << " NTax=" << wod;
			}
		out << " NChar=" << nGappedCols << ";\n";
		out << " CharStateLabels " ;
		unsigned currChNumber = 1;
		std::set<unsigned>::iterator gcIt = gappedColumns.begin();
		out << currChNumber++ << " col_" << (1 + *gcIt);
		for (++gcIt ; gcIt != gappedColumns.end(); ++gcIt)
			out << ",\n    " << currChNumber++ << " col_" << (1 + *gcIt);
		out << " ;\n" ;
		out << "Format Datatype = Standard Symbols=\"01\" missing = '?' ;\n";

		out << "Matrix\n";
		for (unsigned i = 0; i < ntaxTotal; i++)
			{
			if (cb->TaxonIndHasData(i))
				{
				const std::string currTaxonLabel = NxsString::GetEscaped(tb->GetTaxonLabel(i));
				out << currTaxonLabel;
				unsigned currTaxonLabelLen = (unsigned)currTaxonLabel.size();
				unsigned diff = width - currTaxonLabelLen;
				for (unsigned k = 0; k < diff + 5; k++)
					out << ' ';

				const NxsDiscreteStateRow & row = cb->GetDiscreteMatrixRow(i);
				for (std::set<unsigned>::iterator cIt = gappedColumns.begin(); cIt != gappedColumns.end(); ++cIt)
					{
					NxsDiscreteStateCell sc = row[*cIt];
					if (sc == NXS_GAP_STATE_CODE)
						{
						if (gGapCodeAllAbsentSites || (residueColumns.find(*cIt) != residueColumns.end()))
							out << '0';
						else
							out << '?';
						}
					else if (sc == NXS_MISSING_CODE) 
						{
						out << '?';
						}
					else 
						{
						out << '1';
						}
					}
				out << '\n';
				}
			}
		out << ";\n";
		cb->WriteSkippedCommands(out);
		out << "END;\n";
		}

	std::vector<int>			gapintWts;
	std::vector<double>			gapdblWts;
	std::set<unsigned>			gapactiveExSet;
	if (!activeExSet.empty())
		{
		unsigned gapind = 0;
		for (std::set<unsigned>::iterator cIt = gappedColumns.begin(); cIt != gappedColumns.end(); ++cIt, ++gapind)
			{
			if (activeExSet.find(*cIt) != activeExSet.end())
				gapactiveExSet.insert(gapind);
			}
		}
	if (!intWts.empty())
		{
		for (std::set<unsigned>::iterator cIt = gappedColumns.begin(); cIt != gappedColumns.end(); ++cIt)
			gapintWts.push_back(intWts[*cIt]);
		}
	if (!dblWts.empty())
		{
		for (std::set<unsigned>::iterator cIt = gappedColumns.begin(); cIt != gappedColumns.end(); ++cIt)
			gapdblWts.push_back(dblWts[*cIt]);
		}

	if (!(gapactiveExSet.empty() && gapintWts.empty() && gapdblWts.empty()))
		{
		out << "BEGIN ASSUMPTIONS; \n    LINK CHARACTERS = ";
		out << NxsString::GetEscaped(newTitle) << " ;\n";
		if (!gapactiveExSet.empty())
			{
			NxsString exsetName;
			exsetName << newTitle;
			exsetName.append("GapExSet");
			NxsUnsignedSetMap m;
			m[exsetName] = gapactiveExSet;
			NxsWriteSetCommand("EXSET", m, out, exsetName.c_str());;
			}
		if (!(gapintWts.empty() && gapdblWts.empty()))
			{
			NxsTransformationManager &cbntm = cb->GetNxsTransformationManagerRef();
			std::string wtSetName =  cbntm.GetDefaultWeightSetName();
			wtSetName.append("GapWtSet");
			NxsTransformationManager ntm;
			if (!gapintWts.empty())
				{
				NxsTransformationManager::ListOfIntWeights iw;
				vecToListOfIntWeights(gapintWts, iw);
				ntm.AddIntWeightSet(wtSetName, iw, true);
				}
			else
				{
				NxsTransformationManager::ListOfDblWeights dw;
				vecToListOfDblWeights(gapdblWts, dw);
				ntm.AddRealWeightSet(wtSetName, dw, true);
				}
			ntm.WriteWtSet(out);
			}
		out << "END;\n";
		}


	cb->SetTitle(baseTitle, isAutogen);
}
Exemplo n.º 5
0
/**
|	Reads a set definition, starting at the current token of `token` and
|	stopping at the terminating ';' (which is left as the current token).
|
|	Each element is a single label/number, or a range "a - b" (where b may be
|	'.', meaning mapper.GetMaxIndex()), optionally followed by "\ stride".
|	The indices read are inserted into `destination`; a NULL `destination` is
|	allowed and simply discards them.
|
|	When `taboo` is not NULL, a ',' also ends the definition, and an index that
|	is already in `taboo` raises an NxsException.
|
|	Malformed ranges and strides raise an NxsException.
*/
void NxsSetReader::ReadSetDefinition(
  NxsToken &token, 
  const NxsLabelToIndicesMapper & mapper, 
  const char * setType, /* "TAXON" or "CHARACTER" -- for error messages only */ 
  const char * cmdName, /* command name -- "TAXSET" or "EXSET"-- for error messages only */ 
  NxsUnsignedSet * destination, /** to be filled */
  const NxsUnsignedSet * taboo)
	{
	/* Sentinel meaning "the previous element cannot start a range". */
	const unsigned noRangeStart = UINT_MAX;

	NxsUnsignedSet parsed;
	NxsUnsignedSet discarded;
	if (destination == NULL)
		destination = & discarded;
	unsigned rangeStart = noRangeStart;
	std::vector<unsigned> overlap;
	while (!token.Equals(";"))
		{
		if (taboo && token.Equals(","))
			return;

		if (!token.Equals("-"))
			{
			/* A plain element: a number, a label, or the name of a set. */
			parsed.clear();
			const unsigned nAdded = NxsSetReader::InterpretTokenAsIndices(token, mapper, setType, cmdName, &parsed);
			if (taboo != NULL)
				{
				set_intersection(taboo->begin(), taboo->end(), parsed.begin(), parsed.end(), back_inserter(overlap));
				if (!overlap.empty())
					{
					NxsString errormsg;
					errormsg << "Illegal repitition of an index (" << 1 + *(overlap.begin()) << ") in multiple subsets.";
					throw NxsException(errormsg, token);
					}
				}
			if (nAdded == 1 )
				{
				/* Only a single index may later serve as the start of a range. */
				rangeStart = *(parsed.begin());
				destination->insert(rangeStart);
				}
			else
				{
				rangeStart = noRangeStart;
				destination->insert(parsed.begin(), parsed.end());
				}
			token.GetNextToken();
			continue;
			}

		/* The current token is '-': the previous element opens a range. */
		if (rangeStart == noRangeStart)
			{
			NxsString errormsg = "The '-' must be preceded by number or a ";
			errormsg << setType << " label in the " << cmdName << " command.";
			throw NxsException(errormsg, token);
			}
		token.GetNextToken();
		if (token.Equals(";") || token.Equals("\\"))
			{
			NxsString errormsg = "Range in the ";
			errormsg << setType << " set definition in the " << cmdName << " command must be closed with a number or label.";
			throw NxsException(errormsg, token);
			}

		unsigned rangeEnd;
		if (!token.Equals("."))
			{
			parsed.clear();
			const unsigned nEndIndices = NxsSetReader::InterpretTokenAsIndices(token, mapper, setType, cmdName, &parsed);
			if (nEndIndices != 1)
				{
				NxsString errormsg = "End of a range in a ";
				errormsg << setType << " set definition in the " << cmdName << " command must be closed with a single number or label (not a set).";
				throw NxsException(errormsg, token);
				}
			rangeEnd = *(parsed.begin());
			if (rangeEnd < rangeStart)
				{
				NxsString errormsg = "End of a range in a ";
				errormsg << setType << " set definition in the " << cmdName << " command must be a larger index than the start of the range (found ";
				errormsg << rangeStart + 1 << " - " << token.GetToken();
				throw NxsException(errormsg, token);
				}
			}
		else
			rangeEnd = mapper.GetMaxIndex();

		token.GetNextToken();
		if (!token.Equals("\\"))
			AddRangeToSet(rangeStart, rangeEnd, 1, destination, taboo, token);
		else
			{
			token.GetNextToken();
			NxsString strideText = token.GetToken(); 
			/* A token that is not a number leaves the stride at 0, which is
			   rejected just below together with an explicit 0. */
			unsigned stride = 0;
			try
				{
				stride = strideText.ConvertToUnsigned();
				}
			catch (const NxsString::NxsX_NotANumber &)
				{}
			if (stride == 0)
				{
				NxsString errormsg = "Expecting a positive number indicating the 'stride' after the \\ in the ";
				errormsg << setType << " set definition in the " << cmdName << " command. Encountered ";
				errormsg << strideText;
				throw NxsException(errormsg, token);
				}
			AddRangeToSet(rangeStart, rangeEnd, stride, destination, taboo, token);
			token.GetNextToken();
			}
		rangeStart = noRangeStart;
		}
	}
Exemplo n.º 6
0
void NxsSetReader::WriteSetAsNexusValue(const NxsUnsignedSet & nxsset, std::ostream & out)
	{
	NxsUnsignedSet::const_iterator currIt = nxsset.begin();
	const NxsUnsignedSet::const_iterator endIt = nxsset.end();
	if (currIt == endIt)
		return;
	unsigned rangeBegin = 1 + *currIt++;
	if (currIt == endIt)
		{
		out << ' ' << rangeBegin;
		return;
		}
	unsigned prev = 1 + *currIt++;
	if (currIt == endIt)
		{
		out << ' ' << rangeBegin << ' ' << prev;
		return;
		}
	unsigned stride = prev - rangeBegin;
	unsigned curr = 1 + *currIt++;
	bool inRange = true;
	while (currIt != endIt)
		{
		if (inRange)
			{
			if (curr - prev != stride)
				{
				if (prev - rangeBegin == stride)
					{
					out << ' ' << rangeBegin;
					rangeBegin = prev;
					stride = curr - prev;
					}
				else
					{
					if (stride > 1)
						out << ' ' << rangeBegin << '-' << prev << " \\ " << stride;
					else
						out << ' ' << rangeBegin << '-' << prev ;
					inRange = false;
					}
				}
			}
		else
			{
			inRange = true;
			rangeBegin = prev;
			stride = curr - prev;
			}
		prev = curr;
		curr = 1 + *currIt;
		currIt++;
		}
	if (inRange)
		{
		if (curr - prev != stride)
			{
			if (prev - rangeBegin == stride)
				out << ' ' << rangeBegin << ' ' << prev;
			else
				{
				if (stride > 1)
					out << ' ' << rangeBegin << '-' << prev << " \\ " << stride;
				else
					out << ' ' << rangeBegin << '-' << prev ;
				}
			out << ' ' << curr;
			}
		else
			{
			if (stride > 1)
				out << ' ' << rangeBegin << '-' << curr << " \\ " << stride;
			else
				out << ' ' << rangeBegin << '-' << curr ;
			}
		}
	else
		out << ' ' << prev << ' ' << curr;
	}
Exemplo n.º 7
0
/**
 * Compresses the columns of `mat` into unique site patterns.
 *
 * For every character with a positive acting weight (and, if given, present
 * in `charactersToInclude`) the column of state codes over the included taxa
 * is looked up in `patternSet`. A new pattern is inserted with count 1 and the
 * character's weight; an existing pattern has its count incremented and the
 * weight added to its sum.
 *
 * Acting weights are the integer weights of `mat` if it has any, otherwise its
 * floating point weights if it has any, otherwise 1.0; characters excluded in
 * `mat` get weight 0 and are therefore skipped.
 *
 * @param mat                     the data source
 * @param patternSet              receives the patterns; existing content is kept
 * @param compressedIndexPattern  if not 0L, resized to the number of characters
 *                                and, for each character that was processed,
 *                                set to point at its pattern in `patternSet`;
 *                                entries of skipped characters are not written
 * @param taxaToInclude           if not 0L, the indices of the taxa in `mat` to
 *                                include (if 0L all taxa are included). Excluded
 *                                taxa make the patterns shorter: they are not
 *                                filled with empty codes, the taxon indexing is
 *                                frameshifted instead, and the client code must
 *                                keep track of these frameshifts
 * @param charactersToInclude     if not 0L, the indices of the characters to
 *                                consider (if 0L all characters are considered)
 * @return the number of patterns added to `patternSet`; 0 if either include
 *         set is given but empty
 * @throws NxsException if the largest index of either include set is out of
 *         range for `mat`
 */
unsigned NxsCompressDiscreteMatrix(
    const NxsCXXDiscreteMatrix & mat,       /**< is the data source */
    std::set<NxsCharacterPattern> & patternSet, /* matrix that will hold the compressed columns */
    std::vector<const NxsCharacterPattern *> * compressedIndexPattern, /** optional map from character index to pattern */
    const NxsUnsignedSet * taxaToInclude, /**< optional subset of taxon indices */
    const NxsUnsignedSet * charactersToInclude)
{
    const unsigned origNumPatterns = (unsigned) patternSet.size();
    const unsigned ntax = mat.getNTax();
    unsigned patternLength = ntax;
    const unsigned nchar = mat.getNChar();
    if (compressedIndexPattern)
    {
        compressedIndexPattern->resize(nchar);
    }
    NxsUnsignedSet allTaxaInds;
    if (taxaToInclude)
    {
        if (taxaToInclude->empty())
        {
            return 0; // might want to warn about this!
        }
        const unsigned lastTaxonIndex = *(taxaToInclude->rbegin());
        if (lastTaxonIndex >= ntax)
        {
            throw NxsException("Taxon index in taxaToInclude argument to NxsCompressDiscreteMatrix is out of range");
        }
        // One state code is stored per *included* taxon, so that is the
        // pattern length (subtracting here would count the excluded taxa).
        patternLength = (unsigned) taxaToInclude->size();
    }
    else
    {
        for (unsigned i = 0; i < ntax; ++i) {
            allTaxaInds.insert(i);
        }
        taxaToInclude = &allTaxaInds;
    }
    if (charactersToInclude)
    {
        if (charactersToInclude->empty())
        {
            return 0; // might want to warn about this!
        }
        const unsigned lastColumnIndex = *(charactersToInclude->rbegin());
        if (lastColumnIndex >= nchar)
        {
            throw NxsException("Character index in charactersToInclude argument to NxsCompressDiscreteMatrix is out of range");
        }
    }

    // Create actingWeights vector and copy the integer weights from mat into it
    // If there are no integer weights in mat, copy the floating point weights instead
    // if floating point weights have been defined
    const std::vector<int> & iwts = mat.getIntWeightsConst();
    std::vector<double> actingWeights(nchar, 1.0);
    if (!iwts.empty())
    {
        NCL_ASSERT(iwts.size() >= nchar);
        for (unsigned j = 0; j < nchar; ++j) {
            actingWeights[j] = (double)iwts.at(j);
        }
    }
    else
    {
        const std::vector<double> & dwts = mat.getDblWeightsConst();
        if (!dwts.empty())
        {
            actingWeights = dwts;
            NCL_ASSERT(actingWeights.size() == nchar);
        }
    }

    // Set corresponding actingWeights elements to zero if any characters have been excluded in mat
    const NxsUnsignedSet & excl = mat.getExcludedCharIndices();
    for (NxsUnsignedSet::const_iterator eIt = excl.begin(); eIt != excl.end(); ++eIt)
    {
        NCL_ASSERT(*eIt < nchar);
        actingWeights[*eIt] = 0.0;
    }

    NxsCharacterPattern patternTemp;
    patternTemp.count = 1;
    for (unsigned j = 0; j < nchar; ++j)
    {
        // The vector is indexed directly: taking &actingWeights[0] up front
        // would be invalid when there are no characters.
        const double patternWeight = actingWeights[j];
        const bool shouldInclude = (charactersToInclude == 0L || (charactersToInclude->find(j) != charactersToInclude->end()));
        if (patternWeight > 0.0 &&  shouldInclude)
        {
            // Build up a vector representing the pattern of state codes at this site
            patternTemp.stateCodes.clear();
            patternTemp.stateCodes.reserve(patternLength);
            patternTemp.sumOfPatternWeights = patternWeight;

            unsigned indexInPattern = 0;
            for (NxsUnsignedSet::const_iterator taxIndIt = taxaToInclude->begin(); taxIndIt != taxaToInclude->end(); ++taxIndIt, ++indexInPattern)
            {
                const unsigned taxonIndex = *taxIndIt;
                const NxsCDiscreteStateSet * row    = mat.getRow(taxonIndex);
                const NxsCDiscreteStateSet code = row[j];
                patternTemp.stateCodes.push_back(code);
            }
            NCL_ASSERT(indexInPattern == patternLength);

            // lower_bound gives both the "already present?" answer and the
            // insertion hint with a single search.
            std::set<NxsCharacterPattern>::iterator lowBoundLoc = patternSet.lower_bound(patternTemp);
            if ((lowBoundLoc == patternSet.end()) || (patternTemp < *lowBoundLoc))
            {
                std::set<NxsCharacterPattern>::iterator insertedIt = patternSet.insert(lowBoundLoc, patternTemp);
                if (compressedIndexPattern)
                {
                    const NxsCharacterPattern & patInserted = *insertedIt;
                    (*compressedIndexPattern)[j] = &patInserted;
                }
            }
            else
            {
                NCL_ASSERT(patternTemp == *lowBoundLoc);
                // Updated through a set iterator; presumably these two members
                // are declared mutable and do not take part in the ordering.
                lowBoundLoc->sumOfPatternWeights += patternWeight;
                lowBoundLoc->count += 1;
                if (compressedIndexPattern)
                {
                    (*compressedIndexPattern)[j] = &(*lowBoundLoc);
                }
            }
        }
    }
    return (unsigned)patternSet.size() - origNumPatterns;
}
Exemplo n.º 8
0
/**
 * Rebuilds this object from the characters block `cb`.
 *
 * All fields of nativeCMatrix and the buffers that back it (symbolsStringAlias,
 * matrixAlias, stateListAlias, stateListPosAlias) as well as the weights and
 * the exclusion set are reset first; if `cb` is NULL the object is left in
 * that empty state.
 *
 * Otherwise the state codes of `cb` are recoded and copied into matrixAlias,
 * the state list / symbols list are built, and the default weights and the
 * excluded index set of `cb` are copied.
 *
 * @param cb                 source block (may be NULL)
 * @param gapsToMissing      with standardizeCoding, a gap cell is recoded to
 *                           the missing code when true and to -1 when false;
 *                           also controls whether -1 is listed among the
 *                           states of the missing entry
 * @param toInclude          indices of the characters to copy; 0L means all
 *                           indices below cb->GetNChar()
 * @param standardizeCoding  when true the missing code becomes nStates and an
 *                           entry for it is appended to the symbols and state
 *                           lists; when false NXS_MISSING_CODE (and
 *                           NXS_GAP_STATE_CODE) are kept as they are
 * @throws NxsException if `cb` has no mapper, the included characters use
 *         zero or more than one mapper, the datatype is outside
 *         [LowestNxsCDatatype, HighestNxsCDatatype], or the symbols string is
 *         shorter than the number of states
 */
void NxsCXXDiscreteMatrix::Initialize(const NxsCharactersBlock * cb, bool gapsToMissing, const NxsUnsignedSet * toInclude, bool standardizeCoding)
{
    // Start from a clean slate so that a NULL block yields an empty matrix.
    this->nativeCMatrix.stateList = 0L;
    this->nativeCMatrix.stateListPos = 0L;
    this->nativeCMatrix.matrix = 0L;
    this->nativeCMatrix.symbolsList = 0L;
    this->nativeCMatrix.nStates = 0;
    this->nativeCMatrix.nChar = 0;
    this->nativeCMatrix.nTax = 0L;
    this->nativeCMatrix.nObservedStateSets = 0;
    this->nativeCMatrix.datatype = NxsAltGeneric_Datatype;
    this->symbolsStringAlias.clear();
    this->matrixAlias.Initialize(0, 0);
    this->stateListAlias.clear();
    this->stateListPosAlias.clear();
    this->intWts.clear();
    this->dblWts.clear();
    this->activeExSet.clear();
    if (cb == NULL)
    {
        return;
    }
    std::vector<const NxsDiscreteDatatypeMapper *> mappers = cb->GetAllDatatypeMappers();
    if (mappers.empty() || mappers[0] == NULL)
    {
        throw NxsException("no mappers");
    }

    // Collect the distinct mappers used by the characters that will be copied.
    std::set <const NxsDiscreteDatatypeMapper * > usedMappers;
    NxsUnsignedSet scratchSet;
    if (toInclude == 0L)
    {
        for (unsigned i = 0; i < cb->GetNChar(); ++i) {
            scratchSet.insert(i);
        }
        toInclude = &scratchSet;
    }
    for (NxsUnsignedSet::const_iterator indIt = toInclude->begin(); indIt != toInclude->end(); ++indIt)
    {
        unsigned charIndex = *indIt;
        usedMappers.insert(cb->GetDatatypeMapperForChar(charIndex));
    }


    // Only a single mapper for all included characters is supported.
    if (usedMappers.size() > 1)
    {
        throw NxsException("too many mappers");
    }
    if (usedMappers.empty())
    {
        throw NxsException("no mappers - or empty charset");
    }


    const NxsDiscreteDatatypeMapper & mapper = **usedMappers.begin();
    const NxsDiscreteStateMatrix & rawMatrix = cb->GetRawDiscreteMatrixRef();

    NxsCharactersBlock::DataTypesEnum inDatatype = mapper.GetDatatype();
    if (inDatatype < LowestNxsCDatatype || inDatatype > HighestNxsCDatatype)
    {
        throw NxsException("Datatype cannot be converted to NxsCDiscreteMatrix");
    }
    this->nativeCMatrix.datatype = NxsAltDatatypes(inDatatype);
    this->nativeCMatrix.nStates = mapper.GetNumStates();
    const std::string fundamentalSymbols = mapper.GetSymbols();
    const std::string fundamentalSymbolsPlusGaps = mapper.GetSymbolsWithGapChar();
    // The mapper is considered to have gaps if adding the gap char changes the symbols.
    const bool hadGaps = !(fundamentalSymbols == fundamentalSymbolsPlusGaps);

    this->symbolsStringAlias = fundamentalSymbols;
    char missingSym = cb->GetMissingSymbol();
    const NxsCDiscreteState_t newMissingStateCode = (standardizeCoding ? (NxsCDiscreteState_t) this->nativeCMatrix.nStates : (NxsCDiscreteState_t) NXS_MISSING_CODE);
    NCL_ASSERT((int)NXS_MISSING_CODE < 0);
    NCL_ASSERT((int)NXS_GAP_STATE_CODE < 0);
    // sclOffset is the most negative raw state code that can occur.
    NxsDiscreteStateCell sclOffsetV;
    if (hadGaps)
    {
        sclOffsetV = std::min((NxsDiscreteStateCell)NXS_GAP_STATE_CODE, (NxsDiscreteStateCell)NXS_MISSING_CODE);
    }
    else
    {
        sclOffsetV = NXS_MISSING_CODE;
    }
    const NxsDiscreteStateCell sclOffset(sclOffsetV);

    // recodeVec is the translation table from raw to recoded state codes.
    // recodeArr points negSCLOffset elements into it so that it can be indexed
    // directly with the (negative) gap and missing codes as well as with the
    // non-negative codes. Unset entries hold -2.
    const NxsDiscreteStateCell negSCLOffset = -sclOffset;
    const unsigned nMapperStateCodes = mapper.GetNumStateCodes();
    const unsigned recodeVecLen = nMapperStateCodes;
    const unsigned nMapperPosStateCodes = nMapperStateCodes + sclOffset;
    std::vector<NxsCDiscreteState_t> recodeVec(recodeVecLen + negSCLOffset, -2);
    NxsCDiscreteState_t * recodeArr = &recodeVec[negSCLOffset];

    if (fundamentalSymbols.length() < this->nativeCMatrix.nStates)
    {
        throw NxsException("Fundamental states missing from the symbols string");
    }
    const unsigned nfun_sym = (const unsigned)fundamentalSymbols.length();
    // Fundamental states map to themselves; each gets a one-element state list entry.
    for (NxsCDiscreteState_t i = 0; i < (NxsCDiscreteState_t) this->nativeCMatrix.nStates; ++i)
    {
        // NOTE(review): this guard requires the symbol to equal '\0', and under
        // that condition the assertion inside cannot fail -- looks like
        // leftover debugging code; confirm the intended check.
        if (i < (NxsCDiscreteState_t)nfun_sym && (NxsCDiscreteState_t)fundamentalSymbols[i] == '\0' && mapper.PositionInSymbols(fundamentalSymbols[i]) != (NxsDiscreteStateCell) i)
        {
            NCL_ASSERT(i >= (NxsCDiscreteState_t)nfun_sym || fundamentalSymbols[i] == '\0' || mapper.PositionInSymbols(fundamentalSymbols[i]) == (NxsDiscreteStateCell) i);
        }
#       if !defined (NDEBUG)
        const std::set<NxsDiscreteStateCell>     & ss =  mapper.GetStateSetForCode(i);
        NCL_ASSERT(ss.size() == 1);
        NCL_ASSERT(*ss.begin() == i);
#       endif
        stateListAlias.push_back(1);
        stateListAlias.push_back(i);
        stateListPosAlias.push_back((unsigned) 2*i);
        recodeArr[i] = i;
    }

    //NXS_INVALID_STATE_CODE

    if (hadGaps)
    {
        if (standardizeCoding)
        {
            recodeArr[NXS_GAP_STATE_CODE] = ((hadGaps && gapsToMissing) ? newMissingStateCode : -1);
        }
        else
        {
            recodeArr[NXS_GAP_STATE_CODE] = NXS_GAP_STATE_CODE;
        }
    }

    // Without an explicit missing symbol fall back to the gap symbol (if the
    // mapper has gaps) or to '?'.
    if (missingSym == '\0')
    {
        missingSym = (hadGaps ? mapper.GetGapSymbol() : '?');
    }
    else
    {
        NCL_ASSERT(NXS_MISSING_CODE == mapper.GetStateCodeStored(missingSym));
    }
    recodeArr[NXS_MISSING_CODE] = newMissingStateCode;
    // Number of states listed for "missing": every fundamental state, plus the
    // gap (-1) unless gaps are folded into missing.
    const unsigned nCodesInMissing  = this->nativeCMatrix.nStates + (gapsToMissing ?  0 : 1);
    if (standardizeCoding)
    {
        this->symbolsStringAlias.append(1, missingSym);
        stateListPosAlias.push_back(2*this->nativeCMatrix.nStates);
        stateListAlias.push_back(nCodesInMissing);
        if (!gapsToMissing)
        {
            stateListAlias.push_back(-1);
        }
        for (NxsCDiscreteState_t i = 0; i < (NxsCDiscreteState_t) this->nativeCMatrix.nStates; ++i) {
            stateListAlias.push_back(i);
        }
    }

    // Remaining (multi-state) codes of the mapper get consecutive new codes,
    // except those that are folded into the missing code.
    NxsCDiscreteState_t nextStateCode = (standardizeCoding ? (newMissingStateCode + 1) : this->nativeCMatrix.nStates);
    for (NxsDiscreteStateCell i = (NxsDiscreteStateCell) this->nativeCMatrix.nStates; i < (NxsDiscreteStateCell) nMapperPosStateCodes; ++i)
    {
        const std::set<NxsDiscreteStateCell>     &ss = mapper.GetStateSetForCode( i);
        const unsigned ns = (const unsigned)ss.size();
        // A non-polymorphic set with nCodesInMissing (or one more) states is treated as missing.
        const bool mapToMissing  = (!mapper.IsPolymorphic(i) && (nCodesInMissing + 1 == ns || nCodesInMissing == ns));
        if (mapToMissing)
        {
            recodeArr[i] = newMissingStateCode;
        }
        else
        {
            recodeArr[i] = nextStateCode++;
            stateListPosAlias.push_back((unsigned)stateListAlias.size());
            stateListAlias.push_back(ns);
            for (std::set<NxsDiscreteStateCell>::const_iterator sIt = ss.begin(); sIt != ss.end(); ++sIt) {
                stateListAlias.push_back((NxsCDiscreteState_t) *sIt);
            }
            // Codes whose NEXUS string is not a single character get ' ' as symbol.
            std::string stateName = mapper.StateCodeToNexusString(i);
            if (stateName.length() != 1)
            {
                this->symbolsStringAlias.append(1, ' ');
            }
            else
            {
                this->symbolsStringAlias.append(1, stateName[0]);
            }
        }
    }
    NCL_ASSERT(stateListPosAlias.size() == (unsigned)nextStateCode);
    NCL_ASSERT(symbolsStringAlias.size() == (unsigned)nextStateCode);
    this->nativeCMatrix.nObservedStateSets = nextStateCode;

    // Allocate the recoded matrix: one row per row of the raw matrix, one
    // column per included character.
    this->nativeCMatrix.nTax = (unsigned)rawMatrix.size();
    this->nativeCMatrix.nChar = (this->nativeCMatrix.nTax == 0 ? 0 : toInclude->size());
    this->matrixAlias.Initialize(this->nativeCMatrix.nTax, this->nativeCMatrix.nChar);
    nativeCMatrix.matrix = matrixAlias.GetAlias();
    const unsigned nt = this->nativeCMatrix.nTax;
    const unsigned nc = this->nativeCMatrix.nChar;
    for (unsigned r = 0; r < nt; ++r)
    {
        NxsCDiscreteStateSet     * recodedRow = nativeCMatrix.matrix[r];
        const std::vector<NxsDiscreteStateCell> & rawRowVec = rawMatrix[r];
        if (rawRowVec.empty())
        {
            // A row without data is filled with the recoded missing code.
            NxsCDiscreteState_t recodedMissing = recodeArr[NXS_MISSING_CODE];
            for (unsigned c = 0; c < nc; ++c) {
                *recodedRow++ = recodedMissing;
            }
        }
        else
        {
            // NOTE(review): the row is indexed with the character indices from
            // toInclude, which this assertion (size >= nc) does not bound --
            // confirm that the indices are always within the row.
            NCL_ASSERT(rawRowVec.size() >= nc);
            const NxsDiscreteStateCell * rawRow = &rawRowVec[0];
            NxsUnsignedSet::const_iterator includedIt = toInclude->begin();
            for (unsigned c = 0; c < nc; ++c)
            {
                unsigned charIndex = *includedIt++;
                const NxsDiscreteStateCell rawC = rawRow[charIndex];
                // This branch only re-asserts its own negated condition, i.e.
                // it is a debugging aid with no effect beyond the assertion.
                if ((unsigned)(rawC +  negSCLOffset) >= recodeVecLen)
                {
                    NCL_ASSERT((unsigned)(rawC +  negSCLOffset) < recodeVecLen);
                }
                NCL_ASSERT(rawC >= sclOffset);
                const NxsCDiscreteState_t recodedC = recodeArr[rawC];
                NCL_ASSERT(recodedC > -2 || !standardizeCoding);
                NCL_ASSERT(recodedC < nextStateCode);
                *recodedRow++ = recodedC;
            }
        }
    }
    // Publish the backing buffers through the C struct. These pointers refer
    // to storage owned by the member containers.
    nativeCMatrix.symbolsList = symbolsStringAlias.c_str();
    nativeCMatrix.stateListPos = &stateListPosAlias[0];
    nativeCMatrix.stateList = &stateListAlias[0];

    // Copy the default weights (integer weights take precedence) and the
    // excluded characters. The two clear() calls repeat the reset done at the
    // top of the function.
    intWts.clear();
    dblWts.clear();
    const NxsTransformationManager &tm = cb->GetNxsTransformationManagerRef();
    intWts = tm.GetDefaultIntWeights();
    if (intWts.empty())
    {
        dblWts = tm.GetDefaultDoubleWeights();
    }
    activeExSet = cb->GetExcludedIndexSet();
}