Пример #1
0
/**
|	returns the number of indices added.
*/
unsigned NxsSetReader::InterpretTokenAsIndices(NxsToken &token, 
  const NxsLabelToIndicesMapper & mapper, 
  const char * setType, 
  const char * cmdName, 
  NxsUnsignedSet * destination)
	{
	try {
		const std::string t = token.GetToken();
		if (NxsString::case_insensitive_equals(t.c_str(), "ALL"))
			{
			unsigned m = mapper.GetMaxIndex();
			NxsUnsignedSet s;
			for (unsigned i = 0; i <= m; ++i)
				s.insert(i);
			destination->insert(s.begin(), s.end());
			return (unsigned)s.size();
			}
		return mapper.GetIndicesForLabel(t, destination);
		}
	catch (const NxsException & x)
		{
		NxsString errormsg = "Error in the ";
		errormsg << setType << " descriptor of a " << cmdName << " command.\n";
		errormsg += x.msg;
		throw NxsException(errormsg, token);
		}
	catch (...)
		{
		NxsString errormsg = "Expecting a ";
		errormsg << setType << " descriptor (number or label) in the " << cmdName << ".  Encountered ";
		errormsg <<  token.GetToken();
		throw NxsException(errormsg, token);
		}
	}
Пример #2
0
std::vector<unsigned> NxsSetReader::GetSetAsVector(const NxsUnsignedSet &s)
	{
	std::vector<unsigned> u;
	u.reserve(s.size());
	for (NxsUnsignedSet::const_iterator sIt = s.begin(); sIt != s.end(); ++sIt)
		u.push_back(*sIt);
	return u;
	}
Пример #3
0
void writeCharactersAsGapped(ostream &out, NxsCharactersBlock * cb)
{
	if (!cb )
		return;
	NxsTaxaBlockAPI *tb = cb->GetTaxaBlockPtr(NULL);
	if (tb == NULL)
		return;
	NxsCharactersBlock::DataTypesEnum dt = cb->GetDataType();
	const char g = cb->GetGapSymbol();
	const std::string baseTitle = cb->GetTitle();
	if (tb == NULL
		|| dt == NxsCharactersBlock::standard
		|| dt == NxsCharactersBlock::continuous
		|| dt == NxsCharactersBlock::mixed
		|| (g == '\0' &&  !gGapCodeAllPresentSites))
		{
		cb->WriteAsNexus(out);
		writeCoreAssumptions(out, cb, baseTitle.c_str());
		return;
		}
	bool isAutogen = cb->IsAutoGeneratedTitle();
	std::string newTitle = baseTitle;
	newTitle.append("GapsAsMissing");
	cb->SetTitle(newTitle, isAutogen);

	std::set<unsigned> gappedColumns;
	std::set<unsigned> residueColumns;

	out << "BEGIN CHARACTERS;\n";
	cb->WriteBasicBlockCommands(out);

	const unsigned ntaxTotal = tb->GetNTax();
	out << "    DIMENSIONS";
	if (tb)
		{
		const unsigned wod = cb->GetNTaxWithData();
		if (wod > 0 && wod != ntaxTotal)
			out << " NTax=" << wod;
		}
	const unsigned nc = cb->GetNCharTotal();
	out << " NChar=" << nc << ";\n";
	cb->WriteEliminateCommand(out);
	cb->SetGapSymbol('\0');
	cb->WriteFormatCommand(out);
	cb->SetGapSymbol(g);

	cb->WriteCharStateLabelsCommand(out);

	std::vector<int>			intWts;
	std::vector<double>			dblWts;
	NxsUnsignedSet activeExSet = cb->GetExcludedIndexSet();

	const NxsTransformationManager &tm = cb->GetNxsTransformationManagerRef();
	intWts = tm.GetDefaultIntWeights();
	if (intWts.empty())
		dblWts = tm.GetDefaultDoubleWeights();

	unsigned width = tb->GetMaxTaxonLabelLength();
	out << "Matrix\n";
	unsigned begCharInd = 0;
	unsigned endCharInd = nc;
	for (unsigned i = 0; i < ntaxTotal; i++)
		{
		if (cb->TaxonIndHasData(i))
			{
			const std::string currTaxonLabel = NxsString::GetEscaped(tb->GetTaxonLabel(i));
			out << currTaxonLabel;
			unsigned currTaxonLabelLen = (unsigned)currTaxonLabel.size();
			unsigned diff = width - currTaxonLabelLen;
			for (unsigned k = 0; k < diff + 5; k++)
				out << ' ';

			const NxsDiscreteStateRow & row = cb->GetDiscreteMatrixRow(i);
			if (!row.empty())
				{
				const NxsDiscreteDatatypeMapper * dm = cb->GetDatatypeMapperForChar(0);
				if (dm == NULL)
					throw NxsNCLAPIException("No DatatypeMapper in WriteStatesForTaxonAsNexus");
				if (cb->IsMixedType())
					throw NxsNCLAPIException("Mixed datatypes are not supported by the NEXUSgapcode program.");
				if (cb->IsTokens())
					{
					for (unsigned charInd = begCharInd; charInd < endCharInd; ++charInd)
						{
						int sc = row[charInd];
						out << ' ';
						if (sc == NXS_GAP_STATE_CODE)
							out << dm->GetMissingSymbol();
						else
							{
							NxsString sl = cb->GetStateLabel(charInd, sc); /*v2.1to2.2 2 */
							if (sl == " ")
								{
								NxsString errormsg = "Writing character state ";
								errormsg << 1 + sc << " for character " << 1+charInd << ", but no appropriate chararcter label or symbol was found.";
								throw NxsNCLAPIException(errormsg);
								}
							else
								out  << NxsString::GetEscaped(sl);
							}
						}
					}
				else
					{
					std::vector<NxsDiscreteStateCell>::const_iterator endIt = row.begin() + begCharInd;
					std::vector<NxsDiscreteStateCell>::const_iterator begIt = endIt;
					if (endCharInd == row.size())
						endIt = row.end();
					else
						endIt += endCharInd - begCharInd;
					unsigned j = begCharInd;
					for (; begIt != endIt; ++begIt, ++j)
						{
						NxsDiscreteStateCell c;
						if (*begIt == NXS_GAP_STATE_CODE)
							{
							c = NXS_MISSING_CODE;
							gappedColumns.insert(j);
							}
						else
							{
							c = *begIt;
							if (gGapCodeAllPresentSites)
								gappedColumns.insert(j);
							residueColumns.insert(j);
							}
						dm->WriteStateCodeAsNexusString(out, c, true);
						}
					}
				}
			out << '\n';
			}
		}
	out << ";\n";
	cb->WriteSkippedCommands(out);
	out << "END;\n";


	writeCoreAssumptions(out, cb, newTitle.c_str());
	const unsigned nGappedCols = gappedColumns.size();
	if (nGappedCols > 0)
		{
		newTitle = baseTitle;
		newTitle.append("GapsAsBinary");
		cb->SetTitle(newTitle, isAutogen);
			out << "BEGIN CHARACTERS;\n";
		cb->WriteBasicBlockCommands(out);

		out << "    DIMENSIONS";
		if (tb)
			{
			const unsigned wod = cb->GetNTaxWithData();
			if (wod > 0 && wod != ntaxTotal)
				out << " NTax=" << wod;
			}
		out << " NChar=" << nGappedCols << ";\n";
		out << " CharStateLabels " ;
		unsigned currChNumber = 1;
		std::set<unsigned>::iterator gcIt = gappedColumns.begin();
		out << currChNumber++ << " col_" << (1 + *gcIt);
		for (++gcIt ; gcIt != gappedColumns.end(); ++gcIt)
			out << ",\n    " << currChNumber++ << " col_" << (1 + *gcIt);
		out << " ;\n" ;
		out << "Format Datatype = Standard Symbols=\"01\" missing = '?' ;\n";

		out << "Matrix\n";
		for (unsigned i = 0; i < ntaxTotal; i++)
			{
			if (cb->TaxonIndHasData(i))
				{
				const std::string currTaxonLabel = NxsString::GetEscaped(tb->GetTaxonLabel(i));
				out << currTaxonLabel;
				unsigned currTaxonLabelLen = (unsigned)currTaxonLabel.size();
				unsigned diff = width - currTaxonLabelLen;
				for (unsigned k = 0; k < diff + 5; k++)
					out << ' ';

				const NxsDiscreteStateRow & row = cb->GetDiscreteMatrixRow(i);
				for (std::set<unsigned>::iterator cIt = gappedColumns.begin(); cIt != gappedColumns.end(); ++cIt)
					{
					NxsDiscreteStateCell sc = row[*cIt];
					if (sc == NXS_GAP_STATE_CODE)
						{
						if (gGapCodeAllAbsentSites || (residueColumns.find(*cIt) != residueColumns.end()))
							out << '0';
						else
							out << '?';
						}
					else if (sc == NXS_MISSING_CODE) 
						{
						out << '?';
						}
					else 
						{
						out << '1';
						}
					}
				out << '\n';
				}
			}
		out << ";\n";
		cb->WriteSkippedCommands(out);
		out << "END;\n";
		}

	std::vector<int>			gapintWts;
	std::vector<double>			gapdblWts;
	std::set<unsigned>			gapactiveExSet;
	if (!activeExSet.empty())
		{
		unsigned gapind = 0;
		for (std::set<unsigned>::iterator cIt = gappedColumns.begin(); cIt != gappedColumns.end(); ++cIt, ++gapind)
			{
			if (activeExSet.find(*cIt) != activeExSet.end())
				gapactiveExSet.insert(gapind);
			}
		}
	if (!intWts.empty())
		{
		for (std::set<unsigned>::iterator cIt = gappedColumns.begin(); cIt != gappedColumns.end(); ++cIt)
			gapintWts.push_back(intWts[*cIt]);
		}
	if (!dblWts.empty())
		{
		for (std::set<unsigned>::iterator cIt = gappedColumns.begin(); cIt != gappedColumns.end(); ++cIt)
			gapdblWts.push_back(dblWts[*cIt]);
		}

	if (!(gapactiveExSet.empty() && gapintWts.empty() && gapdblWts.empty()))
		{
		out << "BEGIN ASSUMPTIONS; \n    LINK CHARACTERS = ";
		out << NxsString::GetEscaped(newTitle) << " ;\n";
		if (!gapactiveExSet.empty())
			{
			NxsString exsetName;
			exsetName << newTitle;
			exsetName.append("GapExSet");
			NxsUnsignedSetMap m;
			m[exsetName] = gapactiveExSet;
			NxsWriteSetCommand("EXSET", m, out, exsetName.c_str());;
			}
		if (!(gapintWts.empty() && gapdblWts.empty()))
			{
			NxsTransformationManager &cbntm = cb->GetNxsTransformationManagerRef();
			std::string wtSetName =  cbntm.GetDefaultWeightSetName();
			wtSetName.append("GapWtSet");
			NxsTransformationManager ntm;
			if (!gapintWts.empty())
				{
				NxsTransformationManager::ListOfIntWeights iw;
				vecToListOfIntWeights(gapintWts, iw);
				ntm.AddIntWeightSet(wtSetName, iw, true);
				}
			else
				{
				NxsTransformationManager::ListOfDblWeights dw;
				vecToListOfDblWeights(gapdblWts, dw);
				ntm.AddRealWeightSet(wtSetName, dw, true);
				}
			ntm.WriteWtSet(out);
			}
		out << "END;\n";
		}


	cb->SetTitle(baseTitle, isAutogen);
}
Пример #4
0
void NxsSetReader::ReadSetDefinition(
  NxsToken &token, 
  const NxsLabelToIndicesMapper & mapper, 
  const char * setType, /* "TAXON" or "CHARACTER" -- for error messages only */ 
  const char * cmdName, /* command name -- "TAXSET" or "EXSET"-- for error messages only */ 
  NxsUnsignedSet * destination, /** to be filled */
  const NxsUnsignedSet * taboo)
	{
	NxsString errormsg;
	NxsUnsignedSet tmpset;
	NxsUnsignedSet dummy;
	if (destination == NULL)
		destination = & dummy;
	unsigned previousInd = UINT_MAX;
	std::vector<unsigned> intersectVec;
	while (!token.Equals(";"))
		{
		if (taboo && token.Equals(","))
			return;
		if (token.Equals("-"))
			{
			if (previousInd == UINT_MAX)
				{
				errormsg = "The '-' must be preceded by number or a ";
				errormsg << setType << " label in the " << cmdName << " command.";
				throw NxsException(errormsg, token);
				}
			token.GetNextToken();
			if (token.Equals(";") || token.Equals("\\"))
				{
				errormsg = "Range in the ";
				errormsg << setType << " set definition in the " << cmdName << " command must be closed with a number or label.";
				throw NxsException(errormsg, token);
				}
			unsigned endpoint;
			if (token.Equals("."))
				endpoint = mapper.GetMaxIndex();
			else
				{
				tmpset.clear();
				unsigned nAdded = NxsSetReader::InterpretTokenAsIndices(token, mapper, setType, cmdName, &tmpset);
				if (nAdded != 1)
					{
					errormsg = "End of a range in a ";
					errormsg << setType << " set definition in the " << cmdName << " command must be closed with a single number or label (not a set).";
					throw NxsException(errormsg, token);
					}
				endpoint = *(tmpset.begin());
				if (endpoint < previousInd)
					{
					errormsg = "End of a range in a ";
					errormsg << setType << " set definition in the " << cmdName << " command must be a larger index than the start of the range (found ";
					errormsg << previousInd + 1 << " - " << token.GetToken();
					throw NxsException(errormsg, token);
					}
				}
			token.GetNextToken();
			if (token.Equals("\\"))
				{
				token.GetNextToken();
				NxsString t = token.GetToken(); 
				unsigned stride = 0;
				try
					{
					stride = t.ConvertToUnsigned();
					}
				catch (const NxsString::NxsX_NotANumber &)
					{}
				if (stride == 0)
					{
					errormsg = "Expecting a positive number indicating the 'stride' after the \\ in the ";
					errormsg << setType << " set definition in the " << cmdName << " command. Encountered ";
					errormsg << t;
					throw NxsException(errormsg, token);
					}
				AddRangeToSet(previousInd, endpoint, stride, destination, taboo, token);
				token.GetNextToken();
				}
			else
				AddRangeToSet(previousInd, endpoint, 1, destination, taboo, token);
			previousInd = UINT_MAX;
			}
		else 
			{
			tmpset.clear();
			const unsigned nAdded = NxsSetReader::InterpretTokenAsIndices(token, mapper, setType, cmdName, &tmpset);
			if (taboo != NULL)
				{
				set_intersection(taboo->begin(), taboo->end(), tmpset.begin(), tmpset.end(), back_inserter(intersectVec));
				if (!intersectVec.empty())
					{
					errormsg << "Illegal repitition of an index (" << 1 + *(intersectVec.begin()) << ") in multiple subsets.";
					throw NxsException(errormsg, token);
					}
				}
			if (nAdded == 1 )
				{
				previousInd = *(tmpset.begin());
				destination->insert(previousInd);
				}
			else
				{
				previousInd = UINT_MAX;
				destination->insert(tmpset.begin(), tmpset.end());
				}
			token.GetNextToken();
			}
		}
	}
Пример #5
0
void NxsSetReader::WriteSetAsNexusValue(const NxsUnsignedSet & nxsset, std::ostream & out)
	{
	NxsUnsignedSet::const_iterator currIt = nxsset.begin();
	const NxsUnsignedSet::const_iterator endIt = nxsset.end();
	if (currIt == endIt)
		return;
	unsigned rangeBegin = 1 + *currIt++;
	if (currIt == endIt)
		{
		out << ' ' << rangeBegin;
		return;
		}
	unsigned prev = 1 + *currIt++;
	if (currIt == endIt)
		{
		out << ' ' << rangeBegin << ' ' << prev;
		return;
		}
	unsigned stride = prev - rangeBegin;
	unsigned curr = 1 + *currIt++;
	bool inRange = true;
	while (currIt != endIt)
		{
		if (inRange)
			{
			if (curr - prev != stride)
				{
				if (prev - rangeBegin == stride)
					{
					out << ' ' << rangeBegin;
					rangeBegin = prev;
					stride = curr - prev;
					}
				else
					{
					if (stride > 1)
						out << ' ' << rangeBegin << '-' << prev << " \\ " << stride;
					else
						out << ' ' << rangeBegin << '-' << prev ;
					inRange = false;
					}
				}
			}
		else
			{
			inRange = true;
			rangeBegin = prev;
			stride = curr - prev;
			}
		prev = curr;
		curr = 1 + *currIt;
		currIt++;
		}
	if (inRange)
		{
		if (curr - prev != stride)
			{
			if (prev - rangeBegin == stride)
				out << ' ' << rangeBegin << ' ' << prev;
			else
				{
				if (stride > 1)
					out << ' ' << rangeBegin << '-' << prev << " \\ " << stride;
				else
					out << ' ' << rangeBegin << '-' << prev ;
				}
			out << ' ' << curr;
			}
		else
			{
			if (stride > 1)
				out << ' ' << rangeBegin << '-' << curr << " \\ " << stride;
			else
				out << ' ' << rangeBegin << '-' << curr ;
			}
		}
	else
		out << ' ' << prev << ' ' << curr;
	}