/** | returns the number of indices added. */ unsigned NxsSetReader::InterpretTokenAsIndices(NxsToken &token, const NxsLabelToIndicesMapper & mapper, const char * setType, const char * cmdName, NxsUnsignedSet * destination) { try { const std::string t = token.GetToken(); if (NxsString::case_insensitive_equals(t.c_str(), "ALL")) { unsigned m = mapper.GetMaxIndex(); NxsUnsignedSet s; for (unsigned i = 0; i <= m; ++i) s.insert(i); destination->insert(s.begin(), s.end()); return (unsigned)s.size(); } return mapper.GetIndicesForLabel(t, destination); } catch (const NxsException & x) { NxsString errormsg = "Error in the "; errormsg << setType << " descriptor of a " << cmdName << " command.\n"; errormsg += x.msg; throw NxsException(errormsg, token); } catch (...) { NxsString errormsg = "Expecting a "; errormsg << setType << " descriptor (number or label) in the " << cmdName << ". Encountered "; errormsg << token.GetToken(); throw NxsException(errormsg, token); } }
std::vector<unsigned> NxsSetReader::GetSetAsVector(const NxsUnsignedSet &s) { std::vector<unsigned> u; u.reserve(s.size()); for (NxsUnsignedSet::const_iterator sIt = s.begin(); sIt != s.end(); ++sIt) u.push_back(*sIt); return u; }
void writeCharactersAsGapped(ostream &out, NxsCharactersBlock * cb) { if (!cb ) return; NxsTaxaBlockAPI *tb = cb->GetTaxaBlockPtr(NULL); if (tb == NULL) return; NxsCharactersBlock::DataTypesEnum dt = cb->GetDataType(); const char g = cb->GetGapSymbol(); const std::string baseTitle = cb->GetTitle(); if (tb == NULL || dt == NxsCharactersBlock::standard || dt == NxsCharactersBlock::continuous || dt == NxsCharactersBlock::mixed || (g == '\0' && !gGapCodeAllPresentSites)) { cb->WriteAsNexus(out); writeCoreAssumptions(out, cb, baseTitle.c_str()); return; } bool isAutogen = cb->IsAutoGeneratedTitle(); std::string newTitle = baseTitle; newTitle.append("GapsAsMissing"); cb->SetTitle(newTitle, isAutogen); std::set<unsigned> gappedColumns; std::set<unsigned> residueColumns; out << "BEGIN CHARACTERS;\n"; cb->WriteBasicBlockCommands(out); const unsigned ntaxTotal = tb->GetNTax(); out << " DIMENSIONS"; if (tb) { const unsigned wod = cb->GetNTaxWithData(); if (wod > 0 && wod != ntaxTotal) out << " NTax=" << wod; } const unsigned nc = cb->GetNCharTotal(); out << " NChar=" << nc << ";\n"; cb->WriteEliminateCommand(out); cb->SetGapSymbol('\0'); cb->WriteFormatCommand(out); cb->SetGapSymbol(g); cb->WriteCharStateLabelsCommand(out); std::vector<int> intWts; std::vector<double> dblWts; NxsUnsignedSet activeExSet = cb->GetExcludedIndexSet(); const NxsTransformationManager &tm = cb->GetNxsTransformationManagerRef(); intWts = tm.GetDefaultIntWeights(); if (intWts.empty()) dblWts = tm.GetDefaultDoubleWeights(); unsigned width = tb->GetMaxTaxonLabelLength(); out << "Matrix\n"; unsigned begCharInd = 0; unsigned endCharInd = nc; for (unsigned i = 0; i < ntaxTotal; i++) { if (cb->TaxonIndHasData(i)) { const std::string currTaxonLabel = NxsString::GetEscaped(tb->GetTaxonLabel(i)); out << currTaxonLabel; unsigned currTaxonLabelLen = (unsigned)currTaxonLabel.size(); unsigned diff = width - currTaxonLabelLen; for (unsigned k = 0; k < diff + 5; k++) out << ' '; const NxsDiscreteStateRow & row = cb->GetDiscreteMatrixRow(i); if (!row.empty()) { const NxsDiscreteDatatypeMapper * dm = cb->GetDatatypeMapperForChar(0); if (dm == NULL) throw NxsNCLAPIException("No DatatypeMapper in WriteStatesForTaxonAsNexus"); if (cb->IsMixedType()) throw NxsNCLAPIException("Mixed datatypes are not supported by the NEXUSgapcode program."); if (cb->IsTokens()) { for (unsigned charInd = begCharInd; charInd < endCharInd; ++charInd) { int sc = row[charInd]; out << ' '; if (sc == NXS_GAP_STATE_CODE) out << dm->GetMissingSymbol(); else { NxsString sl = cb->GetStateLabel(charInd, sc); /*v2.1to2.2 2 */ if (sl == " ") { NxsString errormsg = "Writing character state "; errormsg << 1 + sc << " for character " << 1+charInd << ", but no appropriate chararcter label or symbol was found."; throw NxsNCLAPIException(errormsg); } else out << NxsString::GetEscaped(sl); } } } else { std::vector<NxsDiscreteStateCell>::const_iterator endIt = row.begin() + begCharInd; std::vector<NxsDiscreteStateCell>::const_iterator begIt = endIt; if (endCharInd == row.size()) endIt = row.end(); else endIt += endCharInd - begCharInd; unsigned j = begCharInd; for (; begIt != endIt; ++begIt, ++j) { NxsDiscreteStateCell c; if (*begIt == NXS_GAP_STATE_CODE) { c = NXS_MISSING_CODE; gappedColumns.insert(j); } else { c = *begIt; if (gGapCodeAllPresentSites) gappedColumns.insert(j); residueColumns.insert(j); } dm->WriteStateCodeAsNexusString(out, c, true); } } } out << '\n'; } } out << ";\n"; cb->WriteSkippedCommands(out); out << "END;\n"; writeCoreAssumptions(out, cb, newTitle.c_str()); const unsigned nGappedCols = gappedColumns.size(); if (nGappedCols > 0) { newTitle = baseTitle; newTitle.append("GapsAsBinary"); cb->SetTitle(newTitle, isAutogen); out << "BEGIN CHARACTERS;\n"; cb->WriteBasicBlockCommands(out); out << " DIMENSIONS"; if (tb) { const unsigned wod = cb->GetNTaxWithData(); if (wod > 0 && wod != ntaxTotal) out << " NTax=" << wod; } out << " NChar=" << nGappedCols << ";\n"; out << " CharStateLabels " ; unsigned currChNumber = 1; std::set<unsigned>::iterator gcIt = gappedColumns.begin(); out << currChNumber++ << " col_" << (1 + *gcIt); for (++gcIt ; gcIt != gappedColumns.end(); ++gcIt) out << ",\n " << currChNumber++ << " col_" << (1 + *gcIt); out << " ;\n" ; out << "Format Datatype = Standard Symbols=\"01\" missing = '?' ;\n"; out << "Matrix\n"; for (unsigned i = 0; i < ntaxTotal; i++) { if (cb->TaxonIndHasData(i)) { const std::string currTaxonLabel = NxsString::GetEscaped(tb->GetTaxonLabel(i)); out << currTaxonLabel; unsigned currTaxonLabelLen = (unsigned)currTaxonLabel.size(); unsigned diff = width - currTaxonLabelLen; for (unsigned k = 0; k < diff + 5; k++) out << ' '; const NxsDiscreteStateRow & row = cb->GetDiscreteMatrixRow(i); for (std::set<unsigned>::iterator cIt = gappedColumns.begin(); cIt != gappedColumns.end(); ++cIt) { NxsDiscreteStateCell sc = row[*cIt]; if (sc == NXS_GAP_STATE_CODE) { if (gGapCodeAllAbsentSites || (residueColumns.find(*cIt) != residueColumns.end())) out << '0'; else out << '?'; } else if (sc == NXS_MISSING_CODE) { out << '?'; } else { out << '1'; } } out << '\n'; } } out << ";\n"; cb->WriteSkippedCommands(out); out << "END;\n"; } std::vector<int> gapintWts; std::vector<double> gapdblWts; std::set<unsigned> gapactiveExSet; if (!activeExSet.empty()) { unsigned gapind = 0; for (std::set<unsigned>::iterator cIt = gappedColumns.begin(); cIt != gappedColumns.end(); ++cIt, ++gapind) { if (activeExSet.find(*cIt) != activeExSet.end()) gapactiveExSet.insert(gapind); } } if (!intWts.empty()) { for (std::set<unsigned>::iterator cIt = gappedColumns.begin(); cIt != gappedColumns.end(); ++cIt) gapintWts.push_back(intWts[*cIt]); } if (!dblWts.empty()) { for (std::set<unsigned>::iterator cIt = gappedColumns.begin(); cIt != gappedColumns.end(); ++cIt) gapdblWts.push_back(dblWts[*cIt]); } if (!(gapactiveExSet.empty() && gapintWts.empty() && gapdblWts.empty())) { out << "BEGIN ASSUMPTIONS; \n LINK CHARACTERS = "; out << NxsString::GetEscaped(newTitle) << " ;\n"; if (!gapactiveExSet.empty()) { NxsString exsetName; exsetName << newTitle; exsetName.append("GapExSet"); NxsUnsignedSetMap m; m[exsetName] = gapactiveExSet; NxsWriteSetCommand("EXSET", m, out, exsetName.c_str());; } if (!(gapintWts.empty() && gapdblWts.empty())) { NxsTransformationManager &cbntm = cb->GetNxsTransformationManagerRef(); std::string wtSetName = cbntm.GetDefaultWeightSetName(); wtSetName.append("GapWtSet"); NxsTransformationManager ntm; if (!gapintWts.empty()) { NxsTransformationManager::ListOfIntWeights iw; vecToListOfIntWeights(gapintWts, iw); ntm.AddIntWeightSet(wtSetName, iw, true); } else { NxsTransformationManager::ListOfDblWeights dw; vecToListOfDblWeights(gapdblWts, dw); ntm.AddRealWeightSet(wtSetName, dw, true); } ntm.WriteWtSet(out); } out << "END;\n"; } cb->SetTitle(baseTitle, isAutogen); }
void NxsSetReader::ReadSetDefinition( NxsToken &token, const NxsLabelToIndicesMapper & mapper, const char * setType, /* "TAXON" or "CHARACTER" -- for error messages only */ const char * cmdName, /* command name -- "TAXSET" or "EXSET"-- for error messages only */ NxsUnsignedSet * destination, /** to be filled */ const NxsUnsignedSet * taboo) { NxsString errormsg; NxsUnsignedSet tmpset; NxsUnsignedSet dummy; if (destination == NULL) destination = & dummy; unsigned previousInd = UINT_MAX; std::vector<unsigned> intersectVec; while (!token.Equals(";")) { if (taboo && token.Equals(",")) return; if (token.Equals("-")) { if (previousInd == UINT_MAX) { errormsg = "The '-' must be preceded by number or a "; errormsg << setType << " label in the " << cmdName << " command."; throw NxsException(errormsg, token); } token.GetNextToken(); if (token.Equals(";") || token.Equals("\\")) { errormsg = "Range in the "; errormsg << setType << " set definition in the " << cmdName << " command must be closed with a number or label."; throw NxsException(errormsg, token); } unsigned endpoint; if (token.Equals(".")) endpoint = mapper.GetMaxIndex(); else { tmpset.clear(); unsigned nAdded = NxsSetReader::InterpretTokenAsIndices(token, mapper, setType, cmdName, &tmpset); if (nAdded != 1) { errormsg = "End of a range in a "; errormsg << setType << " set definition in the " << cmdName << " command must be closed with a single number or label (not a set)."; throw NxsException(errormsg, token); } endpoint = *(tmpset.begin()); if (endpoint < previousInd) { errormsg = "End of a range in a "; errormsg << setType << " set definition in the " << cmdName << " command must be a larger index than the start of the range (found "; errormsg << previousInd + 1 << " - " << token.GetToken(); throw NxsException(errormsg, token); } } token.GetNextToken(); if (token.Equals("\\")) { token.GetNextToken(); NxsString t = token.GetToken(); unsigned stride = 0; try { stride = t.ConvertToUnsigned(); } catch (const NxsString::NxsX_NotANumber &) {} if (stride == 0) { errormsg = "Expecting a positive number indicating the 'stride' after the \\ in the "; errormsg << setType << " set definition in the " << cmdName << " command. Encountered "; errormsg << t; throw NxsException(errormsg, token); } AddRangeToSet(previousInd, endpoint, stride, destination, taboo, token); token.GetNextToken(); } else AddRangeToSet(previousInd, endpoint, 1, destination, taboo, token); previousInd = UINT_MAX; } else { tmpset.clear(); const unsigned nAdded = NxsSetReader::InterpretTokenAsIndices(token, mapper, setType, cmdName, &tmpset); if (taboo != NULL) { set_intersection(taboo->begin(), taboo->end(), tmpset.begin(), tmpset.end(), back_inserter(intersectVec)); if (!intersectVec.empty()) { errormsg << "Illegal repitition of an index (" << 1 + *(intersectVec.begin()) << ") in multiple subsets."; throw NxsException(errormsg, token); } } if (nAdded == 1 ) { previousInd = *(tmpset.begin()); destination->insert(previousInd); } else { previousInd = UINT_MAX; destination->insert(tmpset.begin(), tmpset.end()); } token.GetNextToken(); } } }
void NxsSetReader::WriteSetAsNexusValue(const NxsUnsignedSet & nxsset, std::ostream & out) { NxsUnsignedSet::const_iterator currIt = nxsset.begin(); const NxsUnsignedSet::const_iterator endIt = nxsset.end(); if (currIt == endIt) return; unsigned rangeBegin = 1 + *currIt++; if (currIt == endIt) { out << ' ' << rangeBegin; return; } unsigned prev = 1 + *currIt++; if (currIt == endIt) { out << ' ' << rangeBegin << ' ' << prev; return; } unsigned stride = prev - rangeBegin; unsigned curr = 1 + *currIt++; bool inRange = true; while (currIt != endIt) { if (inRange) { if (curr - prev != stride) { if (prev - rangeBegin == stride) { out << ' ' << rangeBegin; rangeBegin = prev; stride = curr - prev; } else { if (stride > 1) out << ' ' << rangeBegin << '-' << prev << " \\ " << stride; else out << ' ' << rangeBegin << '-' << prev ; inRange = false; } } } else { inRange = true; rangeBegin = prev; stride = curr - prev; } prev = curr; curr = 1 + *currIt; currIt++; } if (inRange) { if (curr - prev != stride) { if (prev - rangeBegin == stride) out << ' ' << rangeBegin << ' ' << prev; else { if (stride > 1) out << ' ' << rangeBegin << '-' << prev << " \\ " << stride; else out << ' ' << rangeBegin << '-' << prev ; } out << ' ' << curr; } else { if (stride > 1) out << ' ' << rangeBegin << '-' << curr << " \\ " << stride; else out << ' ' << rangeBegin << '-' << curr ; } } else out << ' ' << prev << ' ' << curr; }