// Turn an arbitrary label into a string usable as an SGML/XML ID.
//
// The standard DocBook SGML declaration only allows letters,
// digits, '-' and '.' in a name.
// Since users might change that declaration one has to cater
// for additional allowed characters.
// This routine replaces illegal characters by '-' or '.'
// and adds a number for uniqueness.
// If you know what you are doing, you can set allowed==""
// to disable this mangling.
//
// \param buf        supplies the document class, whose options() string
//                   is the set of extra allowed characters for non-XML
//                   flavors.
// \param runparams  if the flavor is OutputParams::XML the extra allowed
//                   characters are fixed to ".-_:".
// \param orig       the ID to clean.
// \return orig itself when the allowed set is empty, otherwise the cleaned
//         ID. The same orig always yields the same result for the lifetime
//         of the process, because results are cached.
//
// NOTE(review): the cache is keyed by orig only, although the result also
// depends on the allowed set; an ID first cleaned for one flavor/document
// class is returned unchanged for another.
docstring sgml::cleanID(Buffer const & buf, OutputParams const & runparams,
	docstring const & orig)
{
	DocumentClass const & tclass = buf.params().documentClass();
	docstring const allowed = from_ascii(
		runparams.flavor == OutputParams::XML ? ".-_:" : tclass.options());

	if (allowed.empty())
		return orig;

	// FIXME THREAD
	typedef map<docstring, docstring> MangledMap;
	static MangledMap mangledNames;
	static int mangleID = 1;

	// Reuse an earlier result so that every reference to the same
	// original ID gets the same (possibly numbered) replacement.
	MangledMap::const_iterator const known = mangledNames.find(orig);
	if (known != mangledNames.end())
		return known->second;

	docstring::const_iterator it = orig.begin();
	docstring::const_iterator const end = orig.end();

	docstring content;

	// make sure it starts with a letter (or an explicitly allowed
	// character). An empty ID has no first character at all: do not
	// dereference the end iterator, just give it the "x" prefix too.
	if (it == end
	    || (!isAlphaASCII(*it) && allowed.find(*it) >= allowed.size()))
		content += "x";

	bool mangle = false;
	for (; it != end; ++it) {
		char_type c = *it;
		if (isAlphaASCII(c) || isDigitASCII(c) || c == '-' || c == '.'
		    || allowed.find(c) < allowed.size())
			// legal as is
			content += c;
		else if (c == '_' || c == ' ') {
			mangle = true;
			content += "-";
		} else if (c == ':' || c == ',' || c == ';' || c == '!') {
			mangle = true;
			content += ".";
		} else {
			// any other character is dropped
			mangle = true;
		}
	}

	// At this point content is never empty: either the "x" prefix was
	// added, or the first character of orig was copied by the loop.
	if (mangle)
		content += "-" + convert<docstring>(mangleID++);
	else if (isDigitASCII(content[content.size() - 1]))
		// presumably keeps unmangled IDs that end in a digit distinct
		// from mangled ones, which end in "-<number>"
		content += ".";

	mangledNames[orig] = content;

	return content;
}
// Returns true if this token has category catLetter, or if it has
// category catOther and its text is exactly one character for which
// isDigitASCII() holds.
bool Token::isAlnumASCII() const
{
	if (cat_ == catLetter)
		return true;

	if (cat_ != catOther)
		return false;

	// An "other" token only counts when it is a single digit.
	return cs_.length() == 1 && isDigitASCII(cs_[0]);
}