/**
 * Converts a range of a native TCHAR string into an ICU string using this converter.
 *
 * @param Source              Native string to read from.
 * @param SourceStartIndex    Index of the first TCHAR of Source to convert.
 * @param SourceLen           Number of TCHARs to convert; when not positive, Destination is simply emptied.
 * @param Destination         Receives the converted text.
 * @param ShouldNullTerminate When true, a null UChar is written directly after the converted text.
 */
void FStringConverter::ConvertString(const TCHAR* Source, const int32 SourceStartIndex, const int32 SourceLen, icu::UnicodeString& Destination, const bool ShouldNullTerminate)
	{
		if (SourceLen > 0)
		{
			UErrorCode ICUStatus = U_ZERO_ERROR;

			ucnv_reset(ICUConverter);

			// Get the internal buffer of the string, we're going to use it as scratch space.
			// One UChar more than the two-per-TCHAR budget is requested so that the optional
			// terminator below always has a slot, even when the output fills the whole budget.
			const int32_t DestinationCapacityUChars = (SourceLen * 2) + 1;
			UChar* InternalStringBuffer = Destination.getBuffer(DestinationCapacityUChars);

			// Perform the conversion into the string buffer
			const int32_t SourceSizeBytes = SourceLen * sizeof(TCHAR);
			const int32_t DestinationLength = ucnv_toUChars(ICUConverter, InternalStringBuffer, DestinationCapacityUChars, reinterpret_cast<const char*>(Source + SourceStartIndex), SourceSizeBytes, &ICUStatus);

			// Optionally null terminate the string.
			// The reported length is only used as an index when it lies inside the requested capacity
			// (on overflow ICU reports the length that would have been needed, which is out of range).
			if (ShouldNullTerminate && DestinationLength >= 0 && DestinationLength < DestinationCapacityUChars)
			{
				InternalStringBuffer[DestinationLength] = 0;
			}

			// Size it back down to the correct size and release our lock on the string buffer
			Destination.releaseBuffer(DestinationLength);

			check(U_SUCCESS(ICUStatus));
		}
		else
		{
			Destination.remove();
		}
	}
Example #2
0
/**
 * UnicodeString flavour of the spoof check: hands the string's UTF-16 buffer
 * and length to uspoof_check() and returns whatever that call returns.
 */
U_CAPI int32_t U_EXPORT2
uspoof_checkUnicodeString(const USpoofChecker *sc,
                          const icu::UnicodeString &text,
                          int32_t *position,
                          UErrorCode *status) {
    const UChar *chars = text.getBuffer();
    const int32_t charCount = text.length();
    return uspoof_check(sc, chars, charCount, position, status);
}
	/**
	 * Returns the length of the ICU string obtained by converting the given range of Source.
	 * A non-positive InSourceLength yields 0 without performing any conversion.
	 */
	int32 GetUnicodeStringLengthImpl(const TCHAR* Source, const int32 InSourceStartIndex, const int32 InSourceLength)
	{
		if (InSourceLength <= 0)
		{
			return 0;
		}

		const icu::UnicodeString Converted = ConvertString(Source, InSourceStartIndex, InSourceLength);
		return Converted.length();
	}
/**
 * Writes the content of an ICU string to a Unitex file (system filesystem or filespace).
 * The data is passed to WriteUnitexFile as two buffers: a prefix holding the
 * byte-order mark U+FEFF and a suffix holding the string's UTF-16 buffer, both
 * sized in bytes.
 *
 * Returns 1 when WriteUnitexFile returns 0, and 0 otherwise.
 */
    UNITEX_FUNC int UNITEX_CALL WriteUnicodeUnitexFile(const char*filename, icu::UnicodeString const& uString)
    {
        UChar byteOrderMark = 0xfeff;

        const UChar * text = uString.getBuffer();
        const int32_t textLength = uString.length();

        if (WriteUnitexFile(filename, &byteOrderMark, sizeof(UChar), text, textLength * sizeof(UChar)) == 0)
        {
            return 1;
        }

        return 0;
    }
Example #5
0
/**
 * UnicodeString flavour of the confusability test: forwards the UTF-16 buffer
 * and length of both strings to uspoof_areConfusable() and returns its result.
 */
U_CAPI int32_t U_EXPORT2
uspoof_areConfusableUnicodeString(const USpoofChecker *sc,
                                  const icu::UnicodeString &s1,
                                  const icu::UnicodeString &s2,
                                  UErrorCode *status) {
    return uspoof_areConfusable(sc,
                                s1.getBuffer(), s1.length(),
                                s2.getBuffer(), s2.length(),
                                status);
}
	/**
	 * Converts a range of an ICU string into a native FString using this converter.
	 *
	 * @param Source            ICU string to read from.
	 * @param SourceStartIndex  Index of the first UChar of Source to convert.
	 * @param SourceLen         Number of UChars to convert.
	 * @param Destination       Receives the converted, null-terminated text; emptied when there is nothing to convert.
	 */
	void FStringConverter::ConvertString(const icu::UnicodeString& Source, const int32 SourceStartIndex, const int32 SourceLen, FString& Destination)
	{
		// Guard on the requested range (as the TCHAR overload does) as well as on the source itself,
		// so an empty or negative range never reaches the converter
		if (SourceLen > 0 && Source.length() > 0)
		{
			UErrorCode ICUStatus = U_ZERO_ERROR;

			ucnv_reset(ICUConverter);
			
			// Get the internal buffer of the string, we're going to use it as scratch space
			TArray<TCHAR>& InternalStringBuffer = Destination.GetCharArray();
				
			// Work out the maximum size required and resize the buffer so it can hold enough data
			const int32_t DestinationCapacityBytes = UCNV_GET_MAX_BYTES_FOR_STRING(SourceLen, ucnv_getMaxCharSize(ICUConverter));
			const int32 DestinationCapacityTCHARs = DestinationCapacityBytes / sizeof(TCHAR);
			InternalStringBuffer.SetNumUninitialized(DestinationCapacityTCHARs);

			// Perform the conversion into the string buffer
			const int32_t DestinationSizeBytes = ucnv_fromUChars(ICUConverter, reinterpret_cast<char*>(InternalStringBuffer.GetData()), DestinationCapacityBytes, Source.getBuffer() + SourceStartIndex, SourceLen, &ICUStatus);

			// Validate the conversion before the reported size is used as an index into the buffer
			check(U_SUCCESS(ICUStatus));

			// Null terminate the FString and size it back down to the correct size
			const int32 DestinationSizeTCHARs = DestinationSizeBytes / sizeof(TCHAR);
			InternalStringBuffer[DestinationSizeTCHARs] = 0;
			InternalStringBuffer.SetNum(DestinationSizeTCHARs + 1, /*bAllowShrinking*/false); // the array size includes null
		}
		else
		{
			Destination.Empty();
		}
	}
Example #7
0
/**
 * Fills `ret` with the UTF-16 form of the UTF-8 bytes in `str`.
 *
 * `error` is reset and then receives the ICU status of the conversion.
 * Returns true on success; on failure `ret` is set to bogus and false is
 * returned.
 */
bool ustring_from_char(icu::UnicodeString& ret,
                       const String& str,
                       UErrorCode &error) {
  // Ask for one UChar more than there are input bytes.
  int32_t min_capacity = str.size() + 1;
  UChar *dest = ret.getBuffer(min_capacity);

  int32_t written = 0;
  error = U_ZERO_ERROR;
  u_strFromUTF8WithSub(dest, ret.getCapacity(), &written,
                       str.c_str(), str.size(),
                       U_SENTINEL /* no substitution */,
                       nullptr, &error);

  // The buffer is released whether or not the conversion succeeded.
  ret.releaseBuffer(written);

  if (U_SUCCESS(error)) {
    return true;
  }
  ret.setToBogus();
  return false;
}
// --------------------------------------------------------------------------
// Handles the type name of a start tag: flushes pending character data,
// builds the element_info record for the new element and pushes it onto the
// element stack, then clears the collected SAX attributes.
//
// Throws semantic_error when validating and the first element's type differs
// from a non-empty document type name; throws not_implemented when validating
// and an element is already open.
// --------------------------------------------------------------------------
void
processor::on_start_tag_type (
 icu::UnicodeString const& type
)
// --------------------------------------------------------------------------
{
  element_info info;

  // Deliver any character data collected before this tag.
  if (!m_character_data.isEmpty())
  {
    character_data(m_character_data);
    m_character_data.remove();
  }

  info.type = type;
  info.child_counter = 0;

  if (!m_element_info.empty())
  {
    // Nested element.
    if (m_validating)
    {
      throw not_implemented("Element validity checking.");
    }

    auto& parent = m_element_info.top();

    ++parent.child_counter;

    // Start from the parent's settings.
    info.xmlns = parent.xmlns;
    info.base = parent.base;
    info.lang = parent.lang;
    info.space = parent.space;
  }
  else
  {
    // First element on the stack.
    bool const root_mismatch =
        m_validating
     && !m_document_type.m_root_type.isEmpty()
     && type != m_document_type.m_root_type;

    if (root_mismatch)
    {
      std::string msg =
        "Root element type does not match the document type.\n"
        "Document type name: ";

      m_document_type.m_root_type.toUTF8String(msg);
      msg += "\nRoot element type: ";
      type.toUTF8String(msg);

      throw semantic_error(msg);
    }

    info.xmlns[""] = uri();
    info.base = m_base_iri;
    info.space = false;
  }

  m_element_info.push(info);
  m_sax_attrs.clear();
}
Example #9
0
/**
 * Assigns to `ret` the result of u16(str, error, U_SENTINEL).
 *
 * `error` is reset before the call. Returns true when it does not report a
 * failure afterwards; otherwise `ret` is set to bogus and false is returned.
 */
static bool ustring_from_char(icu::UnicodeString& ret,
                              const String& str,
                              UErrorCode &error) {
  error = U_ZERO_ERROR;
  ret = u16(str, error, U_SENTINEL);

  const bool failed = U_FAILURE(error);
  if (failed) {
    ret.setToBogus();
  }
  return !failed;
}
	void ConvertString(const FString& Source, icu::UnicodeString& Destination, const bool ShouldNullTerminate)
	{
		if (Source.Len() > 0)
		{
			FStringConverter StringConverter;
			StringConverter.ConvertString(Source, Destination, ShouldNullTerminate);
		}
		else
		{
			Destination.remove();
		}
	}
	void ConvertString(const TCHAR* Source, const int32 SourceStartIndex, const int32 SourceLen, icu::UnicodeString& Destination, const bool ShouldNullTerminate)
	{
		if (SourceLen > 0)
		{
			FStringConverter StringConverter;
			StringConverter.ConvertString(Source, SourceStartIndex, SourceLen, Destination, ShouldNullTerminate);
		}
		else
		{
			Destination.remove();
		}
	}
Example #12
0
// Prints one output line for the code point range start..end: the range (a
// single value when start==end), the status name, and -- for MAPPED or
// DEVIATION status or a non-empty mapping -- the mapping's code points in hex.
static void
printLine(UChar32 start, UChar32 end, Status status, const icu::UnicodeString &mapping) {
    if(start!=end) {
        printf("%04lX..%04lX    ", (long)start, (long)end);
    } else {
        printf("%04lX          ", (long)start);
    }
    printf("; %s", statusNames[status]);

    const bool showMapping=(status==MAPPED || status==DEVIATION || !mapping.isEmpty());
    if(showMapping) {
        printf(" ;");
        const UChar *units=mapping.getBuffer();
        const int32_t unitCount=mapping.length();
        // U16_NEXT advances the offset itself, by one or two code units.
        for(int32_t offset=0; offset<unitCount;) {
            UChar32 codePoint;
            U16_NEXT(units, offset, unitCount, codePoint);
            printf(" %04lX", (long)codePoint);
        }
    }
    puts("");
}
// --------------------------------------------------------------------------
// Handles an end tag: flushes pending character data, checks that the tag
// name matches the element on top of the element stack, calls element_end(),
// pops that element and restores the current-element variables from the new
// top of the stack.
//
// On a name mismatch the whole element stack is unwound into a path string
// and a semantic_error is thrown.
// --------------------------------------------------------------------------
void
processor::on_end_tag (
 icu::UnicodeString const& type
)
// --------------------------------------------------------------------------
{
  // Deliver any character data collected before this tag.
  if (!m_character_data.isEmpty())
  {
    character_data(m_character_data);
    m_character_data.remove();
  }

  if (type != m_element_info.top().type)
  {
    std::string msg = "STag-ETag name mismatch.\nETag name: ";
    type.toUTF8String(msg);

    // Empty the stack, prepending each element type so the outermost
    // element ends up first in the path.
    std::string tree;

    while (!m_element_info.empty())
    {
      std::string tmp;

      m_element_info.top().type.toUTF8String(tmp);
      m_element_info.pop();
      tree.insert(0, "/" + tmp);
    }

    msg += "\nElement tree: ";
    msg += tree;

    throw semantic_error(msg);
  }

  element_end();

  if (m_element_info.empty())
  {
    return;
  }

  m_element_info.pop();

  if (m_element_info.empty())
  {
    return;
  }

  // Update current element's variables.
  auto& current = m_element_info.top();

  m_element.assign(current.type, current.xmlns);
  m_attributes.clear();
  m_base_iri = current.base;
  m_language = current.lang;
  m_preserve_space = current.space;
}
Example #14
0
static int
toIDNA2003(const UStringPrepProfile *prep, UChar32 c, icu::UnicodeString &destString) {
    UChar src[2];
    int32_t srcLength=0;
    U16_APPEND_UNSAFE(src, srcLength, c);
    UChar *dest;
    int32_t destLength;
    dest=destString.getBuffer(32);
    if(dest==NULL) {
        return FALSE;
    }
    UErrorCode errorCode=U_ZERO_ERROR;
    destLength=usprep_prepare(prep, src, srcLength,
                              dest, destString.getCapacity(),
                              USPREP_DEFAULT, NULL, &errorCode);
    destString.releaseBuffer(destLength);
    if(errorCode==U_STRINGPREP_PROHIBITED_ERROR) {
        return -1;
    } else {
        // Returns FALSE=0 for U_STRINGPREP_UNASSIGNED_ERROR and processing errors,
        // TRUE=1 if c is valid or mapped.
        return U_SUCCESS(errorCode);
    }
}
Example #15
0
/**
 * Feeds `u` to `out` in two parts. The prefix reported by
 * normalizer->spanQuickCheckYes() is delivered code point by code point, each
 * with a one-wide span whose start advances by one per code point. The
 * remainder is delivered through IcuNormalizeByChunks::takeAllWithSpan().
 */
void alignedNormalizeUnicodeString(icu::UnicodeString const& u, IcuNormalizer2Ptr normalizer,
                                   ITakeAlignedChars& out) {
    // TODO: test
    UErrorCode status = U_ZERO_ERROR;
    int quickCheckYesLen = normalizer->spanQuickCheckYes(u, status);
    assert(U_SUCCESS(status));
    assert(u.length() >= 0 && quickCheckYesLen >= 0);

    TokenSpan span;
    span.first = 0;
    icu::StringCharacterIterator it(u);
    while (it.getIndex() < quickCheckYesLen) {
        assert(it.hasNext());
        Unicode c = it.next32PostInc();
        span.second = span.first + 1;
        out.takeWithSpan(c, span);
        ++span.first;
    }

    // Everything after the prefix goes through the chunked normalizer.
    icu::UnicodeString remainder(u.tempSubString(quickCheckYesLen));
    CharsFromUnicodeStringImpl chars(remainder);  // TODO: docs say normalizeSecondAndAppend
    IcuNormalizeByChunks<CharsFromUnicodeStringImpl> norm(chars, normalizer);
    norm.takeAllWithSpan(out);
}
// --------------------------------------------------------------------------
// Normalizes the space (U+0020) characters of a value: leading and trailing
// spaces are discarded and every interior run of spaces is replaced by a
// single space. Other characters are copied unchanged.
//
// Returns the normalized copy; `value` itself is not modified.
// --------------------------------------------------------------------------
icu::UnicodeString
processor::normalize_enum (
 icu::UnicodeString const& value
)
// --------------------------------------------------------------------------
{
  icu::UnicodeString normalized;
  int32_t pos;
  bool space_before = false;

  for (pos=0; pos<value.length(); ++pos)
  {
    if (value[pos] == ' ')
    {
      // Only remember the run; it is emitted lazily so that trailing runs
      // are dropped.
      space_before = true;
      continue;
    }

    // A pending run becomes one separator, unless nothing has been emitted
    // yet (leading run). Testing the output instead of a separate "leading"
    // flag keeps the first interior run when the value has no leading
    // spaces.
    if (space_before && !normalized.isEmpty())
    {
      normalized += ' ';
    }

    space_before = false;
    normalized += value[pos];
  }

  return normalized;
}
Example #17
0
	// Creates a Java string from the ICU string's buffer and length via env->NewString().
	jobject operator()(icu::UnicodeString const& value) const {
		const jsize unitCount = value.length();
		return env->NewString(value.getBuffer(), unitCount);
	}
Example #18
0
 // Returns the iterator constructed from `s` and its length.
 inline
 cxxopts::UnicodeStringIterator
 end(const icu::UnicodeString& s)
 {
   const int32_t length = s.length();
   return cxxopts::UnicodeStringIterator(&s, length);
 }
Example #19
0
// Builds a QString from the ICU string's buffer, reinterpreted as QChars,
// and its length.
QString
EnabledLocalesModel::unicodeStringToQString( const icu::UnicodeString& sourceStr )
{
    const QChar* units = reinterpret_cast<const QChar*>( sourceStr.getBuffer() );
    const int32_t unitCount = sourceStr.length();

    return QString( units, unitCount );
}
Example #20
0
/**
 * Runs the checks enabled in the spoof checker's fChecks mask against the
 * identifier `id` and returns a bit mask of the checks that failed.
 *
 * @param sc        Spoof checker; validated through SpoofImpl::validateThis().
 * @param id        Identifier to check.
 * @param position  If not NULL, always set to 0 before returning from the
 *                  cleanup path.
 * @param status    ICU error code, passed to the helpers that can fail.
 * @return 0 when validateThis() returns NULL; otherwise the OR of the
 *         USPOOF_* flags of the failed checks (plus the restriction level
 *         itself when USPOOF_AUX_INFO is enabled).
 */
U_CAPI int32_t U_EXPORT2
uspoof_checkUnicodeString(const USpoofChecker *sc,
                          const icu::UnicodeString &id, 
                          int32_t *position,
                          UErrorCode *status) {
    const SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
    if (This == NULL) {
        return 0;
    }
    int32_t result = 0;

    // The identifier info object is only fetched when a check needs it; it is
    // handed back in the cleanup section at the end.
    IdentifierInfo *identifierInfo = NULL;
    if ((This->fChecks) & (USPOOF_RESTRICTION_LEVEL | USPOOF_MIXED_NUMBERS)) {
        identifierInfo = This->getIdentifierInfo(*status);
        if (U_FAILURE(*status)) {
            goto cleanupAndReturn;
        }
        identifierInfo->setIdentifier(id, *status);
        identifierInfo->setIdentifierProfile(*This->fAllowedCharsSet);
    }


    // Restriction level: fail when the identifier's level exceeds the
    // checker's configured level.
    if ((This->fChecks) & USPOOF_RESTRICTION_LEVEL) {
        URestrictionLevel idRestrictionLevel = identifierInfo->getRestrictionLevel(*status);
        if (idRestrictionLevel > This->fRestrictionLevel) {
            result |= USPOOF_RESTRICTION_LEVEL;
        }
        if (This->fChecks & USPOOF_AUX_INFO) {
            result |= idRestrictionLevel;
        }
    }

    // Mixed numbers: fail when the numerics set holds more than one entry.
    if ((This->fChecks) & USPOOF_MIXED_NUMBERS) {
        const UnicodeSet *numerics = identifierInfo->getNumerics();
        if (numerics->size() > 1) {
            result |= USPOOF_MIXED_NUMBERS;
        }

        // TODO: ICU4J returns the UnicodeSet of the numerics found in the identifier.
        //       We have no easy way to do the same in C.
        // if (checkResult != null) {
        //     checkResult.numerics = numerics;
        // }
    }


    // Character limit: fail on the first code point outside the allowed set.
    if (This->fChecks & (USPOOF_CHAR_LIMIT)) {
        int32_t i;
        UChar32 c;
        int32_t length = id.length();
        for (i=0; i<length ;) {
            c = id.char32At(i);
            i += U16_LENGTH(c);
            if (!This->fAllowedCharsSet->contains(c)) {
                result |= USPOOF_CHAR_LIMIT;
                break;
            }
        }
    }

    if (This->fChecks & 
        (USPOOF_WHOLE_SCRIPT_CONFUSABLE | USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_INVISIBLE)) {
        // These are the checks that need to be done on NFD input
        UnicodeString nfdText;
        gNfdNormalizer->normalize(id, nfdText, *status);
        int32_t nfdLength = nfdText.length();

        if (This->fChecks & USPOOF_INVISIBLE) {
           
            // scan for more than one occurrence of the same non-spacing mark
            // in a sequence of non-spacing marks.
            int32_t     i;
            UChar32     c;
            UChar32     firstNonspacingMark = 0;
            UBool       haveMultipleMarks = FALSE;  
            UnicodeSet  marksSeenSoFar;   // Set of combining marks in a single combining sequence.
            
            for (i=0; i<nfdLength ;) {
                c = nfdText.char32At(i);
                i += U16_LENGTH(c);
                if (u_charType(c) != U_NON_SPACING_MARK) {
                    // Any other character ends the current sequence of marks.
                    firstNonspacingMark = 0;
                    if (haveMultipleMarks) {
                        marksSeenSoFar.clear();
                        haveMultipleMarks = FALSE;
                    }
                    continue;
                }
                if (firstNonspacingMark == 0) {
                    // First mark of a sequence: remember it, the set is only
                    // filled once a second mark shows up.
                    firstNonspacingMark = c;
                    continue;
                }
                if (!haveMultipleMarks) {
                    marksSeenSoFar.add(firstNonspacingMark);
                    haveMultipleMarks = TRUE;
                }
                if (marksSeenSoFar.contains(c)) {
                    // report the error, and stop scanning.
                    // No need to find more than the first failure.
                    result |= USPOOF_INVISIBLE;
                    break;
                }
                marksSeenSoFar.add(c);
            }
        }
       
        
        if (This->fChecks & (USPOOF_WHOLE_SCRIPT_CONFUSABLE | USPOOF_MIXED_SCRIPT_CONFUSABLE)) {
            // The basic test is the same for both whole and mixed script confusables.
            // Compute the set of scripts that every input character has a confusable in.
            // For this computation an input character is always considered to be
            // confusable with itself in its own script.
            //
            // If the number of such scripts is two or more, and the input consisted of
            // characters all from a single script, we have a whole script confusable.
            // (The two scripts will be the original script and the one that is confusable)
            //
            // If the number of such scripts >= one, and the original input contained characters from
            // more than one script, we have a mixed script confusable.  (We can transform
            // some of the characters, and end up with a visually similar string all in
            // one script.)

            // The identifier info may not have been fetched above if neither
            // the restriction-level nor the mixed-numbers check is enabled.
            if (identifierInfo == NULL) {
                identifierInfo = This->getIdentifierInfo(*status);
                if (U_FAILURE(*status)) {
                    goto cleanupAndReturn;
                }
                identifierInfo->setIdentifier(id, *status);
            }

            int32_t scriptCount = identifierInfo->getScriptCount();
            
            ScriptSet scripts;
            This->wholeScriptCheck(nfdText, &scripts, *status);
            int32_t confusableScriptCount = scripts.countMembers();
            //printf("confusableScriptCount = %d\n", confusableScriptCount);
            
            if ((This->fChecks & USPOOF_WHOLE_SCRIPT_CONFUSABLE) &&
                confusableScriptCount >= 2 &&
                scriptCount == 1) {
                result |= USPOOF_WHOLE_SCRIPT_CONFUSABLE;
            }
        
            if ((This->fChecks & USPOOF_MIXED_SCRIPT_CONFUSABLE) &&
                confusableScriptCount >= 1 &&
                scriptCount > 1) {
                result |= USPOOF_MIXED_SCRIPT_CONFUSABLE;
            }
        }
    }

// Common exit: hand the identifier info back (it may still be NULL here) and
// report position 0.
cleanupAndReturn:
    This->releaseIdentifierInfo(identifierInfo);
    if (position != NULL) {
        *position = 0;
    }
    return result;
}
	/** Converts the whole of Source into Destination by delegating to the ranged overload. */
	void FStringConverter::ConvertString(const icu::UnicodeString& Source, FString& Destination)
	{
		const int32 WholeLength = Source.length();
		ConvertString(Source, 0, WholeLength, Destination);
	}
// --------------------------------------------------------------------------
// Handles a general entity reference named `name`; `attvalue` selects
// parse_included_attvalue() instead of parse_content() for the replacement
// text.
//
// The five predefined entities (lt, gt, amp, apos, quot) are expanded from
// built-in replacement text. Any other name is looked up in
// m_dtd.general_entities and its replacement (external input or internal
// text) is pushed onto m_buffers, parsed and popped again.
//
// Throws semantic_error for an undeclared, unparsed or recursive reference
// and for an external entity referenced from an attribute value; throws
// runtime_error when validating and the external entity cannot be resolved.
// --------------------------------------------------------------------------
void
processor::on_reference (
 icu::UnicodeString const& name,
 bool attvalue
)
// --------------------------------------------------------------------------
{
  // Outside attribute values and outside any entity being expanded, flush
  // pending character data and call reference(). Processing continues below
  // in either case.
  if (!attvalue && m_ref_history.empty() && m_auto_replace_general)
  {
    if (!m_character_data.isEmpty())
    {
      character_data(m_character_data);
      m_character_data.remove();
    }

    reference(name);
  }
  
  // Pre-defined entities.
  // Each branch pushes a buffer with the replacement text, parses it and
  // pops the buffer again.
  // NOTE(review): who owns the io::uistring allocated with `new` is not
  // visible here -- confirm that the buffer takes ownership.
  if (name == "lt")
  {
    icu::UnicodeString entity = "&#60;";
    m_buffers.emplace("&lt;", new io::uistring(entity, false));

    if (attvalue)
    {
      parse_included_attvalue();
    }
    else
    {
      parse_content();
    }
    
    m_buffers.pop();
    return;
  }
  else
  if (name == "gt")
  {
    icu::UnicodeString entity = ">";
    m_buffers.emplace("&gt;", new io::uistring(entity, false));

    if (attvalue)
    {
      parse_included_attvalue();
    }
    else
    {
      parse_content();
    }

    m_buffers.pop();
    return;
  }
  else
  if (name == "amp")
  {
    icu::UnicodeString entity = "&#38;";
    m_buffers.emplace("&amp;", new io::uistring(entity, false));

    if (attvalue)
    {
      parse_included_attvalue();
    }
    else
    {
      parse_content();
    }

    m_buffers.pop();
    return;
  }
  else
  if (name == "apos")
  {
    icu::UnicodeString entity = "'";
    m_buffers.emplace("&apos;", new io::uistring(entity, false));

    if (attvalue)
    {
      parse_included_attvalue();
    }
    else
    {
      parse_content();
    }

    m_buffers.pop();
    return;
  }
  else
  if (name == "quot")
  {
    icu::UnicodeString entity = '"';
    m_buffers.emplace("&quot;", new io::uistring(entity, false));

    if (attvalue)
    {
      parse_included_attvalue();
    }
    else
    {
      parse_content();
    }

    m_buffers.pop();
    return;
  }

  std::pair<std::set<icu::UnicodeString>::iterator, bool> hist;
  std::map<icu::UnicodeString, general_entity_declaration*>::iterator it;

  // Look for the entity.
  it = m_dtd.general_entities.find(name);
  
  if (it == m_dtd.general_entities.end())
  {
    std::string msg;

    msg += "Reference to undeclared general entity '";
    name.toUTF8String(msg);
    msg += "'.";

    throw semantic_error(msg);
  }
  
  // Unparsed entity references are forbidden.
  if (it->second->unparsed)
  {
    std::string msg;

    msg += "Reference to an unparsed general entity '";
    name.toUTF8String(msg);
    msg += "'.";

    throw semantic_error(msg);
  }
  
  // Check for recursive references.
  // The insert fails when the name is already in the history, i.e. the
  // entity is currently being expanded.
  hist = m_ref_history.insert(name);

  if (!hist.second)
  {
    std::string msg;

    msg += "Recursive reference to general entity '";
    name.toUTF8String(msg);
    msg += "'.";
    
    throw semantic_error(msg);
  }

  // Process the entity.
  // The previous m_parsing_entity value is saved so it can be restored at
  // the end; nameutf8 ("&name;") is the name given to the pushed buffer.
  bool const state = m_parsing_entity;
  std::string nameutf8 = "&";

  name.toUTF8String(nameutf8);
  nameutf8 += ';';
  m_parsing_entity = true;
  
  // An entity with a system or public identifier is treated as external.
  if (!it->second->id.sys.isBogus() || !it->second->id.pub.isBogus())
  {
    // NOTE(review): this throw happens after the history insert and after
    // m_parsing_entity was set, and neither is undone here.
    if (attvalue)
    {
      std::string msg;

      msg += "Reference to an external parsed general entity '";
      name.toUTF8String(msg);
      msg += "' in attribute value.";

      throw semantic_error(msg);
    }

    io::input* input = nullptr;
    std::string encoding;

    resolve_id(it->second->id, input, encoding);

    if (input == nullptr)
    {
      // Without validation an unresolved external entity is skipped.
      if (m_validating)
      {
        throw runtime_error(
        "Could not dereference external parsed general entity."
        );
      }
    }
    else
    {
      // Remember the buffer stack depth so the handler below can tell
      // whether the buffer was pushed before the exception.
      size_t const size = m_buffers.size();

      try
      {
        m_buffers.emplace(nameutf8, *input, true, encoding);

        if (attvalue)
        {
          parse_included_attvalue();
        }
        else
        {
          parse_content();
        }

        m_buffers.pop();
      }
      catch (...)
      {
        // Undo the push if it happened, free the input, drop the history
        // entry and rethrow. m_parsing_entity is not restored on this path.
        if (size < m_buffers.size())
        {
          m_buffers.pop();
        }

        delete input;
        m_ref_history.erase(hist.first);
        throw;
      }
      
      delete input;
    }
  }
  else
  {
    // Internal entity: parse its stored replacement text.
    // NOTE(review): unlike the external branch there is no try/catch here,
    // so if parsing throws, this function does not pop the buffer, erase the
    // history entry or restore m_parsing_entity.
    m_buffers.emplace(
     nameutf8, new io::uistring(it->second->text_or_notation, false)
    );

    if (attvalue)
    {
      parse_included_attvalue();
    }
    else
    {
      parse_content();
    }

    m_buffers.pop();
  }
  
  // Restore the saved state and allow the entity to be referenced again.
  m_parsing_entity = state;
  m_ref_history.erase(hist.first);
}
// --------------------------------------------------------------------------
// Handles a parameter entity reference named `name`; `entityvalue` selects
// parse_included_entityvalue() instead of parse_ext_subset() for the
// replacement.
//
// The entity is looked up in m_dtd.parameter_entities and its replacement
// (external input or internal literal) is pushed onto m_buffers, parsed and
// popped again.
//
// Throws semantic_error for a recursive reference and, when validating, for
// an undeclared entity; throws runtime_error when validating and an external
// entity cannot be resolved. When not validating, both of those failure cases
// set m_dtd_stop instead.
// --------------------------------------------------------------------------
void
processor::on_pe_reference (
 icu::UnicodeString const& name,
 bool entityvalue
)
// --------------------------------------------------------------------------
{
  // NOTE(review): `text` is never used in this function.
  icu::UnicodeString text;
  std::map<icu::UnicodeString, parameter_entity_declaration*>::iterator it;
  std::pair<std::set<icu::UnicodeString>::iterator, bool> hist;
  
  // When not inside an entity or an entity value, record the reference as a
  // parameter_reference element of the document type's subset.
  if (!m_parsing_entity && !entityvalue && m_ref_history.empty())
  {
    dtd_element e;
    
    e.type = dtd_element::parameter_reference;
    e.text = new icu::UnicodeString(name);

    m_document_type.m_subset.emplace_back(std::move(e));
  }

  // Look for the entity.
  it = m_dtd.parameter_entities.find(name);

  if (it == m_dtd.parameter_entities.end())
  {
    if (m_validating)
    {
      std::string msg;

      msg += "Reference to undeclared parameter entity '";
      name.toUTF8String(msg);
      msg += "'.";
      
      throw semantic_error(msg);
    }
    else
    {
      // Not validating: set the stop flag and give up on this reference.
      m_dtd_stop = true;
      return;
    }
  }

  // Check for recursive references.
  // The insert fails when the name is already in the history, i.e. the
  // entity is currently being expanded.
  hist = m_ref_history.insert(name);
  
  if (!hist.second)
  {
    std::string msg;

    msg += "Recursive reference to parameter entity '";
    name.toUTF8String(msg);
    msg += "'.";
    
    throw semantic_error(msg);
  }
  
  // Process the entity.
  // The previous m_parsing_entity value is saved so it can be restored at
  // the end; nameutf8 ("%name;") is the name given to the pushed buffer.
  bool const state = m_parsing_entity;
  std::string nameutf8 = "%";

  name.toUTF8String(nameutf8);
  nameutf8 += ';';
  m_parsing_entity = true;

  if (it->second->external)
  {
    io::input* input = nullptr;
    std::string encoding;

    resolve_id(it->second->id, input, encoding);

    if (input == nullptr)
    {
      if (m_validating)
      {
        throw runtime_error(
        "Could not dereference external parameter entity."
        );
      }
      else
      {
        m_dtd_stop = true;
      }
    }
    else
    {
      // Remember the buffer stack depth so the handler below can tell
      // whether the buffer was pushed before the exception.
      size_t const size = m_buffers.size();

      try
      {
        if (entityvalue)
        {
          m_buffers.emplace(nameutf8, *input, true, encoding);
          parse_included_entityvalue();
          m_buffers.pop();
        }
        else
        {
          // NOTE(review): the extra trailing `true` is an argument of the
          // buffer constructor whose meaning is not visible here.
          m_buffers.emplace(nameutf8, *input, true, encoding, true);
          parse_ext_subset();
          m_buffers.pop();
        }
      }
      catch (...)
      {
        // Undo the push if it happened, free the input, drop the history
        // entry and rethrow. m_parsing_entity is not restored on this path.
        if (size < m_buffers.size())
        {
          m_buffers.pop();
        }

        delete input;
        m_ref_history.erase(hist.first);
        throw;
      }
      
      delete input;
    }
  }
  else
  {
    // The literal value is stored as public ID.
    // NOTE(review): there is no try/catch on this path, so if parsing
    // throws, this function does not pop the buffer, erase the history entry
    // or restore m_parsing_entity.
    if (entityvalue)
    {
      m_buffers.emplace(
       nameutf8, new io::uistring(it->second->id.pub, false)
      );
      parse_included_entityvalue();
      m_buffers.pop();
    }
    else
    {
      m_buffers.emplace(
       nameutf8, new io::uistring(it->second->id.pub, false), false
      );
      parse_ext_subset();
      m_buffers.pop();
    }
  }
  
  // Restore the saved state and allow the entity to be referenced again.
  m_parsing_entity = state;
  m_ref_history.erase(hist.first);
}
	/** Returns the native string length of the whole of Source by delegating to the ranged overload. */
	int32 GetNativeStringLength(const icu::UnicodeString& Source)
	{
		const int32 WholeLength = Source.length();
		return GetNativeStringLength(Source, 0, WholeLength);
	}
	/**
	 * Returns the number of code points in the given range of Source, or 0 for a zero-length range.
	 * NOTE(review): the <true, 4> arguments presumably select the variant for a 4-byte native character type - confirm against the primary template.
	 */
	int32 GetNativeStringLengthImpl<true, 4>(const icu::UnicodeString& Source, const int32 InSourceStartIndex, const int32 InSourceLength)
	{
		if (InSourceLength == 0)
		{
			return 0;
		}

		return Source.countChar32(InSourceStartIndex, InSourceLength);
	}
// --------------------------------------------------------------------------
// Records an attribute of the element being parsed by moving `value` into
// m_sax_attrs under `name`.
//
// Throws semantic_error when the name is already present (the element stack
// is unwound into a path string for the message) and not_implemented when
// validating.
// --------------------------------------------------------------------------
void
processor::on_attribute (
 icu::UnicodeString const& name,
 icu::UnicodeString&& value
)
// --------------------------------------------------------------------------
{
  // Check for duplicates.
  bool const inserted = m_sax_attrs.emplace(name, std::move(value)).second;

  if (!inserted)
  {
    std::string msg = "Duplicate attribute on an element.\nAttribute name: ";
    name.toUTF8String(msg);

    // Empty the stack, prepending each element type so the outermost
    // element ends up first in the path.
    std::string tree;

    while (!m_element_info.empty())
    {
      std::string tmp;

      m_element_info.top().type.toUTF8String(tmp);
      m_element_info.pop();
      tree.insert(0, "/" + tmp);
    }

    msg += "\nElement tree: ";
    msg += tree;

    throw semantic_error(msg);
  }
  
  /*
  icu::UnicodeString normalized;
  UChar32 chr;
  int32_t const size = value.countChar32();

  for (int32_t i=0; i<size; i=value.moveIndex32(i, 1))
  {
    chr = value.char32At(i);

    if (chr == 0x20 || chr == 0x0D || chr == 0x0A || chr == 0x09)
    {
      normalized += 0x20;
    }
    else
    if (chr == '&')
    {
      int32_t end = value.indexOf(';', i);
      icu::UnicodeString name{value, i+1, end-i-1};

      i = end;
      
      if (name[0] == '#')
      {
        normalized += dereference_character(name.tempSubString(1));
      }
      else
      {
        if (on_reference(name, true))
        {
          icu::UnicodeString text;

          m_buffers.top().pipe(text);
          normalized += normalize_attvalue(text);
          m_buffers.pop();
        }
      }
    }
    else
    {
      normalized += chr;
    }
  }

  return normalized;
  */

  if (!m_validating)
  {
    return;
  }

  throw not_implemented("Attribute validity checking.");
}
Example #27
0
 // Hashes an ICU string with its own hashCode(), converted to size_t.
 size_t hash<icu::UnicodeString>::operator()(const icu::UnicodeString& x) const
 {
     const int32_t code = x.hashCode();
     return code;
 }
Example #28
0
// Truncates the given ICU string to length zero.
inline void clearString(icu::UnicodeString& str) {
  const int32_t emptyLength = 0;
  str.truncate(emptyLength);
}