Exemple #1
0
JxString*
jx_string_create(char *in_data, long in_size, JxCoreEncoding in_encoding)
/* create a JxString from data in the specified encoding;
 in_data and in_size are handed unchanged to the UTF-8 decoder.
 Only JX_ENCODING_UTF8 is supported: any other encoding returns NULL.
 Also returns NULL if the JxString itself cannot be allocated.
 NOTE(review): the decoder's result is stored without being checked, so a
 failed conversion presumably yields a JxString whose characters pointer is
 NULL rather than a NULL return - confirm how _utf8_to_utf32 reports errors. */
{
    JxString *string = NULL;
    long length = 0;
    
    /* reject unsupported encodings before doing any work */
    if (in_encoding != JX_ENCODING_UTF8)
        return NULL;
    
    /* allocate the container before decoding: if this fails there is no
     decoded buffer yet that would otherwise be leaked */
    string = _jx_core_malloc(sizeof(JxString), 0);
    if (string == NULL)
        return NULL;
    
    /* decode, then record the buffer and the length the decoder wrote */
    string->characters = _utf8_to_utf32(in_data, in_size, &length);
    string->length = length;
    
    return string;
}
/**
 * Shortens a string so that it holds no more than a given number of characters.
 * A string that is already short enough is left untouched.
 */
static void _truncate_string(
    SG_context* pCtx,   //< [in] [out] Error and context info.
    SG_string*  sValue, //< [in] [out] The string to shorten in place.
    SG_uint32   uLength //< [in] Maximum number of characters to keep.
)
{
    SG_uint32 uCharCount = 0u;
    SG_uint32 uUtf8Size  = 0u;
    SG_int32* pUtf32     = NULL;
    char*     szUtf8     = NULL;

    // count the characters currently held by the value
    SG_ERR_CHECK(  SG_utf8__length_in_characters__sz(pCtx, SG_string__sz(sValue), &uCharCount)  );

    // already within the limit: nothing has been allocated yet, so jumping to
    // the shared exit below simply returns
    if (uCharCount <= uLength)
    {
        goto fail;
    }

    // Work in UTF32 so that one array element corresponds to one character.
    // Cutting the UTF8 bytes directly could land in the middle of a multi-byte
    // character, and sg_utf8 offers no convenient way to ask how many bytes a
    // particular character occupies.
    SG_ERR_CHECK(  _utf8_to_utf32(pCtx, SG_string__sz(sValue), &pUtf32, NULL)  );

    // terminate the UTF32 buffer right after the last character we want to keep
    // (uLength is smaller than the character count measured above)
    pUtf32[uLength] = 0;

    // encode the shortened text as UTF8 again
    SG_ERR_CHECK(  _utf32_to_utf8(pCtx, pUtf32, &szUtf8, &uUtf8Size)  );

    // hand the new buffer to the string, then forget our pointer so that the
    // cleanup below does not free it
    SG_ERR_CHECK(  SG_string__adopt_buffer(pCtx, sValue, szUtf8, uUtf8Size)  );
    szUtf8 = NULL;

fail:
    SG_NULLFREE(pCtx, pUtf32);
    SG_NULLFREE(pCtx, szUtf8);
    return;
}
/**
 * Rewrites a string in place, substituting a replacement string for every
 * character that belongs to a given set of characters.
 */
static void _replace_chars_with_string(
    SG_context* pCtx,         //< [in] [out] Error and context info.
    SG_string*  sValue,       //< [in] [out] String whose contents are rewritten.
    const char* szChars,      //< [in] Characters to look for, given as a string.
    //<      NULL means the same as an empty string.
    const char* szReplacement //< [in] Text substituted for each matching character.
    //<      The entire string is inserted once per match.
    //<      NULL means the same as an empty string.
)
{
    SG_int32* pSource32  = NULL; // UTF32 copy of the incoming value
    SG_uint32 uSourceLen = 0u;
    SG_int32* pSet32     = NULL; // UTF32 copy of the characters to look for
    SG_uint32 uSetLen    = 0u;
    SG_int32* pInsert32  = NULL; // UTF32 copy of the replacement text
    SG_uint32 uInsertLen = 0u;
    SG_int32* pOut32     = NULL; // UTF32 output being assembled
    SG_uint32 uOutLen    = 0u;
    char*     szOut      = NULL; // UTF8 encoding of the finished output
    SG_uint32 uOutSize   = 0u;
    SG_uint32 uPerChar   = 0u;   // most output elements one input character can produce
    SG_uint32 uSourcePos = 0u;

    SG_NULLARGCHECK(sValue);

    // a missing set or replacement behaves like an empty string
    szChars       = (szChars != NULL)       ? szChars       : "";
    szReplacement = (szReplacement != NULL) ? szReplacement : "";

    // Do all of the work in UTF32, where each array element is one character;
    // the sg_utf8 APIs offer no direct way to do this on the UTF8 form.
    SG_ERR_CHECK(  _utf8_to_utf32(pCtx, SG_string__sz(sValue), &pSource32, &uSourceLen)  );
    SG_ERR_CHECK(  _utf8_to_utf32(pCtx, szChars, &pSet32, &uSetLen)  );
    SG_ERR_CHECK(  _utf8_to_utf32(pCtx, szReplacement, &pInsert32, &uInsertLen)  );

    // Size the output for the worst case, plus one element for the terminator.
    // Each input character yields either itself (one element) or the whole
    // replacement, so it contributes at most max(replacement length, 1).
    uPerChar = (uInsertLen > 1u) ? uInsertLen : 1u;
    SG_ERR_CHECK(  SG_allocN(pCtx, (uPerChar * uSourceLen) + 1u, pOut32)  );

    // walk the input one character at a time
    for (uSourcePos = 0u; uSourcePos < uSourceLen; ++uSourcePos)
    {
        SG_int32  iCurrent = pSource32[uSourcePos];
        SG_uint32 uSetPos  = 0u;

        // linear scan of the set; stopping before the end means we found a match
        while (uSetPos < uSetLen && pSet32[uSetPos] != iCurrent)
        {
            ++uSetPos;
        }

        if (uSetPos < uSetLen)
        {
            // matched: emit the replacement text instead of the character
            memcpy(pOut32 + uOutLen, pInsert32, uInsertLen * sizeof(SG_int32));
            uOutLen += uInsertLen;
        }
        else
        {
            // not in the set: carry the character over unchanged
            pOut32[uOutLen] = iCurrent;
            ++uOutLen;
        }
    }

    // terminate the UTF32 output and encode it as UTF8
    pOut32[uOutLen] = 0;
    SG_ERR_CHECK(  _utf32_to_utf8(pCtx, pOut32, &szOut, &uOutSize)  );

    // hand the new buffer to the string, then forget our pointer so that the
    // cleanup below does not free it
    SG_ERR_CHECK(  SG_string__adopt_buffer(pCtx, sValue, szOut, uOutSize)  );
    szOut = NULL;

fail:
    SG_NULLFREE(pCtx, pSource32);
    SG_NULLFREE(pCtx, pSet32);
    SG_NULLFREE(pCtx, pInsert32);
    SG_NULLFREE(pCtx, pOut32);
    SG_NULLFREE(pCtx, szOut);
    return;
}