/*
 * Create a JxString from data in the specified encoding.
 *
 *   in_data     - source bytes to convert
 *   in_size     - size argument handed to the decoder together with in_data
 *   in_encoding - encoding of in_data; only JX_ENCODING_UTF8 is handled
 *
 * Returns the new JxString, or NULL if the encoding is not supported, if the
 * conversion of non-empty input produced no character buffer, or if the
 * JxString object itself could not be allocated.
 */
JxString* jx_string_create(char *in_data, long in_size, JxCoreEncoding in_encoding)
{
    JxChar *characters = NULL;
    long length = 0;
    JxString *string = NULL;

    /* only UTF-8 input is understood */
    if (in_encoding != JX_ENCODING_UTF8)
    {
        return NULL;
    }

    characters = _utf8_to_utf32(in_data, in_size, &length);

    /* NOTE(review): assumes the decoder reports failure by returning NULL -
     * confirm against _utf8_to_utf32.  Zero-size input is deliberately let
     * through unchanged, since an empty conversion may legitimately yield no
     * buffer. */
    if (characters == NULL && in_size != 0)
    {
        return NULL;
    }

    string = _jx_core_malloc(sizeof(JxString), 0);
    if (string == NULL)
    {
        /* NOTE(review): 'characters' is not released here because its
         * deallocator is not visible in this part of the file; if
         * _jx_core_malloc can actually return NULL, free it here. */
        return NULL;
    }

    string->characters = characters;
    string->length = length;
    return string;
}
/** * Truncates a string to be at most the given number of characters in length. */ static void _truncate_string( SG_context* pCtx, //< [in] [out] Error and context info. SG_string* sValue, //< [in] [out] The string to truncate. SG_uint32 uLength //< [in] The length to truncate the string to, in characters. ) { SG_uint32 uValue = 0u; SG_int32* pValue32 = NULL; char* szResult = NULL; // get the length of the value and check if it's too long SG_ERR_CHECK( SG_utf8__length_in_characters__sz(pCtx, SG_string__sz(sValue), &uValue) ); if (uValue > uLength) { SG_uint32 uResult = 0u; // convert the value to UTF32 // I can't come up with a good way to do this in UTF8 using the APIs available // in sg_utf8. Truncating to bytes means that we might not end up on a character // boundary, and there's no good way to ask how many bytes a given character // requires, so iterating through characters doesn't help much either. SG_ERR_CHECK( _utf8_to_utf32(pCtx, SG_string__sz(sValue), &pValue32, NULL) ); // chop the buffer down to the given length pValue32[uLength] = 0; // convert the value back to UTF8 SG_ERR_CHECK( _utf32_to_utf8(pCtx, pValue32, &szResult, &uResult) ); // replace the old value with the new SG_ERR_CHECK( SG_string__adopt_buffer(pCtx, sValue, szResult, uResult) ); szResult = NULL; } fail: SG_NULLFREE(pCtx, pValue32); SG_NULLFREE(pCtx, szResult); return; }
/** * Finds any character from a given set within a string and replaces them with a * specified replacement string. */ static void _replace_chars_with_string( SG_context* pCtx, //< [in] [out] Error and context info. SG_string* sValue, //< [in] [out] String to perform replacements in. const char* szChars, //< [in] Set of characters to replace, as a string. //< NULL is treated as an empty string. const char* szReplacement //< [in] String to use as a replacement for the characters. //< This whole string is a replacement for each found character. //< NULL is treated as an empty string. ) { SG_int32* pValue32 = NULL; SG_uint32 uValue32 = 0u; SG_int32* pChars32 = NULL; SG_uint32 uChars32 = 0u; SG_int32* pReplacement32 = NULL; SG_uint32 uReplacement32 = 0u; SG_int32* pResult32 = NULL; SG_uint32 uResult32 = 0u; char* szResult = NULL; SG_uint32 uResult = 0u; SG_uint32 uValueIndex = 0u; SG_NULLARGCHECK(sValue); // treat NULLs as empty strings if (szChars == NULL) { szChars = ""; } if (szReplacement == NULL) { szReplacement = ""; } // convert everything to UTF32 // I couldn't come up with a way to do this directly in UTF8 using the APIs // available in sg_utf8. 
SG_ERR_CHECK( _utf8_to_utf32(pCtx, SG_string__sz(sValue), &pValue32, &uValue32) ); SG_ERR_CHECK( _utf8_to_utf32(pCtx, szChars, &pChars32, &uChars32) ); SG_ERR_CHECK( _utf8_to_utf32(pCtx, szReplacement, &pReplacement32, &uReplacement32) ); // allocate a result buffer if (uReplacement32 > 1u) { // largest possible size we could end up with is if we replace every single // character in the value with the replacement string SG_ERR_CHECK( SG_allocN(pCtx, (uReplacement32 * uValue32) + 1u, pResult32) ); } else { // largest possible size we could end up with is if we do no replacements // at all and are left with exactly the input value SG_ERR_CHECK( SG_allocN(pCtx, uValue32 + 1u, pResult32) ); } // run through each character in the value for (uValueIndex = 0u; uValueIndex < uValue32; ++uValueIndex) { SG_int32 iValueChar = pValue32[uValueIndex]; SG_bool bReplace = SG_FALSE; SG_uint32 uCharsIndex = 0u; // check if this character should be replaced for (uCharsIndex = 0u; uCharsIndex < uChars32; ++uCharsIndex) { if (iValueChar == pChars32[uCharsIndex]) { bReplace = SG_TRUE; break; } } if (bReplace == SG_FALSE) { // append the character to the output pResult32[uResult32] = iValueChar; ++uResult32; } else { // append the replacement string to the output memcpy((void*)(pResult32 + uResult32), (void*)pReplacement32, uReplacement32 * sizeof(SG_int32)); uResult32 += uReplacement32; } } // NULL-terminate the result and convert it back to UTF8 pResult32[uResult32] = 0; SG_ERR_CHECK( _utf32_to_utf8(pCtx, pResult32, &szResult, &uResult) ); // return the result by replacing the original value's contents SG_ERR_CHECK( SG_string__adopt_buffer(pCtx, sValue, szResult, uResult) ); szResult = NULL; fail: SG_NULLFREE(pCtx, pValue32); SG_NULLFREE(pCtx, pChars32); SG_NULLFREE(pCtx, pReplacement32); SG_NULLFREE(pCtx, pResult32); SG_NULLFREE(pCtx, szResult); return; }