/*
 * Exercises tr_utf8clean() on four inputs: plain ASCII with length -1,
 * a 5-byte prefix of an ASCII string, a byte string that is not UTF-8,
 * and the same text already encoded as UTF-8.
 *
 * Every buffer returned by tr_utf8clean() is released with tr_free().
 * Returns 0 once all checks have been reached.
 * NOTE(review): check()/check_streq() are defined elsewhere; they
 * presumably leave the function early on failure -- confirm.
 */
static int test_utf8( void )
{
    const char * in;
    char * out;

    /* plain ASCII, length given as -1: output must equal the input */
    in = "hello world";
    out = tr_utf8clean( in, -1 );
    check_streq (in, out);
    tr_free( out );

    /* explicit length 5: only the "hello" prefix is expected back */
    in = "hello world";
    out = tr_utf8clean( in, 5 );
    check_streq ("hello", out);
    tr_free( out );

    /*
     * this version is not utf-8: the 17 bytes below are the cp1251
     * encoding of the Russian text used in the next case.  They are
     * written as hex escapes so the test input does not change when the
     * source file itself is re-encoded.
     */
    in = "\xD2\xF0\xF3\xE4\xED\xEE \xE1\xFB\xF2\xFC \xC1\xEE\xE3\xEE\xEC";
    out = tr_utf8clean( in, 17 );
    check( out != NULL );
    /* 17 = one output byte per input byte; 32 = 15 two-byte chars + 2 spaces */
    check( ( strlen( out ) == 17 ) || ( strlen( out ) == 32 ) );
    check( tr_utf8_validate( out, -1, NULL ) );
    tr_free( out );

    /*
     * same string, but utf-8 clean (again spelled with escapes so the
     * bytes are independent of the file encoding): it must validate and
     * come back unchanged.
     */
    in = "\xD0\xA2\xD1\x80\xD1\x83\xD0\xB4\xD0\xBD\xD0\xBE"
         " \xD0\xB1\xD1\x8B\xD1\x82\xD1\x8C"
         " \xD0\x91\xD0\xBE\xD0\xB3\xD0\xBE\xD0\xBC";
    out = tr_utf8clean( in, -1 );
    check( out != NULL );
    check( tr_utf8_validate( out, -1, NULL ) );
    check_streq (in, out);
    tr_free( out );

    return 0;
}
/*
 * Runs tr_utf8clean () over a series of inputs (ASCII, a length-limited
 * prefix, cp866 bytes, valid UTF-8, and two malformed sequences that start
 * with the lead byte 0xF4) and checks the cleaned result each time.
 *
 * Each result is handed back to tr_free () before the next case starts.
 * Returns 0 after the last case.
 * NOTE(review): check ()/check_streq () come from elsewhere; they
 * presumably return early from this function when a check fails -- confirm.
 */
static int
test_utf8 (void)
{
  /* Case 1: ASCII with TR_BAD_SIZE as the length; expect an identical copy. */
  {
    const char * input = "hello world";
    char * cleaned = tr_utf8clean (input, TR_BAD_SIZE);

    check_streq (input, cleaned);
    tr_free (cleaned);
  }

  /* Case 2: only the first five bytes are requested. */
  {
    const char * input = "hello world";
    char * cleaned = tr_utf8clean (input, 5);

    check_streq ("hello", cleaned);
    tr_free (cleaned);
  }

  /* Case 3: this version is not utf-8 (but cp866) */
  {
    const char * input = "\x92\xE0\xE3\xA4\xAD\xAE \xA1\xEB\xE2\xEC \x81\xAE\xA3\xAE\xAC";
    char * cleaned = tr_utf8clean (input, 17);
    size_t len;

    check (cleaned != NULL);
    /* measured only after the NULL check above, as in the original order */
    len = strlen (cleaned);
    check ((len == 17) || (len == 33));
    check (tr_utf8_validate (cleaned, TR_BAD_SIZE, NULL));
    tr_free (cleaned);
  }

  /* Case 4: same string, but utf-8 clean; must validate and be unchanged. */
  {
    const char * input = "Трудно быть Богом";
    char * cleaned = tr_utf8clean (input, TR_BAD_SIZE);

    check (cleaned != NULL);
    check (tr_utf8_validate (cleaned, TR_BAD_SIZE, NULL));
    check_streq (input, cleaned);
    tr_free (cleaned);
  }

  /* Case 5: lead byte 0xF4 directly followed by a NUL inside the 4-byte span. */
  {
    const char * input = "\xF4\x00\x81\x82";
    char * cleaned = tr_utf8clean (input, 4);
    size_t len;

    check (cleaned != NULL);
    len = strlen (cleaned);
    check ((len == 1) || (len == 2));
    check (tr_utf8_validate (cleaned, TR_BAD_SIZE, NULL));
    tr_free (cleaned);
  }

  /* Case 6: lead byte 0xF4 followed by ASCII '3' and two continuation bytes. */
  {
    const char * input = "\xF4\x33\x81\x82";
    char * cleaned = tr_utf8clean (input, 4);
    size_t len;

    check (cleaned != NULL);
    len = strlen (cleaned);
    check ((len == 4) || (len == 7));
    check (tr_utf8_validate (cleaned, TR_BAD_SIZE, NULL));
    tr_free (cleaned);
  }

  return 0;
}