const char * iconv_canonicalize (const char * name) { const char* code; char buf[MAX_WORD_LENGTH+10+1]; const char* cp; char* bp; const struct alias * ap; unsigned int count; unsigned int index; const char* pool; /* Before calling aliases_lookup, convert the input string to upper case, * and check whether it's entirely ASCII (we call gperf with option "-7" * to achieve a smaller table) and non-empty. If it's not entirely ASCII, * or if it's too long, it is not a valid encoding name. */ for (code = name;;) { /* Search code in the table. */ for (cp = code, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) { unsigned char c = * (unsigned char *) cp; if (c >= 0x80) goto invalid; if (c >= 'a' && c <= 'z') c -= 'a'-'A'; *bp = c; if (c == '\0') break; if (--count == 0) goto invalid; } for (;;) { if (bp-buf >= 10 && memcmp(bp-10,"//TRANSLIT",10)==0) { bp -= 10; *bp = '\0'; continue; } if (bp-buf >= 8 && memcmp(bp-8,"//IGNORE",8)==0) { bp -= 8; *bp = '\0'; continue; } break; } if (buf[0] == '\0') { code = locale_charset(); /* Avoid an endless loop that could occur when using an older version of localcharset.c. */ if (code[0] == '\0') goto invalid; continue; } pool = stringpool; ap = aliases_lookup(buf, (unsigned int)(bp-buf)); if (ap == NULL) { pool = stringpool2; ap = aliases2_lookup(buf); if (ap == NULL) goto invalid; } if (ap->encoding_index == ei_local_char) { code = locale_charset(); /* Avoid an endless loop that could occur when using an older version of localcharset.c. */ if (code[0] == '\0') goto invalid; continue; } if (ap->encoding_index == ei_local_wchar_t) { /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode. This is also the case on native Woe32 systems and Cygwin >= 1.7, where we know that it is UTF-16. */ #if ((defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__) || (defined __CYGWIN__ && CYGWIN_VERSION_DLL_MAJOR >= 1007) if (sizeof(wchar_t) == 4) { index = ei_ucs4internal; break; } if (sizeof(wchar_t) == 2) { # if WORDS_LITTLEENDIAN index = ei_utf16le; # else index = ei_utf16be; # endif break; } #elif __STDC_ISO_10646__ if (sizeof(wchar_t) == 4) { index = ei_ucs4internal; break; } if (sizeof(wchar_t) == 2) { index = ei_ucs2internal; break; } if (sizeof(wchar_t) == 1) { index = ei_iso8859_1; break; } #endif } index = ap->encoding_index; break; } return all_canonical[index] + pool; invalid: return name; }
iconv_t iconv_open (const char* tocode, const char* fromcode) { struct conv_struct * cd; char buf[MAX_WORD_LENGTH+10+1]; const char* cp; char* bp; const struct alias * ap; unsigned int count; unsigned int from_index; int from_wchar; unsigned int to_index; int to_wchar; int transliterate = 0; /* Before calling aliases_lookup, convert the input string to upper case, * and check whether it's entirely ASCII (we call gperf with option "-7" * to achieve a smaller table) and non-empty. If it's not entirely ASCII, * or if it's too long, it is not a valid encoding name. */ for (to_wchar = 0;;) { /* Search tocode in the table. */ for (cp = tocode, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) { unsigned char c = * (unsigned char *) cp; if (c >= 0x80) goto invalid; if (c >= 'a' && c <= 'z') c -= 'a'-'A'; *bp = c; if (c == '\0') break; if (--count == 0) goto invalid; } if (bp-buf > 10 && memcmp(bp-10,"//TRANSLIT",10)==0) { bp -= 10; *bp = '\0'; transliterate = 1; } ap = aliases_lookup(buf,bp-buf); if (ap == NULL) { ap = aliases2_lookup(buf); if (ap == NULL) goto invalid; } if (ap->encoding_index == ei_local_char) { tocode = locale_charset(); if (tocode != NULL) continue; goto invalid; } if (ap->encoding_index == ei_local_wchar_t) { #if __STDC_ISO_10646__ if (sizeof(wchar_t) == 4) { to_index = ei_ucs4internal; break; } if (sizeof(wchar_t) == 2) { to_index = ei_ucs2internal; break; } if (sizeof(wchar_t) == 1) { to_index = ei_iso8859_1; break; } #endif #if HAVE_MBRTOWC to_wchar = 1; tocode = locale_charset(); if (tocode != NULL) continue; #endif goto invalid; } to_index = ap->encoding_index; break; } for (from_wchar = 0;;) { /* Search fromcode in the table. */ for (cp = fromcode, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) { unsigned char c = * (unsigned char *) cp; if (c >= 0x80) goto invalid; if (c >= 'a' && c <= 'z') c -= 'a'-'A'; *bp = c; if (c == '\0') break; if (--count == 0) goto invalid; } if (bp-buf > 10 && memcmp(bp-10,"//TRANSLIT",10)==0) { bp -= 10; *bp = '\0'; } ap = aliases_lookup(buf,bp-buf); if (ap == NULL) { ap = aliases2_lookup(buf); if (ap == NULL) goto invalid; } if (ap->encoding_index == ei_local_char) { fromcode = locale_charset(); if (fromcode != NULL) continue; goto invalid; } if (ap->encoding_index == ei_local_wchar_t) { #if __STDC_ISO_10646__ if (sizeof(wchar_t) == 4) { from_index = ei_ucs4internal; break; } if (sizeof(wchar_t) == 2) { from_index = ei_ucs2internal; break; } if (sizeof(wchar_t) == 1) { from_index = ei_iso8859_1; break; } #endif #if HAVE_WCRTOMB from_wchar = 1; fromcode = locale_charset(); if (fromcode != NULL) continue; #endif goto invalid; } from_index = ap->encoding_index; break; } cd = (struct conv_struct *) malloc(from_wchar != to_wchar ? sizeof(struct wchar_conv_struct) : sizeof(struct conv_struct)); if (cd == NULL) { errno = ENOMEM; return (iconv_t)(-1); } cd->iindex = from_index; cd->ifuncs = all_encodings[from_index].ifuncs; cd->oindex = to_index; cd->ofuncs = all_encodings[to_index].ofuncs; cd->oflags = all_encodings[to_index].oflags; /* Initialize the loop functions. */ #if HAVE_MBRTOWC if (to_wchar) { #if HAVE_WCRTOMB if (from_wchar) { cd->lfuncs.loop_convert = wchar_id_loop_convert; cd->lfuncs.loop_reset = wchar_id_loop_reset; } else #endif { cd->lfuncs.loop_convert = wchar_to_loop_convert; cd->lfuncs.loop_reset = wchar_to_loop_reset; } } else #endif { #if HAVE_WCRTOMB if (from_wchar) { cd->lfuncs.loop_convert = wchar_from_loop_convert; cd->lfuncs.loop_reset = wchar_from_loop_reset; } else #endif { cd->lfuncs.loop_convert = unicode_loop_convert; cd->lfuncs.loop_reset = unicode_loop_reset; } } /* Initialize the states. */ memset(&cd->istate,'\0',sizeof(state_t)); memset(&cd->ostate,'\0',sizeof(state_t)); /* Initialize the operation flags. */ cd->transliterate = transliterate; /* Initialize additional fields. */ if (from_wchar != to_wchar) { struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) cd; memset(&wcd->state,'\0',sizeof(mbstate_t)); } /* Done. */ return (iconv_t)cd; invalid: errno = EINVAL; return (iconv_t)(-1); }
const char * iconv_canonicalize (const char * name) { const char* code; char buf[MAX_WORD_LENGTH+10+1]; const char* cp; char* bp; const struct alias * ap; unsigned int count; unsigned int index; const char* pool; /* Before calling aliases_lookup, convert the input string to upper case, * and check whether it's entirely ASCII (we call gperf with option "-7" * to achieve a smaller table) and non-empty. If it's not entirely ASCII, * or if it's too long, it is not a valid encoding name. */ for (code = name;;) { /* Search code in the table. */ for (cp = code, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) { unsigned char c = * (unsigned char *) cp; if (c >= 0x80) goto invalid; if (c >= 'a' && c <= 'z') c -= 'a'-'A'; *bp = c; if (c == '\0') break; if (--count == 0) goto invalid; } for (;;) { if (bp-buf >= 10 && memcmp(bp-10,"//TRANSLIT",10)==0) { bp -= 10; *bp = '\0'; continue; } if (bp-buf >= 8 && memcmp(bp-8,"//IGNORE",8)==0) { bp -= 8; *bp = '\0'; continue; } break; } if (buf[0] == '\0') { code = locale_charset(); /* Avoid an endless loop that could occur when using an older version of localcharset.c. */ if (code[0] == '\0') goto invalid; continue; } pool = stringpool; ap = aliases_lookup(buf,bp-buf); if (ap == NULL) { pool = stringpool2; ap = aliases2_lookup(buf); if (ap == NULL) goto invalid; } if (ap->encoding_index == ei_local_char) { code = locale_charset(); /* Avoid an endless loop that could occur when using an older version of localcharset.c. */ if (code[0] == '\0') goto invalid; continue; } if (ap->encoding_index == ei_local_wchar_t) { #if __STDC_ISO_10646__ if (sizeof(wchar_t) == 4) { index = ei_ucs4internal; break; } if (sizeof(wchar_t) == 2) { index = ei_ucs2internal; break; } if (sizeof(wchar_t) == 1) { index = ei_iso8859_1; break; } #endif } index = ap->encoding_index; break; } return all_canonical[index] + pool; invalid: return name; }