Example #1
0
/*
 * Returns the canonical name of the encoding called NAME.
 *
 * NAME is upper-cased into a local buffer, any trailing "//TRANSLIT" and
 * "//IGNORE" suffixes are removed, and the result is looked up first with
 * aliases_lookup() and then with aliases2_lookup().  An empty name, or an
 * alias whose index is ei_local_char, is replaced by the string returned by
 * locale_charset() and the whole procedure is repeated on that string.
 *
 * On success the result is all_canonical[index] added to the string pool
 * that belongs to the table in which the alias was found.  When the name
 * cannot be resolved (non-ASCII byte, too long, unknown alias, or an empty
 * locale_charset() result), NAME itself is returned unchanged.
 */
const char * iconv_canonicalize (const char * name)
{
  const char* candidate = name;
  char upper[MAX_WORD_LENGTH+10+1];
  char* end;
  const struct alias * entry;
  const char* names;
  unsigned int result;

  for (;;) {
    const char* src = candidate;
    unsigned int room = MAX_WORD_LENGTH+10+1;

    /* Upper-case the candidate into the buffer.  The lookup tables only
       contain 7-bit names (gperf is run with option "-7"), so a byte
       >= 0x80 or a name that does not fit cannot be a valid encoding.
       On exit, 'end' points at the terminating NUL inside 'upper'. */
    end = upper;
    for (;;) {
      unsigned char ch = * (unsigned char *) src;
      if (ch >= 0x80)
        return name;
      if (ch >= 'a' && ch <= 'z')
        ch -= 'a'-'A';
      *end = ch;
      if (ch == '\0')
        break;
      if (--room == 0)
        return name;
      src++;
      end++;
    }

    /* Peel off conversion-option suffixes, in any order and any number. */
    for (;;) {
      int suffix_len;
      if (end-upper >= 10 && memcmp(end-10,"//TRANSLIT",10)==0)
        suffix_len = 10;
      else if (end-upper >= 8 && memcmp(end-8,"//IGNORE",8)==0)
        suffix_len = 8;
      else
        break;
      end -= suffix_len;
      *end = '\0';
    }

    if (upper[0] != '\0') {
      /* Remember which string pool goes with the table that matched. */
      names = stringpool;
      entry = aliases_lookup(upper, (unsigned int)(end-upper));
      if (entry == NULL) {
        names = stringpool2;
        entry = aliases2_lookup(upper);
        if (entry == NULL)
          return name;
      }
      if (entry->encoding_index != ei_local_char)
        break;
    }

    /* Either the name was empty or it denotes the locale's encoding:
       retry with the locale charset.  The emptiness test avoids an endless
       loop that could occur when using an older version of
       localcharset.c. */
    candidate = locale_charset();
    if (candidate[0] == '\0')
      return name;
  }

  result = entry->encoding_index;
  if (entry->encoding_index == ei_local_wchar_t) {
    /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode.
       This is also the case on native Woe32 systems and Cygwin >= 1.7, where
       we know that it is UTF-16.  If no size matches, the index stays
       ei_local_wchar_t. */
#if ((defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__) || (defined __CYGWIN__ && CYGWIN_VERSION_DLL_MAJOR >= 1007)
    if (sizeof(wchar_t) == 4) {
      result = ei_ucs4internal;
    } else if (sizeof(wchar_t) == 2) {
# if WORDS_LITTLEENDIAN
      result = ei_utf16le;
# else
      result = ei_utf16be;
# endif
    }
#elif __STDC_ISO_10646__
    if (sizeof(wchar_t) == 4) {
      result = ei_ucs4internal;
    } else if (sizeof(wchar_t) == 2) {
      result = ei_ucs2internal;
    } else if (sizeof(wchar_t) == 1) {
      result = ei_iso8859_1;
    }
#endif
  }
  return all_canonical[result] + names;
}
Example #2
0
iconv_t iconv_open (const char* tocode, const char* fromcode)
{
    struct conv_struct * cd;
    char buf[MAX_WORD_LENGTH+10+1];
    const char* cp;
    char* bp;
    const struct alias * ap;
    unsigned int count;
    unsigned int from_index;
    int from_wchar;
    unsigned int to_index;
    int to_wchar;
    int transliterate = 0;

    /* Before calling aliases_lookup, convert the input string to upper case,
     * and check whether it's entirely ASCII (we call gperf with option "-7"
     * to achieve a smaller table) and non-empty. If it's not entirely ASCII,
     * or if it's too long, it is not a valid encoding name.
     */
    for (to_wchar = 0;;) {
        /* Search tocode in the table. */
        for (cp = tocode, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) {
            unsigned char c = * (unsigned char *) cp;
            if (c >= 0x80)
                goto invalid;
            if (c >= 'a' && c <= 'z')
                c -= 'a'-'A';
            *bp = c;
            if (c == '\0')
                break;
            if (--count == 0)
                goto invalid;
        }
        if (bp-buf > 10 && memcmp(bp-10,"//TRANSLIT",10)==0) {
            bp -= 10;
            *bp = '\0';
            transliterate = 1;
        }
        ap = aliases_lookup(buf,bp-buf);
        if (ap == NULL) {
            ap = aliases2_lookup(buf);
            if (ap == NULL)
                goto invalid;
        }
        if (ap->encoding_index == ei_local_char) {
            tocode = locale_charset();
            if (tocode != NULL)
                continue;
            goto invalid;
        }
        if (ap->encoding_index == ei_local_wchar_t) {
#if __STDC_ISO_10646__
            if (sizeof(wchar_t) == 4) {
                to_index = ei_ucs4internal;
                break;
            }
            if (sizeof(wchar_t) == 2) {
                to_index = ei_ucs2internal;
                break;
            }
            if (sizeof(wchar_t) == 1) {
                to_index = ei_iso8859_1;
                break;
            }
#endif
#if HAVE_MBRTOWC
            to_wchar = 1;
            tocode = locale_charset();
            if (tocode != NULL)
                continue;
#endif
            goto invalid;
        }
        to_index = ap->encoding_index;
        break;
    }
    for (from_wchar = 0;;) {
        /* Search fromcode in the table. */
        for (cp = fromcode, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) {
            unsigned char c = * (unsigned char *) cp;
            if (c >= 0x80)
                goto invalid;
            if (c >= 'a' && c <= 'z')
                c -= 'a'-'A';
            *bp = c;
            if (c == '\0')
                break;
            if (--count == 0)
                goto invalid;
        }
        if (bp-buf > 10 && memcmp(bp-10,"//TRANSLIT",10)==0) {
            bp -= 10;
            *bp = '\0';
        }
        ap = aliases_lookup(buf,bp-buf);
        if (ap == NULL) {
            ap = aliases2_lookup(buf);
            if (ap == NULL)
                goto invalid;
        }
        if (ap->encoding_index == ei_local_char) {
            fromcode = locale_charset();
            if (fromcode != NULL)
                continue;
            goto invalid;
        }
        if (ap->encoding_index == ei_local_wchar_t) {
#if __STDC_ISO_10646__
            if (sizeof(wchar_t) == 4) {
                from_index = ei_ucs4internal;
                break;
            }
            if (sizeof(wchar_t) == 2) {
                from_index = ei_ucs2internal;
                break;
            }
            if (sizeof(wchar_t) == 1) {
                from_index = ei_iso8859_1;
                break;
            }
#endif
#if HAVE_WCRTOMB
            from_wchar = 1;
            fromcode = locale_charset();
            if (fromcode != NULL)
                continue;
#endif
            goto invalid;
        }
        from_index = ap->encoding_index;
        break;
    }
    cd = (struct conv_struct *) malloc(from_wchar != to_wchar
                                       ? sizeof(struct wchar_conv_struct)
                                       : sizeof(struct conv_struct));
    if (cd == NULL) {
        errno = ENOMEM;
        return (iconv_t)(-1);
    }
    cd->iindex = from_index;
    cd->ifuncs = all_encodings[from_index].ifuncs;
    cd->oindex = to_index;
    cd->ofuncs = all_encodings[to_index].ofuncs;
    cd->oflags = all_encodings[to_index].oflags;
    /* Initialize the loop functions. */
#if HAVE_MBRTOWC
    if (to_wchar) {
#if HAVE_WCRTOMB
        if (from_wchar) {
            cd->lfuncs.loop_convert = wchar_id_loop_convert;
            cd->lfuncs.loop_reset = wchar_id_loop_reset;
        } else
#endif
        {
            cd->lfuncs.loop_convert = wchar_to_loop_convert;
            cd->lfuncs.loop_reset = wchar_to_loop_reset;
        }
    } else
#endif
    {
#if HAVE_WCRTOMB
        if (from_wchar) {
            cd->lfuncs.loop_convert = wchar_from_loop_convert;
            cd->lfuncs.loop_reset = wchar_from_loop_reset;
        } else
#endif
        {
            cd->lfuncs.loop_convert = unicode_loop_convert;
            cd->lfuncs.loop_reset = unicode_loop_reset;
        }
    }
    /* Initialize the states. */
    memset(&cd->istate,'\0',sizeof(state_t));
    memset(&cd->ostate,'\0',sizeof(state_t));
    /* Initialize the operation flags. */
    cd->transliterate = transliterate;
    /* Initialize additional fields. */
    if (from_wchar != to_wchar) {
        struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) cd;
        memset(&wcd->state,'\0',sizeof(mbstate_t));
    }
    /* Done. */
    return (iconv_t)cd;
invalid:
    errno = EINVAL;
    return (iconv_t)(-1);
}
Example #3
0
/*
 * Returns the canonical name of the encoding called NAME.
 *
 * NAME is upper-cased into a local buffer, trailing "//TRANSLIT" and
 * "//IGNORE" suffixes are removed, and the result is searched with
 * aliases_lookup() and then aliases2_lookup().  An empty name, or an alias
 * whose index is ei_local_char, is replaced by the string returned by
 * locale_charset() and resolved again.
 *
 * On success the result is all_canonical[index] added to the string pool
 * that belongs to the table in which the alias was found.  If the name
 * cannot be resolved, NAME itself is returned unchanged.
 */
const char * iconv_canonicalize (const char * name)
{
  const char* candidate = name;
  char upper[MAX_WORD_LENGTH+10+1];
  char* end;
  const struct alias * entry;
  const char* names;
  unsigned int result;

  for (;;) {
    const char* src = candidate;
    unsigned int room = MAX_WORD_LENGTH+10+1;

    /* Upper-case the candidate into the buffer.  The lookup tables only
       contain 7-bit names (gperf is run with option "-7"), so a byte
       >= 0x80 or a name that does not fit cannot be a valid encoding.
       On exit, 'end' points at the terminating NUL inside 'upper'. */
    end = upper;
    for (;;) {
      unsigned char ch = * (unsigned char *) src;
      if (ch >= 0x80)
        return name;
      if (ch >= 'a' && ch <= 'z')
        ch -= 'a'-'A';
      *end = ch;
      if (ch == '\0')
        break;
      if (--room == 0)
        return name;
      src++;
      end++;
    }

    /* Peel off conversion-option suffixes, in any order and any number. */
    for (;;) {
      int suffix_len;
      if (end-upper >= 10 && memcmp(end-10,"//TRANSLIT",10)==0)
        suffix_len = 10;
      else if (end-upper >= 8 && memcmp(end-8,"//IGNORE",8)==0)
        suffix_len = 8;
      else
        break;
      end -= suffix_len;
      *end = '\0';
    }

    if (upper[0] != '\0') {
      /* Remember which string pool goes with the table that matched. */
      names = stringpool;
      entry = aliases_lookup(upper,end-upper);
      if (entry == NULL) {
        names = stringpool2;
        entry = aliases2_lookup(upper);
        if (entry == NULL)
          return name;
      }
      if (entry->encoding_index != ei_local_char)
        break;
    }

    /* Either the name was empty or it denotes the locale's encoding:
       retry with the locale charset.  The emptiness test avoids an endless
       loop that could occur when using an older version of
       localcharset.c. */
    candidate = locale_charset();
    if (candidate[0] == '\0')
      return name;
  }

  result = entry->encoding_index;
  if (entry->encoding_index == ei_local_wchar_t) {
    /* When __STDC_ISO_10646__ is set, wchar_t is mapped to a fixed encoding
       chosen by its size; if no size matches, the index stays
       ei_local_wchar_t. */
#if __STDC_ISO_10646__
    if (sizeof(wchar_t) == 4) {
      result = ei_ucs4internal;
    } else if (sizeof(wchar_t) == 2) {
      result = ei_ucs2internal;
    } else if (sizeof(wchar_t) == 1) {
      result = ei_iso8859_1;
    }
#endif
  }
  return all_canonical[result] + names;
}