Example #1
0
static int
is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
{
  const UChar* p = *pp;

  (*pp)++;
  if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
       ONIGENC_IS_MBC_ASCII(p)) ||
      ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
       !ONIGENC_IS_MBC_ASCII(p))) {
    int v = (EncISO_8859_15_CtypeTable[*p] &
             (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));

    if ((v | ONIGENC_CTYPE_LOWER) != 0) {
      /* 0xdf etc.. are lower case letter, but can't convert. */
      if (*p == 0xdf || *p == 0xaa || *p == 0xb5 || *p == 0xba)
        return FALSE;
      else
        return TRUE;
    }

    return (v != 0 ? TRUE : FALSE);
  }
  return FALSE;
}
Example #2
0
static int
iso_8859_7_is_mbc_ambiguous(OnigAmbigType flag,
			    const UChar** pp, const UChar* end)
{
  const UChar* p = *pp;

  (*pp)++;
  if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
       ONIGENC_IS_MBC_ASCII(p)) ||
      ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
       !ONIGENC_IS_MBC_ASCII(p))) {
    int v = (EncISO_8859_7_CtypeTable[*p] &
             (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));

    if ((v | ONIGENC_CTYPE_LOWER) != 0) {
      if (*p == 0xc0 || *p == 0xe0)
        return FALSE;
      else
        return TRUE;
    }

    return (v != 0 ? TRUE : FALSE);
  }
  return FALSE;
}
Example #3
0
static int
iso_8859_16_mbc_to_normalize(OnigAmbigType flag,
			     const UChar** pp, const UChar* end, UChar* lower)
{
  const UChar* p = *pp;

  if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
    if ((*p == 's' && *(p+1) == 's') ||
	((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
	 (*p == 'S' && *(p+1) == 'S'))) {
      *lower = 0xdf;
      (*pp) += 2;
      return 1;
    }
  }

  if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
       ONIGENC_IS_MBC_ASCII(p)) ||
      ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
       !ONIGENC_IS_MBC_ASCII(p))) {
    *lower = ENC_ISO_8859_16_TO_LOWER_CASE(*p);
  }
  else {
    *lower = *p;
  }
  (*pp)++;
  return 1; /* return byte length of converted char to lower */
}
Example #4
0
static int
utf32be_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
{
  const UChar* p = *pp;

  (*pp) += 4;

  if (*(p+2) == 0 && *(p+1) == 0 && *p == 0) {
    int c, v;

    p += 3;
    if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
	 ONIGENC_IS_MBC_ASCII(p)) ||
	((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
	 !ONIGENC_IS_MBC_ASCII(p))) {
      c = *p;
      v = ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(c,
                       (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
      if ((v | ONIGENC_CTYPE_LOWER) != 0) {
        /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
        if (c >= 0xaa && c <= 0xba)
          return FALSE;
        else
          return TRUE;
      }
      return (v != 0 ? TRUE : FALSE);
    }
  }

  return FALSE;
}
Example #5
0
static int
utf32be_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end,
                         UChar* lower)
{
  const UChar* p = *pp;

  if (*(p+2) == 0 && *(p+1) == 0 && *p == 0) {
    p += 3;
    if (end > p + 4 &&
        (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0 &&
	((*p == 's' && *(p+4) == 's') ||
	((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
	 (*p == 'S' && *(p+4) == 'S'))) &&
        *(p+3) == 0 && *(p+2) == 0 && *(p+1) == 0) {
      *lower++ = '\0';
      *lower++ = '\0';
      *lower++ = '\0';
      *lower   = 0xdf;
      (*pp) += 8;
      return 4;
    }

    *lower++ = '\0';
    *lower++ = '\0';
    *lower++ = '\0';
    if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
	 ONIGENC_IS_MBC_ASCII(p)) ||
	((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
	 !ONIGENC_IS_MBC_ASCII(p))) {
      *lower = ONIGENC_ISO_8859_1_TO_LOWER_CASE(*p);
    }
    else {
      *lower = *p;
    }

    (*pp) += 4;
    return 4;  /* return byte length of converted char to lower */
  }
  else {
    int len = 4;
    if (lower != p) {
      int i;
      for (i = 0; i < len; i++) {
	*lower++ = *p++;
      }
    }
    (*pp) += len;
    return len; /* return byte length of converted char to lower */
  }
}
Example #6
0
extern int
onigenc_mbn_mbc_to_normalize(OnigEncoding enc, OnigAmbigType flag,
                             const UChar** pp, const UChar* end, UChar* lower)
{
  int len;
  const UChar *p = *pp;

  if (ONIGENC_IS_MBC_ASCII(p)) {
    if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) {
      *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
    }
    else {
      *lower = *p;
    }
    (*pp)++;
    return 1;
  }
  else {
    len = enc_len(enc, p);
    if (lower != p) {
      int i;
      for (i = 0; i < len; i++) {
	*lower++ = *p++;
      }
    }
    (*pp) += len;
    return len; /* return byte length of converted to lower char */
  }
}
Example #7
0
extern int
onigenc_mbn_mbc_is_case_ambig(UChar* p)
{
  if (ONIGENC_IS_MBC_ASCII(p))
    return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);

  return FALSE;
}
Example #8
0
static int
mbc_to_normalize(OnigAmbigType flag,
		 const UChar** pp, const UChar* end, UChar* lower)
{
  const UChar* p = *pp;

  if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
       ONIGENC_IS_MBC_ASCII(p)) ||
      ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
       !ONIGENC_IS_MBC_ASCII(p))) {
    *lower = ENC_ISO_8859_15_TO_LOWER_CASE(*p);
  }
  else {
    *lower = *p;
  }
  (*pp)++;
  return 1; /* return byte length of converted char to lower */
}
Example #9
0
static int
iso_8859_16_is_mbc_ambiguous(OnigAmbigType flag,
			     const UChar** pp, const UChar* end)
{
  const UChar* p = *pp;

  if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
    if (end > p + 1) {
      if ((*p == 's' && *(p+1) == 's') ||
	  ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
	   (*p == 'S' && *(p+1) == 'S'))) {
	(*pp) += 2;
	return TRUE;
      }
    }

    if (*p == 0xdf) {
      (*pp)++;
      return TRUE;
    }
  }

  (*pp)++;
  if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
       ONIGENC_IS_MBC_ASCII(p)) ||
      ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
       !ONIGENC_IS_MBC_ASCII(p))) {
    int v = (EncISO_8859_16_CtypeTable[*p] &
             (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));

    if ((v | ONIGENC_CTYPE_LOWER) != 0) {
      /* 0xdf is lower case letter, but can't convert. */
      if (*p == 0xdf)
        return FALSE;
      else
        return TRUE;
    }

    return (v != 0 ? TRUE : FALSE);
  }
  return FALSE;
}
Example #10
0
extern int
onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigAmbigType flag,
                             const UChar** pp, const UChar* end)
{
  const UChar* p = *pp;

  if (ONIGENC_IS_MBC_ASCII(p)) {
    (*pp)++;
    if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) {
      return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
    }
    else {
      return FALSE;
    }
  }

  (*pp) += enc_len(enc, p);
  return FALSE;
}
Example #11
0
extern int
onigenc_mbn_mbc_to_lower(OnigEncoding enc, UChar* p, UChar* lower)
{
  int len;

  if (ONIGENC_IS_MBC_ASCII(p)) {
    *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
    return 1;
  }
  else {
    len = enc_len(enc, *p);
    if (lower != p) {
      /* memcpy(lower, p, len); */
      int i;
      for (i = 0; i < len; i++) {
	*lower++ = *p++;
      }
    }
    return len; /* return byte length of converted to lower char */
  }
}
Example #12
0
static int
mbc_case_fold(OnigCaseFoldType flag,
	      const UChar** pp, const UChar* end, UChar* lower,
	      OnigEncoding enc)
{
  const UChar* p = *pp;

  if (ONIGENC_IS_MBC_ASCII(p)) {
    *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
    (*pp)++;
    return 1;
  }
  else {
    OnigCodePoint code;
    int len;

    code = get_lower_case(mbc_to_code(p, end, enc));
    len = code_to_mbc(code, lower, enc);
    (*pp) += len;
    return len; /* return byte length of converted char to lower */
  }
}
Example #13
0
static int
mbc_case_fold(OnigCaseFoldType flag,
	      const UChar** pp, const UChar* end, UChar* lower,
	      OnigEncoding enc)
{
  const UChar* p = *pp;

  if (ONIGENC_IS_MBC_ASCII(p)) {
    *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
    (*pp)++;
    return 1;
  }
  else {
    int i;
    int len = enclen(enc, p, end);

    for (i = 0; i < len; i++) {
      *lower++ = *p++;
    }
    (*pp) += len;
    return len; /* return byte length of converted char to lower */
  }
}