Example #1
0
/*
 * Write the normalized form of the character at *pp into `lower`,
 * advance *pp past that character, and return the number of bytes
 * consumed.
 *
 * An ASCII character is lower-cased only when `flag` has
 * ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE set; otherwise it is copied as is.
 * Any other character is copied byte-for-byte, using the length that
 * enc_len() reports for it.
 *
 * `end` is not consulted by this function.
 */
extern int
onigenc_mbn_mbc_to_normalize(OnigEncoding enc, OnigAmbigType flag,
                             const UChar** pp, const UChar* end, UChar* lower)
{
  const UChar *src = *pp;
  int nbytes;
  int k;

  if (ONIGENC_IS_MBC_ASCII(src)) {
    *lower = ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0)
               ? ONIGENC_ASCII_CODE_TO_LOWER_CASE(*src)
               : *src;
    (*pp)++;
    return 1;
  }

  nbytes = enc_len(enc, src);
  /* Source and destination may be the same buffer; skip the copy then. */
  if (lower != src) {
    for (k = 0; k < nbytes; k++) {
      lower[k] = src[k];
    }
  }
  (*pp) += nbytes;
  return nbytes; /* byte length of the character written to lower */
}
Example #2
0
/*
 * Decode the character starting at `p` into a code point.
 *
 * The byte count comes from enc_len().  For a multi-byte sequence the
 * lead byte is masked down to its payload bits and the low six bits of
 * each following byte are appended.  A single byte is returned as its
 * own value, except that with USE_INVALID_CODE_SCHEME defined the bytes
 * 0xfe and 0xff map to INVALID_CODE_FE and INVALID_CODE_FF.
 *
 * `end` is only forwarded to enc_len(); the trailing bytes are read
 * without comparing against it.  `enc` is not used.
 */
static OnigCodePoint
utf8_mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc)
{
  int nbytes = enc_len(ONIG_ENCODING_UTF8, p, end);
  int byte = *p++;
  int trail;
  OnigCodePoint code;

  if (nbytes <= 1) {
#ifdef USE_INVALID_CODE_SCHEME
    if (byte == 0xfe) return INVALID_CODE_FE;
    if (byte > 0xfd) return INVALID_CODE_FF;
#endif
    return (OnigCodePoint )byte;
  }

  trail = nbytes - 1;
  /* Keep only the low (6 - trail) bits of the lead byte. */
  code = byte & ((1 << (6 - trail)) - 1);
  for (; trail > 0; trail--) {
    code = (code << 6) | (*p++ & ((1 << 6) - 1));
  }
  return code;
}
Example #3
0
extern UChar*
onigenc_get_right_adjust_char_head(OnigEncoding enc, UChar* start, UChar* s)
{
  UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
  if (p < s) {
    p += enc_len(enc, *p);
  }
  return p;
}
Example #4
0
extern UChar*
onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc,
				   UChar* start, UChar* s, UChar** prev)
{
  /*
   * Same adjustment as the variant without `prev`: take the head from
   * ONIGENC_LEFT_ADJUST_CHAR_HEAD() and, if it is before `s`, step over
   * the character found there.
   *
   * When `prev` is non-NULL it receives the head that was stepped over,
   * or NULL when no step was needed (the previous character is not
   * computed in that case).
   */
  UChar* head = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
  int stepped = (head < s);

  if (prev) *prev = stepped ? head : (UChar* )NULL;
  if (stepped) head += enc_len(enc, *head);
  return head;
}
Example #5
0
/*
 * Pack the bytes of the character at `p` into a code point, most
 * significant byte first.
 *
 * The byte count comes from enc_len() for ONIG_ENCODING_EUC_JP.  The
 * first byte is always read; each further byte is appended only while
 * `p` is still before `end`, so a truncated character yields a shorter
 * value.
 */
static OnigCodePoint
mbc_to_code(const UChar* p, const UChar* end)
{
  int len = enc_len(ONIG_ENCODING_EUC_JP, p);
  OnigCodePoint code = (OnigCodePoint )*p++;
  int remaining;

  for (remaining = len - 1; remaining > 0 && p < end; remaining--) {
    code = (code << 8) + *p++;
  }
  return code;
}
Example #6
0
/*
 * Pack the bytes of the character at `p` into a code point, most
 * significant byte first, for the given encoding.
 *
 * enc_len() supplies the byte count.  The first byte is read
 * unconditionally; subsequent bytes are consumed only while `p` has not
 * reached `end`.
 */
extern OnigCodePoint
onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end)
{
  OnigCodePoint code;
  int idx;
  int nbytes = enc_len(enc, p);

  code = (OnigCodePoint )(*p++);
  idx = 1;
  while (idx < nbytes && p < end) {
    code = (code << 8) + *p++;
    idx++;
  }
  return code;
}
Example #7
0
/*
 * Return the head of the character that contains position `s`,
 * searching no further back than `start`.
 *
 * Assumed in this encoding: multi-byte trail bytes don't mix with
 * single bytes.
 */
static UChar*
euctw_left_adjust_char_head(const UChar* start, const UChar* s)
{
  const UChar *head = s;
  const UChar *next;
  int len;

  if (s <= start) return (UChar* )s;

  /* Walk back until a lead byte is found or `start` is reached. */
  for (;;) {
    if (euctw_islead(*head)) break;
    if (head <= start) break;
    head--;
  }

  len = enc_len(ONIG_ENCODING_EUC_TW, head);
  next = head + len;
  if (next > s) return (UChar* )head;

  /* `s` is past that character: advance by an even number of bytes. */
  return (UChar* )(next + ((s - next) & ~1));
}
Example #8
0
/*
 * Advance *pp past the character it points at and report whether that
 * character is case-ambiguous.
 *
 * Only an ASCII character can yield a non-FALSE result, and only when
 * `flag` has ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE set; the answer then
 * comes from ONIGENC_IS_ASCII_CODE_CASE_AMBIG().  Other characters are
 * skipped by the length enc_len() reports and FALSE is returned.
 *
 * `end` is not consulted by this function.
 */
extern int
onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigAmbigType flag,
                             const UChar** pp, const UChar* end)
{
  const UChar* cur = *pp;

  if (ONIGENC_IS_MBC_ASCII(cur)) {
    *pp = cur + 1;
    return ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0)
             ? ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*cur)
             : FALSE;
  }

  *pp = cur + enc_len(enc, cur);
  return FALSE;
}
Example #9
0
/*
 * Write the lower-case form of the character at `p` into `lower` and
 * return the number of bytes written.
 *
 * An ASCII character is mapped through
 * ONIGENC_ASCII_CODE_TO_LOWER_CASE().  Any other character is copied
 * unchanged, using the length enc_len() reports for its first byte; the
 * copy is skipped when `lower` and `p` are the same buffer.
 */
extern int
onigenc_mbn_mbc_to_lower(OnigEncoding enc, UChar* p, UChar* lower)
{
  int nbytes;
  int k;

  if (ONIGENC_IS_MBC_ASCII(p)) {
    *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
    return 1;
  }

  nbytes = enc_len(enc, *p);
  if (lower == p) return nbytes;

  for (k = 0; k < nbytes; k++) {
    lower[k] = p[k];
  }
  return nbytes; /* byte length of the character written to lower */
}
Example #10
0
/*
 * Return the head of the character that contains position `s`,
 * searching no further back than `start`.
 *
 * If the byte at `s` can be a trail byte, the scan first backs up over
 * the run of preceding bytes that satisfy BIG5_ISMB_FIRST(), then
 * decides from there which character `s` falls in.
 */
static UChar*
big5_left_adjust_char_head(const UChar* start, const UChar* s)
{
  const UChar *head = s;
  const UChar *next;
  int len;

  if (s <= start) return (UChar* )s;

  if (BIG5_ISMB_TRAIL(*head)) {
    /* Step back while the preceding byte could start a character. */
    while (head > start && BIG5_ISMB_FIRST(*(head - 1))) {
      head--;
    }
  }

  len = enc_len(ONIG_ENCODING_BIG5, head);
  next = head + len;
  if (next > s) return (UChar* )head;

  /* `s` is past that character: advance by an even number of bytes. */
  return (UChar* )(next + ((s - next) & ~1));
}