Example #1
0
// Populate the case-mapping tables cmupper and cmlower.
// ASCII lowercase letters map to their uppercase form in cmupper, ASCII
// uppercase letters map to their lowercase form in cmlower, and every
// other code in 0..UCHAR_MAX maps to itself.  Once `initialised` is set,
// later constructions do nothing.
cmap_init::cmap_init()
{
  if (!initialised) {
    initialised = 1;
    int code = 0;
    while (code <= UCHAR_MAX) {
      if (ISASCII(code) && islower(code))
        cmupper.v[code] = toupper(code);
      else
        cmupper.v[code] = code;

      if (ISASCII(code) && isupper(code))
        cmlower.v[code] = tolower(code);
      else
        cmlower.v[code] = code;

      code++;
    }
  }
}
Example #2
0
// Populate the case-mapping tables cmupper and cmlower.
// On FreeBSD the LC_CTYPE locale is first set from the environment.
// ASCII lowercase letters map to their uppercase form in cmupper, ASCII
// uppercase letters map to their lowercase form in cmlower, and every
// other code in 0..UCHAR_MAX maps to itself.  Once `initialised` is set,
// later constructions do nothing.
cmap_init::cmap_init()
{
  if (!initialised) {
    initialised = 1;
#ifdef __FreeBSD__
    (void) setlocale(LC_CTYPE, "");
#endif
    int code = 0;
    while (code <= UCHAR_MAX) {
      if (ISASCII(code) && islower(code))
        cmupper.v[code] = toupper(code);
      else
        cmupper.v[code] = code;

      if (ISASCII(code) && isupper(code))
        cmlower.v[code] = tolower(code);
      else
        cmlower.v[code] = code;

      code++;
    }
  }
}
Example #3
0
/*
 * Membership test for a range (registered under the "include?:" selector).
 *
 * - If either endpoint is numeric, or answers to_int, val is compared
 *   against the endpoints with r_le/r_lt, honouring an exclusive end.
 * - If both endpoints are one-character strings and val is a
 *   one-character string, and all three bytes are ASCII, the bytes are
 *   compared directly.
 * - Anything else is delegated to the superclass implementation.
 */
static VALUE
range_include(VALUE range, SEL sel, VALUE val)
{
    VALUE first = RANGE_BEG(range);
    VALUE last = RANGE_END(range);
    int numeric = FIXNUM_P(first) || FIXNUM_P(last) ||
		  rb_obj_is_kind_of(first, rb_cNumeric) ||
		  rb_obj_is_kind_of(last, rb_cNumeric);

    if (numeric ||
	!NIL_P(rb_check_to_integer(first, "to_int")) ||
	!NIL_P(rb_check_to_integer(last, "to_int"))) {
	/* Below the lower bound: no need to look at the upper one. */
	if (!r_le(first, val))
	    return Qfalse;
	if (EXCL(range))
	    return r_lt(val, last) ? Qtrue : Qfalse;
	return r_le(val, last) ? Qtrue : Qfalse;
    }

    if (TYPE(first) == T_STRING && TYPE(last) == T_STRING &&
	RSTRING_LEN(first) == 1 && RSTRING_LEN(last) == 1) {
	if (NIL_P(val))
	    return Qfalse;
	if (TYPE(val) == T_STRING) {
	    char lo, hi, ch;

	    if (RSTRING_LEN(val) == 0 || RSTRING_LEN(val) > 1)
		return Qfalse;

	    lo = RSTRING_PTR(first)[0];
	    hi = RSTRING_PTR(last)[0];
	    ch = RSTRING_PTR(val)[0];

	    /* Non-ASCII bytes fall through to the generic path below. */
	    if (ISASCII(lo) && ISASCII(hi) && ISASCII(ch)) {
		if ((lo <= ch && ch < hi) || (!EXCL(range) && ch == hi))
		    return Qtrue;
		return Qfalse;
	    }
	}
    }

    /* Generic case: let the superclass decide. */
    if (sel == NULL) {
	sel = sel_registerName("include?:");
    }
    return rb_vm_call_super(range, sel, 1, &val);
}
Example #4
0
/*
 * eucscol(s) returns the total screen column width of the
 * NUL-terminated EUC string s.
 *
 * Each character contributes 1 column if it is ASCII, scrw2 if it is
 * introduced by SS2, scrw3 if it is introduced by SS3, and scrw1
 * otherwise (code set 1).  The character occupies 1, eucw2 + 1,
 * eucw3 + 1 or eucw1 bytes respectively.
 *
 * The scan never steps over the terminating NUL: a multibyte character
 * that is cut short by the end of the string still counts its column
 * width, but only the bytes actually present are consumed.  At least
 * one byte is always consumed per character so the loop makes progress
 * even when the configured byte width is zero.
 */
int
eucscol(const unsigned char *s)

{
	int	col = 0;
	int	len;	/* bytes the current character is expected to occupy */

	while (*s) { /* end if euc char is a NULL character */
		if (ISASCII(*s)) {
			col += 1;
			len = 1;
		} else {
			switch (*s) {
			case SS2:
				col += scrw2;
				len = eucw2 + 1;	/* include SS2 */
				break;
			case SS3:
				col += scrw3;
				len = eucw3 + 1;	/* include SS3 */
				break;
			default:	/* code set 1 */
				col += scrw1;
				len = eucw1;
				break;
			}
		}

		/*
		 * Consume the lead byte unconditionally, then the remaining
		 * bytes of the character, stopping early at the terminator.
		 */
		do {
			s++;
		} while (--len > 0 && *s);
	}
	return (col);
}
Example #5
0
/*
 * printable - replace unprintable bytes in a string, in place.
 *
 * Printable ASCII bytes are kept. When util_utf8_enable is set, a lead
 * byte in 194..254 followed by at least one continuation byte
 * (128..191) is kept together with all the continuation bytes that
 * follow it. Every other byte is overwritten with "replacement".
 * Returns the "string" argument.
 *
 * XXX Replace invalid UTF8 sequences (too short, over-long encodings,
 * out-of-range code points, etc). See valid_utf8_string.c.
 */
char   *printable(char *string, int replacement)
{
    unsigned char *bp;
    int     byte;

    for (bp = (unsigned char *) string; (byte = *bp) != 0; bp++) {
	/* Printable ASCII: nothing to do. */
	if (ISASCII(byte) && ISPRINT(byte))
	    continue;

	if (util_utf8_enable && byte >= 194 && byte <= 254
	    && bp[1] >= 128 && bp[1] < 192) {
	    /* UTF8; step onto the last continuation byte of the character. */
	    do {
		bp++;
	    } while (bp[1] >= 128 && bp[1] < 192);
	    continue;
	}

	/* Not ASCII and not UTF8. */
	*bp = replacement;
    }
    return (string);
}
Example #6
0
/*
 * Membership test for a range (the "include?" method, per the TODO below).
 *
 * - If either endpoint is numeric, or answers to_int, val is compared
 *   against the endpoints with r_le/r_lt, honouring an exclusive end.
 * - If both endpoints are one-character strings and val is a
 *   one-character string, and all three bytes are ASCII, the bytes are
 *   compared directly.
 * - Anything else is delegated to the superclass implementation.
 */
static VALUE
range_include(VALUE range, VALUE val)
{
    VALUE first = RANGE_BEG(range);
    VALUE last = RANGE_END(range);
    int numeric = FIXNUM_P(first) || FIXNUM_P(last) ||
		  rb_obj_is_kind_of(first, rb_cNumeric) ||
		  rb_obj_is_kind_of(last, rb_cNumeric);

    if (numeric ||
	!NIL_P(rb_check_to_integer(first, "to_int")) ||
	!NIL_P(rb_check_to_integer(last, "to_int"))) {
	/* Below the lower bound: no need to look at the upper one. */
	if (!r_le(first, val))
	    return Qfalse;
	if (EXCL(range))
	    return r_lt(val, last) ? Qtrue : Qfalse;
	return r_le(val, last) ? Qtrue : Qfalse;
    }

    if (RB_TYPE_P(first, T_STRING) && RB_TYPE_P(last, T_STRING) &&
	RSTRING_LEN(first) == 1 && RSTRING_LEN(last) == 1) {
	if (NIL_P(val))
	    return Qfalse;
	if (RB_TYPE_P(val, T_STRING)) {
	    char lo, hi, ch;

	    if (RSTRING_LEN(val) == 0 || RSTRING_LEN(val) > 1)
		return Qfalse;

	    lo = RSTRING_PTR(first)[0];
	    hi = RSTRING_PTR(last)[0];
	    ch = RSTRING_PTR(val)[0];

	    /* Non-ASCII bytes fall through to the generic path below. */
	    if (ISASCII(lo) && ISASCII(hi) && ISASCII(ch)) {
		if ((lo <= ch && ch < hi) || (!EXCL(range) && ch == hi))
		    return Qtrue;
		return Qfalse;
	    }
	}
    }

    /* TODO: ruby_frame->this_func = rb_intern("include?"); */
    return rb_call_super(1, &val);
}
Example #7
0
 // Returns the offset from `start` of the first byte in [start, end) for
 // which ISASCII is false, or -1 when every byte in the range is ASCII.
 native_int Encoding::find_non_ascii_index(const uint8_t* start, const uint8_t* end) {
   for(const uint8_t* cur = start; cur < end; ++cur) {
     if(ISASCII(*cur)) continue;
     return cur - start;
   }
   return -1;
 }
Example #8
0
/*
 * allprint - return 1 if the string is non-empty and consists only of
 * printable ASCII characters, 0 otherwise.
 */
int     allprint(const char *string)
{
    const unsigned char *up = (const unsigned char *) string;
    int     byte;

    /* The empty string does not qualify. */
    if (*up == 0)
	return (0);

    while ((byte = *up) != 0) {
	if (!(ISASCII(byte) && ISPRINT(byte)))
	    return (0);
	up++;
    }
    return (1);
}
Example #9
0
  // Classify a symbol by the spelling of its name:
  //   eConstant - starts with an uppercase letter and every following
  //               character is alphanumeric, '_' or non-ASCII
  //   eCVar     - starts with "@@"
  //   eIVar     - starts with '@' followed by a non-digit
  //   eSystem   - longer than two bytes and starts with "__"
  //   eNormal   - everything else
  SymbolTable::Kind SymbolTable::detect_kind(STATE, const Symbol* sym) {
    std::string name = strings[sym->index()];
    size_t length = name.size();
    uint8_t* cur = reinterpret_cast<uint8_t*>(const_cast<char*>(name.c_str()));

    Encoding* encoding = Encoding::from_index(state, encodings[sym->index()]);
    OnigEncodingType* enc = encoding->encoding();

    // Constants start with A-Z, followed by alphanumeric characters or '_' or
    // non-ascii character.
    if(isupper(*cur)) {
      uint8_t* limit = cur + length;

      ++cur;
      while(cur < limit) {
        int n = Encoding::precise_mbclen(cur, limit, enc);
        if(!ONIGENC_MBCLEN_CHARFOUND_P(n)) {
          // Not a complete, valid character in this encoding.
          return SymbolTable::eNormal;
        }

        n = ONIGENC_MBCLEN_CHARFOUND_LEN(n);
        int code = ONIGENC_MBC_TO_CODE(enc, cur, cur + n);

        bool allowed = ONIGENC_IS_CODE_ALNUM(enc, code)
                       || *cur == '_'
                       || !ISASCII(*cur);
        if(!allowed) {
          return SymbolTable::eNormal;
        }

        cur += n;
      }

      return SymbolTable::eConstant;
    }

    if(cur[0] == '@') {
      const bool has_second = length > 1;

      // A class variable begins with @@
      if(has_second && cur[1] == '@') {
        return SymbolTable::eCVar;
      }

      // An instance variable can't start with a digit and can't be just @.
      if(length == 1 || (has_second && ISDIGIT(cur[1]))) {
        return SymbolTable::eNormal;
      }

      // An instance variable begins with @
      return SymbolTable::eIVar;
    }

    // A system variable begins with __; everything else is normal.
    const bool system_name = length > 2 && cur[0] == '_' && cur[1] == '_';
    return system_name ? SymbolTable::eSystem : SymbolTable::eNormal;
  }
Example #10
0
/*
 * Return a pointer to the first non-ASCII byte in [p, e), or NULL if
 * there is none.
 *
 * When VALUE is 4 or 8 bytes wide and the range is longer than two
 * VALUEs, the middle of the range is scanned one aligned VALUE at a
 * time by testing the high bit of every byte with NONASCII_MASK; the
 * unaligned head, the tail, and the word in which a high bit was seen
 * are examined byte by byte.
 */
static inline const char *
search_nonascii(const char *p, const char *e)
{
#if SIZEOF_VALUE == 8
# define NONASCII_MASK 0x8080808080808080ULL
#elif SIZEOF_VALUE == 4
# define NONASCII_MASK 0x80808080UL
#endif
#ifdef NONASCII_MASK
    if ((int)sizeof(VALUE) * 2 < e - p) {
        const VALUE align_mask = sizeof(VALUE) - 1;
        /* First VALUE-aligned address at or after p. */
        const VALUE *word = (const VALUE*)(~align_mask & ((VALUE)p + align_mask));
        /* Last VALUE-aligned address at or before e. */
        const VALUE *word_end = (const VALUE*)(~align_mask & (VALUE)e);

        /* Unaligned head, one byte at a time. */
        for (; p < (const char *)word; p++) {
            if (!ISASCII(*p))
                return p;
        }

        /* Aligned middle, one VALUE at a time. */
        for (; word < word_end; word++) {
            if (*word & NONASCII_MASK)
                break;
        }

        /* Resume the byte scan at the suspicious word (or at the tail). */
        p = (const char *)word;
    }
#endif
    for (; p < e; p++) {
        if (!ISASCII(*p))
            return p;
    }
    return NULL;
}
Example #11
0
/*
 * euclen(s) returns the code width, in bytes, of the EUC char that
 * starts at s: 1 for ASCII, eucw2 + 1 or eucw3 + 1 for characters
 * introduced by SS2 or SS3 (the shift byte is counted), and eucw1 for
 * code set 1.
 * May also be implemented as a macro.
 */
int
euclen(const unsigned char *s)
{
	int	width;

	if (ISASCII(*s))
		width = 1;
	else if (*s == SS2)
		width = eucw2 + 1;	/* include SS2 */
	else if (*s == SS3)
		width = eucw3 + 1;	/* include SS3 */
	else
		width = eucw1;		/* code set 1 */

	return (width);
}
Example #12
0
/*
 * euccol(s) returns the screen column width of the EUC char that
 * starts at s: 1 for ASCII, scrw2 or scrw3 for characters introduced
 * by SS2 or SS3, and scrw1 for code set 1.
 */
int
euccol(const unsigned char *s)
{
	int	width;

	if (ISASCII(*s))
		width = 1;
	else if (*s == SS2)
		width = scrw2;
	else if (*s == SS3)
		width = scrw3;
	else
		width = scrw1;		/* code set 1 */

	return (width);
}
Example #13
0
/*
 * is_header - determine whether a line starts with a header label.
 *
 * A label is one or more ASCII, non-control characters, optionally
 * followed by spaces or tabs, followed by ':'. Returns the number of
 * label characters (excluding the optional whitespace), or 0 when the
 * input does not start with a label.
 */
int     is_header(const char *str)
{
    const unsigned char *cp;
    int     state;
    int     c;
    int     len;

#define INIT		0
#define IN_CHAR		1
#define IN_CHAR_SPACE	2
#define CU_CHAR_PTR(x)	((const unsigned char *) (x))

    /*
     * XXX RFC 2822 Section 4.5, Obsolete header fields: whitespace may
     * appear between header label and ":" (see: RFC 822, Section 3.4.2.).
     */
    len = 0;
    state = INIT;
    for (cp = CU_CHAR_PTR(str); (c = *cp) != 0; cp++) {

        /* End of label: valid only if at least one label character was seen. */
        if (c == ':')
            return ((state == IN_CHAR || state == IN_CHAR_SPACE) ? len : 0);

        /* Whitespace is allowed after the label, not before it. */
        if (c == ' ' || c == '\t') {
            if (state == INIT)
                return (0);
            state = IN_CHAR_SPACE;
            continue;
        }

        /* Label character: must be ASCII, non-control, and precede any space. */
        if (!ISASCII(c) || ISCNTRL(c))
            return (0);
        if (state == IN_CHAR_SPACE)
            return (0);
        state = IN_CHAR;
        len++;
    }

    /* Ran out of input without finding ':'. */
    return (0);
}
Example #14
0
File: encoding.c Project: 217/ruby
/*
 * Fetch the ASCII character at the start of the byte range [p, e).
 *
 * p, e: start and end of the input bytes
 * len:  if non-NULL, receives the number of bytes the character occupies
 * enc:  encoding of the input
 *
 * Returns the character code, or -1 when the range is empty, the bytes
 * at p do not form a complete valid character in enc, or the character
 * is not ASCII.  *len is only written on success.
 */
int
rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc)
{
    unsigned int c;
    /*
     * Must be signed: rb_enc_precise_mbclen() reports invalid or
     * incomplete sequences with negative values, and
     * MBCLEN_CHARFOUND_P() is a "greater than zero" test.  With an
     * unsigned variable those negative results would wrap to large
     * positive numbers and be mistaken for a found character.
     */
    int l;

    if (e <= p)
        return -1;

    /* ASCII-compatible encodings: a single byte decides. */
    if (rb_enc_asciicompat(enc)) {
        c = (unsigned char)*p;
        if (!ISASCII(c))
            return -1;
        if (len) *len = 1;
        return c;
    }

    /* Other encodings: decode one character and check its code point. */
    l = rb_enc_precise_mbclen(p, e, enc);
    if (!MBCLEN_CHARFOUND_P(l))
        return -1;
    c = rb_enc_mbc_to_codepoint(p, e, enc);
    if (!rb_enc_isascii(c, enc))
        return -1;
    if (len) *len = l;
    return c;
}
Example #15
0
/*
 * static int
 * inet_net_pton_ipv4(src, dst, size)
 *      convert IPv4 network number from presentation to network format.
 *      accepts hex octets, hex strings, decimal octets, and /CIDR.
 *      "size" is in bytes and describes "dst".
 * return:
 *      number of bits, either imputed classfully or specified with /CIDR,
 *      or -1 if some failure occurred (check errno).  ENOENT means it was
 *      not an IPv4 network specification; EMSGSIZE means "dst" was too
 *      small for the result.
 * note:
 *      network byte order assumed.  this means 192.5.5.240/28 has
 *      0b11110000 in its fourth octet.
 * note:
 *      On Windows we store the error in the thread errno, not
 *      in the winsock error code. This is to avoid losing the
 *      actual last winsock error. So use macro ERRNO to fetch the
 *      errno this function sets when returning (-1), not SOCKERRNO.
 * author:
 *      Paul Vixie (ISC), June 1996
 */
static int
inet_net_pton_ipv4(const char *src, unsigned char *dst, size_t size)
{
  static const char xdigits[] = "0123456789abcdef";
  static const char digits[] = "0123456789";
  int n, ch, tmp = 0, dirty, bits;
  const unsigned char *odst = dst;  /* start of output, to measure bytes written */

  ch = *src++;
  if (ch == '0' && (src[0] == 'x' || src[0] == 'X')
      && ISASCII(src[1])
      && ISXDIGIT(src[1])) {
    /* Hexadecimal: Eat nybble string. */
    if (!size)
      goto emsgsize;
    dirty = 0;  /* number of nybbles accumulated in tmp (0 or 1) */
    src++;  /* skip x or X. */
    while ((ch = *src++) != '\0' && ISASCII(ch) && ISXDIGIT(ch)) {
      if (ISUPPER(ch))
        ch = tolower(ch);
      /* Digit value is the character's position in the lookup string. */
      n = aresx_sztosi(strchr(xdigits, ch) - xdigits);
      if (dirty == 0)
        tmp = n;
      else
        tmp = (tmp << 4) | n;
      /* Two nybbles make one output byte. */
      if (++dirty == 2) {
        if (!size--)
          goto emsgsize;
        *dst++ = (unsigned char) tmp;
        dirty = 0;
      }
    }
    if (dirty) {  /* Odd trailing nybble? */
      if (!size--)
        goto emsgsize;
      *dst++ = (unsigned char) (tmp << 4);
    }
  } else if (ISASCII(ch) && ISDIGIT(ch)) {
    /* Decimal: eat dotted digit string. */
    for (;;) {
      /* Accumulate one decimal octet; values above 255 are rejected. */
      tmp = 0;
      do {
        n = aresx_sztosi(strchr(digits, ch) - digits);
        tmp *= 10;
        tmp += n;
        if (tmp > 255)
          goto enoent;
      } while ((ch = *src++) != '\0' &&
               ISASCII(ch) && ISDIGIT(ch));
      if (!size--)
        goto emsgsize;
      *dst++ = (unsigned char) tmp;
      /* End of string or start of a CIDR suffix ends the octet list. */
      if (ch == '\0' || ch == '/')
        break;
      if (ch != '.')
        goto enoent;
      /* A '.' must be followed by another digit. */
      ch = *src++;
      if (!ISASCII(ch) || !ISDIGIT(ch))
        goto enoent;
    }
  } else
    goto enoent;

  bits = -1;  /* -1 means "no /CIDR width given" */
  if (ch == '/' && ISASCII(src[0]) &&
      ISDIGIT(src[0]) && dst > odst) {
    /* CIDR width specifier.  Nothing can follow it. */
    ch = *src++;    /* Skip over the /. */
    bits = 0;
    do {
      n = aresx_sztosi(strchr(digits, ch) - digits);
      bits *= 10;
      bits += n;
      if (bits > 32)
        goto enoent;
    } while ((ch = *src++) != '\0' && ISASCII(ch) && ISDIGIT(ch));
    if (ch != '\0')
      goto enoent;
  }

  /* Fiery death and destruction unless we prefetched EOS. */
  if (ch != '\0')
    goto enoent;

  /* If nothing was written to the destination, we found no address. */
  if (dst == odst)
    goto enoent;
  /* If no CIDR spec was given, infer width from net class. */
  if (bits == -1) {
    if (*odst >= 240)       /* Class E */
      bits = 32;
    else if (*odst >= 224)  /* Class D */
      bits = 8;
    else if (*odst >= 192)  /* Class C */
      bits = 24;
    else if (*odst >= 128)  /* Class B */
      bits = 16;
    else                    /* Class A */
      bits = 8;
    /* If imputed mask is narrower than specified octets, widen. */
    if (bits < ((dst - odst) * 8))
      bits = aresx_sztosi(dst - odst) * 8;
    /*
     * If there are no additional bits specified for a class D
     * address adjust bits to 4.
     */
    if (bits == 8 && *odst == 224)
      bits = 4;
  }
  /* Extend network to cover the actual mask. */
  while (bits > ((dst - odst) * 8)) {
    if (!size--)
      goto emsgsize;
    *dst++ = '\0';
  }
  return (bits);

  /* Input is not an IPv4 network specification. */
  enoent:
  SET_ERRNO(ENOENT);
  return (-1);

  /* Destination buffer is too small. */
  emsgsize:
  SET_ERRNO(EMSGSIZE);
  return (-1);
}
Example #16
0
// Populate the character-class tables (csalpha, csupper, ...).
// For every code in 0..UCHAR_MAX each table entry is true only when the
// code is ASCII and the matching <ctype> predicate accepts it.  Once
// `initialised` is set, later constructions do nothing.
cset_init::cset_init()
{
  if (!initialised) {
    initialised = 1;
    for (int code = 0; code <= UCHAR_MAX; ++code) {
      // Non-ASCII codes belong to no class; the predicates are not consulted.
      const bool ascii = ISASCII(code);
      csalpha.v[code] = ascii && isalpha(code);
      csupper.v[code] = ascii && isupper(code);
      cslower.v[code] = ascii && islower(code);
      csdigit.v[code] = ascii && isdigit(code);
      csxdigit.v[code] = ascii && isxdigit(code);
      csspace.v[code] = ascii && isspace(code);
      cspunct.v[code] = ascii && ispunct(code);
      csalnum.v[code] = ascii && isalnum(code);
      csprint.v[code] = ascii && isprint(code);
      csgraph.v[code] = ascii && isgraph(code);
      cscntrl.v[code] = ascii && iscntrl(code);
    }
  }
}
Example #17
0
/*
 * tls_text_name - extract one text attribute from an X509 name.
 *
 * Looks up the first entry with the given NID in "name", converts it to
 * UTF-8, and returns a mystrdup() copy of the text. Returns 0 when the
 * entry is missing or unreadable, cannot be converted, is CCERT_BUFSIZ
 * bytes or longer, contains an embedded null byte, or contains a
 * non-printable ASCII character. All failures are logged with
 * msg_warn(); the "missing entry" warning is suppressed when gripe is
 * DONT_GRIPE. "label" and TLScontext->namaddr are used in log messages
 * only.
 *
 * NOTE(review): the result presumably has to be released by the caller
 * with the deallocator that matches mystrdup() - confirm.
 */
static char *tls_text_name(X509_NAME *name, int nid, const char *label,
			        const TLS_SESS_STATE *TLScontext, int gripe)
{
    const char *myname = "tls_text_name";
    int     pos;
    X509_NAME_ENTRY *entry;
    ASN1_STRING *entry_str;
    int     asn1_type;
    int     utf8_length;
    unsigned char *utf8_value;
    int     ch;
    unsigned char *cp;

    /* No name at all, or no entry with this NID. */
    if (name == 0 || (pos = X509_NAME_get_index_by_NID(name, nid, -1)) < 0) {
	if (gripe != DONT_GRIPE) {
	    msg_warn("%s: %s: peer certificate has no %s",
		     myname, TLScontext->namaddr, label);
	    tls_print_errors();
	}
	return (0);
    }
#if 0

    /*
     * If the match is required unambiguous, insist that that no other values
     * be present.
     */
    if (X509_NAME_get_index_by_NID(name, nid, pos) >= 0) {
	msg_warn("%s: %s: multiple %ss in peer certificate",
		 myname, TLScontext->namaddr, label);
	return (0);
    }
#endif

    if ((entry = X509_NAME_get_entry(name, pos)) == 0) {
	/* This should not happen */
	msg_warn("%s: %s: error reading peer certificate %s entry",
		 myname, TLScontext->namaddr, label);
	tls_print_errors();
	return (0);
    }
    if ((entry_str = X509_NAME_ENTRY_get_data(entry)) == 0) {
	/* This should not happen */
	msg_warn("%s: %s: error reading peer certificate %s data",
		 myname, TLScontext->namaddr, label);
	tls_print_errors();
	return (0);
    }

    /*
     * XXX Convert everything into UTF-8. This is a super-set of ASCII, so we
     * don't have to bother with separate code paths for ASCII-like content.
     * If the payload is ASCII then we won't waste lots of CPU cycles
     * converting it into UTF-8. It's up to OpenSSL to do something
     * reasonable when converting ASCII formats that contain non-ASCII
     * content.
     * 
     * XXX Don't bother optimizing the string length error check. It is not
     * worth the complexity.
     */
    asn1_type = ASN1_STRING_type(entry_str);
    if ((utf8_length = ASN1_STRING_to_UTF8(&utf8_value, entry_str)) < 0) {
	msg_warn("%s: %s: error decoding peer %s of ASN.1 type=%d",
		 myname, TLScontext->namaddr, label, asn1_type);
	tls_print_errors();
	return (0);
    }

    /*
     * No returns without cleaning up. A good optimizer will replace multiple
     * blocks of identical code by jumps to just one such block.
     *
     * From here on utf8_value holds the buffer produced by
     * ASN1_STRING_to_UTF8(), so every exit goes through this macro, which
     * evaluates the result first and then frees the buffer.
     */
#define TLS_TEXT_NAME_RETURN(x) do { \
	char *__tls_text_name_temp = (x); \
	OPENSSL_free(utf8_value); \
	return (__tls_text_name_temp); \
    } while (0)

    /*
     * Remove trailing null characters. They would give false alarms with the
     * length check and with the embedded null check.
     */
#define TRIM0(s, l) do { while ((l) > 0 && (s)[(l)-1] == 0) --(l); } while (0)

    TRIM0(utf8_value, utf8_length);

    /*
     * Enforce the length limit, because the caller will copy the result into
     * a fixed-length buffer.
     */
    if (utf8_length >= CCERT_BUFSIZ) {
	msg_warn("%s: %s: peer %s too long: %d",
		 myname, TLScontext->namaddr, label, utf8_length);
	TLS_TEXT_NAME_RETURN(0);
    }

    /*
     * Reject embedded nulls in ASCII or UTF-8 names. OpenSSL is responsible
     * for producing properly-formatted UTF-8.
     *
     * After trimming, a mismatch between the reported length and strlen()
     * means there is a null byte before the end of the value.
     */
    if (utf8_length != strlen((char *) utf8_value)) {
	msg_warn("%s: %s: NULL character in peer %s",
		 myname, TLScontext->namaddr, label);
	TLS_TEXT_NAME_RETURN(0);
    }

    /*
     * Reject non-printable ASCII characters in UTF-8 content.
     * 
     * Note: the code below does not find control characters in illegal UTF-8
     * sequences. It's OpenSSL's job to produce valid UTF-8, and reportedly,
     * it does validation.
     */
    for (cp = utf8_value; (ch = *cp) != 0; cp++) {
	if (ISASCII(ch) && !ISPRINT(ch)) {
	    msg_warn("%s: %s: non-printable content in peer %s",
		     myname, TLScontext->namaddr, label);
	    TLS_TEXT_NAME_RETURN(0);
	}
    }

    /* Success: hand back a private copy; the macro frees the OpenSSL buffer. */
    TLS_TEXT_NAME_RETURN(mystrdup((char *) utf8_value));
}
Example #18
0
// TODO: specify this per file type?
// Returns 1 when c is an ASCII character that is not a letter, a digit
// or '_'; returns 0 for word characters and for anything non-ASCII.
int is_word_boundary(int c) {
	if (!ISASCII(c))
		return 0;
	if (c == '_')
		return 0;
	if (c >= '0' && c <= '9')
		return 0;
	if (c >= 'a' && c <= 'z')
		return 0;
	if (c >= 'A' && c <= 'Z')
		return 0;
	return 1;
}
Example #19
0
// Populate the character-class tables (csalpha, csupper, ...).
// On FreeBSD the LC_CTYPE locale is first set from the environment.
// For every code in 0..UCHAR_MAX each table entry is true only when the
// code is ASCII and the matching <ctype> predicate accepts it.  Once
// `initialised` is set, later constructions do nothing.
cset_init::cset_init()
{
  if (!initialised) {
    initialised = 1;
#ifdef __FreeBSD__
    (void) setlocale(LC_CTYPE, "");
#endif
    for (int code = 0; code <= UCHAR_MAX; ++code) {
      // Non-ASCII codes belong to no class; the predicates are not consulted.
      const bool ascii = ISASCII(code);
      csalpha.v[code] = ascii && isalpha(code);
      csupper.v[code] = ascii && isupper(code);
      cslower.v[code] = ascii && islower(code);
      csdigit.v[code] = ascii && isdigit(code);
      csxdigit.v[code] = ascii && isxdigit(code);
      csspace.v[code] = ascii && isspace(code);
      cspunct.v[code] = ascii && ispunct(code);
      csalnum.v[code] = ascii && isalnum(code);
      csprint.v[code] = ascii && isprint(code);
      csgraph.v[code] = ascii && isgraph(code);
      cscntrl.v[code] = ascii && iscntrl(code);
    }
  }
}