// Build the upper- and lower-casing translation tables the first time a
// cmap_init object is constructed; later constructions do nothing.
// Only ASCII letters are folded, every other value maps to itself.
cmap_init::cmap_init()
{
  if (!initialised) {
    initialised = 1;
    for (int ch = 0; ch <= UCHAR_MAX; ch++) {
      // Start from the identity mapping.
      int as_upper = ch;
      int as_lower = ch;
      if (ISASCII(ch)) {
        if (islower(ch))
          as_upper = toupper(ch);
        if (isupper(ch))
          as_lower = tolower(ch);
      }
      cmupper.v[ch] = as_upper;
      cmlower.v[ch] = as_lower;
    }
  }
}
// Build the upper- and lower-casing translation tables the first time a
// cmap_init object is constructed; later constructions do nothing.
// On FreeBSD the LC_CTYPE locale is taken from the environment before the
// <ctype.h> classification functions are consulted.
// Only ASCII letters are folded, every other value maps to itself.
cmap_init::cmap_init()
{
  if (!initialised) {
    initialised = 1;
#ifdef __FreeBSD__
    (void) setlocale(LC_CTYPE, "");
#endif
    for (int ch = 0; ch <= UCHAR_MAX; ch++) {
      // Start from the identity mapping.
      int as_upper = ch;
      int as_lower = ch;
      if (ISASCII(ch)) {
        if (islower(ch))
          as_upper = toupper(ch);
        if (isupper(ch))
          as_lower = tolower(ch);
      }
      cmupper.v[ch] = as_upper;
      cmlower.v[ch] = as_lower;
    }
  }
}
/*
 * Membership test for a range.
 *
 * - When either endpoint is a Fixnum or a Numeric, or converts through
 *   to_int, val is compared against the endpoints with r_le/r_lt, honouring
 *   the exclusive-end flag.
 * - When both endpoints are one-character strings and val is a string, the
 *   single bytes are compared directly, provided all three are ASCII.
 * - Everything else is forwarded to the superclass implementation.
 */
static VALUE
range_include(VALUE range, SEL sel, VALUE val)
{
    VALUE beg = RANGE_BEG(range);
    VALUE end = RANGE_END(range);
    int numeric = FIXNUM_P(beg) || FIXNUM_P(end) ||
        rb_obj_is_kind_of(beg, rb_cNumeric) ||
        rb_obj_is_kind_of(end, rb_cNumeric);

    /* Probe to_int only when the cheaper checks did not already decide. */
    if (!numeric) {
        numeric = !NIL_P(rb_check_to_integer(beg, "to_int"));
    }
    if (!numeric) {
        numeric = !NIL_P(rb_check_to_integer(end, "to_int"));
    }

    if (numeric) {
        if (!r_le(beg, val)) {
            return Qfalse;
        }
        if (EXCL(range)) {
            return r_lt(val, end) ? Qtrue : Qfalse;
        }
        return r_le(val, end) ? Qtrue : Qfalse;
    }

    if (TYPE(beg) == T_STRING && TYPE(end) == T_STRING &&
        RSTRING_LEN(beg) == 1 && RSTRING_LEN(end) == 1) {
        if (NIL_P(val)) {
            return Qfalse;
        }
        if (TYPE(val) == T_STRING) {
            char b, e, v;
            long val_len = RSTRING_LEN(val);

            if (val_len == 0 || val_len > 1) {
                return Qfalse;
            }
            b = RSTRING_PTR(beg)[0];
            e = RSTRING_PTR(end)[0];
            v = RSTRING_PTR(val)[0];
            if (ISASCII(b) && ISASCII(e) && ISASCII(v)) {
                if (b <= v && v < e) {
                    return Qtrue;
                }
                /* The end point itself only counts for inclusive ranges. */
                return (!EXCL(range) && v == e) ? Qtrue : Qfalse;
            }
            /* Non-ASCII bytes: let the superclass decide. */
        }
    }

    if (sel == NULL) {
        sel = sel_registerName("include?:");
    }
    return rb_vm_call_super(range, sel, 1, &val);
}
/*
 * eucscol(s) returns the total screen column width of the NUL-terminated
 * EUC string s.
 *
 * ASCII bytes count as one column and one byte.  A character introduced by
 * SS2 or SS3 occupies scrw2/scrw3 columns and eucw2/eucw3 bytes plus the
 * shift byte itself; any other non-ASCII byte starts a code set 1 character
 * of scrw1 columns and eucw1 bytes.
 *
 * The scan never steps over the terminating NUL: a multibyte character that
 * is cut short by the end of the string is counted once and the scan stops
 * at the terminator instead of reading beyond it.  The pointer also always
 * advances by at least one byte, so a code set width of zero cannot stall
 * the loop.
 */
int
eucscol(const unsigned char *s)
{
	int col = 0;

	while (*s != '\0') {
		int nbytes;	/* encoded length of the current character */

		if (ISASCII(*s)) {
			col += 1;
			nbytes = 1;
		} else {
			switch (*s) {
			case SS2:
				col += scrw2;
				nbytes = eucw2 + 1;	/* include SS2 */
				break;
			case SS3:
				col += scrw3;
				nbytes = eucw3 + 1;	/* include SS3 */
				break;
			default:	/* code set 1 */
				col += scrw1;
				nbytes = eucw1;
				break;
			}
		}

		/* Guarantee forward progress even for a zero width. */
		if (nbytes < 1)
			nbytes = 1;

		/* Consume the character, but stop at the terminator. */
		do {
			s++;
		} while (--nbytes > 0 && *s != '\0');
	}
	return (col);
}
/*
 * printable - replace unprintable bytes of a NUL-terminated string in place
 *
 * Printable ASCII is kept.  When util_utf8_enable is set, a byte in the
 * range 194..254 that is followed by at least one byte in 128..191 is
 * accepted together with all of its trailing 128..191 bytes.  Every other
 * byte is overwritten with "replacement".  The input pointer is returned.
 */
char   *printable(char *string, int replacement)
{
    unsigned char *cp;
    int     ch;

    /*
     * XXX Replace invalid UTF8 sequences (too short, over-long encodings,
     * out-of-range code points, etc). See valid_utf8_string.c.
     */
    for (cp = (unsigned char *) string; (ch = *cp) != 0; cp++) {
        if (ISASCII(ch) && ISPRINT(ch))
            continue;                   /* ok */
        if (util_utf8_enable && ch >= 194 && ch <= 254
            && cp[1] >= 128 && cp[1] < 192) {
            /* UTF8; skip the rest of the bytes in the character. */
            do {
                cp++;
            } while (cp[1] >= 128 && cp[1] < 192);
            continue;
        }
        /* Not ASCII and not UTF8. */
        *cp = replacement;
    }
    return (string);
}
/*
 * Membership test for a range.
 *
 * - When either endpoint is a Fixnum or a Numeric, or converts through
 *   to_int, val is compared against the endpoints with r_le/r_lt, honouring
 *   the exclusive-end flag.
 * - When both endpoints are one-character strings and val is a string, the
 *   single bytes are compared directly, provided all three are ASCII.
 * - Everything else is forwarded to the superclass implementation.
 */
static VALUE
range_include(VALUE range, VALUE val)
{
    VALUE beg = RANGE_BEG(range);
    VALUE end = RANGE_END(range);
    int numeric = FIXNUM_P(beg) || FIXNUM_P(end) ||
        rb_obj_is_kind_of(beg, rb_cNumeric) ||
        rb_obj_is_kind_of(end, rb_cNumeric);

    /* Probe to_int only when the cheaper checks did not already decide. */
    if (!numeric) {
        numeric = !NIL_P(rb_check_to_integer(beg, "to_int"));
    }
    if (!numeric) {
        numeric = !NIL_P(rb_check_to_integer(end, "to_int"));
    }

    if (numeric) {
        if (!r_le(beg, val)) {
            return Qfalse;
        }
        if (EXCL(range)) {
            return r_lt(val, end) ? Qtrue : Qfalse;
        }
        return r_le(val, end) ? Qtrue : Qfalse;
    }

    if (RB_TYPE_P(beg, T_STRING) && RB_TYPE_P(end, T_STRING) &&
        RSTRING_LEN(beg) == 1 && RSTRING_LEN(end) == 1) {
        if (NIL_P(val)) {
            return Qfalse;
        }
        if (RB_TYPE_P(val, T_STRING)) {
            char b, e, v;
            long val_len = RSTRING_LEN(val);

            if (val_len == 0 || val_len > 1) {
                return Qfalse;
            }
            b = RSTRING_PTR(beg)[0];
            e = RSTRING_PTR(end)[0];
            v = RSTRING_PTR(val)[0];
            if (ISASCII(b) && ISASCII(e) && ISASCII(v)) {
                if (b <= v && v < e) {
                    return Qtrue;
                }
                /* The end point itself only counts for inclusive ranges. */
                return (!EXCL(range) && v == e) ? Qtrue : Qfalse;
            }
            /* Non-ASCII bytes: let the superclass decide. */
        }
    }

    /* TODO: ruby_frame->this_func = rb_intern("include?"); */
    return rb_call_super(1, &val);
}
// Returns the offset from `start` of the first byte in [start, end) that is
// not ASCII, or -1 when every byte in the range is ASCII (including the
// empty range).
//
// The scan only reads the buffer, so it walks it through a pointer-to-const
// rather than casting the constness of `start` away.
native_int Encoding::find_non_ascii_index(const uint8_t* start, const uint8_t* end) {
  for(const uint8_t* p = start; p < end; ++p) {
    if(!ISASCII(*p)) {
      return p - start;
    }
  }

  return -1;
}
/*
 * allprint - return 1 when the string is non-empty and consists entirely
 * of printable ASCII characters, 0 otherwise.
 */
int     allprint(const char *string)
{
    const unsigned char *cp = (const unsigned char *) string;
    int     ch;

    /* The empty string does not qualify. */
    if (*cp == 0)
        return (0);

    while ((ch = *cp++) != 0) {
        if (!(ISASCII(ch) && ISPRINT(ch)))
            return (0);
    }
    return (1);
}
// Classifies the text of `sym` by its spelling:
//   eConstant - starts with an upper-case character and every following
//               character is alphanumeric in the symbol's encoding, is '_',
//               or starts with a non-ASCII byte
//   eCVar     - starts with "@@"
//   eIVar     - starts with '@' followed by something other than a digit
//   eSystem   - longer than two bytes and starts with "__"
//   eNormal   - anything else, including text that fails to decode
SymbolTable::Kind SymbolTable::detect_kind(STATE, const Symbol* sym) {
  std::string str = strings[sym->index()];
  size_t size = str.size();
  uint8_t* p = reinterpret_cast<uint8_t*>(const_cast<char*>(str.c_str()));

  Encoding* encoding = Encoding::from_index(state, encodings[sym->index()]);
  OnigEncodingType* enc = encoding->encoding();

  // Constants start with A-Z, followed by alphanumeric characters or '_' or
  // non-ascii character.
  if(isupper(*p)) {
    uint8_t* const last = p + size;

    ++p;
    while(p < last) {
      int len = Encoding::precise_mbclen(p, last, enc);
      if(!ONIGENC_MBCLEN_CHARFOUND_P(len)) {
        return SymbolTable::eNormal;
      }
      len = ONIGENC_MBCLEN_CHARFOUND_LEN(len);

      int code = ONIGENC_MBC_TO_CODE(enc, p, p + len);
      bool allowed = ONIGENC_IS_CODE_ALNUM(enc, code)
                     || *p == '_'
                     || !ISASCII(*p);
      if(!allowed) {
        return SymbolTable::eNormal;
      }

      p += len;
    }

    return SymbolTable::eConstant;
  }

  if(p[0] == '@') {
    // An instance variable can't be just @.
    if(size == 1) {
      return SymbolTable::eNormal;
    }

    // A class variable begins with @@
    if(p[1] == '@') {
      return SymbolTable::eCVar;
    }

    // An instance variable can't start with a digit.
    if(ISDIGIT(p[1])) {
      return SymbolTable::eNormal;
    }

    // An instance variable begins with @
    return SymbolTable::eIVar;
  }

  // A system variable begins with __
  if(size > 2 && p[0] == '_' && p[1] == '_') {
    return SymbolTable::eSystem;
  }

  // Everything else is normal
  return SymbolTable::eNormal;
}
/*
 * Returns a pointer to the first non-ASCII byte in [p, e), or NULL when the
 * range holds only ASCII bytes.
 *
 * When VALUE is 4 or 8 bytes wide and the range is longer than two VALUEs,
 * the aligned middle part is tested one VALUE at a time against a mask of
 * the high bit of every byte; the bytes before the first aligned address
 * and everything from the first suspicious word onwards are checked one
 * byte at a time.
 */
static inline const char *
search_nonascii(const char *p, const char *e)
{
#if SIZEOF_VALUE == 8
# define NONASCII_MASK 0x8080808080808080ULL
#elif SIZEOF_VALUE == 4
# define NONASCII_MASK 0x80808080UL
#endif
#ifdef NONASCII_MASK
    if ((int)sizeof(VALUE) * 2 < e - p) {
        const VALUE align_mask = sizeof(VALUE) - 1;
        /* First VALUE-aligned address at or after p. */
        const VALUE *word = (const VALUE*)(~align_mask & ((VALUE)p + align_mask));
        /* Last VALUE-aligned address at or before e. */
        const VALUE *word_end = (const VALUE*)(~align_mask & (VALUE)e);

        /* Unaligned head, byte by byte. */
        for (; p < (const char *)word; p++) {
            if (!ISASCII(*p))
                return p;
        }

        /* Aligned body, one word at a time; stop at the first hit. */
        for (; word < word_end; word++) {
            if (*word & NONASCII_MASK)
                break;
        }

        /* Resume the byte scan at the hit, or at the end of the body. */
        p = (const char *)word;
    }
#endif

    for (; p < e; p++) {
        if (!ISASCII(*p))
            return p;
    }
    return NULL;
}
/*
 * euclen(s) returns the code width, in bytes, of the EUC char that
 * starts at s.
 * May also be implemented as a macro.
 */
int
euclen(const unsigned char *s)
{
	int width;

	if (ISASCII(*s))
		return (1);

	switch (*s) {
	case SS2:
		width = eucw2 + 1;	/* include SS2 */
		break;
	case SS3:
		width = eucw3 + 1;	/* include SS3 */
		break;
	default:			/* code set 1 */
		width = eucw1;
		break;
	}
	return (width);
}
/*
 * euccol(s) returns the screen column width of the EUC char that
 * starts at s.
 */
int
euccol(const unsigned char *s)
{
	int cols;

	if (ISASCII(*s))
		return (1);

	switch (*s) {
	case SS2:
		cols = scrw2;
		break;
	case SS3:
		cols = scrw3;
		break;
	default:			/* code set 1 */
		cols = scrw1;
		break;
	}
	return (cols);
}
/*
 * is_header - determine whether a line starts with a message header label
 *
 * Returns the number of label characters when the string starts with one or
 * more ASCII non-control, non-blank characters, optionally followed by
 * spaces or tabs, followed by ':'.  Returns 0 in all other cases, including
 * when the string ends before a ':' is seen.
 */
int     is_header(const char *str)
{
    const unsigned char *cp;
    int     state;
    int     c;
    int     len;

#define INIT		0
#define IN_CHAR		1
#define IN_CHAR_SPACE	2
#define CU_CHAR_PTR(x)	((const unsigned char *) (x))

    /*
     * XXX RFC 2822 Section 4.5, Obsolete header fields: whitespace may
     * appear between header label and ":" (see: RFC 822, Section 3.4.2.).
     */
    len = 0;
    state = INIT;
    for (cp = CU_CHAR_PTR(str); (c = *cp) != 0; cp++) {
        if (c == ':') {
            /* End of label; it must contain at least one character. */
            if (state == IN_CHAR || state == IN_CHAR_SPACE)
                return (len);
            return (0);
        }
        if (c == ' ' || c == '\t') {
            /* Blanks are allowed only after the label text. */
            if (state == INIT)
                return (0);
            state = IN_CHAR_SPACE;
            continue;
        }
        if (!ISASCII(c) || ISCNTRL(c))
            return (0);
        /* Label text may not resume after a blank. */
        if (state == IN_CHAR_SPACE)
            return (0);
        state = IN_CHAR;
        len++;
    }

    /* Redundant return for future proofing. */
    return (0);
}
/*
 * Fetches the character at p (bounded by e) if it is an ASCII character in
 * encoding enc.
 *
 * Returns the character code and, when len is non-NULL, stores the number
 * of bytes the character occupies in *len.  Returns -1 when the range is
 * empty, when no complete valid character is found at p, or when the
 * character is not ASCII.
 *
 * For ASCII-compatible encodings only the first byte is inspected.
 *
 * The multibyte length is kept in a signed int: the test
 * MBCLEN_CHARFOUND_P() is applied to it, and an unsigned variable would
 * turn a negative "invalid"/"need more" result into a large positive value
 * that passes that test.
 */
int
rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc)
{
    unsigned int c;
    int l;

    if (e <= p)
        return -1;

    if (rb_enc_asciicompat(enc)) {
        c = (unsigned char)*p;
        if (!ISASCII(c))
            return -1;
        if (len) *len = 1;
        return c;
    }

    l = rb_enc_precise_mbclen(p, e, enc);
    if (!MBCLEN_CHARFOUND_P(l))
        return -1;
    c = rb_enc_mbc_to_codepoint(p, e, enc);
    if (!rb_enc_isascii(c, enc))
        return -1;
    if (len) *len = l;
    return c;
}
/*
 * static int
 * inet_net_pton_ipv4(src, dst, size)
 *      convert IPv4 network number from presentation to network format.
 *      accepts hex octets, hex strings, decimal octets, and /CIDR.
 *      "size" is in bytes and describes "dst".
 * return:
 *      number of bits, either imputed classfully or specified with /CIDR,
 *      or -1 if some failure occurred (check errno).  ENOENT means it was
 *      not an IPv4 network specification.  EMSGSIZE means "dst" was too
 *      small to hold the result.
 * note:
 *      network byte order assumed.  this means 192.5.5.240/28 has
 *      0b11110000 in its fourth octet.
 * note:
 *      On Windows we store the error in the thread errno, not
 *      in the winsock error code. This is to avoid losing the
 *      actual last winsock error. So use macro ERRNO to fetch the
 *      errno this function sets when returning (-1), not SOCKERRNO.
 * author:
 *      Paul Vixie (ISC), June 1996
 */
static int
inet_net_pton_ipv4(const char *src, unsigned char *dst, size_t size)
{
  static const char xdigits[] = "0123456789abcdef";
  static const char digits[] = "0123456789";
  int n, ch, tmp = 0, dirty, bits;
  const unsigned char *odst = dst;  /* start of output, to measure progress */

  /*
   * Throughout the parser "ch" holds the most recently consumed input
   * character and "src" points just past it.
   */
  ch = *src++;
  if (ch == '0' && (src[0] == 'x' || src[0] == 'X')
      && ISASCII(src[1])
      && ISXDIGIT(src[1])) {
    /* Hexadecimal: Eat nybble string. */
    if (!size)
      goto emsgsize;
    dirty = 0;  /* number of nybbles collected for the current octet */
    src++;  /* skip x or X. */
    while ((ch = *src++) != '\0' && ISASCII(ch) && ISXDIGIT(ch)) {
      if (ISUPPER(ch))
        ch = tolower(ch);
      /* The digit's value is its position in the lookup string. */
      n = aresx_sztosi(strchr(xdigits, ch) - xdigits);
      if (dirty == 0)
        tmp = n;
      else
        tmp = (tmp << 4) | n;
      if (++dirty == 2) {
        /* Two nybbles make one octet; emit it. */
        if (!size--)
          goto emsgsize;
        *dst++ = (unsigned char) tmp;
        dirty = 0;
      }
    }
    if (dirty) {  /* Odd trailing nybble? */
      /* It becomes the high half of a final octet. */
      if (!size--)
        goto emsgsize;
      *dst++ = (unsigned char) (tmp << 4);
    }
  } else if (ISASCII(ch) && ISDIGIT(ch)) {
    /* Decimal: eat dotted digit string. */
    for (;;) {
      tmp = 0;
      do {
        n = aresx_sztosi(strchr(digits, ch) - digits);
        tmp *= 10;
        tmp += n;
        if (tmp > 255)
          goto enoent;
      } while ((ch = *src++) != '\0' && ISASCII(ch) && ISDIGIT(ch));
      if (!size--)
        goto emsgsize;
      *dst++ = (unsigned char) tmp;
      /* End of string or start of a CIDR suffix ends the octet list. */
      if (ch == '\0' || ch == '/')
        break;
      if (ch != '.')
        goto enoent;
      /* A dot must be followed by the first digit of the next octet. */
      ch = *src++;
      if (!ISASCII(ch) || !ISDIGIT(ch))
        goto enoent;
    }
  } else
    goto enoent;

  bits = -1;  /* -1 means "no explicit /CIDR width seen" */
  if (ch == '/' && ISASCII(src[0]) && ISDIGIT(src[0]) && dst > odst) {
    /* CIDR width specifier.  Nothing can follow it. */
    ch = *src++;  /* Skip over the /. */
    bits = 0;
    do {
      n = aresx_sztosi(strchr(digits, ch) - digits);
      bits *= 10;
      bits += n;
      if (bits > 32)
        goto enoent;
    } while ((ch = *src++) != '\0' && ISASCII(ch) && ISDIGIT(ch));
    if (ch != '\0')
      goto enoent;
  }

  /* Fiery death and destruction unless we prefetched EOS. */
  if (ch != '\0')
    goto enoent;

  /* If nothing was written to the destination, we found no address. */
  if (dst == odst)
    goto enoent;
  /* If no CIDR spec was given, infer width from net class. */
  if (bits == -1) {
    if (*odst >= 240)       /* Class E */
      bits = 32;
    else if (*odst >= 224)  /* Class D */
      bits = 8;
    else if (*odst >= 192)  /* Class C */
      bits = 24;
    else if (*odst >= 128)  /* Class B */
      bits = 16;
    else                    /* Class A */
      bits = 8;
    /* If imputed mask is narrower than specified octets, widen. */
    if (bits < ((dst - odst) * 8))
      bits = aresx_sztosi(dst - odst) * 8;
    /*
     * If there are no additional bits specified for a class D
     * address adjust bits to 4.
     */
    if (bits == 8 && *odst == 224)
      bits = 4;
  }
  /* Extend network to cover the actual mask. */
  while (bits > ((dst - odst) * 8)) {
    /* Pad with zero octets until the output spans "bits" bits. */
    if (!size--)
      goto emsgsize;
    *dst++ = '\0';
  }
  return (bits);

  /* Input is not an IPv4 network specification. */
  enoent:
  SET_ERRNO(ENOENT);
  return (-1);

  /* Destination buffer is too small. */
  emsgsize:
  SET_ERRNO(EMSGSIZE);
  return (-1);
}
// Fill the predefined character-class sets the first time a cset_init
// object is constructed; later constructions do nothing.
// Each set marks exactly the ASCII characters for which the matching
// <ctype.h> predicate holds; non-ASCII values belong to no set.
cset_init::cset_init()
{
  if (!initialised) {
    initialised = 1;
    for (int ch = 0; ch <= UCHAR_MAX; ch++) {
      // Evaluated once per character; the predicates below are consulted
      // only for ASCII values.
      const bool ascii = ISASCII(ch) != 0;

      csalpha.v[ch] = ascii && isalpha(ch);
      csupper.v[ch] = ascii && isupper(ch);
      cslower.v[ch] = ascii && islower(ch);
      csdigit.v[ch] = ascii && isdigit(ch);
      csxdigit.v[ch] = ascii && isxdigit(ch);
      csspace.v[ch] = ascii && isspace(ch);
      cspunct.v[ch] = ascii && ispunct(ch);
      csalnum.v[ch] = ascii && isalnum(ch);
      csprint.v[ch] = ascii && isprint(ch);
      csgraph.v[ch] = ascii && isgraph(ch);
      cscntrl.v[ch] = ascii && iscntrl(ch);
    }
  }
}
/*
 * tls_text_name - extract one attribute of a certificate name as text
 *
 * Looks up the first entry with the given NID in "name", converts its value
 * to UTF-8, and returns a mystrdup() copy of it.  Returns 0 when the name
 * is null, the entry is absent or cannot be read or decoded, the value is
 * CCERT_BUFSIZ bytes or longer, contains an embedded null byte, or contains
 * a non-printable ASCII character.
 *
 * "label" is used only in warning messages.  "gripe" controls only the
 * warning for a missing entry: it is suppressed when gripe == DONT_GRIPE.
 * All other failures are always logged with msg_warn().
 *
 * NOTE(review): the returned copy is presumably owned by the caller and
 * released with the allocator that pairs with mystrdup() - confirm.
 */
static char *tls_text_name(X509_NAME *name, int nid, const char *label,
                           const TLS_SESS_STATE *TLScontext, int gripe)
{
    const char *myname = "tls_text_name";
    int     pos;
    X509_NAME_ENTRY *entry;
    ASN1_STRING *entry_str;
    int     asn1_type;
    int     utf8_length;
    unsigned char *utf8_value;
    int     ch;
    unsigned char *cp;

    /* Locate the first entry with the requested NID. */
    if (name == 0 || (pos = X509_NAME_get_index_by_NID(name, nid, -1)) < 0) {
        if (gripe != DONT_GRIPE) {
            msg_warn("%s: %s: peer certificate has no %s",
                     myname, TLScontext->namaddr, label);
            tls_print_errors();
        }
        return (0);
    }
#if 0

    /*
     * If the match is required to be unambiguous, insist that no other
     * values be present.
     */
    if (X509_NAME_get_index_by_NID(name, nid, pos) >= 0) {
        msg_warn("%s: %s: multiple %ss in peer certificate",
                 myname, TLScontext->namaddr, label);
        return (0);
    }
#endif

    if ((entry = X509_NAME_get_entry(name, pos)) == 0) {
        /* This should not happen */
        msg_warn("%s: %s: error reading peer certificate %s entry",
                 myname, TLScontext->namaddr, label);
        tls_print_errors();
        return (0);
    }
    if ((entry_str = X509_NAME_ENTRY_get_data(entry)) == 0) {
        /* This should not happen */
        msg_warn("%s: %s: error reading peer certificate %s data",
                 myname, TLScontext->namaddr, label);
        tls_print_errors();
        return (0);
    }

    /*
     * XXX Convert everything into UTF-8. This is a super-set of ASCII, so we
     * don't have to bother with separate code paths for ASCII-like content.
     * If the payload is ASCII then we won't waste lots of CPU cycles
     * converting it into UTF-8. It's up to OpenSSL to do something
     * reasonable when converting ASCII formats that contain non-ASCII
     * content.
     * 
     * XXX Don't bother optimizing the string length error check. It is not
     * worth the complexity.
     */
    asn1_type = ASN1_STRING_type(entry_str);
    if ((utf8_length = ASN1_STRING_to_UTF8(&utf8_value, entry_str)) < 0) {
        msg_warn("%s: %s: error decoding peer %s of ASN.1 type=%d",
                 myname, TLScontext->namaddr, label, asn1_type);
        tls_print_errors();
        return (0);
    }

    /*
     * No returns without cleaning up. From here on utf8_value holds the
     * converted string, and every exit goes through TLS_TEXT_NAME_RETURN,
     * which releases it with OPENSSL_free(). The macro evaluates its
     * argument before freeing, so the argument may still read utf8_value.
     * A good optimizer will replace multiple blocks of identical code by
     * jumps to just one such block.
     */
#define TLS_TEXT_NAME_RETURN(x) do { \
        char *__tls_text_name_temp = (x); \
        OPENSSL_free(utf8_value); \
        return (__tls_text_name_temp); \
    } while (0)

    /*
     * Remove trailing null characters. They would give false alarms with the
     * length check and with the embedded null check.
     */
#define TRIM0(s, l) do { while ((l) > 0 && (s)[(l)-1] == 0) --(l); } while (0)

    TRIM0(utf8_value, utf8_length);

    /*
     * Enforce the length limit, because the caller will copy the result into
     * a fixed-length buffer.
     */
    if (utf8_length >= CCERT_BUFSIZ) {
        msg_warn("%s: %s: peer %s too long: %d",
                 myname, TLScontext->namaddr, label, utf8_length);
        TLS_TEXT_NAME_RETURN(0);
    }

    /*
     * Reject embedded nulls in ASCII or UTF-8 names. OpenSSL is responsible
     * for producing properly-formatted UTF-8. An embedded null makes
     * strlen() stop before the trimmed length is reached.
     */
    if (utf8_length != strlen((char *) utf8_value)) {
        msg_warn("%s: %s: NULL character in peer %s",
                 myname, TLScontext->namaddr, label);
        TLS_TEXT_NAME_RETURN(0);
    }

    /*
     * Reject non-printable ASCII characters in UTF-8 content. Non-ASCII
     * bytes are not examined here.
     * 
     * Note: the code below does not find control characters in illegal UTF-8
     * sequences. It's OpenSSL's job to produce valid UTF-8, and reportedly,
     * it does validation.
     */
    for (cp = utf8_value; (ch = *cp) != 0; cp++) {
        if (ISASCII(ch) && !ISPRINT(ch)) {
            msg_warn("%s: %s: non-printable content in peer %s",
                     myname, TLScontext->namaddr, label);
            TLS_TEXT_NAME_RETURN(0);
        }
    }
    /* Hand back a private copy; the OpenSSL buffer is freed by the macro. */
    TLS_TEXT_NAME_RETURN(mystrdup((char *) utf8_value));
}
// TODO: specify this per file type? int is_word_boundary(int c) { return ISASCII(c) && !(('0' <= c && c <= '9') || ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || c == '_'); }
// Fill the predefined character-class sets the first time a cset_init
// object is constructed; later constructions do nothing.
// On FreeBSD the LC_CTYPE locale is taken from the environment before the
// <ctype.h> predicates are consulted.
// Each set marks exactly the ASCII characters for which the matching
// predicate holds; non-ASCII values belong to no set.
cset_init::cset_init()
{
  if (!initialised) {
    initialised = 1;
#ifdef __FreeBSD__
    (void) setlocale(LC_CTYPE, "");
#endif
    for (int ch = 0; ch <= UCHAR_MAX; ch++) {
      // Evaluated once per character; the predicates below are consulted
      // only for ASCII values.
      const bool ascii = ISASCII(ch) != 0;

      csalpha.v[ch] = ascii && isalpha(ch);
      csupper.v[ch] = ascii && isupper(ch);
      cslower.v[ch] = ascii && islower(ch);
      csdigit.v[ch] = ascii && isdigit(ch);
      csxdigit.v[ch] = ascii && isxdigit(ch);
      csspace.v[ch] = ascii && isspace(ch);
      cspunct.v[ch] = ascii && ispunct(ch);
      csalnum.v[ch] = ascii && isalnum(ch);
      csprint.v[ch] = ascii && isprint(ch);
      csgraph.v[ch] = ascii && isgraph(ch);
      cscntrl.v[ch] = ascii && iscntrl(ch);
    }
  }
}