/*
 * Decodes a string in RFC 3986 percent-encoded representation.
 *
 * Reads at most ilen bytes from in, stopping early at a NUL byte.  Every
 * "%XX" triplet (two hexadecimal digits) is replaced by the byte it
 * encodes; all other bytes are copied through unchanged.
 *
 * On entry *olen is the capacity of out, including room for the
 * terminating NUL.  On return *olen is the decoded length, excluding the
 * terminator, whether or not the output fit.
 *
 * Returns 0 on success.  Returns -1 with errno set to EINVAL when a '%'
 * is not followed by two hexadecimal digits within the remaining input
 * (*olen is left untouched in that case), or with errno set to ENOSPC
 * when the decoded string plus terminator does not fit; out then holds
 * the NUL-terminated prefix that did fit (nothing is written when the
 * capacity is zero).
 */
int
percent_decode(const char *in, size_t ilen, char *out, size_t *olen)
{
	size_t len = 0;

	while (ilen > 0 && *in != '\0') {
		if (*in != '%') {
			/* Keep counting after the buffer fills up so the
			 * caller learns the required size. */
			if (++len < *olen)
				*out++ = *in;
			in += 1;
			ilen -= 1;
		} else if (ilen >= 3 && is_xdigit(in[1]) && is_xdigit(in[2])) {
			if (++len < *olen)
				*out++ = (unhex(in[1]) << 4) | unhex(in[2]);
			/* An escape consumes three input bytes; charge all
			 * three against ilen so the scan never runs past the
			 * length the caller gave us. */
			in += 3;
			ilen -= 3;
		} else {
			errno = EINVAL;
			return (-1);
		}
	}

	if (len >= *olen) {
		/* overflow: at most *olen - 1 bytes were stored, so the
		 * slot at out is still inside the buffer */
		if (*olen > 0)
			*out = '\0';
		*olen = len;
		errno = ENOSPC;
		return (-1);
	}

	*out = '\0';
	*olen = len;
	return (0);
}
/*
 * Convert a hexadecimal string, optionally prefixed with "0x", to a
 * 32-bit unsigned value.
 *
 * The string is walked from its last character towards its first, so each
 * digit is weighted by an increasing power of 16.  Walking stops at the
 * first character that is not a hexadecimal digit; FDC_ASSERT checks that
 * this character is the 'x' of a leading "0x".
 *
 * An empty string yields 0.  There is no overflow detection: the
 * arithmetic is done in uint32_t, so digits beyond the eighth wrap.
 */
uint32_t convert_string_to_hexadecimal(_IN_ const char *str)
{
    uint32_t value = 0;
    uint32_t multiplier = 1;

    /* Count an index down instead of stepping a pointer below `str`:
     * forming a pointer before the start of the array is undefined. */
    for (size_t remaining = strlen(str); remaining > 0; remaining--) {
        const char *p = str + remaining - 1;
        int c = *p;

        if (!is_xdigit(c)) {
            FDC_ASSERT(p == str + 1 && c == 'x' && str[0] == '0', str[0], str[1]);
            break;
        }

        if (is_digit(c)) {
            value += (c - '0') * multiplier;
        } else {
            value += (to_lower(c) - 'a' + 10) * multiplier;
        }
        multiplier *= 16;
    }

    return value;
}
static bool number(struct scanner *s, int64_t *out, int *out_tok) { bool is_float = false, is_hex = false; const char *start = s->s + s->pos; char *end; if (lit(s, "0x")) { while (is_xdigit(peek(s))) next(s); is_hex = true; } else { while (is_digit(peek(s))) next(s); is_float = chr(s, '.'); while (is_digit(peek(s))) next(s); } if (s->s + s->pos == start) return false; errno = 0; if (is_hex) *out = strtoul(start, &end, 16); else if (is_float) *out = strtod(start, &end); else *out = strtoul(start, &end, 10); if (errno != 0 || s->s + s->pos != end) *out_tok = ERROR_TOK; else *out_tok = (is_float ? FLOAT : INTEGER); return true; }
/*
 * simple_strtoul - convert a string to an unsigned long
 * @cp:   start of the string
 * @endp: if non-NULL, receives a pointer to the first unparsed character
 * @base: number base to use, or 0 to detect it from the prefix
 *
 * With @base 0 a leading "0x"/"0X" followed by a hex digit selects base
 * 16, any other leading '0' selects base 8, and everything else is parsed
 * as base 10.  With @base 16 an optional "0x"/"0X" prefix is skipped when
 * a hex digit follows it.
 *
 * Only characters accepted by is_xdigit() are considered digits, so bases
 * above 16 are not fully supported.  No leading whitespace or sign is
 * accepted and overflow is not detected (the result wraps).
 */
unsigned long simple_strtoul(const char *cp, char **endp, unsigned int base)
{
	unsigned long result = 0, value;

	if (!base) {
		base = 10;
		if (*cp == '0') {
			base = 8;
			cp++;
			/* Accept both spellings of the hex prefix. */
			if ((*cp == 'x' || *cp == 'X') && is_xdigit(cp[1])) {
				cp++;
				base = 16;
			}
		}
	} else if (base == 16) {
		/* An explicit base 16 may still carry the prefix; only skip
		 * it when a digit follows so that "0x" alone parses as "0". */
		if (cp[0] == '0' && (cp[1] == 'x' || cp[1] == 'X') &&
		    is_xdigit(cp[2]))
			cp += 2;
	}

	while (is_xdigit(*cp)) {
		if (is_digit(*cp))
			value = *cp - '0';
		else
			value = (is_lower(*cp) ? toupper(*cp) : *cp) - 'A' + 10;

		/* Stop at the first digit that is not valid in this base. */
		if (value >= base)
			break;

		result = result * base + value;
		cp++;
	}

	if (endp)
		*endp = (char *)cp;
	return result;
}
//input: *c=='[' **pc==':' static u16 bracket_class(u8 *c,u8 **pc,u8 **sc,u8 not,u8 sc_folded) { u8 char_class[CHAR_CLASS_MAX+1];//don't forget the 0 terminating char u16 r=bracket_char_class_get(c,pc,not,sc_folded,&char_class[0]); if(r!=OK) return r; if((STREQ(char_class,"alnum")&&is_alnum(**sc)) ||(STREQ(char_class,"alpha")&&is_alpha(**sc)) ||(STREQ(char_class,"blank")&&is_blank(**sc)) ||(STREQ(char_class,"cntrl")&&is_cntrl(**sc)) ||(STREQ(char_class,"digit")&&is_digit(**sc)) ||(STREQ(char_class,"graph")&&is_graph(**sc)) ||(STREQ(char_class,"lower")&&is_lower(**sc)) ||(STREQ(char_class,"print")&&is_print(**sc)) ||(STREQ(char_class,"punct")&&is_punct(**sc)) ||(STREQ(char_class,"space")&&is_space(**sc)) ||(STREQ(char_class,"upper")&&is_upper(**sc)) ||(STREQ(char_class,"xdigit")&&is_xdigit(**sc))) return bracket_matched(c,pc,not); *c=*(*pc)++; return OK; }
static void output_tables (const char *filename, const char *version) { FILE *stream; unsigned int ch; stream = fopen (filename, "w"); if (stream == NULL) { fprintf (stderr, "cannot open '%s' for writing\n", filename); exit (1); } fprintf (stream, "escape_char /\n"); fprintf (stream, "comment_char %%\n"); fprintf (stream, "\n"); fprintf (stream, "%% Generated automatically by gen-unicode-ctype for Unicode %s.\n", version); fprintf (stream, "\n"); fprintf (stream, "LC_IDENTIFICATION\n"); fprintf (stream, "title \"Unicode %s FDCC-set\"\n", version); fprintf (stream, "source \"UnicodeData.txt, PropList.txt\"\n"); fprintf (stream, "address \"\"\n"); fprintf (stream, "contact \"\"\n"); fprintf (stream, "email \"[email protected]\"\n"); fprintf (stream, "tel \"\"\n"); fprintf (stream, "fax \"\"\n"); fprintf (stream, "language \"\"\n"); fprintf (stream, "territory \"Earth\"\n"); fprintf (stream, "revision \"%s\"\n", version); { time_t now; char date[11]; now = time (NULL); strftime (date, sizeof (date), "%Y-%m-%d", gmtime (&now)); fprintf (stream, "date \"%s\"\n", date); } fprintf (stream, "category \"unicode:2001\";LC_CTYPE\n"); fprintf (stream, "END LC_IDENTIFICATION\n"); fprintf (stream, "\n"); /* Verifications. */ for (ch = 0; ch < 0x110000; ch++) { /* toupper restriction: "Only characters specified for the keywords lower and upper shall be specified. */ if (to_upper (ch) != ch && !(is_lower (ch) || is_upper (ch))) fprintf (stderr, "%s is not upper|lower but toupper(0x%04X) = 0x%04X\n", ucs_symbol (ch), ch, to_upper (ch)); /* tolower restriction: "Only characters specified for the keywords lower and upper shall be specified. */ if (to_lower (ch) != ch && !(is_lower (ch) || is_upper (ch))) fprintf (stderr, "%s is not upper|lower but tolower(0x%04X) = 0x%04X\n", ucs_symbol (ch), ch, to_lower (ch)); /* alpha restriction: "Characters classified as either upper or lower shall automatically belong to this class. 
*/ if ((is_lower (ch) || is_upper (ch)) && !is_alpha (ch)) fprintf (stderr, "%s is upper|lower but not alpha\n", ucs_symbol (ch)); /* alpha restriction: "No character specified for the keywords cntrl, digit, punct or space shall be specified." */ if (is_alpha (ch) && is_cntrl (ch)) fprintf (stderr, "%s is alpha and cntrl\n", ucs_symbol (ch)); if (is_alpha (ch) && is_digit (ch)) fprintf (stderr, "%s is alpha and digit\n", ucs_symbol (ch)); if (is_alpha (ch) && is_punct (ch)) fprintf (stderr, "%s is alpha and punct\n", ucs_symbol (ch)); if (is_alpha (ch) && is_space (ch)) fprintf (stderr, "%s is alpha and space\n", ucs_symbol (ch)); /* space restriction: "No character specified for the keywords upper, lower, alpha, digit, graph or xdigit shall be specified." upper, lower, alpha already checked above. */ if (is_space (ch) && is_digit (ch)) fprintf (stderr, "%s is space and digit\n", ucs_symbol (ch)); if (is_space (ch) && is_graph (ch)) fprintf (stderr, "%s is space and graph\n", ucs_symbol (ch)); if (is_space (ch) && is_xdigit (ch)) fprintf (stderr, "%s is space and xdigit\n", ucs_symbol (ch)); /* cntrl restriction: "No character specified for the keywords upper, lower, alpha, digit, punct, graph, print or xdigit shall be specified." upper, lower, alpha already checked above. */ if (is_cntrl (ch) && is_digit (ch)) fprintf (stderr, "%s is cntrl and digit\n", ucs_symbol (ch)); if (is_cntrl (ch) && is_punct (ch)) fprintf (stderr, "%s is cntrl and punct\n", ucs_symbol (ch)); if (is_cntrl (ch) && is_graph (ch)) fprintf (stderr, "%s is cntrl and graph\n", ucs_symbol (ch)); if (is_cntrl (ch) && is_print (ch)) fprintf (stderr, "%s is cntrl and print\n", ucs_symbol (ch)); if (is_cntrl (ch) && is_xdigit (ch)) fprintf (stderr, "%s is cntrl and xdigit\n", ucs_symbol (ch)); /* punct restriction: "No character specified for the keywords upper, lower, alpha, digit, cntrl, xdigit or as the <space> character shall be specified." upper, lower, alpha, cntrl already checked above. 
*/ if (is_punct (ch) && is_digit (ch)) fprintf (stderr, "%s is punct and digit\n", ucs_symbol (ch)); if (is_punct (ch) && is_xdigit (ch)) fprintf (stderr, "%s is punct and xdigit\n", ucs_symbol (ch)); if (is_punct (ch) && (ch == 0x0020)) fprintf (stderr, "%s is punct\n", ucs_symbol (ch)); /* graph restriction: "No character specified for the keyword cntrl shall be specified." Already checked above. */ /* print restriction: "No character specified for the keyword cntrl shall be specified." Already checked above. */ /* graph - print relation: differ only in the <space> character. How is this possible if there are more than one space character?! I think susv2/xbd/locale.html should speak of "space characters", not "space character". */ if (is_print (ch) && !(is_graph (ch) || /* ch == 0x0020 */ is_space (ch))) fprintf (stderr, "%s is print but not graph|<space>\n", ucs_symbol (ch)); if (!is_print (ch) && (is_graph (ch) || ch == 0x0020)) fprintf (stderr, "%s is graph|<space> but not print\n", ucs_symbol (ch)); } fprintf (stream, "LC_CTYPE\n"); output_charclass (stream, "upper", is_upper); output_charclass (stream, "lower", is_lower); output_charclass (stream, "alpha", is_alpha); output_charclass (stream, "digit", is_digit); output_charclass (stream, "outdigit", is_outdigit); output_charclass (stream, "blank", is_blank); output_charclass (stream, "space", is_space); output_charclass (stream, "cntrl", is_cntrl); output_charclass (stream, "punct", is_punct); output_charclass (stream, "xdigit", is_xdigit); output_charclass (stream, "graph", is_graph); output_charclass (stream, "print", is_print); output_charclass (stream, "class \"combining\";", is_combining); output_charclass (stream, "class \"combining_level3\";", is_combining_level3); output_charmap (stream, "toupper", to_upper); output_charmap (stream, "tolower", to_lower); output_charmap (stream, "map \"totitle\";", to_title); output_widthmap (stream); fprintf (stream, "END LC_CTYPE\n"); if (ferror (stream) || fclose 
(stream)) { fprintf (stderr, "error writing to '%s'\n", filename); exit (1); } }
/*
 * Convert the initial part of a string to a long integer.
 *
 * str    - NUL-terminated input.  Leading blanks and one optional '+' or
 *          '-' sign are skipped.
 * endptr - if not NULL, receives a pointer to the first character that
 *          was not consumed.  It is set to str when no conversion was
 *          performed (no digits, or an unsupported base).
 * base   - 2..36, or 0 to detect the base from the prefix: "0x"/"0X"
 *          followed by a hex digit selects 16, another leading '0'
 *          selects 8, anything else selects 10.  With base 16 an optional
 *          "0x"/"0X" prefix is accepted as well.
 *
 * Returns the converted value, or 0 when no conversion was performed.
 * Overflow is neither detected nor reported: the value is accumulated
 * modulo the range of unsigned long.
 */
long int strtol(const char *str, char **endptr, int base)
{
    const char *buf = str;
    const char *first_digit;
    unsigned long value = 0;
    int negative = 0;
    int k;

    /* Until digits are actually consumed, report "nothing converted". */
    if (endptr != NULL) {
        *endptr = (char *)str;
    }

    /* base 0 means auto-detect and must not be rejected here */
    if (base != 0 && (base < 2 || base > 36)) {
        return 0;
    }

    /* swallow white spaces */
    while (*buf == ' ' || *buf == '\t' || *buf == '\n' ||
           *buf == '\v' || *buf == '\f' || *buf == '\r') {
        ++buf;
    }

    /* parse sign if any */
    if (*buf == '-') {
        negative = 1;
        ++buf;
    } else if (*buf == '+') {
        ++buf;
    }

    /* parse base: the hex prefix is only consumed when a hex digit
     * follows, so that "0x" on its own still parses as "0" */
    if ((base == 0 || base == 16) && buf[0] == '0' &&
        to_lower(buf[1]) == 'x' && is_xdigit(buf[2])) {
        buf += 2;
        base = 16;
    } else if (base == 0) {
        /* the leading '0' itself is left for the digit loop below */
        base = (*buf == '0') ? 8 : 10;
    }

    /* parse alpha-numerical string */
    first_digit = buf;
    while (is_alnum(*buf)) {
        if (is_alpha(*buf)) {
            k = to_lower(*buf) - 'a' + 10;
        } else {
            k = *buf - '0';
        }
        /* every digit, decimal or alphabetic, must be below the base */
        if (k < 0 || k >= base) {
            break;
        }
        /* unsigned arithmetic: wrapping instead of signed overflow */
        value = value * (unsigned long)base + (unsigned long)k;
        ++buf;
    }

    if (buf == first_digit) {
        return 0;
    }

    if (endptr != NULL) {
        *endptr = (char *)buf;
    }
    return negative ? (long int)(0UL - value) : (long int)value;
}
// Match a single hexadecimal digit at `src`.
// Returns a pointer just past the digit when the character at `src`
// satisfies is_xdigit, and a null pointer otherwise.
const char* xdigit(const char* src)
{
  if (!is_xdigit(*src)) {
    return 0;
  }
  return src + 1;
}
// is_xdigit for unicode::char_t: a value above 127 is never a hexadecimal
// digit; anything else is narrowed to char and handed to the is_xdigit
// overload that takes a char.
inline bool is_xdigit<unicode::char_t> (unicode::char_t c)
{
  if (!(c.value <= 127)) {
    return false;
  }
  return is_xdigit(static_cast<char>(c.value));
}