Beispiel #1
0
/*
 * Decodes a string in RFC 3986 percent-encoded representation.
 *
 * At most ilen bytes are read from in; decoding also stops at the first
 * NUL byte.  Each "%XX" triplet (XX being two hexadecimal digits) yields
 * one output byte, every other byte is copied unchanged.
 *
 * On entry, *olen is the capacity of out including room for the
 * terminating NUL.  On return, *olen is the decoded length without the
 * terminator, both on success and when the output did not fit.
 *
 * Returns 0 on success, with out NUL-terminated.  Returns -1 with errno
 * set to EINVAL if a '%' is not followed by two hexadecimal digits within
 * the remaining input (*olen is left untouched), or with errno set to
 * ENOSPC if out is too small (out then holds a truncated result without
 * a terminating NUL).
 */
int
percent_decode(const char *in, size_t ilen, char *out, size_t *olen)
{
	size_t len;

	for (len = 0; ilen && *in; --ilen, ++in) {
		if (*in != '%') {
			/* keep counting even when the output is full */
			if (++len < *olen)
				*out++ = *in;
		} else if (ilen >= 3 && is_xdigit(in[1]) && is_xdigit(in[2])) {
			if (++len < *olen)
				*out++ = unhex(in[1]) << 4 | unhex(in[2]);
			/*
			 * The loop header consumes the '%'; the two digits
			 * must be consumed from both the pointer and the
			 * remaining-length counter, otherwise the length
			 * bound drifts and input past ilen could be read.
			 */
			in += 2;
			ilen -= 2;
		} else {
			/* stray '%' or truncated escape sequence */
			errno = EINVAL;
			return (-1);
		}
	}
	if (len >= *olen) {
		/* overflow: report the length that would have been produced */
		*olen = len;
		errno = ENOSPC;
		return (-1);
	}
	*out = '\0';
	*olen = len;
	return (0);
}
Beispiel #2
0
/*
 * Converts a string of hexadecimal digits, optionally preceded by "0x",
 * into a 32-bit unsigned value.
 *
 * The string is read from its last character towards its first.  The
 * first non-hexadecimal character met ends the conversion; FDC_ASSERT is
 * then invoked with the condition that this character is the 'x' of a
 * leading "0x".  Arithmetic is done in uint32_t, so with more than eight
 * digits only the eight least significant ones contribute to the result.
 * An empty string yields 0.
 */
uint32_t
convert_string_to_hexadecimal(_IN_ const char *str)
{
    uint32_t value = 0;
    uint32_t multiplier = 1;

    /*
     * Count an index down to zero rather than decrementing a pointer
     * until it compares below str: forming a pointer before the start of
     * the array is undefined behaviour, and it would already happen for
     * an empty string.
     */
    for (size_t pos = strlen(str); pos > 0; pos --) {
	int c = str[pos - 1];

	if (!is_xdigit(c)) {
	    /* the only tolerated non-digit is the 'x' at index 1 of "0x" */
	    FDC_ASSERT(pos == 2 && c == 'x' && str[0] == '0',
		       str[0], str[1]);
	    break;
	}

	if (is_digit(c)) {
	    value += (c - '0') * multiplier;
	} else {
	    value += (to_lower(c) - 'a' + 10) * multiplier;
	}

	multiplier *= 16;
    }

    return value;
}
Beispiel #3
0
/*
 * Tries to read a numeric literal at the scanner's current position.
 *
 * A literal is either "0x" followed by hexadecimal digits, or decimal
 * digits with an optional '.' and further decimal digits (presumably
 * lit(), chr() and next() advance s->pos when they match -- confirm
 * against their definitions).
 *
 * Returns false when the position did not move at all.  Otherwise returns
 * true, stores the converted value in *out and sets *out_tok to FLOAT or
 * INTEGER, or to ERROR_TOK when the conversion set errno or did not end
 * exactly where the scanner stopped.
 */
static bool
number(struct scanner *s, int64_t *out, int *out_tok)
{
    const char *const first = s->s + s->pos;
    char *stop;
    bool hex = false;
    bool fractional = false;

    if (lit(s, "0x")) {
        hex = true;
        while (is_xdigit(peek(s)))
            next(s);
    } else {
        while (is_digit(peek(s)))
            next(s);
        fractional = chr(s, '.');
        while (is_digit(peek(s)))
            next(s);
    }

    /* Nothing was consumed, so there is no number here. */
    if (s->s + s->pos == first)
        return false;

    errno = 0;
    if (hex) {
        *out = strtoul(first, &stop, 16);
    } else if (fractional) {
        /* the double result is converted to int64_t by the assignment */
        *out = strtod(first, &stop);
    } else {
        *out = strtoul(first, &stop, 10);
    }

    if (errno == 0 && s->s + s->pos == stop)
        *out_tok = fractional ? FLOAT : INTEGER;
    else
        *out_tok = ERROR_TOK;

    return true;
}
Beispiel #4
0
/*
 * simple_strtoul - convert the leading digits of a string to an unsigned long
 * @cp:   start of the string
 * @endp: if non-NULL, receives a pointer to the first character that was
 *        not consumed
 * @base: number base to use; 0 selects the base from the prefix
 *        ("0x"/"0X" followed by a hex digit -> 16, leading "0" -> 8,
 *        otherwise 10)
 *
 * With an explicit base of 16 an optional "0x"/"0X" prefix is skipped as
 * well, provided a hexadecimal digit follows it.  Leading whitespace and
 * signs are not handled here, and there is no overflow detection: the
 * result wraps like any unsigned arithmetic.
 */
unsigned long simple_strtoul(const char *cp,char **endp,unsigned int base)
{
	unsigned long result = 0,value;

	if (!base) {
		base = 10;
		if (*cp == '0') {
			base = 8;
			cp++;
			/* accept both spellings of the hex prefix */
			if ((*cp == 'x' || *cp == 'X') && is_xdigit(cp[1])) {
				cp++;
				base = 16;
			}
		}
	} else if (base == 16) {
		/*
		 * "0x1F" with an explicit base of 16 would otherwise stop
		 * at the 'x' and yield 0.
		 */
		if (cp[0] == '0' && (cp[1] == 'x' || cp[1] == 'X') &&
		    is_xdigit(cp[2]))
			cp += 2;
	}
	while (is_xdigit(*cp)) {
		if (is_digit(*cp))
			value = *cp - '0';
		else
			value = (is_lower(*cp) ? toupper(*cp) : *cp) - 'A' + 10;
		/* a hex digit that is not valid in this base ends the number */
		if (value >= base)
			break;
		result = result*base + value;
		cp++;
	}
	if (endp)
		*endp = (char *)cp;
	return result;
}
Beispiel #5
0
//input: *c=='[' **pc==':'
//Handles a character class item inside a bracket expression: the class name
//is obtained through bracket_char_class_get, then **sc is tested against the
//predicate of that name. On a hit the result of bracket_matched is returned;
//otherwise *c takes the next pattern char, *pc moves past it and OK is
//returned. A name that is none of the twelve known classes never hits.
static u16 bracket_class(u8 *c,u8 **pc,u8 **sc,u8 not,u8 sc_folded)
{
  u8 char_class[CHAR_CLASS_MAX+1];//one extra slot for the 0 terminating char
  u16 r;
  int hit=0;

  r=bracket_char_class_get(c,pc,not,sc_folded,&char_class[0]);
  if(r!=OK) return r;

  //the names are mutually exclusive, so at most one branch applies
  if(STREQ(char_class,"alnum")) hit=is_alnum(**sc)?1:0;
  else if(STREQ(char_class,"alpha")) hit=is_alpha(**sc)?1:0;
  else if(STREQ(char_class,"blank")) hit=is_blank(**sc)?1:0;
  else if(STREQ(char_class,"cntrl")) hit=is_cntrl(**sc)?1:0;
  else if(STREQ(char_class,"digit")) hit=is_digit(**sc)?1:0;
  else if(STREQ(char_class,"graph")) hit=is_graph(**sc)?1:0;
  else if(STREQ(char_class,"lower")) hit=is_lower(**sc)?1:0;
  else if(STREQ(char_class,"print")) hit=is_print(**sc)?1:0;
  else if(STREQ(char_class,"punct")) hit=is_punct(**sc)?1:0;
  else if(STREQ(char_class,"space")) hit=is_space(**sc)?1:0;
  else if(STREQ(char_class,"upper")) hit=is_upper(**sc)?1:0;
  else if(STREQ(char_class,"xdigit")) hit=is_xdigit(**sc)?1:0;

  if(hit) return bracket_matched(c,pc,not);

  //no hit: load the next pattern char and step over it
  *c=**pc;
  ++*pc;
  return OK;
}
Beispiel #6
0
/* Write a locale source file consisting of an LC_IDENTIFICATION section
   followed by an LC_CTYPE section to FILENAME.  VERSION is inserted into
   the generated header comment, the title and the revision field.

   Before the LC_CTYPE section is written, every code point from 0 up to
   0x10FFFF is checked against the class restrictions quoted in the
   comments below.  Each violation is reported on stderr; violations do
   not stop the generation.

   Exits with status 1 if FILENAME cannot be opened for writing or if an
   error occurred while writing or closing it.  */
static void
output_tables (const char *filename, const char *version)
{
  FILE *stream;
  unsigned int ch;

  stream = fopen (filename, "w");
  if (stream == NULL)
    {
      fprintf (stderr, "cannot open '%s' for writing\n", filename);
      exit (1);
    }

  /* Header: '/' is declared as escape character and '%' as comment
     character, hence the "%%" in the format strings below.  */
  fprintf (stream, "escape_char /\n");
  fprintf (stream, "comment_char %%\n");
  fprintf (stream, "\n");
  fprintf (stream, "%% Generated automatically by gen-unicode-ctype for Unicode %s.\n",
	   version);
  fprintf (stream, "\n");

  fprintf (stream, "LC_IDENTIFICATION\n");
  fprintf (stream, "title     \"Unicode %s FDCC-set\"\n", version);
  fprintf (stream, "source    \"UnicodeData.txt, PropList.txt\"\n");
  fprintf (stream, "address   \"\"\n");
  fprintf (stream, "contact   \"\"\n");
  /* NOTE(review): the address literal below looks like a placeholder left
     by e-mail obfuscation rather than a real address -- confirm.  */
  fprintf (stream, "email     \"[email protected]\"\n");
  fprintf (stream, "tel       \"\"\n");
  fprintf (stream, "fax       \"\"\n");
  fprintf (stream, "language  \"\"\n");
  fprintf (stream, "territory \"Earth\"\n");
  fprintf (stream, "revision  \"%s\"\n", version);
  {
    /* The date field is the current UTC date.  "YYYY-MM-DD" takes 10
       characters; date[] has one more for the terminating NUL.
       NOTE(review): the result of gmtime is passed on unchecked.  */
    time_t now;
    char date[11];
    now = time (NULL);
    strftime (date, sizeof (date), "%Y-%m-%d", gmtime (&now));
    fprintf (stream, "date      \"%s\"\n", date);
  }
  fprintf (stream, "category  \"unicode:2001\";LC_CTYPE\n");
  fprintf (stream, "END LC_IDENTIFICATION\n");
  fprintf (stream, "\n");

  /* Verifications.  These only print diagnostics to stderr.  */
  for (ch = 0; ch < 0x110000; ch++)
    {
      /* toupper restriction: "Only characters specified for the keywords
	 lower and upper shall be specified."  */
      if (to_upper (ch) != ch && !(is_lower (ch) || is_upper (ch)))
	fprintf (stderr,
		 "%s is not upper|lower but toupper(0x%04X) = 0x%04X\n",
		 ucs_symbol (ch), ch, to_upper (ch));

      /* tolower restriction: "Only characters specified for the keywords
	 lower and upper shall be specified."  */
      if (to_lower (ch) != ch && !(is_lower (ch) || is_upper (ch)))
	fprintf (stderr,
		 "%s is not upper|lower but tolower(0x%04X) = 0x%04X\n",
		 ucs_symbol (ch), ch, to_lower (ch));

      /* alpha restriction: "Characters classified as either upper or lower
	 shall automatically belong to this class."  */
      if ((is_lower (ch) || is_upper (ch)) && !is_alpha (ch))
	fprintf (stderr, "%s is upper|lower but not alpha\n", ucs_symbol (ch));

      /* alpha restriction: "No character specified for the keywords cntrl,
	 digit, punct or space shall be specified."  */
      if (is_alpha (ch) && is_cntrl (ch))
	fprintf (stderr, "%s is alpha and cntrl\n", ucs_symbol (ch));
      if (is_alpha (ch) && is_digit (ch))
	fprintf (stderr, "%s is alpha and digit\n", ucs_symbol (ch));
      if (is_alpha (ch) && is_punct (ch))
	fprintf (stderr, "%s is alpha and punct\n", ucs_symbol (ch));
      if (is_alpha (ch) && is_space (ch))
	fprintf (stderr, "%s is alpha and space\n", ucs_symbol (ch));

      /* space restriction: "No character specified for the keywords upper,
	 lower, alpha, digit, graph or xdigit shall be specified."
	 upper, lower, alpha already checked above.  */
      if (is_space (ch) && is_digit (ch))
	fprintf (stderr, "%s is space and digit\n", ucs_symbol (ch));
      if (is_space (ch) && is_graph (ch))
	fprintf (stderr, "%s is space and graph\n", ucs_symbol (ch));
      if (is_space (ch) && is_xdigit (ch))
	fprintf (stderr, "%s is space and xdigit\n", ucs_symbol (ch));

      /* cntrl restriction: "No character specified for the keywords upper,
	 lower, alpha, digit, punct, graph, print or xdigit shall be
	 specified."  upper, lower, alpha already checked above.  */
      if (is_cntrl (ch) && is_digit (ch))
	fprintf (stderr, "%s is cntrl and digit\n", ucs_symbol (ch));
      if (is_cntrl (ch) && is_punct (ch))
	fprintf (stderr, "%s is cntrl and punct\n", ucs_symbol (ch));
      if (is_cntrl (ch) && is_graph (ch))
	fprintf (stderr, "%s is cntrl and graph\n", ucs_symbol (ch));
      if (is_cntrl (ch) && is_print (ch))
	fprintf (stderr, "%s is cntrl and print\n", ucs_symbol (ch));
      if (is_cntrl (ch) && is_xdigit (ch))
	fprintf (stderr, "%s is cntrl and xdigit\n", ucs_symbol (ch));

      /* punct restriction: "No character specified for the keywords upper,
	 lower, alpha, digit, cntrl, xdigit or as the <space> character shall
	 be specified."  upper, lower, alpha, cntrl already checked above.  */
      if (is_punct (ch) && is_digit (ch))
	fprintf (stderr, "%s is punct and digit\n", ucs_symbol (ch));
      if (is_punct (ch) && is_xdigit (ch))
	fprintf (stderr, "%s is punct and xdigit\n", ucs_symbol (ch));
      if (is_punct (ch) && (ch == 0x0020))
	fprintf (stderr, "%s is punct\n", ucs_symbol (ch));

      /* graph restriction: "No character specified for the keyword cntrl
	 shall be specified."  Already checked above.  */

      /* print restriction: "No character specified for the keyword cntrl
	 shall be specified."  Already checked above.  */

      /* graph - print relation: differ only in the <space> character.
	 How is this possible if there are more than one space character?!
	 I think susv2/xbd/locale.html should speak of "space characters",
	 not "space character".
	 Note that the first check below accepts any is_space character,
	 while the second one only looks at U+0020.  */
      if (is_print (ch) && !(is_graph (ch) || /* ch == 0x0020 */ is_space (ch)))
	fprintf (stderr,
		 "%s is print but not graph|<space>\n", ucs_symbol (ch));
      if (!is_print (ch) && (is_graph (ch) || ch == 0x0020))
	fprintf (stderr,
		 "%s is graph|<space> but not print\n", ucs_symbol (ch));
    }

  /* The LC_CTYPE section: one entry per character class, then the case
     mappings, then the width map, in exactly this order.  */
  fprintf (stream, "LC_CTYPE\n");
  output_charclass (stream, "upper", is_upper);
  output_charclass (stream, "lower", is_lower);
  output_charclass (stream, "alpha", is_alpha);
  output_charclass (stream, "digit", is_digit);
  output_charclass (stream, "outdigit", is_outdigit);
  output_charclass (stream, "blank", is_blank);
  output_charclass (stream, "space", is_space);
  output_charclass (stream, "cntrl", is_cntrl);
  output_charclass (stream, "punct", is_punct);
  output_charclass (stream, "xdigit", is_xdigit);
  output_charclass (stream, "graph", is_graph);
  output_charclass (stream, "print", is_print);
  output_charclass (stream, "class \"combining\";", is_combining);
  output_charclass (stream, "class \"combining_level3\";", is_combining_level3);
  output_charmap (stream, "toupper", to_upper);
  output_charmap (stream, "tolower", to_lower);
  output_charmap (stream, "map \"totitle\";", to_title);
  output_widthmap (stream);
  fprintf (stream, "END LC_CTYPE\n");

  /* A pending stream error or a failing fclose both count as a write
     error; fclose is only attempted when no error is pending.  */
  if (ferror (stream) || fclose (stream))
    {
      fprintf (stderr, "error writing to '%s'\n", filename);
      exit (1);
    }
}
Beispiel #7
0
/*
 * Returns the numeric value of an ASCII digit or letter ('0'-'9' -> 0-9,
 * 'a'-'z' and 'A'-'Z' -> 10-35), or -1 for any other character.
 */
static int strtol_digit_value(int c)
{
  if (c >= '0' && c <= '9')
  {
    return c - '0';
  }
  if (c >= 'a' && c <= 'z')
  {
    return c - 'a' + 10;
  }
  if (c >= 'A' && c <= 'Z')
  {
    return c - 'A' + 10;
  }
  return -1;
}

/*
 * Converts the initial part of str to a long int.
 *
 * str    - input string: optional whitespace, optional '+' or '-', optional
 *          "0x"/"0X" prefix (base 0 or 16 only), then digits of the base.
 * endptr - if not NULL, receives a pointer to the first character that was
 *          not converted, or str itself when no conversion was performed.
 * base   - 2..36, or 0 to derive the base from the prefix ("0x"/"0X"
 *          followed by a hex digit -> 16, leading "0" -> 8, otherwise 10).
 *
 * Returns the converted value; 0 when nothing could be converted or when
 * base is invalid.  Values that do not fit are clamped to the largest or
 * smallest long int.  Note that errno is not touched by this function.
 */
long int strtol(const char *str, char **endptr, int base)
{
  const char *buf = str;
  /* largest long int, computed without relying on <limits.h> */
  const unsigned long max_pos = ~0UL >> 1;
  unsigned long acc = 0, limit;
  int negative = 0, any = 0, overflow = 0, k;

  /* until at least one digit is consumed, "no conversion" is the answer */
  if (endptr != NULL)
  {
    *endptr = (char *)str;
  }

  /* base 0 is valid and means "detect from the prefix" */
  if (base != 0 && (base < 2 || base > 36))
  {
    return 0;
  }

  /* swallow white spaces: ' ', \t, \n, \v, \f, \r */
  while (*buf == ' ' || (*buf >= '\t' && *buf <= '\r'))
  {
    ++buf;
  }

  /* parse sign if any */
  if (*buf == '-')
  {
    negative = 1;
    ++buf;
  }
  else if (*buf == '+')
  {
    ++buf;
  }

  /*
   * parse base: a hex prefix only counts when a hex digit follows it,
   * otherwise the leading "0" is the number and the 'x' stops the scan
   */
  if ((base == 0 || base == 16) && buf[0] == '0' &&
      (buf[1] == 'x' || buf[1] == 'X'))
  {
    k = strtol_digit_value(buf[2]);
    if (k >= 0 && k < 16)
    {
      buf += 2;
      base = 16;
    }
  }
  if (base == 0)
  {
    base = (*buf == '0') ? 8 : 10;
  }

  /* the magnitude of the most negative value is one larger than max_pos */
  limit = negative ? max_pos + 1 : max_pos;

  /* parse alpha-numerical string; every digit must be below the base */
  for (;; ++buf)
  {
    k = strtol_digit_value(*buf);
    if (k < 0 || k >= base)
    {
      break;
    }
    any = 1;

    if (overflow)
    {
      /* keep consuming digits so that endptr lands behind the number */
      continue;
    }
    if (acc > (limit - (unsigned long)k) / (unsigned long)base)
    {
      overflow = 1;
      acc = limit;
    }
    else
    {
      acc = acc * (unsigned long)base + (unsigned long)k;
    }
  }

  if (any && endptr != NULL)
  {
    *endptr = (char *)buf;
  }

  if (negative)
  {
    /* -(max_pos + 1) cannot be formed by negating a long int directly */
    return (acc > max_pos) ? -(long int)max_pos - 1 : -(long int)acc;
  }
  return (long int)acc;
}
Beispiel #8
0
 // Consumes one hexadecimal digit: yields the position just behind *src
 // when *src is one, and a null pointer otherwise.
 const char* xdigit(const char* src) {
   if (is_xdigit(*src)) return src + 1;
   return 0;
 }
Beispiel #9
0
// is_xdigit for unicode::char_t: values up to 127 are narrowed to char and
// handed to the char version of is_xdigit; anything above is never a
// hexadecimal digit.
// NOTE(review): an explicit specialization is normally introduced by
// `template <>`, which is not visible in this excerpt -- confirm.
inline bool is_xdigit<unicode::char_t> (unicode::char_t c)
{
    if (c.value <= 127)
        return is_xdigit(static_cast<char>(c.value));
    return false;
}