/*********************************************************************
 *
 * Function    :  is_hex_sequence
 *
 * Description :  Tests whether a string begins with a two-digit
 *                hexadecimal escape of the form '\xHH' (for example
 *                '\x40'). At most the first four characters are
 *                examined, and inspection stops at the first character
 *                that does not fit the pattern.
 *
 * Parameters  :
 *          1  :  sequence = The string to check
 *
 * Returns     :  Non-zero if the string starts with such a sequence,
 *                zero if it does not.
 *
 *********************************************************************/
int pcrs::is_hex_sequence(const char *sequence)
{
   if (sequence[0] != '\\')
   {
      return 0;
   }
   if (sequence[1] != 'x')
   {
      return 0;
   }
   if (!is_hex_digit(sequence[2]))
   {
      return 0;
   }
   return is_hex_digit(sequence[3]) ? 1 : 0;
}
/* Converts the hexadecimal number at the start of 'str' to a signed Long.

   Accepted syntax: optional leading whitespace, an optional '-' or '+',
   an optional "0x"/"0X" prefix (only consumed when a hex digit follows
   it), then zero or more hex digits.

   If 'endptr' is non-NULL it receives a pointer to the first character
   that was not consumed.  When no digit at all was converted, it receives
   'str' itself (the same convention strtol() uses), so callers can detect
   "nothing parsed" with '*endptr == str'.

   No overflow detection is performed.  Returns the converted value, or 0
   when no digit was converted. */
Long VG_(strtoll16) ( Char* str, Char** endptr )
{
   Bool  neg       = False;
   Bool  converted = False;
   Long  n = 0, digit = 0;
   Char* str0 = str;

   // Skip leading whitespace.
   while (VG_(isspace)(*str)) str++;

   // Allow a leading '-' or '+'.
   if (*str == '-') { str++; neg = True; }
   else if (*str == '+') { str++; }

   // Allow leading "0x", but only if there's a hex digit
   // following it.
   if (*str == '0'
    && (*(str+1) == 'x' || *(str+1) == 'X')
    && is_hex_digit( *(str+2), &digit )) {
      str += 2;
   }

   while (is_hex_digit(*str, &digit)) {
      converted = True;   // Ok, we've actually converted a digit.
      n = 16*n + digit;
      str++;
   }

   // If nothing was converted, do not report the skipped whitespace or
   // sign as consumed: endptr must point to the start of the string.
   if (!converted) str = str0;
   if (neg) n = -n;
   if (endptr) *endptr = str;    // Record first failing character.
   return n;
}
/* Decodes a string of hexadecimal digit pairs into a freshly allocated
   OCaml string holding one byte per pair.

   Whitespace is skipped wherever a pair could start; whitespace between
   the two digits of a pair is not accepted.  Any other character, or a
   hex digit that is not followed by a second hex digit, makes the
   function return the result of raise_invalid_hex_encoding().

   The input is scanned twice: once to validate it and count the pairs,
   once to decode.  'str' must not change between the two passes. */
static inline value unescape_bytea_9x(const char *str)
{
  value v_res;
  char *res;
  size_t n_hex_pairs = 0;
  const char *end = str;

  /* Length calculation and encoding verification.  The casts to
     unsigned char keep isspace() well defined for bytes >= 0x80 on
     platforms where plain char is signed. */
  while (*end != '\0') {
    if (isspace((unsigned char) *end)) end++;
    else if (is_hex_digit(*end)) {
      end++;
      if (is_hex_digit(*end)) {
        end++;
        n_hex_pairs++;
      }
      else return raise_invalid_hex_encoding();
    }
    else return raise_invalid_hex_encoding();
  }

  /* Assumption: string has not changed since length calculation above! */
  v_res = caml_alloc_string(n_hex_pairs);
  res = String_val(v_res);
  while (str < end) {
    if (isspace((unsigned char) *str)) str++;
    else {
      /* High nibble first, then low nibble. */
      *res = (char) ((unhexdigit(*str) << 4) | unhexdigit(str[1]));
      str += 2;
      res++;
    }
  }
  return v_res;
}
/* Converts the hexadecimal number at the start of 'str' to a ULong.

   Leading whitespace, one optional '+', and an optional "0x"/"0X" prefix
   (consumed only when a hex digit follows it) are skipped before the
   digits.  If 'endptr' is non-NULL it receives the address of the first
   unconsumed character, or 'str' itself when no digit was converted.
   No overflow detection is performed. */
ULong VG_(strtoull16) ( const HChar* str, HChar** endptr )
{
   const HChar* cursor     = str;
   ULong        value      = 0;
   Long         nibble     = 0;
   Bool         any_digits = False;

   /* Leading whitespace. */
   while (VG_(isspace)(*cursor))
      cursor++;

   /* Optional '+'. */
   if (*cursor == '+')
      cursor++;

   /* Optional "0x" prefix, accepted only in front of a real hex digit. */
   if (cursor[0] == '0'
       && (cursor[1] == 'x' || cursor[1] == 'X')
       && is_hex_digit( cursor[2], &nibble ))
      cursor += 2;

   /* Accumulate the digits. */
   for ( ; is_hex_digit(*cursor, &nibble); cursor++) {
      any_digits = True;
      value = 16*value + nibble;
   }

   /* Report where parsing stopped, or the start if nothing was parsed. */
   if (endptr)
      *endptr = (HChar *)(any_digits ? cursor : str);

   return value;
}
/**
 * Decodes one backslash escape at the start of s.
 *
 * Returns false (leaving s and result untouched) when s does not start
 * with a backslash or the backslash is the last character. Otherwise
 * stores the decoded character code in result, advances s past the
 * escape and returns true.
 *
 * Recognized forms: \xHH (exactly two hex digits) and the single-letter
 * escapes a, b, f, n, r, t, v. Any other character following the
 * backslash stands for itself.
 */
static bool is_escape_char(char const *& s, unsigned& result) {
    if (s[0] != '\\' || s[1] == 0) {
        return false;
    }

    unsigned hi, lo;
    if (s[1] == 'x' && is_hex_digit(s[2], hi) && is_hex_digit(s[3], lo)) {
        result = hi*16 + lo;
        s += 4;
        return true;
    }

    // Every remaining form is exactly two characters long.
    switch (s[1]) {
    case 'a': result = '\a'; break;
    case 'b': result = '\b'; break;
#if 0
    case 'e': result = '\e'; break;
#endif
    case 'f': result = '\f'; break;
    case 'n': result = '\n'; break;
    case 'r': result = '\r'; break;
    case 't': result = '\t'; break;
    case 'v': result = '\v'; break;
    default:  result = s[1]; break;
    }
    s += 2;
    return true;
}
/*
 * Classifies the escape whose first character after the backslash is *s,
 * given that len characters are available starting at s.
 *
 * 'u' escapes: JSON_STRING_INCOMPLETE when len < 6, otherwise 5 if the
 * next four characters are hex digits and JSON_STRING_INVALID if not.
 * Single-character escapes (" \ / b f n r t): JSON_STRING_INCOMPLETE when
 * len < 2, otherwise 1.
 * Anything else: JSON_STRING_INVALID.
 */
static int get_escape_len(const char *s, int len) {
  if (*s == 'u') {
    if (len < 6) {
      return JSON_STRING_INCOMPLETE;
    }
    if (is_hex_digit(s[1]) && is_hex_digit(s[2]) &&
        is_hex_digit(s[3]) && is_hex_digit(s[4])) {
      return 5;
    }
    return JSON_STRING_INVALID;
  }

  switch (*s) {
    case '"':
    case '\\':
    case '/':
    case 'b':
    case 'f':
    case 'n':
    case 'r':
    case 't':
      if (len < 2) {
        return JSON_STRING_INCOMPLETE;
      }
      return 1;
    default:
      return JSON_STRING_INVALID;
  }
}
/**
 * Decodes str from its percent-encoded representation.
 *
 * Every "%HH" triplet (H being a hex digit) is replaced by the byte it
 * encodes. A '%' that is not followed by two hex digits is not an error:
 * the characters read so far are copied to the output unchanged. This
 * also applies to an incomplete escape at the very end of the input
 * ("abc%" or "abc%4"), which is emitted literally rather than dropped.
 *
 * @param str  the percent-encoded input
 * @return     the decoded string
 */
std::string UrlParser::percent_decode(const std::string &str) const
{
    enum State {
        General,
        FirstPercentEncodedDigit,
        SecondPercentEncodedDigit
    } state = General;

    char first_char = '\0', second_char = '\0';
    std::stringstream result;

    for (std::string::const_iterator it = str.begin(); it != str.end(); it++) {
        switch (state) {
        case General:
            if (*it != '%')
                result << *it;
            else {
                state = FirstPercentEncodedDigit;
                first_char = second_char = '\0';
            }
            break;
        case FirstPercentEncodedDigit:
            first_char = *it;
            if (is_hex_digit(first_char)) {
                state = SecondPercentEncodedDigit;
            } else {
                // Not an escape after all: keep the text as it was.
                result << "%" << first_char;
                state = General;
            }
            break;
        case SecondPercentEncodedDigit:
            second_char = *it;
            if (is_hex_digit(second_char)) {
                result << char(
                    (hex_digit_to_int(first_char) << 4) |
                     hex_digit_to_int(second_char));
                state = General;
            } else {
                // Not an escape after all: keep the text as it was.
                result << "%" << first_char << second_char;
                state = General;
            }
            break;
        }
    }

    // The input ended in the middle of an escape: flush what was held
    // back so that no input characters are lost.
    if (state == FirstPercentEncodedDigit)
        result << "%";
    else if (state == SecondPercentEncodedDigit)
        result << "%" << first_char;

    return result.str();
}
//Converts the first two characters of a hexadecimal string into the
//byte they spell (first character is the high nibble).
//Terminates the process with exit(-2) if either character is not a
//hex digit.
unsigned char get_char_from_hex_str(char *string)
{
    unsigned char byte;

    //validate both characters up front, even though get_hex_val
    //will look at them again below
    if (!is_hex_digit(string[0]) || !is_hex_digit(string[1]))
    {
        exit(-2);
    }

    byte = 16 * get_hex_val(string[0]);
    byte += get_hex_val(string[1]);

    return byte;
}
/* Converts the first character of the current quoted text into a part
   number using CheatDatabase_GetHexDigit().

   When CPCEMU_FRIENDLY is defined, a warning is issued if the quoted text
   is not exactly one character long and another if that character is not
   a hex digit; the conversion is attempted regardless. */
static int CheatDatabase_Parse_PartNumber(void)
{
	unsigned long Value;
	unsigned char Digit = CheatDatabase_QuotedText[0];

#ifdef CPCEMU_FRIENDLY
	/* diagnostics only - neither check stops the conversion */
	if (CheatDatabase_QuotedText_Length != 1)
	{
		CheatDatabase_Warning("Part Number string is too long. Should be 1 char Maximum!\r\n");
	}

	if (!is_hex_digit(Digit))
	{
		CheatDatabase_Warning("Part Number is not a valid hex digit!\r\n");
	}
#endif

	/* convert from text to value */
	Value = CheatDatabase_GetHexDigit(Digit);

	return Value;
}
/* Parses the current quoted text as an address of up to four hex digits
   via CheatDatabase_Parse_GetHexNumber(4).

   When CPCEMU_FRIENDLY is defined, a warning is issued if the text is not
   four characters long, and one warning per character that is not a hex
   digit; parsing proceeds regardless. */
static int CheatDatabase_Parse_Addr(void)
{
	unsigned long Value;

#ifdef CPCEMU_FRIENDLY
	{
		unsigned long Index;

		if (CheatDatabase_QuotedText_Length != 4)
		{
			CheatDatabase_Warning("Addr string is not 4 chars!\r\n");
		}

		for (Index = 0; Index < CheatDatabase_QuotedText_Length; Index++)
		{
			if (is_hex_digit(CheatDatabase_QuotedText[Index]))
			{
				continue;
			}

			CheatDatabase_Warning("Addr string contains non hex-digit chars!\r\n");
		}
	}
#endif

	/* build the numeric address from the hex digits */
	Value = CheatDatabase_Parse_GetHexNumber(4);

	return Value;
}
/*
 * Check that @line starts like a "%lx-%lx " address range: a run of hex
 * digits, a '-', another run of hex digits, then a single space.
 * Either run of digits may be empty.
 */
static bool is_vma_range_fmt(char *line)
{
	/* start address */
	for (; *line && is_hex_digit(*line); line++)
		;

	if (*line != '-')
		return false;
	line++;

	/* end address */
	for (; *line && is_hex_digit(*line); line++)
		;

	return *line == ' ';
}
/* Builds a number from the hex digits found in the current quoted text.

   Characters that are not hex digits are skipped. Scanning stops at the
   end of the text, or after the character at which the number of digits
   collected has reached MaxDigits (the limit is tested after each
   character has been processed). Returns the accumulated value, 0 if no
   digit was found. */
static unsigned long CheatDatabase_Parse_GetHexNumber(int MaxDigits)
{
	unsigned long Result = 0;
	unsigned long Pos = 0;
	int Count = 0;

	while (Pos < CheatDatabase_QuotedText_Length)
	{
		if (is_hex_digit(CheatDatabase_QuotedText[Pos]))
		{
			/* shift in the next nibble */
			Result = (Result << 4) | CheatDatabase_GetHexDigit(CheatDatabase_QuotedText[Pos]);
			Count++;
		}

		if (Count >= MaxDigits)
		{
			break;
		}

		Pos++;
	}

	return Result;
}
/* Parses the current quoted text as a byte value of up to two hex digits
   via CheatDatabase_Parse_GetHexNumber(2).

   When CPCEMU_FRIENDLY is defined, a warning is issued if the text is not
   two characters long, and one warning per character that is not a hex
   digit; parsing proceeds regardless. */
static int CheatDatabase_Parse_OldByte(void)
{
	unsigned long Value;

#ifdef CPCEMU_FRIENDLY
	{
		unsigned long Index;

		if (CheatDatabase_QuotedText_Length != 2)
		{
			CheatDatabase_Warning("Old byte string is not 2 chars!\r\n");
		}

		for (Index = 0; Index < CheatDatabase_QuotedText_Length; Index++)
		{
			if (is_hex_digit(CheatDatabase_QuotedText[Index]))
			{
				continue;
			}

			CheatDatabase_Warning("Old byte string contains non hex-digit chars!\r\n");
		}
	}
#endif

	/* build the byte value from the hex digits */
	Value = CheatDatabase_Parse_GetHexNumber(2);

	return Value;
}
/* Searches str for something that looks like a hexadecimal code point.
 *
 * Returns a pointer to the start of (?=[^ -])[0-9A-F]{4,5,6}[^0-9A-F],
 * or null if not found.
 *
 * What the visible code requires of a match starting at index i:
 *   - i is 0, or is_blank(str[i-1]) holds;
 *   - str[i] .. str[i+3] are all hex digits;
 *   - the nested is_blank_or_hex_or() tests on positions i+4 and i+5
 *     succeed.
 *
 * is_blank and is_blank_or_hex_or are defined outside this block (they
 * appear to be macros that reference the locals 'str' and 'len' — TODO
 * confirm), so the exact trailing-context rule cannot be stated from
 * here.
 */
static const gchar *
find_codepoint (const gchar *str)
{
  guint i, len;

  /* what we are searching for is ascii; in this case, we don't have to
   * worry about multibyte characters at all */
  len = strlen (str);

  /* i + 3 < len guarantees the four mandatory digits lie inside str */
  for (i = 0; i + 3 < len; i++)
    {
      /* NOTE(review): the innermost test "(i+6 < len) || !is_hex_digit
       * (str[i+6])" is true whenever i+6 < len, whatever str[i+6] is;
       * confirm whether "!(i+6 < len) || ..." was intended. */
      if ( ( !(i > 0) || is_blank(str[i-1]) )
          && is_hex_digit (str[i+0]) && is_hex_digit (str[i+1])
          && is_hex_digit (str[i+2]) && is_hex_digit (str[i+3])
          && is_blank_or_hex_or(i+4, is_blank_or_hex_or(i+5, (i+6 < len) || !is_hex_digit (str[i+6]))) )
        return str + i;
    }

  return NULL;
}
/* Reads one character from standard input and prints the value that
   is_hex_digit() returns for it, followed by two newlines.

   Returns 0 on success, or 1 if no character could be read (end of file
   or read error), in which case a message is written to stderr instead
   of evaluating an uninitialized variable. */
int main(void)
{
    char character;
    int result;

    printf("Enter a hexadecimal number: ");

    /* scanf() reports how many items it stored; anything other than 1
       means 'character' was never written. */
    if (scanf("%c", &character) != 1)
    {
        fprintf(stderr, "No input received.\n");
        return 1;
    }

    /* Keep the result in an int so a return value wider than char is not
       truncated before being printed. */
    result = is_hex_digit(character);

    printf("%d", result);
    printf("\n\n");

    return 0;
}
/* Unit test for the two-argument is_hex_digit(c, &value).

   Each CHECK asserts that is_hex_digit() returns non-zero for the given
   character and stores the expected numeric value in x.  The cases cover
   both ends of the '0'-'9', 'a'-'f' and 'A'-'F' ranges, plus '1'.
   CHECK is defined outside this block; how a failure is reported is not
   visible here. */
void test_is_hex_digit()
{
   Long x;

   /* decimal digits */
   CHECK( is_hex_digit('0', &x) && 0 == x );
   CHECK( is_hex_digit('1', &x) && 1 == x );
   CHECK( is_hex_digit('9', &x) && 9 == x );

   /* lower-case letters */
   CHECK( is_hex_digit('a', &x) && 10 == x );
   CHECK( is_hex_digit('f', &x) && 15 == x );

   /* upper-case letters */
   CHECK( is_hex_digit('A', &x) && 10 == x );
   CHECK( is_hex_digit('F', &x) && 15 == x );
}
/*
 * Compare a 7-character id against a list entry.
 *
 * The first three characters must be byte-identical.  Each of the next
 * four characters of @idstr must be a hex digit (after upper-casing) and
 * must equal the corresponding character of @list_id ignoring case,
 * except where @list_id holds an upper-case 'X', which matches any hex
 * digit.
 */
static bool matching_id(char *idstr, char *list_id)
{
	int pos;

	if (memcmp(idstr, list_id, 3) != 0)
		return false;

	for (pos = 3; pos < 7; pos++) {
		char digit = toupper(idstr[pos]);

		if (!is_hex_digit(digit))
			return false;

		/* 'X' in the list entry is a wildcard */
		if (list_id[pos] == 'X')
			continue;

		if (digit != toupper(list_id[pos]))
			return false;
	}

	return true;
}
/* Reads one entity reference from the input s and expands it.

   One character is consumed unconditionally first (presumably the
   leading '&' or '%' -- confirm against the callers). Then either a
   numeric reference ("#" + decimal digits, or "#x"/"#X" + hex digits) or
   a run of name characters is read, followed by an optional ';'.

   The collected text is passed to expand_entity(). If the expansion is
   unchanged, or the collected text is exactly "<" or "&", the expansion
   is returned. Otherwise the expansion is written back to s and the
   empty string is returned. */
string
xml_html_parser::parse_entity () {
  string entity= s->read (1);

  if (!test (s, "#")) {
    // named entity
    while (s && is_name_char (s[0]))
      entity << s->read (1);
  }
  else {
    // numeric character reference
    entity << s->read (1);
    if (test (s, "x") || test (s, "X")) {
      entity << s->read (1);
      while (s && is_hex_digit (s[0]))
        entity << s->read (1);
    }
    else {
      while (s && is_digit (s[0]))
        entity << s->read (1);
    }
  }

  if (test (s, ";"))
    entity << s->read (1);

  string expanded= expand_entity (entity);
  if (expanded == entity || entity == "<" || entity == "&")
    return expanded;

  // push the expansion back into the input
  s->write (expanded);
  return "";
}
/* Parse a string of hex digits starting at HEX, supply them as the
   value of register REGNO, skip any whitespace, and return a pointer
   to the next character.

   A run of zero hex digits yields the value 0.  Parsing stops at the
   first character that is not a hex digit (or at the terminating NUL);
   the whitespace that follows, if any, is then skipped.

   There is a function in monitor.c, monitor_supply_register, which is
   supposed to do this job.  However, there is some rather odd stuff in
   there (whitespace characters don't terminate numbers, for example)
   that is incorrect for ROM68k.  It's basically impossible to safely
   tweak monitor_supply_register --- it's used by a zillion other
   monitors; who knows what behaviors they're depending on.  So instead,
   we'll just use our own function, which can behave exactly the way we
   want it to.  */
static char *
rom68k_supply_one_register (int regno, unsigned char *hex)
{
  ULONGEST value;
  unsigned char regbuf[MAX_REGISTER_SIZE];

  value = 0;
  while (*hex != '\0' && is_hex_digit (*hex))
    {
      /* Advance in a separate statement so the pointer is incremented
         exactly once even if hex_digit_value is a macro that evaluates
         its argument more than once.  */
      value = (value * 16) + hex_digit_value (*hex);
      hex++;
    }

  /* Skip any whitespace.  */
  while (is_whitespace (*hex))
    hex++;

  store_unsigned_integer (regbuf, DEPRECATED_REGISTER_RAW_SIZE (regno), value);
  supply_register (regno, regbuf);

  /* The declared return type is char *, while HEX is unsigned char *;
     the two are incompatible pointer types, so convert explicitly.  */
  return (char *) hex;
}
void HexColorEntry::onEntryChange() { std::string text = m_entry.getText(); int r, g, b; // Remove non hex digits while (text.size() > 0 && !is_hex_digit(text[0])) text.erase(0, 1); // Fill with zeros at the end of the text while (text.size() < 6) text.push_back('0'); // Convert text (Base 16) to integer int hex = std::strtol(text.c_str(), NULL, 16); r = (hex & 0xff0000) >> 16; g = (hex & 0xff00) >> 8; b = (hex & 0xff); ColorChange(app::Color::fromRgb(r, g, b)); }
/* Reads up to four hexadecimal digits from the scanner and assembles
 * them, most significant digit first, into a gunichar.
 *
 * Reading stops early at the first character that is not a hex digit;
 * that character has already been consumed from the scanner by then.
 * Each digit keeps its fixed position, so a short sequence leaves the
 * low-order nibbles zero.
 *
 * The result is asserted to be either a valid code point or a surrogate.
 */
static gunichar
json_scanner_get_unichar (JsonScanner *scanner,
                          guint       *line_p,
                          guint       *position_p)
{
  gunichar result = 0;
  gint shift;

  /* bit offsets of the four nibbles: 12, 8, 4, 0 */
  for (shift = 12; shift >= 0; shift -= 4)
    {
      gchar c = json_scanner_get_char (scanner, line_p, position_p);

      if (!is_hex_digit (c))
        break;

      result += ((gunichar) to_hex_digit (c) << shift);
    }

  g_assert (g_unichar_validate (result) || g_unichar_type (result) == G_UNICODE_SURROGATE);

  return result;
}
/* Expands every entity reference in s and returns the result.

   A reference starts at '&' or '%' and continues with either "#" plus
   decimal digits, "#x"/"#X" plus hex digits, or a run of name
   characters, optionally followed by ';'. Each such span is replaced by
   expand_entity() of it; all other characters are copied through.

   The function calls itself on its own output until a pass changes
   nothing, so expansions that themselves contain references are
   expanded as well. */
string
xml_html_parser::expand_entities (string s) {
  string out;
  const int n= N(s);
  int pos= 0;

  while (pos < n) {
    // ordinary character: copy through
    if (s[pos] != '&' && s[pos] != '%') {
      out << s[pos++];
      continue;
    }

    // entity reference: find where it ends
    int start= pos++;
    if (pos<n && s[pos] == '#') {
      pos++;
      if (pos<n && (s[pos] == 'x' || s[pos] == 'X')) {
        pos++;
        while (pos<n && is_hex_digit (s[pos])) pos++;
      }
      else {
        while (pos<n && is_digit (s[pos])) pos++;
      }
    }
    else {
      while (pos<n && is_name_char (s[pos])) pos++;
    }
    if (pos<n && s[pos] == ';') pos++;

    out << expand_entity (s (start, pos));
  }

  // fixed point reached?
  if (out == s) return out;
  return expand_entities (out);
}
/**
 * Reports whether token looks like a numeric literal.
 *
 * Decimal form: every character must satisfy is_number() or be '_',
 * except that a single '.' is allowed.
 * Hex form (token starts with "0x" or "0X"): the two prefix characters
 * are skipped and hex digits are accepted; any other character is still
 * judged by the decimal rule above.
 *
 * An empty token yields true, since no character violates the rules.
 */
bool is_number_token(const std::string& token) {
    bool period = false;
    const bool hex = token.size() >= 2 and token[0] == '0' and
                     (token[1] == 'x' or token[1] == 'X');

    for (size_t i = 0; i < token.size(); i++) {
        auto c = token[i];

        if (hex) {
            // Skip only the two-character "0x" prefix. The previous bound
            // of 3 also skipped the first digit, so "0xZ" was accepted.
            if (i < 2)
                continue;
            if (is_hex_digit(c))
                continue;
        }

        if (c == '.' and not period) {
            period = true;
            continue;
        } else if (not is_number(c) and not (c == '_')) {
            return false;
        }
    }
    return true;
}
/* Advances 'tok' to the token that follows the current one.

   Does nothing if the current token is TOK_ERROR.  Otherwise scanning
   resumes right after the current token (tok->p + tok->size) and tok->p,
   tok->size and tok->kind are set to describe the next token:

     - characters for which is_space() holds are skipped;
     - a digit starts a TOK_INTEGER: the first character, an optional
       'x'/'X' as second character, then every following hex digit;
     - "..." is TOK_DOTDOTDOT;
     - the terminating NUL gives TOK_END with size 0;
     - any other non-identifier character is a one-character token whose
       kind is the character itself;
     - an identifier is TOK_IDENTIFIER, unless it spells one of the
       reserved words in the table below.  */
static void next_token(token_t *tok)
{
    /* Reserved words.  All spellings are distinct, so at most one entry
       can match a given identifier. */
    static const struct {
        const char *text;
        int length;
        int kind;
    } keywords[] = {
        { "_Bool",     5, TOK__BOOL    },
        { "__cdecl",   7, TOK_CDECL    },
        { "__stdcall", 9, TOK_STDCALL  },
        { "char",      4, TOK_CHAR     },
        { "const",     5, TOK_CONST    },
        { "double",    6, TOK_DOUBLE   },
        { "enum",      4, TOK_ENUM     },
        { "float",     5, TOK_FLOAT    },
        { "int",       3, TOK_INT      },
        { "long",      4, TOK_LONG     },
        { "short",     5, TOK_SHORT    },
        { "signed",    6, TOK_SIGNED   },
        { "struct",    6, TOK_STRUCT   },
        { "union",     5, TOK_UNION    },
        { "unsigned",  8, TOK_UNSIGNED },
        { "void",      4, TOK_VOID     },
        { "volatile",  8, TOK_VOLATILE },
    };
    const char *cur = tok->p + tok->size;
    size_t k;

    if (tok->kind == TOK_ERROR)
        return;

    /* Everything that does not start an identifier. */
    for (; !is_ident_first(*cur); cur++) {
        if (is_space(*cur))
            continue;

        tok->p = cur;
        if (is_digit(*cur)) {
            tok->kind = TOK_INTEGER;
            tok->size = (cur[1] == 'x' || cur[1] == 'X') ? 2 : 1;
            while (is_hex_digit(cur[tok->size]))
                tok->size++;
        }
        else if (cur[0] == '.' && cur[1] == '.' && cur[2] == '.') {
            tok->kind = TOK_DOTDOTDOT;
            tok->size = 3;
        }
        else if (*cur) {
            tok->kind = *cur;
            tok->size = 1;
        }
        else {
            tok->kind = TOK_END;
            tok->size = 0;
        }
        return;
    }

    /* An identifier: measure it, then see whether it is a keyword. */
    tok->kind = TOK_IDENTIFIER;
    tok->p = cur;
    tok->size = 1;
    while (is_ident_next(cur[tok->size]))
        tok->size++;

    for (k = 0; k < sizeof(keywords) / sizeof(keywords[0]); k++) {
        if (tok->size == keywords[k].length &&
            !memcmp(cur, keywords[k].text, keywords[k].length)) {
            tok->kind = keywords[k].kind;
            break;
        }
    }
}
/* Print a \ escape sequence starting at ESCSTART. Return the number of characters in the escape sequence besides the backslash. If OCTAL_0 is nonzero, octal escapes are of the form \0ooo, where o is an octal digit; otherwise they are of the form \ooo. */ long builtin_printf_state_t::print_esc(const wchar_t *escstart, bool octal_0) { const wchar_t *p = escstart + 1; int esc_value = 0; /* Value of \nnn escape. */ int esc_length; /* Length of \nnn escape. */ if (*p == L'x') { /* A hexadecimal \xhh escape sequence must have 1 or 2 hex. digits. */ for (esc_length = 0, ++p; esc_length < 2 && is_hex_digit(*p); ++esc_length, ++p) esc_value = esc_value * 16 + hex_to_bin(*p); if (esc_length == 0) this->fatal_error(_(L"missing hexadecimal number in escape")); this->append_format_output(L"%lc", esc_value); } else if (is_octal_digit(*p)) { /* Parse \0ooo (if octal_0 && *p == L'0') or \ooo (otherwise). Allow \ooo if octal_0 && *p != L'0'; this is an undocumented extension to POSIX that is compatible with Bash 2.05b. */ for (esc_length = 0, p += octal_0 && *p == L'0'; esc_length < 3 && is_octal_digit(*p); ++esc_length, ++p) esc_value = esc_value * 8 + octal_to_bin(*p); this->append_format_output(L"%c", esc_value); } else if (*p && wcschr(L"\"\\abcfnrtv", *p)) print_esc_char(*p++); else if (*p == L'u' || *p == L'U') { wchar_t esc_char = *p; p++; uint32_t uni_value = 0; for (size_t esc_length = 0; esc_length < (esc_char == L'u' ? 4 : 8); esc_length++) { if (! is_hex_digit(*p)) { /* Escape sequence must be done. Complain if we didn't get anything */ if (esc_length == 0) { this->fatal_error(_(L"Missing hexadecimal number in Unicode escape")); } break; } uni_value = uni_value * 16 + hex_to_bin(*p); p++; } /* PCA GNU printf respects the limitations described in ISO N717, about which universal characters "shall not" be specified. I believe this limitation is for the benefit of compilers; I see no reason to impose it in builtin_printf. 
If __STDC_ISO_10646__ is defined, then it means wchar_t can and does hold Unicode code points, so just use that. If not defined, use the %lc printf conversion; this probably won't do anything good if your wide character set is not Unicode, but such platforms are exceedingly rare. */ if (uni_value > 0x10FFFF) { this->fatal_error(_(L"Unicode character out of range: \\%c%0*x"), esc_char, (esc_char == L'u' ? 4 : 8), uni_value); } else { #if defined(__STDC_ISO_10646__) this->append_output(uni_value); #else this->append_format_output(L"%lc", uni_value); } #endif } else { this->append_format_output(L"%lc", L'\\'); if (*p) { this->append_format_output(L"%lc", *p); p++; } } return p - escstart - 1; }
/* Low-level tokenizer: reads characters from 'scanner' through
 * json_scanner_get_char() / json_scanner_peek_next_char() and produces
 * one token.
 *
 * Outputs: *token_p receives the token type and *value_p its value.
 * line_p and position_p are handed to json_scanner_get_char() on every
 * read; *position_p is additionally incremented here on some
 * end-of-input paths.
 *
 * If the text buffer is exhausted and there is no input fd, or the
 * scanner's current token is already G_TOKEN_EOF, G_TOKEN_EOF is
 * reported immediately and *value_p is left with v_int64 == 0.
 *
 * Which constructs are recognized depends on the flags in
 * scanner->config that are tested below (scan_comment_multi,
 * scan_string_sq, scan_string_dq, scan_float, scan_hex, scan_hex_dollar,
 * scan_octal, scan_binary, scan_identifier, scan_identifier_1char,
 * scan_symbols, scan_identifier_NULL, char_2_token, ...).
 *
 * An unterminated comment or string yields G_TOKEN_ERROR with
 * G_ERR_UNEXP_EOF_IN_COMMENT or G_ERR_UNEXP_EOF_IN_STRING.
 *
 * For string, comment and identifier tokens the accumulated text is
 * handed over in the value via g_string_free (gstring, FALSE).
 */
static void
json_scanner_get_token_ll (JsonScanner *scanner,
                           GTokenType  *token_p,
                           GTokenValue *value_p,
                           guint       *line_p,
                           guint       *position_p)
{
  JsonScannerConfig *config;
  GTokenType token;
  gboolean in_comment_multi;
  gboolean in_comment_single;
  gboolean in_string_sq;
  gboolean in_string_dq;
  GString *gstring;
  GTokenValue value;
  guchar ch;

  config = scanner->config;
  (*value_p).v_int64 = 0;

  /* nothing left to read, or EOF already reported */
  if ((scanner->text >= scanner->text_end && scanner->input_fd < 0) ||
      scanner->token == G_TOKEN_EOF)
    {
      *token_p = G_TOKEN_EOF;
      return;
    }

  /* the in_* flags stay TRUE when input ends inside the construct */
  in_comment_multi = FALSE;
  in_comment_single = FALSE;
  in_string_sq = FALSE;
  in_string_dq = FALSE;
  gstring = NULL;

  /* every path through the body leaves ch == 0 (see the assertion at the
   * bottom), so the body is executed once per call */
  do /* while (ch != 0) */
    {
      gboolean dotted_float = FALSE;

      ch = json_scanner_get_char (scanner, line_p, position_p);

      value.v_int64 = 0;
      token = G_TOKEN_NONE;

      /* this is *evil*, but needed ;(
       * we first check for identifier first character, because it
       * might interfere with other key chars like slashes or numbers
       */
      if (config->scan_identifier &&
          ch && strchr (config->cset_identifier_first, ch))
        goto identifier_precedence;

      switch (ch)
        {
        case 0:
          token = G_TOKEN_EOF;
          (*position_p)++;
          /* ch = 0; */
          break;

        /* multi-line comment: collect everything up to the closing
         * star-slash pair */
        case '/':
          if (!config->scan_comment_multi ||
              json_scanner_peek_next_char (scanner) != '*')
            goto default_case;
          json_scanner_get_char (scanner, line_p, position_p);
          token = G_TOKEN_COMMENT_MULTI;
          in_comment_multi = TRUE;
          gstring = g_string_new (NULL);
          while ((ch = json_scanner_get_char (scanner, line_p, position_p)) != 0)
            {
              if (ch == '*' && json_scanner_peek_next_char (scanner) == '/')
                {
                  json_scanner_get_char (scanner, line_p, position_p);
                  in_comment_multi = FALSE;
                  break;
                }
              else
                gstring = g_string_append_c (gstring, ch);
            }
          ch = 0;
          break;

        /* single-quoted string: copied verbatim, no escape processing */
        case '\'':
          if (!config->scan_string_sq)
            goto default_case;
          token = G_TOKEN_STRING;
          in_string_sq = TRUE;
          gstring = g_string_new (NULL);
          while ((ch = json_scanner_get_char (scanner, line_p, position_p)) != 0)
            {
              if (ch == '\'')
                {
                  in_string_sq = FALSE;
                  break;
                }
              else
                gstring = g_string_append_c (gstring, ch);
            }
          ch = 0;
          break;

        /* double-quoted string.  Escapes handled: backslash, n, t, r, b,
         * f, u followed by hex digits (with a second \u escape read after
         * a surrogate), and one to three octal digits.  Any other escaped
         * character is appended as itself. */
        case '"':
          if (!config->scan_string_dq)
            goto default_case;
          token = G_TOKEN_STRING;
          in_string_dq = TRUE;
          gstring = g_string_new (NULL);
          while ((ch = json_scanner_get_char (scanner, line_p, position_p)) != 0)
            {
              if (ch == '"')
                {
                  in_string_dq = FALSE;
                  break;
                }
              else
                {
                  if (ch == '\\')
                    {
                      ch = json_scanner_get_char (scanner, line_p, position_p);
                      switch (ch)
                        {
                          guint i;
                          guint fchar;

                        case 0:
                          break;

                        case '\\':
                          gstring = g_string_append_c (gstring, '\\');
                          break;

                        case 'n':
                          gstring = g_string_append_c (gstring, '\n');
                          break;

                        case 't':
                          gstring = g_string_append_c (gstring, '\t');
                          break;

                        case 'r':
                          gstring = g_string_append_c (gstring, '\r');
                          break;

                        case 'b':
                          gstring = g_string_append_c (gstring, '\b');
                          break;

                        case 'f':
                          gstring = g_string_append_c (gstring, '\f');
                          break;

                        /* if no hex digit follows the 'u', nothing is
                         * appended for this escape */
                        case 'u':
                          fchar = json_scanner_peek_next_char (scanner);
                          if (is_hex_digit (fchar))
                            {
                              gunichar ucs;

                              ucs = json_scanner_get_unichar (scanner, line_p, position_p);

                              if (g_unichar_type (ucs) == G_UNICODE_SURROGATE)
                                {
                                  /* read next surrogate */
                                  if ('\\' == json_scanner_get_char (scanner, line_p, position_p)
                                      && 'u' == json_scanner_get_char (scanner, line_p, position_p))
                                    {
                                      gunichar ucs_lo = json_scanner_get_unichar (scanner, line_p, position_p);
                                      g_assert (g_unichar_type (ucs_lo) == G_UNICODE_SURROGATE);
                                      /* combine the low 10 bits of each
                                       * half and add the 0x10000 offset */
                                      ucs = (((ucs & 0x3ff) << 10) | (ucs_lo & 0x3ff)) + 0x10000;
                                    }
                                }

                              g_assert (g_unichar_validate (ucs));
                              gstring = g_string_append_unichar (gstring, ucs);
                            }
                          break;

                        /* octal escape: up to three digits */
                        case '0':
                        case '1':
                        case '2':
                        case '3':
                        case '4':
                        case '5':
                        case '6':
                        case '7':
                          i = ch - '0';
                          fchar = json_scanner_peek_next_char (scanner);
                          if (fchar >= '0' && fchar <= '7')
                            {
                              ch = json_scanner_get_char (scanner, line_p, position_p);
                              i = i * 8 + ch - '0';
                              fchar = json_scanner_peek_next_char (scanner);
                              if (fchar >= '0' && fchar <= '7')
                                {
                                  ch = json_scanner_get_char (scanner, line_p, position_p);
                                  i = i * 8 + ch - '0';
                                }
                            }
                          gstring = g_string_append_c (gstring, i);
                          break;

                        default:
                          gstring = g_string_append_c (gstring, ch);
                          break;
                        }
                    }
                  else
                    gstring = g_string_append_c (gstring, ch);
                }
            }
          ch = 0;
          break;

        /* a leading '.' starts a float; "0." is prepended to the text
         * later because dotted_float is set */
        case '.':
          if (!config->scan_float)
            goto default_case;
          token = G_TOKEN_FLOAT;
          dotted_float = TRUE;
          ch = json_scanner_get_char (scanner, line_p, position_p);
          goto number_parsing;

        /* '$' introduces a hex number when scan_hex_dollar is set */
        case '$':
          if (!config->scan_hex_dollar)
            goto default_case;
          token = G_TOKEN_HEX;
          ch = json_scanner_get_char (scanner, line_p, position_p);
          goto number_parsing;

        /* a leading '0' may introduce a hex ("0x") or binary ("0b")
         * prefix; otherwise the number is octal or decimal depending on
         * scan_octal */
        case '0':
          if (config->scan_octal)
            token = G_TOKEN_OCTAL;
          else
            token = G_TOKEN_INT;
          ch = json_scanner_peek_next_char (scanner);
          if (config->scan_hex && (ch == 'x' || ch == 'X'))
            {
              token = G_TOKEN_HEX;
              json_scanner_get_char (scanner, line_p, position_p);
              ch = json_scanner_get_char (scanner, line_p, position_p);
              if (ch == 0)
                {
                  token = G_TOKEN_ERROR;
                  value.v_error = G_ERR_UNEXP_EOF;
                  (*position_p)++;
                  break;
                }
              if (json_scanner_char_2_num (ch, 16) < 0)
                {
                  token = G_TOKEN_ERROR;
                  value.v_error = G_ERR_DIGIT_RADIX;
                  ch = 0;
                  break;
                }
            }
          else if (config->scan_binary && (ch == 'b' || ch == 'B'))
            {
              token = G_TOKEN_BINARY;
              json_scanner_get_char (scanner, line_p, position_p);
              ch = json_scanner_get_char (scanner, line_p, position_p);
              if (ch == 0)
                {
                  token = G_TOKEN_ERROR;
                  value.v_error = G_ERR_UNEXP_EOF;
                  (*position_p)++;
                  break;
                }
              /* NOTE(review): the first binary digit is checked against
               * base 10 here, not base 2 -- confirm this is intended */
              if (json_scanner_char_2_num (ch, 10) < 0)
                {
                  token = G_TOKEN_ERROR;
                  value.v_error = G_ERR_NON_DIGIT_IN_CONST;
                  ch = 0;
                  break;
                }
            }
          else
            ch = '0';
          /* fall through */
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
        number_parsing:
        {
          gboolean in_number = TRUE;
          gchar *endptr;

          if (token == G_TOKEN_NONE)
            token = G_TOKEN_INT;

          /* collect the text of the number in gstring, then convert it
           * in one go below */
          gstring = g_string_new (dotted_float ? "0." : "");
          gstring = g_string_append_c (gstring, ch);

          do /* while (in_number) */
            {
              gboolean is_E;

              /* a sign is only accepted directly after the exponent
               * marker of a float */
              is_E = token == G_TOKEN_FLOAT && (ch == 'e' || ch == 'E');

              ch = json_scanner_peek_next_char (scanner);

              if (json_scanner_char_2_num (ch, 36) >= 0 ||
                  (config->scan_float && ch == '.') ||
                  (is_E && (ch == '+' || ch == '-')))
                {
                  ch = json_scanner_get_char (scanner, line_p, position_p);

                  switch (ch)
                    {
                    case '.':
                      if (token != G_TOKEN_INT && token != G_TOKEN_OCTAL)
                        {
                          value.v_error = token == G_TOKEN_FLOAT ? G_ERR_FLOAT_MALFORMED : G_ERR_FLOAT_RADIX;
                          token = G_TOKEN_ERROR;
                          in_number = FALSE;
                        }
                      else
                        {
                          token = G_TOKEN_FLOAT;
                          gstring = g_string_append_c (gstring, ch);
                        }
                      break;

                    case '0':
                    case '1':
                    case '2':
                    case '3':
                    case '4':
                    case '5':
                    case '6':
                    case '7':
                    case '8':
                    case '9':
                      gstring = g_string_append_c (gstring, ch);
                      break;

                    case '-':
                    case '+':
                      if (token != G_TOKEN_FLOAT)
                        {
                          token = G_TOKEN_ERROR;
                          value.v_error = G_ERR_NON_DIGIT_IN_CONST;
                          in_number = FALSE;
                        }
                      else
                        gstring = g_string_append_c (gstring, ch);
                      break;

                    /* 'e' is a digit in hex numbers and an exponent
                     * marker otherwise */
                    case 'e':
                    case 'E':
                      if ((token != G_TOKEN_HEX && !config->scan_float) ||
                          (token != G_TOKEN_HEX &&
                           token != G_TOKEN_OCTAL &&
                           token != G_TOKEN_FLOAT &&
                           token != G_TOKEN_INT))
                        {
                          token = G_TOKEN_ERROR;
                          value.v_error = G_ERR_NON_DIGIT_IN_CONST;
                          in_number = FALSE;
                        }
                      else
                        {
                          if (token != G_TOKEN_HEX)
                            token = G_TOKEN_FLOAT;
                          gstring = g_string_append_c (gstring, ch);
                        }
                      break;

                    /* any other alphanumeric is only acceptable inside a
                     * hex number; validity is settled by the conversion */
                    default:
                      if (token != G_TOKEN_HEX)
                        {
                          token = G_TOKEN_ERROR;
                          value.v_error = G_ERR_NON_DIGIT_IN_CONST;
                          in_number = FALSE;
                        }
                      else
                        gstring = g_string_append_c (gstring, ch);
                      break;
                    }
                }
              else
                in_number = FALSE;
            }
          while (in_number);

          endptr = NULL;
          if (token == G_TOKEN_FLOAT)
            value.v_float = g_strtod (gstring->str, &endptr);
          else
            {
              guint64 ui64 = 0;

              switch (token)
                {
                case G_TOKEN_BINARY:
                  ui64 = g_ascii_strtoull (gstring->str, &endptr, 2);
                  break;
                case G_TOKEN_OCTAL:
                  ui64 = g_ascii_strtoull (gstring->str, &endptr, 8);
                  break;
                case G_TOKEN_INT:
                  ui64 = g_ascii_strtoull (gstring->str, &endptr, 10);
                  break;
                case G_TOKEN_HEX:
                  ui64 = g_ascii_strtoull (gstring->str, &endptr, 16);
                  break;
                default: ;
                }
              if (scanner->config->store_int64)
                value.v_int64 = ui64;
              else
                value.v_int = ui64;
            }

          /* leftover text after the conversion means a malformed number */
          if (endptr && *endptr)
            {
              token = G_TOKEN_ERROR;
              if (*endptr == 'e' || *endptr == 'E')
                value.v_error = G_ERR_NON_DIGIT_IN_CONST;
              else
                value.v_error = G_ERR_DIGIT_RADIX;
            }
          g_string_free (gstring, TRUE);
          gstring = NULL;
          ch = 0;
        } /* number_parsing:... */
        break;

        default:
        default_case:
        {
          /* single-line comment: from cpair_comment_single[0] up to
           * cpair_comment_single[1] */
          if (config->cpair_comment_single &&
              ch == config->cpair_comment_single[0])
            {
              token = G_TOKEN_COMMENT_SINGLE;
              in_comment_single = TRUE;
              gstring = g_string_new (NULL);
              ch = json_scanner_get_char (scanner, line_p, position_p);
              while (ch != 0)
                {
                  if (ch == config->cpair_comment_single[1])
                    {
                      in_comment_single = FALSE;
                      ch = 0;
                      break;
                    }

                  gstring = g_string_append_c (gstring, ch);
                  ch = json_scanner_get_char (scanner, line_p, position_p);
                }
              /* ignore a missing newline at EOF for single line comments */
              if (in_comment_single &&
                  config->cpair_comment_single[1] == '\n')
                in_comment_single = FALSE;
            }
          else if (config->scan_identifier && ch &&
                   strchr (config->cset_identifier_first, ch))
            {
            /* also entered directly by the goto near the top of the loop */
            identifier_precedence:

              if (config->cset_identifier_nth && ch &&
                  strchr (config->cset_identifier_nth,
                          json_scanner_peek_next_char (scanner)))
                {
                  token = G_TOKEN_IDENTIFIER;
                  gstring = g_string_new (NULL);
                  gstring = g_string_append_c (gstring, ch);
                  do
                    {
                      ch = json_scanner_get_char (scanner, line_p, position_p);
                      gstring = g_string_append_c (gstring, ch);
                      ch = json_scanner_peek_next_char (scanner);
                    }
                  while (ch && strchr (config->cset_identifier_nth, ch));
                  ch = 0;
                }
              else if (config->scan_identifier_1char)
                {
                  token = G_TOKEN_IDENTIFIER;
                  value.v_identifier = g_new0 (gchar, 2);
                  value.v_identifier[0] = ch;
                  ch = 0;
                }
            }
          /* nothing above claimed the character: report it on its own */
          if (ch)
            {
              if (config->char_2_token)
                token = ch;
              else
                {
                  token = G_TOKEN_CHAR;
                  value.v_char = ch;
                }
              ch = 0;
            }
        } /* default_case:... */
        break;
        }
      g_assert (ch == 0 && token != G_TOKEN_NONE); /* paranoid */
    }
  while (ch != 0);

  /* input ended inside a comment or string */
  if (in_comment_multi || in_comment_single ||
      in_string_sq || in_string_dq)
    {
      token = G_TOKEN_ERROR;
      if (gstring)
        {
          g_string_free (gstring, TRUE);
          gstring = NULL;
        }
      (*position_p)++;
      if (in_comment_multi || in_comment_single)
        value.v_error = G_ERR_UNEXP_EOF_IN_COMMENT;
      else /* (in_string_sq || in_string_dq) */
        value.v_error = G_ERR_UNEXP_EOF_IN_STRING;
    }

  /* hand the collected text over to the token value */
  if (gstring)
    {
      value.v_string = g_string_free (gstring, FALSE);
      gstring = NULL;
    }

  if (token == G_TOKEN_IDENTIFIER)
    {
      /* identifiers registered as symbols become G_TOKEN_SYMBOL; scope 0
       * is tried as well when scope_0_fallback is set */
      if (config->scan_symbols)
        {
          JsonScannerKey *key;
          guint scope_id;

          scope_id = scanner->scope_id;
          key = json_scanner_lookup_internal (scanner, scope_id, value.v_identifier);
          if (!key && scope_id && scanner->config->scope_0_fallback)
            key = json_scanner_lookup_internal (scanner, 0, value.v_identifier);

          if (key)
            {
              g_free (value.v_identifier);
              token = G_TOKEN_SYMBOL;
              value.v_symbol = key->value;
            }
        }

      /* the identifier NULL; when case_sensitive is not set, each letter
       * may be upper or lower case independently */
      if (token == G_TOKEN_IDENTIFIER &&
          config->scan_identifier_NULL &&
          strlen (value.v_identifier) == 4)
        {
          gchar *null_upper = "NULL";
          gchar *null_lower = "null";

          if (scanner->config->case_sensitive)
            {
              if (value.v_identifier[0] == null_upper[0] &&
                  value.v_identifier[1] == null_upper[1] &&
                  value.v_identifier[2] == null_upper[2] &&
                  value.v_identifier[3] == null_upper[3])
                token = G_TOKEN_IDENTIFIER_NULL;
            }
          else
            {
              if ((value.v_identifier[0] == null_upper[0] ||
                   value.v_identifier[0] == null_lower[0]) &&
                  (value.v_identifier[1] == null_upper[1] ||
                   value.v_identifier[1] == null_lower[1]) &&
                  (value.v_identifier[2] == null_upper[2] ||
                   value.v_identifier[2] == null_lower[2]) &&
                  (value.v_identifier[3] == null_upper[3] ||
                   value.v_identifier[3] == null_lower[3]))
                token = G_TOKEN_IDENTIFIER_NULL;
            }
        }
    }

  *token_p = token;
  *value_p = value;
}
/**
 * Decodes one backslash escape at the start of s.
 *
 * Returns false (leaving s and result untouched) when s does not start
 * with a backslash or the backslash is the last character. Otherwise
 * stores the decoded character code in result, advances s past the
 * escape and returns true.
 *
 * Recognized forms, tried in this order:
 *   - \xHH with exactly two hex digits;
 *   - C-style octal escapes of one, two or three octal digits, ending at
 *     three digits or at the first character that is not an octal digit;
 *   - the single-letter escapes a, b, f, n, r, t, v.
 * Any other character following the backslash stands for itself.
 */
static bool is_escape_char(char const *& s, unsigned& result) {
    if (s[0] != '\\' || s[1] == 0) {
        return false;
    }

    unsigned hi, lo;
    if (s[1] == 'x' && is_hex_digit(s[2], hi) && is_hex_digit(s[3], lo)) {
        result = hi*16 + lo;
        s += 4;
        return true;
    }

    // Octal: accumulate at most three digits.
    unsigned octal = 0;
    unsigned digit;
    unsigned count = 0;
    while (count < 3 && is_octal_digit(s[1 + count], digit)) {
        octal = octal*8 + digit;
        ++count;
    }
    if (count > 0) {
        result = octal;
        s += 1 + count;
        return true;
    }

    // Every remaining form is exactly two characters long.
    switch (s[1]) {
    case 'a': result = '\a'; break;
    case 'b': result = '\b'; break;
#if 0
    case 'e': result = '\e'; break;
#endif
    case 'f': result = '\f'; break;
    case 'n': result = '\n'; break;
    case 'r': result = '\r'; break;
    case 't': result = '\t'; break;
    case 'v': result = '\v'; break;
    default:  result = s[1]; break;
    }
    s += 2;
    return true;
}
/* Split a line into arguments, where every argument can be in the
 * following programming-language REPL-alike form:
 *
 * foo bar "newline are supported\n" and "\xff\x00otherstuff"
 *
 * Inside double quotes, \xHH and the escapes \n \r \t \b \a are decoded
 * and any other escaped character stands for itself. Inside single
 * quotes only \' is special. A closing quote must be followed by
 * whitespace or the end of the line.
 *
 * The number of arguments is stored into *argc, and an array
 * of sds is returned. The caller should sdsfree() all the returned
 * strings and finally zfree() the array itself.
 *
 * NULL is returned, with *argc set to 0, when the line is malformed
 * (unterminated quotes, or a closing quote directly followed by a
 * non-space character). NULL with *argc == 0 is also what a line with no
 * arguments yields, because the array is only allocated once the first
 * token has been read.
 *
 * Note that sdscatrepr() is able to convert back a string into
 * a quoted string in the same format sdssplitargs() is able to parse.
 */
sds *sdssplitargs(char *line, int *argc) {
    char *p = line;
    char *current = NULL;
    char **vector = NULL;

    *argc = 0;
    while(1) {
        /* skip blanks; the casts keep isspace() well defined for bytes
         * >= 0x80 where plain char is signed */
        while(*p && isspace((unsigned char)*p)) p++;
        if (*p) {
            /* get a token */
            int inq=0;  /* set to 1 if we are in "quotes" */
            int insq=0; /* set to 1 if we are in 'single quotes' */
            int done=0;

            if (current == NULL) current = sdsempty();
            while(!done) {
                if (inq) {
                    if (*p == '\\' && *(p+1) == 'x' &&
                                             is_hex_digit(*(p+2)) &&
                                             is_hex_digit(*(p+3)))
                    {
                        unsigned char byte;

                        byte = (hex_digit_to_int(*(p+2))*16)+
                                hex_digit_to_int(*(p+3));
                        current = sdscatlen(current,(char*)&byte,1);
                        /* the fourth character is consumed by the
                         * common p++ at the bottom of the loop */
                        p += 3;
                    } else if (*p == '\\' && *(p+1)) {
                        char c;

                        p++;
                        switch(*p) {
                        case 'n': c = '\n'; break;
                        case 'r': c = '\r'; break;
                        case 't': c = '\t'; break;
                        case 'b': c = '\b'; break;
                        case 'a': c = '\a'; break;
                        default: c = *p; break;
                        }
                        current = sdscatlen(current,&c,1);
                    } else if (*p == '"') {
                        /* closing quote must be followed by a space or
                         * nothing at all. */
                        if (*(p+1) && !isspace((unsigned char)*(p+1))) goto err;
                        done=1;
                    } else if (!*p) {
                        /* unterminated quotes */
                        goto err;
                    } else {
                        current = sdscatlen(current,p,1);
                    }
                } else if (insq) {
                    if (*p == '\\' && *(p+1) == '\'') {
                        p++;
                        current = sdscatlen(current,"'",1);
                    } else if (*p == '\'') {
                        /* closing quote must be followed by a space or
                         * nothing at all. */
                        if (*(p+1) && !isspace((unsigned char)*(p+1))) goto err;
                        done=1;
                    } else if (!*p) {
                        /* unterminated quotes */
                        goto err;
                    } else {
                        current = sdscatlen(current,p,1);
                    }
                } else {
                    switch(*p) {
                    case ' ':
                    case '\n':
                    case '\r':
                    case '\t':
                    case '\0':
                        done=1;
                        break;
                    case '"':
                        inq=1;
                        break;
                    case '\'':
                        insq=1;
                        break;
                    default:
                        current = sdscatlen(current,p,1);
                        break;
                    }
                }
                if (*p) p++;
            }
            /* add the token to the vector */
            vector = zrealloc(vector,((*argc)+1)*sizeof(char*));
            vector[*argc] = current;
            (*argc)++;
            current = NULL;
        } else {
            return vector;
        }
    }

err:
    while((*argc)--)
        sdsfree(vector[*argc]);
    zfree(vector);
    if (current) sdsfree(current);
    /* the loop above leaves *argc at -1; report an empty result instead */
    *argc = 0;
    return NULL;
}
int regex_parser::process_escape(const char *re, int ptr, int_set *chars){ if (ptr==strlen(re)){ return (++ptr); //fatal("regex_parser:: process_escape: \\ in last position."); } char c=re[ptr]; int next; if(is_x(c)){ if(ptr>strlen(re)-3) fatal("regex_parser::process_escape: invalid hex escape sequence."); else if (!is_hex_digit(re[ptr+1]) || !is_hex_digit(re[ptr+2])) fatal("regex_parser::process_escape: invalid hex escape sequence."); else{ char tmp[5]; tmp[0]='0';tmp[1]=c;tmp[2]=re[ptr+1];tmp[3]=re[ptr+2]; tmp[4]='\0'; sscanf(tmp,"0x%x", &next); chars->insert(next); ptr=ptr+3; } }else if (is_oct_digit(c)){ if(ptr>strlen(re)-3) {next=escaped(c);ptr++;chars->insert(next);} //normal escape sequence else if (!is_oct_digit(re[ptr+1]) || !is_oct_digit(re[ptr+2])) {next=escaped(c);ptr++;chars->insert(next);} //normal escape sequence else{ //really an octal sequence! char tmp[5]; tmp[0]='0';tmp[1]=c;tmp[2]=re[ptr+1];tmp[3]=re[ptr+2]; tmp[4]='\0'; sscanf(tmp,"0%o", &next); chars->insert(next); ptr=ptr+3; } }else if(c=='s'){ chars->insert('\t'); chars->insert('\n'); chars->insert('\r'); chars->insert('\x0C'); chars->insert('\x20'); ptr++; }else if(c=='S'){ chars->insert('\t'); chars->insert('\n'); chars->insert('\r'); chars->insert('\x0C'); chars->insert('\x20'); chars->negate(); ptr++; }else if(c=='d'){ chars->insert('0');chars->insert('1');chars->insert('2'); chars->insert('3');chars->insert('4');chars->insert('5'); chars->insert('6');chars->insert('7');chars->insert('8'); chars->insert('9'); ptr++; }else if(c=='D'){ chars->insert('0');chars->insert('1');chars->insert('2'); chars->insert('3');chars->insert('4');chars->insert('5'); chars->insert('6');chars->insert('7');chars->insert('8'); chars->insert('9'); chars->negate(); ptr++; }else if(c=='w'){ chars->insert('_'); chars->insert('0');chars->insert('1');chars->insert('2'); chars->insert('3');chars->insert('4');chars->insert('5'); chars->insert('6');chars->insert('7');chars->insert('8'); chars->insert('9'); 
chars->insert('a');chars->insert('b');chars->insert('c'); chars->insert('d');chars->insert('e');chars->insert('f'); chars->insert('g');chars->insert('h');chars->insert('i'); chars->insert('j');chars->insert('k');chars->insert('l'); chars->insert('m');chars->insert('n');chars->insert('o'); chars->insert('p');chars->insert('q');chars->insert('r'); chars->insert('s');chars->insert('t');chars->insert('u'); chars->insert('v');chars->insert('w');chars->insert('x'); chars->insert('y');chars->insert('z'); chars->insert('A');chars->insert('B');chars->insert('C'); chars->insert('D');chars->insert('E');chars->insert('F'); chars->insert('G');chars->insert('H');chars->insert('I'); chars->insert('J');chars->insert('K');chars->insert('L'); chars->insert('M');chars->insert('N');chars->insert('O'); chars->insert('P');chars->insert('Q');chars->insert('R'); chars->insert('S');chars->insert('T');chars->insert('U'); chars->insert('V');chars->insert('W');chars->insert('X'); chars->insert('Y');chars->insert('Z'); ptr++; }else if(c=='W'){ chars->insert('_'); chars->insert('0');chars->insert('1');chars->insert('2'); chars->insert('3');chars->insert('4');chars->insert('5'); chars->insert('6');chars->insert('7');chars->insert('8'); chars->insert('9'); chars->insert('a');chars->insert('b');chars->insert('c'); chars->insert('d');chars->insert('e');chars->insert('f'); chars->insert('g');chars->insert('h');chars->insert('i'); chars->insert('j');chars->insert('k');chars->insert('l'); chars->insert('m');chars->insert('n');chars->insert('o'); chars->insert('p');chars->insert('q');chars->insert('r'); chars->insert('s');chars->insert('t');chars->insert('u'); chars->insert('v');chars->insert('w');chars->insert('x'); chars->insert('y');chars->insert('z'); chars->insert('A');chars->insert('B');chars->insert('C'); chars->insert('D');chars->insert('E');chars->insert('F'); chars->insert('G');chars->insert('H');chars->insert('I'); chars->insert('J');chars->insert('K');chars->insert('L'); 
chars->insert('M');chars->insert('N');chars->insert('O'); chars->insert('P');chars->insert('Q');chars->insert('R'); chars->insert('S');chars->insert('T');chars->insert('U'); chars->insert('V');chars->insert('W');chars->insert('X'); chars->insert('Y');chars->insert('Z'); chars->negate(); ptr++; }else{ next=escaped(c); chars->insert(next); ptr++; } return ptr; }
/*
 * Splits a line into arguments. Tokens are separated by whitespace and may
 * be wrapped in "double quotes" or 'single quotes':
 *  - inside double quotes the escapes \a \b \n \r \t and \xHH (two hex
 *    digits) are decoded; a backslash before any other character yields
 *    that character verbatim;
 *  - inside single quotes only \' is decoded;
 *  - a closing quote must be followed by whitespace or the end of the line.
 *
 * The number of tokens is stored into *argc and an array holding one dstr
 * per token is returned.
 *
 * Returns NULL with *argc == 0 when the line holds no tokens, and also on
 * error (unterminated quotes, a closing quote directly followed by a
 * non-space character, or RESIZE() yielding NULL). On error every token
 * parsed so far is released before returning.
 */
dstr *dstr_split_args(const char *line, int *argc) {
	const char *p = line;
	dstr current = NULL;
	dstr *argv = NULL;

	*argc = 0;
	for (;;) {
		/* skip blanks; the cast keeps isspace() defined for bytes >= 0x80
		 * on platforms where plain char is signed */
		while (*p && isspace((unsigned char)*p))
			++p;
		if (*p == '\0')
			return argv;

		int inq  = 0; /* 1 if in quotes */
		int insq = 0; /* 1 if in single quotes */
		int done = 0;

		if (current == NULL)
			current = dstr_new_len("", 0);
		while (!done) {
			/* FIXME (marker kept from the original code; no details given) */
			if (inq) {
				if (*p == '\\' && *(p + 1) == 'x' &&
					is_hex_digit(*(p + 2)) && is_hex_digit(*(p + 3))) {
					unsigned char byte = 16 * hex_digit_to_int(*(p + 2)) +
						hex_digit_to_int(*(p + 3));

					/* skip "\xH"; the shared ++p at the bottom of the loop
					 * consumes the last hex digit */
					p += 3;
					current = dstr_cat_len(current, (char *)&byte, 1);
				} else if (*p == '\\' && *(p + 1)) {
					char c;

					++p;
					switch (*p) {
					case 'a':
						c = '\a';
						break;
					case 'b':
						c = '\b';
						break;
					case 'n':
						c = '\n';
						break;
					case 'r':
						c = '\r';
						break;
					case 't':
						c = '\t';
						break;
					default:
						c = *p;
						break;
					}
					current = dstr_cat_len(current, &c, 1);
				} else if (*p == '"') {
					/* closing quote must be followed by a space or nothing at all */
					if (*(p + 1) && !isspace((unsigned char)*(p + 1)))
						goto err;
					done = 1;
				} else if (*p == '\0') {
					/* unterminated quotes */
					goto err;
				} else {
					current = dstr_cat_len(current, p, 1);
				}
			} else if (insq) {
				if (*p == '\\' && *(p + 1) == '\'') {
					++p;
					current = dstr_cat_len(current, "'", 1);
				} else if (*p == '\'') {
					/* closing quote must be followed by a space or nothing at all */
					if (*(p + 1) && !isspace((unsigned char)*(p + 1)))
						goto err;
					done = 1;
				} else if (*p == '\0') {
					/* unterminated quotes */
					goto err;
				} else {
					current = dstr_cat_len(current, p, 1);
				}
			} else {
				switch (*p) {
				case ' ':
				case '\0':
				case '\n':
				case '\r':
				case '\t':
					done = 1;
					break;
				case '"':
					inq = 1;
					break;
				case '\'':
					insq = 1;
					break;
				default:
					current = dstr_cat_len(current, p, 1);
					break;
				}
			}
			/* never step past the terminating NUL */
			if (*p)
				++p;
		}
		/* NOTE(review): RESIZE is a project macro not visible here; this
		 * relies on argv still being usable in the error path when it
		 * yields NULL - confirm against its definition. */
		if (RESIZE(argv, (*argc + 1) * sizeof (char *)) == NULL)
			goto err;
		argv[*argc] = current;
		++*argc;
		current = NULL;
	}

err:
	{
		int i;

		for (i = 0; i < *argc; ++i)
			dstr_free(argv[i]);
		FREE(argv);
		if (current)
			dstr_free(current);
		/* do not report a token count alongside a NULL array */
		*argc = 0;
		return NULL;
	}
}