/*
 * Decide whether the characters in [f, t) look like a numeric literal.
 *
 * Accepted forms are a run of decimal digits, or "0x"/"0X" followed by
 * characters accepted by is_hex_char().  When f == t the single character
 * at *f is tested instead.  Returns 1 for a match and 0 otherwise.
 *
 * TODO: exponent forms such as "23e" or "23e+1" are not recognised yet.
 */
static char is_digit_string(char *f, char *t)
{
	if (f == t)
		return is_digit_char(*f) ? 1 : 0;

	int hex_mode = 0;

	for (int idx = 0; f != t; f++, idx++) {
		/* A "0x"/"0X" prefix can only sit at offsets 0 and 1. */
		if (idx == 1 && f[-1] == '0' && (*f == 'x' || *f == 'X')) {
			hex_mode = 1;
			continue;
		}

		/* Reject the first character that does not fit the current base. */
		if (hex_mode ? !is_hex_char(*f) : !is_digit_char(*f))
			return 0;
	}

	return 1;
}
/*
 * Advance p->pos over a run of digit characters and return how many were
 * consumed.  js_ensure_buf(p, 1) is called before every character is
 * inspected, including the one that ends the run.
 */
static size_t js_scan_digits(jsparser_t *p)
{
	const size_t first = p->pos;

	for (js_ensure_buf(p, 1); is_digit_char(js(p)[0]); js_ensure_buf(p, 1))
		p->pos++;

	return p->pos - first;
}
/*
 * Split the URI string uri_s into the fields of *uri.
 *
 * The scheme is stored lower-cased.  When the scheme is followed by "//",
 * the authority part is handed to uri_parse_authority().  Everything after
 * the authority (or after the ':' when there is none) is stored verbatim in
 * uri->path.
 *
 * Returns 0 on success.  On failure uri_free(uri) is called and -EINVAL is
 * returned.
 */
int uri_parse(const char *uri_s, struct uri *uri)
{
	const char *start, *end;

	uri_init(uri);

	/* Scheme, RFC 3986 section 3.1: a letter, then letters, digits, '+', '-' or '.'. */
	start = uri_s;
	if (!is_alpha_char(*start))
		goto fail;
	for (end = start;
	     is_alpha_char(*end) || is_digit_char(*end) ||
	     *end == '+' || *end == '-' || *end == '.';
	     end++)
		;
	if (*end != ':')
		goto fail;
	uri->scheme = mkstr(start, end);
	lowercase(uri->scheme);

	/* Authority, RFC 3986 section 3.2: present only after a leading "//". */
	start = end + 1;
	if (start[0] == '/' && start[1] == '/') {
		char *authority;
		int failed;

		start += 2;
		/* The authority runs up to the next '/', '?', '#' or the end of the string. */
		for (end = start;
		     *end != '/' && *end != '?' && *end != '#' && *end != '\0';
		     end++)
			;
		authority = mkstr(start, end);
		failed = uri_parse_authority(uri, authority) != 0;
		free(authority);
		if (failed)
			goto fail;
		start = end;
	}

	/* Whatever is left becomes the path. */
	end = strchr(start, '\0');
	uri->path = mkstr(start, end);

	return 0;

fail:
	uri_free(uri);
	return -EINVAL;
}
/* Break the string uri_s into its components and store them in *uri.
   See RFC 3986, section 3 for the syntax.

   The scheme is stored lower-cased.  If no port was set while parsing the
   authority (uri->port is still -1), the scheme's default port from
   scheme_default_port() is used.  The path keeps the query and fragment and
   is not percent-decoded.

   Returns uri on success.  On error, uri_free(uri) is called and NULL is
   returned. */
struct uri *uri_parse(struct uri *uri, const char *uri_s)
{
    const char *start, *end;

    uri_init(uri);

    /* Scheme, section 3.1: a letter followed by letters, digits, '+', '-'
       or '.', terminated by ':'. */
    start = uri_s;
    if (!is_alpha_char(*start))
        goto fail;
    end = start;
    while (is_alpha_char(*end) || is_digit_char(*end)
           || *end == '+' || *end == '-' || *end == '.')
        end++;
    if (*end != ':')
        goto fail;
    uri->scheme = mkstr(start, end);
    /* RFC 3986 asks implementations to treat upper-case scheme names as
       equivalent to lower-case ones, so normalise here. */
    lowercase(uri->scheme);

    /* Authority, section 3.2: only present after a leading "//". */
    start = end + 1;
    if (start[0] == '/' && start[1] == '/') {
        char *authority;
        int parsed;

        start += 2;
        end = start;
        while (*end != '/' && *end != '?' && *end != '#' && *end != '\0')
            end++;
        authority = mkstr(start, end);
        parsed = uri_parse_authority(uri, authority) != NULL;
        free(authority);
        if (!parsed)
            goto fail;
        start = end;
    }
    if (uri->port == -1)
        uri->port = scheme_default_port(uri->scheme);

    /* Path, section 3.3.  The query and fragment stay attached and nothing
       is percent-decoded, because the path is passed on to the origin
       server as-is. */
    end = strchr(start, '\0');
    uri->path = mkstr(start, end);

    return uri;

fail:
    uri_free(uri);
    return NULL;
}
// Splits the bytes of `input` into a list of Symbols.
//
// Scanning starts at the first byte of `input` and stops at the first NUL
// byte.  `lineNum` is the line number given to the first symbol; it is
// incremented for every newline token and for every '\n' inside a C comment.
// `mode` selects the scanner: in TokenizeCpp mode the keywords/keyword_trans
// tables drive the state machine, in every other mode the
// pp_keywords/pp_keyword_trans tables do.  The mode changes while scanning
// ('#' as first token of a line enters the preprocessor modes, a
// preprocessor newline returns to TokenizeCpp).
//
// A default-constructed Symbol is appended at the end as EOF marker.
static Symbols tokenize(const QByteArray &input, int lineNum = 1, TokenizeMode mode = TokenizeCpp)
{
    Symbols symbols;
    const char *begin = input;
    const char *data = begin;
    while (*data) {
        if (mode == TokenizeCpp) {
            // Counts tokens recognised by this pass; only compared against 1
            // to decide whether '#' is the first token.
            int column = 0;

            const char *lexem = data;
            int state = 0;
            Token token = NOTOKEN;
            // Walk the keyword state machine for as long as a transition exists.
            for (;;) {
                // Bytes with the high bit set are skipped without a transition.
                if (static_cast<signed char>(*data) < 0) {
                    ++data;
                    continue;
                }
                int nextindex = keywords[state].next;
                int next = 0;
                if (*data == keywords[state].defchar)
                    next = keywords[state].defnext;
                else if (!state || nextindex)
                    next = keyword_trans[nextindex][(int)*data];
                if (!next)
                    break;
                state = next;
                token = keywords[state].token;
                ++data;
            }

            // suboptimal, is_ident_char should use a table
            // If the matched keyword is followed by another identifier
            // character, use the state's alternative `ident` token instead.
            if (keywords[state].ident && is_ident_char(*data))
                token = keywords[state].ident;

            if (token == NOTOKEN) {
                // an error really
                ++data;
                continue;
            }

            ++column;

            if (token > SPECIAL_TREATMENT_MARK) {
                switch (token) {
                case QUOTE:
                    data = skipQuote(data);
                    token = STRING_LITERAL;
                    // concatenate multi-line strings for easier
                    // STRING_LITERAL handling in moc
                    if (!Preprocessor::preprocessOnly
                        && !symbols.isEmpty()
                        && symbols.last().token == STRING_LITERAL) {

                        QByteArray newString = symbols.last().unquotedLexem();
                        newString += input.mid(lexem - begin + 1, data - lexem - 2);
                        newString.prepend('\"');
                        newString.append('\"');
                        symbols.last() = Symbol(symbols.last().lineNum,
                                                STRING_LITERAL,
                                                newString);
                        continue;
                    }
                    break;
                case SINGLEQUOTE:
                    // Scan to the closing quote; a quote preceded by a single
                    // backslash (itself not preceded by a backslash) does not
                    // terminate the literal.
                    while (*data && (*data != '\''
                                     || (*(data-1)=='\\'
                                         && *(data-2)!='\\')))
                        ++data;
                    if (*data)
                        ++data;
                    token = CHARACTER_LITERAL;
                    break;
                case LANGLE_SCOPE:
                    // split <:: into two tokens, < and ::
                    token = LANGLE;
                    data -= 2;
                    break;
                case DIGIT:
                    while (is_digit_char(*data))
                        ++data;
                    if (!*data || *data != '.') {
                        token = INTEGER_LITERAL;
                        // A lone leading '0' followed by x/X starts a hex literal.
                        if (data - lexem == 1 &&
                            (*data == 'x' || *data == 'X')
                            && *lexem == '0') {
                            ++data;
                            while (is_hex_char(*data))
                                ++data;
                        }
                        break;
                    }
                    token = FLOATING_LITERAL;
                    ++data;
                    // fall through
                case FLOATING_LITERAL:
                    while (is_digit_char(*data))
                        ++data;
                    // NOTE(review): the sign is consumed before the exponent
                    // marker rather than after it - confirm this is intended.
                    if (*data == '+' || *data == '-')
                        ++data;
                    if (*data == 'e' || *data == 'E') {
                        ++data;
                        while (is_digit_char(*data))
                            ++data;
                    }
                    if (*data == 'f' || *data == 'F'
                        || *data == 'l' || *data == 'L')
                        ++data;
                    break;
                case HASH:
                    // '#' as the first token switches to preprocessor scanning.
                    if (column == 1) {
                        mode = PreparePreprocessorStatement;
                        while (*data && (*data == ' ' || *data == '\t'))
                            ++data;
                        if (is_ident_char(*data))
                            mode = TokenizePreprocessorStatement;
                        continue;
                    }
                    break;
                case NEWLINE:
                    ++lineNum;
                    continue;
                case BACKSLASH:
                    {
                        // A backslash followed only by blanks and a newline
                        // is dropped together with that newline; otherwise
                        // the scan position is restored.
                        const char *rewind = data;
                        while (*data && (*data == ' ' || *data == '\t'))
                            ++data;
                        if (*data && *data == '\n') {
                            ++data;
                            continue;
                        }
                        data = rewind;
                    } break;
                case CHARACTER:
                    while (is_ident_char(*data))
                        ++data;
                    token = IDENTIFIER;
                    break;
                case C_COMMENT:
                    // Step over up to two bytes first so that the look-behind
                    // in the loop below cannot match the comment opener.
                    if (*data) {
                        if (*data == '\n')
                            ++lineNum;
                        ++data;
                        if (*data) {
                            if (*data == '\n')
                                ++lineNum;
                            ++data;
                        }
                    }
                    // Advance until the two preceding bytes are '*' and '/'.
                    while (*data && (*(data-1) != '/' || *(data-2) != '*')) {
                        if (*data == '\n')
                            ++lineNum;
                        ++data;
                    }
                    token = WHITESPACE; // one comment, one whitespace
                    // fall through;
                case WHITESPACE:
                    // Leading whitespace does not count as the first token.
                    if (column == 1)
                        column = 0;
                    while (*data && (*data == ' ' || *data == '\t'))
                        ++data;
                    if (Preprocessor::preprocessOnly) // tokenize whitespace
                        break;
                    continue;
                case CPP_COMMENT:
                    while (*data && *data != '\n')
                        ++data;
                    continue; // ignore safely, the newline is a separator
                default:
                    continue; //ignore
                }
            }
#ifdef USE_LEXEM_STORE
            if (!Preprocessor::preprocessOnly
                && token != IDENTIFIER
                && token != STRING_LITERAL
                && token != FLOATING_LITERAL
                && token != INTEGER_LITERAL)
                symbols += Symbol(lineNum, token);
            else
#endif
                symbols += Symbol(lineNum, token, input, lexem-begin, data-lexem);

        } else { // Preprocessor

            const char *lexem = data;
            int state = 0;
            Token token = NOTOKEN;
            // For a directive, start from the state reached after '#', which
            // was already consumed by the TokenizeCpp branch.
            if (mode == TokenizePreprocessorStatement) {
                state = pp_keyword_trans[0][(int)'#'];
                mode = TokenizePreprocessor;
            }
            for (;;) {
                // Bytes with the high bit set are skipped without a transition.
                if (static_cast<signed char>(*data) < 0) {
                    ++data;
                    continue;
                }
                int nextindex = pp_keywords[state].next;
                int next = 0;
                if (*data == pp_keywords[state].defchar)
                    next = pp_keywords[state].defnext;
                else if (!state || nextindex)
                    next = pp_keyword_trans[nextindex][(int)*data];
                if (!next)
                    break;
                state = next;
                token = pp_keywords[state].token;
                ++data;
            }
            // suboptimal, is_ident_char should use a table
            if (pp_keywords[state].ident && is_ident_char(*data))
                token = pp_keywords[state].ident;

            switch (token) {
            case NOTOKEN:
                ++data;
                break;
            case PP_IFDEF:
                // #ifdef is emitted as the symbol pair: if, defined
                symbols += Symbol(lineNum, PP_IF);
                symbols += Symbol(lineNum, PP_DEFINED);
                continue;
            case PP_IFNDEF:
                // #ifndef is emitted as the symbol triple: if, not, defined
                symbols += Symbol(lineNum, PP_IF);
                symbols += Symbol(lineNum, PP_NOT);
                symbols += Symbol(lineNum, PP_DEFINED);
                continue;
            case PP_INCLUDE:
                mode = TokenizeInclude;
                break;
            case PP_QUOTE:
                data = skipQuote(data);
                token = PP_STRING_LITERAL;
                break;
            case PP_SINGLEQUOTE:
                // Same escape handling as SINGLEQUOTE in the TokenizeCpp branch.
                while (*data && (*data != '\''
                                 || (*(data-1)=='\\'
                                     && *(data-2)!='\\')))
                    ++data;
                if (*data)
                    ++data;
                token = PP_CHARACTER_LITERAL;
                break;
            case PP_DIGIT:
                while (is_digit_char(*data))
                    ++data;
                if (!*data || *data != '.') {
                    token = PP_INTEGER_LITERAL;
                    // A lone leading '0' followed by x/X starts a hex literal.
                    if (data - lexem == 1 &&
                        (*data == 'x' || *data == 'X')
                        && *lexem == '0') {
                        ++data;
                        while (is_hex_char(*data))
                            ++data;
                    }
                    break;
                }
                token = PP_FLOATING_LITERAL;
                ++data;
                // fall through
            case PP_FLOATING_LITERAL:
                while (is_digit_char(*data))
                    ++data;
                // NOTE(review): the sign is consumed before the exponent
                // marker rather than after it - confirm this is intended.
                if (*data == '+' || *data == '-')
                    ++data;
                if (*data == 'e' || *data == 'E') {
                    ++data;
                    while (is_digit_char(*data))
                        ++data;
                }
                if (*data == 'f' || *data == 'F'
                    || *data == 'l' || *data == 'L')
                    ++data;
                break;
            case PP_CHARACTER:
                if (mode == PreparePreprocessorStatement) {
                    // rewind entire token to begin
                    data = lexem;
                    mode = TokenizePreprocessorStatement;
                    continue;
                }
                while (is_ident_char(*data))
                    ++data;
                token = PP_IDENTIFIER;
                break;
            case PP_C_COMMENT:
                // Step over up to two bytes first so that the look-behind in
                // the loop below cannot match the comment opener.
                if (*data) {
                    if (*data == '\n')
                        ++lineNum;
                    ++data;
                    if (*data) {
                        if (*data == '\n')
                            ++lineNum;
                        ++data;
                    }
                }
                // Advance until the two preceding bytes are '*' and '/'.
                while (*data && (*(data-1) != '/' || *(data-2) != '*')) {
                    if (*data == '\n')
                        ++lineNum;
                    ++data;
                }
                token = PP_WHITESPACE; // one comment, one whitespace
                // fall through;
            case PP_WHITESPACE:
                while (*data && (*data == ' ' || *data == '\t'))
                    ++data;
                continue; // the preprocessor needs no whitespace
            case PP_CPP_COMMENT:
                while (*data && *data != '\n')
                    ++data;
                continue; // ignore safely, the newline is a separator
            case PP_NEWLINE:
                // End of the directive: count the line and resume C++ scanning.
                ++lineNum;
                mode = TokenizeCpp;
                break;
            case PP_BACKSLASH:
                {
                    // A backslash followed only by blanks and a newline is
                    // dropped together with that newline; otherwise the scan
                    // position is restored.
                    const char *rewind = data;
                    while (*data && (*data == ' ' || *data == '\t'))
                        ++data;
                    if (*data && *data == '\n') {
                        ++data;
                        continue;
                    }
                    data = rewind;
                } break;
            case PP_LANGLE:
                if (mode != TokenizeInclude)
                    break;
                // Inside #include, <...> is taken as one string literal that
                // ends after '>' or at the end of the line.
                token = PP_STRING_LITERAL;
                while (*data && *data != '\n' && *(data-1) != '>')
                    ++data;
                break;
            default:
                break;
            }
            // Nothing is emitted while still looking for the directive name.
            if (mode == PreparePreprocessorStatement)
                continue;
#ifdef USE_LEXEM_STORE
            if (token != PP_IDENTIFIER
                && token != PP_STRING_LITERAL
                && token != PP_FLOATING_LITERAL
                && token != PP_INTEGER_LITERAL)
                symbols += Symbol(lineNum, token);
            else
#endif
                symbols += Symbol(lineNum, token, input, lexem-begin, data-lexem);
        }
    }
    symbols += Symbol(); // eof symbol
    return symbols;
}
static const char *http_read_credentials(const char *s, struct http_credentials *credentials) { const char *p; char *scheme; credentials->scheme = AUTH_UNKNOWN; s = read_token(s, &scheme); if (s == NULL) return NULL; if (str_equal_i(scheme, "Basic")) { http_credentials_init_basic(credentials); } else if (str_equal_i(scheme, "Digest")) { http_credentials_init_digest(credentials); } else { free(scheme); return NULL; } free(scheme); while (is_space_char(*s)) s++; if (credentials->scheme == AUTH_BASIC) { p = s; /* Read base64. */ while (is_alpha_char(*p) || is_digit_char(*p) || *p == '+' || *p == '/' || *p == '=') p++; credentials->u.basic = mkstr(s, p); while (is_space_char(*p)) p++; s = p; } else if (credentials->scheme == AUTH_DIGEST) { char *name, *value; while (*s != '\0') { p = read_token(s, &name); if (p == NULL) goto bail; while (is_space_char(*p)) p++; /* It's not legal to combine multiple Authorization or Proxy-Authorization values. The productions are "Authorization" ":" credentials (section 14.8) "Proxy-Authorization" ":" credentials (section 14.34) Contrast this with WWW-Authenticate and Proxy-Authenticate and their handling in http_read_challenge. 
*/ if (*p != '=') goto bail; p++; while (is_space_char(*p)) p++; p = read_token_or_quoted_string(p, &value); if (p == NULL) { free(name); goto bail; } if (str_equal_i(name, "username")) { if (credentials->u.digest.username != NULL) goto bail; credentials->u.digest.username = Strdup(value); } else if (str_equal_i(name, "realm")) { if (credentials->u.digest.realm != NULL) goto bail; credentials->u.digest.realm = Strdup(value); } else if (str_equal_i(name, "nonce")) { if (credentials->u.digest.nonce != NULL) goto bail; credentials->u.digest.nonce = Strdup(value); } else if (str_equal_i(name, "uri")) { if (credentials->u.digest.uri != NULL) goto bail; credentials->u.digest.uri = Strdup(value); } else if (str_equal_i(name, "response")) { if (credentials->u.digest.response != NULL) goto bail; credentials->u.digest.response = Strdup(value); } else if (str_equal_i(name, "algorithm")) { if (str_equal_i(value, "MD5")) credentials->u.digest.algorithm = ALGORITHM_MD5; else credentials->u.digest.algorithm = ALGORITHM_MD5; } else if (str_equal_i(name, "qop")) { if (str_equal_i(value, "auth")) credentials->u.digest.qop = QOP_AUTH; else if (str_equal_i(value, "auth-int")) credentials->u.digest.qop = QOP_AUTH_INT; else credentials->u.digest.qop = QOP_NONE; } else if (str_equal_i(name, "cnonce")) { if (credentials->u.digest.cnonce != NULL) goto bail; credentials->u.digest.cnonce = Strdup(value); } else if (str_equal_i(name, "nc")) { if (credentials->u.digest.nc != NULL) goto bail; credentials->u.digest.nc = Strdup(value); } free(name); free(value); while (is_space_char(*p)) p++; if (*p == ',') { p++; while (is_space_char(*p)) p++; if (*p == '\0') goto bail; } s = p; } } return s; bail: http_credentials_free(credentials); return NULL; }
/*
 * Build a normalised "digest" of the SQL text in s and capture its first
 * block comment.
 *
 * At most QUERY_DIGEST_MAX_LENGTH characters of s are examined (_len is
 * clamped).  While copying s into the result:
 *   - leading whitespace is dropped, every whitespace character becomes ' '
 *     and runs of whitespace are collapsed;
 *   - quoted strings (single or double quotes) are replaced by '?';
 *   - tokens accepted by is_digit_string() are replaced by '?';
 *   - block comments and '#' comments are removed, except that the body of
 *     a block comment whose first character is '!' is kept, minus its
 *     leading '/', '*', '!', digit and space characters;
 *   - characters are lower-cased when
 *     mysql_thread___query_digests_lowercase is non-zero.
 *
 * The text of the first block comment is written to *first_comment, which
 * is allocated with malloc(FIRST_COMMENT_MAX_LENGTH) if it is NULL.
 *
 * Returns a malloc'ed, NUL-terminated buffer.  It is not freed here, so
 * presumably the caller owns it.
 * NOTE(review): neither malloc() result is checked for NULL.
 */
char *mysql_query_digest_and_first_comment(char *s, int _len, char **first_comment){
	int i = 0;

	/* Body of the block comment being read; only filled for "!" comments. */
	char cur_comment[FIRST_COMMENT_MAX_LENGTH];
	cur_comment[0]=0;
	int ccl=0;   /* number of characters stored in cur_comment */
	int cmd=0;   /* non-zero while inside a block comment that starts with '!' */

	int len = _len;
	if (_len > QUERY_DIGEST_MAX_LENGTH) {
		len = QUERY_DIGEST_MAX_LENGTH;
	}
	char *r = (char *) malloc(len + SIZECHAR);

	char *p_r = r;     /* write position in the result */
	char *p_r_t = r;   /* write position where the current token started */

	char prev_char = 0;
	char qutr_char = 0;   /* quote character that opened the current string */

	/* 0 = plain text, 1 = block comment, 2 = '#' comment, 3 = string,
	   4 = possible number */
	char flag = 0;
	char fc=0;      /* first comment: 0 = not seen, 1 = capturing, 2 = done */
	int fc_len=0;   /* characters written to *first_comment */

	char fns=0;     /* set once the first non-space character has been seen */

	bool lowercase=0;
	lowercase=mysql_thread___query_digests_lowercase;

	while(i < len)
	{
		// =================================================
		// START - read token char and set flag what's going on.
		// =================================================
		if(flag == 0)
		{
			// store current position
			p_r_t = p_r;

			// comment type 1 - starts with slash-star
			if(prev_char == '/' && *s == '*')
			{
				ccl=0;
				flag = 1;
				if (*(s+1)=='!')
					cmd=1;
			}

			// comment type 2 - start with '#'
			else if(*s == '#')
			{
				flag = 2;
			}

			// string - start with a single or double quote
			else if(*s == '\'' || *s == '"')
			{
				flag = 3;
				qutr_char = *s;
			}

			// may be digit - start with digit
			else if(is_token_char(prev_char) && is_digit_char(*s))
			{
				flag = 4;
				// a digit that is the very last character is handled by
				// the flag == 4 branch on the next pass
				if(len == i+1)
					continue;
			}

			// not above case - remove duplicated space char
			else
			{
				flag = 0;
				// drop leading whitespace
				if (fns==0 && is_space_char(*s)) {
					s++;
					i++;
					continue;
				}
				if (fns==0) fns=1;
				if(is_space_char(prev_char) && is_space_char(*s)){
					prev_char = ' ';
					*p_r = ' ';
					s++;
					i++;
					continue;
				}
			}
		}

		// =================================================
		// PROCESS and FINISH - do something on each case
		// =================================================
		else
		{
			// --------
			// comment
			// --------
			if (flag == 1) {
				if (cmd) {
					if (ccl<FIRST_COMMENT_MAX_LENGTH-1) {
						cur_comment[ccl]=*s;
						ccl++;
					}
				}
				if (fc==0) {
					fc=1;
				}
				if (fc==1) {
					if (fc_len<FIRST_COMMENT_MAX_LENGTH-1) {
						if (*first_comment==NULL) {
							*first_comment=(char *)malloc(FIRST_COMMENT_MAX_LENGTH);
						}
						char *c=*first_comment+fc_len;
						*c = !is_space_char(*s) ? *s : ' ';
						fc_len++;
					}
					if (prev_char == '*' && *s == '/') {
						// drop the comment terminator from the captured text
						if (fc_len>=2) fc_len-=2;
						char *c=*first_comment+fc_len;
						*c=0;
						fc=2;
					}
				}
			}
			if(
				// comment type 1 - block comment closed by star-slash
				(flag == 1 && prev_char == '*' && *s == '/') ||

				// comment type 2 - # ... \n
				(flag == 2 && (*s == '\n' || *s == '\r'))
			)
			{
				// Rewind the output to where the comment began.  For a block
				// comment that is one SIZECHAR earlier, to also discard the
				// '/' copied before the '*' was seen.
				p_r = flag == 1 ? p_r_t - SIZECHAR : p_r_t;
				if (cmd) {
					cur_comment[ccl]=0;
					if (ccl>=2) {
						// strip the comment terminator
						ccl-=2;
						cur_comment[ccl]=0;
						char el=0;
						int fcc=0;
						// skip the leading '/', '*', '!', digits and spaces
						while (el==0 && fcc<ccl ) {
							switch (cur_comment[fcc]) {
								case '/':
								case '*':
								case '!':
								case '0':
								case '1':
								case '2':
								case '3':
								case '4':
								case '5':
								case '6':
								case '7':
								case '8':
								case '9':
								case ' ':
									fcc++;
									break;
								default:
									el=1;
									break;
							}
						}
						// keep what is left of the comment body in the digest
						if (el) {
							memcpy(p_r,cur_comment+fcc,ccl-fcc);
							p_r+=(ccl-fcc);
							*p_r++=' ';
						}
					}
					cmd=0;
				}
				prev_char = ' ';
				flag = 0;
				s++;
				i++;
				continue;
			}

			// --------
			// string
			// --------
			else if(flag == 3)
			{
				// Last char process
				if(len == i + 1)
				{
					p_r = p_r_t;
					*p_r++ = '?';
					flag = 0;
					break;
				}

				// need to be ignored case
				if(p_r > p_r_t + SIZECHAR)
				{
					if(
						(prev_char == '\\' && *s == '\\') ||		// escaped backslash
						(prev_char == '\\' && *s == qutr_char) ||	// backslash-escaped quote
						(prev_char == qutr_char && *s == qutr_char)	// doubled quote
					)
					{
						prev_char = 'X';
						s++;
						i++;
						continue;
					}
				}

				// satisfied closing string - swap string to ?
				if(*s == qutr_char && (len == i+1 || *(s + SIZECHAR) != qutr_char))
				{
					p_r = p_r_t;
					*p_r++ = '?';
					flag = 0;
					if(i < len)
						s++;
					i++;
					continue;
				}
			}

			// --------
			// digit
			// --------
			else if(flag == 4)
			{
				// last single char
				if(p_r_t == p_r)
				{
					*p_r++ = '?';
					i++;
					continue;
				}

				// token char or last char
				if(is_token_char(*s) || len == i+1)
				{
					if(is_digit_string(p_r_t, p_r))
					{
						p_r = p_r_t;
						*p_r++ = '?';
						if(len == i+1)
						{
							if(is_token_char(*s))
								*p_r++ = *s;
							i++;
							continue;
						}
					}
					flag = 0;
				}
			}
		}

		// =================================================
		// COPY CHAR
		// =================================================
		// convert every space char to ' '
		if (lowercase==0) {
			*p_r++ = !is_space_char(*s) ? *s : ' ';
		} else {
			// tolower() is only defined for EOF and values representable as
			// unsigned char, so a plain char must be converted first.
			*p_r++ = !is_space_char(*s) ? (tolower((unsigned char)*s)) : ' ';
		}
		prev_char = *s++;

		i++;
	}

	// remove a trailing space
	if (p_r>r) {
		char *e=p_r;
		e--;
		if (*e==' ') {
			*e=0;
		}
	}

	*p_r = 0;

	// process query stats
	return r;
}
/*
 * Build a normalised "digest" of the first len characters of the SQL text
 * in s and capture its first block comment.
 *
 * While copying s into the result:
 *   - leading whitespace is dropped, every whitespace character becomes ' '
 *     and runs of whitespace are collapsed;
 *   - quoted strings (single or double quotes) are replaced by '?';
 *   - tokens accepted by is_digit_string() are replaced by '?';
 *   - block comments and '#' comments are removed.
 *
 * The text of the first block comment is written to first_comment; at most
 * FIRST_COMMENT_MAX_LENGTH-1 characters are stored, so the buffer
 * presumably has to hold FIRST_COMMENT_MAX_LENGTH bytes - confirm against
 * the callers.
 *
 * Returns a malloc'ed, NUL-terminated buffer.  It is not freed here, so
 * presumably the caller owns it.
 * NOTE(review): the malloc() result is not checked for NULL.
 */
char *mysql_query_digest_and_first_comment(char *s, int len, char *first_comment){
	int i = 0;

	char *r = (char *) malloc(len + SIZECHAR);

	char *p_r = r;     /* write position in the result */
	char *p_r_t = r;   /* write position where the current token started */

	char prev_char = 0;
	char qutr_char = 0;   /* quote character that opened the current string */

	/* 0 = plain text, 1 = block comment, 2 = '#' comment, 3 = string,
	   4 = possible number */
	char flag = 0;
	char fc=0;      /* first comment: 0 = not seen, 1 = capturing, 2 = done */
	int fc_len=0;   /* characters written to first_comment */

	char fns=0;     /* set once the first non-space character has been seen */

	while(i < len)
	{
		// =================================================
		// START - read token char and set flag what's going on.
		// =================================================
		if(flag == 0)
		{
			// store current position
			p_r_t = p_r;

			// comment type 1 - starts with slash-star
			if(prev_char == '/' && *s == '*')
			{
				flag = 1;
			}

			// comment type 2 - start with '#'
			else if(*s == '#')
			{
				flag = 2;
			}

			// string - start with a single or double quote
			else if(*s == '\'' || *s == '"')
			{
				flag = 3;
				qutr_char = *s;
			}

			// may be digit - start with digit
			else if(is_token_char(prev_char) && is_digit_char(*s))
			{
				flag = 4;
				// a digit that is the very last character is handled by
				// the flag == 4 branch on the next pass
				if(len == i+1)
					continue;
			}

			// not above case - remove duplicated space char
			else
			{
				flag = 0;
				// drop leading whitespace
				if (fns==0 && is_space_char(*s)) {
					s++;
					i++;
					continue;
				}
				if (fns==0) fns=1;
				if(is_space_char(prev_char) && is_space_char(*s)){
					prev_char = ' ';
					*p_r = ' ';
					s++;
					i++;
					continue;
				}
			}
		}

		// =================================================
		// PROCESS and FINISH - do something on each case
		// =================================================
		else
		{
			// --------
			// comment
			// --------
			if (flag == 1) {
				if (fc==0) {
					fc=1;
				}
				if (fc==1) {
					if (fc_len<FIRST_COMMENT_MAX_LENGTH-1) {
						first_comment[fc_len]= !is_space_char(*s) ? *s : ' ';
						fc_len++;
					}
					if (prev_char == '*' && *s == '/') {
						// drop the comment terminator from the captured text
						if (fc_len>=2) fc_len-=2;
						first_comment[fc_len]=0;
						fc=2;
					}
				}
			}
			if(
				// comment type 1 - block comment closed by star-slash
				(flag == 1 && prev_char == '*' && *s == '/') ||

				// comment type 2 - # ... \n
				(flag == 2 && (*s == '\n' || *s == '\r'))
			)
			{
				// Rewind the output to where the comment began.  For a block
				// comment that is one SIZECHAR earlier, to also discard the
				// '/' copied before the '*' was seen.
				p_r = flag == 1 ? p_r_t - SIZECHAR : p_r_t;
				prev_char = ' ';
				flag = 0;
				s++;
				i++;
				continue;
			}

			// --------
			// string
			// --------
			else if(flag == 3)
			{
				// Last char process
				if(len == i + 1)
				{
					p_r = p_r_t;
					*p_r++ = '?';
					flag = 0;
					break;
				}

				// need to be ignored case
				if(p_r > p_r_t + SIZECHAR)
				{
					if(
						(prev_char == '\\' && *s == '\\') ||		// escaped backslash
						(prev_char == '\\' && *s == qutr_char) ||	// backslash-escaped quote
						(prev_char == qutr_char && *s == qutr_char)	// doubled quote
					)
					{
						prev_char = 'X';
						s++;
						i++;
						continue;
					}
				}

				// satisfied closing string - swap string to ?
				if(*s == qutr_char && (len == i+1 || *(s + SIZECHAR) != qutr_char))
				{
					p_r = p_r_t;
					*p_r++ = '?';
					flag = 0;
					if(i < len)
						s++;
					i++;
					continue;
				}
			}

			// --------
			// digit
			// --------
			else if(flag == 4)
			{
				// last single char
				if(p_r_t == p_r)
				{
					*p_r++ = '?';
					i++;
					continue;
				}

				// token char or last char
				if(is_token_char(*s) || len == i+1)
				{
					if(is_digit_string(p_r_t, p_r))
					{
						p_r = p_r_t;
						*p_r++ = '?';
						if(len == i+1)
						{
							if(is_token_char(*s))
								*p_r++ = *s;
							i++;
							continue;
						}
					}
					flag = 0;
				}
			}
		}

		// =================================================
		// COPY CHAR
		// =================================================
		// convert every space char to ' '
		*p_r++ = !is_space_char(*s) ? *s : ' ';
		prev_char = *s++;

		i++;
	}

	// remove a trailing space
	if (p_r>r) {
		char *e=p_r;
		e--;
		if (*e==' ') {
			*e=0;
		}
	}

	*p_r = 0;

	// process query stats
	return r;
}
/*
 * Return non-zero if c may appear in a hexadecimal integer token.
 *
 * Decimal digits and the letters A-F are accepted in either case, so that
 * "0xff" is recognised as well as "0xFF".  The letter 'x' is accepted too:
 * callers that scan a literal from its leading '0' rely on this to step
 * over the "0x" prefix.
 */
static int is_hex_char(char c)
{
	return is_digit_char(c)
	    || (c >= 'A' && c <= 'F')
	    || (c >= 'a' && c <= 'f')
	    || c == 'x';
}
/*
 * Read the next token starting at state->ptr.
 *
 * Leading space characters (' ' only) are skipped.  The token kinds are:
 *   T_COMMENT    ';' up to, but not including, the end of the line
 *   T_NEWLINE    a single '\n'
 *   T_NAME       a run of is_name_char() characters
 *   T_LABEL      ':' followed by a run of is_name_char() characters
 *   T_INT_HEX    a digit whose next character is 'x', then is_hex_char()
 *                characters
 *   T_INT_DEC    a run of digits
 *   T_BRACKET_L / T_BRACKET_R / T_COMMA / T_PLUS    single characters
 *
 * Returns a newly allocated token whose value is a NUL-terminated copy of
 * the token text, and advances state->ptr past it.  Returns NULL, leaving
 * state->ptr untouched, when the terminating NUL is reached.  Any other
 * character is printed and reported through CRASH().
 *
 * NOTE(review): the malloc() results are not checked for NULL.
 */
token_t * read_token(lexer_state * state)
{
	token_t * t;
	int type, value_size;
	char * left = state->ptr;
	char * right;
	char c;

	// Ignore spaces.
	while ((c = *left) && (c == ' ')) left++;

	right = left;

	if (c == ';')
	{
		// A comment runs to the end of the line.  Also stop at the
		// terminating NUL so that a comment on the last line without a
		// trailing newline does not read past the end of the input.
		while (c != '\n' && c != '\0') c = *(++right);
		type = T_COMMENT;
	}
	else if (c == '\n')
	{
		right++;
		type = T_NEWLINE;
	}
	else if (is_name_char(c))
	{
		while (is_name_char(c)) c = *(++right);
		type = T_NAME;
	}
	else if (c == ':')
	{
		c = *(++right);
		while (is_name_char(c)) c = *(++right);
		type = T_LABEL;
	}
	else if (is_digit_char(c))
	{
		if (*(left + 1) == 'x') // look-ahead
		{
			// The scan starts on the leading digit and steps over the 'x',
			// which is_hex_char() accepts.
			while (is_hex_char(c)) c = *(++right);
			type = T_INT_HEX;
		}
		else
		{
			while (is_digit_char(c)) c = *(++right);
			type = T_INT_DEC;
		}
	}
	else if (c == '[' || c == ']')
	{
		right++;
		type = c == '[' ? T_BRACKET_L : T_BRACKET_R;
	}
	else if (c == ',')
	{
		right++;
		type = T_COMMA;
	}
	else if (c == '+')
	{
		right++;
		type = T_PLUS;
	}
	else if (c == 0)
	{
		// End of input.
		return NULL;
	}
	else
	{
		printf("c: '%c' (0x%02x)\n", c, (int)c);
		CRASH("unhandled token");
	}

	t = (token_t *)malloc(sizeof(token_t));
	t->type = type;
	t->size = value_size = right - left;
	t->value = (char *)malloc(value_size + 1);
	// Copies value_size characters and NUL-terminates.
	strlcpy(t->value, left, value_size + 1);

	state->ptr = right;

	return t;
}