/** * Parses the request method. */ int parse_request_method(struct parser *p) { char first; int r; if (ready(p) == 0) return PARSING_WAIT; first = buffer_get(&(p->buffer), p->mark); if (first == m_get[0]) { r = parse_constant(p, m_get, sizeof(m_get) - 1); if (r != PARSING_DONE) return r; p->request.method = METHOD_GET; } else if (first == m_head[0]) { r = parse_constant(p, m_head, sizeof(m_head) - 1); if (r != PARSING_DONE) return r; p->request.method = METHOD_HEAD; } else if (is_token_char(first)) { // other methods if (ready(p) < 2) return PARSING_WAIT; advance_mark(p, 1); while (is_token_char(buffer_get(&(p->buffer), p->mark))) { if (ready(p) < 2) return PARSING_WAIT; advance_mark(p, 1); } if (buffer_get(&(p->buffer), p->mark) != ' ') return PARSING_ERROR; advance_mark(p, 1); p->request.method = METHOD_OTHER; } else return PARSING_ERROR; return PARSING_DONE; }
/* See section 4.2 of RFC 2616 for header format. */ int http_parse_header(struct http_header **result, const char *header) { const char *p, *q; size_t value_len, value_offset; struct http_header *node, **prev; *result = NULL; prev = result; p = header; while (*p != '\0' && !is_crlf(p)) { /* Get the field name. */ q = p; while (*q != '\0' && is_token_char(*q)) q++; if (*q != ':') { http_header_free(*result); return 400; } node = (struct http_header *) safe_malloc(sizeof(*node)); node->name = mkstr(p, q); node->value = NULL; node->next = NULL; value_len = 0; value_offset = 0; /* Copy the header field value until we hit a CRLF. */ p = q + 1; p = skip_lws(p); for (;;) { q = p; while (*q != '\0' && !is_space_char(*q) && !is_crlf(q)) { /* Section 2.2 of RFC 2616 disallows control characters. */ if (iscntrl((int) (unsigned char) *q)) { http_header_node_free(node); return 400; } q++; } strbuf_append(&node->value, &value_len, &value_offset, p, q - p); p = skip_lws(q); if (is_crlf(p)) break; /* Replace LWS with a single space. */ strbuf_append_str(&node->value, &value_len, &value_offset, " "); } *prev = node; prev = &node->next; p = skip_crlf(p); } return 0; }
/** * Parses an HTTP header name. Some headers are treated especially, as * Content-Length, while others are ignored. */ int parse_header_name(struct parser *p) { int n, r; n = sizeof(h_content_length) - 1; if (ready(p) < n) return PARSING_WAIT; // Note: If you need to check for other header names, // implement an actual FSM instead of comparing to value if (p->state != PARSING_HEADER_NAME_ANY && p->request.method == METHOD_OTHER && buffer_istarts_with( &(p->buffer), p->mark, h_content_length, n)) { // Content-Length p->state = PARSING_HEADER_CONTENT_LENGTH; advance_mark(p, n); return PARSING_DONE; } // other headers p->state = PARSING_HEADER_NAME_ANY; if (ready(p) < 2) return PARSING_WAIT; if (!is_token_char(buffer_get(&(p->buffer), p->mark))) return PARSING_ERROR; advance_mark(p, 1); while (is_token_char(buffer_get(&(p->buffer), p->mark))) { if (ready(p) < 2) return PARSING_WAIT; advance_mark(p, 1); } if (buffer_get(&(p->buffer), p->mark) != ':') return PARSING_ERROR; advance_mark(p, 1); p->state = PARSING_HEADER_VALUE; return PARSING_DONE; }
/* Extract the next token from a space-separated string.
 *
 * Only the space character counts as a separator, so any other linear
 * whitespace must already have been normalized to single spaces (the header
 * parser does that when it copies field values).
 *
 * On success *token receives a copy of the token made with mkstr() and the
 * return value points just past the token. When no token character follows
 * the leading spaces, NULL is returned and *token is left untouched. */
static const char *read_token(const char *s, char **token)
{
    const char *begin, *end;

    for (begin = s; *begin == ' '; begin++)
        ;
    for (end = begin; is_token_char(*end); end++)
        ;

    if (end == begin)
        return NULL;

    *token = mkstr(begin, end);

    return end;
}
/*
 * Reads the next token from conf->intern_file into conf->intern_tok.
 *
 * The token type starts out as default_tok. Bytes are read one at a time:
 * separators before the token are skipped, and a separator after at least
 * one byte ends it. A byte for which is_token_char() returns non-zero ends
 * the scan immediately (NOTE(review): presumably is_token_char() updates
 * conf->intern_tok.type itself; confirm against its definition).
 *
 * For TOK_DATA and TOK_ID tokens the collected bytes are stored as a
 * NUL-terminated heap string in conf->intern_tok.content.
 *
 * Returns -1 when the content cannot be allocated or the token does not fit
 * in the internal 512-byte buffer; otherwise 1 if end-of-file has been
 * reached on the stream and 0 if not.
 */
int libsconf_lex(libsconf_t *conf, libsconf_token_type_e default_tok)
{
    char buf[512];
    char ch = 0;
    size_t pos = 0;

    conf->intern_tok.type = default_tok;

    while (fread(&ch, 1, 1, conf->intern_file) > 0) {
        if (is_separator(ch)) {
            /* No token found yet */
            if (pos == 0)
                continue;
            /* token finish */
            break;
        } else if (is_token_char(ch, conf)) {
            break;
        }

        /* Refuse over-long tokens instead of writing past the end of buf. */
        if (pos >= sizeof(buf))
            return -1;
        buf[pos++] = ch;
    }

    /* Store the content of the token if needed */
    if (conf->intern_tok.type == TOK_DATA || conf->intern_tok.type == TOK_ID) {
        conf->intern_tok.content = calloc(1, pos + 1);
        if (conf->intern_tok.content == NULL)
            return -1;
        /* calloc() zero-filled pos + 1 bytes, so the copy stays terminated. */
        memcpy(conf->intern_tok.content, buf, pos);
    }

    return !!feof(conf->intern_file);
}
int http_parse_request_line(const char *line, struct http_request *request) { const char *p, *q; struct uri *uri; char *uri_s; http_request_init(request); p = line; while (*p == ' ') p++; /* Method (CONNECT, GET, etc.). */ q = p; while (is_token_char(*q)) q++; if (p == q) goto badreq; request->method = mkstr(p, q); /* URI. */ p = q; while (*p == ' ') p++; q = p; while (*q != '\0' && *q != ' ') q++; if (p == q) goto badreq; uri_s = mkstr(p, q); /* RFC 2616, section 5.1.1: The method is case-sensitive. RFC 2616, section 5.1.2: Request-URI = "*" | absoluteURI | abs_path | authority The absoluteURI form is REQUIRED when the request is being made to a proxy... The authority form is only used by the CONNECT method. */ if (strcmp(request->method, "CONNECT") == 0) { uri = uri_parse_authority(&request->uri, uri_s); } else { uri = uri_parse(&request->uri, uri_s); } free(uri_s); if (uri == NULL) /* The URI parsing failed. */ goto badreq; /* Version number. */ p = q; while (*p == ' ') p++; if (*p == '\0') { /* No HTTP/X.X version number indicates version 0.9. */ request->version = HTTP_09; } else { q = parse_http_version(p, &request->version); if (p == q) goto badreq; } return 0; badreq: http_request_free(request); return 400; }
// Builds a normalized digest of a query and captures its first C-style
// comment.
//
// At most QUERY_DIGEST_MAX_LENGTH bytes of s are scanned. The digest is
// written to a buffer obtained from malloc() and returned; the function never
// frees it. While copying:
//   - leading whitespace is dropped, every whitespace character becomes ' '
//     and consecutive whitespace is collapsed;
//   - quoted strings (single or double quotes) are replaced by '?';
//   - a run that starts with a digit right after a character accepted by
//     is_token_char(), and that is_digit_string() accepts, is replaced by '?';
//   - comments are removed, except that the body of a comment opened with
//     slash-star-bang is kept (minus its leading marker/digit/space prefix);
//   - characters are lowercased when mysql_thread___query_digests_lowercase
//     is set.
//
// s:             query text; it is read up to the scanned length.
// _len:          number of bytes available in s.
// first_comment: in/out. The text of the first slash-star comment is stored
//                in *first_comment; when *first_comment is NULL a buffer of
//                FIRST_COMMENT_MAX_LENGTH bytes is allocated with malloc().
//
// Returns the digest, or NULL if the digest buffer cannot be allocated.
char *mysql_query_digest_and_first_comment(char *s, int _len, char **first_comment){
    int i = 0;

    // body of the current slash-star-bang comment
    char cur_comment[FIRST_COMMENT_MAX_LENGTH];
    cur_comment[0]=0;
    int ccl=0;   // number of bytes stored in cur_comment
    int cmd=0;   // non-zero while inside a slash-star-bang comment

    int len = _len;
    if (_len > QUERY_DIGEST_MAX_LENGTH) {
        len = QUERY_DIGEST_MAX_LENGTH;
    }
    char *r = (char *) malloc(len + SIZECHAR);
    if (r == NULL) {
        // out of memory: report it instead of writing through NULL
        return NULL;
    }

    char *p_r = r;     // write position in the digest
    char *p_r_t = r;   // write position where the current element started

    char prev_char = 0;
    char qutr_char = 0;   // quote character that opened the current string

    // 0 = plain text, 1 = slash-star comment, 2 = '#' comment,
    // 3 = quoted string, 4 = possible number
    char flag = 0;
    char fc=0;         // first comment: 0 = not seen, 1 = capturing, 2 = done
    int fc_len=0;      // bytes captured into *first_comment

    char fns=0;        // set once the first non-space character was seen

    bool lowercase=0;
    lowercase=mysql_thread___query_digests_lowercase;

    while(i < len)
    {
        // =================================================
        // START - read token char and set flag what's going on.
        // =================================================
        if(flag == 0)
        {
            // store current position
            p_r_t = p_r;

            // comment type 1 - start with '/*'
            if(prev_char == '/' && *s == '*')
            {
                ccl=0;
                flag = 1;
                // only look at the next byte when it is inside the scanned range
                if (i + 1 < len && *(s+1)=='!')
                    cmd=1;
            }

            // comment type 2 - start with '#'
            else if(*s == '#')
            {
                flag = 2;
            }

            // string - start with '
            else if(*s == '\'' || *s == '"')
            {
                flag = 3;
                qutr_char = *s;
            }

            // may be digit - start with digit
            else if(is_token_char(prev_char) && is_digit_char(*s))
            {
                flag = 4;
                // last byte of the input: let the digit branch below emit it
                if(len == i+1)
                    continue;
            }

            // not above case - remove duplicated space char
            else
            {
                flag = 0;
                if (fns==0 && is_space_char(*s)) {
                    s++;
                    i++;
                    continue;
                }
                if (fns==0) fns=1;
                if(is_space_char(prev_char) && is_space_char(*s)){
                    prev_char = ' ';
                    *p_r = ' ';
                    s++;
                    i++;
                    continue;
                }
            }
        }

        // =================================================
        // PROCESS and FINISH - do something on each case
        // =================================================
        else
        {
            // --------
            // comment
            // --------
            if (flag == 1) {
                if (cmd) {
                    if (ccl<FIRST_COMMENT_MAX_LENGTH-1) {
                        cur_comment[ccl]=*s;
                        ccl++;
                    }
                }
                if (fc==0) {
                    fc=1;
                }
                if (fc==1) {
                    if (fc_len<FIRST_COMMENT_MAX_LENGTH-1) {
                        if (*first_comment==NULL) {
                            *first_comment=(char *)malloc(FIRST_COMMENT_MAX_LENGTH);
                        }
                        if (*first_comment==NULL) {
                            // allocation failed: give up capturing the comment
                            fc=2;
                        } else {
                            char *c=*first_comment+fc_len;
                            *c = !is_space_char(*s) ? *s : ' ';
                            fc_len++;
                        }
                    }
                    if (fc==1 && prev_char == '*' && *s == '/') {
                        // drop the two closing bytes from the captured text
                        if (fc_len>=2) fc_len-=2;
                        char *c=*first_comment+fc_len;
                        *c=0;
                        fc=2;
                    }
                }
            }
            if(
                // comment type 1 - /* .. */
                (flag == 1 && prev_char == '*' && *s == '/') ||

                // comment type 2 - # ... \n
                (flag == 2 && (*s == '\n' || *s == '\r'))
              )
            {
                // rewind the digest; for type 1 also drop the '/' already copied
                p_r = flag == 1 ? p_r_t - SIZECHAR : p_r_t;
                if (cmd) {
                    cur_comment[ccl]=0;
                    if (ccl>=2) {
                        // strip the two closing bytes
                        ccl-=2;
                        cur_comment[ccl]=0;
                        char el=0;
                        int fcc=0;
                        // skip the leading run of '/', '*', '!', digits and spaces
                        while (el==0 && fcc<ccl ) {
                            switch (cur_comment[fcc]) {
                                case '/':
                                case '*':
                                case '!':
                                case '0':
                                case '1':
                                case '2':
                                case '3':
                                case '4':
                                case '5':
                                case '6':
                                case '7':
                                case '8':
                                case '9':
                                case ' ':
                                    fcc++;
                                    break;
                                default:
                                    el=1;
                                    break;
                            }
                        }
                        if (el) {
                            // keep the remaining comment body in the digest
                            memcpy(p_r,cur_comment+fcc,ccl-fcc);
                            p_r+=(ccl-fcc);
                            *p_r++=' ';
                        }
                    }
                    cmd=0;
                }
                prev_char = ' ';
                flag = 0;
                s++;
                i++;
                continue;
            }

            // --------
            // string
            // --------
            else if(flag == 3)
            {
                // Last char process
                if(len == i + 1)
                {
                    p_r = p_r_t;
                    *p_r++ = '?';
                    flag = 0;
                    break;
                }

                // need to be ignored case
                if(p_r > p_r_t + SIZECHAR)
                {
                    if(
                        (prev_char == '\\' && *s == '\\') ||      // to process '\\\\', '\\'
                        (prev_char == '\\' && *s == qutr_char) || // to process '\''
                        (prev_char == qutr_char && *s == qutr_char) // to process ''''
                      )
                    {
                        prev_char = 'X';
                        s++;
                        i++;
                        continue;
                    }
                }

                // satisfied closing string - swap string to ?
                if(*s == qutr_char && (len == i+1 || *(s + SIZECHAR) != qutr_char))
                {
                    p_r = p_r_t;
                    *p_r++ = '?';
                    flag = 0;
                    if(i < len)
                        s++;
                    i++;
                    continue;
                }
            }

            // --------
            // digit
            // --------
            else if(flag == 4)
            {
                // last single char
                if(p_r_t == p_r)
                {
                    *p_r++ = '?';
                    i++;
                    continue;
                }

                // token char or last char
                if(is_token_char(*s) || len == i+1)
                {
                    if(is_digit_string(p_r_t, p_r))
                    {
                        p_r = p_r_t;
                        *p_r++ = '?';
                        if(len == i+1)
                        {
                            if(is_token_char(*s))
                                *p_r++ = *s;
                            i++;
                            continue;
                        }
                    }
                    flag = 0;
                }
            }
        }

        // =================================================
        // COPY CHAR
        // =================================================
        // convert every space char to ' '
        if (lowercase==0) {
            *p_r++ = !is_space_char(*s) ? *s : ' ';
        } else {
            // tolower() is only defined for unsigned char values and EOF
            *p_r++ = !is_space_char(*s) ? (char) tolower((unsigned char) *s) : ' ';
        }
        prev_char = *s++;

        i++;
    }

    // remove a trailing space
    if (p_r>r) {
        char *e=p_r;
        e--;
        if (*e==' ') {
            *e=0;
        }
    }

    *p_r = 0;

    // process query stats
    return r;
}
char *mysql_query_digest_and_first_comment(char *s, int len, char *first_comment){ int i = 0; char *r = (char *) malloc(len + SIZECHAR); char *p_r = r; char *p_r_t = r; char prev_char = 0; char qutr_char = 0; char flag = 0; char fc=0; int fc_len=0; char fns=0; while(i < len) { // ================================================= // START - read token char and set flag what's going on. // ================================================= if(flag == 0) { // store current position p_r_t = p_r; // comment type 1 - start with '/*' if(prev_char == '/' && *s == '*') { flag = 1; } // comment type 2 - start with '#' else if(*s == '#') { flag = 2; } // string - start with ' else if(*s == '\'' || *s == '"') { flag = 3; qutr_char = *s; } // may be digit - start with digit else if(is_token_char(prev_char) && is_digit_char(*s)) { flag = 4; if(len == i+1) continue; } // not above case - remove duplicated space char else { flag = 0; if (fns==0 && is_space_char(*s)) { s++; i++; continue; } if (fns==0) fns=1; if(is_space_char(prev_char) && is_space_char(*s)){ prev_char = ' '; *p_r = ' '; s++; i++; continue; } } } // ================================================= // PROCESS and FINISH - do something on each case // ================================================= else { // -------- // comment // -------- if (flag == 1) { if (fc==0) { fc=1; } if (fc==1) { if (fc_len<FIRST_COMMENT_MAX_LENGTH-1) { first_comment[fc_len]= !is_space_char(*s) ? *s : ' '; fc_len++; } if (prev_char == '*' && *s == '/') { if (fc_len>=2) fc_len-=2; first_comment[fc_len]=0; fc=2; } } } if( // comment type 1 - /* .. */ (flag == 1 && prev_char == '*' && *s == '/') || // comment type 2 - # ... \n (flag == 2 && (*s == '\n' || *s == '\r')) ) { p_r = flag == 1 ? 
p_r_t - SIZECHAR : p_r_t; prev_char = ' '; flag = 0; s++; i++; continue; } // -------- // string // -------- else if(flag == 3) { // Last char process if(len == i + 1) { p_r = p_r_t; *p_r++ = '?'; flag = 0; break; } // need to be ignored case if(p_r > p_r_t + SIZECHAR) { if( (prev_char == '\\' && *s == '\\') || // to process '\\\\', '\\' (prev_char == '\\' && *s == qutr_char) || // to process '\'' (prev_char == qutr_char && *s == qutr_char) // to process '''' ) { prev_char = 'X'; s++; i++; continue; } } // satisfied closing string - swap string to ? if(*s == qutr_char && (len == i+1 || *(s + SIZECHAR) != qutr_char)) { p_r = p_r_t; *p_r++ = '?'; flag = 0; if(i < len) s++; i++; continue; } } // -------- // digit // -------- else if(flag == 4) { // last single char if(p_r_t == p_r) { *p_r++ = '?'; i++; continue; } // token char or last char if(is_token_char(*s) || len == i+1) { if(is_digit_string(p_r_t, p_r)) { p_r = p_r_t; *p_r++ = '?'; if(len == i+1) { if(is_token_char(*s)) *p_r++ = *s; i++; continue; } } flag = 0; } } } // ================================================= // COPY CHAR // ================================================= // convert every space char to ' ' *p_r++ = !is_space_char(*s) ? *s : ' '; prev_char = *s++; i++; } // remove a trailing space if (p_r>r) { char *e=p_r; e--; if (*e==' ') { *e=0; } } *p_r = 0; // process query stats return r; }
int parser_context::parse_one(const char *input, int state, int *sym, int *readnum) const { if (!input || !*input) return 0; /* comment state */ if (state == 1) { if (input[0] == '\n') { *sym = _NEWLINE; *readnum = 1; } else if (input[0] == '/' && input[1] == '*') { *sym = _OCOMMENT; *readnum = 2; } else if (input[0] == '*' && input[1] == '/') { *sym = _CCOMMENT; *readnum = 2; } else { *sym = -1; int i = 0; while (input[i]) { if (input[i] == '\n') { break; } else if (input[i] == '/' && input[i+1] == '*') { break; } else if (input[i] == '*' &&input[i+1] == '/') { break; } i++; } *readnum = i; } } else { int c = -1, s = -1; if (input[0] == '\n') { c = 1; s = _NEWLINE; } else if (isspace(input[0])) { c = 1; for (; isspace(input[c]); c++); } else if (input[0] == '/' && input[1] == '*') { c = 2; s = _OCOMMENT; } else if (input[0] == '/' && input[1] == '/') { c = 2; s = _LCOMMENT; } else if (input[0] == '*' && input[1] == '/') { c = 2; s = _CCOMMENT; } else if (input[0] == '{') { c = 1; s = _LCURLY; } else if (input[0] == '}') { c = 1; s = _RCURLY; } else if (input[0] == '(') { c = 1; s = _LPARENT; } else if (input[0] == ')') { c = 1; s = _RPARENT; } else if (input[0] == ';') { c = 1; s = _TERM; } else if (input[0] == '=') { c = 1; s = _EQUAL; } else if (input[0] == '+' && input[1] == '=') { c = 2; s = _PLUSEQUAL; } else if (input[0] == '.') { c = 1; s = _DOT; } else if (input[0] == ',') { c = 1; s = _COMMA; } else if (is_token_char(input[0])) { c = 1; for (; is_token_char(input[c]); c++); if (input[c] == '?') { c++; s = _PARTIAL_TOKEN; } else { s = _TOKEN; } } else if (input[0] == '?') { c = 1; s = _PARTIAL_TOKEN; } else if (input[0] == '*') { c = 1; s = _TOKEN; } else if (input[0] == '\"') { c = 1; for (; input[c] && input[c] != '\"'; c++); if (input[c] == '\"') { c++; s = _STRING; } else { c = -1; } } *readnum = c; *sym = s; if (*readnum == -1) return -1; } return 1; }