コード例 #1
0
ファイル: c_tokenizer.c プロジェクト: 3manuek/proxysql-1
// between pointer, check string is number - need to be changed more functions
static char is_digit_string(char *f, char *t)
{
	if(f == t)
	{
		if(is_digit_char(*f))
			return 1;
		else
			return 0;
	}

	int is_hex = 0;
	int i = 0;

	// 0x, 0X
	while(f != t)
	{
		if(i == 1 && *(f-1) == '0' && (*f == 'x' || *f == 'X'))
		{
			is_hex = 1;
		}

		// none hex
		else if(!is_hex && !is_digit_char(*f))
		{
			return 0;
		}

		// hex
		else if(is_hex && !is_hex_char(*f))
		{
			return 0;
		}
		f++;
		i++;
	}

	// need to be added function ----------------
	// 23e
	// 23e+1

	return 1;
}
コード例 #2
0
ファイル: js.c プロジェクト: micha/json-table
static size_t js_scan_digits(jsparser_t *p) {
  size_t start = p->pos;

  while (1) {
    js_ensure_buf(p, 1);
    if (!is_digit_char(js(p)[0])) break;
    p->pos++;
  }

  return p->pos - start;
}
コード例 #3
0
ファイル: utils.c プロジェクト: tfb-bull/lcap
int uri_parse(const char *uri_s, struct uri *uri)
{
    const char *p, *q;

    uri_init(uri);

    /* Scheme, section 3.1. */
    p = uri_s;
    if (!is_alpha_char(*p))
        goto fail;

    q = p;
    while (is_alpha_char(*q)
           || is_digit_char(*q)
           || *q == '+'
           || *q == '-'
           || *q == '.')
        q++;

    if (*q != ':')
        goto fail;

    uri->scheme = mkstr(p, q);
    lowercase(uri->scheme);

    /* Authority, section 3.2. */
    p = q + 1;
    if (*p == '/' && *(p + 1) == '/') {
        char *authority = NULL;

        p += 2;
        q = p;
        while (!(*q == '/' || *q == '?' || *q == '#' || *q == '\0'))
            q++;

        authority = mkstr(p, q);
        if (uri_parse_authority(uri, authority)) {
            free(authority);
            goto fail;
        }
        free(authority);
        p = q;
    }

    q = strchr(p, '\0');
    uri->path = mkstr(p, q);

    return 0;

fail:
    uri_free(uri);
    return -EINVAL;
}
コード例 #4
0
ファイル: http.c プロジェクト: mogi57/nmap-5.61TEST4-android
/* Parse a URI string into a struct URI. Any parts of the URI that are absent
   will become NULL entries in the structure, except for the port which will be
   -1. Returns NULL on error. See RFC 3986, section 3 for syntax. */
struct uri *uri_parse(struct uri *uri, const char *uri_s)
{
    const char *p, *q;

    uri_init(uri);

    /* Scheme, section 3.1. */
    p = uri_s;
    if (!is_alpha_char(*p))
        goto fail;
    for (q = p; is_alpha_char(*q) || is_digit_char(*q) || *q == '+' || *q == '-' || *q == '.'; q++)
        ;
    if (*q != ':')
        goto fail;
    uri->scheme = mkstr(p, q);
    /* "An implementation should accept uppercase letters as equivalent to
       lowercase in scheme names (e.g., allow "HTTP" as well as "http") for the
       sake of robustness..." */
    lowercase(uri->scheme);

    /* Authority, section 3.2. */
    p = q + 1;
    if (*p == '/' && *(p + 1) == '/') {
        char *authority = NULL;

        p += 2;
        for (q = p; !(*q == '/' || *q == '?' || *q == '#' || *q == '\0'); q++)
            ;
        authority = mkstr(p, q);
        if (uri_parse_authority(uri, authority) == NULL) {
            free(authority);
            goto fail;
        }
        free(authority);

        p = q;
    }
    if (uri->port == -1)
        uri->port = scheme_default_port(uri->scheme);

    /* Path, section 3.3. We include the query and fragment in the path. The
       path is also not percent-decoded because we just pass it on to the origin
       server. */
    q = strchr(p, '\0');
    uri->path = mkstr(p, q);

    return uri;

fail:
    uri_free(uri);
    return NULL;
}
コード例 #5
0
ファイル: preprocessor.cpp プロジェクト: AtlantisCD9/Qt
static Symbols tokenize(const QByteArray &input, int lineNum = 1, TokenizeMode mode = TokenizeCpp)
{
    Symbols symbols;
    const char *begin = input;
    const char *data = begin;
    while (*data) {
        if (mode == TokenizeCpp) {
            int column = 0;

            const char *lexem = data;
            int state = 0;
            Token token = NOTOKEN;
            for (;;) {
                if (static_cast<signed char>(*data) < 0) {
                    ++data;
                    continue;
                }
                int nextindex = keywords[state].next;
                int next = 0;
                if (*data == keywords[state].defchar)
                    next = keywords[state].defnext;
                else if (!state || nextindex)
                    next = keyword_trans[nextindex][(int)*data];
                if (!next)
                    break;
                state = next;
                token = keywords[state].token;
                ++data;
            }

            // suboptimal, is_ident_char  should use a table
            if (keywords[state].ident && is_ident_char(*data))
                token = keywords[state].ident;

            if (token == NOTOKEN) {
                // an error really
                ++data;
                continue;
            }

            ++column;

            if (token > SPECIAL_TREATMENT_MARK) {
                switch (token) {
                case QUOTE:
                    data = skipQuote(data);
                    token = STRING_LITERAL;
                    // concatenate multi-line strings for easier
                    // STRING_LITERAAL handling in moc
                    if (!Preprocessor::preprocessOnly
                        && !symbols.isEmpty()
                        && symbols.last().token == STRING_LITERAL) {

                        QByteArray newString = symbols.last().unquotedLexem();
                        newString += input.mid(lexem - begin + 1, data - lexem - 2);
                        newString.prepend('\"');
                        newString.append('\"');
                        symbols.last() = Symbol(symbols.last().lineNum,
                                                STRING_LITERAL,
                                                newString);
                        continue;
                    }
                    break;
                case SINGLEQUOTE:
                    while (*data && (*data != '\''
                                     || (*(data-1)=='\\'
                                         && *(data-2)!='\\')))
                        ++data;
                    if (*data)
                        ++data;
                    token = CHARACTER_LITERAL;
                    break;
                case LANGLE_SCOPE:
                    // split <:: into two tokens, < and ::
                    token = LANGLE;
                    data -= 2;
                    break;
                case DIGIT:
                    while (is_digit_char(*data))
                        ++data;
                    if (!*data || *data != '.') {
                        token = INTEGER_LITERAL;
                        if (data - lexem == 1 &&
                            (*data == 'x' || *data == 'X')
                            && *lexem == '0') {
                            ++data;
                            while (is_hex_char(*data))
                                ++data;
                        }
                        break;
                    }
                    token = FLOATING_LITERAL;
                    ++data;
                    // fall through
                case FLOATING_LITERAL:
                    while (is_digit_char(*data))
                        ++data;
                    if (*data == '+' || *data == '-')
                        ++data;
                    if (*data == 'e' || *data == 'E') {
                        ++data;
                        while (is_digit_char(*data))
                            ++data;
                    }
                    if (*data == 'f' || *data == 'F'
                        || *data == 'l' || *data == 'L')
                        ++data;
                    break;
                case HASH:
                    if (column == 1) {
                        mode = PreparePreprocessorStatement;
                        while (*data && (*data == ' ' || *data == '\t'))
                            ++data;
                        if (is_ident_char(*data))
                            mode = TokenizePreprocessorStatement;
                        continue;
                    }
                    break;
                case NEWLINE:
                    ++lineNum;
                    continue;
                case BACKSLASH:
                {
                    const char *rewind = data;
                    while (*data && (*data == ' ' || *data == '\t'))
                        ++data;
                    if (*data && *data == '\n') {
                        ++data;
                        continue;
                    }
                    data = rewind;
                } break;
                case CHARACTER:
                    while (is_ident_char(*data))
                        ++data;
                    token = IDENTIFIER;
                    break;
                case C_COMMENT:
                    if (*data) {
                        if (*data == '\n')
                            ++lineNum;
                        ++data;
                        if (*data) {
                            if (*data == '\n')
                                ++lineNum;
                            ++data;
                        }
                    }
                    while (*data && (*(data-1) != '/' || *(data-2) != '*')) {
                        if (*data == '\n')
                            ++lineNum;
                        ++data;
                    }
                    token = WHITESPACE; // one comment, one whitespace
                    // fall through;
                case WHITESPACE:
                    if (column == 1)
                        column = 0;
                    while (*data && (*data == ' ' || *data == '\t'))
                        ++data;
                    if (Preprocessor::preprocessOnly) // tokenize whitespace
                        break;
                    continue;
                case CPP_COMMENT:
                    while (*data && *data != '\n')
                        ++data;
                    continue; // ignore safely, the newline is a separator
                default:
                    continue; //ignore
                }
            }
#ifdef USE_LEXEM_STORE
            if (!Preprocessor::preprocessOnly
                && token != IDENTIFIER
                && token != STRING_LITERAL
                && token != FLOATING_LITERAL
                && token != INTEGER_LITERAL)
                symbols += Symbol(lineNum, token);
            else
#endif
                symbols += Symbol(lineNum, token, input, lexem-begin, data-lexem);

        } else { //   Preprocessor

            const char *lexem = data;
            int state = 0;
            Token token = NOTOKEN;
            if (mode == TokenizePreprocessorStatement) {
                state = pp_keyword_trans[0][(int)'#'];
                mode = TokenizePreprocessor;
            }
            for (;;) {
                if (static_cast<signed char>(*data) < 0) {
                    ++data;
                    continue;
                }

                int nextindex = pp_keywords[state].next;
                int next = 0;
                if (*data == pp_keywords[state].defchar)
                    next = pp_keywords[state].defnext;
                else if (!state || nextindex)
                    next = pp_keyword_trans[nextindex][(int)*data];
                if (!next)
                    break;
                state = next;
                token = pp_keywords[state].token;
                ++data;
            }
            // suboptimal, is_ident_char  should use a table
            if (pp_keywords[state].ident && is_ident_char(*data))
                token = pp_keywords[state].ident;

            switch (token) {
            case NOTOKEN:
                ++data;
                break;
            case PP_IFDEF:
                symbols += Symbol(lineNum, PP_IF);
                symbols += Symbol(lineNum, PP_DEFINED);
                continue;
            case PP_IFNDEF:
                symbols += Symbol(lineNum, PP_IF);
                symbols += Symbol(lineNum, PP_NOT);
                symbols += Symbol(lineNum, PP_DEFINED);
                continue;
            case PP_INCLUDE:
                mode = TokenizeInclude;
                break;
            case PP_QUOTE:
                data = skipQuote(data);
                token = PP_STRING_LITERAL;
                break;
            case PP_SINGLEQUOTE:
                while (*data && (*data != '\''
                                 || (*(data-1)=='\\'
                                     && *(data-2)!='\\')))
                    ++data;
                if (*data)
                    ++data;
                token = PP_CHARACTER_LITERAL;
                break;
            case PP_DIGIT:
                while (is_digit_char(*data))
                    ++data;
                if (!*data || *data != '.') {
                    token = PP_INTEGER_LITERAL;
                    if (data - lexem == 1 &&
                        (*data == 'x' || *data == 'X')
                        && *lexem == '0') {
                        ++data;
                        while (is_hex_char(*data))
                            ++data;
                    }
                    break;
                }
                token = PP_FLOATING_LITERAL;
                ++data;
                // fall through
            case PP_FLOATING_LITERAL:
                while (is_digit_char(*data))
                    ++data;
                if (*data == '+' || *data == '-')
                    ++data;
                if (*data == 'e' || *data == 'E') {
                    ++data;
                    while (is_digit_char(*data))
                        ++data;
                }
                if (*data == 'f' || *data == 'F'
                    || *data == 'l' || *data == 'L')
                    ++data;
                break;
            case PP_CHARACTER:
                if (mode == PreparePreprocessorStatement) {
                    // rewind entire token to begin
                    data = lexem;
                    mode = TokenizePreprocessorStatement;
                    continue;
                }
                while (is_ident_char(*data))
                    ++data;
                token = PP_IDENTIFIER;
                break;
            case PP_C_COMMENT:
                if (*data) {
                    if (*data == '\n')
                        ++lineNum;
                    ++data;
                    if (*data) {
                        if (*data == '\n')
                            ++lineNum;
                        ++data;
                    }
                }
                while (*data && (*(data-1) != '/' || *(data-2) != '*')) {
                    if (*data == '\n')
                        ++lineNum;
                    ++data;
                }
                token = PP_WHITESPACE; // one comment, one whitespace
                // fall through;
            case PP_WHITESPACE:
                while (*data && (*data == ' ' || *data == '\t'))
                    ++data;
                continue; // the preprocessor needs no whitespace
            case PP_CPP_COMMENT:
                while (*data && *data != '\n')
                    ++data;
                continue; // ignore safely, the newline is a separator
            case PP_NEWLINE:
                ++lineNum;
                mode = TokenizeCpp;
                break;
            case PP_BACKSLASH:
            {
                const char *rewind = data;
                while (*data && (*data == ' ' || *data == '\t'))
                    ++data;
                if (*data && *data == '\n') {
                    ++data;
                    continue;
                }
                data = rewind;
            } break;
            case PP_LANGLE:
                if (mode != TokenizeInclude)
                    break;
                token = PP_STRING_LITERAL;
                while (*data && *data != '\n' && *(data-1) != '>')
                    ++data;
                break;
            default:
                break;
            }
            if (mode == PreparePreprocessorStatement)
                continue;
#ifdef USE_LEXEM_STORE
            if (token != PP_IDENTIFIER
                && token != PP_STRING_LITERAL
                && token != PP_FLOATING_LITERAL
                && token != PP_INTEGER_LITERAL)
                symbols += Symbol(lineNum, token);
            else
#endif
                symbols += Symbol(lineNum, token, input, lexem-begin, data-lexem);
        }
    }
    symbols += Symbol(); // eof symbol
    return symbols;
}
コード例 #6
0
ファイル: http.c プロジェクト: mogi57/nmap-5.61TEST4-android
static const char *http_read_credentials(const char *s,
    struct http_credentials *credentials)
{
    const char *p;
    char *scheme;

    credentials->scheme = AUTH_UNKNOWN;

    s = read_token(s, &scheme);
    if (s == NULL)
        return NULL;
    if (str_equal_i(scheme, "Basic")) {
        http_credentials_init_basic(credentials);
    } else if (str_equal_i(scheme, "Digest")) {
        http_credentials_init_digest(credentials);
    } else {
        free(scheme);
        return NULL;
    }
    free(scheme);

    while (is_space_char(*s))
        s++;
    if (credentials->scheme == AUTH_BASIC) {
        p = s;
        /* Read base64. */
        while (is_alpha_char(*p) || is_digit_char(*p) || *p == '+' || *p == '/' || *p == '=')
            p++;
        credentials->u.basic = mkstr(s, p);
        while (is_space_char(*p))
            p++;
        s = p;
    } else if (credentials->scheme == AUTH_DIGEST) {
        char *name, *value;

        while (*s != '\0') {
            p = read_token(s, &name);
            if (p == NULL)
                goto bail;
            while (is_space_char(*p))
                p++;
            /* It's not legal to combine multiple Authorization or
               Proxy-Authorization values. The productions are
                 "Authorization" ":" credentials  (section 14.8)
                 "Proxy-Authorization" ":" credentials  (section 14.34)
               Contrast this with WWW-Authenticate and Proxy-Authenticate and
               their handling in http_read_challenge. */
            if (*p != '=')
                goto bail;
            p++;
            while (is_space_char(*p))
                p++;
            p = read_token_or_quoted_string(p, &value);
            if (p == NULL) {
                free(name);
                goto bail;
            }
            if (str_equal_i(name, "username")) {
                if (credentials->u.digest.username != NULL)
                    goto bail;
                credentials->u.digest.username = Strdup(value);
            } else if (str_equal_i(name, "realm")) {
                if (credentials->u.digest.realm != NULL)
                    goto bail;
                credentials->u.digest.realm = Strdup(value);
            } else if (str_equal_i(name, "nonce")) {
                if (credentials->u.digest.nonce != NULL)
                    goto bail;
                credentials->u.digest.nonce = Strdup(value);
            } else if (str_equal_i(name, "uri")) {
                if (credentials->u.digest.uri != NULL)
                    goto bail;
                credentials->u.digest.uri = Strdup(value);
            } else if (str_equal_i(name, "response")) {
                if (credentials->u.digest.response != NULL)
                    goto bail;
                credentials->u.digest.response = Strdup(value);
            } else if (str_equal_i(name, "algorithm")) {
                if (str_equal_i(value, "MD5"))
                    credentials->u.digest.algorithm = ALGORITHM_MD5;
                else
                    credentials->u.digest.algorithm = ALGORITHM_MD5;
            } else if (str_equal_i(name, "qop")) {
                if (str_equal_i(value, "auth"))
                    credentials->u.digest.qop = QOP_AUTH;
                else if (str_equal_i(value, "auth-int"))
                    credentials->u.digest.qop = QOP_AUTH_INT;
                else
                    credentials->u.digest.qop = QOP_NONE;
            } else if (str_equal_i(name, "cnonce")) {
                if (credentials->u.digest.cnonce != NULL)
                    goto bail;
                credentials->u.digest.cnonce = Strdup(value);
            } else if (str_equal_i(name, "nc")) {
                if (credentials->u.digest.nc != NULL)
                    goto bail;
                credentials->u.digest.nc = Strdup(value);
            }
            free(name);
            free(value);
            while (is_space_char(*p))
                p++;
            if (*p == ',') {
                p++;
                while (is_space_char(*p))
                    p++;
                if (*p == '\0')
                    goto bail;
            }
            s = p;
        }
    }

    return s;

bail:
    http_credentials_free(credentials);

    return NULL;
}
コード例 #7
0
ファイル: c_tokenizer.c プロジェクト: 3manuek/proxysql-1
char *mysql_query_digest_and_first_comment(char *s, int _len, char **first_comment){
	int i = 0;

	char cur_comment[FIRST_COMMENT_MAX_LENGTH];
	cur_comment[0]=0;
	int ccl=0;
	int cmd=0;

	int len = _len;
	if (_len > QUERY_DIGEST_MAX_LENGTH) {
		len = QUERY_DIGEST_MAX_LENGTH;
	}
	char *r = (char *) malloc(len + SIZECHAR);

	char *p_r = r;
	char *p_r_t = r;

	char prev_char = 0;
	char qutr_char = 0;

	char flag = 0;
	char fc=0;
	int fc_len=0;

	char fns=0;

	bool lowercase=0;
	lowercase=mysql_thread___query_digests_lowercase;

	while(i < len)
	{
		// =================================================
		// START - read token char and set flag what's going on.
		// =================================================
		if(flag == 0)
		{
			// store current position
			p_r_t = p_r;

			// comment type 1 - start with '/*'
			if(prev_char == '/' && *s == '*')
			{
				ccl=0;
				flag = 1;
				if (*(s+1)=='!')
					cmd=1;
			}

			// comment type 2 - start with '#'
			else if(*s == '#')
			{
				flag = 2;
			}

			// string - start with '
			else if(*s == '\'' || *s == '"')
			{
				flag = 3;
				qutr_char = *s;
			}

			// may be digit - start with digit
			else if(is_token_char(prev_char) && is_digit_char(*s))
			{
				flag = 4;
				if(len == i+1)
					continue;
			}

			// not above case - remove duplicated space char
			else
			{
				flag = 0;
				if (fns==0 && is_space_char(*s)) {
					s++;
					i++;
					continue;
				}
				if (fns==0) fns=1;
				if(is_space_char(prev_char) && is_space_char(*s)){
					prev_char = ' ';
					*p_r = ' ';
					s++;
					i++;
					continue;
				}
			}
		}

		// =================================================
		// PROCESS and FINISH - do something on each case
		// =================================================
		else
		{
			// --------
			// comment
			// --------
			if (flag == 1) {
				if (cmd) {
					if (ccl<FIRST_COMMENT_MAX_LENGTH-1) {
						cur_comment[ccl]=*s;
						ccl++;
					}
				}
				if (fc==0) {
					fc=1;
				}
				if (fc==1) {
					if (fc_len<FIRST_COMMENT_MAX_LENGTH-1) {
						if (*first_comment==NULL) {
							*first_comment=(char *)malloc(FIRST_COMMENT_MAX_LENGTH);
						}
						char *c=*first_comment+fc_len;
						*c = !is_space_char(*s) ? *s : ' ';
						fc_len++;
					}
					if (prev_char == '*' && *s == '/') {
						if (fc_len>=2) fc_len-=2;
						char *c=*first_comment+fc_len;
						*c=0;
						//*first_comment[fc_len]=0;
						fc=2;
					}
				}
			}
			if(
				// comment type 1 - /* .. */
				(flag == 1 && prev_char == '*' && *s == '/') ||

				// comment type 2 - # ... \n
				(flag == 2 && (*s == '\n' || *s == '\r'))
			)
			{
				p_r = flag == 1 ? p_r_t - SIZECHAR : p_r_t;
				if (cmd) {
					cur_comment[ccl]=0;
					if (ccl>=2) {
						ccl-=2;
						cur_comment[ccl]=0;
						char el=0;
						int fcc=0;
						while (el==0 && fcc<ccl ) {
							switch (cur_comment[fcc]) {
								case '/':
								case '*':
								case '!':
								case '0':
								case '1':
								case '2':
								case '3':
								case '4':
								case '5':
								case '6':
								case '7':
								case '8':
								case '9':
								case ' ':
									fcc++;
									break;
								default:
									el=1;
									break;
							}
						}
						if (el) {
							memcpy(p_r,cur_comment+fcc,ccl-fcc);
							p_r+=(ccl-fcc);
							*p_r++=' ';
						}
					}
					cmd=0;
				}
				prev_char = ' ';
				flag = 0;
				s++;
				i++;
				continue;
			}

			// --------
			// string
			// --------
			else if(flag == 3)
			{
				// Last char process
				if(len == i + 1)
				{
					p_r = p_r_t;
					*p_r++ = '?';
					flag = 0;
					break;
				}

				// need to be ignored case
				if(p_r > p_r_t + SIZECHAR)
				{
					if(
						(prev_char == '\\' && *s == '\\') ||		// to process '\\\\', '\\'
						(prev_char == '\\' && *s == qutr_char) ||	// to process '\''
						(prev_char == qutr_char && *s == qutr_char)	// to process ''''
					)
					{
						prev_char = 'X';
						s++;
						i++;
						continue;
					}
				}

				// satisfied closing string - swap string to ?
				if(*s == qutr_char && (len == i+1 || *(s + SIZECHAR) != qutr_char))
				{
						p_r = p_r_t;
						*p_r++ = '?';
						flag = 0;
						if(i < len)
							s++;
						i++;
						continue;
				}
			}

			// --------
			// digit
			// --------
			else if(flag == 4)
			{
				// last single char
				if(p_r_t == p_r)
				{
					*p_r++ = '?';
					i++;
					continue;
				}

				// token char or last char
				if(is_token_char(*s) || len == i+1)
				{
					if(is_digit_string(p_r_t, p_r))
					{
						p_r = p_r_t;
						*p_r++ = '?';
						if(len == i+1)
						{
							if(is_token_char(*s))
								*p_r++ = *s;
							i++;
							continue;
						}


					}
					flag = 0;
				}
			}
		}

		// =================================================
		// COPY CHAR
		// =================================================
		// convert every space char to ' '
		if (lowercase==0) {
			*p_r++ = !is_space_char(*s) ? *s : ' ';
		} else {
			*p_r++ = !is_space_char(*s) ? (tolower(*s)) : ' ';
		}
		prev_char = *s++;

		i++;
	}

	// remove a trailing space
	if (p_r>r) {
		char *e=p_r;
		e--;
		if (*e==' ') {
			*e=0;
		}
	}

	*p_r = 0;

	// process query stats
	return r;
}
コード例 #8
0
ファイル: c_tokenizer.c プロジェクト: hand-code/proxysql
char *mysql_query_digest_and_first_comment(char *s, int len, char *first_comment){
	int i = 0;

	char *r = (char *) malloc(len + SIZECHAR);

	char *p_r = r;
	char *p_r_t = r;

	char prev_char = 0;
	char qutr_char = 0;

	char flag = 0;
	char fc=0;
	int fc_len=0;

	char fns=0;

	while(i < len)
	{
		// =================================================
		// START - read token char and set flag what's going on.
		// =================================================
		if(flag == 0)
		{
			// store current position
			p_r_t = p_r;

			// comment type 1 - start with '/*'
			if(prev_char == '/' && *s == '*')
			{
				flag = 1;
			}

			// comment type 2 - start with '#'
			else if(*s == '#')
			{
				flag = 2;
			}

			// string - start with '
			else if(*s == '\'' || *s == '"')
			{
				flag = 3;
				qutr_char = *s;
			}

			// may be digit - start with digit
			else if(is_token_char(prev_char) && is_digit_char(*s))
			{
				flag = 4;
				if(len == i+1)
					continue;
			}

			// not above case - remove duplicated space char
			else
			{
				flag = 0;
				if (fns==0 && is_space_char(*s)) {
					s++;
					i++;
					continue;
				}
				if (fns==0) fns=1;
				if(is_space_char(prev_char) && is_space_char(*s)){
					prev_char = ' ';
					*p_r = ' ';
					s++;
					i++;
					continue;
				}
			}
		}

		// =================================================
		// PROCESS and FINISH - do something on each case
		// =================================================
		else
		{
			// --------
			// comment
			// --------
			if (flag == 1) {
				if (fc==0) {
					fc=1;
				}
				if (fc==1) {
					if (fc_len<FIRST_COMMENT_MAX_LENGTH-1) {
						first_comment[fc_len]= !is_space_char(*s) ? *s : ' ';
						fc_len++;
					}
					if (prev_char == '*' && *s == '/') {
						if (fc_len>=2) fc_len-=2;
						first_comment[fc_len]=0;
						fc=2;
					}
				}
			}
			if(
				// comment type 1 - /* .. */
				(flag == 1 && prev_char == '*' && *s == '/') ||

				// comment type 2 - # ... \n
				(flag == 2 && (*s == '\n' || *s == '\r'))
			)
			{
				p_r = flag == 1 ? p_r_t - SIZECHAR : p_r_t;
				prev_char = ' ';
				flag = 0;
				s++;
				i++;
				continue;
			}

			// --------
			// string
			// --------
			else if(flag == 3)
			{
				// Last char process
				if(len == i + 1)
				{
					p_r = p_r_t;
					*p_r++ = '?';
					flag = 0;
					break;
				}

				// need to be ignored case
				if(p_r > p_r_t + SIZECHAR)
				{
					if(
						(prev_char == '\\' && *s == '\\') ||		// to process '\\\\', '\\'
						(prev_char == '\\' && *s == qutr_char) ||	// to process '\''
						(prev_char == qutr_char && *s == qutr_char)	// to process ''''
					)
					{
						prev_char = 'X';
						s++;
						i++;
						continue;
					}
				}

				// satisfied closing string - swap string to ?
				if(*s == qutr_char && (len == i+1 || *(s + SIZECHAR) != qutr_char))
				{
						p_r = p_r_t;
						*p_r++ = '?';
						flag = 0;
						if(i < len)
							s++;
						i++;
						continue;
				}
			}

			// --------
			// digit
			// --------
			else if(flag == 4)
			{
				// last single char
				if(p_r_t == p_r)
				{
					*p_r++ = '?';
					i++;
					continue;
				}

				// token char or last char
				if(is_token_char(*s) || len == i+1)
				{
					if(is_digit_string(p_r_t, p_r))
					{
						p_r = p_r_t;
						*p_r++ = '?';
						if(len == i+1)
						{
							if(is_token_char(*s))
								*p_r++ = *s;
							i++;
							continue;
						}


					}
					flag = 0;
				}
			}
		}

		// =================================================
		// COPY CHAR
		// =================================================
		// convert every space char to ' '
		*p_r++ = !is_space_char(*s) ? *s : ' ';
		prev_char = *s++;

		i++;
	}

	// remove a trailing space
	if (p_r>r) {
		char *e=p_r;
		e--;
		if (*e==' ') {
			*e=0;
		}
	}

	*p_r = 0;

	// process query stats
	return r;
}
コード例 #9
0
ファイル: lexer.c プロジェクト: nsillik/dcpu16-asm-c
static int is_hex_char(char c)
{
  return is_digit_char(c) || (c >= 'A' && c <= 'F') || c == 'x';
}
コード例 #10
0
ファイル: lexer.c プロジェクト: nsillik/dcpu16-asm-c
token_t * read_token(lexer_state * state)
{
  token_t * t;
  int type, value_size;
  char * left = state->ptr;
  char * right;
  char c;

  // Ignore spaces.
  while ((c = *left) && (c == ' ')) left++;
  right = left;

  if (c == ';')
  {
    while (c != '\n') c = *(++right);
    type = T_COMMENT;
  }
  else if (c == '\n')
  {
    right++;
    type = T_NEWLINE;
  }
  else if (is_name_char(c))
  {
    while (is_name_char(c)) c = *(++right);
    type = T_NAME;
  }
  else if (c == ':')
  {
    c = *(++right);
    while (is_name_char(c)) c = *(++right);
    type = T_LABEL;
  }
  else if (is_digit_char(c))
  {
    if (*(left + 1) == 'x') // look-ahead
    {
      while (is_hex_char(c)) c = *(++right);
      type = T_INT_HEX;
    }
    else
    {
      while (is_digit_char(c)) c = *(++right);
      type = T_INT_DEC;
    }
  }
  else if (c == '[' || c == ']')
  {
    right++;
    type = c == '[' ? T_BRACKET_L : T_BRACKET_R;
  }
  else if (c == ',')
  {
    right++;
    type = T_COMMA;
  }
  else if (c == '+')
  {
    right++;
    type = T_PLUS;
  }
  else if (c == 0)
  {
    return NULL;
  }
  else
  {
    printf("c: '%c' (0x%02x)\n", c, (int)c);
    CRASH("unhandled token");
  }

  t = (token_t *)malloc(sizeof(token_t));
  t->type = type;
  t->size = value_size = right - left;
  t->value = (char *)malloc(value_size + 1);
  strlcpy(t->value, left, value_size + 1);

  state->ptr = right;

  return t;
}