コード例 #1
0
ファイル: escape.c プロジェクト: riddochc/ruby
/*
 * URL-decode `str`: each "%XX" (where X is a hex digit) is replaced by the
 * single byte 0xXX and each '+' is replaced by a space.  Every other byte,
 * including a '%' that is not followed by two hex digits, is copied through
 * unchanged.
 *
 * str      - string to decode; it is only read.
 * encoding - encoding requested for the result, resolved with
 *            rb_to_encoding_index().
 *
 * Returns a new string.  If nothing had to be decoded the result is
 * rb_str_dup(str).  When the requested encoding differs from the encoding of
 * `str`, the result is tagged with the requested encoding only if its code
 * range under that encoding is clean; otherwise the encoding of `str` is
 * restored.
 */
static VALUE
optimized_unescape(VALUE str, VALUE encoding)
{
    long i, len, beg = 0;
    VALUE dest = 0;
    const char *cstr;
    int cr, origenc, encidx = rb_to_encoding_index(encoding);

    len  = RSTRING_LEN(str);
    cstr = RSTRING_PTR(str);

    /* `beg` marks the start of the pending run of bytes that need no
     * decoding and have not been copied to `dest` yet. */
    for (i = 0; i < len; ++i) {
	char buf[1];
	const char c = cstr[i];
	int clen = 0; /* extra source bytes consumed beyond cstr[i] */
	if (c == '%') {
	    /* Too close to the end to be a full "%XX": keep it literally.
	     * Use `continue`, not `break`, so that a '+' in the remaining
	     * one or two bytes is still converted to a space. */
	    if (i + 3 > len) continue;
	    if (!ISXDIGIT(cstr[i+1])) continue;
	    if (!ISXDIGIT(cstr[i+2])) continue;
	    buf[0] = ((char_to_number(cstr[i+1]) << 4)
		      | char_to_number(cstr[i+2]));
	    clen = 2;
	}
	else if (c == '+') {
	    buf[0] = ' ';
	}
	else {
	    continue;
	}

	/* Allocate the output lazily: input without escapes never gets here. */
	if (!dest) {
	    dest = rb_str_buf_new(len);
	}

	/* Flush the literal run preceding this escape, then the decoded byte. */
	rb_str_cat(dest, cstr + beg, i - beg);
	i += clen;
	beg = i + 1;

	rb_str_cat(dest, buf, 1);
    }

    if (dest) {
	/* Append the literal tail after the last decoded escape. */
	rb_str_cat(dest, cstr + beg, len - beg);
	/* NOTE(review): helper defined elsewhere; presumably carries over
	 * per-object state from str to dest -- confirm. */
	preserve_original_state(str, dest);
	cr = ENC_CODERANGE_UNKNOWN;
    }
    else {
	dest = rb_str_dup(str);
	cr = ENC_CODERANGE(str);
    }
    origenc = rb_enc_get_index(str);
    if (origenc != encidx) {
	/* Try the requested encoding; fall back to the original one (and its
	 * known code range, if any) when the result is not clean in it. */
	rb_enc_associate_index(dest, encidx);
	if (!ENC_CODERANGE_CLEAN_P(rb_enc_str_coderange(dest))) {
	    rb_enc_associate_index(dest, origenc);
	    if (cr != ENC_CODERANGE_UNKNOWN)
		ENC_CODERANGE_SET(dest, cr);
	}
    }
    return dest;
}
コード例 #2
0
ファイル: lexer.cpp プロジェクト: gfv/initd
/// Scans the input from the current position and returns the next token.
///
/// Single-line and multi-line comments produce no token: they are consumed
/// and scanning restarts. Every other construct returns a token whose
/// text_range spans from the first consumed character to the current position.
///
/// Malformed input is reported through error_sink ("unterminated comment",
/// "expected '(' in raw string literal", "unterminated string",
/// "invalid escape character", "integer literal is too large"); except for
/// the unterminated comment, a token is still returned after the report.
///
/// @return the next token, or an empty token_sp() once the end of input is
///         reached.
token_sp lexer::read_next_token()
{
    for (;;)
    {
        skip_whitespace();

        if (eof_char())
            return token_sp();

        char const* lex_start = pos;

        // NOTE(review): skip_whitespace() has already run, so this branch may
        // be unreachable depending on what that helper consumes -- confirm.
        if (is_ascii_whitespace(peek_char()))
        {
            advance_char();
            while (!eof_char() && is_ascii_whitespace(peek_char()))
                advance_char();

            return make_unique<simple_token>(text_range(lex_start, pos), token_type::whitespace);
        }
        else if (is_single_line_comment_start())
        {
            advance_char(2);
            for (;;)
            {
                // At end of input there is nothing left to consume; advancing
                // here would move the cursor past the end of the buffer.
                if (eof_char())
                    break;

                if (is_single_line_comment_end())
                {
                    advance_char();
                    break;
                }

                advance_char();
            }
        }
        else if (is_multi_line_comment_start())
        {
            advance_char(2);
            for (;;)
            {
                if (eof_char())
                {
                    error_sink->push(error_tag(text_range::make_empty(pos), "unterminated comment"));
                    break;
                }
                else if (is_multi_line_comment_end())
                {
                    advance_char(2);
                    break;
                }
                else
                    advance_char();
            }
        }
        else if (is_raw_string_literal_start())
        {
            advance_char(2);

            std::string prefix;
            std::string value;

            // Collect the delimiter prefix: everything up to the opening '('.
            for (;;)
            {
                if (eof_char()
                 || peek_char() == ' '
                 || peek_char() == ')'
                 || peek_char() == '\t'
                 || peek_char() == '\v'
                 || peek_char() == '\r'
                 || peek_char() == '\n')
                {
                    text_range r(lex_start, pos);
                    error_sink->push(error_tag(r, "expected '(' in raw string literal"));
                    return make_unique<string_literal_token>(r, std::move(value));
                }
                else if (peek_char() == '(')
                {
                    advance_char();
                    break;
                }
                else
                {
                    prefix += peek_char();
                    advance_char();
                }
            }

            // Collect the body until the closing delimiter for this prefix.
            for (;;)
            {
                if (eof_char())
                {
                    text_range r(lex_start, pos);
                    error_sink->push(error_tag(r, "unterminated string"));
                    return make_unique<string_literal_token>(r, std::move(value));
                }
                else if (is_raw_string_literal_end(prefix))
                {
                    // A raw-string-literal end must begin with ')' and the
                    // comparison short-circuits, so raw string literals are
                    // expected to be lexed in linear time.

                    // Skip the closing delimiter: two characters plus the prefix.
                    advance_char(2 + prefix.size());

                    return make_unique<string_literal_token>(text_range(lex_start, pos), std::move(value));
                }
                else
                {
                    value += peek_char();
                    advance_char();
                }
            }
        }
        else if (is_identifier_start(peek_char()))
        {
            std::string s(1, peek_char());
            advance_char();
            while (!eof_char() && is_identifier_trail(peek_char()))
            {
                s += peek_char();
                advance_char();
            }

            return make_identifier_token(text_range(lex_start, pos), std::move(s));
        }
        else if (is_number(peek_char()))
        {
            // Accumulate in a wider type: `value * 10 + digit` on a plain int
            // is undefined behaviour once a long digit run overflows it.
            long long wide = char_to_number(peek_char());
            int value = static_cast<int>(wide);
            bool out_of_range = false;
            advance_char();

            while (!eof_char() && is_number(peek_char()))
            {
                if (!out_of_range)
                {
                    // `wide` is at most INT_MAX here, so this cannot overflow
                    // a long long.
                    wide = wide * 10 + char_to_number(peek_char());

                    // The value fits in an int exactly when the round trip
                    // through int is lossless.
                    if (static_cast<long long>(static_cast<int>(wide)) != wide)
                        out_of_range = true;
                    else
                        value = static_cast<int>(wide);
                }

                // Keep consuming digits so the token covers the whole literal.
                advance_char();
            }

            // On overflow, report it and keep the last value that still fit.
            if (out_of_range)
                error_sink->push(error_tag(text_range(lex_start, pos), "integer literal is too large"));

            return make_unique<integer_literal_token>(text_range(lex_start, pos), value);
        }
        else if (peek_char() == '\"')
        {
            advance_char();

            std::string value;

            for (;;)
            {
                if (eof_char() || peek_char() == '\n')
                {
                    text_range r(lex_start, pos);
                    error_sink->push(error_tag(r, "unterminated string"));
                    return make_unique<string_literal_token>(r, std::move(value));
                }
                else if (peek_char() == '\"')
                {
                    advance_char();
                    return make_unique<string_literal_token>(text_range(lex_start, pos), std::move(value));
                }
                else if (peek_char() == '\\')
                {
                    const char* escape_start = pos;
                    advance_char();

                    // A backslash as the very last character adds nothing to
                    // the value; the next iteration reports the unterminated
                    // string.
                    if (!eof_char())
                    {
                        switch (peek_char())
                        {
                        case 'a':
                            value += '\a';
                            break;
                        case 'b':
                            value += '\b';
                            break;
                        case 'f':
                            value += '\f';
                            break;
                        case 'n':
                            value += '\n';
                            break;
                        case 'r':
                            value += '\r';
                            break;
                        case 't':
                            value += '\t';
                            break;
                        case 'v':
                            value += '\v';
                            break;
                        case '\\':
                            value += '\\';
                            break;
                        case '\'':
                            value += '\'';
                            break;
                        case '\"':
                            value += '\"';
                            break;
                        default:
                            // Unknown escape: report it and keep both
                            // characters verbatim in the value.
                            error_sink->push(error_tag(text_range(escape_start, pos + 1), "invalid escape character"));
                            value += '\\';
                            value += peek_char();
                            break;
                        }
                        advance_char();
                    }
                }
                else
                {
                    value += peek_char();
                    advance_char();
                }
            }
        }
        else if (peek_char() == '{')
        {
            advance_char();
            return make_unique<simple_token>(text_range(lex_start, pos), token_type::lbrace);
        }
        else if (peek_char() == '}')
        {
            advance_char();
            return make_unique<simple_token>(text_range(lex_start, pos), token_type::rbrace);
        }
        else if (peek_char() == '=')
        {
            advance_char();
            return make_unique<simple_token>(text_range(lex_start, pos), token_type::equals);
        }
        else if (peek_char() == ';')
        {
            advance_char();
            return make_unique<simple_token>(text_range(lex_start, pos), token_type::semicolon);
        }
        else if (peek_char() == ',')
        {
            advance_char();
            return make_unique<simple_token>(text_range(lex_start, pos), token_type::comma);
        }
        else if (peek_char() == ':')
        {
            advance_char();
            return make_unique<simple_token>(text_range(lex_start, pos), token_type::colon);
        }
        else
        {
            // Any other character becomes a one-character "unknown" token.
            advance_char();
            return make_unique<simple_token>(text_range(lex_start, pos), token_type::unknown);
        }
    }
}