Beispiel #1
0
/*
 * Build a TokenizeError describing the text the lexer matched last.
 *
 * The error is only constructed and returned here; the caller decides
 * whether to `throw` it. The attached location consists of the current
 * file, the lexer's line number, the start column of the matched text
 * (`linepos` minus the match length) and the match length itself.
 */
TokenizeError Impl::error(const std::string &msg) {
	// query the match length once and reuse it for start and extent
	const int match_length = static_cast<int>(yyget_leng(this->scanner));
	const auto match_start = this->linepos - match_length;

	return TokenizeError{
		Location{
			this->file,
			yyget_lineno(this->scanner),
			match_start,
			match_length
		},
		msg
	};
}
Beispiel #2
0
/*
 * Turn the text the lexer matched last into a token of the given type
 * and push it onto the token queue.
 *
 * The token records the current file, the line number, the start column
 * (`linepos` minus the match length) and the match length. Token types
 * for which `token_needs_payload` is true additionally carry the
 * matched text.
 */
void Impl::token(token_type type) {
	const int text_length = yyget_leng(this->scanner);
	const int start_column = this->linepos - text_length;

	// an ENDLINE token is reported one line earlier than the lexer's
	// current line number, so the `\n` is not assigned to the next line.
	const int line = yyget_lineno(this->scanner)
	                 - (type == token_type::ENDLINE ? 1 : 0);

	// register opening and closing brackets before the token is queued,
	// this is needed for correct line-wrap-indentation.
	this->track_brackets(type, start_column);

	if (not token_needs_payload(type)) {
		this->tokens.push(Token{
			this->file,
			line,
			start_column,
			text_length,
			type
		});
		return;
	}

	this->tokens.push(Token{
		this->file,
		line,
		start_column,
		text_length,
		type,
		yyget_text(this->scanner)
	});
}
Beispiel #3
0
/*
 * measure the indentation of a line
 */
void Impl::handle_indent(int depth) {

	this->linepos -= yyget_leng(this->scanner) - depth;

	if (not this->brackets.empty()) {
		// we're in a pair of brackets,
		// there the indentation is way funnier.

		// check if the content indentation is correct.
		int expected = this->brackets.top().get_content_indent();
		if (depth != expected) {
			// if the expected depth is not correct,
			// then the only thing that is allowed is
			// the closing bracket.
			// the check will be done for the next token in
			// `track_brackets`.
			this->bracketcloseindent_expected = true;
		}

		// don't need to track the indent stack,
		// this is done in the bracket tracking now.
		return;
	}

	// regular indent is enforced when not in a bracket pair
	if ((depth % SPACES_PER_INDENT) > 0) {
		std::ostringstream builder;
		builder << "indentation requires exactly "
		        << SPACES_PER_INDENT
		        << " spaces per level";
		throw this->error(builder.str());
	}

	if (depth == this->previous_indent) {
		// same indent level, ignore
		return;
	}
	else if (depth < this->previous_indent) {
		// current line is further left than the previous one
		int delta = this->previous_indent - depth;
		while (delta > 0) {
			delta -= SPACES_PER_INDENT;
			this->token(token_type::DEDENT);
		}
	}
	else {
		// current line has more depth than the previous one
		int delta = depth - this->previous_indent;
		while (delta > 0) {
			delta -= SPACES_PER_INDENT;
			this->token(token_type::INDENT);
		}
	}
	this->previous_indent = depth;
}
Beispiel #4
0
/**
 * Report an error raised by the lexer.
 *
 * Prints "<filename>:<line>:<column>: <error_msg>" to stdout, followed by
 * ">" and the offending source line of the file on top of the open-file
 * stack. The column is the lexer column minus the length of the current
 * match. Afterwards the parser status is set to LEXING_ERROR.
 *
 * @param error_msg message describing the lexer error
 */
void FidlParser::lexer_error_callback(const char* error_msg)
{
  auto& current_file = open_files_.back();
  const auto line = yyget_lineno(lexer_);
  const std::string& source_line = get_line(current_file.file_content_, line);
  const auto column = yyget_column(lexer_) - yyget_leng(lexer_);

  std::cout << current_file.filename_ << ":" << line << ":" << column << ": " << error_msg
            << std::endl;
  std::cout << ">" << source_line;

  first_error_ = ParserStatus::LEXING_ERROR;
}
Beispiel #5
0
/*
 * Scan a 0-terminated CSS buffer and report the URIs and the charset in it.
 *
 * buf:               0-terminated CSS text. NULL is treated as "nothing to do".
 * callback_uri:      called for every `url(...)` token and for the STRING/URI
 *                    that follows an `@import`. `url` points into the
 *                    scanner's token text and is NOT 0-terminated at `len`;
 *                    surrounding quotes, `url(`, `)` and padding whitespace
 *                    are already stripped. `pos` is the offset of `url`,
 *                    accumulated from the lengths of all tokens scanned before.
 *                    May be NULL.
 * callback_encoding: called with the (unquoted) name that follows `@charset`.
 *                    May be NULL.
 * user_ctx:          passed through unchanged to both callbacks.
 */
void wget_css_parse_buffer(
	const char *buf,
	void(*callback_uri)(void *user_ctx, const char *url, size_t len, size_t pos),
	void(*callback_encoding)(void *user_ctx, const char *url, size_t len),
	void *user_ctx)
{
	int token;
	size_t length, pos = 0;
	char *text;
	yyscan_t scanner;

	if (!buf)
		return;

	// let flex operate on buf as a 0 terminated string
	// we could give buflen to this function and use yy_scan_bytes or yy_scan_buffer
	if (yylex_init(&scanner)) {
		// without a scanner object none of the calls below are valid
		error_printf(_("Failed to init CSS scanner\n"));
		return;
	}
	yy_scan_string(buf, scanner);

	while ((token = yylex(scanner)) != CSSEOF) {
		if (token == IMPORT_SYM) {
			// e.g. @import "http:example.com/index.html"
			pos += yyget_leng(scanner);

			// skip whitespace before URI/STRING
			while ((token = yylex(scanner)) == S)
				pos += yyget_leng(scanner);

			// input ended right after @import - don't call yylex() again after EOF
			if (token == CSSEOF)
				break;

			// now token should be STRING or URI
			if (token == STRING)
				token = URI;
		}

		if (token == URI && callback_uri) {
			// e.g. url(http:example.com/index.html)
			text = yyget_text(scanner);
			length = yyget_leng(scanner);

			if (*text == '\'' || *text == '\"') {
				// a string - remove the quotes
				callback_uri(user_ctx, text + 1, length - 2, pos + 1);
			} else {
				// extract URI from url(...)
				if (!wget_strncasecmp_ascii(text, "url(", 4)) {
					char *otext = text;

					// remove trailing ) and any spaces before
					// (stops at the '(' of "url(" at the latest)
					for (length--; c_isspace(text[length - 1]); length--)
						;

					// remove leading url( and any spaces after.
					// 'length' is checked so that "url( )" can't underflow it.
					for (length -= 4, text += 4; length && c_isspace(*text); text++, length--)
						;

					// remove quotes (opening and closing one)
					if (length >= 2 && (*text == '\'' || *text == '\"')) {
						text++;
						length -= 2;
					}

					callback_uri(user_ctx, text, length, pos + (text - otext));
				}
			}
		} else if (token == CHARSET_SYM && callback_encoding) {
			// e.g. @charset "UTF-8"
			pos += yyget_leng(scanner);

			// skip whitespace before charset name
			while ((token = yylex(scanner)) == S)
				pos += yyget_leng(scanner);

			// now token should be STRING
			if (token == STRING) {
				text = yyget_text(scanner);
				length = yyget_leng(scanner);

				if (*text == '\'' || *text == '\"') {
					// a string - remove the quotes
					callback_encoding(user_ctx, text + 1, length - 2);
				} else {
					// a string without quotes
					callback_encoding(user_ctx, text, length);
				}
			} else {
				error_printf(_("Unknown token after @charset: %d\n"), token);
			}

			// input ended right after @charset - don't call yylex() again after EOF
			if (token == CSSEOF)
				break;
		}

		// account for the token that has just been handled (or skipped)
		pos += yyget_leng(scanner);
	}

	yylex_destroy(scanner);
}
Beispiel #6
0
/*
 * Tokenize one chunk of JavaScript and append the tokens to `state`.
 *
 * buffer is html-normlike "chunk", if original file is bigger than buffer,
 * we rewind to a space, so we'll know that tokens won't be broken in half at
 * the end of a buffer. All tokens except string-literals of course.
 * So we can assume that after the buffer there is either a space, EOF, or a
 * chunk of text not containing whitespace at all (for which we care only if its
 * a stringliteral)
 *
 * While tokenizing, the per-scope `fsm_state` is updated so that identifiers
 * are either declared in the current scope (after `var`, in a function
 * declaration, as function name) or looked up in it, and `blocks` / `brackets`
 * count the open {} and []. Unbalanced or misplaced tokens are counted in
 * state->syntax_errors. "literal" + "literal" is folded into one literal.
 *
 * state: parser state; if state->global is not set, a warning is logged and
 *        nothing is processed
 * buf:   the chunk to tokenize
 * n:     length of the chunk, handed to yy_scan_bytes()
 */
void cli_js_process_buffer(struct parser_state *state, const char *buf, size_t n)
{
	struct scope* current = state->current;
	YYSTYPE val;
	int yv;
	YY_BUFFER_STATE yyb;

	if(!state->global) {
		/* this state has either not been initialized,
		 * or cli_js_parse_done() was already called on it */
		cli_warnmsg(MODULE "invalid state\n");
		return;
	}
	/* NOTE(review): yyb is never used or released in this function -
	 * confirm that the scanner owns the buffer */
	yyb = yy_scan_bytes(buf, n, state->scanner);
	memset(&val, 0, sizeof(val));
	val.vtype = vtype_undefined;
	/* on EOF yylex will return 0 */
	while( (yv=yylex(&val, state->scanner)) != 0)
	{
		const char *text;
		size_t leng;

		val.type = yv;
		switch(yv) {
			case TOK_VAR:
				current->fsm_state = InsideVar;
				break;
			case TOK_IDENTIFIER_NAME:
				text = yyget_text(state->scanner);
				leng = yyget_leng(state->scanner);
				if(current->last_token == TOK_DOT) {
					/* this is a member name, don't normalize
					*/
					TOKEN_SET(&val, string, cli_strdup(text));
					val.type = TOK_UNNORM_IDENTIFIER;
				} else {
					switch(current->fsm_state) {
						case WaitParameterList:
							state->syntax_errors++;
							/* fall through */
						case Base:
						case InsideInitializer:
							/* identifier is used, not declared */
							TOKEN_SET(&val, cstring, scope_use(current, text, leng));
							break;
						case InsideVar:
						case InsideFunctionDecl:
							/* variable or parameter declaration */
							TOKEN_SET(&val, cstring, scope_declare(current, text, leng, state));
							current->fsm_state = InsideInitializer;
							current->brackets = 0;
							break;
						case WaitFunctionName:
							TOKEN_SET(&val, cstring, scope_declare(current, text, leng, state));
							current->fsm_state = WaitParameterList;
							break;
					}
				}
				break;
			case TOK_PAR_OPEN:
				switch(current->fsm_state) {
					case WaitFunctionName:
						/* fallthrough */
					case WaitParameterList:
						current->fsm_state = InsideFunctionDecl;
						break;
					default:
						/* noop */
						break;
				}
				break;
			case TOK_PAR_CLOSE:
				switch(current->fsm_state) {
					case WaitFunctionName:
						state->syntax_errors++;
						break;
					case WaitParameterList:
						current->fsm_state = Base;
						break;
					default:
						/* noop */
						break;
				}
				break;
			case TOK_CURLY_BRACE_OPEN:
				switch(current->fsm_state) {
					case WaitFunctionName:
						/* fallthrough */
					case WaitParameterList:
					case InsideFunctionDecl:
						/* in a syntactically correct
						 * file, we would already be in
						 * the Base state when we see a {
						 */
						current->fsm_state = Base;
						/* fall-through */
					case InsideVar:
					case InsideInitializer:
						state->syntax_errors++;
						/* fall-through */
					case Base:
					default:
						current->blocks++;
						break;
				}
				break;
			case TOK_CURLY_BRACE_CLOSE:
				if(current->blocks > 0)
					current->blocks--;
				else
					state->syntax_errors++;
				if(!current->blocks) {
					if(current->parent) {
						/* add dummy FUNCTION token to
						 * mark function end */
						TOKEN_SET(&val, cstring, "}");
						add_token(state, &val);
						TOKEN_SET(&val, scope, NULL);
						val.type = TOK_FUNCTION;

						/* leave the function's scope */
						state->current = current = current->parent;
					} else {
						/* extra } */
						state->syntax_errors++;
					}
				}
				break;
			case TOK_BRACKET_OPEN:
				current->brackets++;
				break;
			case TOK_BRACKET_CLOSE:
				if(current->brackets > 0)
					current->brackets--;
				else
					state->syntax_errors++;
				break;
			case TOK_COMMA:
				if (current->fsm_state == InsideInitializer && current->brackets == 0 && current->blocks == 0) {
					/* initializer ended only if we
					 * encountered a comma, and [] are
					 * balanced.
					 * This avoids switching state on:
					 * var x = [4,y,u];*/
					current->fsm_state = InsideVar;
				}
				break;
			case TOK_SEMICOLON:
				if (current->brackets == 0 && current->blocks == 0) {
					/* avoid switching state on unbalanced []:
					 * var x = [test;testi]; */
					current->fsm_state = Base;
				}
				break;
			case TOK_FUNCTION:
				/* NOTE(review): the result of scope_new() is used
				 * without a NULL check - confirm it can't fail */
				current = scope_new(state);
				current->fsm_state = WaitFunctionName;
				TOKEN_SET(&val, scope, state->current);
				break;
			case TOK_StringLiteral:
				if(state->tokens.cnt > 1 && state->tokens.data[state->tokens.cnt-1].type == TOK_PLUS) {
					/* see if can fold */
					yystype *prev_string = &state->tokens.data[state->tokens.cnt-2];
					if(prev_string->type == TOK_StringLiteral) {
						char *str = TOKEN_GET(prev_string, string);
						size_t str_len;

						if (!str) {
							/* nothing to append to, keep the
							 * literal as a token of its own */
							break;
						}
						str_len = strlen(str);

						text = yyget_text(state->scanner);
						leng = yyget_leng(state->scanner);

						/* grow the previous literal first: if that
						 * fails, the token stream is left untouched
						 * and the literal is added unfolded */
						str = cli_realloc(str, str_len + leng + 1);
						if (!str)
						    break;

						/* delete TOK_PLUS */
						free_token(&state->tokens.data[--state->tokens.cnt]);

						strncpy(str+str_len, text, leng);
						str[str_len + leng] = '\0';
						TOKEN_SET(prev_string, string, str);
						/* the current literal has been merged,
						 * drop it instead of adding a token */
						free(val.val.string);
						memset(&val, 0, sizeof(val));
						val.vtype = vtype_undefined;
						continue;
					}
				}
				break;
		}
		if(val.vtype == vtype_undefined) {
			/* no value was attached to this token: keep its raw
			 * text (as done for member names above) instead of
			 * aborting the whole process */
			text = yyget_text(state->scanner);
			TOKEN_SET(&val, string, cli_strdup(text));
		}
		add_token(state, &val);
		current->last_token = yv;
		/* reset val for the next token */
		memset(&val, 0, sizeof(val));
		val.vtype = vtype_undefined;
	}
}
Beispiel #7
0
/*
 * Move the column position forward by the length of the text
 * the lexer matched last.
 */
void Impl::advance_linepos() {
	const auto consumed = yyget_leng(this->scanner);
	this->linepos += consumed;
}