Exemplo n.º 1
0
Arquivo: lex.c Projeto: relrod/arroyo
/* Scans a double-quoted string literal into ls->buf and stores a copy on
 * the token. Assumes ls->current is positioned on the opening quote.
 * Reports an error on EOF or a raw newline inside the literal. */
static void read_string(lexer_state *ls, token_info *info)
{
  // skip leading "
  next(ls);

  // accumulate characters until the closing quote
  for(;;) {
    if(ls->current == '"')
      break;

    if(ls->current == EOS) {
      lexer_error(ls, "unexpected eof in string");
      return;
    }

    if(ls->current == '\n' || ls->current == '\r') {
      lexer_error(ls, "unexpected new line in string");
      return;
    }

    save_and_next(ls);
  }

  info->string = strdup(ls->buf->buf);

  // skip trailing "
  next(ls);
}
Exemplo n.º 2
0
/* Consumes the next token if it is a special token and verifies that its
 * text equals `end` (the closer matching the `start` token opened on line
 * `linenum`). Reports a lexer error on a missing or mismatched token.
 * Returns the consumed token, or NULL. */
token_t* lexer_expectmatch(lexer_state_t* lex, const char* end, const char* start, int linenum) {
    token_t* tok;

    if ( lex->error ) {
        return NULL;
    }

    tok = lexer_nextif(lex, special_token);

    if ( !tok ) {
        lexer_error(lex, "Expected \"%s\" token to match the \"%s\" on line %d.\n", end, start, linenum);
        return tok;
    }

    if ( strcmp((char*) tok->value, end) != 0 ) {
        lexer_error(lex, "Expected a \"%s\" token to match the \"%s\" on line %d. Got a \"%s\"\n", end, start, linenum, (char*) tok->value);
    }

    return tok;
}
Exemplo n.º 3
0
/* Consumes the next token if it is a special token and verifies that its
 * text equals `token`. Reports a lexer error on a missing or mismatched
 * token. Returns the consumed token, or NULL. */
token_t* lexer_expect_special(lexer_state_t* lex, const char* token) {
    token_t* tok;

    if ( lex->error ) {
        return NULL;
    }

    tok = lexer_nextif(lex, special_token);

    if ( !tok ) {
        lexer_error(lex, "Expected \"%s\" token.\n", token);
        return tok;
    }

    if ( strcmp((char*) tok->value, token) != 0 ) {
        lexer_error(lex, "Expected a \"%s\", got a \"%s\"\n", token, (char*) tok->value);
    }

    return tok;
}
Exemplo n.º 4
0
/*
 * Finish parsing: flush the lexer, then emit a synthetic ENDTAG event
 * (with empty data) for every tag still open on the stack, innermost
 * first, so the event stream seen by the callback is balanced.
 * Returns 0 on success, -1 on failure (self->last_error is set).
 */
int
tdi_soup_parser_finalize(tdi_soup_parser *self)
{
    PyObject *data, *name;
    tdi_parser_event event;
    int res;

    /* Map a lexer failure onto the parser's error-reporting path. */
    if (tdi_soup_lexer_finalize(self->lexer) == -1)
        return lexer_error(self);

    while (self->tagstack) {
        if (!(data = PyString_FromString(""))) {
            self->last_error = TDI_PARSER_ERR_ENV;
            return -1;
        }
        /* Take our own reference to the name before tagstack_pop()
         * destroys the stack entry (which presumably releases its
         * reference — TODO confirm against tagstack_pop). */
        name = self->tagstack->name;
        Py_INCREF(name);
        tagstack_pop(&self->tagstack);

        event.type = TDI_PARSER_EVENT_ENDTAG;
        event.info.endtag.name = name;
        event.info.endtag.data = data;
        res = self->cb(&event, self->cb_ctx);
        Py_DECREF(name);
        Py_DECREF(data);
        if (res) {
            /* Any non-zero callback result aborts finalization. */
            self->last_error = TDI_PARSER_ERR_ENV;
            return -1;
        }
    }

    return 0;
}
Exemplo n.º 5
0
Arquivo: lex.c Projeto: relrod/arroyo
/* Scans a numeric literal (digits with at most one embedded '.') into
 * ls->buf, then stores both the raw text and its long-double value on
 * the token. A second dot ends the number and is left for the next
 * token. A letter adjacent to the digits (e.g. "12ab") is an error. */
static void read_numeric(lexer_state *ls, token_info *info)
{
  int seen_dot  = 0;

  for(;;) {
    if(ls->current == '.') {
      if(seen_dot) break;
      seen_dot = 1;
    }
    else if(!lisdigit(ls->current)) {

      // bad number: consume the whole alphanumeric tail first so the
      // error is reported for the complete malformed token.
      // NOTE(review): lexer_error() presumably longjmps out of the scan
      // (see setjmp in lex()); otherwise control would fall through to
      // the conversion below — confirm against lexer_error's definition.
      if(lisalpha(ls->current)) {
        while(lisalnum(ls->current)) save_and_next(ls);
        lexer_error(ls, "badly formatted number");
      }

      // end of number
      break;
    }

    save_and_next(ls);
  }

  // Convert the accumulated text; parse errors are not expected here
  // because the loop above only admits digits and a single dot.
  long double number = strtold(ls->buf->buf, NULL);

  info->string = strdup(ls->buf->buf);
  info->number = number;
}
Exemplo n.º 6
0
/*
 * Feed a chunk of input to the parser by forwarding it to the lexer.
 * Returns 0 on success; on lexer failure, delegates to lexer_error()
 * (which records the error on self and returns -1).
 */
int
tdi_soup_parser_feed(tdi_soup_parser *self, PyObject *food)
{
    int res = tdi_soup_lexer_feed(self->lexer, food);

    return (res == -1) ? lexer_error(self) : 0;
}
Exemplo n.º 7
0
/* Consumes the next token if it has the requested type; reports a lexer
 * error naming the expected type otherwise. Returns the consumed token,
 * or NULL when absent or when the lexer is already in an error state. */
token_t* lexer_expect(lexer_state_t* lex, token_types_t tokentype) {
    token_t* tok;

    if ( lex->error ) {
        return NULL;
    }

    tok = lexer_nextif(lex, tokentype);

    if ( !tok ) {
        lexer_error(lex, "Expected %s token.\n", token_names[tokentype]);
    }

    return tok;
}
Exemplo n.º 8
0
// a string terminated with NULL is extracted, in str, with iter point to NULL
/* Extracts an identifier ([A-Za-z][A-Za-z0-9]*) from code->code starting
 * at position `iter`, copying it NUL-terminated into id->str.
 * Returns the identifier length (id->iter), or -1 when the first
 * character is not a letter or the identifier would exceed
 * max_id_length - 1 characters (room is reserved for the terminator).
 * Assumes the caller has reset id->iter before the call. */
int extract_identifier(code_t *code, int iter, identifier_t *id) {
	char curr = code->code[iter];
	/* Cast to unsigned char: passing a plain (possibly negative) char
	 * to the <ctype.h> classifiers is undefined behavior. */
	if(isalpha((unsigned char) curr) == false ) return -1;

	while(true) {
		id->str[id->iter] = curr;
		id->iter++;

		// [error] exceed max_id_length (keep one slot for '\0')
		if(id->iter == max_id_length - 1) {
			lexer_error("exceed max_id_length");
			return -1;
		}

		// advance; a non-alphanumeric character terminates the identifier
		curr = code->code[++iter];
		if(isalnum((unsigned char) curr) == false) {
			id->str[id->iter] = '\0';
			return id->iter;
		}
	}
}
Exemplo n.º 9
0
	/// @brief Parses a quoted string literal starting at input[i] (the
	/// opening quote, either ' or ") into a lexeme appended to `output`.
	/// Supports \r, \n and two-digit \hh hex escapes; a lone backslash
	/// before any other character is an error. On success `i` is left one
	/// past the closing quote. Reports errors via lexer_error, which is
	/// presumed not to return normally (TODO confirm: the '\n' case below
	/// would otherwise fall through and keep scanning).
	void lexer::parse_string(line_of_code & output)
	{
		std::string string;
		char string_character = input[i];  // remember which quote opened the literal
		i++;
		// (removed: unused local `start` — dead code)
		for(; i < end; i++)
		{
			char byte = input[i];
			switch(byte)
			{
				case '\\':
				{
					// need at least the escape selector after the backslash
					if(end - i < 2)
						lexer_error("Backslash at the end of the input");

					i++;

					char next_byte = input[i];
					
					switch(next_byte)
					{
						case 'r':
							string.push_back('\r');
							continue;

						case 'n':
							string.push_back('\n');
							continue;
					}

					// \hh — two hex digits naming a byte value
					if(ail::is_hex_digit(next_byte))
					{
						if(end - i < 2)
							lexer_error("Incomplete hex number escape sequence at the end of the input");

						if(!ail::is_hex_digit(input[i + 1]))
							lexer_error("Invalid hex number escape sequence");

						std::string hex_string = input.substr(i, 2);
						i++;  // loop's i++ consumes the second digit
						char new_byte = ail::string_to_number<char>(hex_string, std::ios_base::hex);
						string.push_back(new_byte);
					}
					else
						lexer_error("Invalid escape sequence: " + ail::hex_string_8(static_cast<uchar>(next_byte)));
					break;
				}

				case '\n':
					lexer_error("Detected a newline in a string");
					break;

				case '\'':
				case '"':
					// only the quote kind that opened the literal closes it;
					// the other kind is an ordinary character
					if(byte == string_character)
					{
						output.lexemes.push_back(lexeme(string));
						i++;
						return;
					}
					string.push_back(byte);
					break;

				default:
					string.push_back(byte);
					break;
			}
		}
		lexer_error("String lacks terminator");
	}
Exemplo n.º 10
0
/*
 * Validate syntax.
 */
token_data * lexer_check(struct inifile *inf, token_data *data)
{
	switch(data->curr) {
	case QUOTE:
		if((inf->options & INIFILE_ALLOW_QUOTE) == 0) {
			return lexer_error(inf, data, "quoted strings is not allowed");
		}
		if(data->seen == QUOTE) {
			if(data->quote.sch != data->quote.ech) {
				return lexer_error(inf, data, "unbalanced quoted string");
			}
		}
		break;
	case WHITESP:
	case CDATA:
	case NONE:
		break;
	case BSECT:
		if(data->seen != NONE) {
			return lexer_error(inf, data, "begin section inside section");
		}
		break;
	case ESECT:
		if(data->seen != BSECT) {
			return lexer_error(inf, data, "end section without begin section");
		}
		break;
	case ASSIGN:
		if(data->seen != NONE) {
			if(data->seen == BSECT) {
				return lexer_error(inf, data, "assignment inide section");
			} else if(data->seen == ASSIGN) {
				if(data->cls == VALUE && inf->options & INIFILE_ASSIGN_INSIDE) {
					/* 
					 * Allow assignment inside value.
					 */
					return data;
				}
				return lexer_error(inf, data, "dual assignment detected (misstype?)");
			} else if(data->seen == ESECT) {
				return lexer_error(inf, data, "assignment to section");
			} else {
				return lexer_error(inf, data, "assignment without keyword");
			}
		}
		if(data->cls != KEYWORD) {
			return lexer_error(inf, data, "assignment to non-keyword");
		}
		break;
	case EOSTR:
		if(data->seen != ASSIGN &&
		   data->seen != ESECT &&
		   data->seen != NONE &&
		   data->seen != QUOTE) {
			if(data->seen == BSECT) {
				return lexer_error(inf, data, "end of string while looking for matching end of section");
			} else {
				return lexer_error(inf, data, "unexpected end of string");
			}
		}
		if(data->seen == QUOTE) {
			if(!data->quote.ech) {
				if(data->prev != MLINE) {
					return lexer_error(inf, data, "unterminated quote");
				}
			}
			if(data->quote.num % 2) {
				return lexer_error(inf, data, "unbalanced number of quotes");
			}
		}
		break;
	case COMMENT:
		if(data->seen == ASSIGN ||
		   data->seen == BSECT) {
			if(data->cls != VALUE) {
				return lexer_error(inf, data, "assignment or section without value");
			}
		}
		break;
	}

	switch(data->prev) {
	case MLINE:
		if(data->curr == EOSTR || data->curr == WHITESP) {
			if((inf->options & INIFILE_ALLOW_MULTILINE) == 0) {
				return lexer_error(inf, data, "multiline value");
			}
			if(data->cls == KEYWORD) {
				return lexer_error(inf, data, "multiline keyword");
			}
		}
		break;
	}
	
	return data;
}
Exemplo n.º 11
0
/*
 * Handle a lexer STARTTAG event: sanitize the tag name, close any open
 * tags that cannot nest under the new one, switch the lexer to CDATA
 * mode when required, forward the event to the callback, and push the
 * tag onto the stack unless it is self-closed.
 * Returns 0 on success, -1 on failure (self->last_error set).
 */
static int
handle_starttag(tdi_soup_parser *self, tdi_parser_event *event,
                tdi_lexer_event *event_)
{
    PyObject *name, *normname, *tmp, *data;
    int res;

    if (self->inempty && close_empty(self) == -1) return -1;

    /* sanitize */
    /* An empty name with no attributes reuses the last opened tag name;
     * otherwise remember this name as the new "last opened". */
    if (PyString_GET_SIZE(event_->info.starttag.name) == 0
        && PyList_GET_SIZE(event_->info.starttag.attr) == 0) {
        name = (Py_INCREF(self->lastopen), self->lastopen);
    }
    else {
        name = event_->info.starttag.name;
        Py_INCREF(name);
        Py_CLEAR(self->lastopen);
        self->lastopen = (Py_INCREF(name), name);
    }

    if (!(normname = self->normalize(self->normalize_ctx, name)))
        goto error;

    /* close unnestables */
    /* Pop stack entries (emitting synthetic ENDTAGs with empty data)
     * until the top of the stack may contain the new tag. */
    while (self->tagstack) {
        res = self->nestable(self->nestable_ctx,
                             self->tagstack->normname, normname);
        if (res == -1)
            goto error_normname;
        if (res) break;

        event->type = TDI_PARSER_EVENT_ENDTAG;
        if (!(data = PyString_FromString("")))
            goto error_normname;
        /* Hold a reference to the popped name across the callback. */
        tmp = self->tagstack->name;
        event->info.endtag.name = (Py_INCREF(tmp), tmp);
        event->info.endtag.data = data;
        tagstack_pop(&self->tagstack);
        res = !self->cb(event, self->cb_ctx) ? 0 : -1;
        Py_DECREF(tmp);
        Py_DECREF(data);
        if (res == -1)
            goto error_normname;
    }

    /* CDATA */
    /* Non-closed tags may switch the lexer into CDATA mode (e.g. for
     * script/style-like content, per self->cdata's decision). */
    if (!event_->info.starttag.closed) {
        if ((res = self->cdata(self->cdata_ctx, normname)) == -1)
            goto error_normname;
        if (res) {
            res = tdi_soup_lexer_state_cdata(self->lexer, self->normalize,
                                             self->normalize_ctx, normname);
            if (res == -1) {
                lexer_error(self);
                goto error_normname;
            }
        }
    }

    /* pass event */
    event->type = TDI_PARSER_EVENT_STARTTAG;
    event->info.starttag.name = name;
    event->info.starttag.attr = event_->info.starttag.attr;
    event->info.starttag.closed = event_->info.starttag.closed;
    event->info.starttag.data = event_->info.starttag.data;
    if (self->cb(event, self->cb_ctx))
        goto error_normname;

    /* Maintain stack */
    if (!event_->info.starttag.closed) {
        if (tagstack_push(&self->tagstack, normname, name) == -1)
            goto error_normname;
        if ((res = self->empty(self->empty_ctx, normname)) == -1)
            goto error_normname;
        if (res)
            self->inempty = 1;
    }

    /* cleanup & finish */
    Py_DECREF(normname);
    Py_DECREF(name);
    return 0;

error_normname:
    Py_DECREF(normname);
error:
    Py_DECREF(name);

    /* Preserve a more specific error if one was already recorded. */
    if (!self->last_error)
        self->last_error = TDI_PARSER_ERR_ENV;
    return -1;
}
Exemplo n.º 12
0
/* Reads and returns the next token from lex->source, advancing
 * lex->sourceIndex / lineIndex / lineNumber. Recognizes, in order:
 * EOF, int/double literals, identifiers (name or special tokens),
 * quoted strings (with backslash escapes), and multi-character special
 * tokens; anything else is an error. Returns NULL on error.
 * NOTE(review): the grow-buffer/realloc pattern is duplicated four
 * times below — candidate for a helper. */
token_t* _lexer_read_token(lexer_state_t* lex) {
    /* Skip whitespace, tracking line/column positions. */
    while ( (lex->source[lex->sourceIndex] == ' '  || lex->source[lex->sourceIndex] == '\t' ||
            lex->source[lex->sourceIndex] == '\n') && lex->sourceIndex < lex->_sourceLen ) {
        if ( lex->source[lex->sourceIndex] == '\n' ) {
            lex->lineNumber++;
            lex->lineIndex = 0;
        }
        else {
            lex->lineIndex++;
        }

        lex->sourceIndex++;
    }

    if ( lex->sourceIndex >= lex->_sourceLen ) {
        return token_init(eof_token, lex->lineNumber, lex->lineIndex, NULL);
    }

    char current = lex->source[lex->sourceIndex];

    /* Numeric literal: integer part, then optional ".fraction". */
    if ( current >= '0' && current <= '9' ) {
        unsigned int currAlloc = MALLOC_CHUNK;
        unsigned int index = 0;
        char* str = (char*) malloc(currAlloc * sizeof(char));

        while ( (current >= '0' && current <= '9') && (lex->sourceIndex + index) < lex->_sourceLen ) {
            /* Grow the buffer, keeping one slot for the terminator. */
            if ( index >= (currAlloc - 1) ) {
                currAlloc += MALLOC_CHUNK;
                char* temp = (char*) realloc(str, currAlloc);

                if ( !temp ) {
                    /* Commit position before reporting so the error
                     * points at the failure site. */
                    lex->sourceIndex += index;
                    lex->lineIndex += index;
                    lexer_error(lex, "Ran out of memory.\n");

                    free(str);
                    return NULL;
                }

                str = temp;
            }

            str[index] = current;
            str[index + 1] = 0;

            current = lex->source[lex->sourceIndex + (++index)];
        }

        if ( current == '.') {
            /* Fractional part: append '.' then trailing digits. */
            currAlloc += MALLOC_CHUNK;

            char* temp = (char*) realloc(str, currAlloc);

            if ( !temp ) {
                lex->sourceIndex += index;
                lex->lineIndex += index;
                lexer_error(lex, "Ran out of memory.\n");

                free(str);
                return NULL;
            }

            str = temp;

            str[index] = '.';
            str[index + 1] = 0;

            current = lex->source[lex->sourceIndex + (++index)];

            while ( (current >= '0' && current <= '9') && (lex->sourceIndex + index) < lex->_sourceLen ) {
                if ( index >= (currAlloc - 1) ) {
                    currAlloc += MALLOC_CHUNK;
                    char* temp = (char*) realloc(str, currAlloc);

                    if ( !temp ) {
                        lex->sourceIndex += index;
                        lex->lineIndex += index;
                        lexer_error(lex, "Ran out of memory.\n");

                        free(str);
                        return NULL;
                    }

                    str = temp;
                }

                str[index] = current;
                str[index + 1] = 0;

                current = lex->source[lex->sourceIndex + (++index)];
            }

            lex->sourceIndex += index;
            lex->lineIndex += index;

            /* Token owns a heap-allocated double as its value. */
            double* v = (double*) malloc(sizeof(double));
            *v = atof(str);

            token_t* token = token_init(double_token, lex->lineNumber, lex->lineIndex, (void*) v);

            free(str);
            return token;
        }
        else {
            lex->sourceIndex += index;
            lex->lineIndex += index;

            /* NOTE(review): atol returns long but is stored into an
             * int* — values beyond INT_MAX silently truncate. */
            int* v = (int*) malloc(sizeof(int));
            *v = atol(str);

            token_t* token = token_init(int_token, lex->lineNumber, lex->lineIndex, (void*) v);

            free(str);
            return token;
        }
    }
    /* Identifier: letters, digits, underscore; classified as a special
     * token if it matches one of lex->specialTokens, else a name. */
    else if ( (current >= 'a' && current <= 'z') || (current >= 'A' && current <= 'Z') ||
              (current >= '0' && current <= '9') || (current == '_')) {
        unsigned int currAlloc = MALLOC_CHUNK;
        char* str = (char*) malloc(currAlloc * sizeof(char));
        str[0] = current;
        str[1] = 0;

        unsigned int index = 1;
        current = lex->source[lex->sourceIndex + index];

        while ( ((current >= 'a' && current <= 'z') || (current >= 'A' && current <= 'Z') ||
                 (current >= '0' && current <= '9') || (current == '_')) &&
                 (lex->sourceIndex + index) < lex->_sourceLen ) {
            if ( index >= (currAlloc - 1) ) {
                currAlloc += MALLOC_CHUNK;
                char* temp = (char*) realloc(str, currAlloc);

                if ( !temp ) {
                    lex->sourceIndex += index;
                    lex->lineIndex += index;
                    lexer_error(lex, "Ran out of memory.\n");

                    free(str);
                    return NULL;
                }

                str = temp;
            }

            str[index] = current;
            str[index + 1] = 0;

            current = lex->source[lex->sourceIndex + (++index)];
        }

        lex->sourceIndex += index;
        lex->lineIndex += index;

        /* Shrink the buffer to its exact size; the token takes ownership. */
        char* temp = (char*) realloc(str, strlen(str) + 1);

        if ( !temp ) {
            lexer_error(lex, "Ran out of memory.\n");

            free(str);
            return NULL;
        }

        bool special = false;

        for ( int i = 0; i < lex->specialTokenLength; i++ ) {
            if ( strcmp(temp, lex->specialTokens[i]) == 0 ) {
                special = true;
                break;
            }
        }

        token_t* token = token_init((special ? special_token : name_token), lex->lineNumber, lex->lineIndex, (void*) temp);

        return token;
    }
    /* Quoted string: either quote kind; a non-escaped matching quote
     * terminates. Single backslashes are dropped (escape introducers),
     * doubled backslashes emit one backslash. */
    else if ( current == '\"' || current == '\'' ) {
        char initial = current;

        lex->sourceIndex += 1;
        lex->lineIndex += 1;

        current = lex->source[lex->sourceIndex];

        unsigned int currAlloc = MALLOC_CHUNK;
        char* str = (char*) malloc(currAlloc * sizeof(char));
        str[0] = current;
        str[1] = 0;

        unsigned int index = 1;
        current = lex->source[lex->sourceIndex + index];

        while ( !(current == initial && lex->source[lex->sourceIndex + index - 1] != '\\') &&
                 (lex->sourceIndex + index) <= lex->_sourceLen ) {
            if ( index >= (currAlloc - 1) ) {
                currAlloc += MALLOC_CHUNK;
                char* temp = (char*) realloc(str, currAlloc);

                if ( !temp ) {
                    lex->sourceIndex += index;
                    lex->lineIndex += index;
                    lexer_error(lex, "Ran out of memory.\n");

                    free(str);
                    return NULL;
                }

                str = temp;
            }

            /* Skip a lone backslash (escape introducer); keep the char
             * after it via the normal append below. */
            if ( lex->source[lex->sourceIndex + index] == '\\' ) {
                if ( lex->source[lex->sourceIndex + index - 1] != '\\' ) {
                    current = lex->source[lex->sourceIndex + (++index)];
                    continue;
                }
            }

            int t = strlen(str);
            str[t] = current;
            str[t + 1] = 0;

            current = lex->source[lex->sourceIndex + (++index)];
        }

        lex->sourceIndex += index;
        lex->lineIndex += index;

        char* temp = (char*) realloc(str, strlen(str) + 1);

        if ( !temp ) {
            lexer_error(lex, "Ran out of memory.\n");

            free(str);
            return NULL;
        }

        token_t* token = token_init(string_token, lex->lineNumber, lex->lineIndex, (void*) temp);

        /* Step past the closing quote. */
        lex->sourceIndex++;

        return token;
    }
    /* Otherwise: try to match one of the configured special tokens by
     * prefix at the current position. */
    else {
        for ( int i = 0; i < lex->specialTokenLength; i++ ) {
            bool good = true;

            if ( lex->specialTokens[i] != NULL && lex->specialTokens[i][0] == current ) {
                /* NOTE(review): strlen in the loop condition makes this
                 * O(len^2) per candidate — harmless for short tokens. */
                for ( int j = 0; j < strlen(lex->specialTokens[i]); j++ ) {
                    if ( lex->specialTokens[i][j] != lex->source[j + lex->sourceIndex] ) {
                        good = false;
                        break;
                    }
                }
            }
            else {
                good = false;
            }

            if ( good ) {
                char* copy = (char*) malloc((strlen(lex->specialTokens[i]) + 1) * sizeof(char));;
                strcpy(copy, lex->specialTokens[i]);

                lex->sourceIndex += strlen(copy);
                lex->lineIndex += strlen(copy);

                token_t* tok = token_init(special_token, lex->lineNumber, lex->lineIndex, (void*) copy);

                return tok;
            }
        }

        lexer_error(lex, "Unexpected token %c\n", current);
    }

    return NULL;
}
Exemplo n.º 13
0
Arquivo: lex.c Projeto: relrod/arroyo
/* Main scanner loop: skips whitespace and comments, then returns the
 * next token code — either a single-character token (its char value),
 * a TK_* constant for multi-character tokens / literals / identifiers,
 * TK_EOS at end of input, or TK_ERROR if lexer_error() longjmps back
 * to the setjmp below. Literal text accumulates in ls->buf. */
static int lex(lexer_state *ls, token_info *info)
{
  buffer_reset(ls->buf);

  /* Error recovery point: lexer_error() longjmps here. */
  if(setjmp(ls->error.buf))
    return TK_ERROR;

  for(;;) {
    switch(ls->current) {

    case '\n': case '\r': { // newline
      inc_line(ls);
      break;
    }

    case ' ': case '\t': { // whitespace
      next(ls);
      break;
    }

    case '-': { // comment or minus
      // minus
      if(next(ls) != '-') return '-';

      // comment ("--"), skip line
      while(next(ls) != EOS && !isnewline(ls));
      break;
    }

    case '=': { // EQ
      next(ls);
      return '=';
    }

    case '<': { // LT, LTE, ASSIGN
      next(ls);
      if(ls->current == '=') {
        next(ls);
        return TK_LTE;           // "<="
      }
      else if(ls->current == '-'){
        next(ls);
        return TK_ASSIGN;        // "<-"
      }
      else return '<';
    }

    case '>': { // GT, GTE
      next(ls);
      if(ls->current == '=') {
        next(ls);
        return TK_GTE;           // ">="
      }
      else return '>';
    }

    case '/': { // NEQ, DIV
      next(ls);
      if(ls->current == '=') {
        next(ls);
        return TK_NEQ;           // "/="
      }

      else return '/';
    }

    case '"': { // STRING
      read_string(ls, info);
      return TK_STRING;
    }

    case EOS: { // EOS
      return TK_EOS;
    }

    default: {
      if(lisdigit(ls->current)) { // NUMERIC
        read_numeric(ls, info);
        return TK_REAL;
      }

      if(lisalpha(ls->current)) { // ID or RESERVED
        return read_id_or_reserved(ls, info);
      }

      // capture before next() replaces ls->current
      int c = ls->current;

      // valid operators, single character tokens, etc.
      // NOTE(review): '-', '/', '>', '<', '=' here are unreachable —
      // the outer switch already consumed those characters.
      switch(ls->current) {
      case '+': case '-': case '*': case '/':
      case '!': case '>': case '<': case '=':
      case '(': case ')': case '[': case ']':
      case '{': case '}': case ':': case '.':
      case ',':
        next(ls);
        return c;

      default:
        // unknown character: report, skip it, and keep scanning
        lexer_error(ls, "unrecognized symbol %c", c);
        next(ls);
      }
    }
    }
  }
}
Exemplo n.º 14
0
	/// @brief Parses a numeric literal starting at input[i] into a lexeme
	/// appended to `output`. Handles hex literals ("0x...") as unsigned
	/// integers, and decimal literals as signed integers or floating
	/// point values (at most one dot, which may not be trailing).
	/// @return true if a number was parsed (i advanced past it),
	/// false if input[i] is not a digit (i unchanged).
	///
	/// Fixes two bugs in the previous revision:
	///  - the 'x' of the hex prefix was tested at input[i + 1], but after
	///    i++ past the '0' it sits at input[i], so hex literals were
	///    never recognized;
	///  - the hex digits were extracted with substr(hex_start, i - end),
	///    where i < end makes the size_t count wrap and grab everything
	///    to the end of the input instead of the hex_length digits.
	bool lexer::parse_number(line_of_code & output)
	{
		std::size_t start = i;
		char byte = input[i];

		if(ail::is_digit(byte))
		{
			i++;
			if(byte == '0')
			{
				std::size_t remaining_bytes = end - i;
				if(remaining_bytes > 0)
				{
					// the character right after the leading '0'
					char next_byte = input[i];
					if(next_byte == 'x')
					{
						// skip the 'x' of the "0x" prefix
						i++;
						remaining_bytes = end - i;
						if(remaining_bytes == 0)
							number_parsing_error("Incomplete hex number at the end of the input");

						std::size_t hex_start = i;

						// consume the run of hex digits
						for(; i < end && ail::is_hex_digit(input[i]); i++);
						
						std::size_t hex_length = i - hex_start;
						if(hex_length == 0)
							lexer_error("Incomplete hex number");

						std::string hex_string = input.substr(hex_start, hex_length);
						types::unsigned_integer value = ail::string_to_number<types::unsigned_integer>(hex_string, std::ios_base::hex);
						output.lexemes.push_back(lexeme(value));
						return true;
					}
				}
			}

			char const dot = '.';

			// scan the remaining digits; a single dot flips the literal
			// to floating point, a second dot is an error
			bool got_dot = false;
			char last_byte = byte;
			for(; i < end; i++)
			{
				byte = input[i];
				if(byte == dot)
				{
					if(got_dot)
						number_parsing_error("Encountered a floating point value containing multiple dots");
					else
						got_dot = true;
				}
				else if(!ail::is_digit(byte))
					break;

				last_byte = byte;
			}

			if(last_byte == dot)
				number_parsing_error("Encountered a floating point value ending with a dot");

			std::string number_string = input.substr(start, i - start);
			lexeme current_lexeme;
			if(got_dot)
				current_lexeme = lexeme(ail::string_to_number<types::floating_point_value>(number_string));
			else
				current_lexeme = lexeme(ail::string_to_number<types::signed_integer>(number_string));
			output.lexemes.push_back(current_lexeme);

			return true;
		}
		else
			return false;
	}