Beispiel #1
0
/*
 * Lex a simple (unquoted) symbol.
 *
 * Preconditions (checked by assert):
 * - lex->buffer is empty
 * - the reader's current character satisfies issimple
 *
 * The current character and every following character accepted by
 * issimple are copied to the buffer, which is then closed. On return
 * the reader's current character is the first one issimple rejected.
 *
 * Returns the token id stored in the entry that in_smt2_tk finds for
 * the buffer content, or SMT2_TK_SYMBOL if in_smt2_tk returns NULL.
 */
static smt2_token_t smt2_read_symbol(lexer_t *lex) {
  reader_t *input = &lex->reader;
  string_buffer_t *text = lex->buffer;
  const keyword_t *entry;
  int ch = reader_current_char(input);

  assert(string_buffer_length(text) == 0 && issimple(ch));

  // the first character is copied unconditionally
  for (;;) {
    string_buffer_append_char(text, ch);
    ch = reader_next_char(input);
    if (!issimple(ch)) {
      break;
    }
  }
  string_buffer_close(text);

  // reserved words take precedence over plain symbols
  entry = in_smt2_tk(text->data, text->index);
  if (entry == NULL) {
    return SMT2_TK_SYMBOL;
  }
  return entry->tk;
}
Beispiel #2
0
/*
 * Lex a symbol or keyword of the Yices language.
 *
 * Expected on entry (not checked here): lex->buffer already holds the
 * first character of the symbol and the reader's current character is
 * the one that follows it.
 *
 * Characters are appended to the buffer until is_yices_sep accepts the
 * current character; that separator is left unread. The buffer is then
 * closed.
 *
 * Returns the token stored in the entry found by in_yices_kw for the
 * buffer content, or TK_SYMBOL if in_yices_kw returns NULL.
 */
static yices_token_t read_symbol(lexer_t *lex) {
  reader_t *input = &lex->reader;
  string_buffer_t *text;
  const keyword_t *entry;
  token_t tk;
  int ch;

  ch = reader_current_char(input);
  text = lex->buffer;

  for (; !is_yices_sep(ch); ch = reader_next_char(input)) {
    string_buffer_append_char(text, ch);
  }
  string_buffer_close(text);

  entry = in_yices_kw(text->data, text->index);
  if (entry != NULL) {
    tk = entry->tk;
  } else {
    tk = TK_SYMBOL;
  }

  return tk;
}
Beispiel #3
0
/*
 * Lex a quoted symbol: a sequence of characters enclosed in '|'.
 *
 * Preconditions (checked by assert):
 * - lex->buffer is empty
 * - the reader's current character is the opening '|'
 *
 * Characters after the opening '|' are copied to the buffer until one
 * of the following is read:
 * - the closing '|'
 * - a backslash
 * - a character rejected by ok_char
 * The delimiters are not stored in the buffer. The buffer is closed
 * before returning.
 *
 * Returns SMT2_TK_QSYMBOL if the scan ended on '|' (the closing '|' is
 * then consumed), and SMT2_TK_INVALID_SYMBOL otherwise (the offending
 * character stays as the reader's current character).
 */
static smt2_token_t smt2_read_quoted_symbol(lexer_t *lex) {
  reader_t *input = &lex->reader;
  string_buffer_t *text = lex->buffer;
  int ch;

  assert(string_buffer_length(text) == 0 &&
         reader_current_char(input) == '|');

  ch = reader_next_char(input);
  while (ch != '|' && ch != '\\' && ok_char(ch)) {
    string_buffer_append_char(text, ch);
    ch = reader_next_char(input);
  }
  string_buffer_close(text);

  if (ch != '|') {
    // stopped on a backslash or on a character ok_char does not accept
    return SMT2_TK_INVALID_SYMBOL;
  }

  // skip the closing '|'
  reader_next_char(input);
  return SMT2_TK_QSYMBOL;
}
Beispiel #4
0
/*
 * Lex a hexadecimal literal of the form #x<hex digits>.
 *
 * Preconditions (checked by assert):
 * - lex->buffer contains exactly "#"
 * - the reader's current character is 'x'
 *
 * The 'x' and all hexadecimal digits (as decided by isxdigit) that
 * follow are appended to the buffer, which is then closed. The reader
 * stops on the first character that is not a hexadecimal digit.
 *
 * Returns:
 * - SMT2_TK_HEXADECIMAL if at least one digit follows "#x"
 * - SMT2_TK_INVALID_HEXADECIMAL if the buffer is just "#x"
 */
static smt2_token_t smt2_read_hexa(lexer_t *lex) {
  reader_t *input = &lex->reader;
  string_buffer_t *text = lex->buffer;
  int ch = reader_current_char(input);

  assert(string_buffer_length(text) == 1 &&
         text->data[0] == '#' && ch == 'x');

  // copy the 'x' first, then the digits
  string_buffer_append_char(text, ch);
  for (ch = reader_next_char(input); isxdigit(ch); ch = reader_next_char(input)) {
    string_buffer_append_char(text, ch);
  }
  string_buffer_close(text);

  // "#x" is two characters long: a valid literal must be longer
  if (string_buffer_length(text) > 2) {
    return SMT2_TK_HEXADECIMAL;
  }
  return SMT2_TK_INVALID_HEXADECIMAL;
}
Beispiel #5
0
/*
 * Lex a keyword, i.e. ':' followed by simple characters.
 *
 * Preconditions (checked by assert):
 * - lex->buffer is empty
 * - the reader's current character is ':'
 *
 * The ':' and every following character accepted by issimple are
 * copied to the buffer, which is then closed. The reader stops on the
 * first character that issimple rejects.
 *
 * Returns SMT2_TK_KEYWORD if the buffer holds more than the ':' alone,
 * and SMT2_TK_INVALID_KEYWORD otherwise.
 */
static smt2_token_t smt2_read_keyword(lexer_t *lex) {
  reader_t *input = &lex->reader;
  string_buffer_t *text = lex->buffer;
  int ch = reader_current_char(input);

  assert(string_buffer_length(text) == 0 && ch == ':');

  // the ':' is always stored, whatever follows
  string_buffer_append_char(text, ch);
  ch = reader_next_char(input);
  while (issimple(ch)) {
    string_buffer_append_char(text, ch);
    ch = reader_next_char(input);
  }
  string_buffer_close(text);

  if (string_buffer_length(text) > 1) {
    return SMT2_TK_KEYWORD;
  }
  return SMT2_TK_INVALID_KEYWORD;
}
Beispiel #6
0
/*
 * Lex a binary literal of the form #b<0/1 digits>.
 *
 * Preconditions (checked by assert):
 * - lex->buffer contains exactly "#"
 * - the reader's current character is 'b'
 *
 * The 'b' and the run of '0'/'1' characters that follows are appended
 * to the buffer, which is then closed. The reader stops on the first
 * character that is neither '0' nor '1'.
 *
 * Returns:
 * - SMT2_TK_BINARY if at least one bit follows "#b"
 * - SMT2_TK_INVALID_BINARY if the buffer is just "#b"
 */
static smt2_token_t smt2_read_binary(lexer_t *lex) {
  reader_t *input = &lex->reader;
  string_buffer_t *text = lex->buffer;
  smt2_token_t result;
  int ch = reader_current_char(input);

  assert(string_buffer_length(text) == 1 &&
         text->data[0] == '#' && ch == 'b');

  // store the 'b', then every bit that follows
  string_buffer_append_char(text, ch);
  ch = reader_next_char(input);
  while (ch == '0' || ch == '1') {
    string_buffer_append_char(text, ch);
    ch = reader_next_char(input);
  }
  string_buffer_close(text);

  // "#b" alone has length 2: anything longer contains a bit
  result = SMT2_TK_INVALID_BINARY;
  if (string_buffer_length(text) > 2) {
    result = SMT2_TK_BINARY;
  }

  return result;
}
Beispiel #7
0
/*
 * Lex a string literal using the SMT-LIB 2.5 escape convention.
 *
 * Precondition (checked by assert): the reader's current character is
 * the opening '"'.
 *
 * Inside the literal, two consecutive double quotes stand for a single
 * '"' character, which is what gets stored in the buffer. A double
 * quote followed by anything else closes the literal; in that case the
 * reader has already moved to the character after the closing quote.
 *
 * A character below 32 that isspace does not accept ends the scan with
 * an error. EOF is negative, so it falls in this category.
 *
 * The buffer receives the content without the delimiting quotes and
 * is closed before returning.
 *
 * Returns SMT2_TK_STRING on a properly closed literal and
 * SMT2_TK_INVALID_STRING otherwise.
 */
static smt2_token_t smt2_read_string_var(lexer_t *lex) {
  reader_t *input = &lex->reader;
  string_buffer_t *text = lex->buffer;
  smt2_token_t result;
  int ch;

  assert(reader_current_char(input) == '"');

  for (;;) {
    ch = reader_next_char(input);
    if (ch == '"') {
      // either the closing quote or the first half of a "" escape
      ch = reader_next_char(input);
      if (ch != '"') {
        result = SMT2_TK_STRING;
        break;
      }
    }
    if (ch >= 32 || isspace(ch)) {
      string_buffer_append_char(text, ch);
      continue;
    }
    // control character (or EOF) inside the literal
    result = SMT2_TK_INVALID_STRING;
    break;
  }

  string_buffer_close(text);

  return result;
}
Beispiel #8
0
/*
 * Read a string literal of the Yices language.
 *
 * Precondition (checked by assert): the reader's current character is
 * the opening '"'. The buffer is expected to be empty (not checked).
 *
 * The content of the literal, without the delimiting quotes, is stored
 * in lex->buffer, which is closed before returning. Escape sequences
 * introduced by a backslash are translated as follows:
 * - \n is stored as a newline and \t as a tab
 * - a backslash followed by one to three octal digits is stored as the
 *   character whose code is that octal number
 * - a backslash followed by any other character is stored as that
 *   character alone (the backslash is dropped)
 *
 * Return value:
 * - TK_STRING if the closing '"' was found (it is consumed)
 * - TK_OPEN_STRING if an unescaped newline or EOF is reached first,
 *   including EOF immediately after a backslash
 */
static yices_token_t read_string(lexer_t *lex) {
  yices_token_t tk;
  int c, x;
  reader_t *rd;
  string_buffer_t *buffer;

  rd = &lex->reader;
  buffer = lex->buffer;
  assert(reader_current_char(rd) == '"');

  c = reader_next_char(rd);

  for (;;) {
    if (c == '"') { // end of string
      // consume the closing quote
      reader_next_char(rd);
      tk = TK_STRING;
      break;
    }
    if (c == '\n' || c == EOF) { // missing quotes
      tk = TK_OPEN_STRING;
      break;
    }
    if (c == '\\') {
      // escape sequence
      c = reader_next_char(rd);
      if (c == EOF) {
        // the input ends right after the backslash: report an open
        // string instead of copying EOF into the buffer as a character
        tk = TK_OPEN_STRING;
        break;
      }
      switch (c) {
      case 'n': c = '\n'; break;
      case 't': c = '\t'; break;
      default:
        if ('0' <= c && c <= '7') {
          // read at most 2 more octal digits
          x = c - '0';
          c = reader_next_char(rd);
          if ('0' <= c && c <= '7') {
            x = 8 * x + (c - '0');
            c = reader_next_char(rd);
            if ('0' <= c && c <= '7') {
              x = 8 * x + (c - '0');
              c = reader_next_char(rd);
            }
          }
          // x = character built from the octal digits
          // c = character after the last octal digit (not yet processed)
          // NOTE(review): three octal digits can encode up to 511; how a
          // value above 255 is stored depends on string_buffer_append_char
          string_buffer_append_char(buffer, x);
          continue;
        } // else skip '\': copy c in the buffer
        break;
      }
    }
    string_buffer_append_char(buffer, c);
    c = reader_next_char(rd);
  }

  string_buffer_close(buffer);
  return tk;
}
Beispiel #9
0
/*
 * Lex a number whose first digit is '0'.
 *
 * Preconditions (checked by assert):
 * - lex->buffer is empty
 * - the reader's current character is '0'
 *
 * Three shapes are distinguished by the character after the '0':
 * - '.' : the '.' and the digits after it are stored; the token is
 *   SMT2_TK_DECIMAL, or SMT2_TK_INVALID_DECIMAL when no digit follows
 *   the '.'
 * - a digit: all the digits are stored (so that the error message can
 *   show them) and the token is SMT2_TK_INVALID_NUMERAL
 * - anything else: the token is SMT2_TK_NUMERAL and the buffer is "0"
 *
 * The buffer is closed before returning.
 */
static smt2_token_t smt2_read_number0(lexer_t *lex) {
  reader_t *input = &lex->reader;
  string_buffer_t *text = lex->buffer;
  smt2_token_t result = SMT2_TK_NUMERAL;
  int ch = reader_current_char(input);

  assert(string_buffer_length(text) == 0 && ch == '0');

  // leading '0'
  string_buffer_append_char(text, ch);
  ch = reader_next_char(input);

  if (ch == '.') {
    // decimal of the form 0.<digits>
    string_buffer_append_char(text, ch);
    ch = reader_next_char(input);
    while (isdigit(ch)) {
      string_buffer_append_char(text, ch);
      ch = reader_next_char(input);
    }

    // "0." has length 2: a valid decimal has at least one more digit
    if (string_buffer_length(text) > 2) {
      result = SMT2_TK_DECIMAL;
    } else {
      result = SMT2_TK_INVALID_DECIMAL;
    }
  } else if (isdigit(ch)) {
    // malformed numeral such as "00" or "05": collect the digits anyway
    while (isdigit(ch)) {
      string_buffer_append_char(text, ch);
      ch = reader_next_char(input);
    }
    result = SMT2_TK_INVALID_NUMERAL;
  }

  string_buffer_close(text);

  return result;
}
Beispiel #10
0
/*
 * Lex a hexadecimal constant of the Yices language.
 *
 * Expected on entry (not checked here): lex->buffer contains "0" and
 * the reader's current character is 'x'.
 *
 * The current character and all hexadecimal digits (as decided by
 * isxdigit) that follow are appended to the buffer, which is then
 * closed.
 *
 * Returns TK_EMPTY_HEXCONST if the buffer length is at most 2 (nothing
 * after the two-character prefix) and TK_HEX_CONSTANT otherwise.
 */
static yices_token_t read_hex_constant(lexer_t *lex) {
  reader_t *input = &lex->reader;
  string_buffer_t *text;
  int ch;

  ch = reader_current_char(input);
  text = lex->buffer;

  // store the current character, then the digits
  string_buffer_append_char(text, ch);
  for (ch = reader_next_char(input); isxdigit(ch); ch = reader_next_char(input)) {
    string_buffer_append_char(text, ch);
  }
  string_buffer_close(text);

  if (string_buffer_length(text) > 2) {
    return TK_HEX_CONSTANT;
  }
  return TK_EMPTY_HEXCONST; // no digit after the prefix
}
Beispiel #11
0
/*
 * Lex a number whose first digit is not '0'.
 *
 * Preconditions (checked by assert):
 * - lex->buffer is empty
 * - the reader's current character is a digit between '1' and '9'
 *
 * The leading run of digits is stored in the buffer. If the next
 * character is '.', the '.' and the digits after it are stored too.
 * The buffer is closed before returning.
 *
 * Returns:
 * - SMT2_TK_NUMERAL if the digits are not followed by '.'
 * - SMT2_TK_DECIMAL if the '.' is followed by at least one digit
 * - SMT2_TK_INVALID_DECIMAL if the '.' is not followed by a digit
 */
static smt2_token_t smt2_read_number(lexer_t *lex) {
  reader_t *input = &lex->reader;
  string_buffer_t *text = lex->buffer;
  uint32_t int_len;
  int ch = reader_current_char(input);

  assert(string_buffer_length(text) == 0 && isdigit(ch) && ch != '0');

  // integer part: the first digit is stored unconditionally
  string_buffer_append_char(text, ch);
  for (ch = reader_next_char(input); isdigit(ch); ch = reader_next_char(input)) {
    string_buffer_append_char(text, ch);
  }

  if (ch != '.') {
    string_buffer_close(text);
    return SMT2_TK_NUMERAL;
  }

  // remember where the integer part ends to detect an empty fraction
  int_len = string_buffer_length(text);

  // store the '.' and the fractional digits
  string_buffer_append_char(text, ch);
  for (ch = reader_next_char(input); isdigit(ch); ch = reader_next_char(input)) {
    string_buffer_append_char(text, ch);
  }

  if (string_buffer_length(text) <= int_len + 1) {
    // nothing was stored after the '.'
    string_buffer_close(text);
    return SMT2_TK_INVALID_DECIMAL;
  }

  string_buffer_close(text);
  return SMT2_TK_DECIMAL;
}
Beispiel #12
0
/*
 * Read a string literal
 * - the current char must be the opening " (checked by assert)
 * - read all characters until the closing " or until a character
 *   that is neither printable (isprint) nor a space (isspace)
 * - replace the escape sequences \" by " and \\ by \
 * - a backslash followed by any other character is kept as is,
 *   together with that character
 *
 * Result: the lexer's buffer contains the string literal
 * without the delimiting quotes. The buffer is closed.
 * - return code:
 *   SMT2_TK_STRING if the string is valid (the closing quote is
 *   consumed)
 *   SMT2_TK_INVALID_STRING if the string is terminated by a character
 *   that is neither printable nor a space. This includes EOF, and it
 *   also applies to the character that follows a backslash.
 *
 * NOTE: this is not strictly compliant with the SMT-LIB 2.0
 * standard as we may include non-ascii printable characters
 * in the string.
 *
 * NOTE2: the SMT-LIB2 standard says 'a string is any sequence of
 * printable ASCII characters delimited by double quotes ...' but it
 * does not define 'printable ASCII character'. Several benchmarks in
 * SMT-LIB include line breaks inside a string (which are not
 * printable characters), so the loop below accepts both printable
 * characters and spaces.
 */
static smt2_token_t smt2_read_string(lexer_t *lex) {
  reader_t *rd;
  string_buffer_t *buffer;
  int c;
  smt2_token_t tk;

  rd = &lex->reader;
  buffer = lex->buffer;
  assert(reader_current_char(rd) == '"');

  for (;;) {
    c = reader_next_char(rd);
    if (c == '"') {
      // consume the closing quote
      reader_next_char(rd);
      tk = SMT2_TK_STRING;
      break;
    }

    if (!isprint(c) && !isspace(c)) {
      // error
      tk = SMT2_TK_INVALID_STRING;
      break;
    }

    if (c == '\\') {
      c = reader_next_char(rd);
      if (c != '"' && c != '\\') {
        // not an escape sequence: keep the backslash
        string_buffer_append_char(buffer, '\\');
        if (!isprint(c) && !isspace(c)) {
          // the character after the backslash must pass the same
          // check as any other character; in particular EOF must
          // not be copied into the buffer
          tk = SMT2_TK_INVALID_STRING;
          break;
        }
      }
    }
    string_buffer_append_char(buffer, c);
  }

  string_buffer_close(buffer);

  return tk;
}
Beispiel #13
0
/*
 * Read the next token of the Yices language and return its type.
 *
 * Side effects:
 * - lex->buffer is reset, then filled by the token readers
 * - whitespace and comments (from ';' to the end of the line or EOF)
 *   that precede the token are skipped
 * - lex->tk_pos, lex->tk_line and lex->tk_column are set to the reader
 *   position at the start of the token
 * - lex->token is set to the returned value
 *
 * If the token is TK_STRING, TK_NUM_RATIONAL, TK_NUM_FLOAT,
 * TK_BV_CONSTANT, TK_SYMBOL, or TK_ERROR, the token value is stored in
 * lex->buffer (as a string).
 */
yices_token_t next_yices_token(lexer_t *lex) {
  reader_t *input = &lex->reader;
  string_buffer_t *text;
  yices_token_t result;
  int ch;
  int advance = 0; // nonzero: one more character must be consumed on exit

  ch = reader_current_char(input);
  text = lex->buffer;
  string_buffer_reset(text);

  // skip whitespace and comments
  while (isspace(ch) || ch == ';') {
    if (ch == ';') {
      // comment: discard everything up to the end of the line or EOF
      do {
        ch = reader_next_char(input);
      } while (ch != '\n' && ch != EOF);
    } else {
      ch = reader_next_char(input);
    }
  }

  // the token starts here
  lex->tk_pos = input->pos;
  lex->tk_line = input->line;
  lex->tk_column = input->column;

  switch (ch) {
  case '(':
    result = TK_LP;
    advance = 1;
    break;

  case ')':
    result = TK_RP;
    advance = 1;
    break;

  case EOF:
    result = TK_EOS;
    break;

  case ':':
    if (reader_next_char(input) == ':') {
      result = TK_COLON_COLON;
      advance = 1;
    } else {
      // a lone ':' is an error; keep it in the buffer for error reporting
      string_buffer_append_char(text, ':');
      string_buffer_close(text);
      result = TK_ERROR;
    }
    break;

  case '"':
    result = read_string(lex);
    break;

  case '+':
  case '-':
    // a sign starts a number only if a digit follows immediately
    string_buffer_append_char(text, ch);
    ch = reader_next_char(input);
    if (isdigit(ch)) {
      string_buffer_append_char(text, ch);
      reader_next_char(input);
      result = read_number(lex);
    } else {
      result = read_symbol(lex);
    }
    break;

  case '0':
    // "0b" and "0x" prefixes select the dedicated constant readers
    string_buffer_append_char(text, ch);
    switch (reader_next_char(input)) {
    case 'b':
      result = read_bv_constant(lex);
      break;
    case 'x':
      result = read_hex_constant(lex);
      break;
    default:
      result = read_number(lex);
      break;
    }
    break;

  case '1':
  case '2':
  case '3':
  case '4':
  case '5':
  case '6':
  case '7':
  case '8':
  case '9':
    string_buffer_append_char(text, ch);
    reader_next_char(input);
    result = read_number(lex);
    break;

  default:
    // anything else starts a symbol or keyword
    string_buffer_append_char(text, ch);
    reader_next_char(input);
    result = read_symbol(lex);
    break;
  }

  if (advance) {
    // single-character tokens and "::" end on their last character:
    // move past it
    reader_next_char(input);
  }

  lex->token = result;
  return result;
}
Beispiel #14
0
/*
 * Lex the remainder of a number of the Yices language.
 *
 * Expected on entry (not checked here): lex->buffer contains an
 * optional sign followed by one digit, and the reader's current
 * character is the one after that digit.
 *
 * Accepted shapes, after the leading digits:
 * - nothing more: TK_NUM_RATIONAL
 * - '/' and a run of digits: TK_NUM_RATIONAL, or TK_ZERO_DIVISOR when
 *   all the digits after '/' are '0'
 * - an optional '.' with digits and/or an optional exponent ('e' or
 *   'E', optional sign, digits): TK_NUM_FLOAT
 * TK_INVALID_NUM is returned when a '/', a '.', or an exponent marker
 * is not followed by the digits it requires.
 *
 * Everything that was read as part of the number is stored in the
 * buffer, which is closed before returning.
 */
static yices_token_t read_number(lexer_t *lex) {
  reader_t *input = &lex->reader;
  string_buffer_t *text;
  yices_token_t result = TK_NUM_RATIONAL;
  int ch, zero_den;

  ch = reader_current_char(input);
  text = lex->buffer;

  // remaining digits of the integer part
  for (; isdigit(ch); ch = reader_next_char(input)) {
    string_buffer_append_char(text, ch);
  }

  if (ch == '/') {
    // rational: a denominator must follow
    string_buffer_append_char(text, ch);
    ch = reader_next_char(input);
    if (!isdigit(ch)) {
      goto invalid;
    }
    zero_den = true;
    while (isdigit(ch)) {
      if (ch != '0') {
        zero_den = false;
      }
      string_buffer_append_char(text, ch);
      ch = reader_next_char(input);
    }
    if (zero_den) {
      result = TK_ZERO_DIVISOR;
    }
    // neither a fraction nor an exponent may follow a denominator
  } else {
    if (ch == '.') {
      // fractional part: at least one digit is required
      string_buffer_append_char(text, ch);
      ch = reader_next_char(input);
      if (!isdigit(ch)) {
        goto invalid;
      }
      result = TK_NUM_FLOAT;
      while (isdigit(ch)) {
        string_buffer_append_char(text, ch);
        ch = reader_next_char(input);
      }
    }

    if (ch == 'e' || ch == 'E') {
      // exponent: optional sign, then at least one digit
      string_buffer_append_char(text, ch);
      ch = reader_next_char(input);
      if (ch == '+' || ch == '-') {
        string_buffer_append_char(text, ch);
        ch = reader_next_char(input);
      }
      if (!isdigit(ch)) {
        goto invalid;
      }
      result = TK_NUM_FLOAT;
      while (isdigit(ch)) {
        string_buffer_append_char(text, ch);
        ch = reader_next_char(input);
      }
    }
  }

  string_buffer_close(text);
  return result;

 invalid:
  string_buffer_close(text);
  return TK_INVALID_NUM;
}
Beispiel #15
0
/*
 * Read the next SMT-LIB 2 token and return its code.
 *
 * Side effects:
 * - lex->buffer is reset, then filled by the token readers
 * - whitespace and comments (from ';' to the end of the line or EOF)
 *   that precede the token are skipped
 * - lex->tk_pos, lex->tk_line and lex->tk_column are set to the reader
 *   position at the start of the token
 * - lex->token is set to the returned value
 *
 * If the token is not '(' or ')', its value is in lex->buffer as a
 * string.
 *
 * String literals are read by smt2_read_string_var when the flag
 * two_dot_five_variant is set, and by smt2_read_string otherwise.
 */
smt2_token_t next_smt2_token(lexer_t *lex) {
  reader_t *input = &lex->reader;
  string_buffer_t *text;
  smt2_token_t result;
  int ch;
  int advance = 0; // nonzero: one more character must be consumed on exit

  ch = reader_current_char(input);
  text = lex->buffer;
  string_buffer_reset(text);

  // skip whitespace and comments
  while (isspace(ch) || ch == ';') {
    if (ch == ';') {
      // comment: discard everything up to the end of the line or EOF
      do {
        ch = reader_next_char(input);
      } while (ch != '\n' && ch != EOF);
    } else {
      ch = reader_next_char(input);
    }
  }

  // the token starts here
  lex->tk_pos = input->pos;
  lex->tk_line = input->line;
  lex->tk_column = input->column;

  switch (ch) {
  case '(':
    result = SMT2_TK_LP;
    advance = 1;
    break;

  case ')':
    result = SMT2_TK_RP;
    advance = 1;
    break;

  case EOF:
    result = SMT2_TK_EOS;
    break;

  case '"':
    result = two_dot_five_variant ? smt2_read_string_var(lex)
                                  : smt2_read_string(lex);
    break;

  case '#':
    // "#b" starts a binary literal, "#x" a hexadecimal one
    string_buffer_append_char(text, ch);
    switch (reader_next_char(input)) {
    case 'b':
      result = smt2_read_binary(lex);
      break;
    case 'x':
      result = smt2_read_hexa(lex);
      break;
    default:
      // the buffer keeps the '#' for error reporting
      string_buffer_close(text);
      result = SMT2_TK_ERROR;
      break;
    }
    break;

  case '0':
    result = smt2_read_number0(lex);
    break;

  case '1':
  case '2':
  case '3':
  case '4':
  case '5':
  case '6':
  case '7':
  case '8':
  case '9':
    result = smt2_read_number(lex);
    break;

  case ':':
    result = smt2_read_keyword(lex);
    break;

  case '|':
    result = smt2_read_quoted_symbol(lex);
    break;

  default:
    if (issimple(ch)) {
      result = smt2_read_symbol(lex);
    } else {
      // unexpected character: store it for error reporting and skip it
      string_buffer_append_char(text, ch);
      string_buffer_close(text);
      result = SMT2_TK_ERROR;
      advance = 1;
    }
    break;
  }

  if (advance) {
    // parentheses and bad characters are still the current character:
    // move past them
    reader_next_char(input);
  }

  lex->token = result;

  return result;
}