Пример #1
0
// Interprets the leading run of binary digits in `input` as an unsigned
// number, most significant bit first.
//
// Conversion stops at the first character that is_binary_digit() rejects;
// anything after that character is ignored. An input with no leading binary
// digit (including the empty string) yields 0. Bits shifted past the width
// of unsigned int are discarded.
unsigned int binary_string_to_number(std::string const & input)
{
    unsigned int value = 0;
    std::string::const_iterator position = input.begin();
    std::string::const_iterator const last = input.end();

    while(position != last && is_binary_digit(*position))
    {
        // Make room for the next bit, then append it.
        value = (value << 1) | (*position - '0');
        ++position;
    }

    return value;
}
Пример #2
0
// Extracts the next lexeme from the character range [begin, end).
//
// Parameters:
//   file_name - forwarded to lexer_exception() for error reporting.
//   line      - running line counter; incremented whenever a newline is
//               consumed, either directly or as the terminator of a comment.
//               The newline lexeme is created after the increment.
//   begin     - read position; left just past the characters that were
//               consumed for the returned lexeme.
//   end       - end of the input.
//   output    - receives the lexeme when the function returns true.
//
// Returns true when a lexeme was stored in `output`, and false when the
// input was exhausted without producing one (only skipped characters were
// left, or a comment ran to the end of the input without a newline).
//
// Errors (illegal character, newline inside a string, unterminated string,
// bad escape sequence) are reported through lexer_exception().
// NOTE(review): the control flow below relies on lexer_exception() not
// returning (presumably it throws) - confirm against its definition.
bool assembly_lexer(std::string const & file_name, unsigned int & line, std::string::const_iterator & begin, std::string::const_iterator const & end, lexeme & output)
{
    for(; begin != end; ++begin)
    {
        char current_char = *begin;
        // Go through unsigned char so that bytes >= 0x80 do not become a
        // negative index on platforms where plain char is signed.
        // NOTE(review): assumes type_lookup_table covers all 256 byte values.
        char type = type_lookup_table[static_cast<unsigned char>(current_char)];
        switch(type)
        {
        case char_type_illegal:
        {
            lexer_exception(file_name, line, "Illegal character");
        }

        case char_type_name:
        {
            // A name is its first character plus every following name character.
            std::string::const_iterator name_begin = begin;
            for(++begin;
                    (begin != end)
                    &&
                    (is_name_char(*begin) == true);
                    ++begin);
            output = lexeme(lexeme_name, std::string(name_begin, begin), line);
            return true;
        }

        case char_type_digit:
        {
            // Decimal literal starting with a non-zero digit.
            std::string::const_iterator number_begin = begin;
            for(++begin;
                    (begin != end)
                    &&
                    (nil::string::is_digit(*begin) == true)
                    ; ++begin);
            output = lexeme(lexeme_number, std::string(number_begin, begin), line);
            return true;
        }

        case char_type_zero:
        {
            // A leading '0' may start a hexadecimal literal ("0x..."), a
            // binary literal ('0' followed by binary digits) or be a lone zero.
            std::string::const_iterator number_begin = begin;
            ++begin;
            if(begin != end)
            {
                char second_character = *begin;
                if(
                    (second_character == 'x')
                    ||
                    (second_character == 'X')
                )
                {
                    // Accept the full hexadecimal digit set; scanning with
                    // is_digit() would cut "0xFF" down to "0x".
                    for(++begin; begin != end; ++begin)
                    {
                        if(nil::string::is_hex_digit(*begin) == false)
                        {
                            break;
                        }
                    }
                }
                else if(is_binary_digit(second_character) == true)
                {
                    for(++begin;
                            (begin != end)
                            &&
                            (is_binary_digit(*begin) == true)
                            ; ++begin);
                }
            }
            output = lexeme(lexeme_number, std::string(number_begin, begin), line);
            return true;
        }

        case char_type_string:
        {
            // Collect the string body up to the closing quote, expanding
            // backslash escapes on the way.
            std::string string;
            for(++begin; begin != end;)
            {
                char string_char = *begin;
                switch(string_char)
                {
                case '"':
                {
                    ++begin;
                    output = lexeme(lexeme_string, string, line);
                    return true;
                }

                case '\\':
                {
                    // parse_backslash() advances `begin` past the escape itself.
                    try
                    {
                        parse_backslash(begin, end, string);
                    }
                    catch(std::exception const & exception)
                    {
                        lexer_exception(file_name, line, exception.what());
                    }
                    break;
                }

                case '\n':
                {
                    lexer_exception(file_name, line, "Newline in string");
                }

                default:
                {
                    string += string_char;
                    ++begin;
                    break;
                }
                }
            }

            lexer_exception(file_name, line, "Incomplete string at the end of file");
        }

        case char_type_operator:
        {
            ++begin;
            output = lexeme(lexeme_operator, std::string(1, current_char), line);
            return true;
        }

        case char_type_operator_extended:
        {
            // Characters that may start a two-character operator:
            // "&&", "||", "==", "!=", "<=", ">=".
            ++begin;
            std::string operator_text(1, current_char);
            if(begin != end)
            {
                char second_char = *begin;
                bool is_extended = false;
                switch(current_char)
                {
                case '&':
                case '|':
                {
                    // Doubled form: the second character repeats the first.
                    is_extended = (second_char == current_char);
                    break;
                }

                case '=':
                case '!':
                case '<':
                case '>':
                {
                    is_extended = (second_char == '=');
                    break;
                }
                }
                if(is_extended == true)
                {
                    ++begin;
                    operator_text += second_char;
                }
            }
            // An operator that is the very last character of the input is
            // still a complete lexeme, so it is reported like any other.
            output = lexeme(lexeme_operator, operator_text, line);
            return true;
        }

        case char_type_newline:
        {
            ++begin;
            ++line;
            output = lexeme(lexeme_newline, line);
            return true;
        }

        case char_type_comment:
        {
            // Skip to the end of the line; the terminating newline is
            // reported as a newline lexeme.
            for(++begin; begin != end; ++begin)
            {
                if(*begin == '\n')
                {
                    ++begin;
                    ++line;
                    output = lexeme(lexeme_newline, line);
                    return true;
                }
            }
            return false;
        }
        }
        // Any other character type produces no lexeme and is skipped.
    }

    return false;
}
Пример #3
0
void parse_backslash(std::string::const_iterator & begin, std::string::const_iterator const & end, std::string & string)
{
    std::string::const_iterator offset = begin + 1;
    if(offset >= end)
    {
        throw std::runtime_error("Invalid escape sequence");
    }

    bool custom_iterator = false;

    char next_character = *offset;
    switch(next_character)
    {
    case 'a':
    {
        string += '\a';
        break;
    }

    case 'f':
    {
        string += '\f';
        break;
    }

    case 'n':
    {
        string += '\n';
        break;
    }

    case 'r':
    {
        string += '\r';
        break;
    }

    case 't':
    {
        string += '\t';
        break;
    }

    case 'v':
    {
        string += '\v';
        break;
    }

    case 'b':
    {
        string += '\b';
        break;
    }

    case '0':
    {
        unsigned int digit_counter = 8;
        for(++offset; offset < end; ++offset)
        {
            if(is_binary_digit(*offset) == false)
            {
                break;
            }
            --digit_counter;
            if(digit_counter == 0)
            {
                break;
            }
        }

        string += static_cast<char>(binary_string_to_number(std::string(begin + 1, offset)));
        begin = offset;
        custom_iterator = true;
        break;
    }

    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
    case '8':
    case '9':
    {
        for(++offset; offset < end; ++offset)
        {
            if(nil::string::is_digit(*offset) == false)
            {
                break;
            }
        }

        string += static_cast<char>(binary_string_to_number(std::string(begin + 1, offset)));
        begin = offset;
        custom_iterator = true;
        break;
    }

    case 'x':
    case 'X':
    {
        unsigned int const max_hex_digits = 2;

        unsigned int digit_counter = max_hex_digits;
        for(++offset; offset < end; ++offset)
        {
            if(nil::string::is_hex_digit(*offset) == false)
            {
                break;
            }
            --digit_counter;
            if(digit_counter == 0)
            {
                break;
            }
        }

        if(digit_counter == max_hex_digits)
        {
            throw std::runtime_error("Empty hexadecimal number in escape sequence");
        }

        string += nil::string::string_to_number<char>(std::string(begin + 2, offset), std::ios_base::hex);
        begin = offset;
        custom_iterator = true;
        break;
    }

    default:
    {
        throw std::runtime_error("Unknown escape sequence in regular expression");
        break;
    }
    }

    if(custom_iterator == false)
    {
        begin += 2;
    }
}
Пример #4
0
/*
 * Parse a signed integer from the first len bytes of str.
 *
 *   str    - input text.
 *   endptr - if non-NULL, receives a pointer to the first byte that was not
 *            consumed. When nothing but whitespace and an optional '-' is
 *            present it is set to str itself.
 *   len    - number of bytes to look at; 0 means strlen(str).
 *   base   - numeric base. 0 selects the base from a "0x" (16), "0o" (8) or
 *            "0b" (2) prefix and falls back to 10. A prefix is only
 *            recognized when base is 0.
 *   err    - if non-NULL, set to ERANGE when the value does not fit in a long.
 *
 * Accepted form: optional whitespace, optional '-', optional base prefix
 * (base 0 only), digits. Returns the parsed value, or 0 if there are no
 * digits. On overflow errno (and *err) are set to ERANGE and LONG_MAX or
 * LONG_MIN is returned, depending on the sign.
 *
 * NOTE(review): base is not validated; bases other than 2, 8, 10 and 16
 * rely on memspn()/valid_digits/max_length_per_base - confirm their range.
 */
long parse_integer(char *str, char **endptr,
                   int len, int base, int *err){
  if(len == 0){
    len = strlen(str);
  }
  int i = 0, negative = 0;
  //ignore leading space (the cast keeps bytes >= 0x80 valid for isspace)
  while(i < len && isspace((unsigned char)str[i])){
    i++;
  }
  if(i < len && str[i] == '-'){
    negative = 1;
    i++;
  }
  if(i == len){
    if(endptr){*endptr = str;}
    return 0;
  }
  //Resolve base 0 before anything depends on it, so it can never reach the
  //scanning code below, even when fewer than two characters remain.
  //Input such as "0x abc" consumes the prefix, finds no digits and yields 0.
  if(base == 0){
    base = 10;
    if((i+1) < len && str[i] == '0'){
      if(str[i+1] == 'x'){
        base = 16;
        i+=2;
      } else if(str[i+1] == 'o'){
        base = 8;
        i+=2;
      } else if(str[i+1] == 'b'){
        base = 2;
        i+=2;
      }
    }
  }
  //read leading zeros; the bound is tested first so that a bare prefix
  //such as "0x" never reads str[len]
  while(i < len && str[i] == '0'){
    i++;
  }
  if(i == len){
    if(endptr){*endptr = (str+i);}
    return 0;
  }
  //find the end of the digit run [start, i)
  int start = i;
  if(base == 10){
    while(i < len && isdigit((unsigned char)str[i])){i++;}
  } else if(base == 16){
    while(i < len && isxdigit((unsigned char)str[i])){i++;}
  } else if(base == 8){
    while(i < len && is_oct_digit(str[i])){i++;}
  } else if(base == 2){
    while(i < len && is_binary_digit(str[i])){i++;}
  } else if(base < 10){
    //the pointer passed in is already offset by i, so the result is
    //relative to that position and has to be added to i
    //NOTE(review): assumes memspn returns the length of the matching span
    i += memspn((uint8_t*)str + i, len - i, valid_digits, base);
  } else {
    i += memspn((uint8_t*)str + i, len - i, valid_digits, 10 + (base-10)*2);
  }
  //since signed overflow is undefined, accumulate the magnitude in an
  //unsigned value; a negative number may be one larger (LONG_MIN)
  uint64_t limit = (uint64_t)LONG_MAX + (negative ? 1 : 0);
  uint64_t num = 0;
  //what counts is the number of significant digits, not how far into the
  //string they are
  int out_of_range = ((i - start) > max_length_per_base[base]);
  int j;
  for(j = start; !out_of_range && j < i; j++){
    uint64_t digit = char_to_number(str[j]);
    //num * base + digit <= limit, tested without ever overflowing
    if(num > (limit - digit) / (uint64_t)base){
      out_of_range = 1;
    } else {
      num = (num * (uint64_t)base) + digit;
    }
  }
  if(out_of_range){
    errno = ERANGE;
    if(err){*err = ERANGE;}
    if(endptr){*endptr = (str+i);}
    return (negative ? LONG_MIN : LONG_MAX);
  }
  if(endptr){*endptr = (str + i);}
  if(negative){
    //num == LONG_MAX + 1 can only mean LONG_MIN, which -(long)num cannot express
    return (num > (uint64_t)LONG_MAX) ? LONG_MIN : -(long)num;
  }
  return (long)num;
}