/*
	Converts the leading run of binary digits of `input` into an unsigned integer.

	Characters are consumed from the front of the string, most significant bit first.
	Conversion stops at the first character that is_binary_digit rejects (or at the
	end of the string); whatever was accumulated up to that point is returned.
	An input with no leading binary digit yields 0.
*/
unsigned int binary_string_to_number(std::string const & input)
{
	unsigned int value = 0;
	std::string::size_type index = 0;

	while((index < input.size()) && !(is_binary_digit(input[index]) == false))
	{
		// Make room for the next bit, then merge it in.
		value <<= 1;
		value |= (input[index] - '0');
		++index;
	}

	return value;
}
bool assembly_lexer(std::string const & file_name, unsigned int & line, std::string::const_iterator & begin, std::string::const_iterator const & end, lexeme & output) { std::string input; for(; begin != end; ++begin) { char current_char = *begin; char type = type_lookup_table[current_char]; switch(type) { case char_type_illegal: { lexer_exception(file_name, line, "Illegal character"); } case char_type_name: { std::string::const_iterator name_begin = begin; for(++begin; (begin != end) && (is_name_char(*begin) == true); ++begin); output = lexeme(lexeme_name, std::string(name_begin, begin), line); return true; } case char_type_digit: { std::string::const_iterator number_begin = begin; for(++begin; (begin != end) && (nil::string::is_digit(*begin) == true) ; ++begin); output = lexeme(lexeme_number, std::string(number_begin, begin), line); return true; } case char_type_zero: { std::string::const_iterator number_begin = begin; ++begin; if(begin != end) { char second_character = *begin; if( (second_character == 'x') || (second_character == 'X') ) { for(++begin; (begin != end) && (nil::string::is_digit(*begin) == true) ; ++begin); } else if(is_binary_digit(second_character) == true) { for(++begin; (begin != end) && (is_binary_digit(*begin) == true) ; ++begin); } } output = lexeme(lexeme_number, std::string(number_begin, begin), line); return true; } case char_type_string: { std::string string; for(++begin; begin != end;) { char current_char = *begin; switch(current_char) { case '"': { ++begin; output = lexeme(lexeme_string, string, line); return true; } case '\\': { try { parse_backslash(begin, end, string); } catch(std::exception & exception) { lexer_exception(file_name, line, exception.what()); } break; } case '\n': { lexer_exception(file_name, line, "Newline in string"); } default: { string += current_char; ++begin; break; } } } lexer_exception(file_name, line, "Incomplete string at the end of file"); } case char_type_operator: { ++begin; output = lexeme(lexeme_operator, 
std::string(1, current_char), line); return true; } case char_type_operator_extended: { ++begin; if(begin == end) { output = lexeme(lexeme_operator, std::string(1, current_char), line); return false; } else { char second_char = *begin; bool is_extended = false; switch(current_char) { case '&': { if(second_char == '&') { is_extended = true; } break; } case '|': { if(second_char == '|') { is_extended = true; } break; } case '=': { if(second_char == '=') { is_extended = true; } break; } case '!': { if(second_char == '=') { is_extended = true; } break; } case '<': { if(second_char == '=') { is_extended = true; } break; } case '>': { if(second_char == '=') { is_extended = true; } break; } } if(is_extended == true) { ++begin; output = lexeme(lexeme_operator, std::string(1, current_char) + second_char, line); } else { output = lexeme(lexeme_operator, std::string(1, current_char), line); } return true; } } case char_type_newline: { ++begin; ++line; output = lexeme(lexeme_newline, line); return true; } case char_type_comment: { for(++begin; begin != end; ++begin) { if(*begin == '\n') { ++begin; ++line; output = lexeme(lexeme_newline, line); return true; } } return false; } } } return false; }
void parse_backslash(std::string::const_iterator & begin, std::string::const_iterator const & end, std::string & string) { std::string::const_iterator offset = begin + 1; if(offset >= end) { throw std::runtime_error("Invalid escape sequence"); } bool custom_iterator = false; char next_character = *offset; switch(next_character) { case 'a': { string += '\a'; break; } case 'f': { string += '\f'; break; } case 'n': { string += '\n'; break; } case 'r': { string += '\r'; break; } case 't': { string += '\t'; break; } case 'v': { string += '\v'; break; } case 'b': { string += '\b'; break; } case '0': { unsigned int digit_counter = 8; for(++offset; offset < end; ++offset) { if(is_binary_digit(*offset) == false) { break; } --digit_counter; if(digit_counter == 0) { break; } } string += static_cast<char>(binary_string_to_number(std::string(begin + 1, offset))); begin = offset; custom_iterator = true; break; } case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': { for(++offset; offset < end; ++offset) { if(nil::string::is_digit(*offset) == false) { break; } } string += static_cast<char>(binary_string_to_number(std::string(begin + 1, offset))); begin = offset; custom_iterator = true; break; } case 'x': case 'X': { unsigned int const max_hex_digits = 2; unsigned int digit_counter = max_hex_digits; for(++offset; offset < end; ++offset) { if(nil::string::is_hex_digit(*offset) == false) { break; } --digit_counter; if(digit_counter == 0) { break; } } if(digit_counter == max_hex_digits) { throw std::runtime_error("Empty hexadecimal number in escape sequence"); } string += nil::string::string_to_number<char>(std::string(begin + 2, offset), std::ios_base::hex); begin = offset; custom_iterator = true; break; } default: { throw std::runtime_error("Unknown escape sequence in regular expression"); break; } } if(custom_iterator == false) { begin += 2; } }
long parse_integer(char *str, char **endptr, int len, int base, int *err){ if(len == 0){ len = strlen(str); } //since signed overflow is undefined and I don't really //want to use gmp for this, use an unsigned long to hold the value uint64_t num = 0; int i = 0, negitive = 0, overflow = 0; //ignore leading space while(isspace(str[i]) && ++i < len); if(i < len){ if(str[i] == '-'){ negitive = 1; i++; } } if(i == len){ if(endptr){*endptr = str;} return 0; } #define TO_NUMBER(start, end, base) \ ({uint64_t number = 0, next = 0; \ int j; \ for(j = start; j < end; j++){ \ next = (number * base) + char_to_number(str[j]); \ if(next < number){/*overflow*/ \ overflow = j; \ break; \ } else { \ number = next; \ } \ } \ number;}) //I'm pretty sure this will cause an error if the string is something like //0x abcdefg. It should be read as a 0, but I'm not sure what will happen if(base == 0 && ((i+1) < len)){ if(str[i] == '0' && str[i+1] == 'x'){ base = 16; i+=2; } else if(str[i] == '0' && str[i+1] == 'o'){ base = 8; i+=2; } else if(str[i] == '0' && str[i+1] == 'b'){ base = 2; i+=2; } else { base = 10; } } while(str[i] == '0' && ++i < len);//read leading zeros if(i == len){ if(endptr){*endptr = (str+i);} return 0; } /* Use special cases for 2, 8, 10, and 16 to speed them up. Multiplies in base 2, 8, or 16 become shifts, and for x86 at least a multiply by 10 becomes two lea instructions. */ #define DO_CASE(base) \ if(i <= max_length_per_base[base]){ \ num = TO_NUMBER(start, i, base); \ } \ if(i > max_length_per_base[base] || overflow > 0 || num>LONG_MAX){ \ *err = errno = ERANGE; \ *endptr = (str+i); \ return (negitive ? 
LONG_MIN : LONG_MAX); \ } int start = i; if(base == 10){ while(isdigit(str[i]) && ++i < len); DO_CASE(base); } else if(base == 16){ while(isxdigit(str[i]) && ++i < len); DO_CASE(base); } else if(base == 8){ while(is_oct_digit(str[i]) && ++i < len); DO_CASE(base); } else if(base == 2){ while(is_binary_digit(str[i]) && ++i < len); DO_CASE(base); } else if(base < 10){ i = memspn((uint8_t*)str + i, len - i, valid_digits, base); DO_CASE(base); } else { i = memspn((uint8_t*)str + i, len -i, valid_digits, 10 + (base-10)*2); DO_CASE(base); } if(endptr){*endptr = (str + i);} return (long)(negitive ? -num : num); }