/**
 * Parse the next command by delegating to the internal parser.
 *
 * Exceptions are normalised on the way out:
 *  - a parser_exception that already carries line information is rethrown
 *    unchanged;
 *  - a parser_exception without line information, or any other
 *    sally::exception, is converted into a parser_exception annotated with
 *    the internal parser's current file name, line and position.
 *
 * @return whatever the internal parser's parse_command() returns
 * @throws parser_exception on any parse failure
 */
command* parser::parse_command() {
  try {
    return d_internal->parse_command();
  } catch (const parser_exception& e) {
    if (e.has_line_info()) {
      // Rethrow the in-flight exception object itself. `throw e;` would
      // copy-construct a new exception from the static type of `e`, which
      // costs a copy and would slice any derived exception type.
      throw;
    }
    // Add line information
    throw parser_exception(e.get_message(),
                           d_internal->get_filename(),
                           d_internal->get_current_parser_line(),
                           d_internal->get_current_parser_position());
  } catch (const sally::exception& e) {
    // Generic errors carry no location: attach the current parser position.
    throw parser_exception(e.get_message(),
                           d_internal->get_filename(),
                           d_internal->get_current_parser_line(),
                           d_internal->get_current_parser_position());
  }
}
[[ noreturn ]] void scanner::throw_exception(char const * msg) { unsigned line = m_sline; unsigned pos = m_upos; while (curr() != EOF && !std::isspace(curr())) next(); throw parser_exception(msg, m_stream_name.c_str(), line, pos); }
/*
 * Convert the textual form of a decimal real ("<int>.<frac>") into an exact
 * rational number.
 *
 * sc     NUL-terminated string that must contain a '.'
 * radix  must be 10; any other radix raises a parser_exception
 *
 * Returns the value produced by rational(r, true), where r is
 * (int * 10^n +/- frac) / 10^n and n is the number of fraction digits.
 *
 * Raises parser_exception for a non-decimal radix, for input without a
 * decimal point, and when the working copy cannot be allocated.
 */
cons_t* parse_exact_real(const char* sc, int radix)
{
  if ( radix != 10 )
    raise(parser_exception(
      "Only reals with decimal radix are supported"));

  // Locate the decimal point up front: without one there is nothing to
  // split, and dereferencing the strchr() result would crash.
  const char *dot = (sc != NULL) ? strchr(sc, '.') : NULL;
  if ( dot == NULL )
    raise(parser_exception("Exact real is missing a decimal point"));

  /*
   * Since the real is already in string form, we can simply turn it into a
   * rational number. Work on a private copy so it can be split in place.
   */
  char *s = strdup(sc);
  if ( s == NULL )
    raise(parser_exception("Out of memory while parsing exact real"));

  char *d = s + (dot - sc);
  *d = '\0';

  const char* left = s;
  const char* right = d+1;
  int decimals = strlen(right);

  // The sign lives in the integer part only ("-1.5", "-0.5"), so the
  // fraction has to be subtracted, not added, for negative numbers.
  const bool negative = (*left == '-');

  /*
   * NOTE: If we overflow here, we're in big trouble.
   * TODO: Throw an error if we overflow. Or just implement bignums.
   */
  rational_t r;
  try {
    r.numerator = to_i(left, radix)*pow10(decimals)
                + (negative ? -to_i(right, radix) : to_i(right, radix));
    r.denominator = pow10(decimals);
  } catch ( ... ) {
    // Do not leak the working copy if a conversion raises.
    free(s);
    throw;
  }

  free(s);
  return rational(r, true);
}
/*
 * Convert a string to a real number using atof().
 *
 * s      textual form of the number
 * radix  only 10 is accepted; anything else raises a parser_exception
 */
real_t to_f(const char* s, int radix)
{
  // TODO: Add support for non-decimal radix
  const bool is_decimal = (radix == 10);

  if ( !is_decimal )
    raise(parser_exception("Only radix 10 reals are supported"));

  return atof(s);
}
/*
 * Convert hexadecimal character to a number.
 *
 * c  character code of a hex digit; upper and lower case are accepted
 *
 * Returns the digit value 0..15. Any other character raises a
 * parser_exception that shows the offending character, either literally
 * (printable) or as a \x escape (non-printable).
 */
static uint8_t hexval(int c)
{
  // Plain range checks instead of tolower(): the <cctype> functions have
  // undefined behaviour for negative arguments other than EOF, which is what
  // a sign-extended char with the high bit set turns into.
  if ( c>='0' && c<='9' ) return c - '0';
  if ( c>='a' && c<='f' ) return 10 + c - 'a';
  if ( c>='A' && c<='F' ) return 10 + c - 'A';

  // Map a sign-extended char back to its byte value so that isprint() gets
  // a valid argument and the escape prints e.g. \x80 rather than \xffffff80.
  const int shown = (c < 0) ? static_cast<unsigned char>(c) : c;

  if ( shown <= 0xFF && isprint(shown) )
    raise(parser_exception(format("Not a hexadecimal digit: '%c'",
      static_cast<char>(c))));
  else
    raise(parser_exception(format("Not a hexadecimal digit: \\x%x",
      static_cast<unsigned>(shown))));

  return 0; // to please compiler
}
/**
    \brief Parse an expression, elaborate it, and return the result.

    The elaborated result is passed to check_no_metavar before its first
    component is returned. A parser_error is rethrown as a parser_exception
    carrying the stream name and the error's position; any other exception
    is cloned and rethrown wrapped in a parser_nested_exception together
    with a position provider built from the last command position.
*/
expr parser_imp::parse_expr_main() {
    try {
        auto elaborated = elaborate(parse_expr());
        check_no_metavar(elaborated,
                         "invalid expression, it still contains metavariables after elaboration");
        return elaborated.first;
    } catch (parser_error & err) {
        // m_pos is a pair; its two components are forwarded in order
        auto const & where = err.m_pos;
        throw parser_exception(err.what(), m_strm_name.c_str(), where.first, where.second);
    } catch (exception & err) {
        throw parser_nested_exception(
            std::shared_ptr<exception>(err.clone()),
            std::shared_ptr<pos_info_provider>(
                new lean_pos_info_provider(m_this.lock(), m_last_cmd_pos)));
    }
}
static std::string decode_literal_string(const char* s) { std::string r; const size_t skip_quote = 1; const char *end = s+strlen(s) - skip_quote; for ( const char *p = s+skip_quote; p < end; ++p ) { // add normal characters to output if ( *p != '\\' ) { r += *p; continue; } // advance to next char if ( !(p++ < end) ) { // note: technically, this is not supported because of tests // done on the string before decode_literal_string raise(parser_exception("String did not end with double-quote")); break; } switch ( *p ) { // known escape characters case 'a': r += '\a'; break; case 'b': r += '\b'; break; case 't': r += '\t'; break; case 'n': r += '\n'; break; case 'r': r += '\r'; break; case '"': r += '"'; break; case '\\': r += '\\'; break; case '|': r += '|'; break; case 'x': { // format "\x<hex>;" const char * start = p; // skip 'x' if ( !(p++ < end) ) raise(parser_exception("Incomplete \\x<hex>; sequence in string")); // decode "\x<hex>;" formatted character uint16_t value = 0, pos = 0; char c = '\0'; // atoi, the right way while ( (c = getnext(&p, end)) != '\0' && c != ';' ) { ++pos; uint16_t prev = value; value = value*16 + hexval(c); // detect overflow if ( value < prev ) { while ( *p!=0 && *p!=';' ) ++pos, ++p; raise(parser_exception(format( "Unicode character does not fit into unsigned %u-bit storage: \\%.*s", sizeof(value)*8, pos+2, start))); // +2 == the two chars \x } } if ( c != ';' ) { raise(parser_exception(format( "Character escape sequence not semi-colon terminated: \\%.*s", pos+1, start))); } if ( value > MAX_UNICODE_CHAR ) { raise(parser_exception(format( "Unicode character not supported on this platform: U+%.*X", sizeof(value)*8/4, value))); } r += static_cast<char>(value); --p; // back up one position for the loop increment } break; default: // unknown escape character; just add a default value (ref. R7RS // chapter 6.7) r += *p; break; } } return r; }
void actual_assembly_parser(std::string const & file_name, std::string const & path, std::vector<std::string> const & include_directories) { std::string input; bool success = nil::read_file(file_name, input); if(success == false) { throw std::runtime_error("Failed to open file \"" + file_name + "\""); } std::string::iterator begin = input.begin(), end = input.end(); typedef std::map< std::string, std::vector<lexeme> > macro_map; macro_map macros; std::vector<lexeme> lexemes; unsigned int line = 1; while(true) { lexeme current_lexeme; bool not_done_yet = assembly_lexer(file_name, line, begin, end, current_lexeme); if(not_done_yet == true) { if(current_lexeme.type == lexeme_name) { std::vector<lexeme> line_lexemes; std::string const & name = current_lexeme.data; if(name == "include") { read_until_eol(file_name, line, begin, end, line_lexemes); if(line_lexemes.size() != 1) { parser_exception(file_name, line, "invalid argument count in include statement"); } else if(line_lexemes[0].type != lexeme_string) { parser_exception(file_name, line, "invalid argument type in include statement"); } std::string include_input; std::string const & include_file_name = line_lexemes[0].data; bool success = include_file(include_file_name, path, include_directories, include_input); if(success == false) { parser_exception(file_name, line, "failed to include file \"" + include_file_name + "\""); } input = include_input + std::string(begin, end); begin = input.begin(); end = input.end(); } else if(name == "define") { bool not_done_yet = read_until_eol(file_name, line, begin, end, line_lexemes); if(not_done_yet == false) { return; } else if(line_lexemes.size() < 1) { parser_exception(file_name, line, "invalid argument count in define statement"); } else if(line_lexemes[0].type != lexeme_name) { parser_exception(file_name, line, "expected a name in define statement"); } std::string define_name = line_lexemes[0].data; line_lexemes.erase(line_lexemes.begin()); for(std::vector<lexeme>::iterator 
i = line_lexemes.begin(), end = line_lexemes.end(); i != end; ++i) { if(i->type == lexeme_name) { macro_map::iterator macros_end = macros.end(), search = macros.find(i->data); if(search != macros_end) { std::size_t backup = i - line_lexemes.begin(); std::vector<lexeme> & macro_lexemes = search->second; line_lexemes.insert(i + 1, macro_lexemes.begin(), macro_lexemes.end()); line_lexemes.erase(line_lexemes.begin() + backup); i = line_lexemes.begin() + backup + macro_lexemes.size(); end = line_lexemes.end(); } } } macros[define_name] = line_lexemes; } else { macro_map::iterator macros_end = macros.end(), search = macros.find(name); if(search == macros_end) { lexemes.push_back(current_lexeme); } else { std::vector<lexeme> & macro_lexemes = search->second; lexemes.insert(lexemes.end(), macro_lexemes.begin(), macro_lexemes.end()); } } } else { lexemes.push_back(current_lexeme); } } else { break; } } unsigned int code_boundary = 0, data_boundary = 0; for(std::vector<lexeme>::iterator i = lexemes.begin(), end = lexemes.end(); i != end; ++i) { switch(i->type) { case lexeme_name: { std::vector<lexeme>::iterator line_begin = i; for( std::vector<lexeme>::iterator i = lexemes.begin(), end = lexemes.end(); (i != end) && (i->type != lexeme_newline); ++i ); std::size_t line_length = i - line_begin; std::string const & name = line_begin->data; if(name == "align") { if(line_length != 3) { parser_exception(file_name, line_length->line, "Illegal argument count for align statement"); } } ++i; break; } case lexeme_number: case lexeme_string: case lexeme_operator: { parser_exception(file_name, i->line, "Illegal lexeme type at the beginning of a line"); } } } }
const char* get_token() { // mutatable return buffer static char token[256]; for ( ;; ) { token[0] = token[1] = '\0'; source = skip_space(source); // comment? skip to end of line if ( *source == ';' ) { while ( *source != '\n' ) { ++source; if ( *source == '\0' ) return NULL; } continue; } // hash-bang or similar? skip to end of line // TODO: Properly handle reader directives like case-folding, etc. if ( source[0]=='#' && source[1]=='!' ) { // skip to end of line const char *start = source; while ( *source != '\n' ) ++source; if ( !strncmp("#!fold-case", start, source - start) ) fold_case_flag = true; else if ( !strncmp("#!no-fold-case", start, source - start) ) fold_case_flag = false; continue; } // block-comments? if ( source[0]=='#' && source[1]=='|' ) { // match nested pairs source += 2; for ( int n=1; n && *source; ++source ) { if ( source[0]=='#' && source[1]=='|' ) { ++source; ++n; } else if ( source[0]=='|' && source[1]=='#' ) { ++source; --n; } } continue; } // vector form "#( ... )" if ( source[0]=='#' && source[1]=='(' ) { strcpy(token, "#("); source += 2; return token; } // bytevector form "#u8( ... )" if ( source[0]=='#' && source[1]=='u' && source[2]=='8' && source[3]=='(' ) { strcpy(token, "#u8("); source += 4; return token; } // ignore-next-datum form "#;" if ( source[0]=='#' && source[1]==';' ) { strcpy(token, "#;"); source += 2; return token; } if ( char_in(*source, "()'") ) // tokens ( and ) token[0] = *source++; else { // long-form-symbol w/format "|foo bar baz|" if ( source[0]=='|' ) { const size_t lineno = line; token[0]='|'; source = copy_while(token+1, source+1, sizeof(token)-2, not_pipe); if ( *source == '|' ) ++source; else raise(parser_exception(format( "Invalid |long symbol| on line %lu\n", lineno))); const size_t l = strlen(token); token[l] = '|'; token[l+1] = '\0'; } else // other tokens source = copy_while(token, source, sizeof(token)-1, string_or_non_delimiter); } // emit NULL when finished return !empty(token) ? token : NULL; } }