/*
 * TKGetToken - extract the next token from the tokenizer's string.
 *
 * Starting at tk->current_position, skips every character for which
 * is_delimiter(ch, tk->delimiters) is true, then copies the following run of
 * non-delimiter characters into a newly allocated, NUL-terminated string.
 * tk->current_position is left on the first character after the token
 * (a delimiter or the terminating NUL).
 *
 * Returns the new string (the caller must free() it), or NULL when no token
 * remains or when the allocation fails.
 */
char *TKGetToken(TokenizerT *tk)
{
    /* Locate the end once; the original called strlen() on every iteration. */
    const char *end = tk->copied_string + strlen(tk->copied_string);
    char *token_start;
    char *token;
    size_t length;

    /* Skip leading delimiters. */
    while (tk->current_position < end &&
           is_delimiter(*tk->current_position, tk->delimiters)) {
        tk->current_position++;
    }
    if (tk->current_position >= end) {
        return NULL;
    }

    /* Advance over the token itself. */
    token_start = tk->current_position;
    while (tk->current_position < end &&
           !is_delimiter(*tk->current_position, tk->delimiters)) {
        tk->current_position++;
    }

    length = (size_t)(tk->current_position - token_start);
    token = (char *)malloc(length + 1);
    if (token == NULL) {
        /* Out of memory: report "no token" rather than writing through NULL. */
        return NULL;
    }
    memcpy(token, token_start, length);
    token[length] = '\0';
    return token;
}
unsigned sc_string_old::fmt_length()const { unsigned result=0; if((*this)[0]!='%') return 0; else result++; if(is_delimiter("-+0 #",result)) // flags result++; while(is_delimiter("0123456789*",result)) // width result++; if(rep->str[result]=='.') // precision { result++; unsigned old_result = result; while(is_delimiter("0123456789*",result)) result++; if(old_result == result) //error in format return 0; } if(is_delimiter("hlL",result)) result++; // I64 is not supported if(is_delimiter("cCdiouxXeEfgGnpsS",result)) result++; else // error in format return 0; return result; }
/* Parses 's', a list of OpenFlow version names separated by characters for
 * which is_delimiter() is true, and returns a bitmap with bit N set for every
 * version number N that ofputil_version_from_string() produced.  A name that
 * maps to 0 is reported through VLOG_FATAL. */
uint32_t
ofputil_versions_from_string(const char *s)
{
    const char *p = s;
    uint32_t bitmap = 0;

    while (*p) {
        size_t len;
        int version;
        char *key;

        if (is_delimiter(*p)) {
            p++;
            continue;
        }

        /* Measure the run of non-delimiter characters starting at 'p'. */
        for (len = 0; p[len] && !is_delimiter(p[len]); len++) {
            continue;
        }

        key = xmemdup0(p, len);
        version = ofputil_version_from_string(key);
        if (!version) {
            VLOG_FATAL("Unknown OpenFlow version: \"%s\"", key);
        }
        free(key);

        bitmap |= 1u << version;
        p += len;
    }

    return bitmap;
}
END_TEST

/* test_delim: is_delimiter() must report true for ')' and false for 'f'. */
START_TEST (test_delim) { ck_assert (is_delimiter (')') == true); ck_assert (is_delimiter ('f') == false); }
static void do_state_dot(char **pscan) { if (is_digit(**pscan)){ //deal with real, this for extend ; } else if (**pscan == '.'){ ++*pscan; if (**pscan != '.'){ printf("Error: illegally uses '.' -- READ\n"); do_input_error(pscan); } else{ ++*pscan; if (is_delimiter(**pscan) || **pscan == '\0'){ reg = make_symbol("..."); current_state = stack_pop(&state_stack); if (current_state == STATE_QUOTE){ reg = cons(reg, NIL); reg = cons(make_symbol("quote"), reg); current_state = stack_pop(&state_stack); } stack_push(&parser_stack, reg); } else{ printf("Error: illegally uses '.' -- READ\n"); do_input_error(pscan); } } } else if (is_delimiter(**pscan) && stack_top(&state_stack) == STATE_LIST){ stack_push(&parser_stack, make_symbol(".")); current_state = stack_pop(&state_stack); } else{ printf("Error: illegally uses '.' -- READ\n"); do_input_error(pscan); } }
static void parse_char(char **pscan) { char rbuf[50]; char *rbufp = NULL; if (**pscan == '\0'){ reg = make_char('\n'); current_state = stack_pop(&state_stack); if (current_state == STATE_QUOTE){ reg = cons(reg, NIL); reg = cons(make_symbol("quote"), reg); current_state = stack_pop(&state_stack); } stack_push(&parser_stack, reg); return; } rbufp = rbuf; *rbufp++ = **pscan; ++*pscan; if (is_delimiter(**pscan) || **pscan == '\0'){ reg = make_char(*rbuf); current_state = stack_pop(&state_stack); if (current_state == STATE_QUOTE){ reg = cons(reg, NIL); reg = cons(make_symbol("quote"), reg); current_state = stack_pop(&state_stack); } stack_push(&parser_stack, reg); }else { while (!is_delimiter(**pscan) && **pscan != '\0'){ *rbufp++ = **pscan; ++*pscan; } *rbufp = '\0'; if (strcmp(rbuf, "space") == 0){ reg = make_char(' '); current_state = stack_pop(&state_stack); if (current_state == STATE_QUOTE){ reg = cons(reg, NIL); reg = cons(make_symbol("quote"), reg); current_state = stack_pop(&state_stack); } stack_push(&parser_stack, reg); }else if (strcmp(rbuf, "newline") == 0){ reg = make_char('\n'); current_state = stack_pop(&state_stack); if (current_state == STATE_QUOTE){ reg = cons(reg, NIL); reg = cons(make_symbol("quote"), reg); current_state = stack_pop(&state_stack); } stack_push(&parser_stack, reg); }else { printf("Error: Bad character constant #\\%s -- READ", rbuf); do_input_error(pscan); } } }
/*
 * Description: returns the next token from the token stream held by the
 *              tokenizer.
 * Parameters:  tk - tokenizer from which to extract the token.
 * Modifies:    tk->current_position - left on the first character after the
 *              returned token (a delimiter or the terminating NUL).
 * Returns:     a newly allocated, NUL-terminated copy of the token (the
 *              caller must free() it); NULL when no token remains or the
 *              allocation fails.
 */
char *TKGetNextToken(TokenizerT *tk)
{
    /* Locate the end once; the original called strlen() on every iteration. */
    const char *end = tk->copied_string + strlen(tk->copied_string);
    char *token_start;
    char *token;
    size_t length;

    /* Skip leading delimiters. */
    while (tk->current_position < end && is_delimiter(*tk->current_position)) {
        tk->current_position++;
    }
    if (tk->current_position >= end) {
        return NULL;
    }

    /* Advance over the token itself. */
    token_start = tk->current_position;
    while (tk->current_position < end && !is_delimiter(*tk->current_position)) {
        tk->current_position++;
    }

    /*
     * Allocate exactly the token length plus the terminator.  The original
     * sized the buffer by the offset from the start of the whole string,
     * which over-allocated for every token after the first.
     */
    length = (size_t)(tk->current_position - token_start);
    token = (char *)malloc(length + 1);
    if (token == NULL) {
        return NULL;
    }
    memcpy(token, token_start, length);
    token[length] = '\0';
    return token;
}
static void do_state_sharp(char *ibuf, char **pscan) { if (**pscan == 'd'){ //only deal with decimal current_state = STATE_NUM; ++*pscan; } else if (**pscan == '\\'){ current_state = STATE_CHAR; ++*pscan; } else if (**pscan == '('){ parse_vector(ibuf, pscan); } else if (**pscan == 't' || **pscan == 'f'){ boolean b = (**pscan == 't') ? true : false; ++*pscan; if (is_delimiter(**pscan) || **pscan == '\0'){ stack_push(&parser_stack, make_boolean(b)); current_state = stack_pop(&state_stack); }else { printf("Error: illegally uses '#' -- READ\n"); do_input_error(pscan); } } else{ printf("Error: illegally uses '#' -- READ\n"); do_input_error(pscan); } }
static void parse_num(char **pscan) { char rbuf[50]; char *rbufp = rbuf; *rbufp++ = **pscan; ++*pscan; while (!is_delimiter(**pscan) && **pscan != '\0'){ *rbufp++ = **pscan; if (!is_digit(**pscan)){ printf("Error: Bad number constant %s -- READ\n", rbuf); do_input_error(pscan); } ++*pscan; } *rbufp = '\0'; reg = make_integer(atoi(rbuf)); current_state = stack_pop(&state_stack); if (current_state == STATE_QUOTE){ reg = cons(reg, NIL); reg = cons(make_symbol("quote"), reg); current_state = stack_pop(&state_stack); } stack_push(&parser_stack, reg); }
void str_tokenize(const std::string &text, std::vector<std::pair<int, int> > &tokens) { int i = 0; int size = text.size(); int lastStart = -1; while (i < size) { char c = text[i]; bool isDelimiter = is_delimiter(c); if (!isDelimiter) { if (lastStart == -1) { lastStart = i; } } else { if (lastStart != -1) { tokens.push_back(std::make_pair(lastStart, i - 1)); lastStart = -1; } } i++; } if (lastStart != -1) { tokens.push_back(std::make_pair(lastStart, size - 1)); } }
static void parse_sym(char **pscan) { char rbuf[50]; char *rbufp = rbuf; *rbufp++ = **pscan; ++*pscan; while (!is_delimiter(**pscan) && **pscan != '\0'){ *rbufp++ = **pscan; if (!is_subsequent(**pscan)){ printf("Error: %s is a illegal symbol. -- READ\n", rbuf); do_input_error(pscan); } ++*pscan; } *rbufp = '\0'; reg = make_symbol(rbuf); current_state = stack_pop(&state_stack); if (current_state == STATE_QUOTE){ reg = cons(reg, NIL); reg = cons(make_symbol("quote"), reg); current_state = stack_pop(&state_stack); } stack_push(&parser_stack, reg); }
/*
 * Raises an error through throw_error() unless the next input character,
 * as reported by peek_input(), is a delimiter.
 */
static void expect_delimiter(pSlip gd)
{
    if (is_delimiter(peek_input(gd))) {
        return;
    }
    throw_error(gd, "character not followed by delimiter\n");
}
/* this doesn't read the peculiar identifiers, they are scanned in the * main reader body */ static object read_identifier(FILE *in) { object o; int str_len = 0; int str_size = STRING_MIN_BUFFER; char *buffer; int c; buffer = xmalloc(str_size); c = tolower(fgetc(in)); assert(is_initial(c)); buffer[str_len++] = c; while (1) { c = fgetc(in); if (is_delimiter(c)) { ungetc(c, in); break; } if (!is_subsequent(c)) error("Symbol has bad name -- read", nil); /* we're a lower case scheme */ buffer[str_len++] = tolower(c); } o = make_symbol(buffer, str_len); xfree(buffer); return o; }
/*
 * Reads the remainder of a list from `in`, the opening '(' having been
 * consumed by the caller.
 *
 * Returns `null` when the next non-space character is ')'.  Otherwise reads
 * one datum as the head; if it is followed by '.', reads exactly one more
 * datum as the tail and requires a closing ')' (dotted pair), else reads the
 * tail recursively.  Malformed input prints a message to stderr and exits
 * with status 1.
 */
Cell read_pair(FILE *in)
{
    int ch;
    Cell head;
    Cell tail;

    skip_space(in);
    ch = getc(in);
    if (ch == ')') {
        return null;
    }
    ungetc(ch, in);
    head = read(in);

    skip_space(in);
    ch = getc(in);
    if (ch != '.') {
        /* read list */
        ungetc(ch, in);
        tail = read_pair(in);
        return cons(head, tail);
    }

    /* dotted pair: the '.' must stand alone */
    ch = peek(in);
    if (!is_delimiter(ch)) {
        fprintf(stderr, "dot not followed by delimiter\n");
        exit(1);
    }
    tail = read(in);

    skip_space(in);
    ch = getc(in);
    if (ch != ')') {
        fprintf(stderr, "missing right paren\n");
        exit(1);
    }
    return cons(head, tail);
}
/*
 * Terminates the program with exit status 1 (after a message on stderr)
 * unless the next character of `in`, as reported by peek(), is a delimiter.
 */
static void peek_expected_delimiter(FILE *in)
{
    if (is_delimiter(peek(in))) {
        return;
    }
    fprintf(stderr, "expected a delimiter\n");
    exit(1);
}
// Consumes input from [aFirst, aLast) and appends one delimiter-free run of
// it to iContents.
//
// Leading delimiters are skipped, the following run of non-delimiter
// characters is appended, and aFirst is then advanced up to and past the
// next terminating delimiter (or to aLast if there is none).  Throws
// base_type::packet_too_big when a maximum length is set and appending the
// run would exceed it.  Returns false for an empty range, otherwise whether
// the appended run stopped before aLast.
virtual bool take_some(const_pointer& aFirst, const_pointer aLast)
{
    if (aFirst == aLast)
        return false;

    for (; aFirst != aLast && is_delimiter(*aFirst); ++aFirst)
        ;
    const_pointer runBegin = aFirst;

    for (; aFirst != aLast && !is_delimiter(*aFirst); ++aFirst)
        ;
    const_pointer runEnd = aFirst;

    if (has_max_length() && length() + (runEnd - runBegin) > max_length())
        throw typename base_type::packet_too_big();
    iContents.insert(iContents.end(), runBegin, runEnd);

    // discard everything up to the terminating delimiter, then the
    // terminating delimiter itself when one was found
    for (; aFirst != aLast && !is_terminating_delimiter(*aFirst); ++aFirst)
        ;
    if (aFirst != aLast)
        ++aFirst;

    return runEnd != aLast;
}
static int first_pass(FILE * input) { int pc = 0; /* program counter for symbol values */ char *line; /* whole input line */ char **tokens; /* ARGV-style split of line */ sourcelinenumber = 0; nerrors = 0; while ((line = getLine(input)) != NULL) { tokens = lexer(line); /* convert line to tokens */ sourcelinenumber++; /* update line number for errors */ /* * blank line -- ignore it */ if (tokens[0] == NULL) continue; /* * delimiter character as first token?! */ if (is_delimiter(tokens[0])) { fprintf(stderr, "%s:%d: bad delimiter character '%s' at begining of line\n", sourcefilename, sourcelinenumber, tokens[0]); nerrors++; continue; } /* * second token is ':' delimiter -- assume the first token * is a label and add it in with the value of the current PC. */ if (tokens[1] == colon) { if (!symbol_insert(tokens[0], pc)) { fprintf(stderr, "%s:%d: duplicate symbol '%s'\n", sourcefilename, sourcelinenumber, tokens[0]); nerrors++; } tokens++; /* advance ptr to skip the label */ tokens++; /* advance ptr to skip the delimiter */ } /* * if the line (also) contains an opcode, bump the PC */ if (tokens[0] != NULL) pc++; } fprintf(stderr, "%d errors on the first pass\n", nerrors); return (nerrors == 0); /* 1 on success, 0 on failure */ }
// Scans a symbol starting at parse->curr and stops at the first delimiter.
// Every character before the delimiter must satisfy is_symbol_char();
// otherwise the result of parser_error() is returned and parse->curr is left
// on the offending character.  On success the symbol is created from the
// scanned text via create_symbol().
pointer parse_symbol(parser* parse)
{
    const char* start = parse->curr;

    for (;;) {
        const char ch = *parse->curr;
        if (is_delimiter(ch))
            break;
        if (!is_symbol_char(ch))
            return parser_error(parse, "Unexpected char '%c' in symbol.", ch);
        parse->curr++;
    }

    return create_symbol(parse->symbols, start, parse->curr - start);
}
/*
 * Description: returns the next token from the token stream held by the
 *              tokenizer.
 * Parameters:  tk - tokenizer from which to extract the token.
 * Modifies:    tk->current_position - left on the first character after the
 *              returned token (a delimiter or the terminating NUL).
 * Returns:     a newly allocated, NUL-terminated copy of the token (the
 *              caller must free() it); NULL when no token remains or the
 *              allocation fails.
 */
char *TKGetNextToken(TokenizerT *tk)
{
    /* Locate the end once; the original called strlen() on every iteration. */
    const char *end = tk->copied_string + strlen(tk->copied_string);
    char *token_start;
    char *token;
    size_t length;

    /* Skip leading delimiters. */
    while (tk->current_position < end &&
           is_delimiter(*tk->current_position, tk->delimiters)) {
        tk->current_position++;
    }
    if (tk->current_position >= end) {
        return NULL;
    }

    /* Advance over the token itself. */
    token_start = tk->current_position;
    while (tk->current_position < end &&
           !is_delimiter(*tk->current_position, tk->delimiters)) {
        tk->current_position++;
    }

    length = (size_t)(tk->current_position - token_start);
    token = (char *)malloc(length + 1);
    if (token == NULL) {
        /* Out of memory: report "no token" rather than writing through NULL. */
        return NULL;
    }
    memcpy(token, token_start, length);
    token[length] = '\0';
    return token;
}
/*
 * Reads the remainder of a list from `in`, the opening '(' having been
 * consumed by the caller.
 *
 * Returns the_empty_list when the next non-blank character is ')'.
 * Otherwise reads the head datum and then either a dotted tail ('.' followed
 * by a delimiter, one datum and ')') or, recursively, the rest of the list.
 * `car` and `cdr` are registered with gc_push_root() before anything is read
 * into them and unregistered just before the pair is built.  Malformed input
 * prints a message to stderr and exits with status 1.
 */
static scheme_object* read_pair(vm* context, FILE* in)
{
    scheme_object* car = NULL;
    scheme_object* cdr = NULL;
    int ch;

    eat_whitespace(in);
    ch = getc(in);
    if (ch == ')') {
        return the_empty_list;
    }

    gc_push_root((void**) &car);
    gc_push_root((void**) &cdr);

    ungetc(ch, in);
    car = read(context, in);

    eat_whitespace(in);
    ch = getc(in);
    if (ch != '.') {
        /* proper list: the tail is whatever follows */
        ungetc(ch, in);
        cdr = read_pair(context, in);
    } else {
        if (!is_delimiter(peek(in))) {
            fprintf(stderr, "Expected a delimiter after '.'...\n");
            exit(1);
        }
        cdr = read(context, in);

        eat_whitespace(in);
        ch = getc(in);
        if (ch != ')') {
            fprintf(stderr, "Expected a ')'\n");
            exit(1);
        }
    }

    gc_pop_root();
    gc_pop_root();

    return cons(context, car, cdr);
}
/*
 * Reader step for a '-' that is not part of a number: when the character at
 * *pscan is a delimiter or end of input, the symbol "-" is produced (wrapped
 * in (quote ...) when the popped state is STATE_QUOTE) and pushed on
 * parser_stack; anything else prints an error and calls do_input_error().
 */
static void do_state_sub(char **pscan)
{
    if (!is_delimiter(**pscan) && **pscan != '\0') {
        printf("Error: illegally uses '-' -- READ\n");
        do_input_error(pscan);
        return;
    }

    reg = make_symbol("-");
    current_state = stack_pop(&state_stack);
    if (current_state == STATE_QUOTE) {
        reg = cons(make_symbol("quote"), cons(reg, NIL));
        current_state = stack_pop(&state_stack);
    }
    stack_push(&parser_stack, reg);
}
/*Used to determine what type is coming next in the stream *Inputs: * in - the stream to read from * c - the first non-whitespace character to look at*/ object_type next_type(FILE *in, char c){ int next_char; if(c == EOF){ printf("Got EOF, exiting\n"); exit(0); } if(c == '#'){ /*a boolean or character*/ next_char = peek(in); switch(next_char){ case 't': case 'f': return BOOLEAN; case '\\': return CHARACTER; default: fprintf(stderr, "Unknown boolean or character literal\n"); exit(1); } }else if (isdigit(c) || (c == '-' && isdigit(peek(in)))){ ungetc(c, in); return FIXNUM; }else if(c == '('){ if(peek(in) == ')'){ return EMPTY_LIST; }else{ return PAIR; } }else if(c == '"'){ return STRING; /*if this is the start of a symbol*/ }else if(is_symbol_start(c) || /*or +/- by themselves*/ ((c == '+' || c == '-') && is_delimiter(peek(in)))){ //put this first character back ungetc(c, in); return SYMBOL; }else{ fprintf(stderr, "Bad input. Unexpected '%c'\n", c); exit(1); } fprintf(stderr, "No object type, no error... wtf?\n"); exit(1); }
/*
 * Reads a symbol from `in`: collects characters while they satisfy
 * is_initial(), are decimal digits, or are '+' / '-'.  The character that
 * ends the run must be a delimiter (otherwise a message is printed to stderr
 * and the program exits with status 1) and is pushed back onto the stream.
 */
static scheme_object* read_symbol(vm* context, FILE* in)
{
    string_t* name = string_create();
    int ch;

    for (ch = getc(in);
         is_initial(ch) || isdigit(ch) || ch == '+' || ch == '-';
         ch = getc(in)) {
        string_append_char(name, ch);
    }

    if (!is_delimiter(ch)) {
        fprintf(stderr, "symbol not folowed by delimiter\n");
        exit(1);
    }
    ungetc(ch, in);

    scheme_object* symbol = make_symbol(context, string_cstring(name));
    string_free(name);
    return symbol;
}
/*
 * Reads one datum from `in`.
 *
 *   '('   -> a list, via read_pair()
 *   '\''  -> (quote <datum>)
 *   EOF   -> the atom "#<void>"
 *   '"'   -> a string literal, returned as (quote <atom>); "\n" is the only
 *            translated escape, any other escaped character is taken as is
 *   else  -> an atom running up to the next delimiter, which is pushed back
 *
 * Atom and string text beyond 15 characters is read but not stored.
 *
 * End of file now terminates both a string literal and a bare atom.  The
 * previous loops kept calling getc() after EOF, so an unterminated string
 * never returned, and the atom loop depended on is_delimiter(EOF) being true.
 */
Cell read(FILE *in)
{
    const int max_chars = 15;   /* stored characters per atom, excluding NUL */
    int c;
    int i = 0;
    Cell a;

    skip_space(in);
    c = getc(in);

    if (c == '(') {
        return read_pair(in);
    }
    if (c == '\'') {
        return cons(atom("quote"), cons(read(in), null));
    }
    if (c == EOF) {
        return atom("#<void>");
    }

    if (c == '"') {
        c = getc(in);
        a = atom("");
        while (c != '"' && c != EOF) {
            if (c == '\\') {
                c = getc(in);
                if (c == EOF) {
                    break;      /* input ended in the middle of an escape */
                }
                if (c == 'n') {
                    c = '\n';
                }
            }
            if (i < max_chars) {
                a->atom[i++] = c;
            }
            c = getc(in);
        }
        a->atom[i] = '\0';
        return cons(atom("quote"), cons(a, null));
    }

    a = atom("");
    while (c != EOF && !is_delimiter(c)) {
        if (i < max_chars) {
            a->atom[i++] = c;
        }
        c = getc(in);
    }
    a->atom[i] = '\0';
    ungetc(c, in);              /* ungetc(EOF) is a no-op */
    return a;
}
/*
 * Evaluates the postfix expression in `expr` and returns its value.
 *
 * Characters accepted by is_digit() are collected on a temporary stack
 * (DECIMAL_PT is pushed as itself, other digits as their numeric value); a
 * delimiter folds the collected digits into one operand through
 * process_data(); any other permitted character is applied as a binary
 * operator to the two topmost operands.  A character rejected by
 * is_permitted(), or a solution stack whose top index is still above zero
 * at the end, terminates the program with status 1.
 */
float eval(char *expr)
{
    STACK solution, temp;
    float value, operand1, operand2;

    init_stack(&solution);
    init_stack(&temp);

    for (; *expr; expr++) {
        const char ch = *expr;

        if (!is_permitted(ch)) {
            fprintf(stderr, "error: invalid expression\n");
            exit(1);
        }

        if (is_digit(ch)) {
            if (ch == DECIMAL_PT) {
                push(&temp, (float) DECIMAL_PT);
            } else {
                push(&temp, (float) (ch - '0'));
            }
        } else if (is_delimiter(ch)) {
            /* a delimiter with no pending digits is simply skipped */
            if (!is_empty(&temp)) {
                push(&solution, process_data(&temp));
            }
        } else {
            /* operator: the right-hand operand is on top */
            operand2 = pop(&solution);
            operand1 = pop(&solution);
            value = operation(operand1, operand2, ch);
            push(&solution, value);
        }
    }

    /* digits not followed by a delimiter still form an operand */
    if (!is_empty(&temp)) {
        push(&solution, process_data(&temp));
    }

    if (get_top(&solution) > 0) {
        printf("error: invalid postfix expression\n");
        exit(1);
    }
    return pop(&solution);
}
/*
 * Reads the next symbol from `file` into `s` (NUL-terminated) and returns
 * the character that ended it: the delimiter, or EOF converted to char when
 * the input is exhausted.
 *
 * A character accepted by is_comment() starts a comment that runs to the
 * end of the line; comments and white space are skipped and never stored.
 * `s` must be large enough for the symbol; no length is available to check.
 *
 * The value returned by getc() is kept in an int.  The previous version
 * stored it in a char before comparing with EOF, which never matches where
 * char is unsigned (endless loop) and matches the byte 0xFF where char is
 * signed (premature stop).
 */
char getsymbol (FILE* file, char* s)
{
    int ch;
    int in_comment = 0;

    while ((ch = getc (file)) != EOF) {
        /* classify using the same char value the helpers have always seen */
        const char c = (char) ch;

        if (is_comment(c)) {
            in_comment = 1;
            continue;
        }
        if (in_comment) {
            if (c == '\n')
                in_comment = 0;
            continue;
        }
        if (is_white_space(c))
            continue;
        if (is_delimiter(c))
            break;
        *s++ = c;
    }
    *s = '\0';
    return (char) ch;
}
/*Reads a decimal integer with an optional leading '-' from the stream and
 *returns it as a fixnum object.  The character following the digits must be
 *a delimiter; it is pushed back onto the stream.  Anything else prints an
 *error to stderr and exits with status 1.*/
object *read_fixnum(FILE *in){
    long value = 0;
    long sign = 1;
    int ch = getc(in);

    if(ch == '-'){
        sign = -1;
    }else{
        /*not a sign: it belongs to the digits*/
        ungetc(ch, in);
    }

    for(ch = getc(in); isdigit(ch); ch = getc(in)){
        value = (value * 10) + (ch - '0');
    }
    value *= sign;

    if(!is_delimiter(ch)){
        fprintf(stderr, "number not followed by a delimiter");
        exit(1);
    }

    ungetc(ch, in);
    return make_fixnum(value);
}
// Decides whether a space belongs between this term and the term `right`
// that follows it.  Rules, first match wins:
//   literal                     -> space
//   open                        -> no space
//   close followed by close     -> no space
//   close followed by other     -> space
//   delimiter                   -> no space
//   anything followed by open   -> space
//   anything followed by delim. -> no space
//   otherwise                   -> space
bool spaced(term right)
{
    if (is_literal())
        return true;
    if (is_open())
        return false;
    if (is_close())
        return !right.is_close();
    if (is_delimiter())
        return false;
    if (right.is_open())
        return true;
    return !right.is_delimiter();
}
// Parses a numeric literal starting at parse->curr and stops at the first
// delimiter.
//
// Recognised forms: decimal integers, reals (any literal containing '.'),
// hexadecimal with a leading "0x", and binary with a leading 'b'.  An
// unexpected character yields the result of parser_error(); a literal that
// combines more than one of hex / binary / float is reported through
// parser_error() and NIL is returned.  Reals become create_real(), the rest
// create_int().
//
// Fix: the temporary text buffers are allocated with new[] and are now
// released with delete[]; the previous scalar `delete` was undefined
// behaviour.
pointer parse_number(parser* parse)
{
    size_t len = 0;          // characters counted as part of the literal
    bool isFloat = false;
    bool isHex = false;
    bool isBinary = false;
    const char* Start = parse->curr;

    while(!is_delimiter(*parse->curr))
    {
        if(*parse->curr == '.')
            isFloat = true;

        if(is_number_char(*parse->curr) || (isHex && is_extended_hex_char(*parse->curr)))
            len++;
        else if(len == 1 && *parse->curr == 'x' && *Start == '0')
        {
            // "0x" prefix
            len++;
            isHex = true;
        }
        else if(len == 0 && *parse->curr == 'b')
            isBinary = true;    // 'b' prefix; not counted in len
        else
            return parser_error(parse, "Unexpected char '%c' in number literal.", *parse->curr);

        parse->curr++;
    }

    // At most one of hex / binary / float may apply to a literal.
    {
        int TotalIs = isHex + isBinary + isFloat;
        if(TotalIs > 1)
        {
            char* buffer = new char[len+1];
            strncpy(buffer, Start, len);
            buffer[len] = '\0';
            parser_error(parse, "Unexpected number literal: %s.", buffer);
            delete[] buffer;
            return NIL;
        }
    }

    if(isFloat)
    {
        char* buffer = new char[len+1];
        strncpy(buffer, Start, len);
        buffer[len] = '\0';
        float ret = atof(buffer);
        delete[] buffer;
        return create_real(ret);
    }
    else
    {
        // Might be smart to use a buffer here, in case strtol doesn't see all delimiters as we do.
        int ret;
        if(isHex)
            ret = strtol(Start + 2, NULL, 16);
        else if(isBinary)
            ret = strtol(Start + 1, NULL, 2);
        else
            ret = strtol(Start, NULL, 10);
        return create_int(ret);
    }
}
/*
 * get_token - read the next lexical token from the stream `in`.
 *
 * Runs a character-at-a-time state machine (Tstate) over fgetc(in).  None of
 * `in`, `token`, `buffer`, `literal`, `row` or `col` is declared inside this
 * function.  The result is stored in `token`: token.type always, plus
 * token.value_int, token.value_double or token.p_string depending on the
 * kind of token.  Token text is accumulated in `buffer` (cleared on entry);
 * digits of \x, \0 and \b literals and escapes are accumulated in `literal`.
 * `row` and `col` are updated for every character read.
 *
 * Returns OK when a token has been recognised, EOF (with token.type set to
 * TT_EOF) when fgetc() reports end of input, and 0 when fgetc() returns a
 * zero byte, which ends the main loop.
 *
 * NOTE(review): the EOF test runs before the state switch, so a token that
 * is still being accumulated when the input ends appears to be dropped in
 * favour of TT_EOF - confirm this is intended.
 * NOTE(review): several branches continue after lex_error(); whether
 * lex_error() returns is not visible here.
 */
int get_token() { typedef enum { INIT, OPERATOR, SLASH, STRING, STRING_BACKSLASH, STRING_HEXA, STRING_BINARY, STRING_OCTA, NUMBER, FLOAT, FLOAT_EXP, ID, ID_KEYWORD, LINE_COMMENT, BLOCK_COMMENT, BLOCK_COMMENT_END, BASE_EXT, BINARY, OCTA, HEXA } Tstate; Tstate state = INIT; int c; int j = 0; int ret_val = 0; int escape_seq = 0; //char *check; strClear(buffer); token.type = TT_ERR; while ((c = fgetc(in))) { if (c == '\n') { row++; col = 0; } else col++; if (c == EOF) { token.type = TT_EOF; return EOF; } #ifdef SCANNER_DEBUG fprintf(stderr, "%s (%s)", fsm_states[state], strGetStr(buffer)); if (strFirst(buffer) == '\0') fprintf(stderr, "\n"); else fprintf(stderr, " -> "); #endif // DEBUG switch(state) { case INIT: if (c == '/') // comment or operator { state = SLASH; strAddChar(buffer, c); } else if (is_operator(c)) { state = OPERATOR; strAddChar(buffer, c); } else if (c == '"') // string literal { state = STRING; } else if (c == '\\') // x, b, 0 literals supported - BASE { state = BASE_EXT; } else if (isdigit(c)) // number -> integer or double literal { state = NUMBER; strAddChar(buffer, c); } else if (c == '_') // id { state = ID; strAddChar(buffer, c); } else if (isalpha(c)) // alphabetic char -> id or keyword { state = ID_KEYWORD; strAddChar(buffer, c); } else if ((ret_val = is_delimiter(c))) { token.type = TYPE_DELIMITER + ret_val - 1; #ifdef SCANNER_DEBUG fprintf(stderr, "%s\n", token_name[token.type]); #endif return OK; } else if (!isspace(c)) // non valid character { lex_error("Unknown character: '%c'.\n", c); } break; case BASE_EXT: if (c == 'b') { state = BINARY; } else if (c == '0') { state = OCTA; } else if (c == 'x') { state = HEXA; } else lex_error("Unknown character in literal '\\%c'.\n", c); break; case HEXA: if (isxdigit(c)) { if (j < 8) // 8 hexadecimal digits are max int value { literal[j] = c; j++; } else lex_error("Hexadecimal literal too long -> int overflow!\n"); } else { ungetc(c, in); token.type = TT_VALUE_INT; literal[j] = '\0'; token.value_int 
= (int) strtol(literal, NULL, 16); // cannot fail if (token.value_int < 0) lex_warning("Hexadecimal literal '\\x%s' overflow to negative number %d\n", literal, token.value_int); return OK; } break; case OCTA: if (c >= '0' && c <= '7') { if (j < 12) // max int = \0 7777 7777 7777 { literal[j] = c; j++; } else lex_error("Octal literal too long -> int overflow!\n"); } else { ungetc(c, in); token.type = TT_VALUE_INT; literal[j] = '\0'; token.value_int = (int) strtol(literal, NULL, 8); if (token.value_int < 0) lex_warning("Octal literal '\\0%s' overflow to negative number %d\n", literal, token.value_int); return OK; } break; case BINARY: if ((c == '0' || c == '1')) { if (j < 32) { literal[j] = c; j++; } else lex_error("Binary literal too long -> int overflow!\n"); } else { ungetc(c, in); token.type = TT_VALUE_INT; literal[j] = '\0'; token.value_int = (int) strtol(literal, NULL, 2); if (token.value_int < 0) lex_warning("Binary literal '\\b%s' overflow to negative number %d\n", literal, token.value_int); return OK; } break; case ID_KEYWORD: if (isalpha(c)) // add another char into buffer { strAddChar(buffer, c); } else if (c == '_' || isdigit(c)) // id - these chars are not in any keyword { state = ID; strAddChar(buffer, c); } else // end of id or keyword { ungetc(c, in); // return last read char to buffer ret_val = is_keyword(strGetStr(buffer)); if (ret_val) { token.type = TYPE_KEYWORD + ret_val - 1; // magic #ifdef SCANNER_DEBUG fprintf(stderr, "%s\n", token_name[token.type]); #endif return OK; } else { token.type = TT_ID; token.p_string = strGetStr(buffer); #ifdef SCANNER_DEBUG fprintf(stderr, "%s\n", token_name[token.type]); #endif return OK; } } break; case ID: if (isalnum(c) || c == '_') { strAddChar(buffer, c); } else { ungetc(c, in); token.type = TT_ID; token.p_string = strGetStr(buffer); #ifdef SCANNER_DEBUG fprintf(stderr, "%s\n", token_name[token.type]); #endif return OK; } break; case SLASH: if (c == '/') { state = LINE_COMMENT; } else if (c == '*') { state = 
BLOCK_COMMENT; } else // it was division { ungetc(c, in); token.type = TT_DIVIDE; #ifdef SCANNER_DEBUG fprintf(stderr, "%s\n", token_name[token.type]); #endif return OK; } break; case OPERATOR: // not precisely "normal" fsm, but easily extensible (just add operator to operators[] and Ttoken_type) if (is_operator(c)) // c is one of valid chars, that can be in operator { strAddChar(buffer, c); ret_val = determine_operator(strGetStr(buffer)); // check if we still have valid operator in buffer if (!ret_val) // if it's not valid operator { ungetc(c, in); // return last char, it was not part of operator strDelChar(buffer); // delete wrong char from buffer ret_val = determine_operator(strGetStr(buffer)); // determine which operator we have token.type = TYPE_OPERATOR + ret_val - 1; // return token #ifdef SCANNER_DEBUG fprintf(stderr, "%s\n", token_name[token.type]); #endif return OK; } // continue with loading chars if it's valid } else // another char is not operator -> end { ungetc(c, in); ret_val = determine_operator(strGetStr(buffer)); if (ret_val) { token.type = TYPE_OPERATOR + ret_val - 1; #ifdef SCANNER_DEBUG fprintf(stderr, "%s\n", token_name[token.type]); #endif return OK; } else // shouldn't occur, just to be sure.. 
{ lex_error("Unknown operator: '%s'.\n", strGetStr(buffer)); } } break; case LINE_COMMENT: if (c == '\n') // end of line comment { state = INIT; strClear(buffer); } break; case BLOCK_COMMENT: if (c == '*') // possible end of comment state = BLOCK_COMMENT_END; break; case BLOCK_COMMENT_END: if (c == '/') // comment ended { state = INIT; strClear(buffer); } else // false alarm - comment continues state = BLOCK_COMMENT; break; case NUMBER: if (isdigit(c)) { strAddChar(buffer, c); } else if (c == '.') { strAddChar(buffer, c); state = FLOAT; } else if (tolower(c) == 'e') { strAddChar(buffer, c); state = FLOAT_EXP; } else { ungetc(c, in); token.type = TT_VALUE_INT; token.value_int = (int) strtol(strGetStr(buffer), NULL, 10); #ifdef SCANNER_DEBUG fprintf(stderr, "%s\n", token_name[token.type]); #endif return OK; } break; case FLOAT: // at least one digit! if (isdigit(c)) { strAddChar(buffer, c); } else if (tolower(c) == 'e') { strAddChar(buffer, c); state = FLOAT_EXP; } else { ungetc(c, in); token.type = TT_VALUE_DOUBLE; token.value_double = strtod(strGetStr(buffer), NULL); //&check); #ifdef SCANNER_DEBUG fprintf(stderr, "%s\n", token_name[token.type]); #endif return OK; } break; case FLOAT_EXP: if (isdigit(c)) { strAddChar(buffer, c); } else if (tolower(strLast(buffer)) == 'e' && (c == '+' || c == '-')) // optional +/- after e/E { strAddChar(buffer, c); } else { ungetc(c, in); token.type = TT_VALUE_DOUBLE; token.value_double = strtod(strGetStr(buffer), NULL); //&check); #ifdef SCANNER_DEBUG fprintf(stderr, "%s\n", token_name[token.type]); #endif return OK; } break; case STRING: if (c == '"') // end of string literal { token.type = TT_VALUE_STRING; token.p_string = strGetStr(buffer); #ifdef SCANNER_DEBUG fprintf(stderr, "%s\n", token_name[token.type]); #endif return OK; } else if (c == '\\') // string literal continues on another line or character constant state = STRING_BACKSLASH; else if (c != '\n') { strAddChar(buffer, c); } else { lex_error("String literal not 
closed.\n"); } break; case STRING_BACKSLASH: state = STRING; if (c == '\\') { strAddChar(buffer, '\\'); } else if (c == 'n') { strAddChar(buffer, '\n'); } else if (c == 't') { strAddChar(buffer, '\t'); } else if (c == '"') { strAddChar(buffer, '"'); } else if (c == 'x') { state = STRING_HEXA; } else if (c == 'b') { state = STRING_BINARY; } else if (c == '0') { state = STRING_OCTA; } else if (c == '\n') { // do nothing, string continues on next line - TODO: document this change } else { lex_error("Escape sequence '\\%c' unknown.\n", c); } break; case STRING_HEXA: if (j < 2 && isxdigit(c)) // 2 is max hexadecimal escape length { literal[j] = c; j++; } else if (j == 0) // no valid hexadecimal digit after \x -> error { lex_error("'\\x%c' is not valid hexadecimal escape sequence.\n", c); } else // end of hexadecimal escape { literal[j] = '\0'; escape_seq = strtol(literal, NULL, 16); // will always be successful if (escape_seq == 0) { lex_error("\\x00 is not allowed hexadecimal escape sequence.\n"); } strAddChar(buffer, escape_seq); ungetc(c, in); // return currently read char j = 0; state = STRING; } break; case STRING_BINARY: if (j < 8 && (c == '0' || c == '1')) // 8 is max binary escape length { literal[j] = c; j++; } else if (j == 0) // no valid binary digit after \b -> error { lex_error("'\\b%c' is not valid binary escape sequence.\n", c); } else // end of binary escape { literal[j] = '\0'; escape_seq = strtol(literal, NULL, 2); // will always be successful if (escape_seq == 0) { lex_error("\\b00000000 is not allowed binary escape sequence.\n"); } strAddChar(buffer, escape_seq); ungetc(c, in); // return currently read char j = 0; state = STRING; } break; case STRING_OCTA: if (j < 3 && c >= '0' && c <= '7') // 3 is max octal escape length { literal[j] = c; j++; } else if (j == 0) // no valid octal digit after \0 -> error { lex_error("'\\0%c' is not valid octal escape sequence.\n", c); } else // end of octal escape { literal[j] = '\0'; escape_seq = strtol(literal, 
NULL, 8); // will always be successful if (escape_seq == 0) { lex_error("\\000 is not allowed octal escape sequence.\n"); } else if (escape_seq > 255) { lex_error("Octal escape '\\0%s' bigger than 255.\n", literal); } strAddChar(buffer, escape_seq); ungetc(c, in); // return currently read char j = 0; state = STRING; } break; default: lex_error("Scanner panic!!!\n"); break; } // end_switch } // end_while return 0; }