/*
 * Initialise the global symbol table and the parser control structure, then
 * register the built-in functions length, copy, find and sort.
 *
 * T     - global symbol table; its item list is reset to empty
 * ridic - control structure; its bookkeeping fields are reset
 *
 * Side effects: the global frame stack head Rfirst is set to NULL.
 *
 * Each built-in is inserted as a FUNCTION_HEADER item and the code string
 * listed below is appended to the item's `arg` field (presumably the encoded
 * parameter/return types -- confirm against the users of `arg`).
 *
 * NOTE(review): the results of strInit, strAddStr and GlobItemInsert are not
 * checked, exactly as before; the function returns void and cannot report
 * a failure to its caller.
 */
void GlobTableInit(tGlobSymbolTable *T, Tridic *ridic)
{
    /* name of the built-in, code appended to item->arg */
    static char *const builtins[][2] = {
        { "length", "si"   },
        { "copy",   "siis" },
        { "find",   "ssi"  },
        { "sort",   "ss"   },
    };
    string pom;             /* scratch string carrying the current name   */
    sGlobTableItem *novy;   /* item produced by the latest GlobItemInsert */
    unsigned i;

    Rfirst = NULL;                  /* the frame stack is empty              */
    T->first = NULL;                /* the global table is empty             */
    ridic->aktiv = NULL;            /* no active node in the global table    */
    ridic->pomlog = 0;              /* auxiliary flag                        */
    ridic->pocet_argumentu = 0;     /* argument counter of the function      */
    ridic->deklaration = 0;         /* auxiliary flag for function headers   */

    strInit(&pom);

    for (i = 0; i < sizeof builtins / sizeof builtins[0]; i++) {
        strAddStr(&pom, builtins[i][0]);
        GlobItemInsert(T, &pom, FUNCTION_HEADER, ridic, &novy);
        strAddStr(&(novy->arg), builtins[i][1]);
        strClear(&pom);
    }

    /*
     * Release the scratch string; it used to be leaked.  The same buffer is
     * cleared and reused between insertions, so GlobItemInsert is assumed to
     * keep its own copy of the name -- TODO confirm.
     */
    strFree(&pom);
}
/*
 * Dump an instruction tape to stdout as a four-column table
 * (instruction, operand 1, operand 2, result).
 *
 * Every tape item is printed as two rows: the first row is produced by the
 * construct*Line1 helpers, the second by the construct*Line2 helpers.
 * Missing operands (NULL pointers) are printed as empty cells.
 *
 * Returns E_INTERN when `tape` is NULL, when the tape is empty, or when one
 * of the scratch strings cannot be initialised; E_OK otherwise.
 * Scratch strings that were already initialised are released on every path
 * (previously they leaked when a later strInit failed).
 */
tErrors printTape(tTape *tape)
{
    tErrors status = E_INTERN;
    tTapeItem *iter;
    string inst;
    string op1;
    string op2;
    string result;

    if (tape == NULL || tape->first == NULL)
        return E_INTERN;

    if (strInit(&inst) != STR_SUCCESS)
        goto out;
    if (strInit(&op1) != STR_SUCCESS)
        goto free_inst;
    if (strInit(&op2) != STR_SUCCESS)
        goto free_op1;
    if (strInit(&result) != STR_SUCCESS)
        goto free_op2;

    printf("||======================================================================||\n");
    printf("||\tINST\t|\tOP1\t|\tOP2\t|\tRESULT\t||\n");
    printf("||======================================================================||\n");

    for (iter = tape->first; iter != NULL; iter = iter->next) {
        /* First row of the item. */
        strClear(&inst);
        strClear(&op1);
        strClear(&op2);
        strClear(&result);

        constructInstStringLine1(&inst, iter->instruction);
        if (iter->op1 != NULL)
            constructOpStringLine1(&op1, iter->op1);
        if (iter->op2 != NULL)
            constructOpStringLine1(&op2, iter->op2);
        if (iter->result != NULL)
            constructOpStringLine1(&result, iter->result);

        printf("||\t%s\t|\t%s\t|\t%s\t|\t%s\t\t||\n",
               inst.str, op1.str, op2.str, result.str);

        /* Second row of the item. */
        strClear(&inst);
        strClear(&op1);
        strClear(&op2);
        strClear(&result);

        constructInstStringLine2(&inst, iter);
        if (iter->op1 != NULL)
            constructOpStringLine2(&op1, iter->op1);
        if (iter->op2 != NULL)
            constructOpStringLine2(&op2, iter->op2);
        if (iter->result != NULL)
            constructOpStringLine2(&result, iter->result);

        printf("||\t%s\t|\t%s\t|\t%s\t|\t%s\t\t||\n",
               inst.str, op1.str, op2.str, result.str);
        printf("||======================================================================||\n");
    }

    status = E_OK;

    strFree(&result);
free_op2:
    strFree(&op2);
free_op1:
    strFree(&op1);
free_inst:
    strFree(&inst);
out:
    return status;
}
// Fuknce na generovánà nových proměnných void GenNewVariable(string *item) { strClear(item); strAddChar(item, '#'); int i; i = cnt; while(i != 0) { strAddChar(item, (char)(i % 10 + '0')); i = i / 10; } cnt++; }
/*
 * Replace the contents of `s1` with a copy of the C string `retezec`.
 *
 * `s1` is cleared first and the source is then appended one character at a
 * time.  Returns STR_ERROR as soon as an append fails (the characters copied
 * so far stay in `s1`), STR_SUCCESS once the whole string has been copied.
 */
int strCopyChar(string *s1, char retezec[])
{
    int length;
    int pos = 0;

    strClear(s1);                   /* start from an empty destination */
    length = strlen(retezec);       /* measured once, after the clear  */

    while (pos < length) {
        if (strAddChar(s1, retezec[pos]) == STR_ERROR) {
            return STR_ERROR;
        }
        pos++;
    }

    return STR_SUCCESS;
}
/*
 * Produce a unique identifier name in `var`.
 *
 * The name consists of '$' followed by the decimal digits of the global
 * counter `counterVar`, written in reversed order (least-significant digit
 * first).  The reversed order does not affect uniqueness and keeps the
 * implementation simple.  For a counter value of 0 no digit is written.
 * The counter is advanced by one on every call.
 */
void generateVariable(string *var)
{
    int remaining = counterVar;

    strClear(var);
    strAddChar(var, '$');

    while (remaining != 0) {
        const int digit = remaining % 10;

        strAddChar(var, (char)(digit + '0'));
        remaining /= 10;
    }

    counterVar++;
}
/*
 * Read the next token from the global stream `source`.
 *
 * attr - cleared on entry; receives the characters of an identifier or
 *        keyword while it is being read
 *
 * Returns one of: LEFT_VINCULUM, RIGHT_VINCULUM, SEMICOLON, INC, DEC,
 * SETZERO, READ, WRITE, WHILE, ID, END_OF_FILE, or LEX_ERROR for an
 * unexpected character, an incomplete "++"/"--" or an unterminated comment.
 * Comments are delimited by '<' and '>' and are skipped together with
 * white space.
 */
int getNextToken(string *attr)
{
    enum { PHASE_IDLE, PHASE_COMMENT, PHASE_WORD, PHASE_PLUS, PHASE_MINUS };

    int phase = PHASE_IDLE;
    int ch;

    strClear(attr);

    for (;;) {
        ch = getc(source);

        switch (phase) {
        case PHASE_IDLE:
            if (isspace(ch)) {
                /* white space between tokens is ignored */
            } else if (isalpha(ch)) {
                strAddChar(attr, ch);
                phase = PHASE_WORD;
            } else {
                switch (ch) {
                case '<':
                    phase = PHASE_COMMENT;
                    break;
                case '+':
                    phase = PHASE_PLUS;
                    break;
                case '-':
                    phase = PHASE_MINUS;
                    break;
                case '{':
                    return LEFT_VINCULUM;
                case '}':
                    return RIGHT_VINCULUM;
                case ';':
                    return SEMICOLON;
                case EOF:
                    return END_OF_FILE;
                default:
                    return LEX_ERROR;
                }
            }
            break;

        case PHASE_COMMENT:
            if (ch == '>') {
                phase = PHASE_IDLE;
            } else if (ch == EOF) {
                /* the comment was never closed */
                return LEX_ERROR;
            }
            break;

        case PHASE_WORD:
            if (isalnum(ch)) {
                strAddChar(attr, ch);
                break;
            }

            /* The word is over: give the look-ahead character back. */
            ungetc(ch, source);

            if (strCmpConstStr(attr, "setzero") == 0)
                return SETZERO;
            if (strCmpConstStr(attr, "read") == 0)
                return READ;
            if (strCmpConstStr(attr, "write") == 0)
                return WRITE;
            if (strCmpConstStr(attr, "while") == 0)
                return WHILE;
            return ID;

        case PHASE_PLUS:
            /* second character of "++" */
            return (ch == '+') ? INC : LEX_ERROR;

        case PHASE_MINUS:
            /* second character of "--" */
            return (ch == '-') ? DEC : LEX_ERROR;
        }
    }
}
void gettoken() { strClear(&(T.s)); int x; char c; char h; int state=0; c=fgetc(f); if(c==EOF) { T.type=T_EOF; return; } while(1) { switch(state) { case 0: { if(c == '{') { if((x=jumpcomment()) == 1) { T.type=T_ERRORTOKEN; return; } c=fgetc(f); break; } else { if(isspace(c) != 0) { c=fgetc(f); break; } else { if(isalpha(c) != 0 || c=='_') { state=3; //T_ID if(strAddChar(&(T.s),c)== STR_ERROR) { T.type=T_ERRORSYSTEM; return; } c=fgetc(f); break; } else{ if(isdigit(c) != 0) { state=4; //->T_INT/T_REAL if(strAddChar(&(T.s),c)== STR_ERROR) { T.type=T_ERRORSYSTEM; return; } c=fgetc(f); break; } else{ if(c==EOF) { T.type=T_EOF; //T_EOF return; } else{ switch(c) //bude prepinat medzi dalsimi moznostami stale v pociatocnom stave { case '\'': { state=11;//->T_STRING c=fgetc(f); break; } case '(': { T.type=T_LB;//T_LB return; break; } case ')' : { T.type=T_RB;//T_RB return; break; } case ':' : { state=14;//T_COLON/T_ASSIGN c=fgetc(f); break; } case '*' : { T.type=T_MUL;//T_MUL return; break; } case '+' : { T.type=T_ADD;//T_ADD return; break; } case '-' : { T.type=T_SUB;//T_SUB return; break; } case '/' : { T.type=T_DIV;//T_DIV return; break; } case '<' : { state=12;//T_LESS/T_LESS_EQ if(strAddChar(&(T.s),c)== STR_ERROR) { T.type=T_ERRORSYSTEM; return; } c=fgetc(f); break; } case '>' : { state=13;//T_MORE/T_MORE_EQ if(strAddChar(&(T.s),c)== STR_ERROR) { T.type=T_ERRORSYSTEM; return; } c=fgetc(f); break; } case '=' : { T.type=T_EQUAL;//T_EQUAL return; break; } case '.' 
: { T.type=T_DOT;//T_DOT return; break; } case ',' : { T.type=T_COMMA;//T_COMMA return; break; } case ';' : { T.type=T_SEMICOLON;//T_SEMICOLON return; break; } default : //neplatny token { T.type=T_ERRORTOKEN; return; } } //switch c } // vylezie z else } } } } break; } case 3 : { if(isalpha(c) != 0 || isdigit(c) !=0 || c=='_') { if(strAddChar(&(T.s),c)== STR_ERROR) { T.type=T_ERRORSYSTEM; return; } c=fgetc(f); } else { ungetc(c,f); if(iskeyword(strGetStr(&(T.s)))==1){ T.type=T_KEYWORD; } else if (iskeyword(strGetStr(&(T.s)))==3){ T.type=T_DATATYPE; } else{ if(iskeyword(strGetStr(&(T.s)))==2) T.type=T_KONST; else{ T.type=T_ID; } } return; } break; } case 4 : { if(c == '.') { state=5; h=c; c=fgetc(f); } else{ if(c=='e' ||c=='E') { if(strAddChar(&(T.s),c)== STR_ERROR) { T.type=T_ERRORSYSTEM; return; } state=7; c=fgetc(f); break; } else{ if(isdigit(c)!=0) { if(strAddChar(&(T.s),c)== STR_ERROR) { T.type=T_ERRORSYSTEM; return; } c=fgetc(f); } else{ ungetc(c,f); T.type=T_INT; return; } } } break; } case 5 : { if(isdigit(c)!=0) { if(strAddChar(&(T.s),h)== STR_ERROR) { T.type=T_ERRORSYSTEM; return; } if(strAddChar(&(T.s),c)== STR_ERROR) { T.type=T_ERRORSYSTEM; return; } state=6; c=fgetc(f); } else{ ungetc(c,f); ungetc(h,f); T.type=T_INT; return; } break; } case 6 : { if(isdigit(c)!=0) { if(strAddChar(&(T.s),c)== STR_ERROR) { T.type=T_ERRORSYSTEM; return; } c=fgetc(f); } else{ if(c=='e' || c=='E') { if(strAddChar(&(T.s),c)== STR_ERROR) { T.type=T_ERRORSYSTEM; return; } state=7; c=fgetc(f); break; } else { ungetc(c,f); T.type=T_REAL; return; } } break; } case 7 : { if(isdigit(c)!=0) { if(strAddChar(&(T.s),c)== STR_ERROR) { T.type=T_ERRORSYSTEM; return; } state=9; c=fgetc(f); break; } else{ if(c=='+' || c=='-') { if(strAddChar(&(T.s),c)== STR_ERROR) { T.type=T_ERRORSYSTEM; return; } state=8; c=fgetc(f); break; } else{ T.type=T_ERRORTOKEN; return; } } break; } case 8 : { if(isdigit(c)!=0) { if(strAddChar(&(T.s),c)== STR_ERROR) { T.type=T_ERRORSYSTEM; return; } state=9; 
c=fgetc(f); break; } else { T.type=T_ERRORTOKEN; return; } break; } case 9 : { if(isdigit(c)!=0) { if(strAddChar(&(T.s),c)== STR_ERROR) { T.type=T_ERRORSYSTEM; return; } state=9; c=fgetc(f); break; } else { T.type=T_REAL; ungetc(c,f); return; } break; } case 11 : { if ( c == '\'') { c=fgetc(f); if ( c == '\''){ if(strAddChar(&(T.s),c)== STR_ERROR) { T.type=T_ERRORSYSTEM; return; } c=fgetc(f); break;} if ( c == '#'){ if(strAddChar(&(T.s),c)== STR_ERROR) { T.type=T_ERRORSYSTEM; return; } c=fgetc(f); if(isdigit(c)!=0) { if(c == '0'){ while (c == '0') { c=fgetc(f); } if(strAddChar(&(T.s),c)== STR_ERROR) { T.type=T_ERRORSYSTEM; return; } } } else { T.type = T_ERRORTOKEN; return; break; } c=fgetc(f); if(isdigit(c)!=0) { if(strAddChar(&(T.s),c)== STR_ERROR) { T.type=T_ERRORSYSTEM; return; } } else { T.type = T_ERRORTOKEN; return; break; } c=fgetc(f); if(isdigit(c)!=0) { if(strAddChar(&(T.s),c)== STR_ERROR) { T.type=T_ERRORSYSTEM; return; } } else { ungetc(c,f); } c=fgetc(f); if ( c != '\'') { T.type = T_ERRORTOKEN; return; } c=fgetc(f); break; } else { ungetc(c,f); T.type = T_STRING; return; } } else { while (c != '\'') { if(strAddChar(&(T.s),c)== STR_ERROR) { T.type=T_ERRORSYSTEM; return; } c=fgetc(f); if(c==EOF) { T.type=T_ERRORTOKEN; return; } } state = 11; break; } } case 12: { if(c=='=') { if(strAddChar(&(T.s),c)== STR_ERROR) { T.type=T_ERRORSYSTEM; return; } T.type=T_LESS_EQ; return; } else if (c=='>') { if(strAddChar(&(T.s),c)== STR_ERROR) { T.type=T_ERRORSYSTEM; return; } T.type=T_NOT_EQ; return; } else { ungetc(c,f); T.type=T_LESS; return; } break; } case 13: { if(c=='=') { if(strAddChar(&(T.s),c)== STR_ERROR) { T.type=T_ERRORSYSTEM; return; } T.type=T_MORE_EQ; return; } else { ungetc(c,f); T.type=T_MORE; return; } break; } case 14: //:= { if(c=='=') { if(strAddChar(&(T.s),c)== STR_ERROR) { T.type=T_ERRORSYSTEM; return; } T.type=T_ASSIGN; return; } else { ungetc(c,f); T.type=T_COLON; return; } break; } } } }
int get_token() { typedef enum { INIT, OPERATOR, SLASH, STRING, STRING_BACKSLASH, STRING_HEXA, STRING_BINARY, STRING_OCTA, NUMBER, FLOAT, FLOAT_EXP, ID, ID_KEYWORD, LINE_COMMENT, BLOCK_COMMENT, BLOCK_COMMENT_END, BASE_EXT, BINARY, OCTA, HEXA } Tstate; Tstate state = INIT; int c; int j = 0; int ret_val = 0; int escape_seq = 0; //char *check; strClear(buffer); token.type = TT_ERR; while ((c = fgetc(in))) { if (c == '\n') { row++; col = 0; } else col++; if (c == EOF) { token.type = TT_EOF; return EOF; } #ifdef SCANNER_DEBUG fprintf(stderr, "%s (%s)", fsm_states[state], strGetStr(buffer)); if (strFirst(buffer) == '\0') fprintf(stderr, "\n"); else fprintf(stderr, " -> "); #endif // DEBUG switch(state) { case INIT: if (c == '/') // comment or operator { state = SLASH; strAddChar(buffer, c); } else if (is_operator(c)) { state = OPERATOR; strAddChar(buffer, c); } else if (c == '"') // string literal { state = STRING; } else if (c == '\\') // x, b, 0 literals supported - BASE { state = BASE_EXT; } else if (isdigit(c)) // number -> integer or double literal { state = NUMBER; strAddChar(buffer, c); } else if (c == '_') // id { state = ID; strAddChar(buffer, c); } else if (isalpha(c)) // alphabetic char -> id or keyword { state = ID_KEYWORD; strAddChar(buffer, c); } else if ((ret_val = is_delimiter(c))) { token.type = TYPE_DELIMITER + ret_val - 1; #ifdef SCANNER_DEBUG fprintf(stderr, "%s\n", token_name[token.type]); #endif return OK; } else if (!isspace(c)) // non valid character { lex_error("Unknown character: '%c'.\n", c); } break; case BASE_EXT: if (c == 'b') { state = BINARY; } else if (c == '0') { state = OCTA; } else if (c == 'x') { state = HEXA; } else lex_error("Unknown character in literal '\\%c'.\n", c); break; case HEXA: if (isxdigit(c)) { if (j < 8) // 8 hexadecimal digits are max int value { literal[j] = c; j++; } else lex_error("Hexadecimal literal too long -> int overflow!\n"); } else { ungetc(c, in); token.type = TT_VALUE_INT; literal[j] = '\0'; token.value_int 
= (int) strtol(literal, NULL, 16); // cannot fail if (token.value_int < 0) lex_warning("Hexadecimal literal '\\x%s' overflow to negative number %d\n", literal, token.value_int); return OK; } break; case OCTA: if (c >= '0' && c <= '7') { if (j < 12) // max int = \0 7777 7777 7777 { literal[j] = c; j++; } else lex_error("Octal literal too long -> int overflow!\n"); } else { ungetc(c, in); token.type = TT_VALUE_INT; literal[j] = '\0'; token.value_int = (int) strtol(literal, NULL, 8); if (token.value_int < 0) lex_warning("Octal literal '\\0%s' overflow to negative number %d\n", literal, token.value_int); return OK; } break; case BINARY: if ((c == '0' || c == '1')) { if (j < 32) { literal[j] = c; j++; } else lex_error("Binary literal too long -> int overflow!\n"); } else { ungetc(c, in); token.type = TT_VALUE_INT; literal[j] = '\0'; token.value_int = (int) strtol(literal, NULL, 2); if (token.value_int < 0) lex_warning("Binary literal '\\b%s' overflow to negative number %d\n", literal, token.value_int); return OK; } break; case ID_KEYWORD: if (isalpha(c)) // add another char into buffer { strAddChar(buffer, c); } else if (c == '_' || isdigit(c)) // id - these chars are not in any keyword { state = ID; strAddChar(buffer, c); } else // end of id or keyword { ungetc(c, in); // return last read char to buffer ret_val = is_keyword(strGetStr(buffer)); if (ret_val) { token.type = TYPE_KEYWORD + ret_val - 1; // magic #ifdef SCANNER_DEBUG fprintf(stderr, "%s\n", token_name[token.type]); #endif return OK; } else { token.type = TT_ID; token.p_string = strGetStr(buffer); #ifdef SCANNER_DEBUG fprintf(stderr, "%s\n", token_name[token.type]); #endif return OK; } } break; case ID: if (isalnum(c) || c == '_') { strAddChar(buffer, c); } else { ungetc(c, in); token.type = TT_ID; token.p_string = strGetStr(buffer); #ifdef SCANNER_DEBUG fprintf(stderr, "%s\n", token_name[token.type]); #endif return OK; } break; case SLASH: if (c == '/') { state = LINE_COMMENT; } else if (c == '*') { state = 
BLOCK_COMMENT; } else // it was division { ungetc(c, in); token.type = TT_DIVIDE; #ifdef SCANNER_DEBUG fprintf(stderr, "%s\n", token_name[token.type]); #endif return OK; } break; case OPERATOR: // not precisely "normal" fsm, but easily extensible (just add operator to operators[] and Ttoken_type) if (is_operator(c)) // c is one of valid chars, that can be in operator { strAddChar(buffer, c); ret_val = determine_operator(strGetStr(buffer)); // check if we still have valid operator in buffer if (!ret_val) // if it's not valid operator { ungetc(c, in); // return last char, it was not part of operator strDelChar(buffer); // delete wrong char from buffer ret_val = determine_operator(strGetStr(buffer)); // determine which operator we have token.type = TYPE_OPERATOR + ret_val - 1; // return token #ifdef SCANNER_DEBUG fprintf(stderr, "%s\n", token_name[token.type]); #endif return OK; } // continue with loading chars if it's valid } else // another char is not operator -> end { ungetc(c, in); ret_val = determine_operator(strGetStr(buffer)); if (ret_val) { token.type = TYPE_OPERATOR + ret_val - 1; #ifdef SCANNER_DEBUG fprintf(stderr, "%s\n", token_name[token.type]); #endif return OK; } else // shouldn't occur, just to be sure.. 
{ lex_error("Unknown operator: '%s'.\n", strGetStr(buffer)); } } break; case LINE_COMMENT: if (c == '\n') // end of line comment { state = INIT; strClear(buffer); } break; case BLOCK_COMMENT: if (c == '*') // possible end of comment state = BLOCK_COMMENT_END; break; case BLOCK_COMMENT_END: if (c == '/') // comment ended { state = INIT; strClear(buffer); } else // false alarm - comment continues state = BLOCK_COMMENT; break; case NUMBER: if (isdigit(c)) { strAddChar(buffer, c); } else if (c == '.') { strAddChar(buffer, c); state = FLOAT; } else if (tolower(c) == 'e') { strAddChar(buffer, c); state = FLOAT_EXP; } else { ungetc(c, in); token.type = TT_VALUE_INT; token.value_int = (int) strtol(strGetStr(buffer), NULL, 10); #ifdef SCANNER_DEBUG fprintf(stderr, "%s\n", token_name[token.type]); #endif return OK; } break; case FLOAT: // aspoň jedna číslice! if (isdigit(c)) { strAddChar(buffer, c); } else if (tolower(c) == 'e') { strAddChar(buffer, c); state = FLOAT_EXP; } else { ungetc(c, in); token.type = TT_VALUE_DOUBLE; token.value_double = strtod(strGetStr(buffer), NULL); //&check); #ifdef SCANNER_DEBUG fprintf(stderr, "%s\n", token_name[token.type]); #endif return OK; } break; case FLOAT_EXP: if (isdigit(c)) { strAddChar(buffer, c); } else if (tolower(strLast(buffer)) == 'e' && (c == '+' || c == '-')) // optional +/- after e/E { strAddChar(buffer, c); } else { ungetc(c, in); token.type = TT_VALUE_DOUBLE; token.value_double = strtod(strGetStr(buffer), NULL); //&check); #ifdef SCANNER_DEBUG fprintf(stderr, "%s\n", token_name[token.type]); #endif return OK; } break; case STRING: if (c == '"') // end of string literal { token.type = TT_VALUE_STRING; token.p_string = strGetStr(buffer); #ifdef SCANNER_DEBUG fprintf(stderr, "%s\n", token_name[token.type]); #endif return OK; } else if (c == '\\') // string literal continues on another line or character constant state = STRING_BACKSLASH; else if (c != '\n') { strAddChar(buffer, c); } else { lex_error("String literal not 
closed.\n"); } break; case STRING_BACKSLASH: state = STRING; if (c == '\\') { strAddChar(buffer, '\\'); } else if (c == 'n') { strAddChar(buffer, '\n'); } else if (c == 't') { strAddChar(buffer, '\t'); } else if (c == '"') { strAddChar(buffer, '"'); } else if (c == 'x') { state = STRING_HEXA; } else if (c == 'b') { state = STRING_BINARY; } else if (c == '0') { state = STRING_OCTA; } else if (c == '\n') { // do nothing, string continues on next line - TODO: zdokumentovat upravu } else { lex_error("Escape sequence '\\%c' unknown.\n", c); } break; case STRING_HEXA: if (j < 2 && isxdigit(c)) // 2 is max hexadecimal escape length { literal[j] = c; j++; } else if (j == 0) // no valid hexadecimal digit after \x -> error { lex_error("'\\x%c' is not valid hexadecimal escape sequence.\n", c); } else // end of hexadecimal escape { literal[j] = '\0'; escape_seq = strtol(literal, NULL, 16); // will always be successful if (escape_seq == 0) { lex_error("\\x00 is not allowed hexadecimal escape sequence.\n"); } strAddChar(buffer, escape_seq); ungetc(c, in); // return currently read char j = 0; state = STRING; } break; case STRING_BINARY: if (j < 8 && (c == '0' || c == '1')) // 8 is max binary escape length { literal[j] = c; j++; } else if (j == 0) // no valid binary digit after \b -> error { lex_error("'\\b%c' is not valid binary escape sequence.\n", c); } else // end of binary escape { literal[j] = '\0'; escape_seq = strtol(literal, NULL, 2); // will always be successful if (escape_seq == 0) { lex_error("\\b00000000 is not allowed binary escape sequence.\n"); } strAddChar(buffer, escape_seq); ungetc(c, in); // return currently read char j = 0; state = STRING; } break; case STRING_OCTA: if (j < 3 && c >= '0' && c <= '7') // 3 is max octal escape length { literal[j] = c; j++; } else if (j == 0) // no valid octal digit after \0 -> error { lex_error("'\\0%c' is not valid octal escape sequence.\n", c); } else // end of octal escape { literal[j] = '\0'; escape_seq = strtol(literal, 
NULL, 8); // will always be successful if (escape_seq == 0) { lex_error("\\000 is not allowed octal escape sequence.\n"); } else if (escape_seq > 255) { lex_error("Octal escape '\\0%s' bigger than 255.\n", literal); } strAddChar(buffer, escape_seq); ungetc(c, in); // return currently read char j = 0; state = STRING; } break; default: lex_error("Scanner panic!!!\n"); break; } // end_switch } // end_while return 0; }
/*
 * Read the next token from the global stream `source`.
 *
 * attr - cleared on entry; receives the text of identifiers and numeric
 *        literals and the decoded contents of string literals
 *
 * Returns the token code.  Lexical errors are reported through lErr().
 * End of input in the middle of a token is an error, with one exception:
 * a "//" comment may end at end of file without a trailing newline, in which
 * case FILEEND is returned (this used to be reported as an error).
 *
 * A block comment closed by a run of asterisks ("**" followed by '/') is now
 * terminated correctly; previously the second '*' made the scanner miss the
 * closing '/'.
 *
 * NOTE(review): several branches continue after lErr(); this is only sound
 * if lErr() does not return -- confirm.
 */
int getNextToken(string *attr)
{
    int esc = 0;    /* value of the \xHH escape being decoded */
    int state = 0;  /* every token starts in state 0 */
    int c;

    strClear(attr);

    while (1) {
        c = getc(source);

        /* state 23 (line comment) handles end of file on its own */
        if (c == EOF && state != 0 && state != 23)
            lErr();

        switch (state) {
        /***** DEFAULT STATE *****/
        case 0:
            if (isspace(c))
                state = 0;
            else if (c == '<')
                state = 1;
            else if (c == '>')
                state = 2;
            else if (c == '!')
                state = 3;
            else if (c == '=')
                state = 4;
            else if (isdigit(c)) {
                strAddChar(attr, c);
                state = 5;
            } else if (isalpha(c) || c == '_') {
                strAddChar(attr, c);
                state = 11;
            } else if (c == '"') {
                state = 15;
            } else if (c == '/')
                state = 20;
            else {
                switch (c) {
                case ';': return STREDNIK;
                case '+': return PLUS;
                case '-': return MINUS;
                case '*': return NASOBENI;
                case ',': return CARKA;
                case '(': return L_ZAVORKA;
                case ')': return P_ZAVORKA;
                case '{': return LS_ZAVORKA;
                case '}': return PS_ZAVORKA;
                case EOF: return FILEEND;
                default:
                    lErr();
                    break;
                }
            }
            break;

        /***** LESS, LESS_OR_EQUAL, << *****/
        case 1:
            if (c == '=')
                return MENE_NEBO_ROVNO;
            else if (c == '<')
                return ZAPIS;
            else {
                ungetc(c, source);
                return MENE;
            }

        /***** MORE, MORE_OR_EQUAL, >> *****/
        case 2:
            if (c == '=')
                return VICE_NEBO_ROVNO;
            else if (c == '>')
                return CTENI;
            else {
                ungetc(c, source);
                return VICE;
            }

        /***** NOT EQUAL *****/
        case 3:
            if (c == '=')
                return NEROVNOST;
            else
                lErr();
            break;

        /***** EQUAL OR ASSIGNMENT *****/
        case 4:
            if (c == '=')
                return ROVNA_SE;
            else {
                ungetc(c, source);
                return PRIRAZENI;
            }

        /***** INT OR DOUBLE *****/
        case 5:     /* integer part */
            if (isdigit(c))
                strAddChar(attr, c);
            else if (c == '.') {
                strAddChar(attr, c);
                state = 6;
            } else if (c == 'e' || c == 'E') {
                strAddChar(attr, c);
                state = 8;
            } else {
                ungetc(c, source);
                return INT_V;
            }
            break;

        case 6:     /* first digit after the decimal point is mandatory */
            if (isdigit(c)) {
                strAddChar(attr, c);
                state = 7;
            } else
                lErr();
            break;

        case 7:     /* fraction digits */
            if (isdigit(c))
                strAddChar(attr, c);
            else if (c == 'e' || c == 'E') {
                strAddChar(attr, c);
                state = 8;
            } else {
                ungetc(c, source);
                return DOUBLE_V;
            }
            break;

        case 8:     /* just after e/E: sign or first exponent digit */
            if (isdigit(c)) {
                strAddChar(attr, c);
                state = 10;
            } else if (c == '+' || c == '-') {
                strAddChar(attr, c);
                state = 9;
            } else
                lErr();
            break;

        case 9:     /* after the exponent sign a digit is mandatory */
            if (isdigit(c)) {
                strAddChar(attr, c);
                state = 10;
            } else
                lErr();
            break;

        case 10:    /* exponent digits */
            if (isdigit(c))
                strAddChar(attr, c);
            else {
                ungetc(c, source);
                return DOUBLE_V;
            }
            break;

        /***** ID OR KEYWORD *****/
        case 11:
            if (isalnum(c) || c == '_')
                strAddChar(attr, c);
            else {
                ungetc(c, source);
                if (!strCmpConstStr(attr, "auto")) return AUTO;
                else if (!strCmpConstStr(attr, "cin")) return CIN;
                else if (!strCmpConstStr(attr, "cout")) return COUT;
                else if (!strCmpConstStr(attr, "double")) return DOUBLE;
                else if (!strCmpConstStr(attr, "else")) return ELSE;
                else if (!strCmpConstStr(attr, "for")) return FOR;
                else if (!strCmpConstStr(attr, "if")) return IF;
                else if (!strCmpConstStr(attr, "int")) return INT;
                else if (!strCmpConstStr(attr, "return")) return RETURN;
                else if (!strCmpConstStr(attr, "string")) return STRING;
                else return ID;
            }
            break;

        /***** STRING *****/
        case 15:    /* inside the literal */
            if (c == '\\') {
                state = 16;
            } else if (c == '"') {
                return STRING_V;
            } else
                strAddChar(attr, c);
            break;

        case 16:    /* character after a backslash */
            if (c == '\\') {
                strAddChar(attr, '\\');
                state = 15;
            } else if (c == 't') {
                strAddChar(attr, '\t');
                state = 15;
            } else if (c == 'n') {
                strAddChar(attr, '\n');
                state = 15;
            } else if (c == '"') {
                strAddChar(attr, '"');
                state = 15;
            } else if (c == 'x') {
                state = 17;
            } else {
                lErr();
            }
            break;

        case 17:    /* high nibble of \xHH */
            if (isxdigit(c)) {
                if (c >= 'a' && c <= 'f') {
                    esc = (c - 'a' + 10) * 16;
                } else if (c >= 'A' && c <= 'F') {
                    esc = (c - 'A' + 10) * 16;
                } else {
                    esc = (c - '0') * 16;
                }
                state = 18;
            } else
                lErr();
            break;

        case 18:    /* low nibble of \xHH */
            if (isxdigit(c)) {
                if (c == '0' && esc == 0)
                    lErr();     /* \x00 is not allowed */
                if (c >= 'a' && c <= 'f') {
                    esc = esc + (c - 'a' + 10);
                } else if (c >= 'A' && c <= 'F') {
                    esc = esc + (c - 'A' + 10);
                } else {
                    esc = esc + (c - '0');
                }
                strAddChar(attr, esc);
                state = 15;
            } else
                lErr();
            break;

        /***** DIVISION OR COMMENT *****/
        case 20:
            if (c == '*')
                state = 21;
            else if (c == '/')
                state = 23;
            else {
                ungetc(c, source);
                return DELENI;
            }
            break;

        case 21:    /* inside a block comment */
            if (c == '*')
                state = 22;
            break;

        case 22:    /* '*' seen inside a block comment */
            if (c == '/')
                state = 0;
            else if (c != '*')
                state = 21;
            /* another '*': stay here, the next character may be '/' */
            break;

        case 23:    /* inside a line comment */
            if (c == '\n')
                state = 0;
            else if (c == EOF)
                return FILEEND;
            break;
        }
    }
}
/*
 * Read the next token from the global stream `sourceFile`.
 *
 * attr - cleared on entry; receives the text of identifiers and numbers and
 *        the contents of string literals (\n, \t and \\ are decoded, a \xHH
 *        escape is stored verbatim as the four characters \xHH)
 *
 * Returns a TOK_* code, or LEX_ERROR on a lexical error.
 *
 * Fixes compared with the previous version:
 *  - the current character is an int, so EOF is distinguishable from every
 *    byte value (with a plain char the EOF checks were unreliable);
 *  - an exponent consisting of a single digit ("1e5") is accepted;
 *  - inside a block comment a '*' that is not followed by '/' no longer
 *    leaves the scanner waiting for any later '/';
 *  - end of file inside a comment no longer loops forever: a line comment
 *    at end of file yields TOK_END_OF_FILE, an unterminated block comment
 *    yields LEX_ERROR;
 *  - an unknown character yields LEX_ERROR instead of printing debug text
 *    to stdout and being skipped.
 *
 * As a consequence of reading into an int, bytes above 127 inside a string
 * literal are accepted on every platform (they satisfy the "> 31" test).
 *
 * NOTE(review): the escape \" does not store a quote character (the append
 * was commented out in the original) -- confirm whether that is intended.
 * NOTE(review): after the exponent digits only ' ', ';', ')', '+', '-', '/'
 * and '*' are accepted as terminators; anything else, including a newline,
 * is LEX_ERROR -- confirm.
 */
int getNextToken(string *attr){
    int c;
    int stav = STATE_START;

    strClear(attr);

    while(1){
        c = getc(sourceFile);

        switch(stav){
        case STATE_START:
            if(isspace(c)){
                stav = STATE_START;
            } else if(isalpha(c) || c == '_'){
                strAddChar(attr, c);
                stav = STATE_ALPHANUM;
            } else if(isdigit(c)){
                strAddChar(attr, c);
                stav = STATE_DIGIT;
            } else if(c == '"'){
                stav = STATE_STRING;
            } else {
                switch(c){
                case '+': return TOK_ADDITION;
                case '-': return TOK_SUBTRACTION;
                case '*': return TOK_MULTIPLICATION;
                case '/': stav = STATE_SLASH; break;
                case '<': stav = STATE_LESS_THAN; break;
                case '>': stav = STATE_GREATER_THAN; break;
                case '=': stav = STATE_EQUALS; break;
                case '!': stav = STATE_INEQUALITY; break;
                case ';': return TOK_SEMICOLON;
                case '(': return TOK_LEFT_BRACKET;
                case ')': return TOK_RIGHT_BRACKET;
                case '.': return TOK_DOT;
                case ',': return TOK_COMMA;
                case EOF: return TOK_END_OF_FILE;
                case '{': return TOK_LEFT_BRACE;
                case '}': return TOK_RIGHT_BRACE;
                default:
                    /* character that starts no token */
                    return LEX_ERROR;
                }
            }
            break;

        case STATE_INEQUALITY:
            if(c == '='){
                return TOK_INEQUALITY;
            }
            ungetc(c, sourceFile);
            return LEX_ERROR;

        case STATE_STRING:
            if(c == '"'){
                return TOK_STR;
            } else if(c == '\\'){
                stav = STATE_ESCAPE_SEQUENCE;
            } else if(c == EOF){
                return LEX_ERROR;       /* unterminated literal */
            } else if(c > 31){
                strAddChar(attr, c);
            } else {
                /* control characters must be written as escapes */
                ungetc(c, sourceFile);
                return LEX_ERROR;
            }
            break;

        case STATE_ESCAPE_SEQUENCE:
            if(c == 'n'){
                strAddChar(attr, '\n');
                stav = STATE_STRING;
            } else if(c == '\\'){
                strAddChar(attr, '\\');
                stav = STATE_STRING;
            } else if(c == 't'){
                strAddChar(attr, '\t');
                stav = STATE_STRING;
            } else if(c == '"'){
                stav = STATE_STRING;
            } else if(c == 'x'){
                /* keep "\x" verbatim; the two hex digits follow */
                strAddChar(attr, '\\');
                strAddChar(attr, c);
                stav = STATE_HEXA_CHAR;
            } else {
                ungetc(c, sourceFile);
                return LEX_ERROR;
            }
            break;

        case STATE_HEXA_CHAR:
            if(isxdigit(c)){
                strAddChar(attr, c);
                stav = STATE_HEXA_CHAR_SECOND;
            } else {
                ungetc(c, sourceFile);
                return LEX_ERROR;
            }
            break;

        case STATE_HEXA_CHAR_SECOND:
            if(isxdigit(c)){
                strAddChar(attr, c);
                stav = STATE_STRING;
            } else {
                ungetc(c, sourceFile);
                return LEX_ERROR;
            }
            break;

        case STATE_DIGIT:
            if(isdigit(c)){
                strAddChar(attr, c);
            } else if(c == '.'){
                strAddChar(attr, c);
                stav = STATE_FLOATING_POINT;
            } else if(c == 'E' || c == 'e'){
                strAddChar(attr, c);
                stav = STATE_EXPONENT;
            } else {
                ungetc(c, sourceFile);
                return TOK_DECIMAL_NUMBER;
            }
            break;

        case STATE_FLOATING_POINT:
            if(isdigit(c)){
                strAddChar(attr, c);
            } else if(c == 'E' || c == 'e'){
                strAddChar(attr, c);
                stav = STATE_EXPONENT;
            } else {
                ungetc(c, sourceFile);
                return TOK_FLOATING_POINT_NUMBER;
            }
            break;

        case STATE_EXPONENT:
            if(isdigit(c)){
                /* first exponent digit: the exponent is already complete */
                strAddChar(attr, c);
                stav = STATE_MORE_IN_EXPONENT2;
            } else if(c == '+' || c == '-'){
                /* a sign must be followed by at least one digit */
                strAddChar(attr, c);
                stav = STATE_MORE_IN_EXPONENT;
            } else {
                ungetc(c, sourceFile);
                return LEX_ERROR;
            }
            break;

        case STATE_MORE_IN_EXPONENT:
            if(isdigit(c)){
                strAddChar(attr, c);
                stav = STATE_MORE_IN_EXPONENT2;
            } else {
                ungetc(c, sourceFile);
                return LEX_ERROR;
            }
            break;

        case STATE_MORE_IN_EXPONENT2:
            if(isdigit(c)){
                strAddChar(attr, c);
            } else if(c == ' ' || c == ';' || c == ')' || c == '+' || c == '-' || c == '/' || c == '*'){
                ungetc(c, sourceFile);
                return TOK_FLOATING_POINT_NUMBER;
            } else {
                ungetc(c, sourceFile);
                return LEX_ERROR;
            }
            break;

        case STATE_EQUALS:
            if(c == '='){
                return TOK_COMPARISON;
            }
            ungetc(c, sourceFile);
            return TOK_EQUALS;

        case STATE_ALPHANUM:
            if(isalnum(c) || c == '_'){
                strAddChar(attr, c);
                break;
            }

            /* the word is complete: keyword or identifier */
            ungetc(c, sourceFile);
            if(0 == strCmpConstStr(attr, "int")){
                return TOK_INT;
            } else if(0 == strCmpConstStr(attr, "double")){
                return TOK_DOUBLE;
            } else if(0 == strCmpConstStr(attr, "else")){
                return TOK_ELSE;
            } else if(0 == strCmpConstStr(attr, "if")){
                return TOK_IF;
            } else if(0 == strCmpConstStr(attr, "return")){
                return TOK_RETURN;
            } else if(0 == strCmpConstStr(attr, "string")){
                return TOK_STRING;
            } else if(0 == strCmpConstStr(attr, "auto")){
                return TOK_AUTO;
            } else if(0 == strCmpConstStr(attr, "cin")){
                return TOK_CIN;
            } else if(0 == strCmpConstStr(attr, "for")){
                return TOK_FOR;
            } else if(0 == strCmpConstStr(attr, "cout")){
                return TOK_COUT;
            }
            return TOK_ID;

        case STATE_GREATER_THAN:
            switch(c){
            case '=':
                return TOK_GREATER_THAN_OR_EQUAL;
            case '>':
                return TOK_DOUBLE_ARROW_RIGHT;
            default:
                ungetc(c, sourceFile);
                return TOK_GREATER_THAN;
            }

        case STATE_LESS_THAN:
            switch(c){
            case '=':
                return TOK_LESS_THAN_OR_EQUAL;
            case '<':
                return TOK_DOUBLE_ARROW_LEFT;
            default:
                ungetc(c, sourceFile);
                return TOK_LESS_THAN;
            }

        case STATE_SLASH:
            switch(c){
            case '/':
                stav = STATE_LINE_COMMENT;
                break;
            case '*':
                stav = STATE_BLOCK_COMMENT;
                break;
            default:
                ungetc(c, sourceFile);
                return TOK_DIVISION;
            }
            break;

        case STATE_LINE_COMMENT:
            if(c == '\n'){
                stav = STATE_START;
            } else if(c == EOF){
                return TOK_END_OF_FILE;
            }
            break;

        case STATE_BLOCK_COMMENT:
            if(c == '*'){
                stav = STATE_BLOCK_COMMENT_STAR;
            } else if(c == EOF){
                return LEX_ERROR;       /* unterminated block comment */
            }
            break;

        case STATE_BLOCK_COMMENT_STAR:
            if(c == '/'){
                stav = STATE_START;
            } else if(c == EOF){
                return LEX_ERROR;       /* unterminated block comment */
            } else if(c != '*'){
                stav = STATE_BLOCK_COMMENT;
            }
            /* another '*': stay here, the next character may be '/' */
            break;

        default:
            break;
        }
    }

    return LEX_ERROR;   /* not reached */
}
/*
 * Read the next token from the global stream `source`.
 *
 * attr - cleared on entry; receives the text of identifiers and numbers and
 *        the decoded contents of string literals
 *
 * Returns the token code; LEXIKALNICHYBA on a lexical error and
 * KONECSOUBORU at end of input.  For a word the result comes from
 * is_keyword(), which is also told whether the word is directly followed
 * by '('.
 *
 * String literals are delimited by apostrophes; a doubled apostrophe stands
 * for one apostrophe, and '#N' written between a closing and a reopening
 * apostrophe inserts the character with decimal code N (1..255).
 *
 * Fixes compared with the previous version:
 *  - the escape range check referred to an undeclared variable (`esc`);
 *  - the escape digits were collected in a `string` that was never
 *    initialised or freed; the value is now accumulated in an int;
 *  - the character that terminates a real number or follows the closing
 *    apostrophe of a string is pushed back instead of being lost.
 */
int getNextToken(string *attr)
{
    int c;
    int state = 0;
    int escValue = 0;   /* value of the #N escape read so far (capped) */
    int escDigits = 0;  /* number of digits seen after '#'             */

    strClear(attr);

    while (1) {
        c = getc(source);

        switch (state) {
        case 0:     /* initial state */
            if (isspace(c))
                state = 0;                  /* white space is ignored */
            else if (c == '{')
                state = 1;                  /* comment */
            else if (isalpha(c)) {
                strAddChar(attr, c);
                state = 2;                  /* identifier or keyword */
            } else if (isdigit(c)) {
                strAddChar(attr, c);
                state = 3;                  /* integer or real number */
            } else if (c == ':')
                state = 4;                  /* colon or assignment */
            else if (c == '<')
                state = 5;                  /* <, <= or <> */
            else if (c == '>')
                state = 6;                  /* > or >= */
            else if (c == '\'')
                state = 8;                  /* start of a string */
            else if (c == '+') return PLUS;
            else if (c == '-') return MINUS;
            else if (c == '*') return KRAT;
            else if (c == '/') return DELENO;
            else if (c == '=') return ROVNO;
            else if (c == ';') return STREDNIK;
            else if (c == ',') return CARKA;
            else if (c == '.') return TECKA;
            else if (c == '(') return LEVAZAVORKA;
            else if (c == ')') return PRAVAZAVORKA;
            else if (c == EOF) return KONECSOUBORU;
            else return LEXIKALNICHYBA;
            break;

        case 1:     /* comment */
            if (c == '}')
                state = 0;
            else if (c == EOF)
                return LEXIKALNICHYBA;      /* unterminated comment */
            break;

        case 2:     /* identifier or keyword */
            if (isalnum(c) || c == '_')
                strAddChar(attr, c);
            else {
                ungetc(c, source);
                return is_keyword(attr, c == '(');
            }
            break;

        case 3:     /* integer part of a number */
            if (isdigit(c))
                strAddChar(attr, c);
            else if (c == '.') {
                strAddChar(attr, c);
                state = 7;
            } else {
                ungetc(c, source);
                return KONST_INTEGER;
            }
            break;

        case 4:     /* after ':' */
            if (c == '=')
                return PRIRAZENI;
            ungetc(c, source);
            return DVOJTECKA;

        case 5:     /* after '<' */
            if (c == '=')
                return MENSIROVNO;
            if (c == '>')
                return NEROVNO;
            ungetc(c, source);
            return MENSI;

        case 6:     /* after '>' */
            if (c == '=')
                return VETSIROVNO;
            ungetc(c, source);
            return VETSI;

        case 7:     /* fraction of a real number */
            if (isdigit(c))
                strAddChar(attr, c);
            else {
                /* the terminating character belongs to the next token */
                ungetc(c, source);
                return KONST_REAL;
            }
            break;

        case 8:     /* inside a string */
            if (c == '\'')
                state = 9;
            else if (c == EOF || c == '\n')
                return LEXIKALNICHYBA;
            else
                strAddChar(attr, c);
            break;

        case 9:     /* apostrophe seen: doubled apostrophe, escape or end */
            if (c == '\'') {
                strAddChar(attr, c);
                state = 8;
            } else if (c == '#') {
                escValue = 0;
                escDigits = 0;
                state = 10;
            } else {
                /* end of the string; keep the look-ahead character */
                ungetc(c, source);
                return KONST_STRING;
            }
            break;

        case 10:    /* digits of a #N escape */
            if (isdigit(c)) {
                escDigits++;
                /* stop growing once out of range so the int cannot overflow */
                if (escValue <= 255)
                    escValue = escValue * 10 + (c - '0');
            } else if (c == '\'') {
                if (escDigits == 0)
                    return LEXIKALNICHYBA;  /* no number after '#' */
                if (escValue < 1 || escValue > 255)
                    return LEXIKALNICHYBA;  /* not a character code */
                strAddChar(attr, escValue);
                state = 8;                  /* the string continues */
            } else
                return LEXIKALNICHYBA;
            break;
        }
    }
}