/*
 * malpar_next_token
 *
 * Purpose:    Scans the source buffer from the saved scan position and
 *             returns the next token.
 * Parameters: sc_buf - source buffer; its character array (ca_head) is read
 *             and its mark is set to the start of the lexeme being scanned.
 * Returns:    The recognised token. ERR_T tokens carry the offending text in
 *             attribute.err_lex, STR_T tokens carry the offset of the literal
 *             inside str_LTBL, and SEOF_T is returned at the end-of-source
 *             sentinel.
 *
 * The scan position (forward) is function-static so that successive calls
 * continue where the previous call stopped; it cannot be rewound from outside
 * this function.
 *
 * NOTE(review): lexemes that start with a letter or digit are handed to
 * aa_table[state], on the assumption that aa_table is indexed by the accepting
 * state reported by get_next_state() - confirm against the table definitions.
 * NOTE(review): only ' ' is skipped as white space; tabs and line breaks still
 * reach the default branch and are returned as ERR_T - confirm whether they
 * should be skipped and counted instead.
 */
Token malpar_next_token(Buffer * sc_buf)
{
    /* Longest text stored in err_lex, and how much of an over-long lexeme is
       kept in front of the "..." marker. */
    enum { ERR_LEX_MAX = 20, ERR_LEX_KEEP = 17 };
    static const char run_time_msg[] = "RUN TIME ERROR:";
    static int forward;       /* current input char offset, kept between calls */
    Token t;                  /* token to be returned */
    unsigned char c;          /* input symbol */
    Buffer *lex_buf = NULL;   /* temporary buffer for holding lexemes */
    int accept = NOAS;        /* Not Accepting State */
    int state = 0;            /* Start state in the Transition Table */
    int lexstart;             /* offset of the first character of the lexeme */
    int kw_idx;               /* result of the keyword lookup */
    int i;                    /* generic loop counter */

    while (1) /* left by a return; re-entered after blanks and comments */
    {
        lexstart = forward;
        c = sc_buf->ca_head[forward++];

        if (isalnum(c) != 0)
        {
            /* Letter or digit: drive the transition table until it accepts. */
            ca_setmark(sc_buf, lexstart);
            state = get_next_state(0, c, &accept);
            while (accept == NOAS)
            {
                c = sc_buf->ca_head[forward++];
                state = get_next_state(state, c, &accept);
            }

            /* NOTE(review): 2, 7, 8 and 11 are the states the original listed
               for rewinding; they are treated here as "accept with retract",
               i.e. the last character read belongs to the next token. */
            if (state == 2 || state == 7 || state == 8 || state == 11)
            {
                forward--;
            }

            lex_buf = b_create(sc_buf->capacity, sc_buf->inc_factor, sc_buf->mode);
            if (lex_buf == NULL)
            {
                t.code = ERR_T;
                for (i = 0; run_time_msg[i] != '\0'; i++)
                {
                    t.attribute.err_lex[i] = run_time_msg[i];
                }
                t.attribute.err_lex[i] = '\0';
                return t;
            }

            /* Copy the lexeme and terminate it so it can be used as a string. */
            for (i = lexstart; i < forward; i++)
            {
                ca_addc(lex_buf, sc_buf->ca_head[i]);
            }
            ca_addc(lex_buf, '\0');

            kw_idx = iskeyword(lex_buf->ca_head);
            if (kw_idx != FALSE) /* keyword token found */
            {
                t.code = KW_T;
                t.attribute.kwt_idx = kw_idx;
            }
            else
            {
                t = aa_table[state](lex_buf->ca_head);
            }

            b_destroy(lex_buf);
            return t;
        }

        /* Special characters are recognised directly. */
        switch (c)
        {
        case ' ':
            continue; /* a blank produces no token */

        case '"':
        {
            int str_start = forward; /* first character after the opening quote */
            int str_len;             /* characters scanned after the opening quote */
            int keep;                /* characters copied into err_lex */

            ca_setmark(sc_buf, str_start);
            c = sc_buf->ca_head[forward];
            if (c == '"')
            {
                /* NOTE(review): "" is reported as a run-time error, as in the
                   original; confirm whether an empty literal should be STR_T. */
                forward++;
                t.code = ERR_T;
                for (i = 0; run_time_msg[i] != '\0'; i++)
                {
                    t.attribute.err_lex[i] = run_time_msg[i];
                }
                t.attribute.err_lex[i] = '\0';
                break;
            }

            /* Stop at the closing quote, a line break or the end of the source. */
            while (c != '"' && c != '\n' && c != '\r' && c != '\0'
                   && c != (unsigned char)EOF)
            {
                c = sc_buf->ca_head[++forward];
            }
            str_len = forward - str_start;

            if (c == '"')
            {
                /* Store the text between the quotes in the string literal table. */
                t.code = STR_T;
                t.attribute.str_offset = str_LTBL->addc_offset;
                for (i = str_start; i < forward; i++)
                {
                    ca_addc(str_LTBL, sc_buf->ca_head[i]);
                }
                ca_addc(str_LTBL, '\0'); /* end of string marker */
                forward++;               /* consume the closing quote */
            }
            else
            {
                /* Unterminated literal: report its text, shortened with "..."
                   when it does not fit into err_lex. */
                keep = (str_len >= ERR_LEX_MAX) ? ERR_LEX_KEEP : str_len;
                t.code = ERR_T;
                for (i = 0; i < keep; i++)
                {
                    t.attribute.err_lex[i] = sc_buf->ca_head[str_start + i];
                }
                if (str_len >= ERR_LEX_MAX)
                {
                    t.attribute.err_lex[i++] = '.';
                    t.attribute.err_lex[i++] = '.';
                    t.attribute.err_lex[i++] = '.';
                }
                t.attribute.err_lex[i] = '\0';
                if (c == '\n' || c == '\r')
                {
                    forward++; /* consume the line break that ended the literal */
                }
            }
            break;
        }

        case '+':
            t.code = ART_OP_T;
            t.attribute.arr_op = PLUS;
            break;

        case '-':
            t.code = ART_OP_T;
            t.attribute.arr_op = MINUS;
            break;

        case '*':
            t.code = ART_OP_T;
            t.attribute.arr_op = MULT;
            break;

        case '/':
            t.code = ART_OP_T;
            t.attribute.arr_op = DIV;
            break;

        /* NOTE(review): '{' maps to LPR_T and '(' to LBR_T, as in the original;
           the names suggest the pairs may be swapped - confirm in the token
           definitions. */
        case '{':
            t.code = LPR_T;
            break;

        case '}':
            t.code = RPR_T;
            break;

        case '>':
            t.code = REL_OP_T;
            t.attribute.rel_op = GT;
            break;

        case '<':
            t.code = REL_OP_T;
            t.attribute.rel_op = LT;
            break;

        case '=':
            if (sc_buf->ca_head[forward++] == '=')
            {
                t.code = REL_OP_T;
                t.attribute.rel_op = EQ;
            }
            else
            {
                forward--; /* the look-ahead character belongs to the next token */
                t.code = ASS_OP_T;
            }
            break;

        case '.':
        {
            int after_dot = forward; /* position to return to on a mismatch */
            int matched = 0;

            /* NOTE(review): a '.' following AND / OR is not consumed here, as
               in the original - confirm against the language grammar. */
            c = sc_buf->ca_head[forward++];
            if (c == 'A')
            {
                c = sc_buf->ca_head[forward++];
                if (c == 'N')
                {
                    c = sc_buf->ca_head[forward++];
                    if (c == 'D')
                    {
                        t.code = LOG_OP_T;
                        t.attribute.log_op = AND;
                        matched = 1;
                    }
                }
            }
            else if (c == 'O')
            {
                c = sc_buf->ca_head[forward++];
                if (c == 'R')
                {
                    t.code = LOG_OP_T;
                    t.attribute.log_op = OR;
                    matched = 1;
                }
            }

            if (!matched)
            {
                /* Report the lone '.' and rescan whatever followed it. */
                forward = after_dot;
                t.code = ERR_T;
                t.attribute.err_lex[0] = '.';
                t.attribute.err_lex[1] = '\0';
            }
            break;
        }

        case '!':
        {
            int is_comment;

            c = sc_buf->ca_head[forward++];
            if (c == '=')
            {
                t.code = REL_OP_T;
                t.attribute.rel_op = NE;
                break;
            }

            is_comment = (c == '<');
            if (!is_comment)
            {
                t.code = ERR_T;
                t.attribute.err_lex[0] = '!';
                t.attribute.err_lex[1] = '\0';
                forward--; /* let the skip below examine this character too */
            }

            /* Skip the rest of the line without running past the end of the
               source. */
            c = sc_buf->ca_head[forward];
            while (c != '\n' && c != '\0' && c != (unsigned char)EOF)
            {
                c = sc_buf->ca_head[++forward];
            }
            if (c == '\n')
            {
                forward++;
            }

            if (is_comment)
            {
                continue; /* a comment produces no token */
            }
            break;
        }

        case ';':
            t.code = EOS_T;
            break;

        case '(':
            t.code = LBR_T;
            break;

        case ')':
            t.code = RBR_T;
            break;

        case '\0':
        case (unsigned char)EOF: /* c is unsigned, so EOF is seen as this value */
            forward--;           /* stay on the sentinel for any further call */
            t.code = SEOF_T;
            break;

        default:
            t.code = ERR_T;
            t.attribute.err_lex[0] = c;
            t.attribute.err_lex[1] = '\0'; /* terminate the error lexeme */
            break;
        }

        return t;
    }
}
/* main function takes a PLATYPUS source file as * an argument at the command line. * usage: platy source_file_name [-stz size][-sts:A | -sts:D] */ int main(int argc, char ** argv){ FILE *fi; /* input file handle */ int loadsize = 0; /*the size of the file loaded in the buffer */ int st_def_size = ST_DEF_SIZE; /* Sumbol Table default size */ char sort_st = 0; /*Symbol Table sort switch */ int ansi_c = !ANSI_C; /* ANSI C flag */ /* Check if the compiler option is set to compile ANSI C */ /* __DATE__, __TIME__, __LINE__, __FILE__, __STDC__ are predefined preprocessor macros*/ if(ansi_c){ err_printf("Date: %s Time: %s",__DATE__, __TIME__); err_printf("ERROR: Compiler is not ANSI C compliant!\n"); exit(1); } /*check for correct arrguments - source file name */ if (argc <= 1){ /* __DATE__, __TIME__, __LINE__, __FILE__ are predefined preprocessor macros*/ err_printf("Date: %s Time: %s",__DATE__, __TIME__); err_printf("Runtime error at line %d in file %s", __LINE__, __FILE__); err_printf("%s%s%s",argv[0],": ","Missing source file name."); err_printf("%s%s%s","Usage: ", "platy", " source_file_name [-stz size][-sts:A | -sts:D]"); exit(EXIT_FAILURE); } /* check for optional switches - symbol table size and/or sort */ if (argc == 3){ if (strcmp(argv[2],"-sts:A") && strcmp(argv[2],"-sts:D") ){ err_printf("%s%s%s",argv[0],": ","Invalid switch."); err_printf("%s%s\b\b\b\b%s","Usage: ", argv[0], " source file name [-stz size][-sts:A | -sts:D]"); exit(EXIT_FAILURE); } if(strcmp(argv[2],"-sts:A")) sort_st = 'D'; else sort_st = 'A'; } /* symbol table size specified */ if (argc == 4){ if (strcmp(argv[2],"-stz")){ err_printf("%s%s%s",argv[0],": ","Invalid switch."); err_printf("%s%s\b\b\b\b%s","Usage: ", argv[0], " source file name [-stz size][-sts:A | -sts:D]"); exit(EXIT_FAILURE); } /* convert the symbol table size */ st_def_size = atoi(argv[3]); if (st_def_size <= 0){ err_printf("%s%s%s",argv[0],": ","Invalid switch."); err_printf("%s%s\b\b\b\b%s","Usage: ", argv[0], " source file 
name [-stz size][-sts:A | -sts:D]"); exit(EXIT_FAILURE); } } if (argc == 5){ if (strcmp(argv[2],"-stz")){ err_printf("%s%s%s",argv[0],": ","Invalid switch."); err_printf("%s%s\b\b\b\b%s","Usage: ", argv[0], " source file name [-stz size][-sts:A | -sts:D]"); exit(EXIT_FAILURE); } /* convert the symbol table size */ st_def_size = atoi(argv[3]); if (st_def_size <= 0){ err_printf("%s%s%s",argv[0],": ","Invalid switch."); err_printf("%s%s\b\b\b\b%s","Usage: ", argv[0], " source file name [-stz size][-sts:A | -sts:D]"); exit(EXIT_FAILURE); } if (strcmp(argv[4],"-sts:A")&& strcmp(argv[4],"-sts:D") ){ err_printf("%s%s%s",argv[0],": ","Invalid switch."); err_printf("%s%s\b\b\b\b%s","Usage: ", argv[0], " source file name [-stz size][-sts:A | -sts:D]"); exit(EXIT_FAILURE); } if(strcmp(argv[4],"-sts:A")) sort_st = 'D'; else sort_st = 'A'; } /* create a source code input buffer - multiplicative mode */ sc_buf = b_create(INIT_CAPACITY,INC_FACTOR,'m'); if (sc_buf == NULL){ err_printf("%s%s%s",argv[0],": ","Could not create source buffer"); exit(EXIT_FAILURE); } /* create symbol table */ sym_table = st_create(st_def_size); if (!sym_table.st_size){ err_printf("%s%s%s",argv[0],": ","Could not create symbol table"); exit (EXIT_FAILURE); } /*open source file */ if ((fi = fopen(argv[1],"r")) == NULL){ err_printf("%s%s%s%s",argv[0],": ", "Cannot open file: ",argv[1]); exit (1); } /* load source file into input buffer */ printf("Reading file %s ....Please wait\n",argv[1]); loadsize = ca_load (fi,sc_buf); if(loadsize == R_FAIL_1) err_printf("%s%s%s",argv[0],": ","Error in loading buffer."); /* close source file */ fclose(fi); /*find the size of the file */ if (loadsize == LOAD_FAIL){ printf("The input file %s %s\n", argv[1],"is not completely loaded."); printf("Input file size: %ld\n", get_filesize(argv[1])); } /* pack and display the source buffer */ if(ca_pack(sc_buf)){ display(sc_buf); } /* create string Literal Table */ str_LTBL = b_create(INIT_CAPACITY,INC_FACTOR,'a'); if (str_LTBL 
== NULL){ err_printf("%s%s%s",argv[0],": ","Could not create string buffer"); exit(EXIT_FAILURE); } /*registrer exit function */ atexit(garbage_collect); /*Testbed for buffer, scanner,symbol table and parser*/ /* Initialize scanner input buffer scanner_init(sc_buf); */ line = 1; ca_addc(sc_buf, EOF); printf("\nParsing the source file...\n\n"); parser(sc_buf); /* print Symbol Table */ if(sym_table.st_size && sort_st){ st_print(sym_table); if(sort_st){ printf("\nSorting symbol table...\n"); st_sort(sym_table,sort_st); st_print(sym_table); } } return (EXIT_SUCCESS); /* same effect as exit(0) */ }/*end of main */
/*
 * scanner_init
 *
 * Purpose:    Prepares the scanner state before a source buffer is scanned.
 * Parameters: buf - source buffer; a '\0' character is appended to it.
 * Returns:    Nothing.
 *
 * Side effects: resets the global string literal table (str_LTBL) and sets
 * the global source line counter (line) to 1.
 */
void scanner_init(Buffer *buf)
{
    ca_addc(buf, '\0'); /* in case EOF is not in the buffer */
    b_reset(str_LTBL);  /* reset the string literal table */
    line = 1;           /* set the source code line number to 1 */
}