/// Returns the result of getNextChar() when haveNextChar() reports that a
/// character is available; otherwise returns INVALID_CHARACTER.
inline unsigned int followingChar()
{
    if (!haveNextChar())
    {
        return INVALID_CHARACTER;
    }
    return getNextChar();
}
/*
 * getToken returns the next token in the source file.
 *
 * The scanner is a state machine: it begins in START, reads one character
 * per iteration through getNextChar() and stops once it reaches DONE.  The
 * characters that belong to the token are accumulated in tokenString.
 * Before returning, TokenPos is incremented and CurrentToken is set to the
 * returned token.
 *
 * End of input is handled in every state that can otherwise loop forever:
 *   - inside a { } comment       -> ENDFILE
 *   - inside a ; line comment    -> ENDFILE
 *   - inside a [ ] string        -> TOKENERROR (unterminated string)
 */
AsmTokenType TAsmAnalyze::getToken(void)
{
    int tokenStrIdx = 0;                 /* number of characters stored in tokenString */
    tokenString = "";
    AsmTokenType curToken = TOKENERROR;  /* token to return; initialised so no path returns an indeterminate value */
    StateType state = START;             /* current state - always begins at START */
    int save;                            /* flag: append the current character to tokenString */

    while (state != DONE)
    {
        /* NOTE(review): c is a char that is compared against EOF below; whether
           that comparison is reliable depends on getNextChar()'s return type
           and on the signedness of char - confirm. */
        char c = getNextChar();
        save = TRUE;

        switch (state)
        {
        case START:
            if (IsName(c))                          /* start of a name */
                state = INID;
            else if (isdigit(c))                    /* start of a hex number */
                state = INHEXNUM;
            else if (c == '#')
                state = INSHARP;
            else if (c == ',')
                state = INXCHAR;
            else if ((c == ' ') || (c == '\t'))
                save = FALSE;                       /* blanks are skipped */
            else if (c == '[')
            {
                save = FALSE;
                state = INSTRING;
            }
            else if (c == '{')
            {
                save = FALSE;
                state = INCOMMENT;
            }
            else if (c == ';')
            {
                save = FALSE;
                state = INCOMMENT_SINGLE;
            }
            else
            {
                /* single-character tokens */
                state = DONE;
                switch (c)
                {
                case EOF:
                    save = FALSE;
                    curToken = ENDFILE;
                    break;
                case ':':
                    curToken = COLON;
                    break;
                case '=':
                    curToken = EQUAL;
                    break;
                case '@':
                    curToken = ATCHAR;
                    break;
                case '\n':
                    curToken = ENDLINE;
                    break;
                default:
                    curToken = TOKENERROR;
                }
            }
            break;

        case INCOMMENT:
            save = FALSE;
            if (c == EOF)
            {
                /* comment runs to end of input: report ENDFILE explicitly */
                state = DONE;
                curToken = ENDFILE;
            }
            else if (c == '}')
                state = START;
            break;

        case INCOMMENT_SINGLE:
            save = FALSE;
            if (c == '\n')
            {
                state = DONE;
                curToken = ENDLINE;
            }
            else if (c == EOF)
            {
                /* last line has no trailing newline: stop instead of looping */
                state = DONE;
                curToken = ENDFILE;
            }
            break;

        case INXCHAR:
            state = DONE;
            if (c == 'X')
                curToken = XCHAR;
            else
            {
                ungetNextChar();    /* backup in the input */
                save = FALSE;
                curToken = TOKENERROR;
            }
            break;

        case INSHARP:
            if (!isxdigit(c))
            {
                ungetNextChar();    /* backup in the input */
                save = FALSE;
                state = DONE;
                /* '#' must be followed by at least one hex digit */
                if (tokenStrIdx > 1)
                    curToken = SHARP;
                else
                    curToken = TOKENERROR;
            }
            break;

        case INSTRING:
            if (c == ']')
            {
                ungetNextChar();    /* backup in the input */
                save = FALSE;
                state = DONE;
                curToken = STRING;
            }
            else if (c == EOF)
            {
                /* unterminated string: stop instead of looping */
                save = FALSE;
                state = DONE;
                curToken = TOKENERROR;
            }
            break;

        case INID:
            if (!IsName(c) && !isdigit(c))
            {
                ungetNextChar();    /* backup in the input */
                save = FALSE;
                state = DONE;
                curToken = ID;
            }
            break;

        case INHEXNUM:
            if (!isxdigit(c))
            {
                ungetNextChar();    /* backup in the input */
                save = FALSE;
                state = DONE;
                curToken = HEXNUM;
            }
            break;

        case DONE:
        default:
            /* should never happen */
            state = DONE;
            curToken = TOKENERROR;
            break;
        }

        if ((save) && (tokenStrIdx <= MAXTOKENLEN))
        {
            tokenString += c;
            tokenStrIdx++;
        }

        if (state == DONE)
        {
            /* identifiers may turn out to be reserved words */
            if (curToken == ID)
                curToken = reservedLookup(tokenString);
        }
    }

    TokenPos++;
    CurrentToken = curToken;
    return curToken;
}
// Reads the next token from the input into `token`.
//
// Leading spaces are skipped, token.start_/token.end_ are set to the values of
// current_ before and after the token is consumed, and token.type_ is set from
// the first character.  When the token cannot be read, token.type_ becomes
// tokenError.  The function itself always returns true.
bool Reader::readToken(Token& token)
{
    skipSpaces();
    token.start_ = current_;

    const Char first = getNextChar();
    bool succeeded = true;

    if ((first >= '0' && first <= '9') || first == '-')
    {
        // numbers start with a digit or a minus sign
        token.type_ = tokenNumber;
        readNumber();
    }
    else
    {
        switch (first)
        {
        // structural characters
        case '{': token.type_ = tokenObjectBegin;     break;
        case '}': token.type_ = tokenObjectEnd;       break;
        case '[': token.type_ = tokenArrayBegin;      break;
        case ']': token.type_ = tokenArrayEnd;        break;
        case ',': token.type_ = tokenArraySeparator;  break;
        case ':': token.type_ = tokenMemberSeparator; break;
        case 0:   token.type_ = tokenEndOfStream;     break;

        // tokens whose remainder has to be read and validated
        case '"':
            token.type_ = tokenString;
            succeeded = readString();
            break;
        case '/':
            token.type_ = tokenComment;
            succeeded = readComment();
            break;

        // literals: the first letter is already consumed, match the rest
        case 't':
            token.type_ = tokenTrue;
            succeeded = match("rue", 3);
            break;
        case 'f':
            token.type_ = tokenFalse;
            succeeded = match("alse", 4);
            break;
        case 'n':
            token.type_ = tokenNull;
            succeeded = match("ull", 3);
            break;

        default:
            succeeded = false;
            break;
        }
    }

    if (!succeeded)
    {
        token.type_ = tokenError;
    }
    token.end_ = current_;
    return true;
}
//--------------------------------------- // This function is tokenizer (aka lexical analyzer, scanner) TOKEN *getNextToken(void) { char buffer[80]; // buffer to build image int bufferx = 0; // index into buffer TOKEN *t; // will point to taken struct // skip whitespace while (isspace(currentChar)) getNextChar(); // construct token to be returned to parser t = (TOKEN *)malloc(sizeof(TOKEN)); t -> next = NULL; // save start-of-token position t -> beginLine = currentLineNumber; t -> beginColumn = currentColumnNumber; // check for END if (currentChar == END) { t -> image = "<END>"; t -> endLine = currentLineNumber; t -> endColumn = currentColumnNumber; t -> kind = END; } else // check for unsigned int if (isdigit(currentChar)) { do { buffer[bufferx++] = currentChar; t -> endLine = currentLineNumber; t -> endColumn = currentColumnNumber; getNextChar(); } while (isdigit(currentChar)); buffer[bufferx] = '\0'; // term string with null char // save buffer as String in token.image // strdup allocates space and copies string to it t -> image = strdup(buffer); t -> kind = UNSIGNED; } else // check for identifier if (isalpha(currentChar)) { do // build token image in buffer { buffer[bufferx++] = currentChar; t -> endLine = currentLineNumber; t -> endColumn = currentColumnNumber; getNextChar(); } while (isalnum(currentChar)); buffer[bufferx] = '\0'; // save buffer as String in token.image t -> image = strdup(buffer); // check if keyword if (!strcmp(t -> image, "println")) t -> kind = PRINTLN; else // not a keyword so kind is ID t -> kind = ID; } else // process single-character token { switch(currentChar) { case '=': t -> kind = ASSIGN; break; case ';': t -> kind = SEMICOLON; break; case '(': t -> kind = LEFTPAREN; break; case ')': t -> kind = RIGHTPAREN; break; case '+': t -> kind = PLUS; break; case '-': t -> kind = MINUS; break; case '*': t -> kind = TIMES; break; default: t -> kind = ERROR; break; } // save currentChar as string in image field t -> image = (char 
*)malloc(2); // get space (t -> image)[0] = currentChar; // move in string (t -> image)[1] = '\0'; // save end-of-token position t -> endLine = currentLineNumber; t -> endColumn = currentColumnNumber; getNextChar(); // read beyond end of token } // token trace appears as comments in output file // set debug to true to check tokenizer if (debug) fprintf(outFile, "; kd=%3d bL=%3d bC=%3d eL=%3d eC=%3d im=%s\n", t -> kind, t -> beginLine, t -> beginColumn, t -> endLine, t -> endColumn, t -> image); return t; // return token to parser }
int main(int argc,char* args[]) { char* filename=NULL; register FILE *fp = stdin; int colorshema=0; //0:normal, 1:black, 2:pink int iso=-1; //utf8 char stylesheet=0; char htop_fix=0; char line_break=0; char* title=NULL; char word_wrap=0; char no_header=0; printf("<pre class=\"diffed\">\n"); //Begin of Conversion unsigned int c; int fc = -1; //Standard Foreground Color //IRC-Color+8 int bc = -1; //Standard Background Color //IRC-Color+8 int ul = 0; //Not underlined int bo = 0; //Not bold int bl = 0; //No Blinking int ofc,obc,oul,obo,obl; //old values int line=0; int momline=0; int newline=-1; int temp; while ((c=fgetc(fp)) != EOF) { if (c=='\033') { //Saving old values ofc=fc; obc=bc; oul=ul; obo=bo; obl=bl; //Searching the end (a letter) and safe the insert: c='0'; char buffer[1024]; int counter=0; while ((c<'A') || ((c>'Z') && (c<'a')) || (c>'z')) { c=getNextChar(fp); buffer[counter]=c; if (c=='>') //end of htop break; counter++; if (counter>1022) break; } buffer[counter-1]=0; pelem elem; switch (c) { case 'm': //printf("\n%s\n",buffer); //DEBUG elem=parseInsert(buffer); pelem momelem=elem; while (momelem!=NULL) { //jump over zeros int mompos=0; while (mompos<momelem->digitcount && momelem->digit[mompos]==0) mompos++; if (mompos==momelem->digitcount) //only zeros => delete all { bo=0;ul=0;bl=0;fc=-1;bc=-1; } else { switch (momelem->digit[mompos]) { case 1: bo=1; break; case 2: if (mompos+1<momelem->digitcount) switch (momelem->digit[mompos+1]) { case 1: //Reset blink and bold bo=0; bl=0; break; case 4: //Reset underline ul=0; break; case 7: //Reset Inverted temp = bc; if (fc == -1 || fc == 9) { if (colorshema!=1) bc = 0; else bc = 7; } else bc = fc; if (temp == -1 || temp == 9) { if (colorshema!=1) fc = 7; else fc = 0; } else fc = temp; break; } break; case 3: if (mompos+1<momelem->digitcount) fc=momelem->digit[mompos+1]; break; case 4: if (mompos+1==momelem->digitcount) ul=1; else bc=momelem->digit[mompos+1]; break; case 5: bl=1; break; case 7: //TODO: Inverse 
temp = bc; if (fc == -1 || fc == 9) { if (colorshema!=1) bc = 0; else bc = 7; } else bc = fc; if (temp == -1 || temp == 9) { if (colorshema!=1) fc = 7; else fc = 0; } else fc = temp; break; } } momelem=momelem->next; } deleteParse(elem); break; case 'H': if (htop_fix) //a little dirty ... { elem=parseInsert(buffer); pelem second=elem->next; if (second==NULL) second=elem; newline=second->digit[0]-1; if (second->digitcount>1) newline=(newline+1)*10+second->digit[1]-1; if (second->digitcount>2) newline=(newline+1)*10+second->digit[2]-1; deleteParse(elem); if (newline<line) line_break=1; } break; } if (htop_fix) if (line_break) { for (;line<80;line++) printf(" "); } //Checking the differences if ((fc!=ofc) || (bc!=obc) || (ul!=oul) || (bo!=obo) || (bl!=obl)) //ANY Change { if ((ofc!=-1) || (obc!=-1) || (oul!=0) || (obo!=0) || (obl!=0)) printf("</span>"); if ((fc!=-1) || (bc!=-1) || (ul!=0) || (bo!=0) || (bl!=0)) { if (stylesheet) printf("<span class=\""); else printf("<span style=\""); switch (fc) { case 0: if (stylesheet) printf("dimgray "); else printf("color:dimgray;"); break; //Black case 1: if (stylesheet) printf("red "); else printf("color:red;text-decoration:line-through;"); break; //Red case 2: if (stylesheet) printf("green "); else if (colorshema!=1) printf("color:green;"); else printf("color:lime;"); break; //Green case 3: if (stylesheet) printf("yellow "); else if (colorshema!=1) printf("color:olive;"); else printf("color:yellow;"); break; //Yellow case 4: if (stylesheet) printf("blue "); else if (colorshema!=1) printf("color:blue;"); else printf("color:#3333FF;"); break; //Blue case 5: if (stylesheet) printf("purple "); else if (colorshema!=1) printf("color:purple;"); else printf("color:fuchsia;"); break; //Purple case 6: if (stylesheet) printf("cyan "); else if (colorshema!=1) printf("color:teal;"); else printf("color:aqua;"); break; //Cyan case 7: if (stylesheet) printf("white "); else if (colorshema!=1) printf("color:gray;"); else printf("color:white;"); 
break; //White case 9: if (stylesheet) printf("reset "); else if (colorshema!=1) printf("color:black;"); else printf("color:white;"); break; //Reset } switch (bc) { case 0: if (stylesheet) printf("bg-black "); else printf("background-color:black;"); break; //Black case 1: if (stylesheet) printf("bg-red "); else printf("background-color:red;"); break; //Red case 2: if (stylesheet) printf("bg-green "); else if (colorshema!=1) printf("background-color:green;"); else printf("background-color:lime;"); break; //Green case 3: if (stylesheet) printf("bg-yellow "); else if (colorshema!=1) printf("background-color:olive;"); else printf("background-color:yellow;"); break; //Yellow case 4: if (stylesheet) printf("bg-blue "); else if (colorshema!=1) printf("background-color:blue;"); else printf("background-color:#3333FF;"); break; //Blue case 5: if (stylesheet) printf("bg-purple "); else if (colorshema!=1) printf("background-color:purple;"); else printf("background-color:fuchsia;"); break; //Purple case 6: if (stylesheet) printf("bg-cyan "); else if (colorshema!=1) printf("background-color:teal;"); else printf("background-color:aqua;"); break; //Cyan case 7: if (stylesheet) printf("bg-white "); else if (colorshema!=1) printf("background-color:gray;"); else printf("background-color:white;"); break; //White case 9: if (stylesheet) printf("bg-reset "); else if (colorshema==1) printf("background-color:black;"); else if (colorshema==2) printf("background-color:pink;"); else printf("background-color:white;"); break; //Reset } if (ul) { if (stylesheet) printf("underline "); else printf("text-decoration:underline;"); } if (bo) { if (stylesheet) printf("bold "); else printf("font-weight:bold;"); } if (bl) { if (stylesheet) printf("blink "); else printf("text-decoration:blink;"); } printf("\">"); } } } else if (c==13 && htop_fix) { for (;line<80;line++) printf(" "); line=0; momline++; printf("\n"); } else if (c!=8) { line++; if (line_break) { printf("\n"); line=0; line_break=0; 
momline++; } if (newline>=0) { while (newline>line) { printf(" "); line++; } newline=-1; } switch (c) { case '&': printf("&"); break; case '\"': printf("""); break; case '<': printf("<"); break; case '>': printf(">"); break; case '\n':case 13: momline++; line=0; default: printf("%c",c); } if (iso>0) //only at ISOS if ((c & 128)==128) //first bit set => there must be followbytes { int bits=2; if ((c & 32)==32) bits++; if ((c & 16)==16) bits++; for (int meow=1;meow<bits;meow++) printf("%c",getNextChar(fp)); } } } //Footer if ((fc!=-1) || (bc!=-1) || (ul!=0) || (bo!=0) || (bl!=0)) printf("</span>\n"); printf("</pre>\n"); if (fp!=stdin) fclose(fp); return 0; }
int main(int argc,char* args[]) { char* filename=NULL; register FILE *fp = stdin; int colorshema=0; //0:normal, 1:black, 2:pink int iso=-1; //utf8 char stylesheet=0; char htop_fix=0; char line_break=0; char* title=NULL; char word_wrap=0; char no_header=0; //Searching Parameters for (int p = 1;p<argc;p++) { if ((strcmp(args[p],(char*)"--help")==0) || (strcmp(args[p],(char*)"-h")==0) || (strcmp(args[p],(char*)"-?")==0)) { printf("\033[1;31mAnsi Html Adapter\033[0m Version "AHA_VERSION"\n"); printf("\033[1maha\033[0m takes SGR-colored Input and prints W3C conform HTML-Code\n"); printf("use: \033[1maha\033[0m <\033[4moptions\033[0m> [\033[4m-f file\033[0m]\n"); printf(" \033[1maha\033[0m (\033[4m--help\033[0m|\033[4m-h\033[0m|\033[4m-?\033[0m)\n"); printf("\033[1maha\033[0m reads the Input from a file or stdin and writes HTML-Code to stdout\n"); printf("\033[4moptions\033[0m: --black, -b: \033[1;30m\033[1;47mBlack\033[0m Background and \033[1;37mWhite\033[0m \"standard color\"\n"); printf(" --pink, -p: \033[1;35mPink\033[0m Background\n"); printf(" --stylesheet, -s: Use a stylesheet instead of inline styles\n"); printf(" --iso X, -i X: Uses ISO 8859-X instead of utf-8. X must be 1..16\n"); printf(" --title X, -t X: Gives the html output the title \"X\" instead of\n"); printf(" \"stdin\" or the filename\n"); printf(" --line-fix, -l: Uses a fix for inputs using control sequences to\n"); printf(" change the cursor position like htop. It's a hot fix,\n"); printf(" it may not work with any program like htop. Example:\n"); printf(" \033[1mecho\033[0m q | \033[1mhtop\033[0m | \033[1maha\033[0m -l > htop.htm\n"); printf(" --word-wrap, -w: Wrap long lines in the html file. 
This works with\n"); printf(" CSS3 supporting browsers as well as many older ones.\n"); printf(" --no-header, -n: Don't include header into generated HTML,\n"); printf(" useful for inclusion in full HTML files.\n"); printf("Example: \033[1maha\033[0m --help | \033[1maha\033[0m --black > aha-help.htm\n"); printf(" Writes this help text to the file aha-help.htm\n\n"); printf("Copyleft \033[1;32mAlexander Matthes\033[0m aka \033[4mZiz\033[0m 2012\n"); printf(" \033[5;[email protected]\033[0m\n"); printf(" \033[5;36mhttp://ziz.delphigl.com/tool_aha.php\033[0m\n"); printf("This application is subject to the \033[1;34mMPL\033[0m or \033[1;34mLGPL\033[0m.\n"); return 0; } else if ((strcmp(args[p],(char*)"--version")==0) || (strcmp(args[p],(char*)"-v")==0)) { printf("\033[1;31mAnsi Html Adapter\033[0m Version "AHA_VERSION"\n"); return 0; } else if ((strcmp(args[p],"--title")==0) || (strcmp(args[p],"-t")==0)) { if (p+1>=argc) { fprintf(stderr,"No title given!\n"); return 0; } title=args[p+1]; p++; } else if ((strcmp(args[p],"--line-fix")==0) || (strcmp(args[p],"-l")==0)) { htop_fix=1; } else if ((strcmp(args[p],"--no-header")==0) || (strcmp(args[p],"-n")==0)) { no_header=1; } else if ((strcmp(args[p],"--word-wrap")==0) || (strcmp(args[p],"-w")==0)) word_wrap=1; else if ((strcmp(args[p],"--black")==0) || (strcmp(args[p],"-b")==0)) colorshema=1; else if ((strcmp(args[p],"--pink")==0) || (strcmp(args[p],"-p")==0)) colorshema=2; else if ((strcmp(args[p],"--stylesheet")==0) || (strcmp(args[p],"-s")==0)) stylesheet=1; else if ((strcmp(args[p],"--iso")==0) || (strcmp(args[p],"-i")==0)) { if (p+1>=argc) { fprintf(stderr,"No ISO code given!\n"); return 0; } iso = atoi(args[p+1]); if (iso<1 || iso>16) { fprintf(stderr,"not a valid ISO code: ISO 8859-%s\n",args[p+1]); return 0; } p++; } else if (strcmp(args[p],"-f")==0) { if (p+1>=argc) { fprintf(stderr,"no file to read given after \"-f\"!\n"); return 0; } fp = fopen(args[p+1],"r"); if (fp==NULL) { fprintf(stderr,"file \"%s\" not 
found!\n",args[p+1]); return 0; } p++; filename=args[p]; } else { fprintf(stderr,"Unknown parameter \"%s\"\n",args[p]); return 0; } } if (no_header == 0) { //Header: if (iso<0) printf("<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n"); else printf("<?xml version=\"1.0\" encoding=\"ISO-8859-%i\" ?><!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n",iso); printf("<!-- This file was created with the aha Ansi HTML Adapter. http://ziz.delphigl.com/tool_aha.php -->\n"); printf("<html xmlns=\"http://www.w3.org/1999/xhtml\">\n"); printf("<head>\n<meta http-equiv=\"Content-Type\" content=\"application/xml+xhtml; charset=UTF-8\" />\n"); if (title) printf("<title>%s</title>\n",title); else { if (filename==NULL) printf("<title>stdin</title>\n"); else printf("<title>%s</title>\n",filename); } if (stylesheet) { printf("<style type=\"text/css\">\n"); switch (colorshema) { case 1: printf("body {color: white; background-color: black;}\n"); printf(".reset {color: white;}\n"); printf(".bg-reset {background-color: black;}\n"); break; case 2: printf("body {background-color: pink;}\n"); printf(".reset {color: black;}\n"); printf(".bg-reset {background-color: pink;}\n"); break; default: printf(".reset {color: black;}\n"); printf(".bg-reset {background-color: white;}\n"); } if (colorshema!=1) { printf(".black {color: black;}\n"); printf(".red {color: red;}\n"); printf(".green {color: green;}\n"); printf(".yellow {color: olive;}\n"); printf(".blue {color: blue;}\n"); printf(".purple {color: purple;}\n"); printf(".cyan {color: teal;}\n"); printf(".white {color: gray;}\n"); printf(".bg-black {background-color: black;}\n"); printf(".bg-red {background-color: red;}\n"); printf(".bg-green {background-color: green;}\n"); printf(".bg-yellow {background-color: olive;}\n"); printf(".bg-blue {background-color: blue;}\n"); 
printf(".bg-purple {background-color: purple;}\n"); printf(".bg-cyan {background-color: teal;}\n"); printf(".bg-white {background-color: gray;}\n"); } else { printf(".black {color: black;}\n"); printf(".red {color: red;}\n"); printf(".green {color: lime;}\n"); printf(".yellow {color: yellow;}\n"); printf(".blue {color: #3333FF;}\n"); printf(".purple {color: fuchsia;}\n"); printf(".cyan {color: aqua;}\n"); printf(".white {color: white;}\n"); printf(".bg-black {background-color: black;}\n"); printf(".bg-red {background-color: red;}\n"); printf(".bg-green {background-color: lime;}\n"); printf(".bg-yellow {background-color: yellow;}\n"); printf(".bg-blue {background-color: #3333FF;}\n"); printf(".bg-purple {background-color: fuchsia;}\n"); printf(".bg-cyan {background-color: aqua;}\n"); printf(".bg-white {background-color: white;}\n"); } printf(".underline {text-decoration: underline;}\n"); printf(".bold {font-weight: bold;}\n"); printf(".blink {text-decoration: blink;}\n"); printf("</style>\n"); } if (word_wrap) { printf("<style type=\"text/css\">pre {white-space: pre-wrap; white-space: -moz-pre-wrap !important;\n"); printf("white-space: -pre-wrap; white-space: -o-pre-wrap; word-wrap: break-word;}</style>\n"); } printf("</head>\n"); if (stylesheet || ! 
colorshema) printf("<body>\n"); else { switch (colorshema) { case 1: printf("<body style=\"color:white; background-color:black\">\n"); break; case 2: printf("<body style=\"background-color:pink\">\n"); break; } } //default values: //printf("<div style=\"font-family:monospace; white-space:pre\">"); printf("<pre>\n"); } //Begin of Conversion unsigned int c; int fc = -1; //Standard Foreground Color //IRC-Color+8 int bc = -1; //Standard Background Color //IRC-Color+8 int ul = 0; //Not underlined int bo = 0; //Not bold int bl = 0; //No Blinking int ofc,obc,oul,obo,obl; //old values int line=0; int momline=0; int newline=-1; int temp; while ((c=fgetc(fp)) != EOF) { if ((c=='\033')) { //Saving old values ofc=fc; obc=bc; oul=ul; obo=bo; obl=bl; //Searching the end (a letter) and safe the insert: c='0'; char buffer[1024]; int counter=0; while ((c<'A') || ((c>'Z') && (c<'a')) || (c>'z')) { c=getNextChar(fp); buffer[counter]=c; if (c=='>') //end of htop break; counter++; if (counter>1022) break; } buffer[counter-1]=0; pelem elem; switch (c) { case 'm': //printf("\n%s\n",buffer); //DEBUG elem=parseInsert(buffer); pelem momelem=elem; while (momelem!=NULL) { //jump over zeros int mompos=0; while (mompos<momelem->digitcount && momelem->digit[mompos]==0) mompos++; if (mompos==momelem->digitcount) //only zeros => delete all { bo=0;ul=0;bl=0;fc=-1;bc=-1; } else { switch (momelem->digit[mompos]) { case 1: bo=1; break; case 2: if (mompos+1<momelem->digitcount) switch (momelem->digit[mompos+1]) { case 1: //Reset blink and bold bo=0; bl=0; break; case 4: //Reset underline ul=0; break; case 7: //Reset Inverted temp = bc; if (fc == -1 || fc == 9) { if (colorshema!=1) bc = 0; else bc = 7; } else bc = fc; if (temp == -1 || temp == 9) { if (colorshema!=1) fc = 7; else fc = 0; } else fc = temp; break; } break; case 3: if (mompos+1<momelem->digitcount) fc=momelem->digit[mompos+1]; break; case 4: if (mompos+1==momelem->digitcount) ul=1; else bc=momelem->digit[mompos+1]; break; case 5: bl=1; 
break; case 7: //TODO: Inverse temp = bc; if (fc == -1 || fc == 9) { if (colorshema!=1) bc = 0; else bc = 7; } else bc = fc; if (temp == -1 || temp == 9) { if (colorshema!=1) fc = 7; else fc = 0; } else fc = temp; break; } } momelem=momelem->next; } deleteParse(elem); break; case 'H': if (htop_fix) //a little dirty ... { elem=parseInsert(buffer); pelem second=elem->next; if (second==NULL) second=elem; newline=second->digit[0]-1; if (second->digitcount>1) newline=(newline+1)*10+second->digit[1]-1; if (second->digitcount>2) newline=(newline+1)*10+second->digit[2]-1; deleteParse(elem); if (newline<line) line_break=1; } break; } if (htop_fix) if (line_break) { for (;line<80;line++) printf(" "); } //Checking the differences if ((fc!=ofc) || (bc!=obc) || (ul!=oul) || (bo!=obo) || (bl!=obl)) //ANY Change { if ((ofc!=-1) || (obc!=-1) || (oul!=0) || (obo!=0) || (obl!=0)) printf("</span>"); if ((fc!=-1) || (bc!=-1) || (ul!=0) || (bo!=0) || (bl!=0)) { if (stylesheet) printf("<span class=\""); else printf("<span style=\""); switch (fc) { case 0: if (stylesheet) printf("black "); else printf("color:black;"); break; //Black case 1: if (stylesheet) printf("red "); else printf("color:red;"); break; //Red case 2: if (stylesheet) printf("green "); else if (colorshema!=1) printf("color:green;"); else printf("color:lime;"); break; //Green case 3: if (stylesheet) printf("yellow "); else if (colorshema!=1) printf("color:olive;"); else printf("color:yellow;"); break; //Yellow case 4: if (stylesheet) printf("blue "); else if (colorshema!=1) printf("color:blue;"); else printf("color:#3333FF;"); break; //Blue case 5: if (stylesheet) printf("purple "); else if (colorshema!=1) printf("color:purple;"); else printf("color:fuchsia;"); break; //Purple case 6: if (stylesheet) printf("cyan "); else if (colorshema!=1) printf("color:teal;"); else printf("color:aqua;"); break; //Cyan case 7: if (stylesheet) printf("white "); else if (colorshema!=1) printf("color:gray;"); else printf("color:white;"); 
break; //White case 9: if (stylesheet) printf("reset "); else if (colorshema!=1) printf("color:black;"); else printf("color:white;"); break; //Reset } switch (bc) { case 0: if (stylesheet) printf("bg-black "); else printf("background-color:black;"); break; //Black case 1: if (stylesheet) printf("bg-red "); else printf("background-color:red;"); break; //Red case 2: if (stylesheet) printf("bg-green "); else if (colorshema!=1) printf("background-color:green;"); else printf("background-color:lime;"); break; //Green case 3: if (stylesheet) printf("bg-yellow "); else if (colorshema!=1) printf("background-color:olive;"); else printf("background-color:yellow;"); break; //Yellow case 4: if (stylesheet) printf("bg-blue "); else if (colorshema!=1) printf("background-color:blue;"); else printf("background-color:#3333FF;"); break; //Blue case 5: if (stylesheet) printf("bg-purple "); else if (colorshema!=1) printf("background-color:purple;"); else printf("background-color:fuchsia;"); break; //Purple case 6: if (stylesheet) printf("bg-cyan "); else if (colorshema!=1) printf("background-color:teal;"); else printf("background-color:aqua;"); break; //Cyan case 7: if (stylesheet) printf("bg-white "); else if (colorshema!=1) printf("background-color:gray;"); else printf("background-color:white;"); break; //White case 9: if (stylesheet) printf("bg-reset "); else if (colorshema==1) printf("background-color:black;"); else if (colorshema==2) printf("background-color:pink;"); else printf("background-color:white;"); break; //Reset } if (ul) if (stylesheet) printf("underline "); else printf("text-decoration:underline;"); if (bo) if (stylesheet) printf("bold "); else printf("font-weight:bold;"); if (bl) if (stylesheet) printf("blink "); else printf("text-decoration:blink;"); printf("\">"); } } } else if (c==13 && htop_fix) { for (;line<80;line++) printf(" "); line=0; momline++; printf("\n"); } else if (c!=8) { line++; if (line_break) { printf("\n"); line=0; line_break=0; momline++; } if 
(newline>=0) { while (newline>line) { printf(" "); line++; } newline=-1; } switch (c) { case '&': printf("&"); break; case '\"': printf("""); break; case '<': printf("<"); break; case '>': printf(">"); break; case '\n':case 13: momline++; line=0; default: printf("%c",c); } if (iso>0) //only at ISOS if ((c & 128)==128) //first bit set => there must be followbytes { int bits=2; if ((c & 32)==32) bits++; if ((c & 16)==16) bits++; for (int meow=1;meow<bits;meow++) printf("%c",getNextChar(fp)); } } } //Footer if ((fc!=-1) || (bc!=-1) || (ul!=0) || (bo!=0) || (bl!=0)) printf("</span>\n"); if (no_header == 0) { printf("</pre>\n"); printf("</body>\n"); printf("</html>\n"); } if (fp!=stdin) fclose(fp); return 0; }
/*
    Get the next input token and return its token id.

    The token object comes from getLexToken(); T_ERR is returned when none is
    available. If that token already carries a token id, the id is returned
    without reading any input. Otherwise characters are read from cp->stream
    until a complete token has been recognized. White space, line ends and
    comments are skipped.

    For multi-character operators the leading characters are appended to the
    token text with addCharToToken/addStringToToken and the last character is
    passed to makeToken/makeSubToken. A lookahead character that does not
    belong to the token is pushed back with putBackChar and must not be passed
    on as the token character.
 */
PUBLIC int ecGetToken(EcCompiler *cp)
{
    EcToken     *tp;
    EcStream    *stream;
    int         c;

    if ((tp = getLexToken(cp)) == NULL) {
        return T_ERR;
    }
    if (tp->tokenId) {
        return tp->tokenId;
    }
    stream = cp->stream;

    while (1) {
        c = getNextChar(stream);
        /*
            Overloadable operators
            + - ~ * / % < > <= >= == << >> >>> & | === != !==
            TODO FUTURE, we could allow also: ".", "[", "(" and unary !, ^
         */
        switch (c) {
        default:
            if (isdigit((uchar) c)) {
                return makeNumberToken(cp, tp, c);

            } else if (c == '\\') {
                c = getNextChar(stream);
                if (c == '\n') {
                    /* Backslash followed by newline: skip both */
                    break;
                }
                putBackChar(stream, c);
                c = '\n';
            }
            if (isalpha((uchar) c) || c == '_' || c == '\\' || c == '$') {
                return makeAlphaToken(cp, tp, c);
            }
            return makeToken(tp, 0, T_ERR, 0);

        case -1:
            return makeToken(tp, 0, T_ERR, 0);

        case 0:
            if (stream->flags & EC_STREAM_EOL) {
                return makeToken(tp, 0, T_NOP, 0);
            }
            return makeToken(tp, 0, T_EOF, 0);

        case ' ':
        case '\f':
        case '\t':
        case '\v':
        case 0xA0:       /* No break space */
            break;

        case '\r':
        case '\n':
            break;

        case '"':
        case '\'':
            return makeQuotedToken(cp, tp, c);

        case '#':
            return makeToken(tp, c, T_HASH, 0);

        case '[':
            //  EJS extension to consider this an operator
            return makeToken(tp, c, T_LBRACKET, G_OPERATOR);

        case ']':
            return makeToken(tp, c, T_RBRACKET, 0);

        case '(':
            //  EJS extension to consider this an operator
            return makeToken(tp, c, T_LPAREN, G_OPERATOR);

        case ')':
            return makeToken(tp, c, T_RPAREN, 0);

        case '{':
            return makeToken(tp, c, T_LBRACE, 0);

        case '}':
            return makeToken(tp, c, T_RBRACE, 0);

        case '@':
            return makeToken(tp, c, T_AT, 0);

        case ';':
            return makeToken(tp, c, T_SEMICOLON, 0);

        case ',':
            return makeToken(tp, c, T_COMMA, 0);

        case '?':
            return makeToken(tp, c, T_QUERY, 0);

        case '~':
            return makeToken(tp, c, T_TILDE, G_OPERATOR);

        case '+':
            c = getNextChar(stream);
            if (c == '+') {
                addCharToToken(tp, '+');
                return makeToken(tp, c, T_PLUS_PLUS, G_OPERATOR);
            } else if (c == '=') {
                addCharToToken(tp, '+');
                return makeSubToken(tp, c, T_ASSIGN, T_PLUS_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);
            }
            putBackChar(stream, c);
            return makeToken(tp, '+', T_PLUS, G_OPERATOR);

        case '-':
            c = getNextChar(stream);
            if (isdigit((uchar) c)) {
                putBackChar(stream, c);
                return makeToken(tp, '-', T_MINUS, G_OPERATOR);

            } else if (c == '-') {
                addCharToToken(tp, '-');
                return makeToken(tp, c, T_MINUS_MINUS, G_OPERATOR);

            } else if (c == '=') {
                addCharToToken(tp, '-');
                return makeSubToken(tp, c, T_ASSIGN, T_MINUS_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);
            }
            putBackChar(stream, c);
            return makeToken(tp, '-', T_MINUS, G_OPERATOR);

        case '*':
            c = getNextChar(stream);
            if (c == '=') {
                addCharToToken(tp, '*');
                return makeSubToken(tp, c, T_ASSIGN, T_MUL_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);
            }
            putBackChar(stream, c);
            return makeToken(tp, '*', T_MUL, G_OPERATOR);

        case '/':
            c = getNextChar(stream);
            if (c == '=') {
                addCharToToken(tp, '/');
                return makeSubToken(tp, c, T_ASSIGN, T_DIV_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);

            } else if (c == '>') {
                addCharToToken(tp, '/');
                return makeToken(tp, c, T_SLASH_GT, G_OPERATOR);

            } else if (c == '*' || c == '/') {
                /*
                    C and C++ comments
                 */
                if (getComment(cp, tp, c) < 0) {
                    return tp->tokenId;
                }
                /*
                    Doc comments are: [slash]**. The second "*' becomes the first char of the comment.
                    Don't regard: [slash]*** (three stars) as a comment.
                 */
                if (cp->doc) {
                    if (tp->text && tp->text[0] == '*' && tp->text[1] != '*') {
                        cp->docToken = mprMemdup(tp->text, tp->length * sizeof(wchar));
                    }
                }
                /* The comment is not a token: reset and keep scanning */
                initializeToken(tp, stream);
                break;
            }
            putBackChar(stream, c);
            return makeToken(tp, '/', T_DIV, G_OPERATOR);

        case '%':
            c = getNextChar(stream);
            if (c == '=') {
                addCharToToken(tp, '%');
                return makeSubToken(tp, c, T_ASSIGN, T_MOD_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);
            }
            putBackChar(stream, c);
            return makeToken(tp, '%', T_MOD, G_OPERATOR);

        case '.':
            c = getNextChar(stream);
            if (c == '.') {
                c = getNextChar(stream);
                if (c == '.') {
                    addStringToToken(tp, "..");
                    return makeToken(tp, c, T_ELIPSIS, 0);
                }
                putBackChar(stream, c);
                addCharToToken(tp, '.');
                return makeToken(tp, '.', T_DOT_DOT, 0);
#if FUTURE
            } else if (c == '<') {
                addCharToToken(tp, '.');
                return makeToken(tp, c, T_DOT_LESS, 0);
#endif
            } else if (isdigit((uchar) c)) {
                putBackChar(stream, c);
                return makeNumberToken(cp, tp, '.');
            }
            putBackChar(stream, c);
            //  EJS extension to consider this an operator
            return makeToken(tp, '.', T_DOT, G_OPERATOR);

        case ':':
            c = getNextChar(stream);
            if (c == ':') {
                addCharToToken(tp, ':');
                return makeToken(tp, c, T_COLON_COLON, 0);
            }
            putBackChar(stream, c);
            return makeToken(tp, ':', T_COLON, 0);

        case '!':
            c = getNextChar(stream);
            if (c == '=') {
                c = getNextChar(stream);
                if (c == '=') {
                    addStringToToken(tp, "!=");
                    return makeToken(tp, c, T_STRICT_NE, G_OPERATOR);
                }
                putBackChar(stream, c);
                addCharToToken(tp, '!');
                return makeToken(tp, '=', T_NE, G_OPERATOR);
            }
            putBackChar(stream, c);
            return makeToken(tp, '!', T_LOGICAL_NOT, G_OPERATOR);

        case '&':
            c = getNextChar(stream);
            if (c == '&') {
                addCharToToken(tp, '&');
                c = getNextChar(stream);
                if (c == '=') {
                    addCharToToken(tp, '&');
                    return makeSubToken(tp, '=', T_ASSIGN, T_LOGICAL_AND_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);
                }
                putBackChar(stream, c);
                return makeToken(tp, '&', T_LOGICAL_AND, G_OPERATOR);

            } else if (c == '=') {
                addCharToToken(tp, '&');
                return makeSubToken(tp, c, T_ASSIGN, T_BIT_AND_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);
            }
            putBackChar(stream, c);
            return makeToken(tp, '&', T_BIT_AND, G_OPERATOR);

        case '<':
            c = getNextChar(stream);
            if (c == '=') {
                addCharToToken(tp, '<');
                return makeToken(tp, c, T_LE, G_OPERATOR);

            } else if (c == '<') {
                c = getNextChar(stream);
                if (c == '=') {
                    addStringToToken(tp, "<<");
                    return makeSubToken(tp, c, T_ASSIGN, T_LSH_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);
                }
                putBackChar(stream, c);
                addCharToToken(tp, '<');
                /* c was pushed back; the second token character is '<' */
                return makeToken(tp, '<', T_LSH, G_OPERATOR);

            } else if (c == '/') {
                addCharToToken(tp, '<');
                return makeToken(tp, c, T_LT_SLASH, 0);
            }
            putBackChar(stream, c);
            return makeToken(tp, '<', T_LT, G_OPERATOR);

        case '=':
            c = getNextChar(stream);
            if (c == '=') {
                c = getNextChar(stream);
                if (c == '=') {
                    addStringToToken(tp, "==");
                    return makeToken(tp, c, T_STRICT_EQ, G_OPERATOR);
                }
                putBackChar(stream, c);
                addCharToToken(tp, '=');
                /* c was pushed back; the second token character is '=' */
                return makeToken(tp, '=', T_EQ, G_OPERATOR);
            }
            putBackChar(stream, c);
            return makeToken(tp, '=', T_ASSIGN, G_OPERATOR);

        case '>':
            c = getNextChar(stream);
            if (c == '=') {
                /* Token text is ">=": the first character is '>' */
                addCharToToken(tp, '>');
                return makeToken(tp, c, T_GE, G_OPERATOR);

            } else if (c == '>') {
                c = getNextChar(stream);
                if (c == '=') {
                    addStringToToken(tp, ">>");
                    return makeSubToken(tp, c, T_ASSIGN, T_RSH_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);

                } else if (c == '>') {
                    c = getNextChar(stream);
                    if (c == '=') {
                        addStringToToken(tp, ">>>");
                        return makeSubToken(tp, c, T_ASSIGN, T_RSH_ZERO_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);
                    }
                    putBackChar(stream, c);
                    addStringToToken(tp, ">>");
                    return makeToken(tp, '>', T_RSH_ZERO, G_OPERATOR);
                }
                putBackChar(stream, c);
                addCharToToken(tp, '>');
                return makeToken(tp, '>', T_RSH, G_OPERATOR);
            }
            putBackChar(stream, c);
            return makeToken(tp, '>', T_GT, G_OPERATOR);

        case '^':
            c = getNextChar(stream);
            if (c == '^') {
                addCharToToken(tp, '^');
                c = getNextChar(stream);
                if (c == '=') {
                    addCharToToken(tp, '^');
                    return makeSubToken(tp, '=', T_ASSIGN, T_LOGICAL_XOR_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);
                }
                putBackChar(stream, c);
                return makeToken(tp, '^', T_LOGICAL_XOR, G_OPERATOR);

            } else if (c == '=') {
                addCharToToken(tp, '^');
                return makeSubToken(tp, '=', T_ASSIGN, T_BIT_XOR_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);
            }
            putBackChar(stream, c);
            return makeToken(tp, '^', T_BIT_XOR, G_OPERATOR);

        case '|':
            c = getNextChar(stream);
            if (c == '|') {
                addCharToToken(tp, '|');
                c = getNextChar(stream);
                if (c == '=') {
                    addCharToToken(tp, '|');
                    return makeSubToken(tp, '=', T_ASSIGN, T_LOGICAL_OR_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);
                }
                putBackChar(stream, c);
                return makeToken(tp, '|', T_LOGICAL_OR, G_OPERATOR);

            } else if (c == '=') {
                addCharToToken(tp, '|');
                return makeSubToken(tp, '=', T_ASSIGN, T_BIT_OR_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);
            }
            putBackChar(stream, c);
            return makeToken(tp, '|', T_BIT_OR, G_OPERATOR);
        }
    }
}
// Produces the next token and returns token_.
//
// Each pass first runs the handler that belongs to the current state_ (or
// reads a character when the state is NONE).  Whenever the state is NONE
// afterwards, preprocess() is called and the state for the upcoming token is
// selected from input_.eof() and currentChar_.  The loop ends after the first
// pass that started in a state other than NONE.
Token Scanner::getNextToken()
{
    // Chooses the state for a token beginning with currentChar_.
    const auto classifyCurrentChar = [this]() -> State
    {
        if (std::isalpha(currentChar_))
        {
            return State::IDENTIFIER;
        }
        // decimal digit, or '$' introducing a hexadecimal number
        if (std::isdigit(currentChar_) ||
            (currentChar_ == '$' && std::isxdigit(peekChar())))
        {
            return State::NUMBER;
        }
        if (currentChar_ == '\'')
        {
            return State::STRING;
        }
        return State::OPERATION;
    };

    for (;;)
    {
        const bool handlesToken = (state_ != State::NONE);

        switch (state_)
        {
        case State::NONE:
            getNextChar();
            break;
        case State::END_OF_FILE:
            handleEOFState();
            break;
        case State::IDENTIFIER:
            handleIdentifierState();
            break;
        case State::NUMBER:
            handleNumberState();
            break;
        case State::STRING:
            handleStringState();
            break;
        case State::OPERATION:
            handleOperationState();
            break;
        default:
            errorToken("Match token state error.");
            errorFlag_ = true;
            break;
        }

        if (state_ == State::NONE)
        {
            preprocess();
            state_ = input_.eof() ? State::END_OF_FILE : classifyCurrentChar();
        }

        if (handlesToken)
        {
            break;
        }
    }

    return token_;
}
void Scanner::handleNumberState() { loc_ = getTokenLocation(); bool matched = false; bool isFloat = false; int numberBase = 10; if (currentChar_ == '$') { numberBase = 16; // eat $ and update currentChar_ getNextChar(); } enum class NumberState { INTERGER, FRACTION, EXPONENT, DONE }; NumberState numberState = NumberState::INTERGER; do { switch (numberState) { case NumberState::INTERGER: if (numberBase == 10) { handleDigit(); } else if (numberBase == 16) { handleXDigit(); } // maybe want to support octal... break; case NumberState::FRACTION: handleFraction(); isFloat = true; break; case NumberState::EXPONENT: handleExponent(); isFloat = true; break; case NumberState::DONE: break; default: errorToken("Match number state error."); errorFlag_ = true; break; } // change number state if (currentChar_ == '.') { numberState = NumberState::FRACTION; } else if (currentChar_ == 'E' || currentChar_ == 'e') { numberState = NumberState::EXPONENT; } else { numberState = NumberState::DONE; } } while (numberState != NumberState::DONE); if (isFloat) { makeToken(TokenType::REAL, TokenValue::UNRESERVED, loc_, std::stod(buffer_), buffer_); } else { makeToken(TokenType::INTEGER, TokenValue::UNRESERVED, loc_, std::stol(buffer_, 0, numberBase), buffer_); } }
/**
 * Create an Input bound to the given stdio stream.
 *
 * The stream pointer is stored and getNextChar() is called once right away
 * (presumably to prime the first lookahead character -- confirm against
 * getNextChar()).
 *
 * @param stream stdio stream to read from.
 */
Input::Input(FILE *stream)
    : stream(stream)
{
    getNextChar();
}
/**
 * Require the current character to be @p c, then advance.
 *
 * On a mismatch Reporter::expected() is called with the quoted expected
 * character and the character actually found. getNextChar() is called
 * afterwards in either case.
 *
 * @param c the character that must be current.
 */
void Input::match(char c)
{
    if (getChar() == c)
    {
        getNextChar();
        return;
    }

    Reporter::expected("'" + Cradle::toString(c) + "'", getChar());
    getNextChar();
}
/**
 * Advance the "slice" text effect by one step of the zone state machine.
 *
 * Each call handles the current _fsmState and updates it for the next call.
 *
 * @param bIn true to run the entry animation, false to run the exit
 *            animation.
 */
void MD_PZone::effectSlice(bool bIn)
{
  if (bIn)
  {
    switch(_fsmState)
    {
    case INITIALISE:
    case GET_FIRST_CHAR:
      PRINT_STATE("I SLICE");

      // getFirstChar() returning 0 ends the animation immediately
      if ((_charCols = getFirstChar()) == 0)
      {
        _fsmState = END;
        break;
      }
      zoneClear();
      _countCols = 0;
      _nextPos = ZONE_START_COL(_zoneStart);
      _endPos = _limitLeft;

      _fsmState = PUT_CHAR;
      break;

    case GET_NEXT_CHAR: // Load the next character from the font table
      PRINT_STATE("I SLICE");
      // have we reached the end of the characters string?
      if ((_charCols = getNextChar()) == 0)
      {
        _fsmState = PAUSE;
        break;
      }
      _countCols = 0;
      _fsmState = PUT_CHAR;
      // !! fall through to next state to start displaying

    case PUT_CHAR:  // display the next part of the character
      PRINT_STATE("I SLICE");
      FSMPRINT(" - Next ", _endPos);
      FSMPRINT(", anim ", _nextPos);

      // a zero entry in _cBuf is not animated at all
      if (_cBuf[_countCols] == 0)
      {
        _nextPos = _endPos; // pretend we just animated it!
      }
      else
      {
        // clear the column and animate the next one
        if (_nextPos != _endPos) _MX->setColumn(_nextPos, EMPTY_BAR);
        _nextPos++;
        _MX->setColumn(_nextPos, DATA_BAR(_cBuf[_countCols]));
      }

      // set up for the next time
      // once the animated column reaches _endPos, restart from the zone
      // start for the next entry of _cBuf and move the end position down by one
      if (_nextPos == _endPos)
      {
        _nextPos = ZONE_START_COL(_zoneStart);
        _countCols++;
        _endPos--;
      }
      // all _charCols entries done: fetch the next character on the next call
      if (_countCols == _charCols) _fsmState = GET_NEXT_CHAR;
      break;

    default:
      _fsmState = PAUSE;
    }
  }
  else  // exiting
  {
    switch(_fsmState)
    {
    case PAUSE:
      PRINT_STATE("O SLICE");
      _nextPos = _endPos = _limitLeft;
      _fsmState = PUT_CHAR;
      // fall through

    case GET_FIRST_CHAR:
    case GET_NEXT_CHAR:
    case PUT_CHAR:
      PRINT_STATE("O SLICE");
      FSMPRINT(" - Next ", _endPos);
      FSMPRINT(", anim ", _nextPos);

      // skip over columns that are already blank
      while(_MX->getColumn(_nextPos) == EMPTY_BAR && _endPos >= _limitRight)
        _nextPos = _endPos--; // pretend we just animated it!

      if (_endPos <= _limitRight)
        _fsmState = END;  //reached the end
      else
      {
        // Move the column over to the left and blank out previous position
        if (_nextPos < ZONE_END_COL(_zoneEnd))
          _MX->setColumn(_nextPos+1, _MX->getColumn(_nextPos));
        _MX->setColumn(_nextPos, EMPTY_BAR);
        _nextPos++;

        // set up for the next time
        // the column has moved past the zone end: start on the next one
        if (_nextPos == ZONE_END_COL(_zoneEnd)+1)
          _nextPos = _endPos--;
      }
      break;

    default:
      _fsmState = END;
    }
  }
}
/**
 * Parse an INI file from an SDL_RWops stream into the line/section/key lists.
 *
 * The stream is read byte by byte. Lines are split on '\n' and every '\r' is
 * dropped. Each line is classified as
 *   - empty/comment (getNextChar() finds nothing)  -> stored as INIFileLine
 *   - section       ("[name]")                     -> stored as Section
 *   - key/value     (key = value or key = "value") -> stored as Key
 * A line that fits none of these is reported on std::cerr (only if shorter
 * than 100 characters) and kept as a plain INIFileLine, so no line is lost.
 *
 * Every stored line is appended, in file order, to the doubly linked list
 * that starts at FirstLine. Sections are also registered with
 * InsertSection(), and keys with InsertKey() on the most recent section.
 * Keys that appear before any section go to the unnamed root section
 * created here.
 *
 * @param file stream to read from; read until SDL_RWread() returns <= 0.
 */
void INIFile::readfile(SDL_RWops * file) {
    SectionRoot = new Section("",0,0);

    Section* curSection = SectionRoot;
    INIFileLine* curLine = NULL;

    // Single place that links a new line to the end of the line list.
    // The original had four hand-written copies of this; the two key/value
    // copies tested FirstLine instead of curLine, which dereferenced a NULL
    // curLine if FirstLine was already set on entry.
    auto appendLine = [&](INIFileLine* newLine) {
        if(curLine == NULL) {
            FirstLine = newLine;
        } else {
            curLine->nextLine = newLine;
            newLine->prevLine = curLine;
        }
        curLine = newLine;
    };

    std::string completeLine;
    int lineNum = 0;
    bool readfinished = false;

    while(!readfinished) {
        lineNum++;

        // ---- read one line, without its line terminator ----
        completeLine.clear();
        while(true) {
            unsigned char tmp;
            const int readbytes = SDL_RWread(file,&tmp,1,1);
            if(readbytes <= 0) {
                // end of file or read error: process what we have, then stop
                readfinished = true;
                break;
            } else if(tmp == '\n') {
                break;
            } else if(tmp != '\r') {
                completeLine += tmp;
            }
        }

        const unsigned char* line = reinterpret_cast<const unsigned char*>(completeLine.c_str());
        bool SyntaxError = false;

        // ---- classify the line ----
        int ret = getNextChar(line,0);

        if(ret == -1) {
            // empty line or comment
            appendLine(new INIFileLine(completeLine));
        } else if(line[ret] == '[') {
            // section line
            const int sectionstart = ret+1;
            const int sectionend = skipName(line,ret+1);

            if((line[sectionend] != ']') || (getNextChar(line,sectionend+1) != -1)) {
                SyntaxError = true;
            } else {
                // valid section line
                Section* newSection = new Section(completeLine,sectionstart,sectionend-sectionstart);
                appendLine(newSection);
                InsertSection(newSection);
                curSection = newSection;
            }
        } else {
            // might be key/value line
            const int keystart = ret;
            const int keyend = skipKey(line,keystart);

            if(keystart == keyend) {
                SyntaxError = true;
            } else {
                ret = getNextChar(line,keyend);
                if((ret == -1) || (line[ret] != '=')) {
                    SyntaxError = true;
                } else {
                    const int valuestart = getNextChar(line,ret+1);
                    if(valuestart == -1) {
                        SyntaxError = true;
                    } else if(line[valuestart] == '"') {
                        // quoted value: find the closing '"'; nothing may follow it
                        const int valueend = getNextQuote(line,valuestart+1);
                        if((valueend == -1) || (getNextChar(line,valueend+1) != -1)) {
                            SyntaxError = true;
                        } else {
                            // valid key/value line (the quotes are not part of the value)
                            Key* newKey = new Key(completeLine,keystart,keyend-keystart,valuestart+1,valueend-valuestart-1);
                            appendLine(newKey);
                            curSection->InsertKey(newKey);
                        }
                    } else {
                        // unquoted value: nothing may follow it
                        const int valueend = skipValue(line,valuestart);
                        if(getNextChar(line,valueend) != -1) {
                            SyntaxError = true;
                        } else {
                            // valid key/value line
                            Key* newKey = new Key(completeLine,keystart,keyend-keystart,valuestart,valueend-valuestart);
                            appendLine(newKey);
                            curSection->InsertKey(newKey);
                        }
                    }
                }
            }
        }

        // ---- syntax errors: report and keep the raw line ----
        if(SyntaxError) {
            if(completeLine.size() < 100) {
                // there are some buggy ini-files which have a lot of waste at the end of the file
                // and it makes no sense to print all this stuff out. just skip it
                std::cerr << "INIFile: Syntax-Error in line " << lineNum << ":" << completeLine << " !" << std::endl;
            }
            // save this line as a comment
            appendLine(new INIFileLine(completeLine));
        }
    }
}
/*
 * getNextToken - read one lexical token from fd and store it in TToken.
 *
 * The function is a hand-written state machine (actState). On every loop
 * pass it reads one character `c` with fgetc() and asks getNextChar(fd) for
 * `cx` (used as a one-character lookahead; presumably it does not consume
 * input - confirm against getNextChar()). Characters are collected in
 * strBuffer at index fcv until a state decides the token is complete
 * (terminateLoop).
 *
 * Outside strings, '{' ... '}' is a comment and is skipped. Whitespace is
 * skipped while in the start state.
 *
 * On success the token fields are filled in:
 *   - type    : token type; identifiers are lower-cased and matched against
 *               the keyword list
 *   - val_int : integer literal value, 0/1 for false/true, otherwise -1
 *   - val_flo : real literal value, otherwise -1.0
 *   - val_str : freshly malloc'ed copy of the text for string literals,
 *               function identifiers and variable identifiers
 *
 * Unless a state disabled it (fseeker), the stream is moved back by one
 * character before returning, so the character that ended the token is read
 * again by the next call.
 *
 * Returns 1 when a token was produced and -1 when end of file was reached
 * where a token was expected. Lexical and allocation errors are reported on
 * stderr and passed to errorHandler().
 *
 * Fixes relative to the previous version:
 *   - "(" and ")" now really NUL-terminate strBuffer ("==" was used instead
 *     of "="), so the keyword comparison no longer reads garbage
 *   - both buffers are freed on the end-of-file return path
 *   - entityBuffer is NUL-terminated after every digit and bounds-checked,
 *     so strtol() never reads uninitialised memory
 *   - "find(" / "sort(" no longer allocate val_str twice
 *   - the end-of-file-in-comment message no longer contains abusive text
 */
int getNextToken(FILE* fd, token TToken)
{
    int c, cx;
    int actState = sSTART;
    int fcv = 0;                // index in strBuffer the current character goes to
    int tokType;
    int tokInt;
    double tokDouble;

    // sizeof(char) is 1 by definition, so baseStringLength is a byte count
    char *strBuffer = malloc(baseStringLength);
    if(strBuffer == NULL)
    {
        fprintf(stderr, "Allocation error! (strBuffer)\n");
        errorHandler(errInt);
    }

    char *tmpBuffer;            // reallocation purposes only

    // baseStringLength may grow while we run, so remember how big
    // entityBuffer really is
    const int entityCapacity = baseStringLength;
    char *entityBuffer = malloc(baseStringLength);
    if(entityBuffer == NULL)
    {
        fprintf(stderr, "Alocation error! (entityBuffer)\n");
        errorHandler(errInt);
    }

    bool inComment = false;
    bool terminateLoop = false;     // token complete, leave after this pass
    bool fseeker = true;            // step one character back before returning
    bool numberIntCase = false;
    bool numberDoubleCase = false;
    bool inString = false;          // finished token is a string literal
    bool apostrof = false;          // currently between the string's apostrophes
    bool entity = false;            // currently inside a #NNN entity
    int skipChar = 0;
    int efcv = -1;                  // index of the last digit stored in entityBuffer
    int entityID;

    bool signContained = false;
    bool eContained = false;
    bool dotContained = false;

    char *strtolErrPtr;             // end pointer reported by strtol()
    char *strtodErrPtr;             // end pointer reported by strtod()

    while( 1 )
    {
        c = fgetc(fd);
        cx = getNextChar(fd);

        // spaces
        if(isspace(c) && actState == sSTART)
        {
            continue;
        }

        // comments
        if(c == '{' && actState != sSTRING)
        {
            inComment = true;
            continue;
        }

        if(inComment)
        {
            if(c == '}')
            {
                inComment = false;
            }
            if(c == EOF)
            {
                fprintf(stderr, "Unexpected end of file inside a comment!\n");
                errorHandler(errLex);
            }
            continue;
        }

        // nothing follows the current character: there is nothing to step back over
        if(cx == EOF || cx == -1)
        {
            fseeker = false;
        }

        switch(actState)
        {
            case 999:
                break;

            case sSTART:
                if(isalpha(c) || c == '_')
                {
                    actState = sIDENT;
                    strBuffer[fcv] = c;
                    break;
                }

                if(c == '+' || c == '.' || c == '-' || c == '*' || c == '/' || c == '(' || c == ')' || c == ',' || c == '=' || c== ';' ||
                   /* these characters may start a two-character operator */
                   c == ':' || c == '<' || c == '>')
                {
                    actState = sSINGLEOPER;
                    strBuffer[fcv] = c;
                    break;
                }

                if(c == APOSTROF_ASCII)
                {
                    actState = sSTRING;
                    apostrof = true;
                    strBuffer[fcv] = c;
                    fcv--;  // step the index back so the opening apostrophe gets overwritten
                    break;
                }

                if( c >= '0' && c <= '9')
                {
                    actState = sNUMBER;
                    strBuffer[fcv] = c;
                    break;
                }

                // the character is not allowed here; the next pass reports it
                actState = UNEXPECTED_CHAR;
                break;

            case sIDENT:
                tokType = t_var_id;

                if(isalpha(c) || c == '_' || isdigit(c))
                {
                    strBuffer[fcv] = c; // still part of the identifier
                }
                else
                {
                    // first character that does not belong to the identifier
                    strBuffer[fcv] = '\0';
                    terminateLoop = true;

                    // an identifier followed by '(' (optionally after
                    // whitespace) is a function identifier
                    if(c == '(')
                    {
                        tokType = t_fun_id;
                    }
                    else
                    {
                        while(isspace(c))
                        {
                            c = fgetc(fd);
                        }
                        if(c == '(')
                        {
                            tokType = t_fun_id;
                        }
                    }

                    fseek(fd, -1, SEEK_CUR);
                    fseeker = false;
                }
                break;

            case sSINGLEOPER:   // handles one- and two-character operators
                // strBuffer[fcv-1] is the operator character stored in sSTART,
                // c is the character that follows it

                if(strBuffer[fcv-1] == ':')     // ":" or ":="
                {
                    if(c == '=')                // ":="
                    {
                        strBuffer[fcv] = c;
                        strBuffer[fcv+1] = '\0';
                        tokType = t_assign;
                        terminateLoop = true;
                        fseeker = false;        // both characters consumed, no step back
                        break;
                    }
                    else                        // just ":"
                    {
                        strBuffer[fcv] = '\0';
                        tokType = t_colon;
                        terminateLoop = true;
                        break;
                    }
                }

                if(strBuffer[fcv-1] == '<')     // "<", "<=" or "<>"
                {
                    if(c == '=')                // "<="
                    {
                        strBuffer[fcv] = c;
                        strBuffer[fcv+1] = '\0';
                        terminateLoop = true;
                        tokType = t_lesseq;
                        fseeker = false;
                        break;
                    }
                    else
                    {
                        if(c == '>')            // "<>"
                        {
                            strBuffer[fcv] = c;
                            strBuffer[fcv+1] = '\0';
                            terminateLoop = true;
                            tokType = t_nequal;
                            fseeker = false;
                            break;
                        }
                        else                    // just "<"
                        {
                            strBuffer[fcv] = '\0';
                            tokType = t_less;
                            terminateLoop = true;
                            break;
                        }
                    }
                }

                if(strBuffer[fcv-1] == '>')     // ">" or ">="
                {
                    if(c == '=')                // ">="
                    {
                        strBuffer[fcv] = c;
                        strBuffer[fcv+1] = '\0';
                        tokType = t_moreeq;
                        terminateLoop = true;
                        fseeker = false;
                        break;
                    }
                    else                        // just ">"
                    {
                        strBuffer[fcv] = '\0';
                        tokType = t_more;
                        terminateLoop = true;
                        break;
                    }
                }

                // from here on: one-character operators only
                if(strBuffer[fcv-1] == ';')
                {
                    strBuffer[fcv] = '\0';
                    tokType = t_semicolon;
                    terminateLoop = true;
                    break;
                }

                if(strBuffer[fcv-1] == '.')
                {
                    strBuffer[fcv] = '\0';
                    tokType = t_period;
                    terminateLoop = true;
                    break;
                }

                if(strBuffer[fcv-1] == ',')
                {
                    strBuffer[fcv] = '\0';
                    tokType = t_comma;
                    terminateLoop = true;
                    break;
                }

                if(strBuffer[fcv-1] == '(')
                {
                    strBuffer[fcv] = '\0';  // was "==": the terminator was never written
                    tokType = t_l_parrent;
                    terminateLoop = true;
                    break;
                }

                if(strBuffer[fcv-1] == ')')
                {
                    strBuffer[fcv] = '\0';  // was "==": the terminator was never written
                    tokType = t_r_parrent;
                    terminateLoop = true;
                    break;
                }

                if(strBuffer[fcv-1] == '=')
                {
                    strBuffer[fcv] = '\0';
                    tokType = t_equal;
                    terminateLoop = true;
                    break;
                }

                if(strBuffer[fcv-1] == '/')
                {
                    strBuffer[fcv] = '\0';
                    tokType = t_div;
                    terminateLoop = true;
                    break;
                }

                if(strBuffer[fcv-1] == '*')
                {
                    strBuffer[fcv] = '\0';
                    tokType = t_mul;
                    terminateLoop = true;
                    break;
                }

                if(strBuffer[fcv-1] == '-')
                {
                    strBuffer[fcv] = '\0';
                    tokType = t_minus;
                    terminateLoop = true;
                    break;
                }

                if(strBuffer[fcv-1] == '+')
                {
                    strBuffer[fcv] = '\0';
                    tokType = t_plus;
                    terminateLoop = true;
                    break;
                }

                // none of the known operators matched
                actState = WRONG_OPERATOR;
                break;

            case sSTRING:
                if(c == EOF || c == -1)
                {
                    fprintf(stderr, "Unexpected EOF!\n");
                    errorHandler(errLex);
                }

                if(apostrof && !entity) // inside the string body
                {
                    if(skipChar == 1)
                    {
                        // second apostrophe of a doubled '' - already stored, skip it
                        fcv--;
                        skipChar--;
                        break;
                    }

                    if(c > 31 && c <= 255 && c != APOSTROF_ASCII)
                    {
                        strBuffer[fcv] = c; // ordinary character
                        break;
                    }
                    else if(c == APOSTROF_ASCII)
                    {
                        // an apostrophe either ends the string, is the first
                        // half of a doubled apostrophe, or introduces an entity
                        if(cx != APOSTROF_ASCII && cx != '#')
                        {
                            apostrof = false;       // string terminated
                            strBuffer[fcv] = '\0';
                            break;
                        }

                        if(cx == APOSTROF_ASCII)
                        {
                            skipChar = 1;
                            strBuffer[fcv] = APOSTROF_ASCII;
                            break;
                        }

                        if(cx == '#')
                        {
                            fcv--;
                            entity = true;
                        }
                    }
                }
                else
                {
                    if(entity)
                    {
                        if(c == '#' && cx == APOSTROF_ASCII)
                        {
                            printf("Incorrect entity entry!\n");
                            exit(1);
                        }

                        if(c == '#' && !isdigit(cx))
                        {
                            fprintf(stderr, "Incorrect entity entry!\n");
                            errorHandler(errLex);
                        }

                        // from here on we have at least "#<digit>"
                        if(c == '#')
                        {
                            fcv--;
                            break;
                        }

                        if(!isdigit(c) && c == APOSTROF_ASCII) // the entity ends here
                        {
                            long tentityID = strtol(entityBuffer, &strtolErrPtr, 10);
                            if(entityBuffer == strtolErrPtr)
                            {
                                fprintf(stderr, "Entity not convertable!\n");
                                errorHandler(errLex);
                            }

                            entityID = tentityID;
                            if(entityID < 1 || entityID > 255)
                            {
                                fprintf(stderr, "Entity out of its range. Or you possibly run the script too many times in a short time -> buffer was not empty on time. %d\n", entityID);
                                errorHandler(errLex);
                            }

                            strBuffer[fcv] = entityID;

                            resetString(entityBuffer);
                            entityID = 0;
                            efcv = -1;
                            entity = false;
                            break;
                        }

                        if(isdigit(c))
                        {
                            fcv--;
                            efcv++;
                            // keep room for the terminator written below
                            if(efcv + 1 >= entityCapacity)
                            {
                                fprintf(stderr, "Entity number is too long!\n");
                                errorHandler(errLex);
                            }
                            entityBuffer[efcv] = c;
                            // keep the digits NUL-terminated for strtol()
                            entityBuffer[efcv + 1] = '\0';
                            break;
                        }
                    }
                    else
                    {
                        // closing apostrophe was seen on the previous pass
                        tokType = t_expr_str;
                        terminateLoop = true;
                        inString = true;
                        break;
                    }
                }
                break;

            case sNUMBER:
                if(isdigit(c) || c == '.' || c == 'e' || c == 'E')
                {
                    if(c == '.' || c == 'e' || c == 'E')
                    {
                        actState = sREAL;
                        if(c == '.')
                        {
                            dotContained = true;
                        }
                        if(c == 'e' || c == 'E')
                        {
                            eContained = true;
                        }
                    }
                    strBuffer[fcv] = c;
                }
                else
                {
                    // first character after the integer literal
                    strBuffer[fcv] = '\0';
                    tokType = t_expr_int;

                    // strtol() also takes care of leading zeroes; only
                    // decimal notation is allowed, so the base is 10
                    int strtolBase = 10;
                    long tmpTokInt;

                    tmpTokInt = strtol(strBuffer, &strtolErrPtr, strtolBase);
                    if(strtolErrPtr == strBuffer)
                    {
                        fprintf(stderr, "Trial to convert a string that is not convertable to long. Integer branch.\n");
                        errorHandler(errLex);
                    }

                    tokInt = tmpTokInt;
                    terminateLoop = true;
                    numberIntCase = true;
                }
                break;

            case sREAL:
                // We get here from sNUMBER after "#." or after "#e" / "#E"
                // ("#" stands for one or more digits). Accepted forms:
                //   #.#
                //   #.#e#   #.#E#
                //   #.#e-#  #.#E+#  (and the same without the fraction)
                if(dotContained)    // input so far starts with "#."
                {
                    if(!eContained && !signContained)
                    {
                        // digits of the fraction, optionally followed by e/E
                        if(isdigit(c))
                        {
                            strBuffer[fcv] = c;
                        }
                        else
                        {
                            if(c == 'e' || c == 'E')
                            {
                                strBuffer[fcv] = c;
                                eContained = true;
                                break;
                            }
                            else
                            {
                                // literal of the form "#.#" is complete
                                strBuffer[fcv] = '\0';
                                tokType = t_expr_dou;

                                tokDouble = strtod(strBuffer, &strtodErrPtr);
                                if(strBuffer == strtodErrPtr)
                                {
                                    fprintf(stderr, "Trial to convert non-convertable string to double! Real branch.\n");
                                    errorHandler(errLex);
                                }

                                terminateLoop = true;
                                numberDoubleCase = true;
                                break;
                            }
                        }
                    }

                    if(eContained && !signContained)
                    {
                        if(strBuffer[fcv-1] == 'e' || strBuffer[fcv-1] == 'E') // a sign may follow e/E directly
                        {
                            if(c == '+' || c == '-')
                            {
                                strBuffer[fcv] = c;
                                signContained = true;
                                break;
                            }
                            else
                            {
                                if(isdigit(c))  // no sign but a digit: treat the sign as given (it can no longer appear)
                                {
                                    strBuffer[fcv] = c;
                                    signContained = true;
                                    break;
                                }
                                else            // anything else is a malformed literal
                                {
                                    fprintf(stderr, "Wrong format for REAL!\n");
                                    errorHandler(errLex);
                                }
                            }
                        }

                        if(isdigit(c))  // previous character was not e/E: only digits, no sign any more
                        {
                            strBuffer[fcv] = c;
                            signContained = true;
                            break;
                        }
                        else
                        {
                            fprintf(stderr, "Wrong format for REAL!\n");
                            errorHandler(errLex);
                        }
                    }

                    if(eContained && signContained) // only digits from here; anything else ends the token
                    {
                        if(strBuffer[fcv-1] == '+' || strBuffer[fcv-1] == '-') // a digit must follow the sign
                        {
                            if(!isdigit(c))
                            {
                                fprintf(stderr, "Wrong format for REAL!\n");
                                errorHandler(errLex);
                            }
                        }

                        if(isdigit(c))
                        {
                            strBuffer[fcv] = c;
                            break;
                        }
                        else
                        {
                            strBuffer[fcv] = '\0';
                            tokType = t_expr_dou;

                            tokDouble = strtod(strBuffer, &strtodErrPtr);
                            if(strBuffer == strtodErrPtr)
                            {
                                fprintf(stderr, "Trial to convert non-convertable string to double! Real breanch!\n");
                                errorHandler(errLex);
                            }

                            terminateLoop = true;
                            numberDoubleCase = true;
                            break;
                        }
                    }
                    break;
                }

                if(!dotContained)   // no '.', so the previous character was e/E
                {
                    if(eContained && !signContained)
                    {
                        // no sign seen yet
                        if(c == '+' || c == '-')
                        {
                            strBuffer[fcv] = c;
                            signContained = true;
                            break;
                        }
                        else
                        {
                            if(isdigit(c))  // no sign but a digit: treat the sign as given (it can no longer appear)
                            {
                                strBuffer[fcv] = c;
                                signContained = true;
                                break;
                            }
                            else            // anything else is a malformed literal
                            {
                                fprintf(stderr, "Wrong format for REAL!\n");
                                errorHandler(errLex);
                            }
                        }
                        break;
                    }

                    if(eContained && signContained)
                    {
                        // exponent marker and sign are present: only digits, anything else ends the token
                        if(isdigit(c))
                        {
                            strBuffer[fcv] = c;
                            break;
                        }
                        else
                        {
                            strBuffer[fcv] = '\0';
                            tokType = t_expr_dou;

                            tokDouble = strtod(strBuffer, &strtodErrPtr);
                            if(strBuffer == strtodErrPtr)
                            {
                                fprintf(stderr, "Trial to convert non-convertable string to double!\n");
                                errorHandler(errLex);
                            }

                            terminateLoop = true;
                            numberDoubleCase = true;
                            break;
                        }
                        break;
                    }
                }
                break;

            case WRONG_OPERATOR:
                fprintf(stderr, "Operator does not exist!\n");
                free(strBuffer);
                free(entityBuffer);
                errorHandler(errLex);
                break;

            case UNEXPECTED_CHAR:
                if(c == EOF || c == -1)
                {
                    // end of input: release the work buffers before leaving
                    free(strBuffer);
                    free(entityBuffer);
                    return -1;
                }

                // step back to the offending character so it can be reported
                fseek(fd, -2, SEEK_CUR);
                c = fgetc(fd);
                fprintf(stderr, "Unexpected character! ('%c')\n", c);
                free(strBuffer);
                free(entityBuffer);
                errorHandler(errLex);
                break;

            default:
                fprintf(stderr, "Case that should never occur just occured. Mayday, mayday, we're going down!\n");
                errorHandler(errLex);
                break;
        }

        // grow strBuffer once the next index would reach its end
        if((fcv+1) == baseStringLength)
        {
            newAllocationSpace = 2*baseStringLength;
            tmpBuffer = realloc(strBuffer, newAllocationSpace);
            if(!tmpBuffer)
            {
                free(strBuffer);    // realloc failed: the old block is still ours to release
                fprintf(stderr, "Realloc failed!\n");
                errorHandler(errInt);
            }
            else
            {
                strBuffer = tmpBuffer;
            }

            if(strBuffer == NULL)
            {
                fprintf(stderr, "Failed to allocate additional space!\n");
                errorHandler(errInt);
            }
            else
            {
                baseStringLength = newAllocationSpace;
            }
        }

        fcv++;

        if(terminateLoop)
        {
            TToken->type = tokType;
            TToken->val_int = -1;
            TToken->val_flo = -1.0;

            if(tokType == t_fun_id)
            {
                // sizeof(char) is 1, so baseStringLength bytes are enough
                TToken->val_str = malloc(baseStringLength);
                if(TToken->val_str == NULL)
                {
                    fprintf(stderr, "Token allocation for string failed!\n");
                    errorHandler(errInt);
                }
                strcpy(TToken->val_str, strBuffer);
            }

            if(inString)
            {
                TToken->val_str = malloc(baseStringLength);
                if(TToken->val_str == NULL)
                {
                    fprintf(stderr, "Token allocation for string failed!\n");
                    errorHandler(errInt);
                }
                strcpy(TToken->val_str, strBuffer);
            }

            if(!inString)
            {
                // keywords are compared in lower case
                makeStringLowerCase(strBuffer);

                if(strcmp(strBuffer, "begin") == 0)
                {
                    TToken->type = t_begin;
                }
                else if(strcmp(strBuffer, "boolean") == 0)
                {
                    TToken->type = t_boolean;
                }
                else if(strcmp(strBuffer, "do") == 0)
                {
                    TToken->type = t_do;
                }
                else if(strcmp(strBuffer, "else") == 0)
                {
                    TToken->type = t_else;
                }
                else if(strcmp(strBuffer, "end") == 0)
                {
                    TToken->type = t_end;
                }
                else if(strcmp(strBuffer, "false") == 0)
                {
                    TToken->type = t_expr_boo;
                    TToken->val_int = 0;
                }
                else if(strcmp(strBuffer, "find") == 0)
                {
                    TToken->type = t_fun_id;
                    // for "find(" the buffer was already allocated above; reuse it
                    if(tokType != t_fun_id)
                    {
                        TToken->val_str = malloc(baseStringLength);
                        if(TToken->val_str == NULL)
                        {
                            fprintf(stderr, "Token allocation for string failed!\n");
                            free(strBuffer);
                            free(entityBuffer);
                            errorHandler(errInt);
                        }
                    }
                    strcpy(TToken->val_str,strBuffer);
                }
                else if(strcmp(strBuffer, "forward") == 0)
                {
                    TToken->type = t_forward;
                }
                else if(strcmp(strBuffer, "function") == 0)
                {
                    TToken->type = t_function;
                }
                else if(strcmp(strBuffer, "if") == 0)
                {
                    TToken->type = t_if;
                }
                else if(strcmp(strBuffer, "integer") == 0)
                {
                    TToken->type = t_integer;
                }
                else if(strcmp(strBuffer, "readln") == 0)
                {
                    TToken->type = t_readln;
                }
                else if(strcmp(strBuffer, "real") == 0)
                {
                    TToken->type = t_real;
                }
                else if(strcmp(strBuffer, "sort") == 0)
                {
                    TToken->type = t_fun_id;
                    // for "sort(" the buffer was already allocated above; reuse it
                    if(tokType != t_fun_id)
                    {
                        TToken->val_str = malloc(baseStringLength);
                        if(TToken->val_str == NULL)
                        {
                            fprintf(stderr, "Token allocation for string failed!\n");
                            free(strBuffer);
                            free(entityBuffer);
                            errorHandler(errInt);
                        }
                    }
                    strcpy(TToken->val_str,strBuffer);
                }
                else if(strcmp(strBuffer, "string") == 0)
                {
                    TToken->type = t_string;
                }
                else if(strcmp(strBuffer, "then") == 0)
                {
                    TToken->type = t_then;
                }
                else if(strcmp(strBuffer, "true") == 0)
                {
                    TToken->type = t_expr_boo;
                    TToken->val_int = 1;
                }
                else if(strcmp(strBuffer, "var") == 0)
                {
                    TToken->type = t_var;
                }
                else if(strcmp(strBuffer, "while") == 0)
                {
                    TToken->type = t_while;
                }
                else if(strcmp(strBuffer, "write") == 0)
                {
                    TToken->type = t_write;
                }
            }

            if(numberIntCase)
            {
                TToken->val_int = tokInt;
                TToken->val_flo = -1.0;
            }

            if(numberDoubleCase)
            {
                TToken->val_flo = tokDouble;
                TToken->val_int = -1;
            }

            if(TToken->type == t_var_id)    // still a plain identifier: store its lower-case name
            {
                TToken->val_str = malloc(baseStringLength);
                if(TToken->val_str == NULL)
                {
                    fprintf(stderr, "Token allocation for string failed!\n");
                    errorHandler(errInt);
                }
                makeStringLowerCase(strBuffer);
                strcpy(TToken->val_str,strBuffer);
            }

            if(fseeker)
            {
                // step one character back so the character that ended this
                // token is read again by the next call
                fseek(fd, -1, SEEK_CUR);
            }

            free(strBuffer);
            free(entityBuffer);
            return 1;
        }
    }
}
char XmlReader::getHtmlChar () { //m_charBuf.setLength (0); int curPos = 0; int oldPos = m_pos; int i = 0; char c = getNextChar (); if (c == '#') { c = getNextChar (); int base = 10; if (c == 'x') { base = 16;; c = getNextChar (); } while ((c >= '0' && c <= '9') || (base == 16 && c >= 'a' && c <= 'f') || (base == 16 && c >= 'A' && c <= 'F') ) { if (c >= '0' && c <= '9') { i = base * i + (c-'0'); } else if (c >= 'A' && c <= 'F') { i = base * i + (10 + c - 'A'); } else if (c >= 'a' && c <= 'f') { i = base * i + (10 + c - 'a'); } c = getNextChar (); } if (c != ';') { MESSAGE ("getString: expecting ';' instead of '%c' while parsing &#xyz;\n", c); } return (char)i; } while (c != '\0' && c != ';' && i++ < 10) { setCharBuf (curPos++, c); c = getNextChar (); } setCharBuf (curPos, 0); if (strcmp (m_charBuf, "amp") == 0) { return '&'; } else if (strcmp (m_charBuf, "lt") == 0) { return '<'; } else if (strcmp (m_charBuf, "gt") == 0) { return '>'; } else if (strcmp (m_charBuf, "apos") == 0) { return '\''; } else if (strcmp (m_charBuf, "quot") == 0) { return '"'; } else if (strcmp (m_charBuf, "deg") == 0) { return convert (0xC2, 0xB0); } else if (strcmp (m_charBuf, "nbsp") == 0) { return convert (0xC2, 0xA0); } else if (strcmp (m_charBuf, "ecirc") == 0) { return convert (0xC3, 0xAA); } else if (strcmp (m_charBuf, "eacute") == 0) { return convert (0xC3, 0xA9); } else if (strcmp (m_charBuf, "egrave") == 0) { return convert (0xC3, 0xA8); } else if (strcmp (m_charBuf, "agrave") == 0) { return convert (0xC3, 0xE0); } else if (strcmp (m_charBuf, "ccedil") == 0) { return convert (0xC3, 0xE7); } else { m_pos = oldPos; return '&'; } }
// function getToken returns the // next token in source file TokenType getToken(void) { // index for storing into tokenString int tokenStringIndex = 0; // holds current token to be returned TokenType currentToken; // current state - always begins at START StateType state = START; // flag to indicate save to tokenString int save; while (state != DONE) { char c = getNextChar(); save = TRUE; switch (state) { case START: if (isdigit(c)) state = INNUM; else if (isalpha(c)) state = INID; else if (c == ':') state = INASSIGN; else if ((c == ' ') || (c == '\t') || (c == '\n')) save = FALSE; else if (c == '{') { save = FALSE; state = INCOMMENT; } else { state = DONE; switch (c) { case EOF: save = FALSE; currentToken = ENDFILE; break; case '=': currentToken = EQ; break; case '<': currentToken = LT; break; case '+': currentToken = PLUS; break; case '-': currentToken = MINUS; break; case '*': currentToken = TIMES; break; case '/': currentToken = OVER; break; case '(': currentToken = LPAREN; case ')': currentToken = RPAREN; case ';': currentToken = SEMI; break; default: currentToken = ERROR; break; } } break; case INCOMMENT: save = FALSE; if (c == '}') state = START; break; case INASSIGN: state = DONE; if (c == '=') currentToken = ASSIGN; else { // backup in the input ungetNextChar(); save = FALSE; currentToken = ERROR; } break; case INNUM: if (!isdigit(c)) { // backup in the input ungetNextChar(); save = FALSE; state = DONE; currentToken = NUM; } break; case INID: if (!isalpha(c)) { // backup in the input ungetNextChar(); save = FALSE; state = DONE; currentToken = ID; } break; case DONE: default: // should never happen fprintf(listing, "Scanner Bug: state= %d\n", state); state = DONE; currentToken = ERROR; break; } if ((save) && (tokenStringIndex <= MAXTOKENLEN)) tokenString [ tokenStringIndex ++ ] = c; if (state == DONE) { tokenString [ tokenStringIndex ] = '\0'; if (currentToken == ID) currentToken = reservedLookup(tokenString); } } if (TraceScan) { fprintf(listing, "\t%d: 
", lineno); printToken(currentToken, tokenString); } return currentToken; } // end getToken
/* function getToken returns the * next token in source file */ TokenType getToken(void) { /* index for storing into tokenString */ int tokenStringIndex = 0; /* holds current token to be returned */ TokenType currentToken; /* current state - always begins at START */ StateType state = START; /* flag to indicate save to tokenString */ int save; while (state != DONE) { int c = getNextChar(); save = true; //........... switch (state) { case START: if (isdigit(c)) state = INNUM; else if (isalpha(c)) state = INID; //else if (c == ':') //............... //state = INASSIGN; else if (c == '<') state = INLE; else if (c == '>') state = INGE; else if (c == '=') state = INEQ; else if (c == '!') state = INNE; else if ((c == ' ') || (c == '\t') || (c == '\n')) save = false; else if (c == '/') { //save = FALSE; state = ENTERING_COMMENT; } else { state = DONE; switch (c) { case EOF: save = false; currentToken = ENDFILE; break; case '+': currentToken = PLUS; break; case '-': currentToken = MINUS; break; case '*': currentToken = TIMES; break; case '/': currentToken = ENTERING_COMMENT; break; case '(': currentToken = LPAREN; break; case ')': currentToken = RPAREN; break; case ';': currentToken = SEMI; break; case '[': currentToken = LBRACKET; break; case ']': currentToken = RBRACKET; break; case '{': currentToken = LBRACE; break; case '}': currentToken = RBRACE; break; default: currentToken = ERROR; break; } } break; /*case ENTERING_COMMENT: save = false; if (c == EOF) //???????????????? 
{ state = DONE; currentToken = ENDFILE; } else if (c == '*') state = INCOMMENT; else { currentToken = TIMES; save = true; state = DONE; } break; */ case ENTERING_COMMENT: save = false; if(c == '*') state = INCOMMENT; else { ungetNextChar(); currentToken = OVER; state = DONE; save = true; } break; case INCOMMENT: save = false; if(c == '*') state = EXITING_COMMENT; break; case EXITING_COMMENT: save = false; if(c == '/') state = START; else if (c == '*') state = EXITING_COMMENT; else state = INCOMMENT; break; case INLE: state = DONE; if (c == '=') currentToken = LE; else { /* backup in the input */ ungetNextChar(); //save = FALSE; currentToken = LT; } break; case INGE: state = DONE; if (c == '=') currentToken = GE; else { /* backup in the input */ ungetNextChar(); //save = FALSE; currentToken = GT; } break; case INEQ: state = DONE; if (c == '=') currentToken = EQ; else { /* backup in the input */ ungetNextChar(); //save = FALSE; currentToken = ASSIGN; } break; case INNE: //不等号。。。。。。。。。。。。 state = DONE; if (c == '=') currentToken = NE; else { /* backup in the input */ ungetNextChar(); save = false; currentToken = ERROR; } break; case INNUM: if (!isdigit(c)) { /* backup in the input */ ungetNextChar(); save = false; state = DONE; currentToken = NUM; } break; case INID: if (!isalpha(c)) { /* backup in the input */ ungetNextChar(); save = false; state = DONE; currentToken = ID; } break; case DONE: default: /* should never happen */ //fprintf(listing,"Scanner Bug: state= %d\n",state); state = DONE; currentToken = ERROR; break; } if ((save) && (tokenStringIndex <= MAXTOKENLEN)) { tokenString[tokenStringIndex++] = (char) c; //tokenIndex[tokenStringIndex] = currentToken; } if (state == DONE) { tokenString[tokenStringIndex] = '\0'; if (currentToken == ID) { currentToken = reservedLookup(tokenString); } } } /* if (TraceScan) { fprintf(listing,"\t%d: ",lineno); printToken(currentToken,tokenString); }*/ //printf("YES\n"); //fputs(ansToken[currentToken],target); 
printf("%s\n",ansToken[currentToken]); return currentToken; } /* end getToken */
/*
 * Scans the next token of the macro source text.
 *
 * The first character comes from getNextChar(); further characters are read
 * with getChar() and returned to the input with putBack().  The token kind is
 * stored in the file-level variable currTok and also returned.  Depending on
 * the token, additional results are left in other file-level variables:
 *   - string literals and numbers store their value in currVar;
 *   - a matched entry of MKeywords stores its Value in FARVar (tFARVar);
 *   - names are collected by getVarName()/getFarName().
 * oSrcString remembers the source position at which this token started; it is
 * passed to keyMacroParseError() in one of the error paths below.
 */
static TToken getToken()
{
    oSrcString = sSrcString;
    int ch = getNextChar();
    bool verbStr=false;   // set when the string literal was introduced by @"

    switch (ch)
    {
        case EOFCH:
        case 0:
            currTok = tEnd;
            break;
        case L',':
            currTok = tComma;
            break;
        case L'+':
            currTok = tPlus;
            break;
        case L'-':
            currTok = tMinus;
            break;
        case L'*':
            currTok = tMul;
            break;
        case L'/':
            currTok = tDiv;
            break;
        case L'(':
            currTok = tLp;
            break;
        case L')':
            currTok = tRp;
            break;
        case L'^':   // "^^" is boolean xor, a single "^" is bitwise xor
            if ((ch = getChar()) == L'^')
                currTok = tBoolXor;
            else
            {
                putBack(ch);
                currTok = tBitXor;
            }
            break;
        case L'~':   // "~" directly followed by a space ends the token stream
            if ((ch = getChar()) != L' ')
            {
                putBack(ch);
                currTok = tBitNot;
                break;
            }
            putBack(ch);   //????
            currTok = tEnd;
            break;
        case L'|':
            if ((ch = getChar()) == L'|')
                currTok = tBoolOr;
            else
            {
                putBack(ch);
                currTok = tBitOr;
            }
            break;
        case L'&':
            if ((ch = getChar()) == L'&')
                currTok = tBoolAnd;
            else
            {
                putBack(ch);
                currTok = tBitAnd;
            }
            break;
        case L'=':
            if ((ch = getChar()) == L'=')
                currTok = tEq;
            else
            {
                putBack(ch);
                currTok = tLet;
            }
            break;
        case L'>':
            switch ((ch = getChar()))
            {
                case L'=':
                    currTok = tGe;
                    break;
                case L'>':
                    currTok = tBitShr;
                    break;
                default:
                    putBack(ch);
                    currTok = tGt;
                    break;
            }
            break;
        case L'<':
            switch (ch = getChar())
            {
                case L'=':
                    currTok = tLe;
                    break;
                case L'<':
                    currTok = tBitShl;
                    break;
                default:
                    putBack(ch);
                    currTok = tLt;
                    break;
            }
            break;
        case L'!':
            if ((ch = getChar()) != L'=')
            {
                putBack(ch);
                currTok = tNot;
                break;
            }
            else
                currTok = tNe;
            break;
        case L'@':
            // Only @" is meaningful here.  For any other follower the character
            // is pushed back and currTok is left at its previous value.
            ch = getChar();
            if (ch != L'"')
            {
                putBack(ch);
                break;
            }
            verbStr=true;
            // deliberate fall through into the string-literal case
        case L'\"':
        {
            // __currTok stays tNo unless an escape sequence turns out invalid.
            TToken __currTok = tNo;
            currVar = L"";

            while (((ch = getChar()) != EOFCH))
            {
                if (ch == L'\"')
                {
                    if (verbStr)
                    {
                        // In a verbatim string "" stands for one quote;
                        // a single quote terminates the literal.
                        ch = getChar();
                        if (ch != L'\"')
                        {
                            putBack(ch);
                            break;
                        }
                    }
                    else
                        break;
                }

                // Backslash escapes are only interpreted in ordinary strings.
                if (ch == L'\\' && !verbStr)
                {
                    switch (ch = getChar())
                    {
                        case L'a' :
                            ch = L'\a';
                            break;
                        case L'b' :
                            ch = L'\b';
                            break;
                        case L'f' :
                            ch = L'\f';
                            break;
                        case L'n' :
                            ch = L'\n';
                            break;
                        case L'r' :
                            ch = L'\r';
                            break;
                        case L't' :
                            ch = L'\t';
                            break;
                        case L'v' :
                            ch = L'\v';
                            break;
                        case L'\'':
                            ch = L'\'';
                            break;
                        case L'\"':
                            ch = L'\"';
                            break;
                        case L'\\':
                            ch = L'\\';
                            break;
                        case L'0':
                        case L'1':
                        case L'2':
                        case L'3':
                        case L'4':
                        case L'5':
                        case L'6':
                        case L'7': // octal: \d \dd \ddd
                        {
                            // Accumulated in a BYTE, so the value is reduced
                            // to whatever that type can hold.
                            BYTE n = ch - L'0';

                            if ((unsigned int)(ch = getChar()) >= L'0' && (unsigned int)ch < L'8')
                            {
                                n = 8 * n + ch - L'0';

                                if ((unsigned int)(ch = getChar()) >= L'0' && (unsigned int)ch < L'8')
                                    n = 8 * n + ch - L'0';
                                else
                                    putBack(ch);
                            }
                            else
                                putBack(ch);

                            ch = n;
                            break;
                        }
                        case L'x':
                        {
                            // \x followed by one to four hex digits.
                            if (iswxdigit(ch = getChar()))
                            {
                                wchar_t value=hex2ch(ch);

                                for (int ii=0; ii<3; ii++)
                                {
                                    if (iswxdigit(ch = getChar()))
                                    {
                                        value=(value<<4)|hex2ch(ch);
                                    }
                                    else
                                    {
                                        putBack(ch);
                                        break;
                                    }
                                }

                                ch = value;
                            }
                            else
                            {
                                keyMacroParseError(err_Bad_Hex_Control_Char,--sSrcString,pSrcString);
                                __currTok = tEnd;
                            }

                            break;
                        }
                        default:
                        {
                            keyMacroParseError(err_Bad_Control_Char,--sSrcString,pSrcString);
                            __currTok = tEnd;
                            break;
                        }
                    }
                }

                // A bad escape aborts the literal without appending anything.
                if (__currTok != tNo)
                    break;

                currVar.AppendStr((wchar_t)ch);
            }

            if (__currTok == tNo)
                currTok = tStr;
            else
                currTok = __currTok;

            break;
        }
        case L'.':
        {
            // ".<digit>" starts a number: restore the digit and continue in
            // the number case below with ch == '.'.
            ch = getChar();

            if (iswdigit(ch))
            {
                putBack(ch);
                ch=L'.';
            }
            else
            {
                currTok = tEnd; //???
                break;
            }
        }
        // deliberate fall through when a digit followed the '.'
        case L'0':
        case L'1':
        case L'2':
        case L'3':
        case L'4':
        case L'5':
        case L'6':
        case L'7':
        case L'8':
        case L'9':
        {
            // NOTE(review): characters are appended to this fixed 256-element
            // buffer with no visible length check.
            static wchar_t buffer[256];
            wchar_t *ptrbuffer=buffer;
            bool isNum = false;     // set once the current character ends the number
            bool isHex = false;     // a "0x"-style prefix followed by a hex digit was seen
            bool isE = false;       // an exponent marker was already seen
            bool isPoint = false;   // a decimal point was already seen
            int ch2;

            for (;;)
            {
                *ptrbuffer++=(wchar_t)ch;

                switch (ch)
                {
                    case L'x':
                    case L'X':

                        // Only an x in the second position can start a hex number.
                        if (ptrbuffer == buffer + 2)
                        {
                            ch = getChar();

                            if (iswxdigit(ch))
                            {
                                isHex=true;
                                putBack(ch);
                            }
                            else
                            {
                                putBack(ch);
                                isNum=true;
                                break;
                            }
                        }

                        break;
                    case L'.':

                        if (isPoint || isE)
                        {
                            isNum=true;
                            break;
                        }

                        isPoint=true;
                        break;
                    case L'e':
                    case L'E':

                        // In a hex number 'e' is just another digit.
                        if (isHex)
                            break;

                        if (isE)
                        {
                            isNum=true;
                            break;
                        }

                        isE=true;
                        ch2 = getChar();

                        if (ch2 == L'-' || ch2 == L'+')
                        {
                            int ch3=getChar();

                            if (iswdigit(ch3))
                            {
                                *ptrbuffer++=(wchar_t)ch2;
                                *ptrbuffer++=(wchar_t)ch3;
                            }
                            else
                            {
                                putBack(ch3);  // !iswdigit
                                putBack(ch2);  // -+
                                putBack(ch);   // eE
                            }
                        }
                        else if (!iswdigit(ch2))
                        {
                            putBack(ch2); // !iswdigit
                            putBack(ch);  // eE
                        }
                        else
                            // NOTE(review): ch2 (a digit) was read but ch is the
                            // argument here - presumably putBack only steps the
                            // input back by one regardless of the value; confirm.
                            putBack(ch);

                        break;
                    case L'a':
                    case L'A':
                    case L'b':
                    case L'B':
                    case L'c':
                    case L'C':
                    case L'd':
                    case L'D':
                    case L'f':
                    case L'F':

                        if (!isHex)
                        {
                            isNum=true;
                            break;
                        }
                        // hex digit: fall through to the plain-digit case

                    case L'0':
                    case L'1':
                    case L'2':
                    case L'3':
                    case L'4':
                    case L'5':
                    case L'6':
                    case L'7':
                    case L'8':
                    case L'9':
                        //isNum=true;
                        break;
                    default:
                        isNum=true;
                        break;
                }

                if (isNum)
                    break;

                ch = getChar();
            }

            if (ch != EOFCH)
                putBack(ch);

            *ptrbuffer++=(wchar_t)0;
            // Decide between integer and floating point: anything with an
            // 'x' in second position is converted as an integer; otherwise an
            // 'e', 'E' or '.' before the first non-digit selects floating point.
            bool CheckIntNumber=true;

            if (buffer[0])
            {
                if (!(buffer[1] == L'x' || buffer[1] == L'X'))
                {
                    for (ptrbuffer=buffer; *ptrbuffer ; ptrbuffer++)
                    {
                        if (*ptrbuffer == L'e' || *ptrbuffer == L'E' || *ptrbuffer == L'.')
                        {
                            CheckIntNumber=false;
                            break;
                        }
                        else if (!iswdigit(*ptrbuffer))
                            break;
                    }
                }
            }
            else
                CheckIntNumber=false;

            if (CheckIntNumber)
            {
                currVar = _wcstoi64(buffer,&ptrbuffer,0);
                currTok = tInt;
            }
            else
            {
                currVar = wcstod(buffer,&ptrbuffer);
                currTok = tFloat;
            }

            break;
        }
        case L'%':
            // %name or %%name is a variable reference.
            ch = getChar();

            if ((IsAlphaNum(ch) || ch == L'_') || (ch == L'%' && (IsAlphaNum(*sSrcString) || *sSrcString == L'_')))
            {
                getVarName(ch);
                putBack(ch);
                currTok = tVar;
            }
            else
                keyMacroParseError(err_Var_Expected,L""); // BUG nameString

            break;
        default:
        {
            if (IsAlpha(ch)) // || ch == L'_' ????
            {
                TToken __currTok = tNo;
                getFarName(ch);

                // Skip blanks between the name and a possible '('.
                if (ch == L' ')
                {
                    while (ch == L' ')
                        ch = getNextChar();
                }

                if (ch == L'(') //!!!! should the spaces be skipped? YES!
                    __currTok = tFunc;
                else
                {
                    putBack(ch);

                    // Case-insensitive lookup in the keyword table.
                    for (int i = 0 ; i < MKeywordsSize ; i++)
                        if (!StrCmpI(nameString, MKeywords[i].Name))
                        {
                            FARVar = MKeywords[i].Value;
                            __currTok = tFARVar;
                            break;
                        }

                    if (__currTok == tNo)
                    {
                        // currTok still holds the previous token at this point.
                        if (IsProcessFunc || currTok == tFunc || currTok == tLt) // TODO: clarify
                        {
                            if (KeyNameMacroToKey(nameString) == -1 && KeyNameToKey(nameString) == -1 && checkMacroConst(nameString))
                                __currTok = tConst;
                            else
                            {
                                DWORD k=KeyNameToKey(nameString);

                                if (k != (DWORD)-1)
                                {
                                    currVar = (__int64)k;
                                    __currTok = tInt; //??
                                }
                                else
                                {
                                    keyMacroParseError(err_Var_Expected,oSrcString,pSrcString,nameString);
                                }
                            }
                        }
                        else
                        {
                            if (KeyNameMacroToKey(nameString) == -1)
                            {
                                if (KeyNameToKey(nameString) == -1)
                                {
                                    if (checkMacroConst(nameString))
                                        __currTok = tConst;
                                    else
                                        keyMacroParseError(err_Unrecognized_keyword,nameString);
                                }
                                else
                                {
                                    currVar = (__int64)KeyNameToKey(nameString);
                                    __currTok = tInt; //??
                                }
                            }
                        }
                    }
                }

                // When nothing was recognised currTok keeps its previous value.
                if (__currTok != tNo)
                    currTok=__currTok;
            }
            else
                currTok = tEnd;

            break;
        }
    }

    return currTok;
}
/*
    Lexical analyser for XML. Return the next token reading input as required. It uses a one token look ahead and 
    push back mechanism (LAR1 parser). Text token identifiers are left in the tokBuf parser buffer on exit. This Lex 
    has special cases for the states MPR_XML_ELT_DATA where we have an optimized read of element data, and 
    MPR_XML_AFTER_LS where we distinguish between element names, processing instructions and comments. 

    xp      Parser instance; its tokBuf receives the token text and its quoteChar field is used while
            reading a quoted value.
    state   Current parser state (asserted to be >= 0); selects the special cases described above.

    Returns one of the MPR_XMLTOK_* codes. A negative result from getNextChar yields MPR_XMLTOK_EOF and a 
    negative result from mprPutCharToBuf yields MPR_XMLTOK_TOO_BIG.
 */
static MprXmlToken getXmlToken(MprXml *xp, int state)
{
    MprBuf      *tokBuf;
    char        *cp;
    int         c, rc;

    mprAssert(state >= 0);
    tokBuf = xp->tokBuf;

    if ((c = getNextChar(xp)) < 0) {
        return MPR_XMLTOK_EOF;
    }
    mprFlushBuf(tokBuf);

    /*
        Special case parsing for names and for element data. We do this for performance so we can return to the caller 
        the largest token possible.
     */
    if (state == MPR_XML_ELT_DATA) {
        /*
            Read all the data up to the start of the closing element "<" or the start of a sub-element.
         */
        if (c == '<') {
            if ((c = getNextChar(xp)) < 0) {
                return MPR_XMLTOK_EOF;
            }
            if (c == '/') {
                return MPR_XMLTOK_LS_SLASH;
            }
            putLastChar(xp, c);
            return MPR_XMLTOK_LS;
        }
        do {
            if (mprPutCharToBuf(tokBuf, c) < 0) {
                return MPR_XMLTOK_TOO_BIG;
            }
            if ((c = getNextChar(xp)) < 0) {
                return MPR_XMLTOK_EOF;
            }
        } while (c != '<');

        /*
            Put back the last look-ahead character
         */
        putLastChar(xp, c);

        /*
            If all white space, then zero the token buffer
            NOTE(review): this scan stops at a zero byte, so it presumably relies on the buffer 
            being null terminated at this point - confirm against mprPutCharToBuf.
         */
        for (cp = tokBuf->start; *cp; cp++) {
            if (!isspace((uchar) *cp & 0x7f)) {
                return MPR_XMLTOK_TEXT;
            }
        }
        mprFlushBuf(tokBuf);
        return MPR_XMLTOK_TEXT;
    }

    while (1) {
        switch (c) {
        /* White space between tokens is skipped; the next character is read at the bottom of the loop */
        case ' ':
        case '\n':
        case '\t':
        case '\r':
            break;

        case '<':
            if ((c = getNextChar(xp)) < 0) {
                return MPR_XMLTOK_EOF;
            }
            if (c == '/') {
                return MPR_XMLTOK_LS_SLASH;
            }
            putLastChar(xp, c);
            return MPR_XMLTOK_LS;

        case '=':
            return MPR_XMLTOK_EQ;

        case '>':
            return MPR_XMLTOK_GR;

        case '/':
            /* Only "/>" is accepted here; a '/' followed by anything else is an error */
            if ((c = getNextChar(xp)) < 0) {
                return MPR_XMLTOK_EOF;
            }
            if (c == '>') {
                return MPR_XMLTOK_SLASH_GR;
            }
            return MPR_XMLTOK_ERR;

        case '\"':
        case '\'':
            xp->quoteChar = c;
            /* Fall through */

        default:
            /*
                We handle element names, attribute names and attribute values here. We do NOT handle data between 
                elements here. Read the token.  Stop on white space or a closing element ">"
             */
            if (xp->quoteChar) {
                /* Quoted value: copy everything up to the matching quote; the quotes themselves are not stored */
                if ((c = getNextChar(xp)) < 0) {
                    return MPR_XMLTOK_EOF;
                }
                while (c != xp->quoteChar) {
                    if (mprPutCharToBuf(tokBuf, c) < 0) {
                        return MPR_XMLTOK_TOO_BIG;
                    }
                    if ((c = getNextChar(xp)) < 0) {
                        return MPR_XMLTOK_EOF;
                    }
                }
                xp->quoteChar = 0;

            } else {
                while (!isspace((uchar) c) && c != '>' && c != '/' && c != '=') {
                    if (mprPutCharToBuf(tokBuf, c) < 0) {
                        return MPR_XMLTOK_TOO_BIG;
                    }
                    if ((c = getNextChar(xp)) < 0) {
                        return MPR_XMLTOK_EOF;
                    }
                }
                /* The terminating character belongs to the next token */
                putLastChar(xp, c);
            }
            if (mprGetBufLength(tokBuf) < 0) {
                return MPR_XMLTOK_ERR;
            }
            mprAddNullToBuf(tokBuf);

            if (state == MPR_XML_AFTER_LS) {
                /*
                    If we are just inside an element "<", then analyze what we have to see if we have an element name, 
                    instruction or comment. Tokbuf will hold "?" for instructions or "!--" for comments.
                 */
                if (mprLookAtNextCharInBuf(tokBuf) == '?') {
                    /*  Just ignore processing instructions */
                    rc = scanFor(xp, "?>");
                    if (rc < 0) {
                        return MPR_XMLTOK_TOO_BIG;
                    } else if (rc == 0) {
                        return MPR_XMLTOK_ERR;
                    }
                    return MPR_XMLTOK_INSTRUCTIONS;

                } else if (mprLookAtNextCharInBuf(tokBuf) == '!') {
                    if (strncmp((char*) tokBuf->start, "![CDATA[", 8) == 0) {
                        /* Drop the 8-character "![CDATA[" prefix before scanning for the terminator */
                        mprAdjustBufStart(tokBuf, 8);
                        rc = scanFor(xp, "]]>");
                        if (rc < 0) {
                            return MPR_XMLTOK_TOO_BIG;
                        } else if (rc == 0) {
                            return MPR_XMLTOK_ERR;
                        }
                        return MPR_XMLTOK_CDATA;

                    } else {
                        /* Anything else starting with '!' is scanned as a comment up to "-->" */
                        mprFlushBuf(tokBuf);
                        rc = scanFor(xp, "-->");
                        if (rc < 0) {
                            return MPR_XMLTOK_TOO_BIG;
                        } else if (rc == 0) {
                            return MPR_XMLTOK_ERR;
                        }
                        return MPR_XMLTOK_COMMENT;
                    }
                }
            }
            trimToken(xp);
            return MPR_XMLTOK_TEXT;
        }
        if ((c = getNextChar(xp)) < 0) {
            return MPR_XMLTOK_EOF;
        }
    }

    /* Should never get here */
    mprAssert(0);
    return MPR_XMLTOK_ERR;
}
/**
 * Find the next token in a string.
 *
 * Leading white space is skipped with FindNonWhiteSpace().  On return
 * pXML->nIndex has been advanced past the token, except that a terminator
 * ending a text token is left in place for the next call.
 *
 * @param pXML parser state: the text (lpXML) and the read position (nIndex)
 * @param pcbToken contains the number of characters that have been read
 * @param pType receives the token type (eTokenError when no further
 * non-white-space character could be obtained)
 * @return a NextToken whose pStr points at the first character of the
 * token inside pXML->lpXML, or is NULL on eTokenError
 */
static NextToken GetNextToken(XML *pXML, size_t *pcbToken, enum TokenTypeTag *pType)
{
  NextToken result;
  const TCHAR *lpXML;
  TCHAR ch;
  TCHAR chTemp;
  size_t nSize;
  bool nFoundMatch;
  unsigned n;
  bool nIsText = false;

  // Find next non-white space character
  ch = FindNonWhiteSpace(pXML);
  if (gcc_unlikely(ch == 0)) {
    // If we failed to obtain a valid character
    *pcbToken = 0;
    *pType = eTokenError;
    result.pStr = NULL;
    return result;
  }

  // Cache the current string pointer
  lpXML = pXML->lpXML;
  // nIndex - 1 is used as the position of the character just returned
  result.pStr = &lpXML[pXML->nIndex - 1];
  chTemp = 0;

  switch (ch) {
    // Check for quotes
  case _T('\''):
  case _T('\"'):
    // Type of token
    *pType = eTokenQuotedText;
    chTemp = ch;
    // Remember where we are so the scan can be rewound if no closing
    // quote is found
    n = pXML->nIndex;

    // Set the size
    nSize = 1;
    nFoundMatch = false;

    // Search through the string to find a matching quote
    while (((ch = getNextChar(pXML))) != 0) {
      nSize++;
      if (ch == chTemp) {
        nFoundMatch = true;
        break;
      }
      // A '<' before the closing quote aborts the search
      if (ch == _T('<'))
        break;
    }

    // If we failed to find a matching quote
    if (!nFoundMatch) {
      // Rewind to the opening quote and treat the input as plain text
      pXML->nIndex = n - 1;
      ch = getNextChar(pXML);
      nIsText = true;
      break;
    }

    // 4.02.2002
    // Skip white space after the closing quote, then step back so the
    // following character is seen again by the next call
    if (FindNonWhiteSpace(pXML)) {
      pXML->nIndex--;
    }
    break;

    // Equals (used with attribute values)
  case _T('='):
    nSize = 1;
    *pType = eTokenEquals;
    break;

    // Close tag
  case _T('>'):
    nSize = 1;
    *pType = eTokenCloseTag;
    break;

    // Check for tag start and tag end
  case _T('<'):

    // Peek at the next character to see if we have an end tag '</',
    // or an xml declaration '<?'
    chTemp = pXML->lpXML[pXML->nIndex];

    // If we have a tag end...
    if (chTemp == _T('/')) {
      // Set the type and ensure we point at the next character
      getNextChar(pXML);
      *pType = eTokenTagEnd;
      nSize = 2;
    }

    // If we have an XML declaration tag
    else if (chTemp == _T('?')) {
      // Set the type and ensure we point at the next character
      getNextChar(pXML);
      *pType = eTokenDeclaration;
      nSize = 2;
    }

    // Otherwise we must have a start tag
    else {
      *pType = eTokenTagStart;
      nSize = 1;
    }
    break;

    // Check to see if we have a short hand type end tag ('/>').
  case _T('/'):

    // Peek at the next character to see if we have a short end tag '/>'
    chTemp = pXML->lpXML[pXML->nIndex];

    // If we have a short hand end tag...
    if (chTemp == _T('>')) {
      // Set the type and ensure we point at the next character
      getNextChar(pXML);
      *pType = eTokenShortHandClose;
      nSize = 2;
      break;
    }

    // If we haven't found a short hand closing tag then drop into the
    // text process

    // Other characters
  default:
    nIsText = true;
  }

  // If this is a TEXT node
  if (nIsText) {
    // Indicate we are dealing with text
    *pType = eTokenText;
    nSize = 1;
    bool nExit = false;

    while (!nExit && ((ch = getNextChar(pXML)) != 0)) {
      switch (ch) {
        // Break when we find white space
      case _T('\n'):
      case _T(' '):
      case _T('\t'):
      case _T('\r'):
        nExit = true;
        break;

        // If we find a slash then this maybe text or a short hand end tag.
      case _T('/'):

        // Peek at the next character to see it we have short hand end tag
        chTemp = pXML->lpXML[pXML->nIndex];

        // If we found a short hand end tag then we need to exit the loop
        if (chTemp == _T('>')) {
          pXML->nIndex--; // 03.02.2002
          nExit = true;
        } else {
          nSize++;
        }
        break;

        // Break when we find a terminator and decrement the index and
        // column count so that we are pointing at the right character
        // the next time we are called.
      case _T('<'):
      case _T('>'):
      case _T('='):
        pXML->nIndex--;
        nExit = true;
        break;

        // Not reachable through the loop condition above, which already
        // stops on a zero character
      case 0:
        nExit = true;
        break;

      default:
        nSize++;
      }
    }
  }

  *pcbToken = nSize;

  return result;
}
/*
    Scan a numeric literal and finalize it as a T_NUMBER token.

        Hex:        0(x|X)[DIGITS]
        Octal:      0[DIGITS]
        Float:      [DIGITS].[DIGITS][(e|E)[+|-]DIGITS]

    cp      Compiler instance; the characters are read from cp->stream.
    tp      Token being built. Accepted characters are appended with addCharToToken.
    c       First character of the number, already read by the caller.

    The first character that does not belong to the number is returned to the stream with putBackChar.
    Returns the result of finalizeToken(tp).
 */
static int makeNumberToken(EcCompiler *cp, EcToken *tp, int c)
{
    EcStream    *stream;

    stream = cp->stream;
    if (c == '0') {
        c = getNextChar(stream);
        if (tolower((uchar) c) == 'x') {
            /* Hex */
            addCharToToken(tp, '0');
            do {
                addCharToToken(tp, c);
                c = getNextChar(stream);
                /*
                    Cast like every other ctype call in this function: the ctype functions are only defined 
                    for values representable as unsigned char (or EOF).
                 */
            } while (isxdigit((uchar) c));
            putBackChar(stream, c);
            setTokenID(tp, T_NUMBER, -1, 0);
            return finalizeToken(tp);

        } else if ('0' <= c && c <= '7') {
            /* Octal */
            addCharToToken(tp, '0');
            do {
                addCharToToken(tp, c);
                c = getNextChar(stream);
            } while ('0' <= c && c <= '7');
            putBackChar(stream, c);
            setTokenID(tp, T_NUMBER, -1, 0);
            return finalizeToken(tp);

        } else {
            /* A plain leading zero: restore the look-ahead and scan "0..." as a decimal/float below */
            putBackChar(stream, c);
            c = '0';
        }
    }

    /* Float: integer part */
    while (isdigit((uchar) c)) {
        addCharToToken(tp, c);
        c = getNextChar(stream);
    }
    /* Optional fraction */
    if (c == '.') {
        addCharToToken(tp, c);
        c = getNextChar(stream);
    }
    while (isdigit((uchar) c)) {
        addCharToToken(tp, c);
        c = getNextChar(stream);
    }
    /* Optional exponent with optional sign */
    if (tolower((uchar) c) == 'e') {
        addCharToToken(tp, c);
        c = getNextChar(stream);
        if (c == '+' || c == '-') {
            addCharToToken(tp, c);
            c = getNextChar(stream);
        }
        while (isdigit((uchar) c)) {
            addCharToToken(tp, c);
            c = getNextChar(stream);
        }
    }
    putBackChar(stream, c);
    setTokenID(tp, T_NUMBER, -1, 0);
    return finalizeToken(tp);
}
/*
 * Passes the current look-ahead character (NEXT_CHAR) to pushChar() and then
 * advances the input with getNextChar().
 */
void pushGetNextChar(void)
{
    pushChar(NEXT_CHAR);
    getNextChar();
}
/* Copy a string token into the given UnicodeString.  Upon entry, we
   have already read the first character of the string token, which is
   not a whitespace character (but may be a QUOTE or ESCAPE). This
   function reads all subsequent characters that belong with this
   string, and copy them into the token parameter. The other
   important, and slightly convoluted purpose of this function is to
   merge adjacent strings.  It looks forward a bit, and if the next
   non comment, non whitespace item is a string, it reads it in as
   well.  If two adjacent strings are quoted, they are merged without
   intervening space.  Otherwise a single SPACE character is
   inserted.

   Returns TOK_STRING when a string was collected, TOK_EOF when the end
   of input is reached inside a quoted string, and TOK_ERROR when
   *status reports a failure, an escape cannot be decoded, or quoted and
   unquoted strings are mixed while isStrict() is true. */
static enum ETokenType getStringToken(UCHARBUF* buf,
                                      UChar32 initialChar,
                                      struct UString *token,
                                      UErrorCode *status) {
    UBool    lastStringWasQuoted;
    UChar32  c;
    /* scratch area that receives the UTF-16 form of one code point */
    UChar    target[3] = { '\0' };
    UChar    *pTarget   = target;
    int      len=0;
    UBool    isFollowingCharEscaped=FALSE;
    /* set when a CR/LF was produced by an escape sequence rather than read literally */
    UBool    isNLUnescaped = FALSE;
    UChar32  prevC=0;

    /* We are guaranteed on entry that initialChar is not a whitespace
       character. If we are at the EOF, or have some other problem, it
       doesn't matter; we still want to validly return the initialChar
       (if nothing else) as a string token. */

    if (U_FAILURE(*status)) {
        return TOK_ERROR;
    }

    /* setup */
    lastStringWasQuoted = FALSE;
    c = initialChar;
    ustr_setlen(token, 0, status);

    if (U_FAILURE(*status)) {
        return TOK_ERROR;
    }

    /* each iteration consumes one string (quoted or unquoted) and then
       looks ahead for an adjacent one to merge */
    for (;;) {
        if (c == QUOTE) {
            /* a SPACE separates this string from the previous one unless
               both are quoted */
            if (!lastStringWasQuoted && token->fLength > 0) {
                ustr_ucat(token, SPACE, status);

                if (U_FAILURE(*status)) {
                    return TOK_ERROR;
                }
            }

            lastStringWasQuoted = TRUE;

            for (;;) {
                c = ucbuf_getc(buf,status);

                /* EOF reached */
                if (c == U_EOF) {
                    return TOK_EOF;
                }

                /* Unterminated quoted strings */
                if (U_FAILURE(*status)) {
                    return TOK_ERROR;
                }

                if (c == QUOTE && !isFollowingCharEscaped) {
                    break;
                }

                if (c == ESCAPE  && !isFollowingCharEscaped) {
                    pTarget = target;
                    c       = unescape(buf, status);

                    if (c == U_ERR) {
                        return TOK_ERROR;
                    }
                    if(c == CR || c == LF){
                        isNLUnescaped = TRUE;
                    }
                }

                /* unescape() handed back the ESCAPE character itself: defer
                   it and treat the next character as escaped */
                if(c==ESCAPE && !isFollowingCharEscaped){
                    isFollowingCharEscaped = TRUE;
                }else{
                    U_APPEND_CHAR32(c, pTarget,len);
                    pTarget = target;
                    ustr_uscat(token, pTarget,len, status);
                    isFollowingCharEscaped = FALSE;
                    len=0;
                    /* count literal line breaks only; the LF of a CR LF
                       pair is not counted a second time */
                    if(c == CR || c == LF){
                        if(isNLUnescaped == FALSE && prevC!=CR){
                            lineCount++;
                        }
                        isNLUnescaped = FALSE;
                    }
                }

                if (U_FAILURE(*status)) {
                    return TOK_ERROR;
                }
                prevC = c;
            }
        } else {
            if (token->fLength > 0) {
                ustr_ucat(token, SPACE, status);

                if (U_FAILURE(*status)) {
                    return TOK_ERROR;
                }
            }

            /* if we reach here with lastStringWasQuoted set we are mixing
             * quoted and unquoted strings:
             * warn in normal mode and error in
             * pedantic mode
             */
            if(lastStringWasQuoted){
                if(getShowWarning()){
                    warning(lineCount, "Mixing quoted and unquoted strings");
                }
                if(isStrict()){
                    return TOK_ERROR;
                }
            }

            lastStringWasQuoted = FALSE;

            if (c == ESCAPE) {
                pTarget = target;
                c       = unescape(buf, status);

                /* EOF reached */
                if (c == U_EOF) {
                    return TOK_ERROR;
                }
            }

            U_APPEND_CHAR32(c, pTarget,len);
            pTarget = target;
            ustr_uscat(token, pTarget,len, status);
            len=0;

            if (U_FAILURE(*status)) {
                return TOK_ERROR;
            }

            for (;;) {
                /* DON'T skip whitespace */
                c = getNextChar(buf, FALSE, NULL, status);

                /* EOF reached */
                if (c == U_EOF) {
                    ucbuf_ungetc(c, buf);
                    return TOK_STRING;
                }

                if (U_FAILURE(*status)) {
                    return TOK_STRING;
                }

                /* structural characters end the unquoted string and are
                   left in the buffer for the caller */
                if (c == QUOTE
                        || c == OPENBRACE
                        || c == CLOSEBRACE
                        || c == COMMA
                        || c == COLON) {
                    ucbuf_ungetc(c, buf);
                    break;
                }

                if (isWhitespace(c)) {
                    break;
                }

                if (c == ESCAPE) {
                    pTarget = target;
                    c       = unescape(buf, status);

                    if (c == U_ERR) {
                        return TOK_ERROR;
                    }
                }

                U_APPEND_CHAR32(c, pTarget,len);
                pTarget = target;
                ustr_uscat(token, pTarget,len, status);
                len=0;
                if (U_FAILURE(*status)) {
                    return TOK_ERROR;
                }
            }
        }

        /* DO skip whitespace */
        c = getNextChar(buf, TRUE, NULL, status);

        if (U_FAILURE(*status)) {
            return TOK_STRING;
        }

        /* anything other than a structural character starts an adjacent
           string, which the next iteration merges into the token */
        if (c == OPENBRACE || c == CLOSEBRACE || c == COMMA || c == COLON) {
            ucbuf_ungetc(c, buf);
            return TOK_STRING;
        }
    }
}
/*
 * Scans a numeric constant starting at NEXT_CHAR and converts it.
 *
 * The constant's type and radix are recorded in tokData->repr.constant:
 * the defaults are CONSTT_INT_CONST / RADT_DECIMAL, a "0x"/"0X" prefix
 * selects RADT_HEX and a leading 0 followed by an octal digit selects
 * RADT_OCTAL.  Characters of the number proper are consumed with
 * pushGetNextChar(); the integer suffixes (l, L, u, U) are consumed with
 * getNextChar() and only change the recorded type.
 *
 * Returns RERR_INV_FLOAT_CONST when pushFloatDotExp() rejects the
 * fraction/exponent, RERR_INV_INT_CONST for a 0-prefixed number that
 * contains a non-octal digit but no '.', 'e' or 'E', and otherwise the
 * result of convStr2Const().
 */
static WicErrors scanNum(pTokData tokData)
{
    tokData->repr.constant.type = CONSTT_INT_CONST;
    tokData->repr.constant.radix = RADT_DECIMAL;
    if( NEXT_CHAR == '0' ) {
        pushGetNextChar();
        if( NEXT_CHAR == 'x'  ||  NEXT_CHAR == 'X' ) {
            tokData->repr.constant.radix = RADT_HEX;
            pushGetNextChar();
            while(isxdigit(NEXT_CHAR)) {
                pushGetNextChar();
            }
        } else {    /* scan octal or float number */
            if (NEXT_CHAR >= '0'  &&  NEXT_CHAR <= '7') {
                tokData->repr.constant.radix = RADT_OCTAL;
            }
            while (NEXT_CHAR >= '0'  &&  NEXT_CHAR <= '7') {
                pushGetNextChar();
            }
            /* A digit 8/9, '.' or an exponent after the octal digits is only
               acceptable when the whole thing is a floating point number. */
            if( isdigit(NEXT_CHAR) || NEXT_CHAR == '.' ||
                NEXT_CHAR == 'e' || NEXT_CHAR == 'E' ) {
                while (isdigit(NEXT_CHAR)) {
                    pushGetNextChar();
                }
                if (NEXT_CHAR == '.' || NEXT_CHAR == 'e' || NEXT_CHAR == 'E' ) {
                    if (!pushFloatDotExp(tokData, 0)){
                        return RERR_INV_FLOAT_CONST;
                    }
                } else {
                    return RERR_INV_INT_CONST;
                }
                /* floating point numbers skip the integer-suffix handling */
                goto Convert;
            }
        }
    } else {                /* scan decimal number */
        while( NEXT_CHAR >= '0'  &&  NEXT_CHAR <= '9' ) {
            pushGetNextChar();
        }
        if( NEXT_CHAR == '.' || NEXT_CHAR == 'e' || NEXT_CHAR == 'E' ) {
            if (!pushFloatDotExp(tokData, 0)){
                return RERR_INV_FLOAT_CONST;
            }
            goto Convert;
        }
    }
    /* Integer suffixes: L, LU, U, UL (either case).  They are consumed with
       getNextChar(), not pushGetNextChar(). */
    if( NEXT_CHAR == 'L'  ||  NEXT_CHAR == 'l' ) {
        tokData->repr.constant.type = CONSTT_LONG_CONST;
        getNextChar();
        if( NEXT_CHAR == 'u' || NEXT_CHAR == 'U' ) {
            getNextChar();
            tokData->repr.constant.type = CONSTT_ULONG_CONST;
        }
    } else if( NEXT_CHAR == 'u' || NEXT_CHAR == 'U' ) {
        getNextChar();
        if( NEXT_CHAR == 'l' || NEXT_CHAR == 'L' ) {
            getNextChar();
            tokData->repr.constant.type = CONSTT_ULONG_CONST;
        } else {
            tokData->repr.constant.type = CONSTT_UINT_CONST;
        }
    }

Convert:
    return convStr2Const(tokData);
}
/**
 * Returns the value produced by getNextChar(), converted to uint8_t.
 *
 * The exception specification declares that an int may be thrown.
 */
uint8_t SocketReader::read() throw (int)
{
    const auto next = getNextChar();
    return static_cast<uint8_t>(next);
}
/*
 * Scans the next token from the current input and fills in *tok.
 *
 * tok->pos receives the token position (setTokPos) and its linesBefore
 * field; tok->data receives the token code and representation.  Operators
 * and punctuation are collected with pushGetNextChar() and then resolved
 * through tabLookup(currTok); identifiers, numbers, strings, character
 * constants, include file names and comments are delegated to the scan*()
 * helpers.
 *
 * STATE must be TS_START or TS_COMMENT on entry.
 *
 * Returns ERR_NONE, the value returned by a scan*() helper, RERR_INV_CHAR
 * for a character that cannot start a token (or a malformed "..") and
 * RERR_INV_PREPROCESSOR for an unknown '#' directive.
 */
WicErrors getNextToken(pToken tok)
{
    pTokTab tokTabPtr;
    WicErrors retVal = ERR_NONE;
    static long tokAfterDefine = 2;   // used to flag '(' in #define x( as a
                                      // special parentheses
    int temp;

    assert(currTokF >= 0);

    currTokLen = 0;
    currTok[currTokLen] = 0;
    TOK_NUM_AFTER_NEW_LINE++;   /* Used for #preprocessor directives */
    tokAfterDefine++;           /* Used for #preprocessor directives */
    g_currLineNum = LINE_NUM;
    g_currColNum = COL_NUM;

    /* When getNextToken gets called, STATE may be one of:
        TS_START, TS_COMMENT. */

    /* temp is the value returned by skipBlank(); it is handed to setTokPos() below */
    temp = skipBlank();
    if (STATE == TS_START) {
        setTokPos(
            tok->pos,
            TOK_FILE_NAME,
            currTokF,
            LINE_NUM,
            COL_NUM,
            LINES_BEFORE,
            temp,
            orderLineNum
        );

        /* NOTE(review): the character literal below is empty as it stands,
           which is not valid C - presumably a control character was lost
           from the source text; confirm against the upstream file. */
        while (NEXT_CHAR == '') {
            getNextChar();
            tok->pos->spacesBefore = skipBlank();
        }
        if (isalpha(NEXT_CHAR) || NEXT_CHAR == '_') {
            /* When scanId() returns 0 the text scanned so far is discarded
               and a string or character constant is scanned instead,
               chosen by the character that follows. */
            if (!scanId()) {
                char saveChar = NEXT_CHAR;
                currTokLen = 0;
                currTok[currTokLen] = 0;
                getNextChar();
                if (saveChar == '"') {
                    retVal = scanStr(tok->data);
                } else {
                    retVal = scanChar(tok->data);
                }
                goto Return;
            }
        } else if (isdigit(NEXT_CHAR)) {
            retVal = scanNum(tok->data);
            goto Return;
        } else switch (NEXT_CHAR) {
            case '\'':
                getNextChar();
                retVal = scanChar(tok->data);
                goto Return;
                break;

            case '"':
                /* on an #include line a quoted string is a file name */
                if (currLineIsInclude) {
                    getNextChar();
                    retVal = scanIncludeFileName(tok->data, '"');
                    goto Return;
                } else {
                    getNextChar();
                    retVal = scanStr(tok->data);
                    goto Return;
                    break;
                }

            case '\n':
                pushGetNextChar();
                currLineIsInclude = 0;
                break;

            case '!':
                pushGetNextChar();
                if (NEXT_CHAR == '=') {
                    pushGetNextChar();
                }
                break;

            case '#':
                pushGetNextChar();
                /* '#' as the first token of a line introduces a directive;
                   elsewhere it is '#' or '##' */
                if (TOK_NUM_AFTER_NEW_LINE == 1) {
                    skipBlank();
                    if (isalpha(NEXT_CHAR) || NEXT_CHAR == '_') {
                        scanId();
                    } else {
                        tok->data->code = Y_PRE_NULL;
                        retVal = ERR_NONE;
                        goto Return;
                    }
                } else {
                    if (NEXT_CHAR == '#') {
                        pushGetNextChar();
                    }
                }
                break;

            case '%':
                pushGetNextChar();
                if (NEXT_CHAR == '=') {
                    pushGetNextChar();
                }
                break;

            case '&':
                /* NOTE(review): this collects "&", "&&" and "&&=" but has no
                   branch for "&=", unlike the '|' case below - confirm
                   whether that is intended. */
                pushGetNextChar();
                if (NEXT_CHAR == '&') {
                    pushGetNextChar();
                    if (NEXT_CHAR == '=') {
                        pushGetNextChar();
                    }
                }
                break;

            case '(':
                pushGetNextChar();
                break;

            case ')':
                pushGetNextChar();
                break;

            case '*':
                pushGetNextChar();
                if (NEXT_CHAR == '=') {
                    pushGetNextChar();
                }
                break;

            case '+':
                pushGetNextChar();
                if (NEXT_CHAR == '+') {
                    pushGetNextChar();
                } else if (NEXT_CHAR == '=') {
                    pushGetNextChar();
                }
                break;

            case ',':
                pushGetNextChar();
                break;

            case '-':
                pushGetNextChar();
                if (NEXT_CHAR == '-') {
                    pushGetNextChar();
                } else if (NEXT_CHAR == '=') {
                    pushGetNextChar();
                } else if (NEXT_CHAR == '>') {
                    pushGetNextChar();
                }
                break;

            case '.':
                /* ".", "..." or a floating point constant such as ".5";
                   exactly two dots is an error */
                pushGetNextChar();
                if (NEXT_CHAR == '.') {
                    pushGetNextChar();
                    if (NEXT_CHAR == '.') {
                        pushGetNextChar();
                    } else {
                        retVal = RERR_INV_CHAR;
                        goto Return;
                    }
                } else if (isdigit(NEXT_CHAR)) {
                    if (pushFloatDotExp(tok->data, 1)) {
                        retVal = convStr2Const(tok->data);
                        goto Return;
                    } else {
                        retVal = RERR_INV_CHAR;
                        goto Return;
                    }
                }
                break;

            case '/':
                pushGetNextChar();
                if (NEXT_CHAR == '=') {
                    pushGetNextChar();
                } else if (NEXT_CHAR == '*') {           /* comment begin */
                    /* discard the '/' collected above before scanning the comment */
                    popChars(1);
                    STATE = TS_COMMENT;
                    getNextChar();
                    retVal = scanComment(tok->data);
                    goto Return;
                } else if (NEXT_CHAR == '/') {
                    popChars(1);
                    STATE = TS_COMMENT;
                    getNextChar();
                    retVal = scanCPlusPlusComment(tok->data);
                    goto Return;
                }
                break;

            case ':':
                pushGetNextChar();
                if (NEXT_CHAR == '>') {
                    pushGetNextChar();
                }
                break;

            case ';':
                pushGetNextChar();
                break;

            case '<':
                /* on an #include line '<' opens a file name */
                if (currLineIsInclude) {
                    getNextChar();
                    retVal = scanIncludeFileName(tok->data, '>');
                    goto Return;
                } else {
                    pushGetNextChar();
                    if (NEXT_CHAR == '<') {
                        pushGetNextChar();
                        if (NEXT_CHAR == '=') {
                            pushGetNextChar();
                        }
                    } else if (NEXT_CHAR == '=') {
                        pushGetNextChar();
                    }
                }
                break;

            case '=':
                pushGetNextChar();
                if (NEXT_CHAR == '=') {
                    pushGetNextChar();
                }
                break;

            case '>':
                pushGetNextChar();
                if (NEXT_CHAR == '>') {
                    pushGetNextChar();
                    if (NEXT_CHAR == '=') {
                        pushGetNextChar();
                    }
                } else if (NEXT_CHAR == '=') {
                    pushGetNextChar();
                }
                break;

            case '?':
                pushGetNextChar();
                break;

            case '[':
                pushGetNextChar();
                break;

            case ']':
                pushGetNextChar();
                break;

            case '^':
                pushGetNextChar();
                if (NEXT_CHAR == '=')
                    pushGetNextChar();
                break;

            case '{':
                pushGetNextChar();
                break;

            case '|':
                pushGetNextChar();
                if (NEXT_CHAR == '=') {
                    pushGetNextChar();
                } else if (NEXT_CHAR == '|') {
                    pushGetNextChar();
                }
                break;

            case '}':
                pushGetNextChar();
                break;

            case '~':
                pushGetNextChar();
                break;

            case (char) EOF:
                tok->data->code = Y_EOF;
                retVal = ERR_NONE;
                goto Return;
                break;

            default:
                /* Eat up an invalid character */
                getNextChar();
                retVal = RERR_INV_CHAR;
                goto Return;
        }

        /* Identifiers, directives and operators collected in currTok are
           resolved through the token table. */
        tokTabPtr = tabLookup(currTok);
        if (tokTabPtr != NULL)
        {
            tok->data->code = tokTabPtr->code;
            if (tok->data->code == Y_PRE_INCLUDE) {
                currLineIsInclude = 1;
            }
            if (tok->data->code == Y_PRE_DEFINE) {
                tokAfterDefine = 0;
            }
            if (tok->data->code == Y_LEFT_PAREN && tokAfterDefine == 2) {
                // the case of #define x(...
                if (tok->pos->spacesBefore == 0) {
                    tok->data->code = Y_PRE_SPECIAL_LEFT_PAREN;
                }
            }
            tok->data->repr.string = registerString(tokTabPtr->name,
                                                    !FREE_STRING);
        } else {
            /* not in the table: an unknown '#' directive is an error,
               anything else is an identifier */
            if (currTok[0] == '#') {
                retVal = RERR_INV_PREPROCESSOR;
                goto Return;
            } else {
                tok->data->code = Y_ID;
                tok->data->repr.string = registerString(wicStrdup(currTok),
                                                        FREE_STRING);
            }
        }
    } else if (STATE == TS_COMMENT) {
        /* continue a comment that an earlier call left unfinished */
        setTokPos(tok->pos, TOK_FILE_NAME, currTokF, LINE_NUM, COL_NUM,
                  LINES_BEFORE, 0, orderLineNum);
        retVal = scanComment(tok->data);
        goto Return;
    } else {
        assert(0);
    }

    Return:
        /* linesBefore is measured from the previous token, except for
           Y_PRE_NEWLINE tokens, which also leave PREV_TOK_LINE_NUM alone */
        if (tok->data->code != Y_PRE_NEWLINE) {
            tok->pos->linesBefore = tok->pos->lineNum - PREV_TOK_LINE_NUM;
            PREV_TOK_LINE_NUM = tok->pos->lineNum;
        } else {
            tok->pos->linesBefore = 0;
        }
        zapTokPos(g_currPos);
        g_currPos = dupTokPos(tok->pos, NULL);
        return retVal;
}
/* function getToken returns the * next token in source file */ TokenType getToken(void) { /* index for storing into tokenString */ int tokenStringIndex = 0; /* holds current token to be returned */ TokenType currentToken; /* current state - always begins at START */ StateType state = START; /* flag to indicate save to tokenString */ int save; while (state != DONE) { int c = getNextChar(); save = TRUE; switch (state) { case START: if (isdigit(c)) state = INNUM; else if (isalpha(c)) state = INID; else if (c == '=') state = INASSIGN; else if (c == '<') state = INLESS; else if (c == '>') state = INGREATER; else if (c == '!') state = INNOTEQ; else if ((c == ' ') || (c == '\t') || (c == '\n')) save = FALSE; else if (c == '/') { save = FALSE; state = INCOMMENT1; } else { state = DONE; switch (c) { case EOF: save = FALSE; currentToken = ENDFILE; break; case '+': currentToken = PLUS; break; case '-': currentToken = MINUS; break; case '*': currentToken = TIMES; break; case ';': currentToken = SEMI; break; case ',': currentToken = COMMA; break; case '(': currentToken = LPAREN; break; case ')': currentToken = RPAREN; break; case '[': currentToken = SLBRACKET; break; case ']': currentToken = SRBRACKET; break; case '{': currentToken = LBRACKET; break; case '}': currentToken = RBRACKET; break; default: currentToken = ERROR; break; } } break; case INCOMMENT1: save = FALSE; if (c == '*') { state = INCOMMENT2; } else { state = DONE; ungetNextChar(); currentToken = OVER; } break; case INCOMMENT2: save = FALSE; if(c == EOF) { state = DONE; currentToken = ENDFILE; } else if (c == '*') { state = INCOMMENT3; } break; case INCOMMENT3: save = FALSE; if (c == '/') { state = START; } else { state = INCOMMENT2; } break; case INASSIGN: state = DONE; if (c == '=') currentToken = EQ; else { /* backup in the input */ ungetNextChar(); save = FALSE; currentToken = ASSIGN; } break; case INLESS: state = DONE; if (c == '=') currentToken = LESSEQ; else { /* backup in the input */ ungetNextChar(); save 
= FALSE; currentToken = LESS; } break; case INGREATER: state = DONE; if (c == '=') currentToken = GREATEREQ; else { /* backup in the input */ ungetNextChar(); save = FALSE; currentToken = GREATER; } break; case INNOTEQ: state = DONE; if (c == '=') currentToken = NOTEQ; else { /* backup in the input */ ungetNextChar(); save = FALSE; currentToken = ERROR; } break; case INNUM: if (!isdigit(c)) { /* backup in the input */ ungetNextChar(); save = FALSE; state = DONE; currentToken = NUM; } break; case INID: if (!isalpha(c)) { /* backup in the input */ ungetNextChar(); save = FALSE; state = DONE; currentToken = ID; } break; case DONE: default: /* should never happen */ fprintf(listing,"Scanner Bug: state= %d\n",state); state = DONE; currentToken = ERROR; break; } if ((save) && (tokenStringIndex <= MAXTOKENLEN)) tokenString[tokenStringIndex++] = (char) c; if (state == DONE) { tokenString[tokenStringIndex] = '\0'; if (currentToken == ID) currentToken = reservedLookup(tokenString); } } if (TraceScan) { fprintf(listing,"\t%d: ",lineno); printToken(currentToken,tokenString); } return currentToken; } /* end getToken */
std::string JLDIO::findAndUseMacros(std::string raw) { //find declaractions std::unordered_map<std::string, std::string> macros; /// list of keys in descending order of string size std::list<std::string> orderedKeyList; std::string line = ""; std::string out = ""; int cutAfter = 0; int thisLine = 0; int maxMacroSize = 0; for (char c : raw) { if(c != '\n') line += c; else { thisLine = line.size() + 1; if(line.substr(0, 8) == "#define ") { // position after "#define " int i = 7; while (line[i] == ' ' || line[i] == '\t') i++; line = line.substr(i); i = 0; while (line[i] != ' ' && line[i] != '\t') i++; auto key = line.substr(0, i); auto iter = orderedKeyList.begin(); // sorting performance should not matter since // there shouldnt be many macros anyway while (iter != orderedKeyList.end()) { if(iter->size() < key.size()) break; iter++; } orderedKeyList.insert(iter, key); macros.insert({key, line.substr(i+1)}); } else break; cutAfter += thisLine; line = ""; } } out = raw.substr(cutAfter); // No macros found if (!orderedKeyList.size()) return out; maxMacroSize = orderedKeyList.front().size(); // std::cout<<"maxMacroSize: "<<maxMacroSize<<"\n"; // std::cout<<"orderedKeyList: "; // for (auto s : orderedKeyList) // std::cout<<s<<" "; // std::cout<<"\n"<<std::flush; // find and replace std::ostringstream oss; std::istringstream iss(out); std::list<char> workingBuffer; char charBuffer; // Fill buffer while(workingBuffer.size() < maxMacroSize) { if(!getNextChar(charBuffer, oss, iss, &workingBuffer)) { oss << std::flush; return oss.str(); } workingBuffer.push_back(charBuffer); } do { bool matchFound = false; for(auto key : orderedKeyList) { auto kvp = macros.find(key); assert(kvp != macros.end() && "orderedKeyList should only contain valid keys"); auto value = kvp->second; if(key == listToString(workingBuffer).substr(0, key.size())) { // remove key from workingBuffer for (int i=0; i<key.size(); i++) { // std::cout<<"REMOVE: '"<<workingBuffer.front()<<"'\n"; 
workingBuffer.pop_front(); } // insert value in to the output // std::cout<<"INSERT: '"<<value<<"'\n"; oss << '\"' << value << '\"'; // refill workingBuffer // same as the one above while(workingBuffer.size() < maxMacroSize) { if(!getNextChar(charBuffer, oss, iss, &workingBuffer)) { break; } else { workingBuffer.push_back(charBuffer); } } matchFound = true; break; } } if(!matchFound) { oss << workingBuffer.front(); // move to next char workingBuffer.pop_front(); // refill workingBuffer // because getNextChar() can empty the buffer // same as the one above while(workingBuffer.size() < maxMacroSize) { if(!getNextChar(charBuffer, oss, iss, &workingBuffer)) { break; } else { workingBuffer.push_back(charBuffer); } } if(workingBuffer.empty()) { oss<<std::flush; return oss.str(); } } } while(true); assert(0 && "Unexpected exit"); oss<<std::flush; return oss.str(); }
/* La función getToken el siguiente token * en el archivo fuente */ TokenType getToken(void) { /* indice para guardar en tokenString */ int tokenStringIndex = 0; /* guarda el token actual para ser retornado */ TokenType currentToken; /* estado actual - siempre comienza con START */ StateType state = START; /* flag para indicar el guardado en tokenString */ int save; /* flags para indicar el estado de un NUM */ int real = 0; int negativo = 0; int cientifico = 0; while (state != DONE) { int c = getNextChar(); save = TRUE; switch (state) { case START: if (isdigit(c) || (c == '-')) state = INNUM; else if (isalpha(c)) state = INID; else if (c == '=') state = INASSIGN; else if ((c == ' ') || (c == '\t') || (c == '\n')) save = FALSE; else if (c == '/') { c = getNextChar(); if (c == '/') { save = FALSE; state = INCOMMENT; } else { ungetNextChar(); state = START; } } else { state = DONE; switch (c) { case EOF: save = FALSE; currentToken = ENDFILE; break; case '<': currentToken = LT; break; case '+': currentToken = PLUS; break; case '-': currentToken = MINUS; break; case '*': currentToken = TIMES; break; case '/': currentToken = OVER; break; case '(': currentToken = LPAREN; break; case ')': currentToken = RPAREN; break; case ';': currentToken = SEMI; break; default: currentToken = ERROR; break; } } break; case INCOMMENT: save = FALSE; if (c == EOF) { state = DONE; currentToken = ENDFILE; } else if (c == '\n') state = START; break; case INASSIGN: state = DONE; if (c == '=') currentToken = EQ; else { ungetNextChar(); currentToken =ASSIGN; } break; case INNUM: if (!isdigit(c)) { if(c=='-' && negativo == 0){ negativo = 1; }else if (c=='.' 
&& real == 0){ real = 1; }else if (c=='e' && cientifico == 0){ cientifico = 1; c = getNextChar(); real = 0; if (c=='-'){ negativo = 0; }else{ ungetNextChar(); } }else{ /* backup en el input */ ungetNextChar(); save = FALSE; state = DONE; currentToken = NUM; } } break; case INID: if (!isalpha(c)) { /* backup en el input */ ungetNextChar(); save = FALSE; state = DONE; currentToken = ID; } break; case DONE: default: /* nunca deberia suceder */ fprintf(listing,"Scanner Bug: state= %d\n",state); state = DONE; currentToken = ERROR; break; } if ((save) && (tokenStringIndex <= MAXTOKENLEN)) tokenString[tokenStringIndex++] = (char) c; if (state == DONE) { tokenString[tokenStringIndex] = '\0'; if (currentToken == ID) currentToken = reservedLookup(tokenString); } } if (TraceScan) { fprintf(listing,"\t%d: ",lineno); printToken(currentToken,tokenString); } return currentToken; } /* fin del getToken */
/*
    Scan a regular expression literal into input->token.

    The token is (re)initialized and starts with a '/'; characters are then copied from the stream 
    up to the closing '/', followed by any run of the flag letters g, i, m and y.

    Returns the result of makeToken: T_REGEXP on success, T_ERR when getNextChar returns -1 or the 
    expression contains a newline, and T_NOP or T_EOF when getNextChar returns 0 (T_NOP if the stream 
    has EC_STREAM_EOL set).
 */
int ecGetRegExpToken(EcInput *input)
{
    EcStream    *stream;
    EcToken     *tp;
    int         c;

    stream = input->stream;
    tp = input->token;
    mprAssert(tp != 0);

    initializeToken(tp, stream);
    addCharToToken(tp, '/');

    for (;;) {
        c = getNextChar(stream);

        if (c == -1) {
            return makeToken(tp, 0, T_ERR, 0);
        }
        if (c == 0) {
            if (stream->flags & EC_STREAM_EOL) {
                return makeToken(tp, 0, T_NOP, 0);
            }
            return makeToken(tp, 0, T_EOF, 0);
        }
        if (c == '/') {
            /* Closing delimiter: collect the trailing flag letters */
            addCharToToken(tp, '/');
            for (c = getNextChar(stream); c == 'g' || c == 'i' || c == 'm' || c == 'y'; c = getNextChar(stream)) {
                addCharToToken(tp, c);
            }
            putBackChar(stream, c);
            return makeToken(tp, 0, T_REGEXP, 0);
        }

        if (c == '\\') {
            /* An escape is copied together with the character it escapes, unless that is a line end */
            c = getNextChar(stream);
            if (c != '\r' && c != '\n' && c != 0) {
                addCharToToken(tp, '\\');
                addCharToToken(tp, c);
                continue;
            }
        } else if (c != '\r' && c != '\n') {
            addCharToToken(tp, c);
            continue;
        }

        /* A raw newline, or a backslash followed by a line end or 0 */
        ecReportError(input->compiler, "warning", stream->name, stream->lineNumber, 0, stream->column,
            "Illegal newline in regular expression");
        return makeToken(tp, 0, T_ERR, 0);
    }
}