WicErrors getNextToken(pToken tok) { pTokTab tokTabPtr; WicErrors retVal = ERR_NONE; static long tokAfterDefine = 2; // used to flag '(' in #define x( as a // special parentheses int temp; assert(currTokF >= 0); currTokLen = 0; currTok[currTokLen] = 0; TOK_NUM_AFTER_NEW_LINE++; /* Used for #preprocessor directives */ tokAfterDefine++; /* Used for #preprocessor directives */ g_currLineNum = LINE_NUM; g_currColNum = COL_NUM; /* When getNextToken gets called, STATE may be one of: TS_START, TS_COMMENT. */ temp = skipBlank(); if (STATE == TS_START) { setTokPos( tok->pos, TOK_FILE_NAME, currTokF, LINE_NUM, COL_NUM, LINES_BEFORE, temp, orderLineNum ); while (NEXT_CHAR == '') { getNextChar(); tok->pos->spacesBefore = skipBlank(); } if (isalpha(NEXT_CHAR) || NEXT_CHAR == '_') { if (!scanId()) { char saveChar = NEXT_CHAR; currTokLen = 0; currTok[currTokLen] = 0; getNextChar(); if (saveChar == '"') { retVal = scanStr(tok->data); } else { retVal = scanChar(tok->data); } goto Return; } } else if (isdigit(NEXT_CHAR)) { retVal = scanNum(tok->data); goto Return; } else switch (NEXT_CHAR) { case '\'': getNextChar(); retVal = scanChar(tok->data); goto Return; break; case '"': if (currLineIsInclude) { getNextChar(); retVal = scanIncludeFileName(tok->data, '"'); goto Return; } else { getNextChar(); retVal = scanStr(tok->data); goto Return; break; } case '\n': pushGetNextChar(); currLineIsInclude = 0; break; case '!': pushGetNextChar(); if (NEXT_CHAR == '=') { pushGetNextChar(); } break; case '#': pushGetNextChar(); if (TOK_NUM_AFTER_NEW_LINE == 1) { skipBlank(); if (isalpha(NEXT_CHAR) || NEXT_CHAR == '_') { scanId(); } else { tok->data->code = Y_PRE_NULL; retVal = ERR_NONE; goto Return; } } else { if (NEXT_CHAR == '#') { pushGetNextChar(); } } break; case '%': pushGetNextChar(); if (NEXT_CHAR == '=') { pushGetNextChar(); } break; case '&': pushGetNextChar(); if (NEXT_CHAR == '&') { pushGetNextChar(); if (NEXT_CHAR == '=') { pushGetNextChar(); } } break; case '(': pushGetNextChar(); 
break; case ')': pushGetNextChar(); break; case '*': pushGetNextChar(); if (NEXT_CHAR == '=') { pushGetNextChar(); } break; case '+': pushGetNextChar(); if (NEXT_CHAR == '+') { pushGetNextChar(); } else if (NEXT_CHAR == '=') { pushGetNextChar(); } break; case ',': pushGetNextChar(); break; case '-': pushGetNextChar(); if (NEXT_CHAR == '-') { pushGetNextChar(); } else if (NEXT_CHAR == '=') { pushGetNextChar(); } else if (NEXT_CHAR == '>') { pushGetNextChar(); } break; case '.': pushGetNextChar(); if (NEXT_CHAR == '.') { pushGetNextChar(); if (NEXT_CHAR == '.') { pushGetNextChar(); } else { retVal = RERR_INV_CHAR; goto Return; } } else if (isdigit(NEXT_CHAR)) { if (pushFloatDotExp(tok->data, 1)) { retVal = convStr2Const(tok->data); goto Return; } else { retVal = RERR_INV_CHAR; goto Return; } } break; case '/': pushGetNextChar(); if (NEXT_CHAR == '=') { pushGetNextChar(); } else if (NEXT_CHAR == '*') { /* comment begin */ popChars(1); STATE = TS_COMMENT; getNextChar(); retVal = scanComment(tok->data); goto Return; } else if (NEXT_CHAR == '/') { popChars(1); STATE = TS_COMMENT; getNextChar(); retVal = scanCPlusPlusComment(tok->data); goto Return; } break; case ':': pushGetNextChar(); if (NEXT_CHAR == '>') { pushGetNextChar(); } break; case ';': pushGetNextChar(); break; case '<': if (currLineIsInclude) { getNextChar(); retVal = scanIncludeFileName(tok->data, '>'); goto Return; } else { pushGetNextChar(); if (NEXT_CHAR == '<') { pushGetNextChar(); if (NEXT_CHAR == '=') { pushGetNextChar(); } } else if (NEXT_CHAR == '=') { pushGetNextChar(); } } break; case '=': pushGetNextChar(); if (NEXT_CHAR == '=') { pushGetNextChar(); } break; case '>': pushGetNextChar(); if (NEXT_CHAR == '>') { pushGetNextChar(); if (NEXT_CHAR == '=') { pushGetNextChar(); } } else if (NEXT_CHAR == '=') { pushGetNextChar(); } break; case '?': pushGetNextChar(); break; case '[': pushGetNextChar(); break; case ']': pushGetNextChar(); break; case '^': pushGetNextChar(); if (NEXT_CHAR == '=') 
pushGetNextChar(); break; case '{': pushGetNextChar(); break; case '|': pushGetNextChar(); if (NEXT_CHAR == '=') { pushGetNextChar(); } else if (NEXT_CHAR == '|') { pushGetNextChar(); } break; case '}': pushGetNextChar(); break; case '~': pushGetNextChar(); break; case (char) EOF: tok->data->code = Y_EOF; retVal = ERR_NONE; goto Return; break; default: /* Eat up an ivalid character */ getNextChar(); retVal = RERR_INV_CHAR; goto Return; } tokTabPtr = tabLookup(currTok); if (tokTabPtr != NULL) { tok->data->code = tokTabPtr->code; if (tok->data->code == Y_PRE_INCLUDE) { currLineIsInclude = 1; } if (tok->data->code == Y_PRE_DEFINE) { tokAfterDefine = 0; } if (tok->data->code == Y_LEFT_PAREN && tokAfterDefine == 2) { // the case of #define x(... if (tok->pos->spacesBefore == 0) { tok->data->code = Y_PRE_SPECIAL_LEFT_PAREN; } } tok->data->repr.string = registerString(tokTabPtr->name, !FREE_STRING); } else { if (currTok[0] == '#') { retVal = RERR_INV_PREPROCESSOR; goto Return; } else { tok->data->code = Y_ID; tok->data->repr.string = registerString(wicStrdup(currTok), FREE_STRING); } } } else if (STATE == TS_COMMENT) { setTokPos(tok->pos, TOK_FILE_NAME, currTokF, LINE_NUM, COL_NUM, LINES_BEFORE, 0, orderLineNum); retVal = scanComment(tok->data); goto Return; } else { assert(0); } Return: if (tok->data->code != Y_PRE_NEWLINE) { tok->pos->linesBefore = tok->pos->lineNum - PREV_TOK_LINE_NUM; PREV_TOK_LINE_NUM = tok->pos->lineNum; } else { tok->pos->linesBefore = 0; } zapTokPos(g_currPos); g_currPos = dupTokPos(tok->pos, NULL); return retVal; }
void EBNFScanner::nextToken() { if(currentToken > -1 && currentToken < tokens.size() - 1) { currentToken++; return; } while (hasNextCharacter()) { nextCharacter(); // Scan the Identifier type. [a-zA-Z\-_] if(TokenScannerHelper::isAlpha(currentCharacter)) { Token tok = scanIdentifier(); tokens.append(std::move(tok)); currentToken++; return; } // Scan a literal else if (TokenScannerHelper::isQuote(currentCharacter)) { Token tok = scanLiteral(); tokens.append(std::move(tok)); currentToken++; return; } // Scan an operator Token. else if (TokenScannerHelper::isOperator(currentCharacter)) { if (currentCharacter == '(' && peekCharacter() == '*') { Token tok = scanComment(); tokens.append(std::move(tok)); currentToken++; return; } else { Token tok = scanOperator(); tokens.append(std::move(tok)); currentToken++; return; } } else if (TokenScannerHelper::isWhiteSpace(currentCharacter)) { // Ignore white spaces. } else { raiseError (String("Invalid Character.")); } checkNewLine(); } tokens.append(Token(Token::TypeEof, String(""), createPosition())); currentToken++; }
int GetToken(void) { /* skip white space and comment */ while( isspace(nextch) || ('/' == nextch)) { if (nextch == '\n') { lineno++; } /* skip comment */ if( nextch == '/') { nextch=getc(infile); if(nextch == '/') { /* eat to end of line */ do { nextch=getc(infile); } while(nextch != '\n'); lineno++; } else if(nextch == '*') scanComment(); else { return('/'); } } nextch = getc(infile); } if (nextch == EOF) return( T_EOF); if (isalpha (nextch) || nextch == '_' ) /*scan identifier */ { char *s = lexeme; do { *s = nextch; s++; nextch = getc(infile); } while( isalpha(nextch) || isdigit( nextch ) || (nextch == '_' ) ); *s = '\0'; return ID; } switch (nextch) { case ':' : nextch = getc(infile); return COLON; case ';' : nextch = getc(infile); return SEMI; case '|' : nextch = getc(infile); return BAR; case '(' : nextch = getc(infile); return LPAREN; case ')' : nextch = getc(infile); return RPAREN; case '*' : nextch = getc(infile); return STAR; case '?' : nextch = getc(infile); return QUEST; case '+' : nextch = getc(infile); return PLUS; case '{' : nextch = getc(infile); return LBRACE; case '}' : nextch = getc(infile); return RBRACE; case ',' : nextch = getc(infile); return COMMA; case '%' : nextch = getc(infile); if (nextch == '%') { nextch = getc(infile); strcpy(lexeme, "%%" ); return END_SPEC; } else if(isalpha(nextch)) { char *s = lexeme; do { *s++ = nextch; nextch = getc(infile); } while(isalpha(nextch)); *s = '\0'; if (strcmp(lexeme, "token") == 0) { return TOKEN_SPEC; } else { SyntaxError("Unknown directive"); return( BAD_TOKEN ); } } else return( BAD_TOKEN ); case '\"': /*multichar token "token" or single char "c" */ { char *s = lexeme; while (nextch != EOF) { nextch = toupper(getc(infile)); while (nextch <= ' ' ) nextch = toupper(getc(infile)); if (nextch == '\"'){ nextch = getc(infile); break; } *s = nextch; s++; }; *s = '\0'; if (strlen(lexeme) == 1) { /*single char*/ expand_char(lexeme, lexeme[1]); return CHR_LIT; } else return ID; } /* single char token 'c'*/ 
case '\'': expand_char(lexeme, getc(infile)); nextch = getc(infile); if (nextch != '\'') { SyntaxError("Single char token must be terminated with \'."); return(BAD_TOKEN); } else { nextch = getc(infile); return CHR_LIT; } } return BAD_TOKEN; }
/// Consumes a comment if one is the next token; otherwise the token that
/// was read is pushed back with rewind().
void Compiler::scanPossibleComment(){
    if(tok->getnext() != T_COMMENT){
        // not a comment - put it back
        tok->rewind();
        return;
    }
    scanComment(false);
}
void Compiler::scanStmt() { char buf[256]; // temporary buffer int t; switch(tok->getnext()){ case T_IDENT: // it's an expression case T_INT: case T_FLOAT: case T_SUB: case T_BITNOT: case T_PLING: case T_STRING: case T_BACKTICK: case T_OPREN: // deal with debugging words! if(!strcmp(tok->getstring(),"dumplocs")){ cg->emit(OP_SPECIAL,0);break; } if(!strcmp(tok->getstring(),"breakpoint")){ cg->emit(OP_SPECIAL,1);break; } tok->rewind(); // put the token back // scan the expression, might be a label if(!scanExpr(true)) // clear all statements if not a func or other oddity, // or just a dummy for recreation purposes if in immediate mode. cg->emit(cg->isCompiling()?OP_ENDESTMT:OP_ENDESTMT2); break; case T_LOAD: { if(cg->isCompiling()) error("can only run 'load' in interactive mode"); Session *s; if(tok->getnext()!=T_STRING) error("expected a string after 'load'"); try { s = new Session(ses->api); s->feedFile(tok->getstring()); } catch(Exception &e){ delete s; throw e; } delete s; } break; case T_SAVE: { if(cg->isCompiling()) error("can only run 'save' in interactive mode"); if(tok->getnext()!=T_STRING) error("expected a string after 'save'"); const char *fname = tok->getstring(); FILE *a; if(!strlen(fname)) a = stdout; else a = fopen(fname,"w"); if(!a) error("cannot open file '%s'",fname); Serialiser *ser = new Serialiser(ses); ser->write(a); if(strlen(fname)) fclose(a); delete ser; } break; case T_SAVEVAR: { if(cg->isCompiling()) error("can only run 'savevar' in interactive mode"); if(tok->getnext()!=T_IDENT) error("expected a variable name after 'savevar'"); const char *vname = tok->getstring(); int vdesc = lana->consts->findOrCreateString(vname); if(tok->getnext()!=T_STRING) error("expected a string after 'savevar'"); const char *fname = tok->getstring(); // try to get the value Value *v; int id; id = lana->globs->find(vdesc); if(id>=0) { v = lana->globs->get(id); // it's a global } else { id = ses->findSesVar(vdesc); if(id<0) error("variable not found: 
%s",lana->consts->getStr(vdesc)); v = ses->getSesVar(id); } FILE *a; if(!strlen(fname)) a = stdout; else a = fopen(fname,"w"); if(!a) error("cannot open file '%s'",fname); Serialiser *ser = new Serialiser(ses); ser->serialiseValue(a,v,lana->consts->getStr(vdesc)); if(strlen(fname)) fclose(a); delete ser; } break; case T_FOR: scanFor(); break; case T_ENDFOR: scanEndFor(); break; case T_THIS: tok->rewind(); // put the token back if(!scanExpr(true)) // clear all statements if not a func or other oddity, // or just a dummy for recreation purposes if in immediate mode. cg->emit(cg->isCompiling()?OP_ENDESTMT:OP_ENDESTMT2); break; case T_GOTO: if(!cg->isCompiling()) error("must be compiling a function/procedure to use '%s'",tok->getstring()); scanGoto(); break; case T_ENDFUNC: scanEndFunc(); break; case T_END: if(!(lana->opFlags & LOP_STRIPCOMMENTS)) cg->emit(OP_BLANKLINE); // yes, these are wasteful .. very slightly break; case T_IF: // first we push a special value onto the compiler stack // to mark the start of this if..elseif..elseif..endif cg->current->cpush(-9999); // we scan the expression if(scanExpr()) error("cannot use a function/procedure expression in if"); // stack and output an incomplete if - but this might be a normal if, or a quick if. cg->current->cpushhere(); // now for some cleverness. Is the next token a colon? if(tok->getnext() == T_COLON){ cg->emit(OP_QUICKIF,-100); // if so, parse the next statement recursively scanStmt(); // note that we don't need to output a quick endif, since the recreator // doesn't need it! instruction *ptr = cg->current->cpoplocandcheck(OP_QUICKIF,OP_QUICKIF); // MUST be an OP_IF, no ELSE. if(!ptr) error("not a simple statement in quick-if"); // write the IF, ELSE or ELSEIF again with the correct distance *ptr = INST(INSTOP(*ptr),cg->current->getdiff(ptr)); // now pop off! int n; do{ n = cg->current->cpop(); }while(n!=-9999); } else { // not - put it back! 
tok->rewind(); cg->emit(OP_IF,-100); } break; case T_ENDIF: { // get the corresponding OP_IF, OP_ELSEIF or OP_ELSE cg->emit(OP_ENDIF); instruction *ptr = cg->current->cpoplocandcheck(OP_IF,OP_ELSE); if(!ptr) error("mismatched endif"); // write the IF, ELSE or ELSEIF again with the correct distance *ptr = INST(INSTOP(*ptr),cg->current->getdiff(ptr)); // now pop and fixup OP_JMPELSEIFs until we get the special -9999 which marked the start for(;;){ int n = cg->current->cpop(); if(n==-9999)break; // we're done! // we're not done - get the code pointer instruction *ptr = cg->current->getPtr(n*sizeof(instruction)); // make sure it's a OP_JMPELSEIF! if(INSTOP(*ptr)!=OP_JMPELSEIF) error("badly formed conditional statement"); // change it so that it jumps to the current location *ptr = INST(INSTOP(*ptr),cg->current->getdiff(ptr)); } } break; case T_ELSEIF: { // pop the instruction off the stack, an OP_IF or OP_ELSEIF instruction *ptr = cg->current->cpoplocandcheck(OP_IF,OP_ELSEIF); // first we need to terminate the previous condition, so // push the location and output a OP_JMPELSEIF ready to fill in. // This will get left on the stack! cg->current->cpushhere(); cg->emit(OP_JMPELSEIF,-100); // now make the IF or ELSEIF we popped jump to this point *ptr = INST(INSTOP(*ptr),cg->current->getdiff(ptr)); // now scan the expression if(scanExpr()) error("cannot use a function/procedure expression in if"); // push the location, and.. 
// output an OP_ELSEIF with a dummy jump cg->current->cpushhere(); cg->emit(OP_ELSEIF,-100); } break; case T_ELSE: { // write the OP_ELSE which will become a jump forward, // but first recording the location int elseloc = cg->current->getloc(); cg->emit(OP_ELSE,-100); // now we need to make the IF jump to here // get the corresponding OP_IF or OP_ELSEIF instruction *ptr = cg->current->cpoplocandcheck(OP_IF,OP_ELSEIF); if(!ptr) error("mismatched else"); // write the IF again with the correct jump distance int diff = cg->current->getdiff(ptr); *ptr = INST(INSTOP(*ptr),diff); // now push the location of the OP_ELSE, which // will get processed by the OP_ENDIF cg->current->cpush(elseloc); } break; // pop the location case T_RETURN: if(!cg->isCompiling()) error("must be compiling a function/procedure to use '%s'",tok->getstring()); if(tok->getnext() == T_END){ // end of line? tok->rewind(); // no return value if(cg->current->ldth.flags & LDTF_RETURNS) error("functions must return a value"); cg->emit(OP_RETURN,0); } else { tok->rewind(); if(!(cg->current->ldth.flags & LDTF_RETURNS)) error("procedures cannot return a value"); if(scanExpr()) error("cannot directly return a function"); cg->emit(OP_RETURN,1); } break; case T_WHILE: if(!cg->isCompiling()) error("must be compiling a function/procedure to use '%s'",tok->getstring()); // push the current location onto the stack - this is where ENDWHILE will // jump to cg->current->cpushhere(); // we also create and push the loop data here, so that we can use break and // continue! 
cg->current->newloop(); // scan and output the expression if(scanExpr()) error("cannot use a function/procedure expression in `while`"); // push the WHILE onto the stack so we can write the terminating jump into it cg->current->cpushhere(); // output OP_WHILE with a dummy cg->emit(OP_WHILE,-100); break; case T_ENDWHILE: { // pop the location of the WHILE from the stack instruction *whileptr = cg->current->cpoplocandcheck(OP_WHILE,OP_WHILE); if(!whileptr) error("mismatched endwhile"); // pop the location for the backward jump instruction *jumpdest = cg->current->cpoplocation(); // output the endwhile, which will do the backward jump cg->emit(OP_ENDWHILE,cg->current->getdiff(jumpdest)); // now patch the while instruction with the forward jump to use if the // condition is false *whileptr = INST(OP_WHILE,cg->current->getdiff(whileptr)); /// and end the loop, setting the break label and popping the loop stack cg->current->endloop(); break; } case T_REPEAT: if(!cg->isCompiling()) error("must be compiling a function/procedure to use '%s'",tok->getstring()); // output OP_REPEAT, pushing its location. We don't // jump to here, though - we jump to the following opcode. // This is done just so we can check that the until matches // a repeat. See T_UNTIL. 
cg->current->cpushhere(); cg->current->newloop(); // push and initialise a new loop stack entry (see T_WHILE above) cg->emit(OP_REPEAT,0); break; case T_UNTIL: { // scan and output the expression if(scanExpr()) error("cannot use a function/procedure expression in `until`"); // pop the location of the OP_REPEAT from the stack instruction *ptr = cg->current->cpoplocandcheck(OP_REPEAT,OP_REPEAT); if(!ptr) error("mismatched `until'"); // increment this, because we want to save cycles by // jumping past the OP_REPEAT (which is a kind of noop) ptr++; // and output the OP_UNTIL jump cg->emit(OP_UNTIL,cg->current->getdiff(ptr)); cg->current->endloop(); // end the current loop stack entry (see T_ENDWHILE above) break; } case T_BREAK: { // we want to break out of the topmost loop on the loop stack // get address we're about to write to instruction *op = cg->current->getlocptr(); // output the break which will be patched later cg->emit(OP_BREAK,-100); // and this jump as a jump to be patched when the break label is resolved LoopData *d = cg->current->loopstack.peekptr(); if(!d) throw ParseException("break with no loop"); d->breaklabel.jumpFrom(op); } break; case T_CONTINUE: { // we want to terminate the current iteration of the topmost loop on the loop stack // and immediately start the loop code again // get address we're about to write to instruction *op = cg->current->getlocptr(); // output the break which will be patched later cg->emit(OP_CONTINUE,-100); // and this jump as a jump to be patched when the break label is resolved LoopData *d = cg->current->loopstack.peekptr(); if(!d) throw ParseException("continue with no loop"); d->continuelabel.jumpFrom(op); } break; case T_COMMENT: scanComment(true); break; default: error("unexpected token '%s'",tok->getstring()); } // see if there's a comment at the end scanPossibleComment(); if(tok->getnext()!=T_END) error("trailing garbage at end of line"); }
/**********************************************
 * Walk the HTML text starting at p and copy the embedded code into buf.
 *
 * Comments, CDATA sections and tags are handed to scanComment(),
 * scanCDATA() and skipTag(). Newlines are always copied so that line
 * numbers stay in step with the input; every other character is copied
 * only while inCode is non-zero. A 0 byte is appended as ending sentinel.
 */
void Html::extractCode(OutBuffer *buf)
{
    //printf("Html::extractCode()\n");
    dbuf = buf;                 // save for other routines
    buf->reserve(end - p);
    inCode = 0;

    for (;;)
    {
        //printf("p = %p, *p = x%x\n", p, *p);
        if (*p == 0 || *p == 0x1a)
            break;              // end of file

        // Set when the current character needs no special treatment.
        // (Strings are not recognized outside of tags, so quote
        // characters are ordinary too.)
        bool ordinary = false;

        if (*p == '<')
        {
            if (p[1] == '!' && isCommentStart())
            {   // Comments start with <!--
                scanComment();
            }
            else if (p[1] == '!' && isCDATAStart())
            {
                scanCDATA();
            }
            else if (p[1] == '/' && istagstart(*skipWhite(p + 2)))
                skipTag();
            else if (istagstart(*skipWhite(p + 1)))
                skipTag();
            else
                ordinary = true;        // a '<' that opens nothing
        }
        else if (*p == '&')
        {
            if (inCode)
            {   // Translate character entity into ascii for D parser
                int c = charEntity();
                buf->writeUTF8(c);
            }
            else
                p++;
        }
        else if (*p == '\n' || (*p == '\r' && p[1] != '\n'))
        {
            // A lone '\r' or a '\n' ends a line; the '\r' of a "\r\n"
            // pair is treated as an ordinary character instead.
            linnum++;
            // Always extract new lines, so that D lexer counts the
            // lines right.
            buf->writeByte(*p);
            p++;
        }
        else
            ordinary = true;

        if (ordinary)
        {
            if (inCode)
                buf->writeByte(*p);
            p++;
        }
    }
    buf->writeByte(0);          // ending sentinel
    //printf("D code is: '%s'\n", (char *)buf->data);
}
void Html::skipTag() { enum TagState // what parsing state we're in { TStagstart, // start of tag name TStag, // in a tag name TSrest, // following tag name }; enum TagState state = TStagstart; int inot; unsigned char *tagstart = NULL; int taglen = 0; p++; inot = 0; if (*p == '/') { inot = 1; p++; } while (1) { switch (*p) { case '>': // found end of tag p++; break; case '"': case '\'': state = TSrest; skipString(); continue; case '<': if (p[1] == '!' && isCommentStart()) { // Comments start with <!-- scanComment(); } else if (p[1] == '/' && istagstart(*skipWhite(p + 2))) { error("nested tag"); skipTag(); } else if (istagstart(*skipWhite(p + 1))) { error("nested tag"); skipTag(); } // Treat comments as if they were whitespace state = TSrest; continue; case 0: case 0x1a: error("end of file before end of tag"); break; // end of file case '\r': if (p[1] == '\n') goto Ldefault; case '\n': linnum++; // Always extract new lines, so that code lexer counts the // lines right. dbuf->writeByte(*p); state = TSrest; // end of tag p++; continue; case ' ': case '\t': case '\f': case '\v': if (state == TStagstart) { p++; continue; } default: Ldefault: switch (state) { case TStagstart: // start of tag name assert(istagstart(*p)); state = TStag; tagstart = p; taglen = 0; break; case TStag: if (istag(*p)) { // Continuing tag name taglen++; } else { // End of tag name state = TSrest; } break; case TSrest: break; } p++; continue; } break; } // See if we parsed a <code> or </code> tag if (taglen && memicmp((char *) tagstart, (char *) "CODE", taglen) == 0 && *(p - 2) != '/') // ignore "<code />" (XHTML) { if (inot) { inCode--; if (inCode < 0) inCode = 0; // ignore extra </code>'s } else inCode++; } }