/**
 * Lex `input` into a linked list of tokens, each carrying two offsets into
 * the string (passed straight through to addNewToken).
 *
 * Scanning rules, applied to the character at the current index:
 *  (1) Space: if the next character is also a space, the pair becomes one TAB
 *      token; otherwise the single space becomes a SPACE token.
 *  (2) Tab character: a TAB token.
 *  (3) Character accepted by isChar (letters and digits, per the original
 *      description): consume the whole run and emit an IDENT token.
 *  (4) Comment starter (isComment, '#' per the original description): skip up
 *      to, but not including, the next newline. No token is emitted.
 *  (5) Newline: a NEWLINE token.
 *  (6) Colon: a COLON token.
 *  (7) Quote: consume up to the closing quote and emit a STRING token whose
 *      start offset is just past the opening quote.
 *  (8) Anything else is skipped without emitting a token.
 *
 * A NEWLINE token is always appended after the last scanned token.
 *
 * @param input  buffer to scan; at least `len` bytes must be readable.
 * @param len    number of bytes in `input`. The main loop stops at len-1, so
 *               the final byte never starts a token of its own.
 * @return the first real token (the internal sentinel head is not returned).
 *
 * NOTE(review): the sentinel node created by newToken(0, 0, 0) is never
 * released here; how it should be freed depends on newToken's allocator.
 */
Token* lex(char* input, int len) {
    Token* first = newToken(0, 0, 0);   /* sentinel head, simplifies appends */
    Token* last = first;
    int index = 0;

    while (index < len - 1) {
        int start = index;
        char cur = input[index];

        if (isSpace(cur)) {
            /* index < len-1 here, so input[index+1] is inside the buffer. */
            if (isSpace(input[index + 1])) {
                index++;
                addNewToken(last, TAB, start, index);
            } else {
                addNewToken(last, SPACE, index, index);
            }
            index++;
        } else if (isTab(cur)) {
            index++;
            addNewToken(last, TAB, start, index);
        } else if (isChar(cur)) {
            /* Bounded scan: never read past the end of the buffer. */
            do {
                ++index;
            } while (index < len && isChar(input[index]));
            addNewToken(last, IDENT, start, index);
        } else if (isComment(cur)) {
            /* Stop ON the newline so the next iteration emits NEWLINE; a
             * comment on the last line without a newline ends at len. */
            do {
                ++index;
            } while (index < len && !isNewLine(input[index]));
        } else if (isNewLine(cur)) {
            index++;
            addNewToken(last, NEWLINE, index, index);
        } else if (isColon(cur)) {
            index++;
            addNewToken(last, COLON, index, index);
        } else if (isQuote(cur)) {
            /* Bounded scan: an unterminated string ends at len. */
            do {
                ++index;
            } while (index < len && !isQuote(input[index]));
            addNewToken(last, STRING, start + 1, index);
            if (index < len) {
                index++; /* Pass by the end quote. */
            }
        } else {
            /* Unrecognised character: skip it. Without this the loop would
             * never advance and would spin forever. */
            index++;
        }

        /* Advance the tail only if a token was actually appended. */
        if (last->next != NULL) {
            last = last->next;
        }
    }

    addNewToken(last, NEWLINE, index, index);
    return first->next;
}
/// Update the line/column bookkeeping after the character `ch` has been consumed.
///
/// - Tab: the column advances to the next multiple of `nTabSize`.
/// - End-of-line character: the line counter is incremented and the column
///   reset to 0. If the stream is not at EOF and the next character is a
///   *different* end-of-line character, that character is consumed as well,
///   so a two-character line ending counts as one line.
/// - Anything else: the column advances by one.
void Tokenizer::absorbed(std::streambuf::int_type ch)
{
    if (isTab(ch)) {
        mCurrentColumn += nTabSize - mCurrentColumn % nTabSize;
        return;
    }

    if (!isEOL(ch)) {
        ++mCurrentColumn;
        return;
    }

    // End of line: start a new line at column 0.
    ++mCurrentLine;
    mCurrentColumn = 0;

    if (eof()) {
        return;
    }

    // Swallow the second half of a two-character line ending.
    int upcoming = mpInput->peek();
    if (isEOL(upcoming) && (upcoming != ch)) {
        mpInput->get();
    }
}
/*======================================================================*
                           out_char
 *----------------------------------------------------------------------*
 * Write one character to console `p_con` at its cursor and update the
 * cursor, handling '\n', '\b' and '\t' specially.
 *
 * Besides the console itself, this routine reads and updates several
 * names declared outside this function: `count`, `last_cursor`, `size`
 * (per-console history of cursor positions saved at each newline, used
 * by backspace to return to the previous line), `lastTab`, `tabSize`
 * (recorded tab stops) and `color` (selects the attribute byte).
 *
 * CONSOLE fields used here (translated from the original notes):
 *   current_start_addr  where the display currently starts
 *   original_addr       video-memory position of this console
 *   v_mem_limit         amount of video memory owned by this console
 *   cursor              current cursor position
 *
 * NOTE(review): the outer dimension of `last_cursor`/`size` and the
 * capacity of `lastTab` are not visible here; `count` and `tabSize`
 * are used as indices without a bound check - confirm against the
 * declarations.
 *======================================================================*/
PUBLIC void out_char(CONSOLE* p_con, char ch)
{
	/* Each history row is initialised for this many entries; slot 0 holds
	 * the console's original_addr, slots 1..99 hold saved cursors. */
	const int history_slots = 100;

	/* Two bytes per cell: character, then attribute. */
	u8* p_vmem = (u8*)(V_MEM_BASE + p_con->cursor * 2);

	/* Find the history row that belongs to this console. */
	int i = 0;
	for (; i < count; i++) {
		if (last_cursor[i][0] == p_con->original_addr)
			break;
	}

	/* First time this console is seen: claim and initialise a new row. */
	if (i == count) {
		int j = 0;
		for (; j < history_slots; j++) {
			last_cursor[i][j] = 0;
		}
		size[i] = 0;
		last_cursor[i][0] = p_con->original_addr;
		count++;
	}

	switch (ch) {
	case '\n':
		if (p_con->cursor < p_con->original_addr +
		    p_con->v_mem_limit - SCREEN_WIDTH) {
			/* Remember where the line ended so backspace can return.
			 * The counter still advances when the row is full, so the
			 * existing "size > 99" fallback in '\b' behaves as before,
			 * but the store is skipped instead of writing past the
			 * slots this row was initialised with. */
			++size[i];
			if (size[i] < history_slots) {
				last_cursor[i][size[i]] = p_con->cursor;
			}
			p_con->cursor = p_con->original_addr + SCREEN_WIDTH *
				((p_con->cursor - p_con->original_addr) /
				 SCREEN_WIDTH + 1);
		}
		break;
	case '\b':
		if (p_con->cursor > p_con->original_addr) {
			if (isTab(p_con)) {
				/* Undo a tab: step back 4 cells. */
				p_con->cursor = p_con->cursor - 4;
				*(p_vmem - 2) = ' ';
				*(p_vmem - 1) = DEFAULT_CHAR_COLOR;
				tabSize--;
			} else {
				if ((p_con->cursor - p_con->original_addr) % SCREEN_WIDTH == 0) {
					/* At the start of a line: go back to the cursor
					 * saved by the matching newline, or to the console
					 * origin when no usable entry exists. */
					if (size[i] >= 1 && size[i] <= 99) {
						p_con->cursor = last_cursor[i][size[i]--];
					} else {
						p_con->cursor = last_cursor[i][0];
					}
				} else {
					p_con->cursor--;
				}
				/* p_vmem was computed from the cursor before it moved,
				 * so this blanks the cell just before the old cursor. */
				*(p_vmem - 2) = ' ';
				*(p_vmem - 1) = DEFAULT_CHAR_COLOR;
			}
		}
		break;
	case '\t':
		/* A tab advances 4 cells; record where it ended. */
		p_con->cursor += 4;
		lastTab[tabSize] = p_con->cursor;
		tabSize++;
		break;
	default:
		if (p_con->cursor <
		    p_con->original_addr + p_con->v_mem_limit - 1) {
			*p_vmem++ = ch;
			/* Attribute byte is chosen by the current `color` value. */
			if (color == 2) {
				*p_vmem++ = F2_COLOR;
			} else if (color == 3) {
				*p_vmem++ = F3_COLOR;	/* blue */
			} else if (color == 4) {
				*p_vmem++ = F4_COLOR;
			} else if (color == 5) {
				*p_vmem++ = F5_COLOR;	/* green */
			} else {
				*p_vmem++ = DEFAULT_CHAR_COLOR;
			}
			p_con->cursor++;
		}
		break;
	}

	/* Keep the cursor inside the visible window. */
	while (p_con->cursor >= p_con->current_start_addr + SCREEN_SIZE) {
		scroll_screen(p_con, SCR_DN);
	}

	flush(p_con);
}
/**
 * Read the next token from the input and return it as a newly allocated Token.
 *
 * The start position (row, col) is sampled before the first character is read;
 * most tokens take their end position from row/col at the time of return.
 * Token kinds are tried in this fixed order: end of input, whitespace, end of
 * line, comment, string, variable, word, number, two-character token,
 * single-character token. If nothing matches, an Error token holding the
 * single character is returned.
 *
 * @return a Token created with `new`.
 *         NOTE(review): presumably the caller takes ownership - confirm.
 */
Token* Tokenizer::getToken()
{
	int startRow = row;
	int startCol = col;

	QChar c = getChar();  // get and store the next character from the string

	// catch the end of the input string
	if (atEnd) return new Token(Token::EndOfInput, "END", row, col, row, col);

	int cType = translator->look2type(c);  // since we need to know it often we store it

	// catch spaces: collect the whole run, then push back the first non-space character
	if (isSpace(c)) {
		QString look;
		do {
			// NOTE(review): both arms of this conditional are the same literal as
			// written here; confirm whether the tab arm was meant to differ.
			look += (isTab(c) ? " " : " ");
			c = getChar();
		} while (isSpace(c) && !atEnd);
		ungetChar();
		return new Token(Token::WhiteSpace, look, startRow, startCol, row, col);
	}

	// catch EndOfLine's: the end position is reported as column 1 of the next row
	if (isBreak(c)) {
		return new Token(Token::EndOfLine, "\\n", startRow, startCol, startRow+1, 1);
	}

	// catch comments: everything up to (not including) the next break or the end of input
	if (cType == Token::Comment) {
		QString look;
		do {
			look += c;
			c = getChar();
		} while (!isBreak(c) && !atEnd);
		ungetChar();
		return new Token(Token::Comment, look, startRow, startCol, row, col);
	}

	// catch strings: read until a delimiter that is not preceded by a backslash
	// (the text does not end in \"), or until a break or the end of input.
	// The terminating character is part of the token text and is not pushed back.
	if (cType == Token::StringDelimiter) {
		QString look = QString(c);
		do {
			c = getChar();
			look += c;
		} while (!(translator->look2type(c) == Token::StringDelimiter && look.right(2) != "\\\"") && !isBreak(c) && !atEnd);
		return new Token(Token::String, look, startRow, startCol, row, col);
	}

	// catch variables: the prefix followed by word characters, decimal digits or '_'
	if (cType == Token::VariablePrefix) {
		QString look;
		do {
			look += c;
			c = getChar();
		} while (isWordChar(c) || c.category() == QChar::Number_DecimalDigit || c == '_');
		ungetChar();
		return new Token(Token::Variable, look, startRow, startCol, row, col);
	}

	// catch words (known commands or function calls)
	if (isWordChar(c)) {  // first char has to be a letter
		QString look;
		do {
			look += c;
			c = getChar();
		} while (isWordChar(c) || c.isDigit() || c == '_');  // next chars
		ungetChar();
		// a word the translator does not know is treated as a function call
		int type = translator->look2type(look);
		if (type == Token::Unknown) type = Token::FunctionCall;
		return new Token(type, look, startRow, startCol, row, col);
	}

	// catch numbers: digits with at most one decimal separator
	if (c.isDigit() || cType == Token::DecimalSeparator) {
		bool hasDot = false;
		int localType = cType;
		QString look;
		do {
			if (localType == Token::DecimalSeparator) hasDot = true;
			look += c;
			c = getChar();
			localType = translator->look2type(c);
		} while (c.isDigit() || (localType == Token::DecimalSeparator && !hasDot));
		ungetChar();

		// if all we got is a dot then this is not a number, so return an Error token here
		if (translator->look2type(look) == Token::DecimalSeparator)
			return new Token(Token::Error, look, startRow, startCol, row, col);

		return new Token(Token::Number, look, startRow, startCol, row, col);
	}

	// catch previously uncaught 'double charactered tokens' (tokens that are not in letters, like: == != >= <=)
	// One extra character is read; it is pushed back if the pair is not a known token.
	{
		QString look = QString(c).append(getChar());
		int type = translator->look2type(look);
		if (type != Token::Unknown)
			return new Token(type, look, startRow, startCol, row, col);
		ungetChar();
	}

	// catch known tokens of a single character (as last...)
	if (cType != Token::Unknown)
		return new Token(cType, static_cast<QString>(c), startRow, startCol, row, col);

	// this does not neglect calls to functions with a name of length one (checked it)
	return new Token(Token::Error, static_cast<QString>(c), startRow, startCol, row, col);
}
/// Report whether `c` counts as horizontal whitespace for the tokenizer:
/// a character in the Unicode "space separator" category, a plain ' ',
/// or anything isTab() accepts.
bool Tokenizer::isSpace(const QChar& c)
{
	if (c.category() == QChar::Separator_Space) {
		return true;
	}
	if (c == ' ') {
		return true;
	}
	return isTab(c);
}