示例#1
0
/**
 * Lex the first `len` characters of `input` into a linked list of tokens,
 * each carrying a pair of offsets into the input.
 *
 * The classification of a character is delegated to the isXxx() helpers;
 * the rules applied, in order, are:
 *  (1) isSpace: if the next character is also a space, the pair is emitted
 *      as a single TAB token; otherwise a SPACE token is emitted.
 *  (2) isTab: a TAB token.
 *  (3) isChar: consume the run of isChar characters and emit an IDENT.
 *  (4) isComment: skip everything up to (not including) the next newline;
 *      no token is emitted.
 *  (5) isNewLine: a NEWLINE token.
 *  (6) isColon: a COLON token.
 *  (7) isQuote: consume up to the closing quote and emit a STRING token
 *      whose offsets exclude the opening quote.
 *  (8) Anything else is skipped, so that unrecognised input cannot stall
 *      the lexer.
 *
 * A trailing NEWLINE token is always appended after the loop.
 *
 * Every scan is bounded by `len`, so an unterminated comment, string or
 * identifier at the end of the buffer stops at `len` instead of reading
 * past it.
 *
 * @param input  buffer to lex
 * @param len    number of characters of `input` to consider
 * @return the first real token (the node following the internal sentinel).
 *
 * NOTE(review): the sentinel node created by newToken(0, 0, 0) is not
 * released here; if newToken allocates, that node is leaked - confirm.
 */
Token* lex(char* input, int len) {
    Token* first = newToken(0, 0, 0);   /* sentinel head; never returned */
    Token* last = first;
    int index = 0;
    /* The loop stops one short of `len`, which keeps input[index+1] in
     * range for the look-ahead in the space rule. */
    while (index < len-1) {
        int start = index;
        char cur = input[index];
        if (isSpace(cur)) {
            if (isSpace(input[index+1])) {
                /* Two consecutive spaces collapse into one TAB token. */
                index++;
                addNewToken(last, TAB, start, index);
            } else {
                addNewToken(last, SPACE, index, index);
            }
            index++;
        } else if (isTab(cur)) {
            index++;
            addNewToken(last, TAB, start, index);
        } else if (isChar(cur)) {
            /* Advance to the first non-identifier character or to len. */
            while (++index < len && isChar(input[index]));
            addNewToken(last, IDENT, start, index);
        } else if (isComment(cur)) {
            /* Stop ON the newline so the next iteration emits NEWLINE. */
            while (++index < len && !isNewLine(input[index]));
        } else if (isNewLine(cur)) {
            index++;
            addNewToken(last, NEWLINE, index, index);
        } else if (isColon(cur)) {
            index++;
            addNewToken(last, COLON, index, index);
        } else if (isQuote(cur)) {
            /* Advance to the closing quote, or to len if unterminated. */
            while (++index < len && !isQuote(input[index]));
            addNewToken(last, STRING, start+1, index);
            if (index < len)
                index++; /* Pass by the end quote. */
        } else {
            /* Unrecognised character: skip it. Without this the loop
             * would never advance and would spin forever. */
            index++;
        }
        /* Branches that emit nothing (comments, skipped characters)
         * leave last->next unset, so only advance when a token exists. */
        if (last->next != NULL)
            last = last->next;
    }
    addNewToken(last, NEWLINE, index, index);

    return first->next;
}
示例#2
0
/**
 * Update the tracked line/column position for a character `ch` that has
 * just been taken from the input.
 *
 *  - Tab: the column moves forward to the next multiple of nTabSize.
 *  - End-of-line: the line counter is incremented and the column reset to
 *    zero; if the next pending character is a *different* end-of-line
 *    character it is consumed as well, so a two-character line ending
 *    counts as a single line break.
 *  - Anything else: the column advances by one.
 */
void Tokenizer::absorbed(std::streambuf::int_type ch)
{
    if (isTab(ch))
    {
        // Jump to the next tab stop.
        mCurrentColumn += nTabSize - (mCurrentColumn % nTabSize);
        return;
    }

    if (!isEOL(ch))
    {
        // Plain character.
        ++mCurrentColumn;
        return;
    }

    // Line break: start a new line at column zero.
    ++mCurrentLine;
    mCurrentColumn = 0;

    if (eof())
        return;

    // Swallow the second half of a two-character line ending; an identical
    // EOL character is left alone because it starts another (empty) line.
    int following = mpInput->peek();
    if (isEOL(following) && (following != ch))
        mpInput->get();
}
示例#3
0
文件: console.c 项目: LuGaXu/OS_LAB
/*======================================================================*
			   out_char
 *======================================================================*/
/*
 * Write one character to console `p_con` at its current cursor position,
 * then scroll if the cursor has left the visible area and flush.
 *
 * '\n', '\b' and '\t' are handled as cursor movements; every other
 * character is stored in video memory followed by an attribute byte
 * chosen from `color`.
 *
 * Besides `p_con`, this function reads and writes several names declared
 * outside of it: last_cursor, size, count, lastTab, tabSize and color.
 *
 * NOTE(review): none of the indices into last_cursor, size or lastTab
 * (i.e. count, size[i], tabSize) is checked against a capacity in this
 * function - confirm the array sizes at their declarations.
 */
PUBLIC void out_char(CONSOLE* p_con, char ch)
{
        
        /*if(clear){
               p_con->cursor=p_con->original_addr;
               clear=0;
        }*/

         /* Address of the cell under the cursor; each cell is two bytes
          * (character byte followed by attribute byte, see the default
          * case below). */
         u8* p_vmem = (u8*)(V_MEM_BASE + p_con->cursor * 2);
       // last_cursor[0]=p_con->original_addr;
        /* Look for the last_cursor row whose entry 0 holds this console's
         * original_addr; i ends up as its index, or as count if absent. */
        int i=0;
        for(i;i<count;i++){
             if(last_cursor[i][0]==p_con->original_addr)
                     break;
        }
        /* First time this console is seen: clear a fresh row, tag it with
         * original_addr in entry 0 and register it. */
        if(i==count){
             int j=0;
             for(;j<100;j++){
                    last_cursor[i][j]=0;
             }
             size[i]=0;
             last_cursor[i][0]=p_con->original_addr;
             count++;
        }

/*
	unsigned int	current_start_addr;	 position currently displayed from
	unsigned int	original_addr;		 video memory location of this console
	unsigned int	v_mem_limit;		 size of the video memory used by this console
	unsigned int	cursor;			 current cursor position
*/
        //last_cursor[i][++size[i]]=p_con->cursor;

        // disp_str(shit+'0');
       // last_cursor[i][shit]=i;

        /* NOTE(review): this j is not referenced anywhere below. */
        int j=0;
	switch(ch) {
	case '\n':
		/* Only when the cursor is not already within the last
		 * SCREEN_WIDTH cells of this console's memory. */
		if (p_con->cursor < p_con->original_addr +
		    p_con->v_mem_limit - SCREEN_WIDTH) {
                       /* Remember where the line ended so a later '\b' at
                        * the start of the next row can return here. */
                       last_cursor[i][++size[i]]=p_con->cursor;
			/* Move to the first column of the following row. */
			p_con->cursor = p_con->original_addr + SCREEN_WIDTH * 
				((p_con->cursor - p_con->original_addr) /
				 SCREEN_WIDTH + 1);
		}
		break;
	case '\b':
		if (p_con->cursor > p_con->original_addr) {
                        /* isTab(p_con) is defined elsewhere; presumably it
                         * reports that the cursor sits right after a tab
                         * recorded in lastTab - TODO confirm. */
                        if(isTab(p_con)){
                            /* Undo the 4-cell advance made by '\t'. */
                            p_con->cursor=p_con->cursor-4;
                            /* Blanks only the single cell before the OLD
                             * cursor position (p_vmem was computed before
                             * the cursor moved). */
                            *(p_vmem-2) = ' ';
			    *(p_vmem-1) = DEFAULT_CHAR_COLOR;
                            tabSize--;
                        }
                        else{ 
                          /* At the first column of a row: go back to the
                           * position saved by the matching '\n', or to
                           * entry 0 (original_addr) when nothing usable is
                           * saved. */
                          if((p_con->cursor-p_con->original_addr)%SCREEN_WIDTH==0){
                              if(size[i]>=1&&size[i]<=99){
                                     p_con->cursor=last_cursor[i][size[i]--];
                               }else
                                  p_con->cursor=last_cursor[i][0];
                              
                           }else
			       p_con->cursor--;
                        
			/* Blank the cell before the old cursor position. */
			*(p_vmem-2) = ' ';
			*(p_vmem-1) = DEFAULT_CHAR_COLOR;
                        }
		}
		break;
        case '\t':
                   /* A tab advances the cursor by 4 cells without writing
                    * anything; the resulting position is recorded in
                    * lastTab.
                    * NOTE(review): unlike the other cases there is no
                    * check against v_mem_limit here. */
                   p_con->cursor+=4;
                   lastTab[tabSize]=p_con->cursor;
                   tabSize++;
               break;
	default:
		if (p_con->cursor <
		    p_con->original_addr + p_con->v_mem_limit - 1) {
			/* Character byte first, attribute byte second. */
			*p_vmem++ = ch;
                        //*p_vmem++ = DEFAULT_CHAR_COLOR;
                        if(color==2){
			   *p_vmem++ = F2_COLOR; 
                         }else if(color==3){
			   *p_vmem++ = F3_COLOR; //blue
                         }else if(color==4){
			   *p_vmem++ = F4_COLOR;                            
                         }else if(color==5){//green
			   *p_vmem++ = F5_COLOR;                          
                         }else
			   *p_vmem++=DEFAULT_CHAR_COLOR;   
                        //last_cursor= p_con->cursor;                     
                        p_con->cursor++;
		}
		break;
	}

       /* if(tab){
              lastTab[tabSize]=p_con->cursor;
              tabSize++;
              tab=0;
        }*/

	/* Scroll until the cursor is back inside the displayed window. */
	while (p_con->cursor >= p_con->current_start_addr + SCREEN_SIZE) {
		scroll_screen(p_con, SCR_DN);
	}

	flush(p_con);
}
示例#4
0
/**
 * Read the next token from the input and return it as a newly allocated
 * Token.
 *
 * The token records the row/column at which it started (captured before
 * the first character is read) and the values of `row`/`col` at the moment
 * the token is constructed. The first character decides the kind of token;
 * the checks are made in this order: end of input, whitespace, end of
 * line, comment, string, variable, word, number, two-character token,
 * single-character token, and finally Error.
 *
 * Most branches read one character too many and give it back with
 * ungetChar(); the string branch does not, so the character that ended the
 * string is part of the token text.
 *
 * @return a Token created with `new`; presumably the caller takes
 *         ownership - TODO confirm against callers.
 */
Token* Tokenizer::getToken()
{
	// remember where the token starts, before anything is consumed
	int startRow = row;
	int startCol = col;

	QChar c = getChar();  // get and store the next character from the string

	// catch the end of the input string
	if (atEnd)
		return new Token(Token::EndOfInput, "END", row, col, row, col);

	int cType = translator->look2type(c);  // since we need to know it often we store it

	// catch spaces: a run of whitespace becomes one token in which every
	// tab contributes two spaces and every other space character one
	if (isSpace(c)) {
		QString look;
		do {
			look += (isTab(c) ? "  " : " ");
			c = getChar();
		} while (isSpace(c) && !atEnd);
		ungetChar();
		return new Token(Token::WhiteSpace, look, startRow, startCol, row, col);
	}

	// catch EndOfLine's; the end position is given as column 1 of the next row
	if (isBreak(c)) {
		return new Token(Token::EndOfLine, "\\n", startRow, startCol, startRow+1, 1);
	}

	// catch comments: everything up to (not including) the line break
	if (cType == Token::Comment) {
		QString look;
		do {
			look += c;
			c = getChar();
		} while (!isBreak(c) && !atEnd);
		ungetChar();
		return new Token(Token::Comment, look, startRow, startCol, row, col);
	}

	// catch strings: read until a string delimiter that is not preceded by
	// a backslash (the text must not end in \"), a line break, or the end
	// of the input; the terminating character is appended to the text and
	// is not given back
	if (cType == Token::StringDelimiter) {
		QString look = QString(c);
		do {
			c = getChar();
			look += c;
		} while (!(translator->look2type(c) == Token::StringDelimiter && look.right(2) != "\\\"") &&
		         !isBreak(c) && !atEnd);
		return new Token(Token::String, look, startRow, startCol, row, col);
	}

	// catch variables: the prefix followed by word characters, decimal
	// digits and underscores
	if (cType == Token::VariablePrefix) {
		QString look;
		do {
			look += c;
			c = getChar();
		} while (isWordChar(c) || c.category() == QChar::Number_DecimalDigit || c == '_');
		ungetChar();
		return new Token(Token::Variable, look, startRow, startCol, row, col);
	}

	// catch words (known commands or function calls)
	if (isWordChar(c)) {  // first char has to be a letter
		QString look;
		do {
			look += c;
			c = getChar();
		} while (isWordChar(c) || c.isDigit() || c == '_');  // next chars
		ungetChar();
		// a word the translator does not know is treated as a function call
		int type = translator->look2type(look);
		if (type == Token::Unknown)
			type = Token::FunctionCall;
		return new Token(type, look, startRow, startCol, row, col);
	}

	// catch numbers: digits with at most one decimal separator
	if (c.isDigit() || cType == Token::DecimalSeparator) {
		bool hasDot = false;

		int localType = cType;
		QString look;
		do {
			if (localType == Token::DecimalSeparator) hasDot = true;
			look += c;
			c = getChar();
			localType = translator->look2type(c);
		} while (c.isDigit() || (localType == Token::DecimalSeparator && !hasDot));
		ungetChar();
		
		// if all we got is a dot then this is not a number, so return an Error token here
		if (translator->look2type(look) == Token::DecimalSeparator)
			return new Token(Token::Error, look, startRow, startCol, row, col);
		
		return new Token(Token::Number, look, startRow, startCol, row, col);
	}

	// catch two-character tokens not caught so far (tokens that are not made of letters, like: == != >= <=)
	{
		QString look = QString(c).append(getChar());
		int type = translator->look2type(look);
		if (type != Token::Unknown)
			return new Token(type, look, startRow, startCol, row, col);
		// not a known pair: give the second character back
		ungetChar();
	}

	// catch known tokens of a single character (as last...)
	if (cType != Token::Unknown)
		return new Token(cType, static_cast<QString>(c), startRow, startCol, row, col);

	// this does not neglect calls to functions with a name of length one (checked it)
	return new Token(Token::Error, static_cast<QString>(c), startRow, startCol, row, col);
}
示例#5
0
/**
 * Tell whether `c` counts as horizontal whitespace for the tokenizer:
 * any character in the Unicode "space separator" category, the plain
 * ASCII space, or whatever isTab() accepts.
 */
bool Tokenizer::isSpace(const QChar& c)
{
	if (c.category() == QChar::Separator_Space)
		return true;

	if (c == ' ')
		return true;

	return isTab(c);
}