/**
 * Lex the string into tokens, each of which has a given offset into the string.
 *
 * The character at the current offset selects the first matching rule:
 *  (1) Space: if the next character is also a space, the pair becomes a single
 *      TAB token; otherwise the lone space becomes a SPACE token.
 *  (2) Tab: a TAB token.
 *  (3) Identifier character (whatever isChar accepts; per the original notes,
 *      upper/lower case letters and digits): consume the whole run and emit
 *      an IDENT token.
 *  (4) Comment start (isComment; '#' per the original notes): skip up to, but
 *      not including, the next newline. No token is emitted.
 *  (5) Newline: a NEWLINE token.
 *  (6) Colon: a COLON token.
 *  (7) Quote: scan to the closing quote and emit a STRING token covering the
 *      text between the quotes. An unterminated string ends at the end of
 *      the buffer.
 *  (8) Anything else: skipped, so the scan always makes progress.
 *
 * A NEWLINE token is always appended after the scan finishes.
 *
 * @param input  Buffer to tokenize; bytes [0, len) must be readable.
 * @param len    Number of bytes in input. The main loop only starts tokens at
 *               offsets below len - 1, so the final byte never starts a token
 *               (presumably a terminator -- confirm against callers).
 * @return       The first token after the internal placeholder head.
 */
Token* lex(char* input, int len) {
    /* Placeholder head so appending never has to special-case an empty list.
     * NOTE(review): the head itself is not released here; only first->next is
     * handed back to the caller. */
    Token* first = newToken(0, 0, 0);
    Token* last = first;
    int index = 0;

    while (index < len - 1) {
        int start = index;
        char cur = input[index];

        if (isSpace(cur)) {
            /* index + 1 <= len - 1 here, so the look-ahead stays in bounds. */
            if (isSpace(input[index + 1])) {
                index++;
                addNewToken(last, TAB, start, index);
            } else {
                addNewToken(last, SPACE, index, index);
            }
            index++;
        } else if (isTab(cur)) {
            index++;
            addNewToken(last, TAB, start, index);
        } else if (isChar(cur)) {
            /* Bounded by len so an identifier at the very end of the buffer
             * cannot run off the end. */
            do {
                index++;
            } while (index < len && isChar(input[index]));
            addNewToken(last, IDENT, start, index);
        } else if (isComment(cur)) {
            /* Stop on the newline (left for the next iteration) or at the end
             * of the buffer when the comment is the last line. */
            do {
                index++;
            } while (index < len && !isNewLine(input[index]));
        } else if (isNewLine(cur)) {
            index++;
            addNewToken(last, NEWLINE, index, index);
        } else if (isColon(cur)) {
            index++;
            addNewToken(last, COLON, index, index);
        } else if (isQuote(cur)) {
            do {
                index++;
            } while (index < len && !isQuote(input[index]));
            addNewToken(last, STRING, start + 1, index);
            if (index < len) {
                index++; /* Pass by the end quote. */
            }
        } else {
            /* Unrecognised character: step over it. Without this the loop
             * would never advance and would spin forever. */
            index++;
        }

        /* At most one token is appended per iteration; follow it if present. */
        if (last->next != NULL) last = last->next;
    }

    addNewToken(last, NEWLINE, index, index);
    return first->next;
}
void Token::print() const { if( eol() ) std::cout << "NEWLINE" ; else if( eof() ) std::cout << "ENDMARKER" ; else if( indent() ) std::cout << "INDENT"; else if( dedent() ) std::cout << "DEDENT"; else if( isOpenBrace() ) std::cout << " { "; else if( isCloseBrace() ) std::cout << " } "; else if( isComma() ) std::cout << " , "; else if( isPeriod()) std::cout<< "."; else if( isEqual() ) std::cout << " == "; else if( isNotEqual() ) std::cout << " != "; else if( isLessThan() ) std::cout << " < "; else if( isGreaterThan() ) std::cout << " > "; else if( isLessThanEqual() ) std::cout << " <= "; else if( isGreaterThanEqual() ) std::cout << " >= "; else if( isOpenParen() ) std::cout << " ( " ; else if( isCloseParen() ) std::cout << " ) " ; else if( isAssignmentOperator() ) std::cout << " = "; else if( isColon() ) std::cout << " : " ; else if( isMultiplicationOperator() ) std::cout << " * " ; else if( isAdditionOperator() ) std::cout << " + "; else if( isSubtractionOperator() ) std::cout << " - "; else if( isModuloOperator() ) std::cout << " % "; else if( isDivisionOperator() ) std::cout << " / "; else if( isFloorDivision() ) std::cout << " // "; else if( isOpenBrack() ) std::cout<< "["; else if( isCloseBrack() ) std::cout<< "]"; else if( isName() ) std::cout << getName(); else if( isKeyword() ) std::cout << getKeyword(); else if( isWholeNumber() ) std::cout << getWholeNumber(); else if( isFloat() ) std::cout << getFloat(); else if( isString() ) std::cout << getString(); else if( isCall() ) std::cout << "CALL " << getName(); else if( isSub() ) std::cout << "ARRAY SUB " << getName(); else if( isAppend() ) std::cout << "ARRAY APPEND " << getName(); else if( isPop() ) std::cout << "ARRAY POP " << getName(); else std::cout << "Uninitialized token.\n"; }
/**
 * Locate a run of number groups in `text` and derive a date-format pattern
 * (e.g. "yyyy<sep>MM<sep>dd") from the literal separators between them.
 *
 * Up to six number groups are collected. Consecutive groups must be joined by
 * a separator that textLen() reports as at most one character; a longer gap
 * ends the run. If fewer than two groups are found, scanning restarts after
 * the first group.
 *
 * NOTE(review): the final parsing step is commented out, so as written this
 * function always returns false; the derived pattern and date substring are
 * computed but not used.
 *
 * @param text  Text to scan.
 * @return      false in every case (see note above).
 */
bool CDateTime::parse_chinese(string text)
{
    if (text.empty())
        return false;

    // g[2k]   : offset of the first digit of number group k (from findDigit).
    // g[2k+1] : offset reported by findNonDigit for group k, -1 if none found.
    int g[12];
    int group;
    int cursor = 0;

    for (;;) {
        group = 0;
        for (int k = 0; k < 6; ++k) {
            const int lo = 2 * k;
            const int hi = lo + 1;

            g[lo] = findDigit(text, cursor);   // first digit of this group
            if (g[lo] == -1)
                break;
            ++group;

            g[hi] = findNonDigit(text, g[lo]);
            const bool openEnded = (g[hi] == -1);

            // No non-digit follows this group: reject it when it is the very
            // first group, or when the gap from the previous group's
            // non-digit to this group's digit exceeds one character.
            if (openEnded && (k == 0 || textLen(text, g[lo - 1], g[lo]) > 1)) {
                --group;
                break;
            }

            cursor = g[hi];                    // continue from the non-digit
            if (openEnded)
                break;

            // A separator longer than one character ends the run before
            // this group.
            if (k > 0 && textLen(text, g[lo - 1], g[lo]) > 1) {
                --group;
                break;
            }
        }

        if (group > 1)
            break;
        if (group == 0)
            return false;

        // Exactly one usable group: retry from just after it.
        cursor = g[1];
        if (cursor == -1)
            return false;
    }

    // Here 2 <= group <= 6. Decide whether the first group is a year.
    // Six groups always start with a year and two groups never do; for
    // three to five groups a colon-like separator at a fixed position (as
    // judged by isColon) means the run starts at the month instead.
    bool hasYear;
    if (group == 6)
        hasYear = true;
    else if (group == 2)
        hasYear = false;
    else if (group == 3)
        hasYear = !isColon(text, g[3], g[4]);
    else
        hasYear = !isColon(text, g[5], g[6]);

    static const char* const kFields[] = { "yyyy", "MM", "dd", "HH", "mm", "ss" };
    const char* const* fields = hasYear ? kFields : kFields + 1;

    // Pattern: field letters interleaved with the literal separator text
    // found between consecutive groups.
    string strb;
    for (int k = 0; k < group; ++k) {
        if (k > 0)
            strb += text.substr(g[2 * k - 1], g[2 * k] - g[2 * k - 1]);

        if (k == 0 && hasYear && g[1] - g[0] == 2)
            strb += "yy";                      // first group spans two characters
        else
            strb += fields[k];
    }

    // Slice of the input covered by the matched groups; it runs to the end of
    // the text when the last group has no following non-digit.
    const int tail = g[2 * group - 1];
    string datestr;
    if (tail == -1)
        datestr = text.substr(g[0], text.length());
    else
        datestr = text.substr(g[0], tail - g[0]);

    //SpiderDateFormat df(strb);
    return false; //df.parse(datestr,date);
}