RegExpressionT *B(char *e, int *place) { int input; RegExpressionT *ex, *cex; input = NextCharacter(e, *place); switch(input) { default: if (!ValidForB(input)) { #if ERR_MESG fprintf(stderr, "Bad regexp: character %c.\n", input); fprintf(stderr, "%s == at %d\n", e, *place); #endif return NULL; } else ex = C(e, place); } if (!ex) return NULL; input = NextCharacter(e, *place); switch(input) { case '*': AdvanceParser(e, place); cex = NewRegularExpression(); cex->type = KLEENE; cex->term = ex; return cex; case '+': AdvanceParser(e, place); cex = NewRegularExpression(); cex->type = POSCLOSE; cex->term = ex; return cex; case '?': AdvanceParser(e, place); cex = NewRegularExpression(); cex->type = OPTION; cex->term = ex; return cex; case '|': case '\0': case ')': return ex; default: if (!ValidForB(input)) { #if ERR_MESG fprintf(stderr, "Bad regexp: character %c.\n", input); fprintf(stderr, "%s == at %d\n", e, *place); #endif FreeRegularExpression(ex); return NULL; } else return ex; } }
RegExpressionT *D(char *e, int *place) { int input, peek, nextchar, result, invert = 0; RegExpressionT *ex; CharSetT *cs; input = NextCharacter(e, *place); if (input == '^') { invert = 1; AdvanceParser(e, place); } cs = CreateCharSet(); while(1) { input = NextCharacter(e, *place); switch(input) { case ']': if (invert) InvertCharSet(cs); ex = NewRegularExpression(); ex->type = CHARSET; ex->charset = cs; return ex; default: if (!ValidForD(input)) { #if ERR_MESG fprintf(stderr, "Bad regexp: character %c.\n", input); fprintf(stderr, "%s == at %d\n", e, *place); #endif free (cs); return NULL; } if (input == '\\') peek = 2; else if (input == '#') peek = 4; else peek = 1; nextchar = PeekCharacter(e, place, peek); if (nextchar == '-') result = AddRange(e, place, cs); else result = AddSymbol(e, place, cs); if (result == 0) { #if ERR_MESG fprintf(stderr, "Failed to create character set.\n"); fprintf(stderr, "%s == at %d\n", e, *place); #endif free (cs); return NULL; } } } return NULL; }
/*
 * PeekCharacter: report the character found `steps` parser advances ahead
 * of the current position without moving the cursor.
 *
 *   string  the text being parsed
 *   place   cursor into string; temporarily advanced, restored on return
 *   steps   number of AdvanceParser calls to make before reading
 *
 * Returns whatever NextCharacter yields at the advanced position.
 */
int PeekCharacter(char *string, int *place, int steps)
{
    const int saved_place = *place;
    int remaining;
    int ahead;

    for (remaining = steps; remaining > 0; remaining--)
        AdvanceParser(string, place);

    ahead = NextCharacter(string, *place);

    /* Undo the temporary advance so the caller's cursor is unchanged. */
    *place = saved_place;
    return ahead;
}
/*
 * C: parse a primary term - a bracketed character set, a parenthesized
 * sub-expression, or a single symbol.
 *
 *   e      the regular-expression text being parsed
 *   place  in/out cursor into e
 *
 * '[' delegates to D and then requires ']'; '(' delegates to P and then
 * requires ')'.  Both delimiters are consumed here.  Any other character
 * accepted by ValidForC is handed to S.
 *
 * Returns the parsed node, or NULL when the character is rejected by
 * ValidForC, the delegate fails, or the closing delimiter is missing (the
 * delegate's node is then released with FreeRegularExpression).
 */
RegExpressionT *C(char *e, int *place)
{
    int opener, closer;
    RegExpressionT *inner;

    opener = NextCharacter(e, *place);

    if (opener != '[' && opener != '(') {
        if (ValidForC(opener))
            return S(e, place);
#if ERR_MESG
        fprintf(stderr, "Bad regexp: character %c.\n", opener);
        fprintf(stderr, "%s == at %d\n", e, *place);
#endif
        return NULL;
    }

    AdvanceParser(e, place);
    if (opener == '[') {
        inner = D(e, place);
        closer = ']';
    } else {
        inner = P(e, place);
        closer = ')';
    }
    if (!inner)
        return NULL;

    if (NextCharacter(e, *place) != closer) {
        /* Note: the diagnostic names the opening delimiter, not the
         * character actually found where the closer was expected. */
#if ERR_MESG
        fprintf(stderr, "Bad regexp: character %c.\n", opener);
        fprintf(stderr, "%s == at %d\n", e, *place);
#endif
        FreeRegularExpression(inner);
        return NULL;
    }

    AdvanceParser(e, place);
    return inner;
}
RegExpressionT *P(char *e, int *place) { int input; RegExpressionT *ex, *uex; input = NextCharacter(e, *place); switch(input) { case ')': return (NewRegularExpression()); /* empty expression = empty string (defined) */ default: if (!ValidForE(input)) { #if ERR_MESG fprintf(stderr, "Bad regexp: character %c.\n", input); fprintf(stderr, "%s == at %d\n", e, *place); #endif return NULL; } else ex = A(e, place); break; } if (!ex) return NULL; input = NextCharacter(e, *place); switch(input) { case ')': return ex; /* end of parenthesized subexpression */ case '|': AdvanceParser(e, place); /* continue with unioned expression */ uex = NewRegularExpression(); uex->type = UNION; uex->term = ex; uex->next = P(e, place); if (!uex->next) { FreeRegularExpression(uex); return NULL; } else return uex; default: FreeRegularExpression(ex); #if ERR_MESG fprintf(stderr, "Bad regexp: character %c.\n", input); fprintf(stderr, "%s == at %d\n", e, *place); #endif return NULL; /* indicates no other input accepted */ } return NULL; }
RegExpressionT *A(char *e, int *place) { int input; RegExpressionT *ex, *cex; input = NextCharacter(e, *place); switch (input) { default: if (!ValidForA(input)) { #if ERR_MESG fprintf(stderr, "Bad regexp: character %c.\n", input); fprintf(stderr, "%s == at %d\n", e, *place); #endif return NULL; } else ex = B(e, place); break; } if (!ex) return NULL; input = NextCharacter(e, *place); switch(input) { case '|': case '\0': case ')': return ex; /* end of concatenation */ default: if (!ValidForA(input)) { #if ERR_MESG fprintf(stderr, "Bad regexp: character %c.\n", input); fprintf(stderr, "%s == at %d\n", e, *place); #endif FreeRegularExpression(ex); return NULL; } cex = NewRegularExpression(); cex->type = CONCAT; cex->term = ex; cex->next = A(e, place); if (!cex->next) { FreeRegularExpression(cex); return NULL; } else return cex; } return NULL; }
//----------------------------------------------------------------------------- TOKEN QuotedToken(READFILE Stream,char OpeningQuote,TokenType Type) { static SuperString LocalValue; int Index; Index = 0; LocalValue[Index] = OpeningQuote; do { IncrementTokenIndex(Stream,&Index); LocalValue[Index] = NextCharacter(Stream); //----Check legality - only visibles and only quote and escape escaped if (LocalValue[Index] < ' ' || LocalValue[Index] > '~') { CharacterError(Stream); } if (LocalValue[Index] == '\\') { IncrementTokenIndex(Stream,&Index); LocalValue[Index] = NextCharacter(Stream); if (LocalValue[Index] != OpeningQuote && LocalValue[Index] != '\\') { CharacterError(Stream); } } } while (LocalValue[Index] != OpeningQuote || LocalValue[Index-1] == '\\'); IncrementTokenIndex(Stream,&Index); LocalValue[Index] = '\0'; //----Strip '' quotes from regular lower words if (LocalValue[0] == '\'' && islower(LocalValue[1])) { Index = 1; //----Make sure it's legal without the ''s while (isalnum(LocalValue[Index]) || LocalValue[Index] == '_') { Index++; } if (Index == strlen(LocalValue) -1) { LocalValue[Index] = '\0'; return(BuildToken(lower_word,&(LocalValue[1]))); } } return(BuildToken(Type,LocalValue)); }
/*
 * AddSymbol: consume one character-set item at the cursor and add the
 * character it denotes to cs.
 *
 *   e      the regular-expression text being parsed
 *   place  in/out cursor into e
 *   cs     character set receiving the symbol
 *
 * Three item forms are recognised:
 *   \c     the character following the backslash
 *   #ddd   the character whose code is the three decimal digits ddd
 *   c      any other character stands for itself
 *
 * Returns 1 after adding the character, or 0 (adding nothing) when a '#'
 * item is not followed by three digits or its value exceeds 255.  The
 * cursor has already moved past the item in the failure case.
 */
int AddSymbol(char *e, int *place, CharSetT *cs)
{
    int lead, value, digits[3], i;

    lead = NextCharacter(e, *place);
    AdvanceParser(e, place);
    value = lead;

    if (lead == '\\') {
        value = NextCharacter(e, *place);
        AdvanceParser(e, place);
    } else if (lead == '#') {
        /* All three characters are consumed before any is validated. */
        for (i = 0; i < 3; i++) {
            digits[i] = NextCharacter(e, *place);
            AdvanceParser(e, place);
        }
        for (i = 0; i < 3; i++) {
            if (!isdigit(digits[i]))
                return 0;
        }
        value = (digits[0] - '0') * 100 + (digits[1] - '0') * 10
                + digits[2] - '0';
        if (value > 255)
            return 0;
    }

    AddCharacter(value, cs);
    return 1;
}
//-----------------------------------------------------------------------------
//----Read and return the next token from Stream.
//----Leading whitespace is skipped. Block comments (/* ... */) and line
//----comments (starting with % or #) are returned as comment_token only when
//----Stream->NeedNonLogicTokens is set; otherwise they are discarded and the
//----token after them is returned. Two consecutive newlines are returned as
//----blank_line_token under the same flag.
//----Stream->Overshot is the one-character push-back flag: it is set whenever
//----a character has been read that does not belong to the token being
//----returned, and on entry it selects CurrentCharacter (re-use) instead of
//----NextCharacter (read).
//----Returns the token built by BuildToken/QuotedToken, or NULL if control
//----falls out of the switch after CharacterError.
//----NOTE(review): the trailing return(NULL) suggests CharacterError normally
//----does not return - confirm.
TOKEN GetNextToken(READFILE Stream) {

    int CurrentChar,PreviousChar;
    int Index;
//----static so it doesn't have to get allocated everytime (very often!)
    static SuperString LocalValue;

//DEBUG printf("char was ==%c==\n",CurrentCharacter(Stream));
//----Pick up the pushed-back character if the previous call overshot
    if (Stream->Overshot) {
//DEBUG printf("overshot\n");
        CurrentChar = CurrentCharacter(Stream);
    } else {
//DEBUG printf("get next\n");
        CurrentChar = NextCharacter(Stream);
    }
    Stream->Overshot = 0;

//----Skip whitespace
    while (isspace(CurrentChar)) {
        PreviousChar = CurrentChar;
        CurrentChar = NextCharacter(Stream);
//----Check for a blank line, if required
        if (Stream->NeedNonLogicTokens && PreviousChar == '\n' &&
CurrentChar == '\n') {
            return(BuildToken(blank_line_token,""));
        }
    }

//DEBUG printf("char is ==%c==\n",CurrentChar);
    switch (CurrentChar) {
//----Block comment: a / must be followed by *, then everything up to the
//----first / that is preceded by * is collected
        case '/':
            Index = 0;
            LocalValue[Index++] = CurrentChar;
            PreviousChar = CurrentChar;
            CurrentChar = NextCharacter(Stream);
            if (CurrentChar == '*') {
                LocalValue[Index] = CurrentChar;
                while (CurrentChar != EOF &&
(CurrentChar != '/' || PreviousChar != '*')) {
                    PreviousChar = CurrentChar;
                    CurrentChar = NextCharacter(Stream);
                    IncrementTokenIndex(Stream,&Index);
                    LocalValue[Index] = CurrentChar;
                }
//----The loop also stops on EOF, which is an unterminated comment
                if (CurrentChar == '/') {
//----Add eoln if it's there
                    CurrentChar = NextCharacter(Stream);
                    if (CurrentChar == '\n') {
                        IncrementTokenIndex(Stream,&Index);
                        LocalValue[Index] = CurrentChar;
                    } else {
                        Stream->Overshot = 1;
                    }
                    IncrementTokenIndex(Stream,&Index);
                    LocalValue[Index] = '\0';
                    if (Stream->NeedNonLogicTokens) {
                        return(BuildToken(comment_token,LocalValue));
                    } else {
                        return(GetNextToken(Stream));
                    }
                } else {
                    CharacterError(Stream);
                }
            } else {
                CharacterError(Stream);
            }
            break;
//----Line comment: runs to the end of the line (newline not included)
        case '%':
        case '#':
            if (Stream->NeedNonLogicTokens) {
                Index = 0;
                do {
                    LocalValue[Index] = CurrentChar;
                    IncrementTokenIndex(Stream,&Index);
                    CurrentChar = NextCharacter(Stream);
                } while (CurrentChar != '\n' && CurrentChar != EOF);
                LocalValue[Index] = '\0';
                Stream->Overshot = 1;
                return(BuildToken(comment_token,LocalValue));
            } else {
//----Discard sequences of comments (recursive approach gave stack overflow)
                do {
                    while (CurrentChar != '\n' && CurrentChar != EOF) {
                        CurrentChar = NextCharacter(Stream);
                    }
                    CurrentChar = NextCharacter(Stream);
                } while (CurrentChar == '%' || CurrentChar == '#');
                Stream->Overshot = 1;
                return(GetNextToken(Stream));
            }
            break;
        case '\'':
            return(QuotedToken(Stream,'\'',lower_word));
            break;
        case '(':
            return(BuildToken(punctuation,"("));
            break;
        case ')':
            return(BuildToken(punctuation,")"));
            break;
        case '[':
            return(BuildToken(punctuation,"["));
            break;
        case ']':
            return(BuildToken(punctuation,"]"));
            break;
//----For the multi-character operators below, one more character is read;
//----if it does not extend the operator it is pushed back via Overshot
        case '!':
            CurrentChar = NextCharacter(Stream);
            if (CurrentChar == '=') {
                return(BuildToken(lower_word,"!="));
            } else if (CurrentChar == '>') {
                return(BuildToken(quantifier,"!>"));
            } else if (CurrentChar == '!') {
                return(BuildToken(unary_connective,"!!"));
            } else {
                Stream->Overshot = 1;
                return(BuildToken(quantifier,"!"));
            }
            break;
        case '?':
            CurrentChar = NextCharacter(Stream);
            if (CurrentChar == '*') {
                return(BuildToken(quantifier,"?*"));
            } else if (CurrentChar == '?') {
                return(BuildToken(unary_connective,"??"));
            } else {
                Stream->Overshot = 1;
                return(BuildToken(quantifier,"?"));
            }
            break;
        case '^':
            return(BuildToken(quantifier,"^"));
            break;
        case '.':
            return(BuildToken(punctuation,"."));
            break;
        case ':':
            return(BuildToken(punctuation,":"));
            break;
        case ',':
            return(BuildToken(punctuation,","));
            break;
//----A lone < is not a token: only <=, <=>, <~> and << are accepted
        case '<':
            CurrentChar = NextCharacter(Stream);
            if (CurrentChar == '='){
                CurrentChar = NextCharacter(Stream);
                if (CurrentChar == '>') {
                    return(BuildToken(binary_connective,"<=>"));
                } else {
                    Stream->Overshot = 1;
                    return(BuildToken(binary_connective,"<="));
                }
            } else if (CurrentChar == '~') {
                CurrentChar = NextCharacter(Stream);
                if (CurrentChar == '>') {
                    return(BuildToken(binary_connective,"<~>"));
                } else {
                    CharacterError(Stream);
                }
            } else if (CurrentChar == '<') {
                return(BuildToken(punctuation,"<<"));
            } else {
                CharacterError(Stream);
            }
            break;
        case '=':
            CurrentChar = NextCharacter(Stream);
            if (CurrentChar == '>') {
                return(BuildToken(binary_connective,"=>"));
            } else {
                Stream->Overshot = 1;
                return(BuildToken(lower_word,"="));
            }
            break;
        case '~':
            CurrentChar = NextCharacter(Stream);
            if (CurrentChar == '|') {
                return(BuildToken(binary_connective,"~|"));
            } else if (CurrentChar == '&') {
                return(BuildToken(binary_connective,"~&"));
            } else {
                Stream->Overshot = 1;
                return(BuildToken(unary_connective,"~"));
            }
            break;
//----A + or - directly followed by a digit starts a signed number;
//----NumberToken fills LocalValue and sets Overshot itself in that case
        case '+':
            CurrentChar = NextCharacter(Stream);
            if (CurrentChar == '+') {
                return(BuildToken(unary_connective,"++"));
            } else if (NumberToken(Stream,'+',CurrentChar,LocalValue)) {
                return(BuildToken(number,LocalValue));
            } else {
                Stream->Overshot = 1;
                return(BuildToken(binary_connective,"+"));
            }
            break;
        case '-':
            CurrentChar = NextCharacter(Stream);
            if (CurrentChar == '-') {
                CurrentChar = NextCharacter(Stream);
                if (CurrentChar == '>') {
                    return(BuildToken(binary_connective,"-->"));
                } else {
                    Stream->Overshot = 1;
                    return(BuildToken(unary_connective,"--"));
                }
//----Code copied from below for numbers
            } else if (NumberToken(Stream,'-',CurrentChar,LocalValue)) {
                return(BuildToken(number,LocalValue));
            } else {
                Stream->Overshot = 1;
                return(BuildToken(punctuation,"-"));
            }
            break;
        case '"':
            return(QuotedToken(Stream,'"',distinct_object));
            break;
        case '|':
            return(BuildToken(binary_connective,"|"));
            break;
        case '&':
            return(BuildToken(binary_connective,"&"));
            break;
        case '@':
            CurrentChar = NextCharacter(Stream);
            if (CurrentChar == '+') {
                return(BuildToken(quantifier,"@+"));
            } else if (CurrentChar == '-') {
                return(BuildToken(quantifier,"@-"));
            } else {
                Stream->Overshot = 1;
                return(BuildToken(binary_connective,"@"));
            }
            break;
        case '>':
            return(BuildToken(binary_connective,">"));
            break;
        case '*':
            return(BuildToken(binary_connective,"*"));
            break;
        case EOF:
            return(BuildToken(endeof,""));
            break;
//----Words and unsigned numbers
        default:
            Index = 0;
//----Lower words start with a lower case letter or $
            if (CurrentChar == '$' || islower(CurrentChar)) {
                do {
                    LocalValue[Index] = CurrentChar;
                    CurrentChar = NextCharacter(Stream);
                    IncrementTokenIndex(Stream,&Index);
                } while (isalnum(CurrentChar) || CurrentChar=='_' ||
//----Allow second $ for system predicates and functions
(Index == 1 && CurrentChar == '$' && LocalValue[0] == '$'));
                LocalValue[Index] = '\0';
                Stream->Overshot = 1;
//----Ensure $ words have some length
                Index = 0;
                while (LocalValue[Index] == '$') {
                    Index++;
                }
                if (Index > 0 && !islower(LocalValue[Index])) {
                    CharacterError(Stream);
                }
//----Replace equal by = for now (can remove in future)
//----At some point I did comment this out, but it's still needed for
//----reformatting old, e.g., EP proofs.
//                if (!strcmp(LocalValue,"equal")) {
//                    strcpy(LocalValue,"=");
//                }
                return(BuildToken(lower_word,LocalValue));
//----Upper words start with an upper case letter
            } else if (isupper(CurrentChar)) {
                do {
                    LocalValue[Index] = CurrentChar;
                    CurrentChar = NextCharacter(Stream);
                    IncrementTokenIndex(Stream,&Index);
                } while (isalnum(CurrentChar) || (CurrentChar=='_'));
                LocalValue[Index] = '\0';
                Stream->Overshot = 1;
//----Nasty hack to allow end of file to be specified by user on input stream
                if (!strcmp(LocalValue,"EOF__")) {
                    return(BuildToken(endeof,""));
                } else {
                    return(BuildToken(upper_word,LocalValue));
                }
//----Numbers
            } else if (NumberToken(Stream,'\0',CurrentChar,LocalValue)) {
                return(BuildToken(number,LocalValue));
            } else {
                CharacterError(Stream);
            }
            break;
    }

//----Need a default return for the error cases which compiler doesn't get
    return(NULL);
}
//----------------------------------------------------------------------------- int NumberToken(READFILE Stream,char PreviousChar,char CurrentChar, SuperString LocalValue) { int Index; if (isdigit(CurrentChar)) { Index = 0; //----If signed, keep sign if (PreviousChar != '\0') { LocalValue[Index++] = PreviousChar; } do { LocalValue[Index] = CurrentChar; CurrentChar = NextCharacter(Stream); IncrementTokenIndex(Stream,&Index); } while (isdigit(CurrentChar)); //----Rationals, and reals from SPASS-XDB (what is personal hack) if (CurrentChar == '/' || CurrentChar == '\\') { LocalValue[Index] = CurrentChar; CurrentChar = NextCharacter(Stream); IncrementTokenIndex(Stream,&Index); if (CurrentChar == '+' || CurrentChar == '-') { LocalValue[Index] = CurrentChar; CurrentChar = NextCharacter(Stream); IncrementTokenIndex(Stream,&Index); } //----Check there's something in the denominator if (isdigit(CurrentChar)) { do { LocalValue[Index] = CurrentChar; CurrentChar = NextCharacter(Stream); IncrementTokenIndex(Stream,&Index); } while (isdigit(CurrentChar)); } else { CharacterError(Stream); } } else { //----Reals if (CurrentChar == '.') { do { LocalValue[Index] = CurrentChar; CurrentChar = NextCharacter(Stream); IncrementTokenIndex(Stream,&Index); } while (isdigit(CurrentChar)); } if (CurrentChar == 'E' || CurrentChar == 'e') { LocalValue[Index] = CurrentChar; CurrentChar = NextCharacter(Stream); IncrementTokenIndex(Stream,&Index); if (CurrentChar == '+' || CurrentChar == '-') { LocalValue[Index] = CurrentChar; CurrentChar = NextCharacter(Stream); IncrementTokenIndex(Stream,&Index); } if (isdigit(CurrentChar)) { do { LocalValue[Index] = CurrentChar; CurrentChar = NextCharacter(Stream); IncrementTokenIndex(Stream,&Index); } while (isdigit(CurrentChar)); } else { //----Exponent without numbers CharacterError(Stream); } } } LocalValue[Index] = '\0'; Stream->Overshot = 1; return(1); } else { return(0); } }
// Reads characters from fp and returns the next token.
//
// The function is a hand-written state machine: `state` is the current DFA
// state, `buff` accumulates the characters of the token in progress and
// `currBuffPos` is the index of the last character stored in it.  Every
// character read is appended to buff before the state is examined.
//
// When a state reads one character too many it decrements `bufferPos`
// (declared outside this function) and overwrites that character in buff
// with '\0' before building the token.
// NOTE(review): decrementing bufferPos presumably makes NextCharacter return
// the same character again on the next call - confirm against NextCharacter.
//
// Returns the value of InitializeToken for a recognised token, or of
// InitializeErrorToken (with a message formatted into the external
// `errormsg`) for a lexical error.  Newlines seen in state 0 and at the end
// of a comment increment the external `lineNo`.
// NOTE(review): states 21, 22 and 23 append to buff without any length check
// against MAX_LENGTH.
tokenInfo* getNextToken(FILE *fp)
{
	int state=0,currBuffPos=-1;
	char cur=0;
	char buff[MAX_LENGTH];
	while(true)
	{
		cur = NextCharacter(fp);
		// 26 is treated as the end-of-input marker (presumably what
		// NextCharacter returns at end of file - TODO confirm).
		if(cur==26)
		{
			return InitializeToken("$1",TK_DOLLAR);
		}
		currBuffPos++;
		buff[currBuffPos]=cur;
		switch(state)
		{
			// State 0: start of a token.
			case 0:
				switch(cur)
				{
					// Newline counts a line, then falls through to the
					// other whitespace characters, which reset the buffer.
					case '\n':
						lineNo++;
					case '\t':
					case '\r':
					case ' ':
						currBuffPos=-1;
						break;
					case '<':
						state=1;
						break;
					case '>':
						state=5;
						break;
					case '=':
						state = 8;
						break;
					case '!':
						state=10;
						break;
					case '&':
						state=12;
						break;
					case '@':
						state=15;
						break;
					case '#':
						state=18;
						break;
					case '_':
						state=20;
						break;
					// Single-character tokens.
					case ']':
						return InitializeToken("]",TK_SQR);
					case '[':
						return InitializeToken("[",TK_SQL);
					case '/':
						return InitializeToken("/",TK_DIV);
					case '-':
						return InitializeToken("-",TK_MINUS);
					case '*':
						return InitializeToken("*",TK_MUL);
					case '(':
						return InitializeToken("(",TK_OP);
					case ')':
						return InitializeToken(")",TK_CL);
					case '.':
						return InitializeToken(".",TK_DOT);
					case ',':
						return InitializeToken(",",TK_COMMA);
					case ':':
						return InitializeToken(":",TK_COLON);
					case '+':
						return InitializeToken("+",TK_PLUS);
					case ';':
						return InitializeToken(";",TK_SEM);
					case '~':
						return InitializeToken("~",TK_NOT);
					case '0':
					case '1':
					case '2':
					case '3':
					case '4':
					case '5':
					case '6':
					case '7':
					case '8':
					case '9':
						state=23;
						break;
					// b, c, d may start either a TK_ID or a field
					// id/keyword; the other letters only the latter.
					case 'b':
					case 'c':
					case 'd':
						state=26;
						break;
					case 'a':
					case 'e':
					case 'f':
					case 'g':
					case 'h':
					case 'i':
					case 'j':
					case 'k':
					case 'l':
					case 'm':
					case 'n':
					case 'o':
					case 'p':
					case 'q':
					case 'r':
					case 's':
					case 't':
					case 'u':
					case 'v':
					case 'w':
					case 'x':
					case 'y':
					case 'z':
						state=29;
						break;
					case '%':
						state=42;
						break;
					default:
						buff[currBuffPos+1]='\0';
						sprintf(errormsg, "ERROR_2: Unknown Symbol <%s> at line number %llu", buff, lineNo);
						return InitializeErrorToken(errormsg,TK_ERROR,TK_ERROR);
				}
				break;
			// States 1-3: seen "<"; either "<=", "<---" or a lone "<".
			case 1:
				switch(cur)
				{
					case '-':
						state=2;
						break;
					case '=':
						return InitializeToken("<=",TK_LE);
					default:
						bufferPos--;
						return InitializeToken("<",TK_LT);
				}
				break;
			case 2:
				if(cur=='-')state=3;
				else
				{
					bufferPos--;
					buff[currBuffPos]='\0';
					sprintf(errormsg,"ERROR_3: Unknown pattern <%s>, expected '-' at line %llu",buff,lineNo);
					return InitializeErrorToken(errormsg,TK_ERROR,TK_ASSIGNOP);
				}
				break;
			case 3:
				if(cur=='-')return InitializeToken("<---",TK_ASSIGNOP);
				else
				{
					bufferPos--;
					buff[currBuffPos]='\0';
					sprintf(errormsg,"ERROR_3: Unknown pattern <%s>, expected '-' at line %llu",buff,lineNo);
					return InitializeErrorToken(errormsg,TK_ERROR,TK_ASSIGNOP);
				}
				break;
			// State 5: seen ">"; either ">=" or a lone ">".
			case 5:
				if(cur=='=')return InitializeToken(">=",TK_GE);
				//else return InitializeToken("ERROR: = expected",TK_ERROR);
				else
				{
					bufferPos--;
					return InitializeToken(">",TK_GT);
				}
				break;
			// State 8: seen "="; only "==" is a token.
			case 8:
				if(cur=='=')return InitializeToken("==",TK_EQ);
				else
				{
					bufferPos--;
					buff[currBuffPos]='\0';
					sprintf(errormsg,"ERROR_3: Unknown pattern <%s>, expected '=' at line %llu",buff,lineNo);
					return InitializeErrorToken(errormsg,TK_ERROR,TK_EQ);
				}
				break;
			// State 10: seen "!"; only "!=" is a token.
			case 10:
				if(cur=='=')return InitializeToken("!=",TK_NE);
				else
				{
					bufferPos--;
					buff[currBuffPos]='\0';
					sprintf(errormsg,"ERROR_3: Unknown pattern <%s>, expected '=' at line %llu",buff,lineNo);
					return InitializeErrorToken(errormsg,TK_ERROR,TK_NE);
				}
				break;
			// States 12-13: "&&&".
			case 12:
				if(cur=='&')state=13;
				else
				{
					bufferPos--;
					buff[currBuffPos]='\0';
					sprintf(errormsg,"ERROR_3: Unknown pattern <%s>, expected '&' at line %llu",buff,lineNo);
					return InitializeErrorToken(errormsg,TK_ERROR,TK_AND);
				}
				break;
			case 13:
				if(cur=='&')return InitializeToken("&&&",TK_AND);
				else
				{
					bufferPos--;
					buff[currBuffPos]='\0';
					sprintf(errormsg,"ERROR_3: Unknown pattern <%s>, expected '&' at line %llu",buff,lineNo);
					return InitializeErrorToken(errormsg,TK_ERROR,TK_AND);
				}
				break;
			// States 15-16: "@@@".
			case 15:
				if(cur=='@')state=16;
				else
				{
					bufferPos--;
					buff[currBuffPos]='\0';
					sprintf(errormsg,"ERROR_3: Unknown pattern <%s>, expected '@' at line %llu",buff,lineNo);
					return InitializeErrorToken(errormsg,TK_ERROR,TK_OR);
				}
				break;
			// No break after this case, but both branches return.
			case 16:
				if(cur=='@')return InitializeToken("@@@",TK_OR);
				else
				{
					bufferPos--;
					buff[currBuffPos]='\0';
					sprintf(errormsg,"ERROR_3: Unknown pattern <%s>, expected '@' at line %llu",buff,lineNo);
					return InitializeErrorToken(errormsg,TK_ERROR,TK_OR);
				}
			// States 18-19: record id, '#' followed by one or more a-z.
			// After the inner switch, control falls through into case 19;
			// cur is a lower-case letter at that point, so case 19 does
			// nothing on that pass.
			case 18:
				switch(cur)
				{
					case 'a':
					case 'b':
					case 'c':
					case 'd':
					case 'e':
					case 'f':
					case 'g':
					case 'h':
					case 'i':
					case 'j':
					case 'k':
					case 'l':
					case 'm':
					case 'n':
					case 'o':
					case 'p':
					case 'q':
					case 'r':
					case 's':
					case 't':
					case 'u':
					case 'v':
					case 'w':
					case 'x':
					case 'y':
					case 'z':
						state=19;
						break;
					default:
						bufferPos--;
						buff[currBuffPos]='\0';
						sprintf(errormsg,"ERROR_3: Unknown pattern <%s> ,a-z expected at line %llu",buff,lineNo);
						return InitializeErrorToken(errormsg,TK_ERROR,TK_RECORDID);
						break;
				}
			case 19:
				if(cur<'a'||cur>'z'){
					bufferPos--;
					buff[currBuffPos]='\0';
					return InitializeToken(buff,TK_RECORDID);
				}
				break;
			// States 20-22: function id, '_' then letters then optional
			// digits; the exact text "_main" yields TK_MAIN.
			case 20:
				if((cur>='a'&&cur<='z')||(cur>='A'&&cur<='Z')) state=21;
				else
				{
					bufferPos--;
					buff[currBuffPos]='\0';
					sprintf(errormsg,"ERROR_3: Unknown pattern <%s>, expected alphabet at line %llu",buff, lineNo);
					return InitializeErrorToken(errormsg,TK_ERROR,TK_FUNID);
				}
				break;
			case 21:
				if((cur>='a'&&cur<='z')||(cur>='A'&&cur<='Z'));
				else if(cur=='0'||cur=='1'||cur=='2'||cur=='3'||cur=='4'||cur=='5'||cur=='6'||cur=='7'||cur=='8'||cur=='9') state=22;
				else
				{
					bufferPos--;
					buff[currBuffPos]='\0';
					if(strcmp(buff,"_main")==0) return InitializeToken(buff,TK_MAIN);
					return InitializeToken(buff,TK_FUNID);
				}
				break;
			case 22:
				if(cur=='0'||cur=='1'||cur=='2'||cur=='3'||cur=='4'||cur=='5'||cur=='6'||cur=='7'||cur=='8'||cur=='9');
				else
				{
					bufferPos--;
					buff[currBuffPos]='\0';
					return InitializeToken(buff,TK_FUNID);
				}
				break;
			// States 23-25: numbers.  Digits alone give TK_NUM; a '.'
			// must be followed by exactly two digits to give TK_RNUM
			// (the token is returned as soon as the second is read).
			case 23:
				if(cur=='0'||cur=='1'||cur=='2'||cur=='3'||cur=='4'||cur=='5'||cur=='6'||cur=='7'||cur=='8'||cur=='9');
				else if(cur=='.') state=24;
				else
				{
					bufferPos--;
					buff[currBuffPos]='\0';
					return InitializeToken(buff,TK_NUM);
				}
				break;
			case 24:
				if(cur=='0'||cur=='1'||cur=='2'||cur=='3'||cur=='4'||cur=='5'||cur=='6'||cur=='7'||cur=='8'||cur=='9') state=25;
				else
				{
					bufferPos--;
					buff[currBuffPos]='\0';
					sprintf(errormsg,"ERROR_3: Unknown pattern <%s>, expected atleast one digit after '.' at line %llu",buff,lineNo);
					return InitializeErrorToken(errormsg,TK_ERROR,TK_RNUM);
				}
				break;
			case 25:
				if(cur=='0'||cur=='1'||cur=='2'||cur=='3'||cur=='4'||cur=='5'||cur=='6'||cur=='7'||cur=='8'||cur=='9')
				{
					buff[currBuffPos+1]='\0';
					return InitializeToken(buff,TK_RNUM);
				}
				else
				{
					bufferPos--;
					buff[currBuffPos]='\0';
					sprintf(errormsg,"ERROR_3: Unknown pattern <%s> at line %llu",buff,lineNo);
					return InitializeErrorToken(errormsg,TK_ERROR,TK_RNUM);
				}
				break;
			// State 26: seen a single b, c or d.  A digit 2-7 continues as
			// a TK_ID, a lower-case letter continues as a field id/keyword.
			case 26:
				if(cur>='2'&&cur<='7') state=27;
				else if(cur>='a'&&cur<='z') state=29;
				else
				{
					bufferPos--;
					buff[currBuffPos]='\0';
					return InitializeToken(buff,TK_FIELDID);
				}
				break;
			// States 27-28: TK_ID body, letters b-d and digits 2-7 (27),
			// then digits 2-7 only (28).  On reaching buffer index 20 the
			// rest of the identifier is skipped and an error is returned.
			case 27:
				if(currBuffPos==20)
				{
					while(cur>='b'&&cur<='d')cur = NextCharacter(fp);
					while(cur>='2'&&cur<='7')cur = NextCharacter(fp);
					bufferPos--;
					buff[20]='\0';
					sprintf(errormsg,"ERROR_1: identifier: %s at line %llu is longer than the prescribed length of 20 characters",buff,lineNo);
					return InitializeErrorToken(errormsg,TK_ERROR,TK_ID);
				}
				else if(cur>='2'&&cur<='7') state=28;
				else if(cur>='b'&&cur<='d');
				else
				{
					bufferPos--;
					buff[currBuffPos]='\0';
					return InitializeToken(buff,TK_ID);
				}
				break;
			case 28:
				if(currBuffPos==20)
				{
					while(cur>='2'&&cur<='7')cur = NextCharacter(fp);
					bufferPos--;
					buff[20]='\0';
					sprintf(errormsg,"ERROR_1: identifier: %s at line %llu is longer than the prescribed length of 20 characters",buff,lineNo);
					return InitializeErrorToken(errormsg,TK_ERROR,TK_ID);
				}
				else if(cur>='2'&&cur<='7');
				else
				{
					bufferPos--;
					buff[currBuffPos]='\0';
					return InitializeToken(buff,TK_ID);
				}
				break;
			// State 29: run of lower-case letters.  When it ends, the text
			// is compared against the keyword list; anything else is a
			// TK_FIELDID.  The same index-20 length limit applies.
			case 29:
				if(currBuffPos==20)
				{
					while(cur>='a'&&cur<='z')cur = NextCharacter(fp);
					bufferPos--;
					buff[20]='\0';
					sprintf(errormsg,"ERROR_1: identifier: %s at line %llu is longer than the prescribed length of 20 characters",buff,lineNo);
					return InitializeErrorToken(errormsg,TK_ERROR,TK_FIELDID);
				}
				else if(cur>='a'&&cur<='z');
				else
				{
					bufferPos--;
					buff[currBuffPos]='\0';
					if(strcmp(buff,"with")==0) return InitializeToken(buff,TK_WITH);
					else if(strcmp(buff,"parameters")==0) return InitializeToken(buff,TK_PARAMETERS);
					else if(strcmp(buff,"end")==0) return InitializeToken(buff,TK_END);
					else if(strcmp(buff,"while")==0) return InitializeToken(buff,TK_WHILE);
					else if(strcmp(buff,"int")==0) return InitializeToken(buff,TK_INT);
					else if(strcmp(buff,"real")==0) return InitializeToken(buff,TK_REAL);
					else if(strcmp(buff,"type")==0) return InitializeToken(buff,TK_TYPE);
					else if(strcmp(buff,"global")==0) return InitializeToken(buff,TK_GLOBAL);
					else if(strcmp(buff,"parameter")==0) return InitializeToken(buff,TK_PARAMETER);
					else if(strcmp(buff,"list")==0) return InitializeToken(buff,TK_LIST);
					else if(strcmp(buff,"input")==0) return InitializeToken(buff,TK_INPUT);
					else if(strcmp(buff,"output")==0) return InitializeToken(buff,TK_OUTPUT);
					else if(strcmp(buff,"endwhile")==0) return InitializeToken(buff,TK_ENDWHILE);
					else if(strcmp(buff,"if")==0) return InitializeToken(buff,TK_IF);
					else if(strcmp(buff,"endif")==0) return InitializeToken(buff,TK_ENDIF);
					else if(strcmp(buff,"then")==0) return InitializeToken(buff,TK_THEN);
					else if(strcmp(buff,"read")==0) return InitializeToken(buff,TK_READ);
					else if(strcmp(buff,"write")==0) return InitializeToken(buff,TK_WRITE);
					else if(strcmp(buff,"return")==0) return InitializeToken(buff,TK_RETURN);
					else if(strcmp(buff,"record")==0) return InitializeToken(buff,TK_RECORD);
					else if(strcmp(buff,"endrecord")==0) return InitializeToken(buff,TK_ENDRECORD);
					else if(strcmp(buff,"call")==0) return InitializeToken(buff,TK_CALL);
					else if(strcmp(buff,"else")==0) return InitializeToken(buff,TK_ELSE);
					else return InitializeToken(buff,TK_FIELDID);
				}
				break;
			// State 42: inside a '%' comment; discard everything up to the
			// newline, then return to the start state.
			case 42:
				currBuffPos=-1;
				if(cur=='\n')
				{
					lineNo++;
					state=0;
				}
				break;
			default:
				printf("WRONG STATE\n");
				;
		}
	}
}
// Converts a point to a text offset.
//
//   point  the position to map; fInset is subtracted from it and each
//          coordinate is clamped to be non-negative
//   round  when true, a point that falls inside a character selects the
//          offset after that character if the point is at least as close
//          (within one unit) to the character's trailing edge as to its
//          leading edge; when false the offset before the character is
//          always chosen
//
// Returns the offset of the chosen line (from LineToOffset) plus the
// distance, in TChar units, from the start of that line.  An empty line
// yields the line's own offset, and a point beyond the end of the line
// yields the offset at the end of the line.
STextOffset TTextLayout::PointToOffset(const TPoint& point, bool round) const
{
	TCoord h = point.h - fInset.h;
	if (h < 0)
		h = 0;
	TCoord v = point.v - fInset.v;
	if (v < 0)
		v = 0;

	// Find the first line whose vertOffset lies beyond v and step back one
	// line from it (presumably vertOffset is the top of the line - confirm).
	uint32 line;
	for (line = 0; line < fLineCount; line++)
	{
		if (fLineBreaks[line].vertOffset > v)
		{
			if (line > 0)
				line--;
			break;
		}
	}

	// No line starts beyond v: use the last line.
	// NOTE(review): this wraps around if fLineCount can be 0 - confirm.
	if (line >= fLineCount)
		line = fLineCount - 1;

	STextOffset result = LineToOffset(line);

	const TChar* lineStart;
	STextOffset lineLength;
	TCoord vertOffset, ascent, height;
	GetLine(line, lineStart, lineLength, vertOffset, ascent, height);

	if (lineLength == 0)
		return result;

	const TChar* lineEnd = lineStart + lineLength;
	const TChar* text = lineStart;
	const TChar* lastText = text;
	TCoord lastHorizOffset = 0;

	// Walk the line one character at a time, measuring the width of the
	// text up to and including the current character.
	while (text < lineEnd)
	{
		NextCharacter(text);

		// Scoped to the loop: nothing after the loop needs this value, and a
		// second declaration in the outer scope would only be shadowed here.
		TCoord horizOffset = MeasureText(lineStart, text - lineStart, 0);

		if (horizOffset > h)
		{
			if (round && h - lastHorizOffset + 1 >= horizOffset - h)
				return result + (text - lineStart);
			else
				return result + (lastText - lineStart);
		}

		lastHorizOffset = horizOffset;
		lastText = text;
	}

	// The point is at or beyond the end of the line.  lastText equals text
	// here, so the offset is the same whether or not rounding is requested.
	return result + (text - lineStart);
}
// Reads characters until one complete item has been parsed and stores it in
// theElement.
//
// theElement is reset on every pass of the outer loop: mType to TYPE_NONE,
// mSection to the parser's current mSection, and mValue, mAttributes and
// mInstruction emptied.  The inner loop then consumes one character at a
// time from NextCharacter and classifies the item through mType:
//   TYPE_START        "<name attr=value ...>"; the name is appended to
//                     mSection
//   TYPE_END          "</name>"; the name must match the last component of
//                     mSection, which is then removed
//   TYPE_ELEMENT      text outside of tags
//   TYPE_INSTRUCTION  "<? ... ?>"
//   TYPE_COMMENT      "<!-- ... -->"
// A start tag ending in "/" has a matching end tag pushed onto
// mBufferedText (in reverse order) so that it is seen as a start followed
// by an end.
//
// Returns true when an item has been stored in theElement; comments are
// only returned when mAllowComments is set, otherwise the loop continues
// with the next item.  Returns false at end of input or after Fail() has
// been called for malformed input.
bool CXMLParser::NextElement(CXMLElement* theElement)
{
	for (;;)
	{
		theElement->mType = CXMLElement::TYPE_NONE;
		theElement->mSection = mSection;
		theElement->mValue = "";
		theElement->mAttributes.clear();
		theElement->mInstruction.erase();

		// hasSpace: whitespace was seen after some value text and has not
		// yet been acted on.
		bool hasSpace = false;
		bool inQuote = false;
		bool gotEndQuote = false;

		// doingAttribute: past the tag name, collecting attributes.
		// AttributeVal: an '=' was seen, so characters go to the value.
		bool doingAttribute = false;
		bool AttributeVal = false;
		std::string aAttributeKey;
		std::string aAttributeValue;

		std::string aLastAttributeKey;

		for (;;)
		{
			// Process character by character
			char c = 0;
			int aVal;

			//if (mBufferedText.size() > 0)
			//{
			//	c = mBufferedText[mBufferedText.size()-1];
			//	mBufferedText.pop_back();
			//	aVal = 1;
			//}
			//else
			//{
			//	if (mFile != NULL)
			//		aVal = fread(&c, 1, 1, mFile);
			//	else
			//		aVal = 0;
			//}

			// aVal is compared against 1 below to tell whether a character
			// was obtained.
			c = NextCharacter( &aVal );

			if (aVal == 1)
			{
				// If the character was a newline and this character
				// is a single 'tick', then the entire line is a
				// comment and we should ignore it... just slurp up
				// the characters until the next newline.
				//
				if( mLastCharWasNewline==true )
				{
					if( c=='\'' )
					{
						while( aVal && c!='\n' )
						{
							c = NextCharacter( &aVal );
						}
					}
					else
					{
						mLastCharWasNewline=false;
					}
				}

				bool processChar = false;

				if (c == '\n')
				{
					mLineNum++;
					mLastCharWasNewline = true;
				}

				if (theElement->mType == CXMLElement::TYPE_COMMENT)
				{
					// Just add text to theElement->mInstruction until we find -->
					std::string* aStrPtr = &theElement->mInstruction;

					*aStrPtr += c;

					int aLen = aStrPtr->length();

					if ((c == '>') && (aLen >= 3) && ((*aStrPtr)[aLen - 2] == '-') && ((*aStrPtr)[aLen - 3] == '-'))
					{
						// Drop the trailing "-->" from the stored text.
						*aStrPtr = aStrPtr->substr(0, aLen - 3);
						break;
					}
				}
				else if (theElement->mType == CXMLElement::TYPE_INSTRUCTION)
				{
					// Just add text to theElement->mInstruction until we find ?>
					// Characters before the first whitespace go to mValue;
					// from the first whitespace on they go to mInstruction.
					std::string* aStrPtr = &theElement->mValue;

					if ((theElement->mInstruction.length() != 0) || (::isspace(c)))
						aStrPtr = &theElement->mInstruction;

					*aStrPtr += c;

					int aLen = aStrPtr->length();

					if ((c == '>') && (aLen >= 2) && ((*aStrPtr)[aLen - 2] == '?'))
					{
						// Drop the trailing "?>" from the stored text.
						*aStrPtr = aStrPtr->substr(0, aLen - 2);
						break;
					}
				}
				else
				{
					if (c == '"')
					{
						inQuote = !inQuote;
						// Quote characters are kept as text only outside
						// of tags.
						if (theElement->mType==CXMLElement::TYPE_NONE || theElement->mType==CXMLElement::TYPE_ELEMENT)
							processChar = true;

						if (!inQuote)
							gotEndQuote = true;
					}
					else if (!inQuote)
					{
						if (c == '<')
						{
							if (theElement->mType == CXMLElement::TYPE_ELEMENT)
							{
								// The '<' ends the current text item; push
								// it back so the next call sees it.
								//TODO: Fix buffered text. Not sure what I meant by that.

								//OLD: mBufferedText = c + mBufferedText;

								mBufferedText.push_back(c);
								break;
							}

							if (theElement->mType == CXMLElement::TYPE_NONE)
							{
								theElement->mType = CXMLElement::TYPE_START;
							}
							else
							{
								Fail("Unexpected '<'");
								return false;
							}
						}
						else if (c == '>')
						{
							if (theElement->mType == CXMLElement::TYPE_START)
							{
								bool insertEnd = false;

								if (aAttributeKey == "/")
								{
									// We will get this if we have a space before the />, so we can ignore it
									// and go about our business now
									insertEnd = true;
								}
								else
								{
									// Probably isn't committed yet
									if (aAttributeKey.length() > 0)
									{
										// theElement->mAttributes[aLastAttributeKey] = aAttributeValue;

										aAttributeKey = XMLDecodeString(aAttributeKey);
										aAttributeValue = XMLDecodeString(aAttributeValue);

										aLastAttributeKey = aAttributeKey;
										AddAttribute(theElement, aLastAttributeKey, aAttributeValue);

										aAttributeKey = "";
										aAttributeValue = "";
									}

									if (aLastAttributeKey.length() > 0)
									{
										// Note: this std::string aVal hides
										// the int aVal declared at the top
										// of the inner loop.
										std::string aVal = theElement->mAttributes[aLastAttributeKey];

										int aLen = aVal.length();

										if ((aLen > 0) && (aVal[aLen-1] == '/'))
										{
											// Its an empty element, fake start and end segments
											// theElement->mAttributes[aLastAttributeKey] = aVal.substr(0, aLen - 1);

											AddAttribute(theElement, aLastAttributeKey, XMLDecodeString(aVal.substr(0, aLen - 1)));

											insertEnd = true;
										}
									}
									else
									{
										int aLen = theElement->mValue.length();

										if ((aLen > 0) && (theElement->mValue[aLen-1] == '/'))
										{
											// Its an empty element, fake start and end segments
											theElement->mValue = theElement->mValue.substr(0, aLen - 1);
											insertEnd = true;
										}
									}
								}

								// Do we want to fake an ending section?
								if (insertEnd)
								{
									std::string anAddString = "</" + theElement->mValue + ">";

									// mBufferedText is appended to in reverse
									// character order.
									int anOldSize = mBufferedText.size();
									int anAddLength = anAddString.length();

									mBufferedText.resize(anOldSize + anAddLength);

									for (int i = 0; i < anAddLength; i++)
										mBufferedText[anOldSize + i] = anAddString[anAddLength - i - 1];

									// clear out aAttributeKey, since it contains "/" as its value and will insert
									// it into the element's attribute map.
									aAttributeKey = "";

									//OLD: mBufferedText = "</" + theElement->mValue + ">" + mBufferedText;
								}

								// Descend: append the tag name to the
								// section path.
								if (mSection.length() != 0)
									mSection += "/";

								mSection += theElement->mValue;

								break;
							}
							else if (theElement->mType == CXMLElement::TYPE_END)
							{
								int aLastSlash = mSection.rfind('/');
								if ((aLastSlash == -1) && (mSection.length() == 0))
								{
									Fail("Unexpected End");
									return false;
								}

								std::string aLastSectionName = mSection.substr(aLastSlash + 1);

								if (aLastSectionName != theElement->mValue)
								{
									Fail("End '" + theElement->mValue + "' Doesn't Match Start '" + aLastSectionName + "'");
									return false;
								}

								// Ascend: remove the last component of the
								// section path.
								if (aLastSlash == -1)
									mSection.erase(mSection.begin(), mSection.end());
								else
									mSection.erase(mSection.begin() + aLastSlash, mSection.end());

								break;
							}
							else
							{
								Fail("Unexpected '>'");
								return false;
							}
						}
						else if ((c == '/') && (theElement->mType == CXMLElement::TYPE_START) && (theElement->mValue == ""))
						{
							// '/' directly after '<': this is an end tag.
							theElement->mType = CXMLElement::TYPE_END;
						}
						else if ((c == '?') && (theElement->mType == CXMLElement::TYPE_START) && (theElement->mValue == ""))
						{
							// '?' directly after '<': this is an instruction.
							theElement->mType = CXMLElement::TYPE_INSTRUCTION;
						}
						else if (::isspace((uchar) c))
						{
							if (theElement->mValue != "")
								hasSpace = true;

							// It's a comment!
							if ((theElement->mType == CXMLElement::TYPE_START) && (theElement->mValue == "!--"))
								theElement->mType = CXMLElement::TYPE_COMMENT;
						}
						else if ((uchar) c > 32)
						{
							processChar = true;
						}
						else
						{
							Fail("Illegal Character");
							return false;
						}
					}
					else
					{
						// Inside quotes every character is taken literally.
						processChar = true;
					}

					if (processChar)
					{
						if (theElement->mType == CXMLElement::TYPE_NONE)
							theElement->mType = CXMLElement::TYPE_ELEMENT;

						if (theElement->mType == CXMLElement::TYPE_START)
						{
							if (hasSpace)
							{
								// Whitespace inside a start tag separates
								// the tag name from the first attribute and
								// one attribute from the next.
								if ((!doingAttribute) || ((!AttributeVal) && (c != '=')) ||
									((AttributeVal) && ((aAttributeValue.length() > 0) || gotEndQuote)))
								{
									if (doingAttribute)
									{
										aAttributeKey = XMLDecodeString(aAttributeKey);
										aAttributeValue = XMLDecodeString(aAttributeValue);

										// theElement->mAttributes[aAttributeKey] = aAttributeValue;

										AddAttribute(theElement, aAttributeKey, aAttributeValue);

										aAttributeKey = "";
										aAttributeValue = "";

										// NOTE(review): aAttributeKey was
										// just cleared, so this stores an
										// empty string - confirm intended.
										aLastAttributeKey = aAttributeKey;
									}
									else
									{
										doingAttribute = true;
									}

									AttributeVal = false;
								}

								hasSpace = false;
							}

							// Pick the string this character belongs to: the
							// tag name, the attribute key or the attribute
							// value.  '=' itself is not stored.
							std::string* aStrPtr = NULL;

							if (!doingAttribute)
							{
								aStrPtr = &theElement->mValue;
							}
							else
							{
								if (c == '=')
								{
									AttributeVal = true;
									gotEndQuote = false;
								}
								else
								{
									if (!AttributeVal)
										aStrPtr = &aAttributeKey;
									else
										aStrPtr = &aAttributeValue;
								}
							}

							if (aStrPtr != NULL)
							{
								*aStrPtr += c;
							}
						}
						else
						{
							// Outside a start tag, a run of whitespace
							// between text is stored as a single space.
							if (hasSpace)
							{
								theElement->mValue += " ";
								hasSpace = false;
							}

							theElement->mValue += c;
						}
					}
				}
			}
			else
			{
				// No character available: only an error if an item was
				// in progress.
				if (theElement->mType != CXMLElement::TYPE_NONE)
					Fail("Unexpected End of File");

				return false;
			}
		}

		// Commit an attribute that was still being collected when the item
		// ended.
		if (aAttributeKey.length() > 0)
		{
			aAttributeKey = XMLDecodeString(aAttributeKey);
			aAttributeValue = XMLDecodeString(aAttributeValue);
			// theElement->mAttributes[aAttributeKey] = aAttributeValue;

			AddAttribute(theElement, aAttributeKey, aAttributeValue);
		}

		theElement->mValue = XMLDecodeString(theElement->mValue);

		// Ignore comments
		if ((theElement->mType != CXMLElement::TYPE_COMMENT) || mAllowComments)
			return true;
	}
}
int AddRange(char *e, int *place, CharSetT *cs) { int first, second, input, number1[3], number2[3]; input = NextCharacter(e, *place); first = input; AdvanceParser(e, place); if(input == '\\') { first = NextCharacter(e, *place); AdvanceParser(e, place); } if (input == '#') { number1[0] = NextCharacter(e, *place); AdvanceParser(e, place); number1[1] = NextCharacter(e, *place); AdvanceParser(e, place); number1[2] = NextCharacter(e, *place); AdvanceParser(e, place); if (!isdigit(number1[0]) || !isdigit(number1[1]) || !isdigit(number1[2])) return 0; else first = ((number1[0] - '0') * 100 + (number1[1] - '0') * 10 + number1[2] - '0'); if (first > 255) return 0; } else if (strchr(specialChars, first)) return 0; if (NextCharacter(e, *place) != '-') return 0; AdvanceParser(e, place); input = NextCharacter(e, *place); AdvanceParser(e, place); second = input; if(input == '\\') { second = NextCharacter(e, *place); AdvanceParser(e, place); } else if (input == '#') { number2[0] = NextCharacter(e, *place); AdvanceParser(e, place); number2[1] = NextCharacter(e, *place); AdvanceParser(e, place); number2[2] = NextCharacter(e, *place); AdvanceParser(e, place); if (!isdigit(number2[0]) || !isdigit(number2[1]) || !isdigit(number2[2])) return 0; else second = ((number2[0] - '0') * 100 + (number2[1] - '0') * 10 + number2[2] - '0'); if (second > 255) return 0; } else if (strchr(specialChars, second)) return 0; if (first >= second) return 0; AddCharacterRange(first, second, cs); return 1; }
RegExpressionT *S(char *e, int *place) { int input, number[3]; RegExpressionT *ex; CharSetT *cs; input = NextCharacter(e, *place); switch(input) { case '{': cs = CreateCharSet(); while(1) { AdvanceParser(e, place); input = NextCharacter(e, *place); if (input == '}') break; switch(input) { case 's': AddCharacter(' ', cs); break; case 'n': AddCharacter('\n', cs); break; case 'r': AddCharacter('\r', cs); break; case 't': AddCharacter('\t', cs); break; case 'f': AddCharacter('\f', cs); break; default: #if ERR_MESG fprintf(stderr, "Bad regexp: character %c.\n", input); fprintf(stderr, "%s == at %d\n", e, *place); #endif free(cs); return NULL; } } AdvanceParser(e, place); ex = NewRegularExpression(); ex->type = CHARSET; ex->charset = cs; return ex; case '$': AdvanceParser(e, place); ex = NewRegularExpression(); ex->type = CHARSET; ex->charset = BuildPrintableCharSet(); return ex; case '.': AdvanceParser(e, place); ex = NewRegularExpression(); ex->type = ANYCHAR; return ex; case '&': AdvanceParser(e, place); ex = NewRegularExpression(); ex->type = EMPTY; return ex; case '\\': AdvanceParser(e, place); input = NextCharacter(e, *place); AdvanceParser(e, place); ex = NewRegularExpression(); ex->type = ATOM; ex->value = input; return ex; case '#': AdvanceParser(e, place); number[0] = NextCharacter(e, *place); AdvanceParser(e, place); number[1] = NextCharacter(e, *place); AdvanceParser(e, place); number[2] = NextCharacter(e, *place); AdvanceParser(e, place); if (!isdigit(number[0]) || !isdigit(number[1]) || !isdigit(number[2])) { #if ERR_MESG fprintf(stderr, "Specified character value not a number.\n"); fprintf(stderr, "%s == at %d\n", e, *place); #endif return NULL; } input = (((number[0] - '0') * 100) + ((number[1] - '0') * 10) + number[2] - '0'); if (input >= 256) { #if ERR_MESG fprintf(stderr, "Invalid character value: %d.\n", input); fprintf(stderr, "%s == at %d\n", e, *place); #endif return NULL; } ex = NewRegularExpression(); ex->type = ATOM; ex->value = input; return 
ex; default: if (strchr(specialChars, input)) { #if ERR_MESG fprintf(stderr, "Bad regexp: character %c.\n", input); fprintf(stderr, "%s == at %d\n", e, *place); #endif return NULL; } AdvanceParser(e, place); ex = NewRegularExpression(); ex->type = ATOM; ex->value = input; return ex; } return NULL; }