/*
 * Lexer entry point for the DAP parser: scan the next token from the
 * input held in state->lexstate.
 *
 * Parameters:
 *   lvalp - where to store the token's semantic value; may be NULL, in
 *           which case the value is only recorded on the reclaim list.
 *   state - parse state; state->lexstate supplies the input cursor
 *           (next), the scratch buffer (yytext), the character classes
 *           (worddelims, wordchars1, wordcharsn) and the line counter.
 *
 * Returns the token type:
 *   - 0 when the input is exhausted without finding a token;
 *   - the delimiter character itself for any char in worddelims;
 *   - WORD_STRING for a double-quoted string;
 *   - SCAN_DATA, a keyword token from keytokens[], or WORD_WORD for a word.
 *
 * Side effects: advances lexstate->next, updates lexstate->lineno,
 * lexstate->lasttoken and lexstate->lasttokentext. A non-empty token text
 * is duplicated and the copy is pushed onto lexstate->reclaim.
 *
 * The cursor is never moved beyond the terminating NUL of the input, so a
 * call after end of input keeps returning 0.
 */
int daplex(YYSTYPE* lvalp, DAPparsestate* state)
{
    DAPlexstate* lexstate = state->lexstate;
    int token = 0;
    int c;
    unsigned int i;
    char* p;
    char* tmp;
    YYSTYPE lval = NULL;

    ocbytesclear(lexstate->yytext);
    /* invariant: p always points to current char */
    p = lexstate->next;
    while(token == 0 && (c = *p) != '\0') {
        if(c == '\n') {
            lexstate->lineno++;
        } else if(c <= ' ' || c == '\177') {
            /* whitespace: ignore */
        } else if(c == '#') {
            /* single line comment */
            while((c = *(++p)) != '\0') {
                if(c == '\n') {
                    /* The newline is consumed here, so count it here */
                    lexstate->lineno++;
                    break;
                }
            }
        } else if(strchr(lexstate->worddelims,c) != NULL) {
            /* don't put in lexstate->yytext to avoid memory leak */
            token = c;
        } else if(c == '"') {
            int more = 1;
            /* We have a string token; will be reported as WORD_STRING */
            while(more && (c = *(++p)) != '\0') {
                if(c == '"') {
                    more = 0;
                    continue;
                }
#ifdef DAP2ENCODE
                if(c == '\\') {
                    /* Resolve spec ambiguity about handling of \c:
                       1. !KEEPSLASH: convert \c to c for any character c
                       2. KEEPSLASH: convert \c to \c for any character c;
                          that is, keep the backslash.
                       It is clear that the problem being addressed was \".
                       But it is unclear what to do about \n: convert to
                       Ascii LF or leave as \n.
                       This code will leave as \n and assume higher levels
                       of code will address the issue.
                    */
#ifdef KEEPSLASH
                    dapaddyytext(lexstate,c);
#endif
                    c = *(++p);
                    if(c == '\0') more = 0;
                }
#else /*Non-standard*/
                if(c == '\\') {
                    c = *(++p);
                    switch (c) {
                    case '\0':
                        /* Backslash was the last input char: stop here
                           rather than storing the NUL and scanning on */
                        more = 0;
                        break;
                    case 'r': c = '\r'; break;
                    case 'n': c = '\n'; break;
                    case 'f': c = '\f'; break;
                    case 't': c = '\t'; break;
                    case 'x': {
                        /* Peek at the two digits and consume them only when
                           both are valid, so that p ends on the last char
                           actually used and never passes the NUL. */
                        int d1 = -1;
                        int d2 = -1;
                        c = '?';
                        if(p[1] != '\0') d1 = tohex(p[1]);
                        if(d1 >= 0 && p[2] != '\0') d2 = tohex(p[2]);
                        if(d1 < 0 || d2 < 0) {
                            daperror(state,"Illegal \\xDD in TOKEN_STRING");
                        } else {
                            c = (int)((((unsigned int)d1)<<4) | (unsigned int)d2);
                            p += 2;
                        }
                    } break;
                    default: break;
                    }
                }
#endif /*!DAP2ENCODE*/
                if(more) dapaddyytext(lexstate,c);
            }
            token = WORD_STRING;
        } else if(strchr(lexstate->wordchars1,c) != NULL) {
            int isdatamark = 0;
            /* we have a WORD_WORD */
            dapaddyytext(lexstate,c);
            while((c = *(++p)) != '\0') {
#ifdef URLCVT
                if(c == '%' && p[1] != 0 && p[2] != 0
                   && strchr(hexdigits,p[1]) != NULL
                   && strchr(hexdigits,p[2]) != NULL) {
                    int d1,d2;
                    d1 = tohex(p[1]);
                    d2 = tohex(p[2]);
                    if(d1 >= 0 || d2 >= 0) {
                        c=(((unsigned int)d1)<<4) | (unsigned int)d2;
                        p+=2;
                    }
                } else {
                    /* Step back so the shared advance below lands on the
                       first char that is not part of the word */
                    if(strchr(lexstate->wordcharsn,c) == NULL) {p--; break;}
                }
                dapaddyytext(lexstate,c);
#else
                /* Step back so the shared advance below lands on the
                   first char that is not part of the word */
                if(strchr(lexstate->wordcharsn,c) == NULL) {p--; break;}
                dapaddyytext(lexstate,c);
#endif
            }
            /* Special check for Data: */
            /* NOTE(review): when the scan above stops on a non-word char it
               steps p back onto the last word char, so `*p == ':'` looks
               like it cannot hold while the text equals "Data"; confirm
               whether p[1] was intended before relying on this branch. */
            tmp = ocbytescontents(lexstate->yytext);
            if(strcmp(tmp,"Data")==0 && *p == ':') {
                dapaddyytext(lexstate,*p); p++;
                if(p[0] == '\n') {
                    token = SCAN_DATA;
                    isdatamark = 1;
                    p++;
                } else if(p[0] == '\r' && p[1] == '\n') {
                    token = SCAN_DATA;
                    isdatamark = 1;
                    p+=2;
                }
            }
            if(!isdatamark) {
                /* check for keyword */
                token = WORD_WORD; /* assume */
                for(i=0;;i++) {
                    if(keywords[i] == NULL) break;
                    if(strcasecmp(keywords[i],tmp)==0) {
                        token = keytokens[i];
                        break;
                    }
                }
            }
        } else {
            /* illegal */
        }
        /* Advance past the char just handled, unless an inner scan stopped
           on the terminating NUL: stepping over it would leave the cursor
           outside the input buffer. */
        if(*p != '\0') p++;
    }
    lexstate->next = p;
    strncpy(lexstate->lasttokentext,ocbytescontents(lexstate->yytext),MAX_TOKEN_LENGTH);
    /* strncpy does not terminate on truncation. The declared size of
       lasttokentext is not visible here, so terminate inside the
       MAX_TOKEN_LENGTH bytes that strncpy was allowed to write. */
    lexstate->lasttokentext[MAX_TOKEN_LENGTH-1] = '\0';
    lexstate->lasttoken = token;
    if(ocdebug >= 2)
        dumptoken(lexstate);

    /*Put return value onto Bison stack*/
    if(ocbyteslength(lexstate->yytext) == 0)
        lval = NULL;
    else {
        lval = ocbytesdup(lexstate->yytext);
        oclistpush(lexstate->reclaim,(void*)lval);
    }
    if(lvalp) *lvalp = lval;
    return token; /* Return the type of the token. */
}
/*
 * Lexer entry point for the DAP parser: scan the next token from the
 * input held in state->lexstate.
 *
 * Parameters:
 *   lvalp - where to store the token's semantic value; may be NULL, in
 *           which case the value is only recorded on the reclaim list.
 *   state - parse state; state->lexstate supplies the input cursor
 *           (next), the scratch buffer (yytext), the character classes
 *           (worddelims, wordchars1, wordcharsn) and the line counter.
 *
 * Returns the token type:
 *   - 0 when the input is exhausted without finding a token;
 *   - the delimiter character itself for any char in worddelims;
 *   - SCAN_WORD for a double-quoted string;
 *   - SCAN_DATA, a keyword token from keytokens[], or SCAN_WORD for a word.
 *
 * Side effects: advances lexstate->next, updates lexstate->lineno,
 * lexstate->lasttoken and lexstate->lasttokentext. A non-empty token text
 * is duplicated and the copy is pushed onto lexstate->reclaim.
 *
 * The cursor is never moved beyond the terminating NUL of the input, so a
 * call after end of input keeps returning 0.
 */
int daplex(YYSTYPE* lvalp, DAPparsestate* state)
{
    DAPlexstate* lexstate = state->lexstate;
    int token = 0;
    int c;
    unsigned int i;
    char* p;
    char* tmp;
    YYSTYPE lval = NULL;

    ocbytesclear(lexstate->yytext);
    /* invariant: p always points to current char */
    p = lexstate->next;
    while(token == 0 && (c = *p) != '\0') {
        if(c == '\n') {
            lexstate->lineno++;
        } else if(c <= ' ' || c == '\177') {
            /* whitespace: ignore */
        } else if(c == '#') {
            /* single line comment */
            while((c = *(++p)) != '\0') {
                if(c == '\n') {
                    /* The newline is consumed here, so count it here */
                    lexstate->lineno++;
                    break;
                }
            }
        } else if(strchr(lexstate->worddelims,c) != NULL) {
            /* don't put in lexstate->yytext to avoid memory leak */
            token = c;
        } else if(c == '"') {
            int more = 1;
            /* We have a string token; will be reported as SCAN_WORD */
            while(more && (c = *(++p)) != '\0') {
#ifdef NONSTDCVT
                switch (c) {
                case '"': more=0; break;
                case '\\':
                    c = *(++p);
                    switch (c) {
                    case '\0':
                        /* Backslash was the last input char: stop here
                           rather than storing the NUL and scanning on */
                        more = 0;
                        break;
                    case 'r': c = '\r'; break;
                    case 'n': c = '\n'; break;
                    case 'f': c = '\f'; break;
                    case 't': c = '\t'; break;
                    case 'x': {
                        /* Peek at the two digits and consume them only when
                           both are valid, so that p ends on the last char
                           actually used and never passes the NUL. */
                        int d1 = -1;
                        int d2 = -1;
                        c = '?';
                        if(p[1] != '\0') d1 = tohex(p[1]);
                        if(d1 >= 0 && p[2] != '\0') d2 = tohex(p[2]);
                        if(d1 < 0 || d2 < 0) {
                            daperror(state,"Illegal \\xDD in TOKEN_STRING");
                        } else {
                            c = (int)((((unsigned int)d1)<<4) | (unsigned int)d2);
                            p += 2;
                        }
                    } break;
                    default: break;
                    }
                    break;
                default: break;
                }
#else /*!NONSTDCVT*/
                if(c == '"')
                    more = 0;
                else if(c == '\\') {
                    /* Only \" is unescaped; for any other follower the
                       backslash is kept and the follower is rescanned. */
                    c = *(++p);
                    if(c == '\0') more = 0;
                    if(c != '"') {c = '\\'; --p;}
                }
#endif /*!NONSTDCVT*/
                if(more) dapaddyytext(lexstate,c);
            }
            token = SCAN_WORD;
        } else if(strchr(lexstate->wordchars1,c) != NULL) {
            /* we have a SCAN_WORD */
            dapaddyytext(lexstate,c);
            while((c = *(++p)) != '\0') {
#ifdef URLCVT
                if(c == '%' && p[1] != 0 && p[2] != 0
                   && strchr(hexdigits,p[1]) != NULL
                   && strchr(hexdigits,p[2]) != NULL) {
#ifdef WRONG
                    /* Should not unescape %xx occurrences */
                    int d1,d2;
                    d1 = tohex(p[1]);
                    d2 = tohex(p[2]);
                    if(d1 >= 0 || d2 >= 0) {
                        c=(((unsigned int)d1)<<4) | (unsigned int)d2;
                        p+=2;
                    }
#endif
                } else {
                    /* Step back so the shared advance below lands on the
                       first char that is not part of the word */
                    if(strchr(lexstate->wordcharsn,c) == NULL) {p--; break;}
                }
                dapaddyytext(lexstate,c);
#else
                /* Step back so the shared advance below lands on the
                   first char that is not part of the word */
                if(strchr(lexstate->wordcharsn,c) == NULL) {p--; break;}
                dapaddyytext(lexstate,c);
#endif
            }
            /* Special check for Data: */
            /* NOTE(review): when the scan above stops on a non-word char it
               steps p back onto the last word char, so `*p == ':'` looks
               like it cannot hold while the text equals "Data"; confirm
               whether p[1] was intended before relying on this branch. */
            tmp = ocbytescontents(lexstate->yytext);
            if(strcmp(tmp,"Data")==0 && *p == ':') {
                dapaddyytext(lexstate,*p); p++;
                token = SCAN_DATA;
            } else {
                /* check for keyword */
                token = SCAN_WORD; /* assume */
                for(i=0;;i++) {
                    if(keywords[i] == NULL) break;
                    if(strcasecmp(keywords[i],tmp)==0) {
                        token = keytokens[i];
                        break;
                    }
                }
            }
        } else {
            /* illegal */
        }
        /* Advance past the char just handled, unless an inner scan stopped
           on the terminating NUL: stepping over it would leave the cursor
           outside the input buffer. */
        if(*p != '\0') p++;
    }
    lexstate->next = p;
    strncpy(lexstate->lasttokentext,ocbytescontents(lexstate->yytext),MAX_TOKEN_LENGTH);
    /* strncpy does not terminate on truncation. The declared size of
       lasttokentext is not visible here, so terminate inside the
       MAX_TOKEN_LENGTH bytes that strncpy was allowed to write. */
    lexstate->lasttokentext[MAX_TOKEN_LENGTH-1] = '\0';
    lexstate->lasttoken = token;
    if(ocdebug >= 2)
        dumptoken(lexstate);

    /*Put return value onto Bison stack*/
    if(ocbyteslength(lexstate->yytext) == 0)
        lval = NULL;
    else {
        lval = ocbytesdup(lexstate->yytext);
        oclistpush(lexstate->reclaim,(ocelem)lval);
    }
    /* Tolerate a NULL lvalp; the duplicate is still owned by reclaim */
    if(lvalp) *lvalp = lval;
    return token; /* Return the type of the token. */
}