/* Consume an OCaml comment "(* ... *)".  On entry st->cp points at the
 * opening '(' of "(*"; scanning starts two characters past it.  Nested
 * comments are handled by recursion, and string literals appearing
 * inside a comment are skipped via eatString() so their contents cannot
 * fake a comment delimiter.  On return st->cp is NULL if input ran out.
 * NOTE(review): on the normal-exit path st->cp appears to still point AT
 * the closing ')' (the nested-comment branch compensates with c++) —
 * confirm against the callers' expectations. */
static void eatComment (lexingState * st)
{
	boolean unfinished = TRUE;
	boolean lastIsStar = FALSE;
	const unsigned char *c = st->cp + 2;

	while (unfinished)
	{
		/* we've reached the end of the line..
		 * so we have to reload a line... */
		if (c == NULL || *c == '\0')
		{
			st->cp = fileReadLine ();
			/* WOOPS... no more input...
			 * we return, next lexing read
			 * will be null and ok */
			if (st->cp == NULL)
				return;
			c = st->cp;
		}
		/* we've reached the end of the comment */
		else if (*c == ')' && lastIsStar)
			unfinished = FALSE;
		/* here we deal with imbricated comment, which
		 * are allowed in OCaml */
		else if (c[0] == '(' && c[1] == '*')
		{
			st->cp = c;
			eatComment (st);
			c = st->cp;
			if (c == NULL)
				return;
			/* the inner call left st->cp on its closing ')';
			 * reset the star flag and step past it */
			lastIsStar = FALSE;
			c++;
		}
		/* OCaml has a rule which says :
		 *
		 * "Comments do not occur inside string or character literals.
		 * Nested comments are handled correctly."
		 *
		 * So if we encounter a string beginning, we must parse it to
		 * get a good comment nesting (bug ID: 3117537) */
		else if (*c == '"')
		{
			st->cp = c;
			eatString (st);
			c = st->cp;
		}
		else
		{
			/* remember whether this char was '*' so a following ')'
			 * terminates the comment */
			lastIsStar = '*' == *c;
			c++;
		}
	}

	st->cp = c;
}
/* Parse a JSON array body; the opening '[' has already been consumed.
 * Elements are parsed with eatValue(); a '/'-introduced comment after an
 * element is skipped.  Returns the populated array Value.
 *
 * Fix: the original relied on assert(token == ',') for malformed input,
 * which is compiled out under NDEBUG and silently misparses; throw
 * instead, matching eatObject's error handling. */
static Value eatArray(std::istream& stream)
{
    Value obj(Type::arrayValue);
    while (!stream.eof()) {
        ltrim(stream);
        if (stream.peek() == ']') {   // empty array / trailing position
            stream.get();
            break;
        }
        obj.push_back(eatValue(stream));
        ltrim(stream);
        char token = stream.get();
        if (token == '/') {           // comment between elements
            eatComment(stream);
            token = stream.get();
        }
        if (token == ']')
            break;
        if (token != ',')             // elements must be ','-separated
            throw "arg";
    }
    return obj;
}
/* findNext:
 * Return next XML unit. This is either <..>, an HTML
 * comment <!-- ... -->, or characters up to next <.
 *
 * For a tag/comment, returns a pointer just past it (warning if the
 * closing '>' is missing); for text, copies characters into xb —
 * expanding '&'-entities via scanEntity — and returns a pointer to the
 * terminating '<' or NUL.
 *
 * Fix: removed the local 'rc', which captured agxbputc's return value
 * but was never read. */
static char *findNext(char *s, agxbuf* xb)
{
    char* t = s + 1;
    char c;

    if (*s == '<') {
	if ((*t == '!') && !strncmp(t + 1, "--", 2))
	    t = eatComment(t + 3);      /* skip past the "-->" */
	else
	    while (*t && (*t != '>'))
		t++;
	if (*t != '>') {
	    agerr(AGWARN, "Label closed before end of HTML element\n");
	    state.warn = 1;
	} else
	    t++;
    } else {
	agxbputc(xb, *s);
	while ((c = *t) && (c != '<')) {
	    /* '&' not followed by '#' starts a named entity */
	    if ((c == '&') && (*(t+1) != '#')) {
		t = scanEntity(t + 1, xb);
	    } else {
		agxbputc(xb, c);
		t++;
	    }
	}
    }
    return t;
}
/* Parse a JSON object body; the opening '{' has already been consumed.
 * Accepts "key": value pairs separated by ',', a '/'-comment before a
 * key, and the empty object "{}".  Returns the populated object Value.
 *
 * Fixes: the assert(token == ',') immediately after the throw was dead
 * code; the asserts on '"' and ':' vanish under NDEBUG and would let
 * malformed input misparse silently — all malformed-input checks now
 * throw, consistently. */
static Value eatObject(std::istream& stream)
{
    Value obj(Type::objectValue);
    while (!stream.eof()) {
        ltrim(stream);
        char token = stream.get();
        if (token == '}')
            break;                // empty object
        if (token == '/') {       // comment before a key
            eatComment(stream);
            token = stream.get();
        }
        if (token != '"')         // keys must be quoted strings
            throw "arg";
        Value key = eatString(stream);
        ltrim(stream);
        token = stream.get();
        if (token != ':')         // key/value separator
            throw "arg";
        ltrim(stream);
        Value val = eatValue(stream);
        obj[key.asString()] = val;
        ltrim(stream);
        token = stream.get();
        if (token == '}')
            break;
        if (token != ',')         // pairs must be ','-separated
            throw "arg";
    }
    return obj;
}
//
// getToken() - this function gets a whitespace delineated token out
// of the input stream and returns it in the provided buffer 'buf'
//
// 'buf' is null terminated
//
// it returns the number of characters in 'buf', or 0 if there is
// a failure
//
// failures include:
//      hitting end-of-file before a non-whitespace character
//      hitting a comment and never getting out of it
//      running out of space provided in 'buf'
//      being handed a NULL buffer, or one too small (< 2 bytes)
//
static int getToken( FILE* stream, char* buf, int buflen )
{
    int ch;
    int i = 0;

    // guard: we need room for at least one character plus the NUL
    // terminator; the original wrote buf[0] before checking the bound,
    // overflowing a zero-length buffer
    if ( buf == NULL || buflen < 2 )
        return 0;

    // remove white space
    ch = fgetc( stream );

    // read until we hit a non whitespace char
    for ( ;; )
    {
        // if white space, keep reading
        if ( isspace( ch ) )
            ch = fgetc( stream );

        // did we run out of file?
        else if ( ch == EOF )
            return 0;

        // is this the beginning of a comment?
        else if ( ch == '#' )
        {
            // read till end of comment
            if ( !eatComment( stream ) )
                return 0;
            ch = fgetc( stream );
        }

        // we got a non-white space, non-comment character
        else
            break;
    }

    // okay, now we are at the start of the token
    while( ch != EOF && !isspace( ch ) )
    {
        buf[i++] = (char)ch;
        if ( i >= buflen-1 )    // out of buffer
            return 0;
        ch = fgetc( stream );
    }

    // null terminate the string
    buf[i] = 0;
    return i;
}
// Return the next token from the input.
// Single-character tokens are returned immediately; otherwise characters
// are accumulated until a token boundary.  Comment and quote starters
// terminate any token already in progress (their trigger character is
// pushed back so the next call re-sees it); otherwise comments are eaten
// whole — returned only if doReturnComments is set — and quoted strings
// are always returned whole.  Returns the (possibly empty, at EOF) token.
std::string Toker::getTok()
{
    std::string tok = "";
    stripWhiteSpace();
    // punctuator-style tokens are exactly one character
    if(isSingleCharTok(nextChar))
    {
        getChar();
        tok.append(1,currChar);
        return tok;
    }
    do
    {
        if(isFileEnd())
            return tok;
        getChar();
        if(isBeginComment())
        {
            // finish the pending token first; re-deliver the comment
            // starter on the next call
            if(tok.length() > 0)
            {
                this->putback(currChar);
                return tok;
            }
            tok = eatComment();
            if(doReturnComments)
                return tok;
            else
            {
                // comment swallowed: restart token accumulation
                tok = "";
                continue;
            }
        }
        if(isBeginQuote())
        {
            // same boundary rule as comments
            if(tok.length() > 0)
            {
                this->putback(currChar);
                return tok;
            }
            tok = eatQuote();
            return tok;
        }
        std::locale loc;
        // whitespace inside a token run is dropped, not accumulated
        if(!isspace(currChar, loc))
            tok.append(1,currChar);
    } while(!isTokEnd() || tok.length() == 0);
    return tok;
}
/* Drive the config-file scan: classify each line by its first
 * significant character and hand it to the matching handler.
 * sfunc receives section headers, pfunc receives parameter lines.
 * Returns 0 on success, -1 if a section or parameter handler fails. */
static int Parse (FILE * fp,
                  int (*sfunc) (char *),
                  int (*pfunc) (char * option, char * value, int num_flags, char** flags))
{
    int ch = eatWhitespace (fp);

    while ((ch != EOF) && (ch > 0))
    {
        if (ch == '\n')
        {
            /* blank line */
            ch = eatWhitespace (fp);
        }
        else if ((ch == ';') || (ch == '#'))
        {
            /* comment line */
            ch = eatComment (fp);
        }
        else if (ch == '[')
        {
            /* section header */
            if (Section (fp, sfunc) < 0)
                return (-1);
            ch = eatWhitespace (fp);
        }
        else
        {
            /* parameter line */
            if (Parameter (fp, pfunc, ch) < 0)
                return (-1);
            ch = eatWhitespace (fp);
        }
    }
    return 0;
}
/* Parse one JSON value: dispatch on the first non-blank character to
 * the matching sub-parser.  '/' introduces a comment, which is skipped
 * before retrying.  Throws on anything unrecognized. */
static Value eatValue(std::istream& stream)
{
    ltrim(stream);
    char token = stream.get();
    switch (token) {
    case '{':
        return eatObject(stream);
    case '[':
        return eatArray(stream);
    case '"':
        return eatString(stream);
    case 't':
    case 'f':
        return eatBool(stream);
    case 'n':
        return eatNull(stream);
    case '/':
        eatComment(stream);          // skip, then parse the value after it
        return eatValue(stream);
    default:
        // numeric literal: digit, leading '.', or leading '-'
        if ((token >= '0' && token <= '9') || token == '.' || token == '-')
            return eatNumeric(stream, token);
        throw "Unable to parse json";
    }
}
/* The lexer is in charge of reading the file.
 * Some of sub-lexer (like eatComment) also read file.
 * lexing is finished when the lexer return Tok_EOF */
static objcKeyword lex (lexingState * st)
{
	int retType;

	/* handling data input here: refill st->cp when the current line is
	 * exhausted.  Note the loop body always returns, so this behaves as
	 * an if: each refilled line first yields a Tok_EOL to the caller. */
	while (st->cp == NULL || st->cp[0] == '\0')
	{
		st->cp = readLineFromInputFile ();
		if (st->cp == NULL)
			return Tok_EOF;

		return Tok_EOL;
	}

	/* identifier or keyword */
	if (isAlpha (*st->cp) || (*st->cp == '_'))
	{
		readIdentifier (st);
		retType = lookupKeyword (vStringValue (st->name), Lang_ObjectiveC);

		if (retType == -1)	/* If it's not a keyword */
		{
			return ObjcIDENTIFIER;
		}
		else
		{
			return retType;
		}
	}
	/* '@'-directive (e.g. @interface, @implementation) */
	else if (*st->cp == '@')
	{
		readIdentifierObjcDirective (st);
		retType = lookupKeyword (vStringValue (st->name), Lang_ObjectiveC);

		if (retType == -1)	/* If it's not a keyword */
		{
			return Tok_any;
		}
		else
		{
			return retType;
		}
	}
	else if (isSpace (*st->cp))
	{
		eatWhiteSpace (st);
		return lex (st);
	}
	else
		/* single-character tokens: consume the char and return its tag */
		switch (*st->cp)
		{
		case '(':
			st->cp++;
			return Tok_PARL;

		case '\\':
			st->cp++;
			return Tok_Backslash;

		case '#':
			st->cp++;
			return Tok_Sharp;

		case '/':
			if (st->cp[1] == '*')	/* ergl, a comment */
			{
				eatComment (st);
				return lex (st);
			}
			else if (st->cp[1] == '/')
			{
				/* line comment: discard the rest of the line by forcing
				 * a refill on the recursive call */
				st->cp = NULL;
				return lex (st);
			}
			else
			{
				st->cp++;
				return Tok_any;
			}
			break;

		case ')':
			st->cp++;
			return Tok_PARR;
		case '{':
			st->cp++;
			return Tok_CurlL;
		case '}':
			st->cp++;
			return Tok_CurlR;
		case '[':
			st->cp++;
			return Tok_SQUAREL;
		case ']':
			st->cp++;
			return Tok_SQUARER;
		case ',':
			st->cp++;
			return Tok_COMA;
		case ';':
			st->cp++;
			return Tok_semi;
		case ':':
			st->cp++;
			return Tok_dpoint;
		case '"':
			/* string literal: consumed whole, content ignored */
			eatString (st);
			return Tok_any;
		case '+':
			st->cp++;
			return Tok_PLUS;
		case '-':
			st->cp++;
			return Tok_MINUS;
		case '*':
			st->cp++;
			return Tok_Asterisk;
		case '<':
			st->cp++;
			return Tok_ANGLEL;
		case '>':
			st->cp++;
			return Tok_ANGLER;

		default:
			st->cp++;
			break;
		}

	/* default return if nothing is recognized,
	 * shouldn't happen, but at least, it will
	 * be handled without destroying the parsing. */
	return Tok_any;
}
// Scan and return the next token from the input stream.
// Side channels: tokenString holds the raw text of numbers/identifiers,
// while stringValue / singleValue / integerValue / identifierValue hold
// the typed payload for STRING / SINGLE / INTEGER / IDENTIFIER tokens;
// line and column track position across newlines.
//
// Fix: the string-literal scanner looped forever on an unterminated
// string (EOF never matches '"'); it now stops at end of input.
Token Lexer::getNextToken()
{
    // Eat whitespace; newlines are significant and returned as tokens
    tokenString = "";
    while (isspace(currentChar) && currentChar != '\n' && currentChar != EOF) {
        getChar();
    }

    if (currentChar == '\n') {
        line++;
        column = 0;
        getChar();
        return NEW_LINE;
    }

    // BASIC-style comment: ' runs to end of line
    if (currentChar == '\'') {
        eatComment();
        return NEW_LINE;
    }

    // Match operators
    if (currentChar == '+') { getChar(); return PLUS; }
    if (currentChar == '-') { getChar(); return MINUS; }
    if (currentChar == '*') { getChar(); return MULTIPLY; }
    if (currentChar == '/') { getChar(); return DIVIDE; }
    if (currentChar == '(') { getChar(); return BRACKET_OPEN; }
    if (currentChar == ')') { getChar(); return BRACKET_CLOSE; }
    if (currentChar == '=') { getChar(); return EQUALS; }
    if (currentChar == ',') { getChar(); return COMMA; }
    if (currentChar == '%') { getChar(); return PERCENT; }
    if (currentChar == '&') { getChar(); return AMPERSAND; }
    if (currentChar == '!') { getChar(); return EXCLAMATION; }
    if (currentChar == '#') { getChar(); return HASH; }
    if (currentChar == '$') { getChar(); return DOLLAR; }
    if (currentChar == EOF) { getChar(); return END_OF_FILE; }

    // Match not equals and less than
    if (currentChar == '<') {
        getChar();
        if (currentChar == '>') {
            getChar();
            return NOT_EQUALS;
        } else if (currentChar == '=') {
            getChar();
            return LESS_THAN_OR_EQUALS;
        } else {
            return LESS_THAN;
        }
    }

    // Match greater thans
    if (currentChar == '>') {
        getChar();
        if (currentChar == '=') {
            getChar();
            return GREATER_THAN_OR_EQUALS;
        } else {
            return GREATER_THAN;
        }
    }

    // Match string literals; a doubled "" inside a string is an escaped quote
    if (currentChar == '"') {
        stringValue = "";
        while (true) {
            getChar();
            // BUG FIX: stop on EOF — an unterminated string used to spin
            // forever appending EOF to stringValue
            if (currentChar == EOF)
                break;
            if (currentChar == '"') {
                getChar();
                if (currentChar != '"')
                    break;
            }
            stringValue += currentChar;
        }
        return STRING;
    }

    // Match numbers (integer or single-precision with one '.')
    if (isdigit(currentChar)) {
        bool dotSeen = false;
        std::string numberString;
        do {
            if (currentChar == '.') {
                if (dotSeen)
                    break;      // second '.' ends the number
                else
                    dotSeen = true;
            }
            numberString += currentChar;
            getChar();
        } while (isdigit(currentChar) || currentChar == '.');
        tokenString = numberString;
        if (dotSeen) {
            singleValue = strtod(numberString.c_str(), NULL);
            return SINGLE;
        } else {
            integerValue = strtol(numberString.c_str(), NULL, 10);
            return INTEGER;
        }
    }

    // Match identifiers and keywords (case-insensitive: lowered here)
    if (isalpha(currentChar)) {
        std::string identString;
        do {
            identString += tolower(currentChar);
            getChar();
        } while (isalnum(currentChar) || currentChar == '_');
        identifierValue = identString;
        tokenString = identString;
        if (identString == "dim") return DIM;
        else if (identString == "as") return AS;
        else if (identString == "print") return PRINT;
        else if (identString == "if") return IF;
        else if (identString == "then") return THEN;
        else if (identString == "else") return ELSE;
        else if (identString == "elseif") return ELSE_IF;
        else if (identString == "end") return END;
        else if (identString == "exit") return EXIT;
        else if (identString == "select") return SELECT;
        else if (identString == "case") return CASE;
        else if (identString == "for") return FOR;
        else if (identString == "step") return STEP;
        else if (identString == "next") return NEXT;
        else if (identString == "continue") return CONTINUE;
        else if (identString == "to") return TO;
        else if (identString == "while") return WHILE;
        else if (identString == "wend") return WEND;
        else if (identString == "do") return DO;
        else if (identString == "loop") return LOOP;
        else if (identString == "until") return UNTIL;
        else if (identString == "function") return FUNCTION;
        else if (identString == "sub") return SUB;
        else if (identString == "declare") return DECLARE;
        else if (identString == "return") return RETURN;
        else if (identString == "rem") {
            // "REM" comment: swallow the rest of the line
            eatComment();
            return NEW_LINE;
        }
        else return IDENTIFIER;
    }

    // Return an unknown token for whatever else is there
    tokenString = currentChar;
    return UNKNOWN;
}
/* The lexer is in charge of reading the file.
 * Some of sub-lexer (like eatComment) also read file.
 * lexing is finished when the lexer return Tok_EOF */
static ocamlKeyword lex (lexingState * st)
{
	int retType;

	/* handling data input here: keep refilling st->cp until a
	 * non-empty line is available (empty lines produce no token) */
	while (st->cp == NULL || st->cp[0] == '\0')
	{
		st->cp = fileReadLine ();
		if (st->cp == NULL)
			return Tok_EOF;
	}

	/* identifier or keyword */
	if (isAlpha (*st->cp))
	{
		readIdentifier (st);
		retType = lookupKeyword (vStringValue (st->name), Lang_Ocaml);

		if (retType == -1)	/* If it's not a keyword */
		{
			return OcaIDENTIFIER;
		}
		else
		{
			return retType;
		}
	}
	else if (isNum (*st->cp))
		return eatNumber (st);
	else if (isSpace (*st->cp))
	{
		eatWhiteSpace (st);
		return lex (st);
	}
	/* OCaml permit the definition of our own operators
	 * so here we check all the consecuting chars which
	 * are operators to discard them. */
	else if (isOperator[*st->cp])
		return eatOperator (st);
	else
		/* single-character tokens: consume the char, return its tag */
		switch (*st->cp)
		{
		case '(':
			if (st->cp[1] == '*')	/* ergl, a comment */
			{
				eatComment (st);
				return lex (st);
			}
			else
			{
				st->cp++;
				return Tok_PARL;
			}

		case ')':
			st->cp++;
			return Tok_PARR;
		case '[':
			st->cp++;
			return Tok_BRL;
		case ']':
			st->cp++;
			return Tok_BRR;
		case '{':
			st->cp++;
			return Tok_CurlL;
		case '}':
			st->cp++;
			return Tok_CurlR;
		case '\'':
			st->cp++;
			return Tok_Prime;
		case ',':
			st->cp++;
			return Tok_comma;
		case '=':
			st->cp++;
			return Tok_EQ;
		case ';':
			st->cp++;
			return Tok_semi;
		case '"':
			/* string literal: consumed whole, treated as a value */
			eatString (st);
			return Tok_Val;
		case '_':
			st->cp++;
			return Tok_Val;
		case '#':
			st->cp++;
			return Tok_Sharp;
		case '\\':
			st->cp++;
			return Tok_Backslash;

		default:
			st->cp++;
			break;
		}

	/* default return if nothing is recognized,
	 * shouldn't happen, but at least, it will
	 * be handled without destroying the parsing. */
	return Tok_Val;
}