/* Handles PHP variable declarations of the forms
 *   $var = VALUE
 *   $var;
 * Returns whether the caller should read the next token itself. */
static boolean parseVariable (tokenInfo *const token)
{
	const accessType access = CurrentStatement.access;
	tokenInfo *const name = newToken ();
	boolean readNext = TRUE;

	copyToken (name, token, TRUE);
	readToken (token);

	switch (token->type)
	{
		case TOKEN_EQUAL_SIGN:
		{
			/* variables assigned inside a function body get the
			 * local-variable kind */
			const phpKind kind = (token->parentKind == K_FUNCTION)
			                     ? K_LOCAL_VARIABLE : K_VARIABLE;

			readToken (token);
			if (token->type == TOKEN_KEYWORD &&
			    token->keyword == KEYWORD_function &&
			    PhpKinds[kind].enabled)
			{
				/* $var = function (...) {...}: an anonymous function bound
				 * to the variable name */
				if (parseFunction (token, name))
					readToken (token);
				readNext = (boolean) (token->type == TOKEN_SEMICOLON);
			}
			else
			{
				makeSimplePhpTag (name, kind, access);
				readNext = FALSE;
			}
			break;
		}

		case TOKEN_SEMICOLON:
			/* generate tags for bare declarations inside class-like scopes:
			 *   class Foo { protected $foo; }
			 * but don't get fooled by stuff like $foo = $bar; */
			if (token->parentKind == K_CLASS ||
			    token->parentKind == K_INTERFACE ||
			    token->parentKind == K_TRAIT)
				makeSimplePhpTag (name, K_VARIABLE, access);
			break;

		default:
			readNext = FALSE;
			break;
	}

	deleteToken (name);
	return readNext;
}
/* Parses a type mention starting at the current token (this looks like the
 * Eiffel parser: "like" anchors, attached/detachable/expanded marks, BIT
 * types).  "id" holds the candidate type-name token while "token" is the
 * lookahead.  Always returns TRUE. */
static boolean parseType (tokenInfo *const token)
{
	tokenInfo* const id = newToken ();
	copyToken (id, token);
	readToken (token);
	if (isType (token, TOKEN_COLON))  /* check for "{entity: TYPE}" */
	{
		/* re-anchor: the token after the colon is the real type name */
		readToken (id);
		readToken (token);
	}
	if (isKeyword (id, KEYWORD_like))
	{
		/* anchored type "like foo" / "like Current": consume the anchor */
		if (isType (token, TOKEN_IDENTIFIER) ||
		    isKeyword (token, KEYWORD_Current))
			readToken (token);
	}
	else
	{
		/* an attachment/expansion mark precedes the type name proper */
		if (isKeyword (id, KEYWORD_attached) ||
		    isKeyword (id, KEYWORD_detachable) ||
		    isKeyword (id, KEYWORD_expanded))
		{
			copyToken (id, token);
			readToken (token);
		}
		if (isType (id, TOKEN_IDENTIFIER))
		{
			if (isType (token, TOKEN_OPEN_BRACKET))
				parseGeneric (token, FALSE);  /* e.g. LIST [STRING] */
			else if ((strcmp ("BIT", vStringValue (id->string)) == 0))
				readToken (token);  /* read token after number of bits */
		}
	}
	deleteToken (id);
	return TRUE;
}
/* Parses a Go function or method declaration and emits a function tag with
 * its parameter list as the signature.
 * NOTE(review): "signature" appears to be a file-global vString that the
 * token reader appends source text to while it is non-NULL — confirm
 * against the rest of the file. */
static void parseFunctionOrMethod (tokenInfo *const token)
{
	// FunctionDecl = "func" identifier Signature [ Body ] .
	// Body = Block.
	//
	// MethodDecl = "func" Receiver MethodName Signature [ Body ] .
	// Receiver = "(" [ identifier ] [ "*" ] BaseTypeName ")" .
	// BaseTypeName = identifier .

	// Skip over receiver.
	readToken (token);
	if (isType (token, TOKEN_OPEN_PAREN))
		skipToMatched (token);

	if (isType (token, TOKEN_IDENTIFIER))
	{
		tokenInfo *functionToken = copyToken (token);

		// Start recording signature
		signature = vStringNew ();

		// Skip over parameters.
		readToken (token);
		skipToMatchedNoRead (token);

		vStringStripLeading (signature);
		vStringStripTrailing (signature);
		makeTag (functionToken, GOTAG_FUNCTION, NULL, GOTAG_UNDEFINED, signature->buffer);
		deleteToken (functionToken);
		vStringDelete (signature);

		// Stop recording signature
		signature = NULL;

		readToken (token);

		// Skip over result.
		skipType (token);

		// Skip over function body.
		if (isType (token, TOKEN_OPEN_CURLY))
			skipToMatched (token);
	}
}
/* Handles PHP declarations of the form
 *   const NAME = VALUE
 * Tags NAME as a define when the "=" is present; returns TRUE only in
 * that case (FALSE tells the caller not to read a fresh token). */
static boolean parseConstant (tokenInfo *const token)
{
	tokenInfo *constName;
	boolean sawAssignment;

	/* step past the "const" keyword itself */
	readToken (token);
	if (token->type != TOKEN_IDENTIFIER && token->type != TOKEN_KEYWORD)
		return FALSE;

	constName = newToken ();
	copyToken (constName, token, TRUE);

	readToken (token);
	sawAssignment = (boolean) (token->type == TOKEN_EQUAL_SIGN);
	if (sawAssignment)
		makeSimplePhpTag (constName, K_DEFINE, ACCESS_UNDEFINED);

	deleteToken (constName);
	return sawAssignment;
}
/* parses namespace declarations
 * 	namespace Foo {}
 * 	namespace Foo\Bar {}
 * 	namespace Foo;
 * 	namespace Foo\Bar;
 * 	namespace;
 * 	namespace {} */
static boolean parseNamespace (tokenInfo *const token)
{
	tokenInfo *nsToken = newToken ();

	/* CurrentNamesapce [sic] is the file-global namespace that subsequent
	 * tags are scoped under; a new declaration resets it */
	vStringClear (CurrentNamesapce);
	copyToken (nsToken, token, FALSE);

	/* accumulate identifier parts, joined by the PHP scope separator,
	 * until the declaration ends with ";" or opens a "{" body */
	do
	{
		readToken (token);
		if (token->type == TOKEN_IDENTIFIER)
		{
			if (vStringLength (CurrentNamesapce) > 0)
			{
				const char *sep;

				sep = phpScopeSeparatorFor (K_NAMESPACE, K_NAMESPACE);
				vStringCatS (CurrentNamesapce, sep);
			}
			vStringCat (CurrentNamesapce, token->string);
		}
	}
	while (token->type != TOKEN_EOF &&
	       token->type != TOKEN_SEMICOLON &&
	       token->type != TOKEN_OPEN_CURLY);

	vStringTerminate (CurrentNamesapce);
	if (vStringLength (CurrentNamesapce) > 0)
		makeNamespacePhpTag (nsToken, CurrentNamesapce);

	/* braced form: parse the namespace body as a nested scope */
	if (token->type == TOKEN_OPEN_CURLY)
		enterScope (token, NULL, -1);

	deleteToken (nsToken);

	return TRUE;
}
/* Handles a PHP trait definition:
 *   trait Foo {}
 * Tags the trait name and descends into its body when one follows. */
static boolean parseTrait (tokenInfo *const token)
{
	tokenInfo *traitName;
	boolean readNext;

	readToken (token);
	if (token->type != TOKEN_IDENTIFIER)
		return FALSE;

	traitName = newToken ();
	copyToken (traitName, token, TRUE);
	makeSimplePhpTag (traitName, K_TRAIT, ACCESS_UNDEFINED);

	readToken (token);
	readNext = (boolean) (token->type == TOKEN_OPEN_CURLY);
	if (readNext)
		enterScope (token, traitName->string, K_TRAIT);

	deleteToken (traitName);
	return readNext;
}
/* Parses a curly-brace block, dispatching each statement; returns TRUE when
 * the block turned out to be a class body (a "this" reference was seen).
 * May be called with token == orig_parent: the parent is copied up front. */
static boolean parseBlock (tokenInfo *const token, tokenInfo *const orig_parent)
{
	boolean is_class = FALSE;
	boolean read_next_token = TRUE;
	vString * saveScope = vStringNew ();
	tokenInfo *const parent = newToken ();

	/* backup the parent token to allow calls like parseBlock(token, token) */
	copyToken (parent, orig_parent);

	token->nestLevel++;

	/*
	 * Make this routine a bit more forgiving.
	 * If called on an open_curly advance it
	 */
	if ( isType (token, TOKEN_OPEN_CURLY) &&
	     isKeyword(token, KEYWORD_NONE) )
		readToken(token);

	if (! isType (token, TOKEN_CLOSE_CURLY))
	{
		/*
		 * Read until we find the closing brace,
		 * any nested braces will be handled within
		 */
		do
		{
			read_next_token = TRUE;
			if (isKeyword (token, KEYWORD_this))
			{
				/*
				 * Means we are inside a class and have found
				 * a class, not a function
				 */
				is_class = TRUE;
				/* scope is temporarily extended with the parent's name and
				 * restored after the line is parsed */
				vStringCopy(saveScope, token->scope);
				addToScope (token, parent->string);

				/*
				 * Ignore the remainder of the line
				 * findCmdTerm(token);
				 */

				read_next_token = parseLine (token, parent, is_class);

				vStringCopy(token->scope, saveScope);
			}
			else if (isKeyword (token, KEYWORD_var) ||
			         isKeyword (token, KEYWORD_let) ||
			         isKeyword (token, KEYWORD_const))
			{
				/*
				 * Potentially we have found an inner function.
				 * Set something to indicate the scope
				 */
				vStringCopy(saveScope, token->scope);
				addToScope (token, parent->string);
				read_next_token = parseLine (token, parent, is_class);
				vStringCopy(token->scope, saveScope);
			}
			else if (isKeyword (token, KEYWORD_function))
			{
				vStringCopy(saveScope, token->scope);
				addToScope (token, parent->string);
				parseFunction (token);
				vStringCopy(token->scope, saveScope);
			}
			else if (isType (token, TOKEN_OPEN_CURLY))
			{
				/* Handle nested blocks */
				parseBlock (token, parent);
			}
			else
			{
				/*
				 * It is possible for a line to have no terminator
				 * if the following line is a closing brace.
				 * parseLine will detect this case and indicate
				 * whether we should read an additional token.
				 */
				read_next_token = parseLine (token, parent, is_class);
			}

			/*
			 * Always read a new token unless we find a statement without
			 * a ending terminator
			 */
			if( read_next_token )
				readToken(token);

			/*
			 * If we find a statement without a terminator consider the
			 * block finished, otherwise the stack will be off by one.
			 */
		} while (! isType (token, TOKEN_EOF) &&
		         ! isType (token, TOKEN_CLOSE_CURLY) &&
		         read_next_token);
	}

	deleteToken (parent);
	vStringDelete(saveScope);
	token->nestLevel--;

	return is_class;
}
/**
 * Translates English text into phonemes by repeatedly finding the longest
 * complete match in the s_vocab table and emitting the matched entry's
 * phoneme characters (via rprintfChar/copyToken).
 * Enter:
 *   src => English text
 * On a word with no matching vocab entry an error is logged and the routine
 * returns early.  (The function is void; an older interface apparently
 * returned null on error / the phoneme string on success.)
 */
static void text2Phonemes(const char * src){
	// int outIndex = 0;// Current offset into phonemes
	int inIndex = -1; // Starts at -1 so that a leading space is assumed

	while(inIndex==-1 || src[inIndex]){ // until end of text
		int maxMatch=0; // Max chars matched on input text
		// int numOut=0; // Number of characters copied to output stream for the best match
		int maxWildcardPos = 0;

		// Start with first vocab entry
		const char* vocabEntry = s_vocab;

		// Keep track of best match so far
		const char* bestEntry = null;
		int bestWildCardInPos=0;
		char bestWildCard=0;
		boolean bestHasWhiteSpace=FALSE;
		int wildcardInPos;

		// Get next phoneme, P2
		while(getVocab(vocabEntry,0)){
			int y;
			char wildcard=0; // The wildcard character
			boolean hasWhiteSpace=FALSE;
			wildcardInPos=0; // The index in the vocab where it occurs

			// Compare the input (starting at inIndex) against this entry
			for(y=0;;y++){
				char nextCharIn,nextVocabChar;

				// Get next char from user input
				// Make next char upper case and remove control characters
				nextCharIn = (y + inIndex == -1) ? ' ' : src[y + inIndex];
				if(nextCharIn>='a' && nextCharIn<='z'){
					nextCharIn = nextCharIn - 'a' + 'A';
				}else if(nextCharIn<' '){
					nextCharIn = ' ';
				}

				// Get next text char from vocab; the high bit marks the
				// start of the entry's phoneme data, i.e. end of its text
				nextVocabChar = getVocab(vocabEntry,y);
				if( (nextVocabChar & 0x80)){
					nextVocabChar = 0;
				}

				// If its a wildcard then save its value and position
				if(nextVocabChar=='#' && nextCharIn >= 'A' && nextCharIn <= 'Z'){
					wildcard = nextCharIn; // The character equivalent to the '#'
					wildcardInPos=y;
					continue;
				}

				// Check if vocab is looking for end of word
				if(nextVocabChar=='_'){
					// try to match against a white space
					hasWhiteSpace=TRUE;
					if(whitespace(nextCharIn)){
						continue;
					}
					y--;
					break;
				}
				// check for end of either string
				if(nextVocabChar==0 || nextCharIn==0){
					break;
				}
				if(nextVocabChar != nextCharIn){
					break;
				}
			}

			// See if its the longest complete match so far; "complete"
			// means the vocab text was exhausted (high bit reached)
			if(y > maxMatch && ( getVocab(vocabEntry,y) & 0x80) == 0x80){
				// This is the longest complete match
				maxMatch = y;
				maxWildcardPos = 0;

				// Point to the start of the phoneme
				bestEntry = vocabEntry + y;
				bestWildCardInPos = wildcardInPos;
				bestWildCard = wildcard;
				bestHasWhiteSpace = hasWhiteSpace;
			}

			// Move to start of next entry
			while(getVocab(vocabEntry,y++)); // Move to end of phoneme asciiz
			vocabEntry += y;
		}// check next phoneme

		// 15 - end of vocab table

		//16 - a trailing word-boundary "_" was matched against whitespace
		// that should not be consumed from the input
		if(bestHasWhiteSpace==TRUE){
			maxMatch--;
		}
		//17 - nothing matched: log the offending input and give up
		if(maxMatch==0){
			loggerP(PSTR("No token for "));
			logger(&src[inIndex]);
			loggerCRLF();
			return;
		}

		// Copy data for best match
		{
			int y;

			// Copy the matching phrase changing any '#' to the phoneme for the wildcard
			for(y=0;;y++){
				char c = getVocab(bestEntry,y) & 0x7f; // Get the next phoneme character
				if(c==0){
					y++; // move to start of next vocab entry
					break;
				}
				if(c=='#'){
					if(getVocab(bestEntry,y+1)==0){
						// replacement ends in wild card: resume input at the
						// wildcard position instead of after the whole match
						maxWildcardPos = bestWildCardInPos;
					}else{
						// Copy the phonemes for the wild card character
						copyToken(bestWildCard);
					}
				}else{
					rprintfChar(c); // output the phoneme character
				}
			}
		}
		// advance past the consumed input
		inIndex += (maxWildcardPos>0) ? maxWildcardPos : maxMatch;
	}
}
/* Reads and dispatches tokens at the current PHP scope level until the
 * matching closing brace (or EOF).  If extraScope is non-NULL it is pushed
 * onto the scope under parentKind.  On return, parentToken is updated to
 * the token that ended the scope but keeps its original parentKind. */
static void enterScope (tokenInfo *const parentToken,
						const vString *const extraScope,
						const int parentKind)
{
	tokenInfo *token = newToken ();
	int origParentKind = parentToken->parentKind;

	copyToken (token, parentToken, TRUE);

	if (extraScope)
	{
		token->parentKind = parentKind;
		addToScope (token, extraScope, origParentKind);
	}

	readToken (token);
	while (token->type != TOKEN_EOF &&
	       token->type != TOKEN_CLOSE_CURLY)
	{
		/* sub-parsers report whether the caller should fetch a new token
		 * (FALSE means they already left the next token in place) */
		boolean readNext = TRUE;

		switch (token->type)
		{
			case TOKEN_OPEN_CURLY:
				enterScope (token, NULL, -1);
				break;

			case TOKEN_KEYWORD:
				switch (token->keyword)
				{
					/* handle anonymous classes */
					case KEYWORD_new:
						readToken (token);
						if (token->keyword != KEYWORD_class)
							readNext = FALSE;
						else
						{
							char buf[32];
							tokenInfo *name = newToken ();

							copyToken (name, token, TRUE);
							/* synthesize a placeholder name for the tag */
							snprintf (buf, sizeof buf, "AnonymousClass%u", ++AnonymousID);
							vStringCopyS (name->string, buf);
							readNext = parseClassOrIface (token, K_CLASS, name);
							deleteToken (name);
						}
						break;

					case KEYWORD_class:
						readNext = parseClassOrIface (token, K_CLASS, NULL);
						break;

					case KEYWORD_interface:
						readNext = parseClassOrIface (token, K_INTERFACE, NULL);
						break;

					case KEYWORD_trait:
						readNext = parseTrait (token);
						break;

					case KEYWORD_function:
						readNext = parseFunction (token, NULL);
						break;

					case KEYWORD_const:
						readNext = parseConstant (token);
						break;

					case KEYWORD_define:
						readNext = parseDefine (token);
						break;

					case KEYWORD_namespace:
						readNext = parseNamespace (token);
						break;

					/* visibility modifiers are remembered in
					 * CurrentStatement and consumed by the next tag */
					case KEYWORD_private:
						CurrentStatement.access = ACCESS_PRIVATE;
						break;

					case KEYWORD_protected:
						CurrentStatement.access = ACCESS_PROTECTED;
						break;

					case KEYWORD_public:
						CurrentStatement.access = ACCESS_PUBLIC;
						break;

					case KEYWORD_var:
						CurrentStatement.access = ACCESS_PUBLIC;
						break;

					case KEYWORD_abstract:
						CurrentStatement.impl = IMPL_ABSTRACT;
						break;

					default:
						break;
				}
				break;

			case TOKEN_VARIABLE:
				readNext = parseVariable (token);
				break;

			default:
				break;
		}

		if (readNext)
			readToken (token);
	}

	copyToken (parentToken, token, FALSE);
	parentToken->parentKind = origParentKind;
	deleteToken (token);
}
/* parse a function
 *
 * if @name is NULL, parses a normal function
 * 	function myfunc($foo, $bar) {}
 * 	function &myfunc($foo, $bar) {}
 * 	function myfunc($foo, $bar) : type {}
 *
 * if @name is not NULL, parses an anonymous function with name @name
 * 	$foo = function($foo, $bar) {}
 * 	$foo = function&($foo, $bar) {}
 * 	$foo = function($foo, $bar) use ($x, &$y) {}
 * 	$foo = function($foo, $bar) use ($x, &$y) : type {}
 *
 * Returns whether the caller should read the next token itself. */
static boolean parseFunction (tokenInfo *const token, const tokenInfo *name)
{
	boolean readNext = TRUE;
	accessType access = CurrentStatement.access;
	implType impl = CurrentStatement.impl;
	tokenInfo *nameFree = NULL;

	readToken (token);
	/* skip a possible leading ampersand (return by reference) */
	if (token->type == TOKEN_AMPERSAND)
		readToken (token);

	if (! name)
	{
		if (token->type != TOKEN_IDENTIFIER && token->type != TOKEN_KEYWORD)
			return FALSE;

		name = nameFree = newToken ();
		copyToken (nameFree, token, TRUE);
		readToken (token);
	}

	if (token->type == TOKEN_OPEN_PAREN)
	{
		/* rebuild the argument list's source text for the tag signature */
		vString *arglist = vStringNew ();
		int depth = 1;

		vStringPut (arglist, '(');
		do
		{
			readToken (token);

			/* track paren nesting so default values like f($a = g(1))
			 * don't end the list early */
			switch (token->type)
			{
				case TOKEN_OPEN_PAREN:  depth++; break;
				case TOKEN_CLOSE_PAREN: depth--; break;
				default: break;
			}
			/* display part */
			switch (token->type)
			{
				case TOKEN_AMPERSAND:    vStringPut (arglist, '&');    break;
				case TOKEN_CLOSE_CURLY:  vStringPut (arglist, '}');    break;
				case TOKEN_CLOSE_PAREN:  vStringPut (arglist, ')');    break;
				case TOKEN_CLOSE_SQUARE: vStringPut (arglist, ']');    break;
				case TOKEN_COLON:        vStringPut (arglist, ':');    break;
				case TOKEN_COMMA:        vStringCatS (arglist, ", ");  break;
				case TOKEN_EQUAL_SIGN:   vStringCatS (arglist, " = "); break;
				case TOKEN_OPEN_CURLY:   vStringPut (arglist, '{');    break;
				case TOKEN_OPEN_PAREN:   vStringPut (arglist, '(');    break;
				case TOKEN_OPEN_SQUARE:  vStringPut (arglist, '[');    break;
				case TOKEN_PERIOD:       vStringPut (arglist, '.');    break;
				case TOKEN_SEMICOLON:    vStringPut (arglist, ';');    break;
				case TOKEN_BACKSLASH:    vStringPut (arglist, '\\');   break;
				case TOKEN_STRING:
				{
					vStringCatS (arglist, "'");
					vStringCat (arglist, token->string);
					vStringCatS (arglist, "'");
					break;
				}

				case TOKEN_IDENTIFIER:
				case TOKEN_KEYWORD:
				case TOKEN_VARIABLE:
				{
					switch (vStringLast (arglist))
					{
						case 0:
						case ' ':
						case '{':
						case '(':
						case '[':
						case '.':
						case '\\':
							/* no need for a space between those and the identifier */
							break;

						default:
							vStringPut (arglist, ' ');
							break;
					}
					if (token->type == TOKEN_VARIABLE)
						vStringPut (arglist, '$');
					vStringCat (arglist, token->string);
					break;
				}

				default: break;
			}
		}
		while (token->type != TOKEN_EOF && depth > 0);

		vStringTerminate (arglist);

		makeFunctionTag (name, arglist, access, impl);
		vStringDelete (arglist);

		readToken (token); /* normally it's an open brace or "use" keyword */
	}

	/* skip use(...) */
	if (token->type == TOKEN_KEYWORD && token->keyword == KEYWORD_use)
	{
		readToken (token);
		skipOverParens (token);
	}

	/* PHP7 return type declaration or if parsing Zephir, skip function return
	 * type hint */
	if ((getInputLanguage () == Lang_php && token->type == TOKEN_COLON) ||
	    (getInputLanguage () == Lang_zephir && token->type == TOKEN_OPERATOR))
	{
		do
			readToken (token);
		while (token->type == TOKEN_IDENTIFIER ||
		       token->type == TOKEN_BACKSLASH);
	}

	if (token->type == TOKEN_OPEN_CURLY)
		enterScope (token, name->string, K_FUNCTION);
	else
		readNext = FALSE;

	if (nameFree)
		deleteToken (nameFree);

	return readNext;
}
/* Parses the fields of a Go struct type and tags them as members scoped
 * under parent_token.  Expects to be called right after the "struct"
 * keyword; consumes from the opening "{" through the matching "}". */
static void parseStructMembers (tokenInfo *const token, tokenInfo *const parent_token)
{
	// StructType = "struct" "{" { FieldDecl ";" } "}" .
	// FieldDecl = (IdentifierList Type | AnonymousField) [ Tag ] .
	// AnonymousField = [ "*" ] TypeName .
	// Tag = string_lit .

	readToken (token);
	if (!isType (token, TOKEN_OPEN_CURLY))
		return;

	readToken (token);
	while (!isType (token, TOKEN_EOF) && !isType (token, TOKEN_CLOSE_CURLY))
	{
		tokenInfo *memberCandidate = NULL;
		boolean first = TRUE;

		/* read the comma-separated identifier list of one field decl */
		while (!isType (token, TOKEN_EOF))
		{
			if (isType (token, TOKEN_IDENTIFIER))
			{
				if (first)
				{
					// could be anonymous field like in 'struct {int}' - we don't know yet
					memberCandidate = copyToken (token);
					first = FALSE;
				}
				else
				{
					if (memberCandidate)
					{
						// if we are here, there was a comma and memberCandidate isn't an anonymous field
						makeTag (memberCandidate, GOTAG_MEMBER, parent_token, GOTAG_STRUCT, NULL);
						deleteToken (memberCandidate);
						memberCandidate = NULL;
					}
					makeTag (token, GOTAG_MEMBER, parent_token, GOTAG_STRUCT, NULL);
				}
				readToken (token);
			}
			if (!isType (token, TOKEN_COMMA))
				break;
			readToken (token);
		}

		// in the case of an anonymous field, we already read part of the
		// type into memberCandidate and skipType() should return FALSE so no tag should
		// be generated in this case.
		if (skipType (token) && memberCandidate)
			makeTag (memberCandidate, GOTAG_MEMBER, parent_token, GOTAG_STRUCT, NULL);

		if (memberCandidate)
			deleteToken (memberCandidate);

		/* skip anything remaining in the declaration (e.g. the tag
		 * string literal) up to ";" or the struct's closing brace */
		while (!isType (token, TOKEN_SEMICOLON) && !isType (token, TOKEN_CLOSE_CURLY)
				&& !isType (token, TOKEN_EOF))
		{
			readToken (token);
			skipToMatched (token);
		}

		if (!isType (token, TOKEN_CLOSE_CURLY))
		{
			// we are at TOKEN_SEMICOLON
			readToken (token);
		}
	}
}
/* Parses one Tex sectioning command, tags its name, and records the most
 * recent part/chapter/section names for later scope resolution.
 * Always returns TRUE. */
static boolean parseTag (tokenInfo *const token, texKind kind)
{
	tokenInfo *const name = newToken ();
	vString * fullname;
	boolean useLongName = TRUE;

	fullname = vStringNew ();
	vStringClear (fullname);

	/*
	 * Tex tags are of these formats:
	 *   \keyword{any number of words}
	 *   \keyword[short desc]{any number of words}
	 *   \keyword*[short desc]{any number of words}
	 *
	 * When a keyword is found, loop through all words within
	 * the curly braces for the tag name.
	 */

	if (isType (token, TOKEN_KEYWORD))
	{
		copyToken (name, token);
		readToken (token);
	}

	/* [short desc] present: tag the short form and skip the long form */
	if (isType (token, TOKEN_OPEN_SQUARE))
	{
		useLongName = FALSE;

		readToken (token);
		while (! isType (token, TOKEN_CLOSE_SQUARE) )
		{
			if (isType (token, TOKEN_IDENTIFIER))
			{
				if (vStringLength (fullname) > 0)
					vStringCatS (fullname, " ");
				vStringCatS (fullname, vStringValue (token->string));
			}
			readToken (token);
		}
		vStringTerminate (fullname);
		vStringCopy (name->string, fullname);
		makeTexTag (name, kind);
	}

	/* starred variant, e.g. \section*{...} */
	if (isType (token, TOKEN_STAR))
	{
		readToken (token);
	}

	/* {long name}: join the words into the tag name */
	if (isType (token, TOKEN_OPEN_CURLY))
	{
		readToken (token);
		while (! isType (token, TOKEN_CLOSE_CURLY) )
		{
			/* if (isType (token, TOKEN_IDENTIFIER) && useLongName) */
			if (useLongName)
			{
				if (vStringLength (fullname) > 0)
					vStringCatS (fullname, " ");
				vStringCatS (fullname, vStringValue (token->string));
			}
			readToken (token);
		}
		if (useLongName)
		{
			vStringTerminate (fullname);
			if (vStringLength (fullname) > 0)
			{
				vStringCopy (name->string, fullname);
				makeTexTag (name, kind);
			}
		}
	}

	/*
	 * save the name of the last section definitions for scope-resolution
	 * later; each level clears everything below it
	 */
	switch (kind)
	{
		case TEXTAG_PART:
			vStringCopy(lastPart, fullname);
			vStringClear(lastChapter);
			vStringClear(lastSection);
			vStringClear(lastSubS);
			vStringClear(lastSubSubS);
			break;
		case TEXTAG_CHAPTER:
			vStringCopy(lastChapter, fullname);
			vStringClear(lastSection);
			vStringClear(lastSubS);
			vStringClear(lastSubSubS);
			break;
		case TEXTAG_SECTION:
			vStringCopy(lastSection, fullname);
			vStringClear(lastSubS);
			vStringClear(lastSubSubS);
			break;
		case TEXTAG_SUBSECTION:
			vStringCopy(lastSubS, fullname);
			vStringClear(lastSubSubS);
			break;
		case TEXTAG_SUBSUBSECTION:
			vStringCopy(lastSubSubS, fullname);
			break;
		default:
			break;
	}

	deleteToken (name);
	vStringDelete (fullname);
	return TRUE;
}
/* Parses one Tex tag command (\keyword{...}, \keyword[short]{...},
 * \keyword*[short]{...}) and emits a tag for its name.
 * Fixes vs. the previous revision: uses the vStringLength() accessor
 * instead of poking fullname->length directly (consistent with the sibling
 * variant of this function), and no longer emits an empty-named tag for
 * input like "\keyword{}".  Always returns TRUE. */
static boolean parseTag (tokenInfo *const token, texKind kind)
{
	tokenInfo *const name = newToken ();
	vString * fullname;
	boolean useLongName = TRUE;

	fullname = vStringNew ();
	vStringClear (fullname);

	/*
	 * Tex tags are of these formats:
	 *   \keyword{any number of words}
	 *   \keyword[short desc]{any number of words}
	 *   \keyword*[short desc]{any number of words}
	 *
	 * When a keyword is found, loop through all words within
	 * the curly braces for the tag name.
	 */

	if (isType (token, TOKEN_KEYWORD))
	{
		copyToken (name, token);
		readToken (token);
	}

	/* [short desc] present: use it as the tag name and ignore the long
	 * form inside the curly braces */
	if (isType (token, TOKEN_OPEN_SQUARE))
	{
		useLongName = FALSE;

		readToken (token);
		while (! isType (token, TOKEN_CLOSE_SQUARE) )
		{
			if (isType (token, TOKEN_IDENTIFIER))
			{
				if (vStringLength (fullname) > 0)
					vStringCatS (fullname, " ");
				vStringCatS (fullname, vStringValue (token->string));
			}
			readToken (token);
		}
		vStringTerminate (fullname);
		vStringCopy (name->string, fullname);
		makeTexTag (name, kind);
	}

	/* starred variant, e.g. \section*{...} */
	if (isType (token, TOKEN_STAR))
	{
		readToken (token);
	}

	/* {long name}: join the identifiers into the tag name */
	if (isType (token, TOKEN_OPEN_CURLY))
	{
		readToken (token);
		while (! isType (token, TOKEN_CLOSE_CURLY) )
		{
			if (isType (token, TOKEN_IDENTIFIER) && useLongName)
			{
				if (vStringLength (fullname) > 0)
					vStringCatS (fullname, " ");
				vStringCatS (fullname, vStringValue (token->string));
			}
			readToken (token);
		}
		if (useLongName)
		{
			vStringTerminate (fullname);
			/* don't emit a tag with an empty name, e.g. for "\keyword{}" */
			if (vStringLength (fullname) > 0)
			{
				vStringCopy (name->string, fullname);
				makeTexTag (name, kind);
			}
		}
	}

	deleteToken (name);
	vStringDelete (fullname);
	return TRUE;
}
/* Reads the next JavaScript token into token.  When include_newlines is
 * TRUE, a newline between two tokens may be replaced by a virtual semicolon
 * (automatic-semicolon-insertion heuristic; the real token is held back in
 * NextToken and emitted on the next call).  When repr is non-NULL, the
 * token's source representation is appended to it. */
static void readTokenFull (tokenInfo *const token, boolean include_newlines, vString *const repr)
{
	int c;
	int i;
	boolean newline_encountered = FALSE;

	/* if we've got a token held back, emit it */
	if (NextToken)
	{
		copyToken (token, NextToken, FALSE);
		deleteToken (NextToken);
		NextToken = NULL;
		return;
	}

	token->type = TOKEN_UNDEFINED;
	token->keyword = KEYWORD_NONE;
	vStringClear (token->string);

getNextChar:
	/* skip whitespace, remembering whether a newline was crossed */
	i = 0;
	do
	{
		c = fileGetc ();
		if (include_newlines && (c == '\r' || c == '\n'))
			newline_encountered = TRUE;
		i++;
	}
	while (c == '\t' || c == ' ' || c == '\r' || c == '\n');

	token->lineNumber = getSourceLineNumber ();
	token->filePosition = getInputFilePosition ();

	if (repr)
	{
		/* collapse any skipped whitespace run into a single space */
		if (i > 1)
			vStringPut (repr, ' ');
		vStringPut (repr, c);
	}

	switch (c)
	{
		case EOF: token->type = TOKEN_EOF;          break;
		case '(': token->type = TOKEN_OPEN_PAREN;   break;
		case ')': token->type = TOKEN_CLOSE_PAREN;  break;
		case ';': token->type = TOKEN_SEMICOLON;    break;
		case ',': token->type = TOKEN_COMMA;        break;
		case '.': token->type = TOKEN_PERIOD;       break;
		case ':': token->type = TOKEN_COLON;        break;
		case '{': token->type = TOKEN_OPEN_CURLY;   break;
		case '}': token->type = TOKEN_CLOSE_CURLY;  break;
		case '=': token->type = TOKEN_EQUAL_SIGN;   break;
		case '[': token->type = TOKEN_OPEN_SQUARE;  break;
		case ']': token->type = TOKEN_CLOSE_SQUARE; break;

		case '+':
		case '-':
		{
			int d = fileGetc ();
			if (d == c) /* ++ or -- */
				token->type = TOKEN_POSTFIX_OPERATOR;
			else
			{
				fileUngetc (d);
				token->type = TOKEN_BINARY_OPERATOR;
			}
			break;
		}

		case '*':
		case '%':
		case '?':
		case '>':
		case '<':
		case '^':
		case '|':
		case '&':
			token->type = TOKEN_BINARY_OPERATOR;
			break;

		case '\'':
		case '"':
			token->type = TOKEN_STRING;
			parseString (token->string, c);
			/* re-read position: the string may span lines */
			token->lineNumber = getSourceLineNumber ();
			token->filePosition = getInputFilePosition ();
			if (repr)
			{
				vStringCat (repr, token->string);
				vStringPut (repr, c);
			}
			break;

		case '`':
			token->type = TOKEN_TEMPLATE_STRING;
			parseTemplateString (token->string);
			token->lineNumber = getSourceLineNumber ();
			token->filePosition = getInputFilePosition ();
			if (repr)
			{
				vStringCat (repr, token->string);
				vStringPut (repr, c);
			}
			break;

		case '\\':
			c = fileGetc ();
			if (c != '\\' && c != '"' && !isspace (c))
				fileUngetc (c);
			token->type = TOKEN_CHARACTER;
			token->lineNumber = getSourceLineNumber ();
			token->filePosition = getInputFilePosition ();
			break;

		case '/':
		{
			int d = fileGetc ();
			if ( (d != '*') &&  /* is this the start of a comment? */
			     (d != '/') )   /* is a one line comment? */
			{
				fileUngetc (d);
				/* a '/' right after a value is division; otherwise it
				 * starts a regular-expression literal */
				switch (LastTokenType)
				{
					case TOKEN_CHARACTER:
					case TOKEN_IDENTIFIER:
					case TOKEN_STRING:
					case TOKEN_TEMPLATE_STRING:
					case TOKEN_CLOSE_CURLY:
					case TOKEN_CLOSE_PAREN:
					case TOKEN_CLOSE_SQUARE:
						token->type = TOKEN_BINARY_OPERATOR;
						break;

					default:
						token->type = TOKEN_REGEXP;
						parseRegExp ();
						token->lineNumber = getSourceLineNumber ();
						token->filePosition = getInputFilePosition ();
						break;
				}
			}
			else
			{
				if (repr) /* remove the / we added */
					repr->buffer[--repr->length] = 0;
				if (d == '*')
				{
					/* block comment: scan for the closing "*" "/" pair */
					do
					{
						fileSkipToCharacter ('*');
						c = fileGetc ();
						if (c == '/')
							break;
						else
							fileUngetc (c);
					} while (c != EOF && c != '\0');
					goto getNextChar;
				}
				else if (d == '/')  /* is this the start of a comment? */
				{
					fileSkipToCharacter ('\n');
					/* if we care about newlines, put it back so it is seen */
					if (include_newlines)
						fileUngetc ('\n');
					goto getNextChar;
				}
			}
			break;
		}

		case '#':
			/* skip shebang in case of e.g. Node.js scripts */
			if (token->lineNumber > 1)
				token->type = TOKEN_UNDEFINED;
			else if ((c = fileGetc ()) != '!')
			{
				fileUngetc (c);
				token->type = TOKEN_UNDEFINED;
			}
			else
			{
				fileSkipToCharacter ('\n');
				goto getNextChar;
			}
			break;

		default:
			if (! isIdentChar (c))
				token->type = TOKEN_UNDEFINED;
			else
			{
				parseIdentifier (token->string, c);
				token->lineNumber = getSourceLineNumber ();
				token->filePosition = getInputFilePosition ();
				token->keyword = analyzeToken (token->string, Lang_js);
				if (isKeyword (token, KEYWORD_NONE))
					token->type = TOKEN_IDENTIFIER;
				else
					token->type = TOKEN_KEYWORD;
				/* the first character is already in repr (added above) */
				if (repr && vStringLength (token->string) > 1)
					vStringCatS (repr, vStringValue (token->string) + 1);
			}
			break;
	}

	if (include_newlines && newline_encountered)
	{
		/* This isn't strictly correct per the standard, but following the
		 * real rules means understanding all statements, and that's not
		 * what the parser currently does.  What we do here is a guess, by
		 * avoiding inserting semicolons that would make the statement on
		 * the left or right obviously invalid.  Hopefully this should not
		 * have false negatives (e.g. should not miss insertion of a semicolon)
		 * but might have false positives (e.g. it will wrongfully emit a
		 * semicolon sometimes, i.e. for the newline in "foo\n(bar)").
		 * This should however be mostly harmless as we only deal with
		 * newlines in specific situations where we know a false positive
		 * wouldn't hurt too bad. */

		/* these already end a statement, so no need to duplicate it */
		#define IS_STMT_SEPARATOR(t) ((t) == TOKEN_SEMICOLON   || \
		                              (t) == TOKEN_EOF         || \
		                              (t) == TOKEN_COMMA       || \
		                              (t) == TOKEN_CLOSE_CURLY || \
		                              (t) == TOKEN_OPEN_CURLY)
		/* these cannot be the start or end of a statement */
		#define IS_BINARY_OPERATOR(t) ((t) == TOKEN_EQUAL_SIGN || \
		                               (t) == TOKEN_COLON      || \
		                               (t) == TOKEN_PERIOD     || \
		                               (t) == TOKEN_BINARY_OPERATOR)

		if (! IS_STMT_SEPARATOR(LastTokenType) &&
		    ! IS_STMT_SEPARATOR(token->type) &&
		    ! IS_BINARY_OPERATOR(LastTokenType) &&
		    ! IS_BINARY_OPERATOR(token->type) &&
		    /* these cannot be followed by a semicolon */
		    ! (LastTokenType == TOKEN_OPEN_PAREN ||
		       LastTokenType == TOKEN_OPEN_SQUARE))
		{
			/* hold the token... */
			Assert (NextToken == NULL);
			NextToken = newToken ();
			copyToken (NextToken, token, FALSE);

			/* ...and emit a semicolon instead */
			token->type = TOKEN_SEMICOLON;
			token->keyword = KEYWORD_NONE;
			vStringClear (token->string);
			/* NOTE(review): this writes the newline into token->string, not
			 * into repr — looks suspicious; confirm the intent upstream */
			if (repr)
				vStringPut (token->string, '\n');
		}

		#undef IS_STMT_SEPARATOR
		#undef IS_BINARY_OPERATOR
	}

	LastTokenType = token->type;
}
/* parses declarations of the form
 * 	use Foo
 * 	use Foo\Bar\Class
 * 	use Foo\Bar\Class as FooBarClass
 * 	use function Foo\Bar\func
 * 	use function Foo\Bar\func as foobarfunc
 * 	use const Foo\Bar\CONST
 * 	use const Foo\Bar\CONST as FOOBARCONST
 * 	use Foo, Bar
 * 	use Foo, Bar as Baz
 * 	use Foo as Test, Bar as Baz
 * 	use Foo\{Bar, Baz as Child, Nested\Other, Even\More as Something}
 *
 * Emits an alias tag per imported name; returns TRUE only when the
 * statement ended properly with a semicolon. */
static boolean parseUse (tokenInfo *const token)
{
	boolean readNext = FALSE;
	/* we can't know the use type, because class, interface and namespaces
	 * aliases are the same, and the only difference is the referenced name's
	 * type */
	const char *refType = "unknown";
	vString *refName = vStringNew ();
	tokenInfo *nameToken = newToken ();
	boolean grouped = FALSE;

	readToken (token); /* skip use keyword itself */
	if (token->type == TOKEN_KEYWORD && (token->keyword == KEYWORD_function ||
	                                     token->keyword == KEYWORD_const))
	{
		/* "use function"/"use const" do tell us the referenced type */
		switch (token->keyword)
		{
			case KEYWORD_function:
				refType = PhpKinds[K_FUNCTION].name;
				break;
			case KEYWORD_const:
				refType = PhpKinds[K_DEFINE].name;
				break;
			default:
				break; /* silence compilers */
		}
		readNext = TRUE;
	}

	if (readNext)
		readToken (token);

	readQualifiedName (token, refName, nameToken);
	grouped = readNext = (token->type == TOKEN_OPEN_CURLY);

	do
	{
		/* in a grouped use, refName currently holds the shared namespace
		 * prefix; remember its length so each leaf can be trimmed back */
		size_t refNamePrefixLength = grouped ? vStringLength (refName) : 0;
		/* if it's either not the first name in a comma-separated list, or we
		 * are in a grouped alias and need to read the leaf name */
		if (readNext)
		{
			readToken (token);
			readQualifiedName (token, refName, nameToken);
		}

		/* "as ALIAS": the alias becomes the tagged name */
		if (token->type == TOKEN_KEYWORD && token->keyword == KEYWORD_as)
		{
			readToken (token);
			copyToken (nameToken, token, TRUE);
			readToken (token);
		}

		if (nameToken->type == TOKEN_IDENTIFIER && PhpKinds[K_ALIAS].enabled)
		{
			tagEntryInfo entry;

			initPhpEntry (&entry, nameToken, K_ALIAS, ACCESS_UNDEFINED);

			entry.extensionFields.typeRef[0] = refType;
			entry.extensionFields.typeRef[1] = vStringValue (refName);

			makePhpTagEntry (&entry);
		}

		vStringTruncate (refName, refNamePrefixLength);

		readNext = TRUE;
	}
	while (token->type == TOKEN_COMMA);

	if (grouped && token->type == TOKEN_CLOSE_CURLY)
		readToken (token);

	vStringDelete (refName);
	deleteToken (nameToken);

	return (token->type == TOKEN_SEMICOLON);
}
static void parseValue (tokenInfo *const token) { if (token->type == TOKEN_OPEN_CURLY) { tokenInfo *name = newToken (); do { readTokenFull (token, TRUE); if (token->type == TOKEN_STRING) { jsonKind tagKind = TAG_NULL; /* default in case of invalid value */ copyToken (name, token); /* skip any possible garbage before the value */ skipToOneOf3 (token, TOKEN_CLOSE_CURLY, TOKEN_COLON, TOKEN_COMMA); if (token->type == TOKEN_COLON) { readToken (token); tagKind = tokenToKind (token->type); pushScope (token, name, tagKind); parseValue (token); popScope (token, name); } makeJsonTag (name, tagKind); } /* skip to the end of the construct */ skipToOneOf2 (token, TOKEN_CLOSE_CURLY, TOKEN_COMMA); } while (token->type != TOKEN_EOF && token->type != TOKEN_CLOSE_CURLY); if (token->type == TOKEN_CLOSE_CURLY) readToken (token); deleteToken (name); } else if (token->type == TOKEN_OPEN_SQUARE) { tokenInfo *name = newToken (); char buf[32]; unsigned int nth = 0; readToken (token); while (token->type != TOKEN_EOF && token->type != TOKEN_CLOSE_SQUARE) { jsonKind tagKind; tagKind = tokenToKind (token->type); copyToken (name, token); snprintf (buf, sizeof buf, "%u", nth++); vStringCopyS (name->string, buf); makeJsonTag (name, tagKind); pushScope (token, name, tagKind); parseValue (token); popScope (token, name); /* skip to the end of the construct */ skipToOneOf2 (token, TOKEN_CLOSE_SQUARE, TOKEN_COMMA); if (token->type != TOKEN_CLOSE_SQUARE) readToken (token); } if (token->type == TOKEN_CLOSE_SQUARE) readToken (token); deleteToken (name); } }
/*
 * Handle the operator precedence: scan the token chain for the binary
 * operator with the highest precedence, promote it to the head of the
 * chain, and split the remaining tokens into its two sub-expressions
 * (subExpression / subExpression2), which are then rearranged recursively.
 * This is greatly under-productive, but no clue on how to optimize it yet...
 */
void XPathParser::rearrangeTokens(Token* token, TokenFactory* tokenFactory)
{
/* NOTE(review): multi-statement macro without do { } while (0) — only safe
 * because every use below is a plain statement; also copies three fields by
 * assignment, leaving next/subExpression pointers untouched on purpose. */
#define copyToken(__to,__from) \
    (__to)->type = (__from)->type; \
    (__to)->token = (__from)->token; \
    (__to)->symbol = (__from)->symbol;

    AssertBug ( token, "Null token provided !\n" );
    Log_XPathParser_Tokenize ( "rearrange Token '%p' (%c/%s)\n", token, token->symbol, token->token.c_str() );

    if (token->isSymbol() && token->symbol == '~')
    {
        AssertBug ( token->token == "//", "Invalid complex token for descendant : '%s'\n", token->token.c_str() );
        /*
         * The first token of a token chain is '//', so we have to insert a root token before.
         */
        Token* nextToken = tokenFactory->allocToken();
        nextToken->type = Token::Symbol;
        nextToken->symbol = '~';
        nextToken->token = "//";
        nextToken->next = token->next;
        token->next = nextToken;
        /* head becomes the root step '/' */
        token->symbol = '/';
        token->token = "";
    }

    Token* lastToken = NULL;              /* token preceding tk in the chain */
    int candidatePrecedence = 0;          /* best (highest) precedence so far */
    Token* candidateToken = NULL;         /* operator chosen for promotion */
    Token* candidateLastToken = NULL;     /* its predecessor, for list surgery */

    Log_XPathParser_Tokenize ( "***** Rearranging tokens from chain-start Token '%p' (%c/%s)\n", token, token->symbol, token->token.c_str() );

    for (Token* tk = token; tk; tk = tk->next)
    {
        Log_XPathParser_Tokenize ( "Top Of Loop (token=%p) - rearrange : at tk '%p' (%c/%s)\n", token, tk, tk->symbol, tk->token.c_str() );

        /* already split into sub-expressions: nothing to do for this one */
        if (tk->subExpression2)
        {
            Log_XPathParser_Tokenize ( "Token symbol %p ('%c') already has a subExpression !\n", tk, tk->symbol );
            lastToken = tk;
            continue;
        }
        /* rearrange nested expressions (parenthesized groups etc.) first */
        if (tk->subExpression)
        {
            rearrangeTokens(tk->subExpression, tokenFactory);
        }
        /* a binary operator cannot be the head of the chain... */
        if (tk == token)
        {
            Log_XPathParser_Tokenize ( "--> Not a token, is at head.\n" );
            lastToken = tk;
            continue;
        }
        /* ...nor the last token (it needs a right operand) */
        if (!tk->next)
        {
            Log_XPathParser_Tokenize ( "--> Not a token, has no next.\n" );
            lastToken = tk;
            continue;
        }

        bool found = false;
        int myPrecedence = 0;

/* NOTE(review): identifiers starting with a double underscore are reserved
 * for the implementation — consider renaming __checkToken/__short/...
 * The ">= candidatePrecedence" comparison makes the scan keep the RIGHTMOST
 * of equal-precedence operators. */
#define __checkToken(__short,__long,__precedence) \
    if ( ! found && ( ( tk->isSymbol() && tk->symbol == __short ) \
        || ( tk->isQName() && tk->token == __long ) ) && __precedence >= candidatePrecedence ) \
    { \
        Log_XPathParser_Tokenize ( "Rearrange : found a new token at %p, %c/%s, precedence=%d\n", tk, __short, __long, __precedence); \
        found = true; \
        myPrecedence = __precedence; \
    }

        /* higher number = lower binding = rearranged first */
        __checkToken ( 'B', "or", 30 )
        __checkToken ( 'B', "and", 29 )
        __checkToken ( '=', "#Equals", 28 )
        __checkToken ( '!', "#NotEquals", 27 )
        __checkToken ( '<', "#Less", 26 )
        __checkToken ( '>', "#Greater", 25 )
        __checkToken ( '|', "#Union", 24 )
        __checkToken ( '+', "#Plus", 23 )
        __checkToken ( '-', "#Minus", 22 )
        __checkToken ( '*', "#Multiply", 21 )
        __checkToken ( 'B', "div", 20 )
        __checkToken ( 'B', "mod", 19 )

        if (!found)
        {
            Log_XPathParser_Tokenize ( "--> Not a token.\n" );
            lastToken = tk;
            continue;
        }

        if (tk->isQName())
        {
            /* a QName like "or"/"div" after a path step ('/', '~') or
             * repeated right after an identical name is really an element
             * name, not an operator */
            if ((tk == token) ||
                (lastToken && (lastToken->symbol == '/' || lastToken->symbol == '~'
                               || (candidatePrecedence && lastToken->token == tk->token))))
            {
                Log_XPathParser_Tokenize ( "Token '%p' (%c/%s) -> this is NOT a REAL symbol token\n", token, token->symbol, token->token.c_str() );
                lastToken = tk;
                continue;
            }
        }

        Log_XPathParser_Tokenize ( "Token '%p' (%c/%s) -> this is a token (last:%p=%c/%s)\n", token, token->symbol, token->token.c_str(),
                                   lastToken, lastToken ? lastToken->symbol : '?', lastToken ? lastToken->token.c_str() : "(none)" );
        AssertBug ( tk != token, "Token at head !\n" );

        /*
         * Swap tokens :
         * The token given as argument is the head token, so it's this one we have to requalify as holding the symbol
         * The tk, which previously hold the binary operand symbol, will hold the first token.
         */
        AssertBug ( ! tk->subExpression, "Token symbol '%c' already has a subExpression !\n", tk->symbol );
        AssertBug ( ! tk->subExpression2, "Token symbol '%c' already has a subExpression !\n", tk->symbol );

        candidateToken = tk;
        candidateLastToken = lastToken;
        candidatePrecedence = myPrecedence;
        lastToken = tk;
    }

    if (candidateToken)
    {
        Log_XPathParser_Tokenize ( "CandidateToken : %p, lastToken : %p, precedence = %d, full chain is :\n",
                                   candidateToken, candidateLastToken, candidatePrecedence );
        // token->log ();
#if PARANOID
        AssertBug ( candidateToken->next, "Candidate token has no next token !\n" );
#endif
        /* textual operators ("or", "div", ...) become the generic binary
         * symbol 'B' */
        if (candidateToken->isQName())
        {
            candidateToken->type = Token::Symbol;
            candidateToken->symbol = 'B';
        }

        /* exchange the payload of the head token and the operator token:
         * the head now carries the operator, the operator slot carries the
         * first token of the left operand */
        Token temp;
        copyToken(&temp, token);
        copyToken(token, candidateToken);
        copyToken(candidateToken, &temp);

        candidateToken->subExpression = token->subExpression;
        /* right operand: everything after the (former) operator position */
        token->subExpression2 = candidateToken->next;
        /* left operand: chain from candidateToken up to candidateLastToken */
        if (token == candidateLastToken)
            candidateToken->next = NULL;
        else
            candidateToken->next = token->next;
        token->subExpression = candidateToken;
        token->next = NULL;
        candidateLastToken->next = NULL;

        /* recursively order both operand sub-chains */
        rearrangeTokens(token->subExpression, tokenFactory);
        rearrangeTokens(token->subExpression2, tokenFactory);
    }
}
static void parseConstTypeVar (tokenInfo *const token, goKind kind) { // ConstDecl = "const" ( ConstSpec | "(" { ConstSpec ";" } ")" ) . // ConstSpec = IdentifierList [ [ Type ] "=" ExpressionList ] . // IdentifierList = identifier { "," identifier } . // ExpressionList = Expression { "," Expression } . // TypeDecl = "type" ( TypeSpec | "(" { TypeSpec ";" } ")" ) . // TypeSpec = identifier Type . // VarDecl = "var" ( VarSpec | "(" { VarSpec ";" } ")" ) . // VarSpec = IdentifierList ( Type [ "=" ExpressionList ] | "=" ExpressionList ) . boolean usesParens = FALSE; readToken (token); if (isType (token, TOKEN_OPEN_PAREN)) { usesParens = TRUE; readToken (token); } do { tokenInfo *typeToken = NULL; while (!isType (token, TOKEN_EOF)) { if (isType (token, TOKEN_IDENTIFIER)) { if (kind == GOTAG_TYPE) { typeToken = copyToken (token); readToken (token); if (isKeyword (token, KEYWORD_struct)) makeTag (typeToken, GOTAG_STRUCT, NULL, GOTAG_UNDEFINED, NULL); else if (isKeyword (token, KEYWORD_interface)) makeTag (typeToken, GOTAG_INTERFACE, NULL, GOTAG_UNDEFINED, NULL); else makeTag (typeToken, kind, NULL, GOTAG_UNDEFINED, NULL); break; } else makeTag (token, kind, NULL, GOTAG_UNDEFINED, NULL); readToken (token); } if (!isType (token, TOKEN_COMMA)) break; readToken (token); } if (typeToken) { if (isKeyword (token, KEYWORD_struct)) parseStructMembers (token, typeToken); else skipType (token); deleteToken (typeToken); } else skipType (token); while (!isType (token, TOKEN_SEMICOLON) && !isType (token, TOKEN_CLOSE_PAREN) && !isType (token, TOKEN_EOF)) { readToken (token); skipToMatched (token); } if (usesParens && !isType (token, TOKEN_CLOSE_PAREN)) { // we are at TOKEN_SEMICOLON readToken (token); } } while (!isType (token, TOKEN_EOF) && usesParens && !isType (token, TOKEN_CLOSE_PAREN)); }
/* parses a function definition and tags it, e.g.
 *
 *    function myfunc($foo, $bar) {}
 *
 * Builds a human-readable signature string while skipping over the
 * parameter list, then enters the function's scope when a body follows.
 * Returns true when the caller should read the next token itself. */
static bool parseFunction (tokenInfo *const token)
{
	bool readNext = true;
	tokenInfo *funcName;
	const char *access;

	readToken (token);
	if (token->type != TOKEN_IDENTIFIER)
		return false;

	access = parsePowerShellScope (token);

	funcName = newToken ();
	copyToken (funcName, token, true);

	readToken (token);
	if (token->type == TOKEN_OPEN_PAREN)
	{
		vString *signature = vStringNew ();
		int nesting = 1; /* the open paren we are sitting on */

		vStringPut (signature, '(');
		do
		{
			readToken (token);

			/* track paren nesting and append a printable rendition of the
			 * token to the signature in a single pass */
			switch (token->type)
			{
				case TOKEN_OPEN_PAREN:
					nesting++;
					vStringPut (signature, '(');
					break;

				case TOKEN_CLOSE_PAREN:
					nesting--;
					vStringPut (signature, ')');
					break;

				case TOKEN_CLOSE_CURLY:		vStringPut (signature, '}');		break;
				case TOKEN_CLOSE_SQUARE:	vStringPut (signature, ']');		break;
				case TOKEN_COLON:			vStringPut (signature, ':');		break;
				case TOKEN_COMMA:			vStringCatS (signature, ", ");		break;
				case TOKEN_EQUAL_SIGN:		vStringCatS (signature, " = ");		break;
				case TOKEN_OPEN_CURLY:		vStringPut (signature, '{');		break;
				case TOKEN_OPEN_SQUARE:		vStringPut (signature, '[');		break;
				case TOKEN_PERIOD:			vStringPut (signature, '.');		break;
				case TOKEN_SEMICOLON:		vStringPut (signature, ';');		break;
				case TOKEN_STRING:			vStringCatS (signature, "'...'");	break;

				case TOKEN_IDENTIFIER:
				case TOKEN_KEYWORD:
				case TOKEN_VARIABLE:
					switch (vStringLast (signature))
					{
						case 0:
						case ' ':
						case '{':
						case '(':
						case '[':
						case '.':
							/* no need for a space between those and the identifier */
							break;
						default:
							vStringPut (signature, ' ');
							break;
					}
					if (token->type == TOKEN_VARIABLE)
						vStringPut (signature, '$');
					vStringCat (signature, token->string);
					break;

				default:
					break;
			}
		}
		while (token->type != TOKEN_EOF && nesting > 0);

		makeFunctionTag (funcName, signature, access);
		vStringDelete (signature);

		readToken (token);
	}
	else if (token->type == TOKEN_OPEN_CURLY)
	{
		/* filters doesn't need to have an arglist */
		makeFunctionTag (funcName, NULL, access);
	}

	if (token->type == TOKEN_OPEN_CURLY)
		enterScope (token, funcName->string, K_FUNCTION);
	else
		readNext = false;

	deleteToken (funcName);

	return readNext;
}