Esempio n. 1
0
static int skipSingleComment (void)
{
	int c;
	do
	{
		c = getcFromInputFile ();
		if (c == '\r')
		{
			int next = getcFromInputFile ();
			if (next != '\n')
				ungetcToInputFile (next);
			else
				c = next;
		}
		/* ?> in single-line comments leaves PHP mode */
		else if (c == '?')
		{
			int next = getcFromInputFile ();
			if (next == '>')
				InPhp = FALSE;
			else
				ungetcToInputFile (next);
		}
	} while (InPhp && c != EOF && c != '\n' && c != '\r');
	return c;
}
Esempio n. 2
0
static void parseRegExp (void)
{
	int c;
	boolean in_range = FALSE;

	do
	{
		c = getcFromInputFile ();
		if (! in_range && c == '/')
		{
			do /* skip flags */
			{
				c = getcFromInputFile ();
			} while (isalpha (c));
			ungetcToInputFile (c);
			break;
		}
		else if (c == '\n' || c == '\r')
		{
			/* invalid in a regex */
			ungetcToInputFile (c);
			break;
		}
		else if (c == '\\')
			c = getcFromInputFile (); /* skip next character */
		else if (c == '[')
			in_range = TRUE;
		else if (c == ']')
			in_range = FALSE;
	} while (c != EOF);
}
Esempio n. 3
0
File: m4.c Progetto: pjkack/ctags
/* reads a possibly quoted word.  word characters are those passing IS_WORD() */
static void readQuotedWord(vString *const name)
{
	unsigned int depth = 0;
	int openQuote = 0, closeQuote = 0;
	int c = getcFromInputFile();

	closeQuote = getCloseQuote(c);
	if (closeQuote != 0)
	{
		openQuote = c;
		depth ++;
		c = getcFromInputFile();
	}

	for (; c != EOF; c = getcFromInputFile())
	{
		/* don't allow embedded NULs, and prevents to match when quote == 0 (aka none) */
		if (c == 0)
			break;
		/* close before open to support open and close characters to be the same */
		else if (c == closeQuote)
			depth --;
		else if (c == openQuote)
			depth ++;
		else if (IS_WORD(c) || depth > 0)
			vStringPut(name, c);
		else
		{
			ungetcToInputFile(c);
			break;
		}
	}
}
Esempio n. 4
0
static int findPhpStart (void)
{
	int c;
	do
	{
		if ((c = getcFromInputFile ()) == '<')
		{
			c = getcFromInputFile ();
			/* <? and <?php, but not <?xml */
			if (c == '?')
			{
				/* don't enter PHP mode on "<?xml", yet still support short open tags (<?) */
				if (tolower ((c = getcFromInputFile ())) != 'x' ||
					tolower ((c = getcFromInputFile ())) != 'm' ||
					tolower ((c = getcFromInputFile ())) != 'l')
				{
					break;
				}
			}
			/* <script language="php"> */
			else
			{
				ungetcToInputFile (c);
				if (isOpenScriptLanguagePhp ('<'))
					break;
			}
		}
	}
	while (c != EOF);

	return c;
}
Esempio n. 5
0
static vString *parseNumeric (int c)
{
	vString *string = vStringNew ();
	vString *integer = parseInteger (c);
	vStringCopy (string, integer);
	vStringDelete (integer);

	c = getcFromInputFile ();
	if (c == '.')
	{
		integer = parseInteger ('\0');
		vStringPut (string, c);
		vStringCat (string, integer);
		vStringDelete (integer);
		c = getcFromInputFile ();
	}
	if (tolower (c) == 'e')
	{
		integer = parseInteger ('\0');
		vStringPut (string, c);
		vStringCat (string, integer);
		vStringDelete (integer);
	}
	else if (!isspace (c))
		ungetcToInputFile (c);

	vStringTerminate (string);

	return string;
}
Esempio n. 6
0
File: make.c Progetto: jonthn/ctags
static void skipLine (void)
{
    int c;
    do
        c = nextChar ();
    while (c != EOF  &&  c != '\n');
    if (c == '\n')
        ungetcToInputFile (c);
}
Esempio n. 7
0
File: css.c Progetto: pjkack/ctags
static void parseSelector (vString *const string, const int firstChar)
{
	int c = firstChar;
	do
	{
		vStringPut (string, (char) c);
		c = getcFromInputFile ();
	} while (isSelectorChar (c));
	ungetcToInputFile (c);
}
Esempio n. 8
0
static void parseIdentifier (vString *const string, const int firstChar)
{
	int c = firstChar;
	do
	{
		vStringPut (string, (char) c);
		c = getcFromInputFile ();
	} while (isIdentChar (c));
	ungetcToInputFile (c);
	vStringTerminate (string);
}
Esempio n. 9
0
File: go.c Progetto: Dev0Null/ctags
static void parseIdentifier (vString *const string, const int firstChar)
{
	int c = firstChar;
	do
	{
		vStringPut (string, c);
		c = getcFromInputFile ();
	} while (isIdentChar (c));
	vStringTerminate (string);
	ungetcToInputFile (c);		/* always unget, LF might add a semicolon */
}
Esempio n. 10
0
static void parseString (vString *const string, const int delimiter)
{
	boolean end = FALSE;
	while (! end)
	{
		int c = getcFromInputFile ();
		if (c == EOF)
			end = TRUE;
		else if (c == '\\')
		{
			/* Eat the escape sequence (\", \', etc).  We properly handle
			 * <LineContinuation> by eating a whole \<CR><LF> not to see <LF>
			 * as an unescaped character, which is invalid and handled below.
			 * Also, handle the fact that <LineContinuation> produces an empty
			 * sequence.
			 * See ECMA-262 7.8.4 */
			c = getcFromInputFile ();
			if (c != '\r' && c != '\n')
				vStringPut(string, c);
			else if (c == '\r')
			{
				c = getcFromInputFile();
				if (c != '\n')
					ungetcToInputFile (c);
			}
		}
		else if (c == delimiter)
			end = TRUE;
		else if (c == '\r' || c == '\n')
		{
			/* those are invalid when not escaped */
			end = TRUE;
			/* we don't want to eat the newline itself to let the automatic
			 * semicolon insertion code kick in */
			ungetcToInputFile (c);
		}
		else
			vStringPut (string, c);
	}
	vStringTerminate (string);
}
Esempio n. 11
0
/*	Read a C identifier beginning with "firstChar" and places it into
 *	"name".
 */
static void parseIdentifier (vString *const string, const int firstChar)
{
	int c = firstChar;
	Assert (isIdentChar (c));
	do
	{
		vStringPut (string, c);
		c = getcFromInputFile ();
	} while (isIdentChar (c));
	vStringTerminate (string);
	ungetcToInputFile (c);		/* unget non-identifier character */
}
Esempio n. 12
0
static void parseFreeOperator (vString *const string, const int firstChar)
{
	int c = firstChar;

	do
	{
		vStringPut (string, c);
		c = getcFromInputFile ();
	} while (c > ' ');

	vStringTerminate (string);
	if (!isspace (c))
		ungetcToInputFile (c);  /* unget non-identifier character */
}
Esempio n. 13
0
File: m4.c Progetto: pjkack/ctags
static bool skipLineEnding(int c)
{
	if (c == '\n')
		return true;
	else if (c == '\r')
	{
		/* try to eat the `\n' of a `\r\n' sequence */
		c = getcFromInputFile();
		if (c != '\n')
			ungetcToInputFile(c);
		return true;
	}

	return false;
}
Esempio n. 14
0
File: make.c Progetto: jonthn/ctags
static void readIdentifier (const int first, vString *const id)
{
    int depth = 0;
    int c = first;
    vStringClear (id);
    while (isIdentifier (c) || (depth > 0 && c != EOF && c != '\n'))
    {
        if (c == '(' || c == '}')
            depth++;
        else if (depth > 0 && (c == ')' || c == '}'))
            depth--;
        vStringPut (id, c);
        c = nextChar ();
    }
    ungetcToInputFile (c);
    vStringTerminate (id);
}
Esempio n. 15
0
static int skipSingleComment (void)
{
	int c;
	do
	{
		c = getcFromInputFile ();
		if (c == '\r')
		{
			int next = getcFromInputFile ();
			if (next != '\n')
				ungetcToInputFile (next);
			else
				c = next;
		}
	} while (c != EOF && c != '\n' && c != '\r');
	return c;
}
Esempio n. 16
0
static void parseTemplateString (vString *const string)
{
	int c;
	do
	{
		c = getcFromInputFile ();
		if (c == '`')
			break;
		vStringPut (string, c);
		if (c == '\\')
		{
			c = getcFromInputFile();
			vStringPut(string, c);
		}
		else if (c == '$')
		{
			c = getcFromInputFile ();
			if (c != '{')
				ungetcToInputFile (c);
			else
			{
				int depth = 1;
				/* we need to use the real token machinery to handle strings,
				 * comments, regexes and whatnot */
				tokenInfo *token = newToken ();
				LastTokenType = TOKEN_UNDEFINED;
				vStringPut(string, c);
				do
				{
					readTokenFull (token, FALSE, string);
					if (isType (token, TOKEN_OPEN_CURLY))
						depth++;
					else if (isType (token, TOKEN_CLOSE_CURLY))
						depth--;
				}
				while (! isType (token, TOKEN_EOF) && depth > 0);
				deleteToken (token);
			}
		}
	}
	while (c != EOF);
	vStringTerminate (string);
}
Esempio n. 17
0
static int vGetc (void)
{
	int c;
	if (Ungetc == '\0')
		c = getcFromInputFile ();
	else
	{
		c = Ungetc;
		Ungetc = '\0';
	}
	if (c == '/')
	{
		int c2 = getcFromInputFile ();
		if (c2 == EOF)
			longjmp (Exception, (int) ExceptionEOF);
		else if (c2 == '/')  /* strip comment until end-of-line */
		{
			do
				c = getcFromInputFile ();
			while (c != '\n'  &&  c != EOF);
		}
		else if (c2 == '*')  /* strip block comment */
		{
			c = cppSkipOverCComment();
		}
		else
		{
			ungetcToInputFile (c2);
		}
	}
	else if (c == '"')  /* strip string contents */
	{
		int c2;
		do
			c2 = getcFromInputFile ();
		while (c2 != '"'  &&  c2 != EOF);
		c = '@';
	}
	if (c == EOF)
		longjmp (Exception, (int) ExceptionEOF);
	return c;
}
Esempio n. 18
0
File: html.c Progetto: qzhuyan/ctags
static void readTokenText (tokenInfo *const token, bool collectText)
{
	int c;
	int lastC = 'X';  /* whatever non-space character */

	vStringClear (token->string);

getNextChar:

	c = getcFromInputFile ();

	switch (c)
	{
		case EOF:
			token->type = TOKEN_EOF;
			break;

		case '<':
			ungetcToInputFile (c);
			token->type = TOKEN_TEXT;
			break;

		default:
			if (collectText)
			{
				if (isspace (c))
					c = ' ';
				if (c != ' ' || lastC != ' ')
				{
					if (collectText)
						vStringPut (token->string, c);
					lastC = c;
				}
			}

			goto getNextChar;
	}
}
Esempio n. 19
0
/*  If a numeric is passed in 'c', this is used as the first digit of the
 *  numeric being parsed.
 */
static vString *parseInteger (int c)
{
	vString *string = vStringNew ();

	if (c == '\0')
		c = getcFromInputFile ();
	if (c == '-')
	{
		vStringPut (string, c);
		c = getcFromInputFile ();
	}
	else if (! isdigit (c))
		c = getcFromInputFile ();
	while (c != EOF  &&  (isdigit (c)  ||  c == '_'))
	{
		vStringPut (string, c);
		c = getcFromInputFile ();
	}
	vStringTerminate (string);
	ungetcToInputFile (c);

	return string;
}
Esempio n. 20
0
File: go.c Progetto: Dev0Null/ctags
static void readToken (tokenInfo *const token)
{
	int c;
	static tokenType lastTokenType = TOKEN_NONE;
	boolean firstWhitespace = TRUE;
	boolean whitespace;

	token->type = TOKEN_NONE;
	token->keyword = KEYWORD_NONE;
	vStringClear (token->string);

getNextChar:
	do
	{
		c = getcFromInputFile ();
		token->lineNumber = getInputLineNumber ();
		token->filePosition = getInputFilePosition ();
		if (c == '\n' && (lastTokenType == TOKEN_IDENTIFIER ||
						  lastTokenType == TOKEN_STRING ||
						  lastTokenType == TOKEN_OTHER ||
						  lastTokenType == TOKEN_CLOSE_PAREN ||
						  lastTokenType == TOKEN_CLOSE_CURLY ||
						  lastTokenType == TOKEN_CLOSE_SQUARE))
		{
			c = ';';  // semicolon injection
		}
		whitespace = c == '\t'  ||  c == ' ' ||  c == '\r' || c == '\n';
		if (signature && whitespace && firstWhitespace && vStringLength (signature) < MAX_SIGNATURE_LENGTH)
		{
			firstWhitespace = FALSE;
			vStringPut(signature, ' ');
		}
	}
	while (whitespace);

	switch (c)
	{
		case EOF:
			token->type = TOKEN_EOF;
			break;

		case ';':
			token->type = TOKEN_SEMICOLON;
			break;

		case '/':
			{
				boolean hasNewline = FALSE;
				int d = getcFromInputFile ();
				switch (d)
				{
					case '/':
						skipToCharacterInInputFile ('\n');
						/* Line comments start with the
						 * character sequence // and
						 * continue through the next
						 * newline. A line comment acts
						 * like a newline.  */
						ungetcToInputFile ('\n');
						goto getNextChar;
					case '*':
						do
						{
							do
							{
								d = getcFromInputFile ();
								if (d == '\n')
								{
									hasNewline = TRUE;
								}
							} while (d != EOF && d != '*');

							c = getcFromInputFile ();
							if (c == '/')
								break;
							else
								ungetcToInputFile (c);
						} while (c != EOF && c != '\0');

						ungetcToInputFile (hasNewline ? '\n' : ' ');
						goto getNextChar;
					default:
						token->type = TOKEN_OTHER;
						ungetcToInputFile (d);
						break;
				}
			}
			break;

		case '"':
		case '\'':
		case '`':
			token->type = TOKEN_STRING;
			parseString (token->string, c);
			token->lineNumber = getInputLineNumber ();
			token->filePosition = getInputFilePosition ();
			break;

		case '<':
			{
				int d = getcFromInputFile ();
				if (d == '-')
					token->type = TOKEN_LEFT_ARROW;
				else
				{
					ungetcToInputFile (d);
					token->type = TOKEN_OTHER;
				}
			}
			break;

		case '(':
			token->type = TOKEN_OPEN_PAREN;
			break;

		case ')':
			token->type = TOKEN_CLOSE_PAREN;
			break;

		case '{':
			token->type = TOKEN_OPEN_CURLY;
			break;

		case '}':
			token->type = TOKEN_CLOSE_CURLY;
			break;

		case '[':
			token->type = TOKEN_OPEN_SQUARE;
			break;

		case ']':
			token->type = TOKEN_CLOSE_SQUARE;
			break;

		case '*':
			token->type = TOKEN_STAR;
			break;

		case '.':
			token->type = TOKEN_DOT;
			break;

		case ',':
			token->type = TOKEN_COMMA;
			break;

		default:
			if (isStartIdentChar (c))
			{
				parseIdentifier (token->string, c);
				token->lineNumber = getInputLineNumber ();
				token->filePosition = getInputFilePosition ();
				token->keyword = lookupKeyword (vStringValue (token->string), Lang_go);
				if (isKeyword (token, KEYWORD_NONE))
					token->type = TOKEN_IDENTIFIER;
				else
					token->type = TOKEN_KEYWORD;
			}
			else
				token->type = TOKEN_OTHER;
			break;
	}

	if (signature && vStringLength (signature) < MAX_SIGNATURE_LENGTH)
	{
		if (token->type == TOKEN_LEFT_ARROW)
			vStringCatS(signature, "<-");
		else if (token->type == TOKEN_STRING)
		{
			// only struct member annotations can appear in function prototypes
			// so only `` type strings are possible
			vStringPut(signature, '`');
			vStringCat(signature, token->string);
			vStringPut(signature, '`');
		}
		else if (token->type == TOKEN_IDENTIFIER || token->type == TOKEN_KEYWORD)
			vStringCat(signature, token->string);
		else if (c != EOF)
			vStringPut(signature, c);
	}

	lastTokenType = token->type;
}
Esempio n. 21
0
File: css.c Progetto: pjkack/ctags
static void readToken (tokenInfo *const token)
{
	int c;

	vStringClear (token->string);

getNextChar:

	c = getcFromInputFile ();
	while (isspace (c))
		c = getcFromInputFile ();

	token->type = c;
	switch (c)
	{
		case EOF: token->type = TOKEN_EOF; break;

		case '\'':
		case '"':
		{
			const int delimiter = c;
			do
			{
				vStringPut (token->string, c);
				c = getcFromInputFile ();
				if (c == '\\')
					c = getcFromInputFile ();
			}
			while (c != EOF && c != delimiter);
			if (c != EOF)
				vStringPut (token->string, c);
			token->type = TOKEN_STRING;
			break;
		}

		case '/': /* maybe comment start */
		{
			int d = getcFromInputFile ();
			if (d != '*')
			{
				ungetcToInputFile (d);
				vStringPut (token->string, c);
				token->type = c;
			}
			else
			{
				d = getcFromInputFile ();
				do
				{
					c = d;
					d = getcFromInputFile ();
				}
				while (d != EOF && ! (c == '*' && d == '/'));
				goto getNextChar;
			}
			break;
		}

		default:
			if (! isSelectorChar (c))
			{
				vStringPut (token->string, c);
				token->type = c;
			}
			else
			{
				parseSelector (token->string, c);
				token->type = TOKEN_SELECTOR;
			}
			break;
	}
}
Esempio n. 22
0
static void readToken (tokenInfo * const token)
{
	int c;

	token->type = TOKEN_NONE;
	token->keyword = KEYWORD_NONE;
	vStringClear (token->string);

  getNextChar:
	do
	{
		c = getcFromInputFile ();
		token->lineNumber = getInputLineNumber ();
		token->filePosition = getInputFilePosition ();
	}
	while (c == '\t' || c == ' ' || c == '\n');

	switch (c)
	{
	case EOF:
		token->type = TOKEN_EOF;
		break;
	case '(':
		token->type = TOKEN_OPEN_PAREN;
		break;
	case ')':
		token->type = TOKEN_CLOSE_PAREN;
		break;
	case ';':
		token->type = TOKEN_SEMICOLON;
		break;
	case '.':
		token->type = TOKEN_PERIOD;
		break;
	case ',':
		token->type = TOKEN_COMMA;
		break;
	case '\'':	/* only single char are inside simple quotes */
		break;	/* or it is for attributes so we don't care */
	case '"':
		token->type = TOKEN_STRING;
		parseString (token->string, c);
		token->lineNumber = getInputLineNumber ();
		token->filePosition = getInputFilePosition ();
		break;
	case '-':
		c = getcFromInputFile ();
		if (c == '-')	/* start of a comment */
		{
			skipToCharacterInInputFile ('\n');
			goto getNextChar;
		}
		else
		{
			if (!isspace (c))
				ungetcToInputFile (c);
			token->type = TOKEN_OPERATOR;
		}
		break;
	default:
		if (!isIdentChar1 (c))
			token->type = TOKEN_NONE;
		else
		{
			parseIdentifier (token->string, c);
			token->lineNumber = getInputLineNumber ();
			token->filePosition = getInputFilePosition ();
			token->keyword = lookupCaseKeyword (vStringValue (token->string), Lang_vhdl);
			if (isKeyword (token, KEYWORD_NONE))
				token->type = TOKEN_IDENTIFIER;
			else
				token->type = TOKEN_KEYWORD;
		}
		break;
	}
}
Esempio n. 23
0
File: make.c Progetto: jonthn/ctags
static void findMakeTags (void)
{
    stringList *identifiers = stringListNew ();
    boolean newline = TRUE;
    boolean in_define = FALSE;
    boolean in_rule = FALSE;
    boolean variable_possible = TRUE;
    int c;

    while ((c = nextChar ()) != EOF)
    {
        if (newline)
        {
            if (in_rule)
            {
                if (c == '\t' || (c = skipToNonWhite (c)) == '#')
                {
                    skipLine ();  /* skip rule or comment */
                    c = nextChar ();
                }
                else if (c != '\n')
                    in_rule = FALSE;
            }
            stringListClear (identifiers);
            variable_possible = (boolean)(!in_rule);
            newline = FALSE;
        }
        if (c == '\n')
            newline = TRUE;
        else if (isspace (c))
            continue;
        else if (c == '#')
            skipLine ();
        else if (variable_possible && c == '?')
        {
            c = nextChar ();
            ungetcToInputFile (c);
            variable_possible = (c == '=');
        }
        else if (variable_possible && c == ':' &&
                 stringListCount (identifiers) > 0)
        {
            c = nextChar ();
            ungetcToInputFile (c);
            if (c != '=')
            {
                unsigned int i;
                for (i = 0; i < stringListCount (identifiers); i++)
                    newTarget (stringListItem (identifiers, i));
                stringListClear (identifiers);
                in_rule = TRUE;
            }
        }
        else if (variable_possible && c == '=' &&
                 stringListCount (identifiers) == 1)
        {
            newMacro (stringListItem (identifiers, 0));
            skipLine ();
            in_rule = FALSE;
        }
        else if (variable_possible && isIdentifier (c))
        {
            vString *name = vStringNew ();
            readIdentifier (c, name);
            stringListAdd (identifiers, name);

            if (stringListCount (identifiers) == 1)
            {
                if (in_define && ! strcmp (vStringValue (name), "endef"))
                    in_define = FALSE;
                else if (in_define)
                    skipLine ();
                else if (! strcmp (vStringValue (name), "define"))
                {
                    in_define = TRUE;
                    c = skipToNonWhite (nextChar ());
                    vStringClear (name);
                    /* all remaining characters on the line are the name -- even spaces */
                    while (c != EOF && c != '\n')
                    {
                        vStringPut (name, c);
                        c = nextChar ();
                    }
                    if (c == '\n')
                        ungetcToInputFile (c);
                    vStringTerminate (name);
                    vStringStripTrailing (name);
                    newMacro (name);
                }
                else if (! strcmp (vStringValue (name), "export"))
                    stringListClear (identifiers);
                else if (! strcmp (vStringValue (name), "include")
                         || ! strcmp (vStringValue (name), "sinclude")
                         || ! strcmp (vStringValue (name), "-include"))
                {
                    boolean optional = (vStringValue (name)[0] == 'i')? FALSE: TRUE;
                    while (1)
                    {
                        c = skipToNonWhite (nextChar ());
                        readIdentifier (c, name);
                        vStringStripTrailing (name);
                        if (isAcceptableAsInclude(name))
                            newInclude (name, optional);

                        /* non-space characters after readIdentifier() may
                         * be rejected by the function:
                         * e.g.
                         * include $*
                         *
                         * Here, remove such characters from input stream.
                         */
                        do
                            c = nextChar ();
                        while (c != EOF && c != '\n' && (!isspace (c)));
                        if (c == '\n')
                            ungetcToInputFile (c);

                        if (c == EOF || c == '\n')
                            break;
                    }
                }
            }
        }
        else
            variable_possible = FALSE;
    }
    stringListDelete (identifiers);
}
Esempio n. 24
0
static void readTokenFull (tokenInfo *const token, boolean include_newlines, vString *const repr)
{
	int c;
	int i;
	boolean newline_encountered = FALSE;

	/* if we've got a token held back, emit it */
	if (NextToken)
	{
		copyToken (token, NextToken, FALSE);
		deleteToken (NextToken);
		NextToken = NULL;
		return;
	}

	token->type			= TOKEN_UNDEFINED;
	token->keyword		= KEYWORD_NONE;
	vStringClear (token->string);

getNextChar:
	i = 0;
	do
	{
		c = getcFromInputFile ();
		if (include_newlines && (c == '\r' || c == '\n'))
			newline_encountered = TRUE;
		i++;
	}
	while (c == '\t' || c == ' ' || c == '\r' || c == '\n');

	token->lineNumber   = getInputLineNumber ();
	token->filePosition = getInputFilePosition ();

	if (repr)
	{
		if (i > 1)
			vStringPut (repr, ' ');
		vStringPut (repr, c);
	}

	switch (c)
	{
		case EOF: token->type = TOKEN_EOF;					break;
		case '(': token->type = TOKEN_OPEN_PAREN;			break;
		case ')': token->type = TOKEN_CLOSE_PAREN;			break;
		case ';': token->type = TOKEN_SEMICOLON;			break;
		case ',': token->type = TOKEN_COMMA;				break;
		case '.': token->type = TOKEN_PERIOD;				break;
		case ':': token->type = TOKEN_COLON;				break;
		case '{': token->type = TOKEN_OPEN_CURLY;			break;
		case '}': token->type = TOKEN_CLOSE_CURLY;			break;
		case '=': token->type = TOKEN_EQUAL_SIGN;			break;
		case '[': token->type = TOKEN_OPEN_SQUARE;			break;
		case ']': token->type = TOKEN_CLOSE_SQUARE;			break;

		case '+':
		case '-':
			{
				int d = getcFromInputFile ();
				if (d == c) /* ++ or -- */
					token->type = TOKEN_POSTFIX_OPERATOR;
				else
				{
					ungetcToInputFile (d);
					token->type = TOKEN_BINARY_OPERATOR;
				}
				break;
			}

		case '*':
		case '%':
		case '?':
		case '>':
		case '<':
		case '^':
		case '|':
		case '&':
			token->type = TOKEN_BINARY_OPERATOR;
			break;

		case '\'':
		case '"':
				  token->type = TOKEN_STRING;
				  parseString (token->string, c);
				  token->lineNumber = getInputLineNumber ();
				  token->filePosition = getInputFilePosition ();
				  if (repr)
				  {
					  vStringCat (repr, token->string);
					  vStringPut (repr, c);
				  }
				  break;

		case '`':
				  token->type = TOKEN_TEMPLATE_STRING;
				  parseTemplateString (token->string);
				  token->lineNumber = getInputLineNumber ();
				  token->filePosition = getInputFilePosition ();
				  if (repr)
				  {
					  vStringCat (repr, token->string);
					  vStringPut (repr, c);
				  }
				  break;

		case '\\':
				  c = getcFromInputFile ();
				  if (c != '\\'  && c != '"'  &&  !isspace (c))
					  ungetcToInputFile (c);
				  token->type = TOKEN_CHARACTER;
				  token->lineNumber = getInputLineNumber ();
				  token->filePosition = getInputFilePosition ();
				  break;

		case '/':
				  {
					  int d = getcFromInputFile ();
					  if ( (d != '*') &&		/* is this the start of a comment? */
							  (d != '/') )		/* is a one line comment? */
					  {
						  ungetcToInputFile (d);
						  switch (LastTokenType)
						  {
							  case TOKEN_CHARACTER:
							  case TOKEN_IDENTIFIER:
							  case TOKEN_STRING:
							  case TOKEN_TEMPLATE_STRING:
							  case TOKEN_CLOSE_CURLY:
							  case TOKEN_CLOSE_PAREN:
							  case TOKEN_CLOSE_SQUARE:
								  token->type = TOKEN_BINARY_OPERATOR;
								  break;

							  default:
								  token->type = TOKEN_REGEXP;
								  parseRegExp ();
								  token->lineNumber = getInputLineNumber ();
								  token->filePosition = getInputFilePosition ();
								  break;
						  }
					  }
					  else
					  {
						  if (repr) /* remove the / we added */
							  repr->buffer[--repr->length] = 0;
						  if (d == '*')
						  {
							  do
							  {
								  skipToCharacterInInputFile ('*');
								  c = getcFromInputFile ();
								  if (c == '/')
									  break;
								  else
									  ungetcToInputFile (c);
							  } while (c != EOF && c != '\0');
							  goto getNextChar;
						  }
						  else if (d == '/')	/* is this the start of a comment?  */
						  {
							  skipToCharacterInInputFile ('\n');
							  /* if we care about newlines, put it back so it is seen */
							  if (include_newlines)
								  ungetcToInputFile ('\n');
							  goto getNextChar;
						  }
					  }
					  break;
				  }

		case '#':
				  /* skip shebang in case of e.g. Node.js scripts */
				  if (token->lineNumber > 1)
					  token->type = TOKEN_UNDEFINED;
				  else if ((c = getcFromInputFile ()) != '!')
				  {
					  ungetcToInputFile (c);
					  token->type = TOKEN_UNDEFINED;
				  }
				  else
				  {
					  skipToCharacterInInputFile ('\n');
					  goto getNextChar;
				  }
				  break;

		default:
				  if (! isIdentChar (c))
					  token->type = TOKEN_UNDEFINED;
				  else
				  {
					  parseIdentifier (token->string, c);
					  token->lineNumber = getInputLineNumber ();
					  token->filePosition = getInputFilePosition ();
					  token->keyword = analyzeToken (token->string, Lang_js);
					  if (isKeyword (token, KEYWORD_NONE))
						  token->type = TOKEN_IDENTIFIER;
					  else
						  token->type = TOKEN_KEYWORD;
					  if (repr && vStringLength (token->string) > 1)
						  vStringCatS (repr, vStringValue (token->string) + 1);
				  }
				  break;
	}

	if (include_newlines && newline_encountered)
	{
		/* This isn't strictly correct per the standard, but following the
		 * real rules means understanding all statements, and that's not
		 * what the parser currently does.  What we do here is a guess, by
		 * avoiding inserting semicolons that would make the statement on
		 * the left or right obviously invalid.  Hopefully this should not
		 * have false negatives (e.g. should not miss insertion of a semicolon)
		 * but might have false positives (e.g. it will wrongfully emit a
		 * semicolon sometimes, i.e. for the newline in "foo\n(bar)").
		 * This should however be mostly harmless as we only deal with
		 * newlines in specific situations where we know a false positive
		 * wouldn't hurt too bad. */

		/* these already end a statement, so no need to duplicate it */
		#define IS_STMT_SEPARATOR(t) ((t) == TOKEN_SEMICOLON    || \
		                              (t) == TOKEN_EOF          || \
		                              (t) == TOKEN_COMMA        || \
		                              (t) == TOKEN_CLOSE_CURLY  || \
		                              (t) == TOKEN_OPEN_CURLY)
		/* these cannot be the start or end of a statement */
		#define IS_BINARY_OPERATOR(t) ((t) == TOKEN_EQUAL_SIGN      || \
		                               (t) == TOKEN_COLON           || \
		                               (t) == TOKEN_PERIOD          || \
		                               (t) == TOKEN_BINARY_OPERATOR)

		if (! IS_STMT_SEPARATOR(LastTokenType) &&
		    ! IS_STMT_SEPARATOR(token->type) &&
		    ! IS_BINARY_OPERATOR(LastTokenType) &&
		    ! IS_BINARY_OPERATOR(token->type) &&
		    /* these cannot be followed by a semicolon */
		    ! (LastTokenType == TOKEN_OPEN_PAREN ||
		       LastTokenType == TOKEN_OPEN_SQUARE))
		{
			/* hold the token... */
			Assert (NextToken == NULL);
			NextToken = newToken ();
			copyToken (NextToken, token, FALSE);

			/* ...and emit a semicolon instead */
			token->type		= TOKEN_SEMICOLON;
			token->keyword	= KEYWORD_NONE;
			vStringClear (token->string);
			if (repr)
				vStringPut (token->string, '\n');
		}

		#undef IS_STMT_SEPARATOR
		#undef IS_BINARY_OPERATOR
	}

	LastTokenType = token->type;
}
Esempio n. 25
0
static void readToken (tokenInfo *const token)
{
	int c;

	token->type		= TOKEN_UNDEFINED;
	token->keyword	= KEYWORD_NONE;
	vStringClear (token->string);

getNextChar:

	if (! InPhp)
	{
		c = findPhpStart ();
		if (c != EOF)
			InPhp = TRUE;
	}
	else
		c = getcFromInputFile ();

	c = skipWhitespaces (c);

	token->lineNumber   = getInputLineNumber ();
	token->filePosition = getInputFilePosition ();

	switch (c)
	{
		case EOF: token->type = TOKEN_EOF;					break;
		case '(': token->type = TOKEN_OPEN_PAREN;			break;
		case ')': token->type = TOKEN_CLOSE_PAREN;			break;
		case ';': token->type = TOKEN_SEMICOLON;			break;
		case ',': token->type = TOKEN_COMMA;				break;
		case '.': token->type = TOKEN_PERIOD;				break;
		case ':': token->type = TOKEN_COLON;				break;
		case '{': token->type = TOKEN_OPEN_CURLY;			break;
		case '}': token->type = TOKEN_CLOSE_CURLY;			break;
		case '[': token->type = TOKEN_OPEN_SQUARE;			break;
		case ']': token->type = TOKEN_CLOSE_SQUARE;			break;
		case '&': token->type = TOKEN_AMPERSAND;			break;
		case '\\': token->type = TOKEN_BACKSLASH;			break;

		case '=':
		{
			int d = getcFromInputFile ();
			if (d == '=' || d == '>')
				token->type = TOKEN_OPERATOR;
			else
			{
				ungetcToInputFile (d);
				token->type = TOKEN_EQUAL_SIGN;
			}
			break;
		}

		case '\'':
		case '"':
			token->type = TOKEN_STRING;
			parseString (token->string, c);
			token->lineNumber = getInputLineNumber ();
			token->filePosition = getInputFilePosition ();
			break;

		case '<':
		{
			int d = getcFromInputFile ();
			if (d == '/')
			{
				/* </script[:white:]*> */
				if (tolower ((d = getcFromInputFile ())) == 's' &&
					tolower ((d = getcFromInputFile ())) == 'c' &&
					tolower ((d = getcFromInputFile ())) == 'r' &&
					tolower ((d = getcFromInputFile ())) == 'i' &&
					tolower ((d = getcFromInputFile ())) == 'p' &&
					tolower ((d = getcFromInputFile ())) == 't' &&
					(d = skipWhitespaces (getcFromInputFile ())) == '>')
				{
					InPhp = FALSE;
					goto getNextChar;
				}
				else
				{
					ungetcToInputFile (d);
					token->type = TOKEN_UNDEFINED;
				}
			}
			else if (d == '<' && (d = getcFromInputFile ()) == '<')
			{
				token->type = TOKEN_STRING;
				parseHeredoc (token->string);
			}
			else
			{
				ungetcToInputFile (d);
				token->type = TOKEN_UNDEFINED;
			}
			break;
		}

		case '#': /* comment */
			skipSingleComment ();
			goto getNextChar;
			break;

		case '+':
		case '-':
		case '*':
		case '%':
		{
			int d = getcFromInputFile ();
			if (d != '=' && ! (c == '-' && d == '>'))
				ungetcToInputFile (d);
			token->type = TOKEN_OPERATOR;
			break;
		}

		case '/': /* division or comment start */
		{
			int d = getcFromInputFile ();
			if (d == '/') /* single-line comment */
			{
				skipSingleComment ();
				goto getNextChar;
			}
			else if (d == '*')
			{
				do
				{
					c = skipToCharacter ('*');
					if (c != EOF)
					{
						c = getcFromInputFile ();
						if (c == '/')
							break;
						else
							ungetcToInputFile (c);
					}
				} while (c != EOF && c != '\0');
				goto getNextChar;
			}
			else
			{
				if (d != '=')
					ungetcToInputFile (d);
				token->type = TOKEN_OPERATOR;
			}
			break;
		}

		case '$': /* variable start */
		{
			int d = getcFromInputFile ();
			if (! isIdentChar (d))
			{
				ungetcToInputFile (d);
				token->type = TOKEN_UNDEFINED;
			}
			else
			{
				parseIdentifier (token->string, d);
				token->type = TOKEN_VARIABLE;
			}
			break;
		}

		case '?': /* maybe the end of the PHP chunk */
		{
			int d = getcFromInputFile ();
			if (d == '>')
			{
				InPhp = FALSE;
				goto getNextChar;
			}
			else
			{
				ungetcToInputFile (d);
				token->type = TOKEN_UNDEFINED;
			}
			break;
		}

		default:
			if (! isIdentChar (c))
				token->type = TOKEN_UNDEFINED;
			else
			{
				parseIdentifier (token->string, c);
				token->keyword = analyzeToken (token->string, getInputLanguage ());
				if (token->keyword == KEYWORD_NONE)
					token->type = TOKEN_IDENTIFIER;
				else
					token->type = TOKEN_KEYWORD;
			}
			break;
	}

	if (token->type == TOKEN_SEMICOLON ||
		token->type == TOKEN_OPEN_CURLY ||
		token->type == TOKEN_CLOSE_CURLY)
	{
		/* reset current statement details on statement end, and when entering
		 * a deeper scope.
		 * it is a bit ugly to do this in readToken(), but it makes everything
		 * a lot simpler. */
		CurrentStatement.access = ACCESS_UNDEFINED;
		CurrentStatement.impl = IMPL_UNDEFINED;
	}
}
Esempio n. 26
0
static void readTokenFull (tokenInfo *const token,
						   boolean includeStringRepr)
{
	int c;

	token->type = TOKEN_UNDEFINED;
	vStringClear (token->string);

	do
		c = getcFromInputFile ();
	while (c == '\t' || c == ' ' || c == '\r' || c == '\n');

	token->lineNumber   = getInputLineNumber ();
	token->filePosition = getInputFilePosition ();

	switch (c)
	{
		case EOF: token->type = TOKEN_EOF;			break;
		case '[': token->type = TOKEN_OPEN_SQUARE;	break;
		case ']': token->type = TOKEN_CLOSE_SQUARE;	break;
		case '{': token->type = TOKEN_OPEN_CURLY;	break;
		case '}': token->type = TOKEN_CLOSE_CURLY;	break;
		case ':': token->type = TOKEN_COLON;		break;
		case ',': token->type = TOKEN_COMMA;		break;

		case '"':
		{
			boolean escaped = FALSE;
			token->type = TOKEN_STRING;
			while (TRUE)
			{
				c = getcFromInputFile ();
				/* we don't handle unicode escapes but they are safe */
				if (escaped)
					escaped = FALSE;
				else if (c == '\\')
					escaped = TRUE;
				else if (c >= 0x00 && c <= 0x1F)
					break; /* break on invalid, unescaped, control characters */
				else if (c == '"' || c == EOF)
					break;
				if (includeStringRepr)
					vStringPut (token->string, c);
			}
			vStringTerminate (token->string);
			break;
		}

		default:
			if (! isIdentChar (c))
				token->type = TOKEN_UNDEFINED;
			else
			{
				do
				{
					vStringPut (token->string, c);
					c = getcFromInputFile ();
				}
				while (c != EOF && isIdentChar (c));
				vStringTerminate (token->string);
				ungetcToInputFile (c);
				switch (lookupKeyword (vStringValue (token->string), Lang_json))
				{
					case KEYWORD_true:	token->type = TOKEN_TRUE;	break;
					case KEYWORD_false:	token->type = TOKEN_FALSE;	break;
					case KEYWORD_null:	token->type = TOKEN_NULL;	break;
					default:			token->type = TOKEN_NUMBER;	break;
				}
			}
			break;
	}
}
Esempio n. 27
0
File: html.c Progetto: qzhuyan/ctags
static void readToken (tokenInfo *const token, bool skipComments)
{
	int c;

	vStringClear (token->string);

getNextChar:

	c = getcFromInputFile ();
	while (isspace (c))
		c = getcFromInputFile ();

	switch (c)
	{
		case EOF:
			token->type = TOKEN_EOF;
			break;

		case '<':
		{
			int d = getcFromInputFile ();

			if (d == '!')
			{
				d = getcFromInputFile ();
				if (d == '-')
				{
					d = getcFromInputFile ();
					if (d == '-')
					{
						int e = ' ';
						int f = ' ';
						do
						{
							d = e;
							e = f;
							f = getcFromInputFile ();
						}
						while (f != EOF && ! (d == '-' && e == '-' && f == '>'));

						if (skipComments)
							goto getNextChar;
						else
						{
							token->type = TOKEN_COMMENT;
							break;
						}
					}
				}
				ungetcToInputFile (d);
				token->type = TOKEN_OTHER;
			}
			else if (d == '?')
				token->type = TOKEN_OTHER;
			else if (d == '/')
				token->type = TOKEN_TAG_START2;
			else
			{
				ungetcToInputFile (d);
				token->type = TOKEN_TAG_START;
			}
			break;
		}
		case '/':
		{
			int d = getcFromInputFile ();
			if (d == '>')
				token->type = TOKEN_TAG_END2;
			else
			{
				ungetcToInputFile (d);
				token->type = TOKEN_OTHER;
			}
			break;
		}
		case '>':
			token->type = TOKEN_TAG_END;
			break;

		case '=':
			token->type = TOKEN_EQUAL;
			break;

		case '"':
		case '\'':
		{
			const int delimiter = c;
			c = getcFromInputFile ();
			while (c != EOF && c != delimiter)
			{
				vStringPut (token->string, c);
				c = getcFromInputFile ();
			}
			token->type = TOKEN_STRING;
			break;
		}

		default:
		{
			do
			{
				vStringPut (token->string, tolower (c));
				c = getcFromInputFile ();
			}
			while (!isspace (c) && c != '<' && c != '>' && c != '/' &&
				   c != '=' && c != '\'' && c != '"' && c != EOF);
			if (c != EOF)
				ungetcToInputFile (c);
			token->type = TOKEN_NAME;
			break;
		}
	}
}
Esempio n. 28
0
/* reads an HereDoc or a NowDoc (the part after the <<<).
 * 	<<<[ \t]*(ID|'ID'|"ID")
 * 	...
 * 	ID;?
 *
 * note that:
 *  1) starting ID must be immediately followed by a newline;
 *  2) closing ID is the same as opening one;
 *  3) closing ID must be immediately followed by a newline or a semicolon
 *     then a newline.
 *
 * Example of a *single* valid heredoc:
 * 	<<< FOO
 * 	something
 * 	something else
 * 	FOO this is not an end
 * 	FOO; this isn't either
 * 	FOO; # neither this is
 * 	FOO;
 * 	# previous line was the end, but the semicolon wasn't required
 */
static void parseHeredoc (vString *const string)
{
	int c;
	unsigned int len;
	char delimiter[64]; /* arbitrary limit, but more is crazy anyway */
	int quote = 0;

	do
	{
		c = getcFromInputFile ();
	}
	while (c == ' ' || c == '\t');

	if (c == '\'' || c == '"')
	{
		quote = c;
		c = getcFromInputFile ();
	}
	for (len = 0; len < ARRAY_SIZE (delimiter) - 1; len++)
	{
		if (! isIdentChar (c))
			break;
		delimiter[len] = (char) c;
		c = getcFromInputFile ();
	}
	delimiter[len] = 0;

	if (len == 0) /* no delimiter, give up */
		goto error;
	if (quote)
	{
		if (c != quote) /* no closing quote for quoted identifier, give up */
			goto error;
		c = getcFromInputFile ();
	}
	if (c != '\r' && c != '\n') /* missing newline, give up */
		goto error;

	do
	{
		c = getcFromInputFile ();

		if (c != '\r' && c != '\n')
			vStringPut (string, (char) c);
		else
		{
			/* new line, check for a delimiter right after */
			int nl = c;
			int extra = EOF;

			c = getcFromInputFile ();
			for (len = 0; c != 0 && (c - delimiter[len]) == 0; len++)
				c = getcFromInputFile ();

			if (delimiter[len] != 0)
				ungetcToInputFile (c);
			else
			{
				/* line start matched the delimiter, now check whether there
				 * is anything after it */
				if (c == '\r' || c == '\n')
				{
					ungetcToInputFile (c);
					break;
				}
				else if (c == ';')
				{
					int d = getcFromInputFile ();
					if (d == '\r' || d == '\n')
					{
						/* put back the semicolon since it's not part of the
						 * string.  we can't put back the newline, but it's a
						 * whitespace character nobody cares about it anyway */
						ungetcToInputFile (';');
						break;
					}
					else
					{
						/* put semicolon in the string and continue */
						extra = ';';
						ungetcToInputFile (d);
					}
				}
			}
			/* if we are here it wasn't a delimiter, so put everything in the
			 * string */
			vStringPut (string, (char) nl);
			vStringNCatS (string, delimiter, len);
			if (extra != EOF)
				vStringPut (string, (char) extra);
		}
	}
	while (c != EOF);

	vStringTerminate (string);

	return;

error:
	ungetcToInputFile (c);
}
Esempio n. 29
0
static void readToken (tokenInfo *const token)
{
	int c;

	token->type    = TOKEN_UNDEFINED;
	token->keyword = KEYWORD_NONE;
	vStringClear (token->string);

getNextChar:

	do
		c = getcFromInputFile ();
	while (c == '\t'  ||  c == ' '  ||  c == '\n');

	switch (c)
	{
		case EOF:  token->type = TOKEN_EOF;                break;
		case ';':  token->type = TOKEN_SEMICOLON;          break;
		case '!':  token->type = TOKEN_BANG;               break;
		case '}':  token->type = TOKEN_CLOSE_BRACE;        break;
		case ']':  token->type = TOKEN_CLOSE_BRACKET;      break;
		case ')':  token->type = TOKEN_CLOSE_PAREN;        break;
		case ',':  token->type = TOKEN_COMMA;              break;
		case '$':  token->type = TOKEN_DOLLAR;             break;
		case '.':  token->type = TOKEN_DOT;                break;
		case '{':  token->type = TOKEN_OPEN_BRACE;         break;
		case '[':  token->type = TOKEN_OPEN_BRACKET;       break;
		case '(':  token->type = TOKEN_OPEN_PAREN;         break;
		case '~':  token->type = TOKEN_TILDE;              break;


		case '+':
		case '*':
		case '^':
		case '=':  token->type = TOKEN_OPERATOR;           break;

		case '-':
			c = getcFromInputFile ();
			if (c == '>')
				token->type = TOKEN_CONSTRAINT;
			else if (c == '-')  /* is this the start of a comment? */
			{
				skipToCharacter ('\n');
				goto getNextChar;
			}
			else
			{
				if (!isspace (c))
					ungetcToInputFile (c);
				token->type = TOKEN_OPERATOR;
			}
			break;

		case '?':
		case ':':
		{
			int c2 = getcFromInputFile ();
			if (c2 == '=')
				token->type = TOKEN_OPERATOR;
			else
			{
				if (!isspace (c2))
					ungetcToInputFile (c2);
				if (c == ':')
					token->type = TOKEN_COLON;
				else
					token->type = TOKEN_QUESTION;
			}
			break;
		}

		case '<':
			c = getcFromInputFile ();
			if (c != '='  &&  c != '>'  &&  !isspace (c))
				ungetcToInputFile (c);
			token->type = TOKEN_OPERATOR;
			break;

		case '>':
			c = getcFromInputFile ();
			if (c != '='  &&  c != '>'  &&  !isspace (c))
				ungetcToInputFile (c);
			token->type = TOKEN_OPERATOR;
			break;

		case '/':
			c = getcFromInputFile ();
			if (c != '/'  &&  c != '='  &&  !isspace (c))
				ungetcToInputFile (c);
			token->type = TOKEN_OPERATOR;
			break;

		case '\\':
			c = getcFromInputFile ();
			if (c != '\\'  &&  !isspace (c))
				ungetcToInputFile (c);
			token->type = TOKEN_OPERATOR;
			break;

		case '"':
			token->type = TOKEN_STRING;
			parseString (token->string);
			break;

		case '\'':
			token->type = TOKEN_CHARACTER;
			parseCharacter ();
			break;

		default:
			if (isalpha (c))
			{
				parseIdentifier (token->string, c);
				token->keyword = analyzeToken (token->string, Lang_eiffel);
				if (isKeyword (token, KEYWORD_NONE))
					token->type = TOKEN_IDENTIFIER;
				else
					token->type = TOKEN_KEYWORD;
			}
			else if (isdigit (c))
			{
				vString* numeric = parseNumeric (c);
				vStringCat (token->string, numeric);
				vStringDelete (numeric);
				token->type = TOKEN_NUMERIC;
			}
			else if (isFreeOperatorChar (c))
			{
				parseFreeOperator (token->string, c);
				token->type = TOKEN_OPERATOR;
			}
			else
				token->type = TOKEN_UNDEFINED;
			break;
	}
}
Esempio n. 30
0
static inline void ungetcAndCollect (int c)
{
	ungetcToInputFile (c);
	if (collectingSignature)
		vStringChop (signature);
}