Example #1
0
/* Skips an OCaml comment, including any comment nested inside it.
 *
 * On entry st->cp points at the '(' of the opening "(*".
 * On return st->cp points just past the ')' of the matching "*)", so the
 * caller does not see the closing parenthesis as a token of its own.
 * If the input runs out before the comment is closed, st->cp is NULL. */
static void eatComment (lexingState * st)
{
	boolean unfinished = TRUE;
	boolean lastIsStar = FALSE;
	/* skip the opening "(*" so that "(*)" is not taken for a terminator */
	const unsigned char *c = st->cp + 2;

	while (unfinished)
	{
		/* we've reached the end of the line,
		 * so we have to reload a line... */
		if (c == NULL || *c == '\0')
		{
			st->cp = fileReadLine ();
			/* no more input: return with st->cp == NULL, the next
			 * lexing read will notice the end of the input */
			if (st->cp == NULL)
				return;
			c = st->cp;
			/* a '*' ending one line and a ')' starting the next one
			 * do not form a comment terminator */
			lastIsStar = FALSE;
		}
		/* we've reached the end of the comment: step over the ')' */
		else if (*c == ')' && lastIsStar)
		{
			unfinished = FALSE;
			c++;
		}
		/* here we deal with nested comments, which
		 * are allowed in OCaml */
		else if (c[0] == '(' && c[1] == '*')
		{
			st->cp = c;
			eatComment (st);

			/* the recursive call leaves st->cp past the nested "*)" */
			c = st->cp;
			if (c == NULL)
				return;

			lastIsStar = FALSE;
		}
		/* OCaml has a rule which says :
		 *
		 *   "Comments do not occur inside string or character literals.
		 *    Nested comments are handled correctly."
		 *
		 * So if we encounter a string beginning, we must parse it to
		 * get a good comment nesting (bug ID: 3117537)
		 */
		else if (*c == '"')
		{
			st->cp = c;
			eatString (st);
			c = st->cp;
			/* a '*' seen before the string must not pair with a ')'
			 * found after it */
			lastIsStar = FALSE;
		}
		else
		{
			lastIsStar = '*' == *c;
			c++;
		}
	}

	st->cp = c;
}
Example #2
0
		// Reads the elements of a JSON array from `stream` up to and including
		// the closing ']'. The opening '[' is not read here; the function starts
		// with the first element (or the closing bracket).
		// Returns the collected elements as an array Value.
		// Throws a string literal (const char*) when an element is followed by
		// anything other than ',' or ']', which includes truncated input.
		static Value eatArray(std::istream& stream)
		{
			Value obj(Type::arrayValue);
			while (!stream.eof())
			{
				ltrim(stream);
				if (stream.peek() == ']')
				{
					stream.get();
					break;
				}
				obj.push_back(eatValue(stream));
				ltrim(stream);
				char token = stream.get();
				// Skip every comment between the element and its separator,
				// together with the whitespace that follows each comment.
				while (token == '/')
				{
					eatComment(stream);
					ltrim(stream);
					token = stream.get();
				}
				if (token == ']')
					break;
				// Validate with a throw rather than assert(): assert is
				// compiled out under NDEBUG and would let bad input through.
				if (token != ',')
					throw "Unable to parse json";
			}
			return obj;
		};
Example #3
0
/* findNext:
 * Scan one XML unit beginning at s and return a pointer to the first
 * character after it. A unit is a tag <..>, an HTML comment
 * <!-- ... -->, or a run of characters up to the next '<'.
 * For a character run, each character is handed to agxbputc on xb,
 * except that '&' not followed by '#' is passed on to scanEntity.
 * A tag or comment that is not closed by '>' triggers a warning and
 * sets state.warn.
 */
static char *findNext(char *s, agxbuf* xb)
{
    char *cursor = s + 1;

    if (*s != '<') {
        char ch;

        /* character data: copy until the next tag opens or the text ends */
        (void) agxbputc(xb, *s);
        for (ch = *cursor; ch && ch != '<'; ch = *cursor) {
            if (ch == '&' && cursor[1] != '#') {
                cursor = scanEntity(cursor + 1, xb);
            } else {
                (void) agxbputc(xb, ch);
                cursor++;
            }
        }
        return cursor;
    }

    /* markup: either a comment or an ordinary element */
    if (strncmp(cursor, "!--", 3) == 0) {
        cursor = eatComment(cursor + 3);
    } else {
        while (*cursor && *cursor != '>')
            cursor++;
    }

    if (*cursor == '>') {
        cursor++;
    } else {
        agerr(AGWARN, "Label closed before end of HTML element\n");
        state.warn = 1;
    }
    return cursor;
}
Example #4
0
		// Reads the members of a JSON object from `stream` up to and including
		// the closing '}'. The opening '{' is not read here; the function starts
		// with the first key (or the closing brace).
		// Comments are skipped before a key and between a value and its
		// separator.
		// Returns the collected members as an object Value.
		// Throws a string literal (const char*) when a key does not start with
		// '"', when the ':' after a key is missing, or when a member is followed
		// by anything other than ',' or '}'.
		static Value eatObject(std::istream& stream)
		{
			Value obj(Type::objectValue);
			while (!stream.eof())
			{
				ltrim(stream);
				char token = stream.get();
				// Skip comments (and the whitespace after them) before the key.
				while (token == '/')
				{
					eatComment(stream);
					ltrim(stream);
					token = stream.get();
				}
				if (token == '}')
					break; //empty object

				// Validate with throws rather than assert(): assert is compiled
				// out under NDEBUG and would let malformed input through.
				if (token != '"')
					throw "Unable to parse json";
				Value key = eatString(stream);
				ltrim(stream);
				token = stream.get();
				if (token != ':')
					throw "Unable to parse json";
				ltrim(stream);
				Value val = eatValue(stream);
				obj[key.asString()] = val;
				ltrim(stream);

				token = stream.get();
				// Skip comments between the value and its separator.
				while (token == '/')
				{
					eatComment(stream);
					ltrim(stream);
					token = stream.get();
				}
				if (token == '}')
					break;
				if (token != ',')
					throw "arg";
			}
			return obj;
		};
Example #5
0
//
// getToken() -  this function gets a whitespace delineated token out
// of the input stream and returns it in the provided buffer 'buf'
//
// leading whitespace and '#' comments (consumed through eatComment())
// are skipped before the token starts
//
// 'buf' is null terminated whenever 'buflen' is at least 1, also when
// the call fails: it is then empty, or holds the truncated start of a
// token that did not fit
//
// it returns the number of characters in 'buf', or 0 if there is
// a failure
//
// failures include:
//	'buf' being NULL or 'buflen' being smaller than 1
//	hitting end-of-file before a non-whitespace character
//	hitting a comment and never getting out of it
//	running out of space provided in 'buf'
//
static int
getToken( FILE*	  stream,
	  char*	  buf,
	  int	  buflen )
{
   int  ch;
   int  i	= 0;

   // without room for at least the terminator nothing can be stored
   if ( buf == NULL || buflen < 1 )
      return 0;

   // keep 'buf' a valid (empty) string on the early failure paths
   buf[0]	= 0;

   ch	= fgetc( stream );

   // read until we hit a non whitespace char
   for ( ;; )
   {
      // if white space, keep reading
      if ( isspace( ch ) )
         ch	= fgetc( stream );

      // did we run out of file?
      else if ( ch == EOF )
         return 0;

      // is this the beginning of a comment?
      else if ( ch == '#' )
      {
         // read till end of comment
         if ( !eatComment( stream ) )
            return 0;
         ch = fgetc( stream );
      }
      // we got a non-white space, non-comment character
      else
         break;
   }

   // okay, now we are at the start of the token
   while( ch != EOF && !isspace( ch ) )
   {
      buf[i++]	= (char)ch;
      if ( i >= buflen-1 )
      {
         // out of buffer: terminate what was stored before failing
         // (buflen-1 is always in bounds, even when buflen is 1)
         buf[buflen-1]	= 0;
         return 0;
      }
      ch	= fgetc( stream );
   }

   // null terminate the string
   buf[i]	= 0;

   return i;
}
// Extracts and returns the next token.
// After stripWhiteSpace(), a single-character token (as judged by
// isSingleCharTok on nextChar) is returned on its own. Otherwise characters
// are accumulated until isTokEnd() reports the end of a non-empty token.
// A comment or quote that begins while a token is already pending ends that
// token: the current character is put back and the pending token returned.
// A comment met with no pending token is returned only when
// doReturnComments is set, otherwise it is discarded; a quote met with no
// pending token is returned as produced by eatQuote().
// At file end whatever has been collected so far is returned.
std::string Toker::getTok()
{
  std::string tok;
  stripWhiteSpace();

  if(isSingleCharTok(nextChar))
  {
    getChar();
    tok.append(1,currChar);
    return tok;
  }

  do
  {
    if(isFileEnd())
      break;

    getChar();

    if(isBeginComment())
    {
      if(!tok.empty())
      {
        // finish the pending token first; the comment is re-read later
        this->putback(currChar);
        break;
      }
      tok = eatComment();
      if(doReturnComments)
        break;
      // comment not wanted: forget it and keep scanning
      tok.clear();
      continue;
    }

    if(isBeginQuote())
    {
      if(!tok.empty())
      {
        // finish the pending token first; the quote is re-read later
        this->putback(currChar);
        break;
      }
      return eatQuote();
    }

    if(!isspace(currChar, std::locale()))
      tok.append(1,currChar);
  } while(!isTokEnd() || tok.empty());

  return tok;
}
Example #7
0
/* Drives the parse of the file read through fp.
 * The character returned by eatWhitespace / eatComment selects what to do:
 *   '\n'        blank line, skipped
 *   ';' or '#'  comment line, handed to eatComment
 *   '['         section header, handed to Section together with sfunc
 *   other       parameter line, handed to Parameter with pfunc and the
 *               character itself
 * The loop ends when that character is EOF or not greater than zero.
 * Returns 0 in that case, or -1 as soon as Section or Parameter returns a
 * negative value. */
static int
Parse (FILE * fp, int (*sfunc) (char *), int (*pfunc) (char * option, char * value, int num_flags, char** flags))
{
    int ch = eatWhitespace (fp);

    while (ch != EOF && ch > 0)
    {
        if (ch == ';' || ch == '#')
        {
            /* comment line: eatComment supplies the next character itself */
            ch = eatComment (fp);
            continue;
        }

        if (ch == '[')
        {
            /* section header */
            if (Section (fp, sfunc) < 0)
                return (-1);
        }
        else if (ch != '\n')
        {
            /* parameter line */
            if (Parameter (fp, pfunc, ch) < 0)
                return (-1);
        }
        /* a blank line needs no work of its own */

        ch = eatWhitespace (fp);
    }

    return 0;
}
Example #8
0
		// Reads one JSON value from `stream`. After ltrim, the first character
		// decides which reader is used:
		//   '{' object, '[' array, '"' string, 't'/'f' boolean, 'n' null,
		//   a digit, '.' or '-' number (the character is passed to eatNumeric),
		//   '/' comment, which is consumed before a value is read again.
		// Throws the string literal "Unable to parse json" for any other
		// character.
		static Value eatValue(std::istream& stream)
		{
			ltrim(stream);
			char lead = stream.get();
			switch (lead)
			{
			case '{':
				return eatObject(stream);
			case '[':
				return eatArray(stream);
			case '0': case '1': case '2': case '3': case '4':
			case '5': case '6': case '7': case '8': case '9':
			case '.':
			case '-':
				return eatNumeric(stream, lead);
			case '"':
				return eatString(stream);
			case 't':
			case 'f':
				return eatBool(stream);
			case 'n':
				return eatNull(stream);
			case '/':
				eatComment(stream);
				return eatValue(stream);
			default:
				throw "Unable to parse json";
			}
		};
Example #9
0
/* The lexer is in charge of reading the file; some of the sub-lexers
 * (like eatComment) read from the file as well.
 * When the current line is used up, a new one is fetched: Tok_EOL is
 * returned for a fetched line and Tok_EOF once no line is left, which
 * marks the end of lexing.
 * Otherwise the token starting at st->cp is returned and st->cp is moved
 * past it. Anything that is not recognized yields Tok_any. */
static objcKeyword lex (lexingState * st)
{
	int keyword;
	objcKeyword punct;

	/* handling data input here */
	if (st->cp == NULL || st->cp[0] == '\0')
	{
		st->cp = readLineFromInputFile ();
		if (st->cp != NULL)
			return Tok_EOL;
		return Tok_EOF;
	}

	/* identifier or keyword */
	if (isAlpha (*st->cp) || (*st->cp == '_'))
	{
		readIdentifier (st);
		keyword = lookupKeyword (vStringValue (st->name), Lang_ObjectiveC);
		if (keyword != -1)
			return keyword;
		return ObjcIDENTIFIER;	/* not a keyword */
	}

	/* '@' directive */
	if (*st->cp == '@')
	{
		readIdentifierObjcDirective (st);
		keyword = lookupKeyword (vStringValue (st->name), Lang_ObjectiveC);
		if (keyword != -1)
			return keyword;
		return Tok_any;		/* not a keyword */
	}

	if (isSpace (*st->cp))
	{
		eatWhiteSpace (st);
		return lex (st);
	}

	if (*st->cp == '"')
	{
		eatString (st);
		return Tok_any;
	}

	if (*st->cp == '/')
	{
		if (st->cp[1] == '*')	/* block comment */
		{
			eatComment (st);
			return lex (st);
		}
		if (st->cp[1] == '/')	/* line comment: drop the rest of the line */
		{
			st->cp = NULL;
			return lex (st);
		}
		/* a lone '/' is handled below like any unrecognized character */
	}

	/* single character tokens */
	switch (*st->cp)
	{
	case '(':  punct = Tok_PARL;      break;
	case ')':  punct = Tok_PARR;      break;
	case '{':  punct = Tok_CurlL;     break;
	case '}':  punct = Tok_CurlR;     break;
	case '[':  punct = Tok_SQUAREL;   break;
	case ']':  punct = Tok_SQUARER;   break;
	case '<':  punct = Tok_ANGLEL;    break;
	case '>':  punct = Tok_ANGLER;    break;
	case ',':  punct = Tok_COMA;      break;
	case ';':  punct = Tok_semi;      break;
	case ':':  punct = Tok_dpoint;    break;
	case '+':  punct = Tok_PLUS;      break;
	case '-':  punct = Tok_MINUS;     break;
	case '*':  punct = Tok_Asterisk;  break;
	case '#':  punct = Tok_Sharp;     break;
	case '\\': punct = Tok_Backslash; break;

	/* default if nothing is recognized: the character is skipped so
	 * that the parsing can go on */
	default:   punct = Tok_any;       break;
	}

	st->cp++;
	return punct;
}
Example #10
0
// Scans the input starting at currentChar and returns the next token.
//
// Side effects, besides advancing the input through getChar():
//   - tokenString is cleared first, then set for numbers, identifiers,
//     keywords and unknown characters
//   - line / column are updated when a line break is consumed
//   - stringValue receives the contents of a string literal
//   - singleValue or integerValue receives the value of a number
//   - identifierValue receives the lower-cased identifier or keyword
//
// Comments (introduced by an apostrophe or by the word "rem") are consumed
// through eatComment() and reported as NEW_LINE.
// A string literal that is still open when the input ends is returned as
// STRING with the characters collected so far.
Token Lexer::getNextToken()
{
    // Eat whitespace, but keep line breaks: they are tokens
    tokenString = "";
    while (isspace(currentChar) && currentChar != '\n' && currentChar != EOF)
    {
        getChar();
    }

    if (currentChar == '\n')
    {
        line++;
        column = 0;
        getChar();
        return NEW_LINE;
    }

    if (currentChar == '\'')
    {
        eatComment();
        return NEW_LINE;
    }

    // Match single-character operators
    switch (currentChar)
    {
        case '+': getChar(); return PLUS;
        case '-': getChar(); return MINUS;
        case '*': getChar(); return MULTIPLY;
        case '/': getChar(); return DIVIDE;
        case '(': getChar(); return BRACKET_OPEN;
        case ')': getChar(); return BRACKET_CLOSE;
        case '=': getChar(); return EQUALS;
        case ',': getChar(); return COMMA;
        case '%': getChar(); return PERCENT;
        case '&': getChar(); return AMPERSAND;
        case '!': getChar(); return EXCLAMATION;
        case '#': getChar(); return HASH;
        case '$': getChar(); return DOLLAR;
        default: break;
    }

    if (currentChar == EOF)
    {
        getChar();
        return END_OF_FILE;
    }

    // Match not equals and less than
    if (currentChar == '<')
    {
        getChar();
        if (currentChar == '>')
        {
            getChar();
            return NOT_EQUALS;
        }
        else if (currentChar == '=')
        {
            getChar();
            return LESS_THAN_OR_EQUALS;
        }
        else
        {
            return LESS_THAN;
        }
    }

    // Match greater thans
    if (currentChar == '>')
    {
        getChar();
        if (currentChar == '=')
        {
            getChar();
            return GREATER_THAN_OR_EQUALS;
        }
        else
        {
            return GREATER_THAN;
        }
    }

    // Match string literals
    if (currentChar == '"')
    {
        stringValue = "";

        while (true)
        {
            getChar();
            // Unterminated literal: stop at the end of the input. Without
            // this check the loop would keep appending EOF to stringValue
            // for as long as getChar() goes on reporting EOF.
            if (currentChar == EOF) break;
            if (currentChar == '"')
            {
                // A doubled quote stands for one quote inside the literal;
                // a single quote closes it.
                getChar();
                if (currentChar != '"') break;
            }
            stringValue += currentChar;
        }

        return STRING;
    }

    // Match numbers
    if (isdigit(currentChar))
    {
        bool dotSeen = false;
        std::string numberString;

        do
        {
            if (currentChar == '.')
            {
                // A second dot does not belong to this number
                if (dotSeen)
                    break;
                else
                    dotSeen = true;
            }
            numberString += currentChar;
            getChar();
        }
        while (isdigit(currentChar) || currentChar == '.');

        tokenString = numberString;

        if (dotSeen)
        {
            singleValue = strtod(numberString.c_str(), NULL);
            return SINGLE;
        }
        else
        {
            integerValue = strtol(numberString.c_str(), NULL, 10);
            return INTEGER;
        }
    }

    // Match identifiers and keywords (compared in lower case)
    if (isalpha(currentChar))
    {
        std::string identString;
        do
        {
            identString += tolower(currentChar);
            getChar();
        } while (isalnum(currentChar) || currentChar == '_');

        identifierValue = identString;
        tokenString = identString;

        if (identString == "dim") return DIM;
        else if (identString == "as") return AS;
        else if (identString == "print") return PRINT;
        else if (identString == "if") return IF;
        else if (identString == "then") return THEN;
        else if (identString == "else") return ELSE;
        else if (identString == "elseif") return ELSE_IF;
        else if (identString == "end") return END;
        else if (identString == "exit") return EXIT;
        else if (identString == "select") return SELECT;
        else if (identString == "case") return CASE;
        else if (identString == "for") return FOR;
        else if (identString == "step") return STEP;
        else if (identString == "next") return NEXT;
        else if (identString == "continue") return CONTINUE;
        else if (identString == "to") return TO;
        else if (identString == "while") return WHILE;
        else if (identString == "wend") return WEND;
        else if (identString == "do") return DO;
        else if (identString == "loop") return LOOP;
        else if (identString == "until") return UNTIL;
        else if (identString == "function") return FUNCTION;
        else if (identString == "sub") return SUB;
        else if (identString == "declare") return DECLARE;
        else if (identString == "return") return RETURN;
        else if (identString == "rem")
        {
            // "rem" starts a comment
            eatComment();
            return NEW_LINE;
        }
        else return IDENTIFIER;
    }

    // Return an unknown token for whatever else is there
    tokenString = currentChar;
    return UNKNOWN;
}
Example #11
0
/* The lexer is in charge of reading the file; some of the sub-lexers
 * (like eatComment) read from the file as well.
 * Lines are fetched with fileReadLine whenever the current one is used
 * up; lexing is finished when the lexer returns Tok_EOF.
 * Otherwise the token starting at st->cp is returned and st->cp is moved
 * past it. Anything that is not recognized yields Tok_Val. */
static ocamlKeyword lex (lexingState * st)
{
	int keyword;
	ocamlKeyword punct;

	/* handling data input here */
	for (;;)
	{
		if (st->cp != NULL && st->cp[0] != '\0')
			break;
		st->cp = fileReadLine ();
		if (st->cp == NULL)
			return Tok_EOF;
	}

	/* identifier or keyword */
	if (isAlpha (*st->cp))
	{
		readIdentifier (st);
		keyword = lookupKeyword (vStringValue (st->name), Lang_Ocaml);
		if (keyword != -1)
			return keyword;
		return OcaIDENTIFIER;	/* not a keyword */
	}

	if (isNum (*st->cp))
		return eatNumber (st);

	if (isSpace (*st->cp))
	{
		eatWhiteSpace (st);
		return lex (st);
	}

	/* OCaml permits the definition of custom operators, so all the
	 * consecutive operator characters are discarded together. */
	if (isOperator[*st->cp])
		return eatOperator (st);

	if (*st->cp == '"')
	{
		eatString (st);
		return Tok_Val;
	}

	/* "(*" opens a comment */
	if (st->cp[0] == '(' && st->cp[1] == '*')
	{
		eatComment (st);
		return lex (st);
	}

	/* single character tokens */
	switch (*st->cp)
	{
	case '(':  punct = Tok_PARL;      break;
	case ')':  punct = Tok_PARR;      break;
	case '[':  punct = Tok_BRL;       break;
	case ']':  punct = Tok_BRR;       break;
	case '{':  punct = Tok_CurlL;     break;
	case '}':  punct = Tok_CurlR;     break;
	case '\'': punct = Tok_Prime;     break;
	case ',':  punct = Tok_comma;     break;
	case '=':  punct = Tok_EQ;        break;
	case ';':  punct = Tok_semi;      break;
	case '_':  punct = Tok_Val;       break;
	case '#':  punct = Tok_Sharp;     break;
	case '\\': punct = Tok_Backslash; break;

	/* default if nothing is recognized: the character is skipped so
	 * that the parsing can go on */
	default:   punct = Tok_Val;       break;
	}

	st->cp++;
	return punct;
}