Example #1
0
/*
 * Evaluates the arguments of an include form and, for every argument that
 * evaluates to a STRING, opens the named file, parses it and evaluates the
 * resulting tree in env.
 *
 * tree: argument list, passed to evalExprList.
 * env:  environment used both to evaluate the arguments and the file contents.
 *
 * Returns the value of the last file evaluated, a NIL lexeme if the last
 * string argument could not be opened, or NULL if no string argument was
 * processed. Non-string arguments are reported on stderr and leave the
 * result untouched.
 */
Lexeme *evalInclude(Lexeme *tree, Lexeme *env) {
  Lexeme *eargs = evalExprList(tree,env);
  Lexeme *val = NULL;
  Lexeme *result = NULL;
  while(eargs != NULL && strcmp(eargs->type,NIL)) {
    val = car(eargs);
    if(strcmp(val->type,STRING) == 0) {
      FILE *fp = fopen(val->sval,"r");
      if(!fp) {
        fprintf(stderr,"Cannot open %s, invalid filename\n",val->sval);
        result = lexeme(NIL);
      } else {
        result = eval(parse(fp),env);
        /* Release the handle once the file has been parsed and evaluated;
           previously every included file stayed open until process exit. */
        fclose(fp);
      }
    } else {
      fprintf(stderr,"Cannot open %s\n",displayLexeme(*val));
    }
    eargs = cdr(eargs);
  }
  return result;
}
Example #2
0
// primary: primitive
//        | idExpr
//        | parenExpr
//        | uOp primary
//        | OB optExpressionList CB
//        | anonFunc
//
// Parses one primary and returns the lexeme at the root of its subtree.
// Alternatives are tried in the order listed; anonFunc is the fallback.
Lexeme *primary(Parser *p) {
  if(primitivePending(p)) return primitive(p);
  if(idExprPending(p)) return idExpr(p);
  if(parenExprPending(p)) return parenExpr(p);

  if(uOpPending(p)) {
    // Unary operator: the operand is stored as the operator's right child.
    Lexeme *op = uOp(p);
    op->right = primary(p);
    return op;
  }

  if(check(p,OB)) {
    // Bracketed literal: an ARRAY node whose right child is the optional
    // expression list (left unset when the brackets are empty).
    Lexeme *array = lexeme(ARRAY);
    match(p,OB);
    if(expressionListPending(p)) {
      array->right = expressionList(p);
    }
    match(p,CB);
    return array;
  }

  return anonFunc(p);
}
Example #3
0
// expression: primary
//           | primary operator expression
//           | VAR ID optInit
//
// Returns the root of the parsed expression: the primary itself, the
// operator lexeme (left = primary, right = remaining expression), or the
// VAR lexeme (left = ID, right = initializer or a NIL lexeme).
Lexeme *expression(Parser *p) {
  if(!primaryPending(p)) {
    // Variable declaration.
    Lexeme *decl = match(p,VAR);
    decl->left = match(p,ID);
    decl->right = initPending(p) ? init(p) : lexeme(NIL);
    return decl;
  }

  Lexeme *lhs = primary(p);
  if(!operatorPending(p)) {
    return lhs;
  }

  // Binary expression; the right-hand side is parsed recursively.
  Lexeme *op = operator(p);
  Lexeme *rhs = expression(p);
  op->left = lhs;
  op->right = rhs;
  return op;
}
Example #4
0
	/**
	 * Lexes a quoted string literal starting at input[i] (the opening quote)
	 * and appends the resulting lexeme to output.lexemes.
	 *
	 * The character at input[i] on entry is taken as the terminator, so a
	 * string opened with ' may contain " and vice versa. Supported escapes
	 * are \r, \n and a backslash followed by exactly two hex digits.
	 * On success i is left just past the closing quote.
	 *
	 * Problems (raw newline, bad or truncated escape, missing terminator)
	 * are reported through lexer_error.
	 */
	void lexer::parse_string(line_of_code & output)
	{
		std::string string;
		char string_character = input[i];
		i++;
		for(; i < end; i++)
		{
			char byte = input[i];
			switch(byte)
			{
				case '\\':
				{
					// At least one character must follow the backslash.
					if(end - i < 2)
						lexer_error("Backslash at the end of the input");

					i++;

					char next_byte = input[i];
					
					switch(next_byte)
					{
						// 'continue' resumes the enclosing for loop, which
						// steps past the escape character.
						case 'r':
							string.push_back('\r');
							continue;

						case 'n':
							string.push_back('\n');
							continue;
					}

					if(ail::is_hex_digit(next_byte))
					{
						// A hex escape needs two digits: input[i] and input[i + 1].
						if(end - i < 2)
							lexer_error("Incomplete hex number escape sequence at the end of the input");

						if(!ail::is_hex_digit(input[i + 1]))
							lexer_error("Invalid hex number escape sequence");

						std::string hex_string = input.substr(i, 2);
						i++;
						// NOTE(review): confirm string_to_number<char> converts the two
						// digits numerically rather than extracting a single character.
						char new_byte = ail::string_to_number<char>(hex_string, std::ios_base::hex);
						string.push_back(new_byte);
					}
					else
						lexer_error("Invalid escape sequence: " + ail::hex_string_8(static_cast<uchar>(next_byte)));
					break;
				}

				case '\n':
					lexer_error("Detected a newline in a string");
					break;

				case '\'':
				case '"':
					// Only the quote kind that opened the string closes it.
					if(byte == string_character)
					{
						output.lexemes.push_back(lexeme(string));
						i++;
						return;
					}
					string.push_back(byte);
					break;

				default:
					string.push_back(byte);
					break;
			}
		}
		lexer_error("String lacks terminator");
	}
Example #5
0
/**
 * Reads the next lexeme from the character range [begin, end).
 *
 * Characters whose lookup-table type matches none of the cases below are
 * skipped. When a lexeme is produced, begin is left just past it.
 *
 * @param file_name  Passed to lexer_exception for diagnostics.
 * @param line       Current line number; incremented when a newline is consumed.
 * @param begin      Read position; advanced as input is consumed.
 * @param end        End of the input.
 * @param output     Receives the lexeme when the function returns true.
 * @return true if a lexeme was stored in output; false if the input ran out
 *         first (including a trailing comment with no terminating newline).
 *
 * NOTE(review): lexer_exception is assumed not to return (presumably it
 * throws). If it can return, control falls through to the following case.
 */
bool assembly_lexer(std::string const & file_name, unsigned int & line, std::string::const_iterator & begin, std::string::const_iterator const & end, lexeme & output)
{
    for(; begin != end; ++begin)
    {
        char current_char = *begin;
        // Index through unsigned char: plain char may be signed, in which case
        // bytes >= 0x80 would otherwise yield a negative array index.
        char type = type_lookup_table[static_cast<unsigned char>(current_char)];
        switch(type)
        {
        case char_type_illegal:
        {
            lexer_exception(file_name, line, "Illegal character");
        }

        case char_type_name:
        {
            std::string::const_iterator name_begin = begin;
            for(++begin;
                    (begin != end)
                    &&
                    (is_name_char(*begin) == true);
                    ++begin);
            output = lexeme(lexeme_name, std::string(name_begin, begin), line);
            return true;
        }

        case char_type_digit:
        {
            std::string::const_iterator number_begin = begin;
            for(++begin;
                    (begin != end)
                    &&
                    (nil::string::is_digit(*begin) == true)
                    ; ++begin);
            output = lexeme(lexeme_number, std::string(number_begin, begin), line);
            return true;
        }

        case char_type_zero:
        {
            // A leading zero may introduce "0x..." or a run of binary digits;
            // otherwise the lexeme is just "0".
            std::string::const_iterator number_begin = begin;
            ++begin;
            if(begin != end)
            {
                char second_character = *begin;
                if(
                    (second_character == 'x')
                    ||
                    (second_character == 'X')
                )
                {
                    // NOTE(review): only decimal digits are accepted after the
                    // prefix, so "0xFF" yields "0x" followed by a separate
                    // lexeme - confirm whether a hex-digit test was intended.
                    for(++begin;
                            (begin != end)
                            &&
                            (nil::string::is_digit(*begin) == true)
                            ; ++begin);
                }
                else if(is_binary_digit(second_character) == true)
                {
                    for(++begin;
                            (begin != end)
                            &&
                            (is_binary_digit(*begin) == true)
                            ; ++begin);
                }
            }
            output = lexeme(lexeme_number, std::string(number_begin, begin), line);
            return true;
        }

        case char_type_string:
        {
            std::string string;
            for(++begin; begin != end;)
            {
                char string_char = *begin;
                switch(string_char)
                {
                case '"':
                {
                    ++begin;
                    output = lexeme(lexeme_string, string, line);
                    return true;
                }

                case '\\':
                {
                    // parse_backslash receives begin by argument and is relied
                    // on to advance it; its failures are re-reported with
                    // file and line information.
                    try
                    {
                        parse_backslash(begin, end, string);
                    }
                    catch(std::exception const & exception)
                    {
                        lexer_exception(file_name, line, exception.what());
                    }
                    break;
                }

                case '\n':
                {
                    lexer_exception(file_name, line, "Newline in string");
                }

                default:
                {
                    string += string_char;
                    ++begin;
                    break;
                }
                }
            }

            lexer_exception(file_name, line, "Incomplete string at the end of file");
        }

        case char_type_operator:
        {
            ++begin;
            output = lexeme(lexeme_operator, std::string(1, current_char), line);
            return true;
        }

        case char_type_operator_extended:
        {
            ++begin;
            if(begin == end)
            {
                // The operator is the last character of the input. It is still
                // a complete lexeme, so report it like every other lexeme;
                // returning false here made it indistinguishable from
                // "no lexeme produced".
                output = lexeme(lexeme_operator, std::string(1, current_char), line);
                return true;
            }

            char second_char = *begin;
            bool is_extended = false;
            switch(current_char)
            {
            case '&':
            case '|':
                // "&&" and "||": the same character repeated.
                is_extended = (second_char == current_char);
                break;

            case '=':
            case '!':
            case '<':
            case '>':
                // "==", "!=", "<=", ">=": a trailing '='.
                is_extended = (second_char == '=');
                break;
            }

            if(is_extended == true)
            {
                ++begin;
                output = lexeme(lexeme_operator, std::string(1, current_char) + second_char, line);
            }
            else
            {
                output = lexeme(lexeme_operator, std::string(1, current_char), line);
            }
            return true;
        }

        case char_type_newline:
        {
            ++begin;
            ++line;
            output = lexeme(lexeme_newline, line);
            return true;
        }

        case char_type_comment:
        {
            // Skip to the end of the line; the comment is reported as the
            // newline that terminates it.
            for(++begin; begin != end; ++begin)
            {
                if(*begin == '\n')
                {
                    ++begin;
                    ++line;
                    output = lexeme(lexeme_newline, line);
                    return true;
                }
            }
            return false;
        }
        }
    }

    return false;
}
Example #6
0
/**
 * Splits this->sourceText into lexemes using a character-driven state
 * machine (this->curState) and appends them to this->lexemes.
 *
 * Identifiers that are not keywords are also recorded in
 * this->identifiersTable; unrecognised tokens are appended to
 * this->errorsTable and clear this->success.
 *
 * Convention used by the states: a state that switches to stOpSymbol leaves
 * the current symbol unconsumed, because stOpSymbol emits the accumulated
 * token and then consumes exactly one symbol.
 *
 * Returns this->success.
 */
bool LexicalAnalyser::scan() {
	this->clearComment();
	string token;
	string::const_iterator nextSymbol = this->sourceText.begin();
	while (nextSymbol != this->sourceText.end()) {
		switch (this->curState) {
			case this->stEmpty: {
				if (!this->isSpace(*nextSymbol)) {
					token.push_back(*nextSymbol);
				}
				if (this->isLetter(*nextSymbol)) {
					this->curState = this->stIdentifier;
				}
				else if (this->isOctalNumber(*nextSymbol)) {
					this->curState = this->stConst;
				}
				else if (this->isNotOctalNumber(*nextSymbol)) {
					this->curState = this->stError;
				}
				else if (*nextSymbol == ':') {
					this->curState = this->stAssign;
				}
				else if (*nextSymbol == '<') {
					this->curState = this->stLeftShift;
				}
				else if (*nextSymbol == '>') {
					this->curState = this->stRightShift;
				}
				else if (this->isOperationSymbol(*nextSymbol)) {
					// Leave the symbol for stOpSymbol to consume. Skipping the
					// increment replaces the former "--nextSymbol; ++nextSymbol"
					// pair, which stepped before begin() when the operator was
					// the first character of the text.
					this->curState = this->stOpSymbol;
					continue;
				}
				++nextSymbol;
			} break;
			case this->stIdentifier: {
				if (this->isSpace(*nextSymbol)) {
					if (this->isKeyWord(token)) {
						Lexeme lexeme(token, Lexeme::tyKeyWord);
						this->lexemes.push_back(lexeme);
					}
					else {
						Lexeme lexeme(token, Lexeme::tyIdentifier);
						this->lexemes.push_back(lexeme);
						Identifier id;
						id.present = false;
						this->identifiersTable[token] = id;
					}
					token.clear();
					this->curState = this->stEmpty;
				}
				else if (this->isLetter(*nextSymbol) || this->isNumber(*nextSymbol)) {
					token.push_back(*nextSymbol);
					this->curState = this->stIdentifier;
				}
				else {
					// Any other symbol ends the identifier and starts the next token.
					if (this->isKeyWord(token)) {
						Lexeme lexeme(token, Lexeme::tyKeyWord);
						this->lexemes.push_back(lexeme);
					}
					else {
						Lexeme lexeme(token, Lexeme::tyIdentifier);
						this->lexemes.push_back(lexeme);
						Identifier id;
						id.present = false;
						this->identifiersTable[token] = id;
					}
					token.clear();
					token.push_back(*nextSymbol);
					if (*nextSymbol == ':') {
						this->curState = this->stAssign;
					}
					else if (*nextSymbol == '<') {
						this->curState = this->stLeftShift;
					}
					else if (*nextSymbol == '>') {
						this->curState = this->stRightShift;
					}
					else if (this->isOperationSymbol(*nextSymbol)) {
						// Leave the symbol for stOpSymbol to consume.
						this->curState = this->stOpSymbol;
						continue;
					}
				}
				++nextSymbol;
			} break;
			case this->stOpSymbol: {
				// Emits the accumulated operator and consumes its last symbol.
				Lexeme lexeme(token, Lexeme::tyOperationSymbol);
				this->lexemes.push_back(lexeme);
				token.clear();
				this->curState = this->stEmpty;
				++nextSymbol;
			} break;
			case this->stAssign: {
				token.push_back(*nextSymbol);
				if (*nextSymbol == '=') {
					// Do not advance: stOpSymbol emits ":=" and consumes this '='.
					// Advancing here as well made the symbol after ":=" disappear.
					this->curState = this->stOpSymbol;
				}
				else {
					// The offending symbol becomes part of the error token.
					this->curState = this->stError;
					++nextSymbol;
				}
			} break;
			case this->stLeftShift: {
				if (*nextSymbol == '<') {
					// Do not advance: stOpSymbol emits "<<" and consumes this '<'.
					token.push_back(*nextSymbol);
					this->curState = this->stOpSymbol;
				}
				else {
					// A lone '<' is emitted as an operator. The current symbol is
					// not part of it and is left for stEmpty to process; it used
					// to be consumed here and was lost.
					Lexeme lexeme(token, Lexeme::tyOperationSymbol);
					this->lexemes.push_back(lexeme);
					token.clear();
					this->curState = this->stEmpty;
				}
			} break;
			case this->stRightShift: {
				if (*nextSymbol == '>') {
					// Do not advance: stOpSymbol emits ">>" and consumes this '>'.
					token.push_back(*nextSymbol);
					this->curState = this->stOpSymbol;
				}
				else {
					// A lone '>' is emitted as an operator; the current symbol is
					// left for stEmpty to process.
					Lexeme lexeme(token, Lexeme::tyOperationSymbol);
					this->lexemes.push_back(lexeme);
					token.clear();
					this->curState = this->stEmpty;
				}
			} break;
			case this->stConst: {
				if (this->isSpace(*nextSymbol)) {
					Lexeme lexeme(token, Lexeme::tyConst);
					int* value = new int;
					// The digits are prefixed with '0' before conversion.
					string val = "0";
					val.append(token);
					*value = this->toOctal(atoi(val.c_str()));
					lexeme.value = value;
					this->lexemes.push_back(lexeme);
					token.clear();
					this->curState = this->stEmpty;
				}
				else if (this->isOctalNumber(*nextSymbol)) {
					token.push_back(*nextSymbol);
				}
				else if (this->isLetter(*nextSymbol) || this->isNumber(*nextSymbol)) {
					// A letter or a non-octal digit inside a constant is an error.
					token.push_back(*nextSymbol);
					this->curState = this->stError;
				}
				else {
					// Any other symbol ends the constant and starts the next token.
					Lexeme lexeme(token, Lexeme::tyConst);
					int* value = new int;
					string val = "0";
					val.append(token);
					*value = this->toOctal(atoi(val.c_str()));
					lexeme.value = value;
					this->lexemes.push_back(lexeme);
					token.clear();
					token.push_back(*nextSymbol);
					if (*nextSymbol == ':') {
						this->curState = this->stAssign;
					}
					else if (*nextSymbol == '<') {
						this->curState = this->stLeftShift;
					}
					else if (*nextSymbol == '>') {
						this->curState = this->stRightShift;
					}
					else if (this->isOperationSymbol(*nextSymbol)) {
						// The operator is emitted here directly and consumed by the
						// increment below.
						// NOTE(review): unlike stOpSymbol, this path attaches a
						// numeric value to an operator lexeme - confirm it is needed.
						Lexeme opLexeme(token, Lexeme::tyOperationSymbol);
						int* opValue = new int;
						string opVal = "0";
						opVal.append(token);
						*opValue = this->toOctal(atoi(opVal.c_str()));
						opLexeme.value = opValue;
						this->lexemes.push_back(opLexeme);
						token.clear();
						this->curState = this->stEmpty;
					}
				}
				++nextSymbol;
			} break;
			case this->stError: {
				// Records the accumulated token as unknown without consuming input.
				this->success = false;
				Lexeme lexeme(token, Lexeme::tyUnknown);
				this->lexemes.push_back(lexeme);
				this->errorsTable.push_back(lexeme);
				token.clear();
				this->curState = this->stEmpty;
			} break;
		}
	}
	// NOTE(review): a token still being accumulated when the text ends is
	// discarded with the local 'token', and curState keeps its last value.
	// Confirm that callers terminate sourceText with whitespace.
	this->curLexemesPos = this->lexemes.begin();
	return this->success;
}
Example #7
0
/* Evaluates a nil node: returns a new NIL lexeme. Neither tree nor env is used. */
Lexeme *evalNil(Lexeme *tree, Lexeme *env) {
  return lexeme(NIL);
}
Example #8
0
/*!
  This is called by parseSourceFile() to do the actual parsing
  and tree building. It only processes qdoc comments. It skips
  everything else.
 */
bool PureDocParser::processQdocComments()
{
    QSet<QString> topicCommandsAllowed = topicCommands();
    QSet<QString> otherMetacommandsAllowed = otherMetaCommands();
    QSet<QString> metacommandsAllowed = topicCommandsAllowed + otherMetacommandsAllowed;

    while (tok != Tok_Eoi) {
        if (tok == Tok_Doc) {
            /*
              lexeme() returns an entire qdoc comment.
             */
            QString comment = lexeme();
            Location start_loc(location());
            readToken();

            Doc::trimCStyleComment(start_loc,comment);
            Location end_loc(location());

            /*
              Doc parses the comment.
             */
            Doc doc(start_loc,end_loc,comment,metacommandsAllowed);

            QString topic;
            ArgList args;

            QSet<QString> topicCommandsUsed = topicCommandsAllowed & doc.metaCommandsUsed();

            /*
              There should be one topic command in the set,
              or none. If the set is empty, then the comment
              should be a function description.
             */
            if (topicCommandsUsed.count() > 0) {
                topic = *topicCommandsUsed.begin();
                args = doc.metaCommandArgs(topic);
            }

            NodeList nodes;
            QList<Doc> docs;

            if (topic.isEmpty()) {
                doc.location().warning(tr("This qdoc comment contains no topic command "
                                          "(e.g., '\\%1', '\\%2').")
                                       .arg(COMMAND_MODULE).arg(COMMAND_PAGE));
            }
            else {
                /*
                  There is a topic command. Process it.
                 */
                if ((topic == COMMAND_QMLPROPERTY) ||
                        (topic == COMMAND_QMLATTACHEDPROPERTY)) {
                    Doc nodeDoc = doc;
                    Node* node = processTopicCommandGroup(nodeDoc,topic,args);
                    if (node != 0) {
                        nodes.append(node);
                        docs.append(nodeDoc);
                    }
                }
                else {
                    ArgList::ConstIterator a = args.begin();
                    while (a != args.end()) {
                        Doc nodeDoc = doc;
                        Node* node = processTopicCommand(nodeDoc,topic,*a);
                        if (node != 0) {
                            nodes.append(node);
                            docs.append(nodeDoc);
                        }
                        ++a;
                    }
                }
            }

            Node* treeRoot = QDocDatabase::qdocDB()->treeRoot();
            NodeList::Iterator n = nodes.begin();
            QList<Doc>::Iterator d = docs.begin();
            while (n != nodes.end()) {
                processOtherMetaCommands(*d, *n);
                (*n)->setDoc(*d);
                checkModuleInclusion(*n);
                if ((*n)->isInnerNode() && ((InnerNode *)*n)->includes().isEmpty()) {
                    InnerNode *m = static_cast<InnerNode *>(*n);
                    while (m->parent() && m->parent() != treeRoot)
                        m = m->parent();
                    if (m == *n)
                        ((InnerNode *)*n)->addInclude((*n)->name());
                    else
                        ((InnerNode *)*n)->setIncludes(m->includes());
                }
                ++d;
                ++n;
            }
        }
        else {
            readToken();
        }
    }
    return true;
}
/**
 * Looks the token up in the lexical database and reports every lemma,
 * lexeme and form found for it through outputIterator.
 *
 * Rows arrive ordered by canon, name, priority, so a new lemma or lexeme is
 * announced whenever the corresponding column changes from the previous row;
 * a form is reported for every row.
 *
 * @param token                  Surface form to look up.
 * @param annotationItemManager  Used to set the "flags" and "morpho" values.
 * @param outputIterator         Receives addLemma / addLexeme / addForm calls.
 * @return true if the query returned at least one row.
 */
bool LexDbLemmatizer::lemmatize(const std::string& token,
                               AnnotationItemManager& annotationItemManager,
                               LemmatizerOutputIterator& outputIterator) {
    bool foundLemma = false;

    pqxx::work transaction(connection_);

    // quote() escapes the token and wraps it in quotes. Splicing the raw
    // token into the statement broke on apostrophes and allowed SQL injection.
    pqxx::result tuples =
        transaction.exec(
            "SELECT canon,name,sinflection,M.morphology"
            " FROM forms F, lexemes L, morphologies M"
            " WHERE F.lexeme_sn = L.lexeme_sn AND M.morphology_no = F.morphology_no"
            " AND (discard is null OR discard = 0)"
            " AND form = " + transaction.quote(token) +
            " ORDER BY canon, name, priority");

    std::string currentLemma;
    std::string currentLexeme;

    // Column positions in the SELECT list above.
    static const pqxx::tuple::size_type CANON_INDEX = 0;
    static const pqxx::tuple::size_type NAME_INDEX = 1;
    static const pqxx::tuple::size_type SINFLECTION_INDEX = 2;
    static const pqxx::tuple::size_type MORPHOLOGY_INDEX = 3;

    for (pqxx::result::const_iterator iter = tuples.begin();
         iter != tuples.end();
         ++iter) {

        const pqxx::tuple& tuple = *iter;

        // Every row yields a form, so any row counts as a hit.
        foundLemma = true;

        if (tuple[CANON_INDEX].c_str() != currentLemma) {
            currentLemma = tuple[CANON_INDEX].c_str();
            outputIterator.addLemma(currentLemma);
        }

        // The lexeme and the form of a row share the same sinflection,
        // so it is parsed once per row.
        std::string partOfSpeech;
        std::string flags;

        parseSinflection_(tuple[SINFLECTION_INDEX].c_str(),
                          partOfSpeech,
                          flags);

        if (tuple[NAME_INDEX].c_str() != currentLexeme) {
            currentLexeme = tuple[NAME_INDEX].c_str();

            AnnotationItem lexeme(
                partOfSpeech,
                StringFrag(currentLexeme));

            annotationItemManager.setValue(lexeme, "flags", flags);

            outputIterator.addLexeme(lexeme);
        }

        AnnotationItem form(
            partOfSpeech,
            StringFrag(currentLexeme));

        annotationItemManager.setValue(form, "flags", flags);

        annotationItemManager.setValue(form, "morpho", tuple[MORPHOLOGY_INDEX].c_str());

        outputIterator.addForm(form);
    }

    return foundLemma;
}
Example #10
0
  Lexeme *table = car(env);
  Lexeme *vars = car(table);
  Lexeme *vals = cdr(table);
  table->left = cons(JOIN,variable,vars);
  if(value != NULL) {
    table->right = cons(JOIN,value,vals);
  } else {
    table->right = cons(JOIN,lexeme(NIL),vals);
  }
  return value;
}

/*
 * Builds a new environment on top of env: an ENV cell whose head is a
 * VALUES table pairing variables with values and whose tail is env.
 * Two identifiers are then inserted into the new environment:
 * "this" with the new environment as its value, and "__context" with env.
 * Returns the new environment.
 */
Lexeme *extend(Lexeme *variables, Lexeme *values, Lexeme *env) {
  Lexeme *table = cons(VALUES,variables,values);
  Lexeme *scope = cons(ENV,table,env);

  Lexeme *selfId = lexeme(ID);
  Lexeme *contextId = lexeme(ID);
  contextId->sval = "__context";
  selfId->sval = "this";

  insert(selfId,scope,scope);
  insert(contextId,env,scope);

  return scope;
}

/* Creates a top-level environment: extends a NULL parent with NIL
   variable and value lists. */
Lexeme *create() {
  return extend(lexeme(NIL),lexeme(NIL),NULL);
}

/* Returns 1 when both lexemes carry the same sval string, 0 otherwise. */
int sameVariable(Lexeme *var1, Lexeme *var2) {
  return strcmp(var1->sval,var2->sval) == 0;
}
Example #11
0
	/*
	 * Parses one statement, appending whatever it produces to list.
	 * Returns false without consuming anything when the current lexeme is
	 * END, a closing bracket, or cannot start an expression; true otherwise.
	 */
	bool Parser::parse_statement(StatementList *list)
	{
		lexer.identify_keywords();

		// Set by the statement kinds that must be followed by a terminator.
		bool needs_terminator = false;

		switch(lexeme())
		{
			case Lexeme::END:
			case Lexeme::BRACET_CLOSE:
				return false;

			case Lexeme::SEMICOLON:
				// Empty statement.
				step();
				break;

			case Lexeme::BRACET_OPEN:
				list->append(parse_block<true, false>(Scope::EMPTY));
				break;

			case Lexeme::KW_IF:
				parse_if(list);
				break;

			case Lexeme::KW_WHILE:
				parse_while(list);
				break;

			case Lexeme::KW_DO:
				parse_do(list);
				needs_terminator = true;
				break;

			case Lexeme::KW_RETURN:
				parse_return(list);
				needs_terminator = true;
				break;

			case Lexeme::KW_BREAK:
				parse_break(list);
				needs_terminator = true;
				break;

			case Lexeme::KW_CONTINUE:
				parse_continue(list);
				needs_terminator = true;
				break;

			case Lexeme::KW_CONST:
				step();
				parse_local(true, parse_expression(), list);
				needs_terminator = true;
				break;

			default:
			{
				if(!is_expression(lexeme()))
					return false;

				ExpressionNode *expr = parse_expression();

				// A type name followed by an identifier declares a local;
				// anything else is an expression statement.
				if(lexeme() == Lexeme::IDENT && expr->is_type_name(document, false))
					parse_local(false, expr, list);
				else
					list->append(expr);

				needs_terminator = true;
				break;
			}
		}

		if(needs_terminator)
			parse_terminator();

		return true;
	}
Example #12
0
File: Token.hpp Project: SEDS/CUTS
 // Writes this token's lexeme() to the given stream.
 virtual void
 print (std::ostream& o)
 {
   o << lexeme ();
 }
Example #13
0
File: Token.hpp Project: SEDS/CUTS
 // Writes this token's lexeme() to the given stream.
 virtual void
 print (std::ostream& o)
 {
   //@@ todo
   o << lexeme ();
 }
Example #14
0
File: Token.hpp Project: SEDS/CUTS
 // Returns true when lexeme() compares equal to "TRUE"; any other
 // spelling yields false.
 bool
 value () const
 {
   return lexeme () == "TRUE";
 }
Example #15
0
	/**
	 * Lexes a numeric literal starting at input[i] and appends it to
	 * output.lexemes.
	 *
	 * Recognised forms: "0x" followed by hex digits (unsigned integer),
	 * a run of decimal digits (signed integer), and decimal digits with a
	 * single interior dot (floating point value).
	 *
	 * @return false, leaving i untouched, when input[i] is not a digit;
	 *         true once a number lexeme has been appended, with i left just
	 *         past the literal.
	 */
	bool lexer::parse_number(line_of_code & output)
	{
		std::size_t start = i;
		char byte = input[i];

		if(!ail::is_digit(byte))
			return false;

		// From here on i indexes the character right after the first digit.
		i++;

		// Hex prefix: the 'x' directly follows the leading '0', i.e. it is at
		// input[i]. The former test looked at input[i + 1] and therefore never
		// matched "0x...".
		if(byte == '0' && i < end && input[i] == 'x')
		{
			i++;
			if(i == end)
				number_parsing_error("Incomplete hex number at the end of the input");

			std::size_t hex_start = i;

			for(; i < end && ail::is_hex_digit(input[i]); i++);

			std::size_t hex_length = i - hex_start;
			if(hex_length == 0)
				lexer_error("Incomplete hex number");

			// Take exactly the digits that were scanned. The previous length,
			// i - end, wrapped around and selected the rest of the input.
			std::string hex_string = input.substr(hex_start, hex_length);
			types::unsigned_integer value = ail::string_to_number<types::unsigned_integer>(hex_string, std::ios_base::hex);
			output.lexemes.push_back(lexeme(value));
			return true;
		}

		char const dot = '.';

		bool got_dot = false;
		// Tracks the last character accepted into the literal so that a
		// trailing dot can be rejected.
		char last_byte = byte;
		for(; i < end; i++)
		{
			byte = input[i];
			if(byte == dot)
			{
				if(got_dot)
					number_parsing_error("Encountered a floating point value containing multiple dots");
				else
					got_dot = true;
			}
			else if(!ail::is_digit(byte))
				break;

			last_byte = byte;
		}

		if(last_byte == dot)
			number_parsing_error("Encountered a floating point value ending with a dot");

		std::string number_string = input.substr(start, i - start);
		lexeme current_lexeme;
		if(got_dot)
			current_lexeme = lexeme(ail::string_to_number<types::floating_point_value>(number_string));
		else
			current_lexeme = lexeme(ail::string_to_number<types::signed_integer>(number_string));
		output.lexemes.push_back(current_lexeme);

		return true;
	}