typename MapType::const_iterator findInTable(const MapType& map, const std::wstring& name, const SourcePos& pos, const ErrorCode notFoundError, const ErrorCode misspelledError) { typename MapType::const_iterator it(map.find(name)); if (it == map.end()) { const unsigned maxDist(3); std::wstring bestName; unsigned bestDist(std::numeric_limits<unsigned>::max()); for (typename MapType::const_iterator jt(map.begin()); jt != map.end(); ++jt) { const std::wstring thatName(jt->first); const unsigned d(editDistance<std::wstring>(name, thatName, maxDist)); if (d < bestDist && d < maxDist) { bestDist = d; bestName = thatName; } } if (bestDist < maxDist) throw TranslatableError(pos, misspelledError).arg(name).arg(bestName); else throw TranslatableError(pos, notFoundError).arg(name); } return it; }
//! Optimize a binary arithmetic node.
//! Both children are optimized first and must remain non-null. When both
//! optimized children are ImmediateNode constants, the operator is evaluated at
//! compile time on their int values, this node is deleted, and a new
//! ImmediateNode holding the result is returned.
//! A constant division or modulo by zero throws
//! TranslatableError(ERROR_DIVISION_BY_ZERO); an unhandled operator calls abort().
//! The source position is copied into a local before `delete this` because it
//! is still needed to build the replacement node afterwards.
//! \param dump optional stream receiving a trace line when a simplification happens
//! NOTE(review): shift counts and arithmetic overflow are not range-checked in
//! the visible code; folding uses plain int arithmetic -- confirm operands are
//! validated elsewhere.
//! NOTE(review): the definition looks truncated in this view (the path taken
//! when the operands are not both constants, and the function's closing brace,
//! are not visible) -- the code below is left untouched.
Node* BinaryArithmeticNode::optimize(std::wostream* dump) { children[0] = children[0]->optimize(dump); assert(children[0]); children[1] = children[1]->optimize(dump); assert(children[1]); // constants elimination // if both children are constants, pre-compute the result auto* immediateLeftChild = dynamic_cast<ImmediateNode*>(children[0]); auto* immediateRightChild = dynamic_cast<ImmediateNode*>(children[1]); if (immediateLeftChild && immediateRightChild) { int valueOne = immediateLeftChild->value; int valueTwo = immediateRightChild->value; int result; SourcePos pos = sourcePos; switch (op) { case ASEBA_OP_SHIFT_LEFT: result = valueOne << valueTwo; break; case ASEBA_OP_SHIFT_RIGHT: result = valueOne >> valueTwo; break; case ASEBA_OP_ADD: result = valueOne + valueTwo; break; case ASEBA_OP_SUB: result = valueOne - valueTwo; break; case ASEBA_OP_MULT: result = valueOne * valueTwo; break; case ASEBA_OP_DIV: if (valueTwo == 0) throw TranslatableError(sourcePos, ERROR_DIVISION_BY_ZERO); else result = valueOne / valueTwo; break; case ASEBA_OP_MOD: if (valueTwo == 0) throw TranslatableError(sourcePos, ERROR_DIVISION_BY_ZERO); else result = valueOne % valueTwo; break; case ASEBA_OP_BIT_OR: result = valueOne | valueTwo; break; case ASEBA_OP_BIT_XOR: result = valueOne ^ valueTwo; break; case ASEBA_OP_BIT_AND: result = valueOne & valueTwo; break; case ASEBA_OP_EQUAL: result = valueOne == valueTwo; break; case ASEBA_OP_NOT_EQUAL: result = valueOne != valueTwo; break; case ASEBA_OP_BIGGER_THAN: result = valueOne > valueTwo; break; case ASEBA_OP_BIGGER_EQUAL_THAN: result = valueOne >= valueTwo; break; case ASEBA_OP_SMALLER_THAN: result = valueOne < valueTwo; break; case ASEBA_OP_SMALLER_EQUAL_THAN: result = valueOne <= valueTwo; break; case ASEBA_OP_OR: result = valueOne || valueTwo; break; case ASEBA_OP_AND: result = valueOne && valueTwo; break; default: abort(); } if (dump) *dump << sourcePos.toWString() << L" binary arithmetic expression simplified\n"; delete this; return new 
ImmediateNode(pos, result); }
//! Construct a new token of given type and value Compiler::Token::Token(Type type, SourcePos pos, const std::wstring& value) : type(type), sValue(value), pos(pos) { if (type == TOKEN_INT_LITERAL) { long int decode; bool wasUnsigned = false; // all values are assumed to be signed 16-bits if ((value.length() > 1) && (value[1] == 'x')) { decode = wcstol(value.c_str() + 2, NULL, 16); wasUnsigned = true; } else if ((value.length() > 1) && (value[1] == 'b')) { decode = wcstol(value.c_str() + 2, NULL, 2); wasUnsigned = true; } else decode = wcstol(value.c_str(), NULL, 10); if (decode >= 65536) throw TranslatableError(pos, ERROR_INT16_OUT_OF_RANGE).arg(decode); if (wasUnsigned && decode > 32767) decode -= 65536; iValue = decode; } else iValue = 0; pos.column--; // column has already been incremented when token is created, so we remove one pos.character--; // character has already been incremented when token is created, so we remove one }
//! Check vector sizes for a while loop: the condition (first child) must have
//! a vector size of 1, otherwise TranslatableError(ERROR_WHILE_VECTOR_CONDITION)
//! is thrown; the inner block (second child), when present, is checked recursively.
void WhileNode::checkVectorSize() const
{
	assert(children.size() > 0);

	// the condition has to be a scalar
	if (children[0]->getVectorSize() != 1)
		throw TranslatableError(sourcePos, ERROR_WHILE_VECTOR_CONDITION);

	// check inner block, which may be missing or null
	const bool hasBlock = (children.size() > 1) && children[1];
	if (hasBlock)
		children[1]->checkVectorSize();
}
void ArithmeticAssignmentNode::checkVectorSize() const { assert(children.size() == 2); // will recursively check the whole tree belonging to "=" unsigned lSize = children[0]->getVectorSize(); unsigned rSize = children[1]->getVectorSize(); // top-level check if (lSize != rSize) throw TranslatableError(sourcePos, ERROR_ARRAY_SIZE_MISMATCH).arg(lSize).arg(rSize); }
//! Optimize a while loop.
//! The condition and the body are optimized first. Then:
//! - a constant non-zero condition throws TranslatableError(ERROR_INFINITE_LOOP);
//! - a constant zero condition, a null body or an empty BlockNode body removes
//!   the loop: this node is deleted and nullptr is returned;
//! - otherwise the binary condition is folded into a new FoldedWhileNode, which
//!   takes over the two operands and the body; this node is deleted and the
//!   folded node is returned.
//! \param dump optional stream receiving a trace line describing what was done
Node* WhileNode::optimize(std::wostream* dump)
{
	children[0] = children[0]->optimize(dump);
	assert(children[0]);

	// the optimized block may be nullptr
	children[1] = children[1]->optimize(dump);

	// check for loops on constants
	if (auto* constantCondition = dynamic_cast<ImmediateNode*>(children[0]))
	{
		if (constantCondition->value != 0)
			throw TranslatableError(sourcePos, ERROR_INFINITE_LOOP);

		if (dump)
			*dump << sourcePos.toWString() << L" while removed because condition is always false\n";
		delete this;
		return nullptr;
	}

	// check for loops with empty content
	Node* const body = children[1];
	const bool emptyBody = !body || (dynamic_cast<BlockNode*>(body) && body->children.empty());
	if (emptyBody)
	{
		if (dump)
			*dump << sourcePos.toWString() << L" while removed because it contained no statement\n";
		delete this;
		return nullptr;
	}

	// fold operation inside loop
	auto* condition = polymorphic_downcast<BinaryArithmeticNode*>(children[0]);
	auto* folded = new FoldedWhileNode(sourcePos);
	folded->op = condition->op;
	folded->children.push_back(condition->children[0]);
	folded->children.push_back(condition->children[1]);
	// the operands and the body now belong to the folded node: detach them here
	// (presumably so that `delete this` does not destroy them -- destructor not visible here)
	condition->children.clear();
	folded->children.push_back(body);
	children[1] = nullptr;

	if (dump)
		*dump << sourcePos.toWString() << L" while condition folded inside node\n";

	delete this;
	return folded;
}
//! Check vector sizes for an if/when: the condition (first child) must have a
//! vector size of 1, otherwise TranslatableError(ERROR_IF_VECTOR_CONDITION) is
//! thrown; the true block and the false block (second and third children),
//! when present, are checked recursively.
void IfWhenNode::checkVectorSize() const
{
	assert(children.size() > 0);

	// the condition has to be a scalar
	if (children[0]->getVectorSize() != 1)
		throw TranslatableError(sourcePos, ERROR_IF_VECTOR_CONDITION);

	// index 1 is the true block, index 2 the false block; both are optional
	for (size_t branch = 1; branch <= 2 && branch < children.size(); ++branch)
	{
		if (children[branch])
			children[branch]->checkVectorSize();
	}
}
//! Look for a global event of a given name, and if found, return an iterator; if not, return an exception Compiler::EventsMap::const_iterator Compiler::findGlobalEvent(const std::wstring& name, const SourcePos& pos) const { try { return findInTable<EventsMap>(globalEventsMap, name, pos, ERROR_EVENT_NOT_DEFINED, ERROR_EVENT_NOT_DEFINED_GUESS); } catch (TranslatableError e) { if (allEventsMap.find(name) != allEventsMap.end()) throw TranslatableError(pos, ERROR_EMIT_LOCAL_EVENT).arg(name); else throw e; } }
//! return the vector's length unsigned MemoryVectorNode::getVectorSize() const { assert(children.size() <= 1); if (children.size() == 1) { TupleVectorNode* index = dynamic_cast<TupleVectorNode*>(children[0]); if (index) { unsigned numberOfIndex = index->getVectorSize(); // immediate indexes if (numberOfIndex == 1) { // foo[n] -> 1 dimension return 1; } else if (numberOfIndex == 2) { const int im0(index->getImmediateValue(0)); const int im1(index->getImmediateValue(1)); if (im1 < 0 || im1 >= int(arraySize)) throw TranslatableError(sourcePos, ERROR_ARRAY_OUT_OF_BOUND).arg(arrayName).arg(im1).arg(arraySize); // foo[n:m] -> compute the span return im1 - im0 + 1; } else // whaaaaat? Are you trying foo[[1,2,3]]? throw TranslatableError(sourcePos, ERROR_ARRAY_ILLEGAL_ACCESS); } else // random access foo[expr] return 1; } else // full array access return arraySize; }
//! return the children's size, check for equal size, or E_NOVAL if no child unsigned Node::getVectorSize() const { unsigned size = E_NOVAL; unsigned new_size = E_NOVAL; for (NodesVector::const_iterator it = children.begin(); it != children.end(); ++it) { new_size = (*it)->getVectorSize(); if (size == E_NOVAL) size = new_size; else if (size != new_size) throw TranslatableError(sourcePos, ERROR_ARRAY_SIZE_MISMATCH).arg(size).arg(new_size); } return size; }
//! Type-check a while loop: the condition must be of type TYPE_BOOL and the
//! body of type TYPE_UNIT. In addition the condition node itself must be either
//! a binary operation whose operator lies between ASEBA_OP_EQUAL and
//! ASEBA_OP_AND, or a unary ASEBA_UNARY_OP_NOT; otherwise
//! TranslatableError(ERROR_EXPECTING_CONDITION) is thrown at the condition's position.
//! \return TYPE_UNIT
Node::ReturnType WhileNode::typeCheck() const
{
	expectType(TYPE_BOOL, children[0]->typeCheck());
	expectType(TYPE_UNIT, children[1]->typeCheck());

	Node* const condition = children[0];
	const BinaryArithmeticNode* asBinary = dynamic_cast<BinaryArithmeticNode*>(condition);
	const UnaryArithmeticNode* asUnary = dynamic_cast<UnaryArithmeticNode*>(condition);

	const bool binaryCondition = asBinary && (asBinary->op >= ASEBA_OP_EQUAL) && (asBinary->op <= ASEBA_OP_AND);
	const bool negation = asUnary && (asUnary->op == ASEBA_UNARY_OP_NOT);
	if (!(binaryCondition || negation))
		throw TranslatableError(condition->sourcePos, ERROR_EXPECTING_CONDITION).arg(condition->toNodeName());

	return TYPE_UNIT;
}
//! Assignment between vectors is expanded into multiple scalar assignments Node* AssignmentNode::expandVectorialNodes(std::wostream *dump, Compiler* compiler, unsigned int index) { assert(children.size() == 2); // left vector should reference a memory location MemoryVectorNode* leftVector = dynamic_cast<MemoryVectorNode*>(children[0]); if (!leftVector) throw TranslatableError(sourcePos, ERROR_INCORRECT_LEFT_VALUE).arg(children[0]->toNodeName()); leftVector->setWrite(true); // right vector can be anything Node* rightVector = children[1]; // check if the left vector appears somewhere on the right side if (matchNameInMemoryVector(rightVector, leftVector->arrayName) && leftVector->getVectorSize() > 1) { // in such case, there is a risk of involuntary overwriting the content // we need to throw in a temporary variable to avoid this risk std::auto_ptr<BlockNode> tempBlock(new BlockNode(sourcePos)); // tempVar = rightVector std::auto_ptr<AssignmentNode> temp(compiler->allocateTemporaryVariable(sourcePos, rightVector->deepCopy())); MemoryVectorNode* tempVar = dynamic_cast<MemoryVectorNode*>(temp->children[0]); assert(tempVar); tempBlock->children.push_back(temp.release()); // leftVector = tempVar temp.reset(new AssignmentNode(sourcePos, leftVector->deepCopy(), tempVar->deepCopy())); tempBlock->children.push_back(temp.release()); return tempBlock->expandVectorialNodes(dump, compiler); // tempBlock will be reclaimed } // else std::auto_ptr<BlockNode> block(new BlockNode(sourcePos)); // top-level block for (unsigned int i = 0; i < leftVector->getVectorSize(); i++) { // expand to left[i] = right[i] block->children.push_back(new AssignmentNode(sourcePos, leftVector->expandVectorialNodes(dump, compiler, i), rightVector->expandVectorialNodes(dump, compiler, i))); } return block.release(); }
//! Constructor UnaryArithmeticAssignmentNode::UnaryArithmeticAssignmentNode(const SourcePos& sourcePos, Compiler::Token::Type token, Node *memory) : AbstractTreeNode(sourcePos) { switch (token) { case Compiler::Token::TOKEN_OP_PLUS_PLUS: arithmeticOp = ASEBA_OP_ADD; break; case Compiler::Token::TOKEN_OP_MINUS_MINUS: arithmeticOp = ASEBA_OP_SUB; break; default: throw TranslatableError(sourcePos, ERROR_UNARY_ARITH_BUILD_UNEXPECTED); break; } children.push_back(memory); }
//! Expand to memory[index] Node* MemoryVectorNode::expandVectorialNodes(std::wostream *dump, Compiler* compiler, unsigned int index) { assert(index < getVectorSize()); // get the optional index given in the Aseba code TupleVectorNode* accessIndex = NULL; if (children.size() > 0) accessIndex = dynamic_cast<TupleVectorNode*>(children[0]); if (accessIndex || children.size() == 0) { // direct access. Several cases: // -> an immediate index "foo[n]" or "foo[n:m]" // -> full array access "foo" // => use a StoreNode (lvalue) or LoadNode (rvalue) unsigned pointer = getVectorAddr() + index; // check if index is within bounds if (pointer >= arrayAddr + arraySize) throw TranslatableError(sourcePos, ERROR_ARRAY_OUT_OF_BOUND).arg(arrayName).arg(index).arg(arraySize); if (write == true) return new StoreNode(sourcePos, pointer); else return new LoadNode(sourcePos, pointer); } else { // indirect access foo[expr] // => use a ArrayWriteNode (lvalue) or ArrayReadNode (rvalue) std::auto_ptr<Node> array; if (write == true) array.reset(new ArrayWriteNode(sourcePos, arrayAddr, arraySize, arrayName)); else array.reset(new ArrayReadNode(sourcePos, arrayAddr, arraySize, arrayName)); array->children.push_back(children[0]->expandVectorialNodes(dump, compiler, index)); return array.release(); } }
//! Expand a vector assignment into a block of element-wise assignments.
//! The left side must be a MemoryVectorNode, otherwise
//! TranslatableError(ERROR_INCORRECT_LEFT_VALUE) is thrown; it is marked as a
//! write target. One "left[i] = right[i]" AssignmentNode is built per element
//! of the left vector. On success this node deletes itself and the new block
//! is returned.
//! \param dump optional trace stream, forwarded to the children
//! \param index not used by the visible code of this override
//! \return a newly allocated block; the caller takes ownership
Node* AssignmentNode::expandToAsebaTree(std::wostream *dump, unsigned int index)
{
	assert(children.size() == 2);

	MemoryVectorNode* leftVector = dynamic_cast<MemoryVectorNode*>(children[0]);
	if (!leftVector)
		throw TranslatableError(sourcePos, ERROR_INCORRECT_LEFT_VALUE).arg(children[0]->toNodeName());
	leftVector->setWrite(true);
	Node* rightVector = children[1];

	// unique_ptr frees the partially built tree if an expansion below throws
	std::unique_ptr<BlockNode> block(new BlockNode(sourcePos));
	for (unsigned int i = 0; i < leftVector->getVectorSize(); i++)
	{
		// expand to left[i] = right[i]
		std::unique_ptr<AssignmentNode> assignment(new AssignmentNode(sourcePos));
		assignment->children.push_back(leftVector->expandToAsebaTree(dump, i));
		assignment->children.push_back(rightVector->expandToAsebaTree(dump, i));
		// hand ownership over only once the pointer is safely stored in the block
		block->children.push_back(assignment.get());
		assignment.release();
	}

	delete this;
	return block.release();
}
//! return the compile-time base address of the memory range, taking //! into account an immediate index foo[n] or foo[n:m] //! return E_NOVAL if foo[expr] unsigned MemoryVectorNode::getVectorAddr() const { assert(children.size() <= 1); int shift = 0; // index(es) given? if (children.size() == 1) { TupleVectorNode* index = dynamic_cast<TupleVectorNode*>(children[0]); if (index) { shift = index->getImmediateValue(0); } else // not know at compile time return E_NOVAL; } if (shift < 0 || shift >= int(arraySize)) throw TranslatableError(sourcePos, ERROR_ARRAY_OUT_OF_BOUND).arg(arrayName).arg(shift).arg(arraySize); return arrayAddr + shift; }
//! Check that \a type matches \a expected.
//! Throws TranslatableError(ERROR_EXPECTING_TYPE), with the names of the
//! expected and the actual type as arguments, when they differ.
void Node::expectType(const Node::ReturnType& expected, const Node::ReturnType& type) const
{
	if (type == expected)
		return;
	throw TranslatableError(sourcePos, ERROR_EXPECTING_TYPE).arg(typeName(expected)).arg(typeName(type));
}
//! Parse source and build tokens vector //! \param source source code void Compiler::tokenize(std::wistream& source) { tokens.clear(); SourcePos pos(0, 0, 0); const unsigned tabSize = 4; // tokenize text source while (source.good()) { wchar_t c = source.get(); if (source.eof()) break; pos.column++; pos.character++; switch (c) { // simple cases of one character case ' ': break; //case '\t': pos.column += tabSize - 1; break; case '\t': break; case '\n': pos.row++; pos.column = -1; break; // -1 so next call to pos.column++ result set 0 case '\r': pos.column = -1; break; // -1 so next call to pos.column++ result set 0 case '(': tokens.push_back(Token(Token::TOKEN_PAR_OPEN, pos)); break; case ')': tokens.push_back(Token(Token::TOKEN_PAR_CLOSE, pos)); break; case '[': tokens.push_back(Token(Token::TOKEN_BRACKET_OPEN, pos)); break; case ']': tokens.push_back(Token(Token::TOKEN_BRACKET_CLOSE, pos)); break; case ':': tokens.push_back(Token(Token::TOKEN_COLON, pos)); break; case ',': tokens.push_back(Token(Token::TOKEN_COMMA, pos)); break; // special case for comment case '#': { // check if it's a comment block #* ... 
*# if (source.peek() == '*') { // comment block // record position of the begining SourcePos begin(pos); // move forward by 2 characters then search for the end int step = 2; while ((step > 0) || (c != '*') || (source.peek() != '#')) { if (step) step--; if (c == '\t') pos.column += tabSize; else if (c == '\n') { pos.row++; pos.column = 0; } else pos.column++; c = source.get(); pos.character++; if (source.eof()) { // EOF -> unbalanced block throw TranslatableError(begin, ERROR_UNBALANCED_COMMENT_BLOCK); } } // fetch the # getNextCharacter(source, pos); } else { // simple comment while ((c != '\n') && (c != '\r') && (!source.eof())) { if (c == '\t') pos.column += tabSize; else pos.column++; c = source.get(); pos.character++; } if (c == '\n') { pos.row++; pos.column = 0; } else if (c == '\r') pos.column = 0; } } break; // cases that require one character look-ahead case '+': if (testNextCharacter(source, pos, '=', Token::TOKEN_OP_ADD_EQUAL)) break; if (testNextCharacter(source, pos, '+', Token::TOKEN_OP_PLUS_PLUS)) break; tokens.push_back(Token(Token::TOKEN_OP_ADD, pos)); break; case '-': if (testNextCharacter(source, pos, '=', Token::TOKEN_OP_NEG_EQUAL)) break; if (testNextCharacter(source, pos, '-', Token::TOKEN_OP_MINUS_MINUS)) break; tokens.push_back(Token(Token::TOKEN_OP_NEG, pos)); break; case '*': if (testNextCharacter(source, pos, '=', Token::TOKEN_OP_MULT_EQUAL)) break; tokens.push_back(Token(Token::TOKEN_OP_MULT, pos)); break; case '/': if (testNextCharacter(source, pos, '=', Token::TOKEN_OP_DIV_EQUAL)) break; tokens.push_back(Token(Token::TOKEN_OP_DIV, pos)); break; case '%': if (testNextCharacter(source, pos, '=', Token::TOKEN_OP_MOD_EQUAL)) break; tokens.push_back(Token(Token::TOKEN_OP_MOD, pos)); break; case '|': if (testNextCharacter(source, pos, '=', Token::TOKEN_OP_BIT_OR_EQUAL)) break; tokens.push_back(Token(Token::TOKEN_OP_BIT_OR, pos)); break; case '^': if (testNextCharacter(source, pos, '=', Token::TOKEN_OP_BIT_XOR_EQUAL)) break; 
tokens.push_back(Token(Token::TOKEN_OP_BIT_XOR, pos)); break; case '&': if (testNextCharacter(source, pos, '=', Token::TOKEN_OP_BIT_AND_EQUAL)) break; tokens.push_back(Token(Token::TOKEN_OP_BIT_AND, pos)); break; case '~': tokens.push_back(Token(Token::TOKEN_OP_BIT_NOT, pos)); break; case '!': if (testNextCharacter(source, pos, '=', Token::TOKEN_OP_NOT_EQUAL)) break; throw TranslatableError(pos, ERROR_SYNTAX); break; case '=': if (testNextCharacter(source, pos, '=', Token::TOKEN_OP_EQUAL)) break; tokens.push_back(Token(Token::TOKEN_ASSIGN, pos)); break; // cases that require two characters look-ahead case '<': if (source.peek() == '<') { // << getNextCharacter(source, pos); if (testNextCharacter(source, pos, '=', Token::TOKEN_OP_SHIFT_LEFT_EQUAL)) break; tokens.push_back(Token(Token::TOKEN_OP_SHIFT_LEFT, pos)); break; } // < if (testNextCharacter(source, pos, '=', Token::TOKEN_OP_SMALLER_EQUAL)) break; tokens.push_back(Token(Token::TOKEN_OP_SMALLER, pos)); break; case '>': if (source.peek() == '>') { // >> getNextCharacter(source, pos); if (testNextCharacter(source, pos, '=', Token::TOKEN_OP_SHIFT_RIGHT_EQUAL)) break; tokens.push_back(Token(Token::TOKEN_OP_SHIFT_RIGHT, pos)); break; } // > if (testNextCharacter(source, pos, '=', Token::TOKEN_OP_BIGGER_EQUAL)) break; tokens.push_back(Token(Token::TOKEN_OP_BIGGER, pos)); break; // cases that require to look for a while default: { // check first character if (!std::iswalnum(c) && (c != '_')) throw TranslatableError(pos, ERROR_INVALID_IDENTIFIER).arg((unsigned)c, 0, 16); // get a string std::wstring s; s += c; wchar_t nextC = source.peek(); int posIncrement = 0; while ((source.good()) && (std::iswalnum(nextC) || (nextC == '_') || (nextC == '.'))) { s += nextC; source.get(); posIncrement++; nextC = source.peek(); } // we now have a string, let's check what it is if (std::iswdigit(s[0])) { // check if hex or binary if ((s.length() > 1) && (s[0] == '0') && (!std::iswdigit(s[1]))) { // check if we have a valid number if 
(s[1] == 'x') { for (unsigned i = 2; i < s.size(); i++) if (!std::iswxdigit(s[i])) throw TranslatableError(pos, ERROR_INVALID_HEXA_NUMBER); } else if (s[1] == 'b') { for (unsigned i = 2; i < s.size(); i++) if ((s[i] != '0') && (s[i] != '1')) throw TranslatableError(pos, ERROR_INVALID_BINARY_NUMBER); } else throw TranslatableError(pos, ERROR_NUMBER_INVALID_BASE); } else { // check if we have a valid number for (unsigned i = 1; i < s.size(); i++) if (!std::iswdigit(s[i])) throw TranslatableError(pos, ERROR_IN_NUMBER); } tokens.push_back(Token(Token::TOKEN_INT_LITERAL, pos, s)); } else { // check if it is a known keyword // FIXME: clean-up that with a table if (s == L"when") tokens.push_back(Token(Token::TOKEN_STR_when, pos)); else if (s == L"emit") tokens.push_back(Token(Token::TOKEN_STR_emit, pos)); else if (s == L"_emit") tokens.push_back(Token(Token::TOKEN_STR_hidden_emit, pos)); else if (s == L"for") tokens.push_back(Token(Token::TOKEN_STR_for, pos)); else if (s == L"in") tokens.push_back(Token(Token::TOKEN_STR_in, pos)); else if (s == L"step") tokens.push_back(Token(Token::TOKEN_STR_step, pos)); else if (s == L"while") tokens.push_back(Token(Token::TOKEN_STR_while, pos)); else if (s == L"do") tokens.push_back(Token(Token::TOKEN_STR_do, pos)); else if (s == L"if") tokens.push_back(Token(Token::TOKEN_STR_if, pos)); else if (s == L"then") tokens.push_back(Token(Token::TOKEN_STR_then, pos)); else if (s == L"else") tokens.push_back(Token(Token::TOKEN_STR_else, pos)); else if (s == L"elseif") tokens.push_back(Token(Token::TOKEN_STR_elseif, pos)); else if (s == L"end") tokens.push_back(Token(Token::TOKEN_STR_end, pos)); else if (s == L"var") tokens.push_back(Token(Token::TOKEN_STR_var, pos)); else if (s == L"const") tokens.push_back(Token(Token::TOKEN_STR_const, pos)); else if (s == L"call") tokens.push_back(Token(Token::TOKEN_STR_call, pos)); else if (s == L"sub") tokens.push_back(Token(Token::TOKEN_STR_sub, pos)); else if (s == L"callsub") 
tokens.push_back(Token(Token::TOKEN_STR_callsub, pos)); else if (s == L"onevent") tokens.push_back(Token(Token::TOKEN_STR_onevent, pos)); else if (s == L"abs") tokens.push_back(Token(Token::TOKEN_STR_abs, pos)); else if (s == L"return") tokens.push_back(Token(Token::TOKEN_STR_return, pos)); else if (s == L"or") tokens.push_back(Token(Token::TOKEN_OP_OR, pos)); else if (s == L"and") tokens.push_back(Token(Token::TOKEN_OP_AND, pos)); else if (s == L"not") tokens.push_back(Token(Token::TOKEN_OP_NOT, pos)); else tokens.push_back(Token(Token::TOKEN_STRING_LITERAL, pos, s)); } pos.column += posIncrement; pos.character += posIncrement; } break; } // switch (c) } // while (source.good()) tokens.push_back(Token(Token::TOKEN_END_OF_STREAM, pos)); }