// Bundles the first `len` entries of `nodes` into a single "_" AST node
// carrying metadata `met`. A non-positive `len` produces a node with no
// children.
Node multiToken(Node nodes[], int len, Metadata met) {
    std::vector<Node> children;
    if (len > 0)
        children.assign(nodes, nodes + len);
    return astnode("_", children, met);
}
// Adds necessary wrappers to a program Node finalize(programData c) { std::vector<Node> bottom; Metadata m = c.code.metadata; // If we are using both alloc and variables, we need to pre-zfill // some memory if (c.aux.allocUsed && c.aux.vars.size() > 0) { Node nodelist[] = { token("0", m), token(unsignedToDecimal(c.aux.vars.size() * 32 - 1)), token("MSTORE8", m) }; bottom.push_back(multiToken(nodelist, 3, m)); } // If msg.data is being used as an array, then we need to copy it if (c.aux.calldataUsed) { Node nodelist[] = { token("MSIZE", m), token("CALLDATASIZE", m), token("MSIZE", m), token("0", m), token("CALLDATACOPY", m), token(c.aux.vars["'msg.data"], m), token("MSTORE", m) }; bottom.push_back(multiToken(nodelist, 7, m)); } // The actual code bottom.push_back(c.code); return astnode("_", bottom, m); }
// Convert a function of the form (def (f x y z) (do stuff)) into // (if (first byte of ABI is correct) (seq (setup x y z) (do stuff))) Node convFunction(Node node, int functionCount) { std::string prefix = "_temp"+mkUniqueToken()+"_"; Metadata m = node.metadata; if (node.args.size() != 2) err("Malformed def!", m); // Collect the list of variable names and variable byte counts Node unpack = unpackArguments(node.args[0].args, m); // And the actual code Node body = node.args[1]; // Main LLL-based function body return astnode("if", astnode("eq", astnode("get", token("__funid", m), m), token(unsignedToDecimal(functionCount), m), m), astnode("seq", unpack, body, m)); }
// Applies that dictionary Node substDict(Node program, programAux aux, int labelLength) { Metadata m = program.metadata; std::vector<Node> out; std::vector<Node> inner; if (program.type == TOKEN) { if (program.val[0] == '$') { std::string tokStr = "PUSH"+unsignedToDecimal(labelLength); out.push_back(token(tokStr, m)); int dotLoc = program.val.find('.'); if (dotLoc == -1) { std::string val = aux.vars[program.val.substr(1)]; inner = toByteArr(val, m, labelLength); } else { std::string start = aux.vars[program.val.substr(1, dotLoc-1)], end = aux.vars[program.val.substr(dotLoc + 1)], dist = decimalSub(end, start); inner = toByteArr(dist, m, labelLength); } out.push_back(astnode("_", inner, m)); } else if (program.val[0] == '~') { } else if (isNumberLike(program)) { inner = toByteArr(program.val, m); out.push_back(token("PUSH"+unsignedToDecimal(inner.size()))); out.push_back(astnode("_", inner, m)); } else return program; } else { for (unsigned i = 0; i < program.args.size(); i++) { Node n = substDict(program.args[i], aux, labelLength); if (n.type == TOKEN || n.args.size()) out.push_back(n); } } return astnode("_", out, m); }
// Adds necessary wrappers to a program Node finalize(programData c) { std::vector<Node> bottom; Metadata m = c.code.metadata; // If we are using both alloc and variables, we need to pre-zfill // some memory if ((c.aux.allocUsed || c.aux.calldataUsed) && c.aux.vars.size() > 0) { Node nodelist[] = { token("0", m), token(unsignedToDecimal(c.aux.nextVarMem - 1)), token("MSTORE8", m) }; bottom.push_back(multiToken(nodelist, 3, m)); } // The actual code bottom.push_back(c.code); return astnode("_", bottom, m); }
// Turns LLL tree into tree of code fragments
//
// node    - LLL node to compile
// aux     - compilation state threaded through the traversal (variable
//           slots in aux.vars, allocUsed / calldataUsed flags); the updated
//           copy is returned inside the result
// height  - stack height at this point, used to address stack variables
// dupvars - stack variables introduced by enclosing "with" nodes, mapped to
//           the height at which each one was pushed
//
// Returns a programData holding the updated aux, the generated fragment
// tree and the fragment's arity (third argument of pd()).
programData opcodeify(Node node,
                      programAux aux=Aux(),
                      int height=0,
                      std::map<std::string, int> dupvars=
                          std::map<std::string, int>()) {
    // Unique suffix for the jump labels generated by this node
    std::string symb = "_"+mkUniqueToken();
    Metadata m = node.metadata;
    // Numbers
    if (node.type == TOKEN) {
        return pd(aux, nodeToNumeric(node), 1);
    }
    else if (node.val == "ref" || node.val == "get" || node.val == "set") {
        std::string varname = node.args[0].val;
        if (!aux.vars.count(varname)) {
            // Compute the slot before operator[] inserts the key, so the
            // result does not depend on operand evaluation order (which is
            // unspecified for this assignment before C++17)
            std::string slot = unsignedToDecimal(aux.vars.size() * 32);
            aux.vars[varname] = slot;
        }
        if (varname == "'msg.data") aux.calldataUsed = true;
        // Set variable
        if (node.val == "set") {
            programData sub = opcodeify(node.args[1], aux, height, dupvars);
            if (!sub.outs)
                err("Value to set variable must have nonzero arity!", m);
            // Stack variable: swap the new value into place, drop the old
            if (dupvars.count(node.args[0].val)) {
                int h = height - dupvars[node.args[0].val];
                if (h > 16) err("Too deep for stack variable (max 16)", m);
                Node nodelist[] = {
                    sub.code,
                    token("SWAP"+unsignedToDecimal(h), m),
                    token("POP", m)
                };
                return pd(sub.aux, multiToken(nodelist, 3, m), 0);
            }
            // Memory variable
            Node nodelist[] = {
                sub.code,
                token(sub.aux.vars[varname], m),
                token("MSTORE", m),
            };
            return pd(sub.aux, multiToken(nodelist, 3, m), 0);
        }
        // Get variable
        else if (node.val == "get") {
            if (dupvars.count(node.args[0].val)) {
                int h = height - dupvars[node.args[0].val];
                if (h > 16) err("Too deep for stack variable (max 16)", m);
                return pd(aux, token("DUP"+unsignedToDecimal(h), m), 1);
            }
            Node nodelist[] = {
                token(aux.vars[varname], m),
                token("MLOAD", m)
            };
            return pd(aux, multiToken(nodelist, 2, m), 1);
        }
        // Refer variable
        else {
            if (dupvars.count(node.args[0].val))
                err("Cannot ref stack variable!", m);
            return pd(aux, token(aux.vars[varname], m), 1);
        }
    }
    // Code blocks
    if (node.val == "lll" && node.args.size() == 2) {
        if (node.args[1].val != "0") aux.allocUsed = true;
        std::vector<Node> o;
        // The inner program is compiled with fresh state and finalized on
        // its own
        o.push_back(finalize(opcodeify(node.args[0])));
        programData sub = opcodeify(node.args[1], aux, height, dupvars);
        Node code = astnode("____CODE", o, m);
        Node nodelist[] = {
            token("$begincode"+symb+".endcode"+symb, m),
            token("DUP1", m),
            token("$begincode"+symb, m),
            sub.code,
            token("CODECOPY", m),
            token("$endcode"+symb, m),
            token("JUMP", m),
            token("~begincode"+symb, m),
            code,
            token("~endcode"+symb, m)
        };
        return pd(sub.aux, multiToken(nodelist, 10, m), 1);
    }
    // Stack variables
    if (node.val == "with") {
        std::map<std::string, int> dupvars2 = dupvars;
        dupvars2[node.args[0].val] = height;
        // The initial value is compiled without the new variable in scope
        programData initial = opcodeify(node.args[1], aux, height, dupvars);
        if (!initial.outs)
            err("Initial variable value must have nonzero arity!", m);
        programData sub = opcodeify(node.args[2],
                                    initial.aux,
                                    height + 1,
                                    dupvars2);
        Node nodelist[] = {
            initial.code,
            sub.code
        };
        programData o = pd(sub.aux, multiToken(nodelist, 2, m), sub.outs);
        // Remove the variable from underneath the body's result, if any
        if (sub.outs)
            o.code.args.push_back(token("SWAP1", m));
        o.code.args.push_back(token("POP", m));
        return o;
    }
    // Seq of multiple statements
    if (node.val == "seq") {
        std::vector<Node> children;
        int lastOut = 0;
        for (unsigned i = 0; i < node.args.size(); i++) {
            programData sub = opcodeify(node.args[i], aux, height, dupvars);
            aux = sub.aux;
            if (sub.outs == 1) {
                // Only the last statement's value survives
                if (i < node.args.size() - 1) sub.code = popwrap(sub.code);
                else lastOut = 1;
            }
            children.push_back(sub.code);
        }
        return pd(aux, astnode("_", children, m), lastOut);
    }
    // 2-part conditional (if gets rewritten to unless in rewrites)
    else if (node.val == "unless" && node.args.size() == 2) {
        programData cond = opcodeify(node.args[0], aux, height, dupvars);
        programData action = opcodeify(node.args[1], cond.aux, height, dupvars);
        aux = action.aux;
        if (!cond.outs) err("Condition of if/unless statement has arity 0", m);
        if (action.outs) action.code = popwrap(action.code);
        Node nodelist[] = {
            cond.code,
            token("$endif"+symb, m), token("JUMPI", m),
            action.code,
            token("~endif"+symb, m)
        };
        return pd(aux, multiToken(nodelist, 5, m), 0);
    }
    // 3-part conditional
    else if (node.val == "if" && node.args.size() == 3) {
        programData ifd = opcodeify(node.args[0], aux, height, dupvars);
        programData thend = opcodeify(node.args[1], ifd.aux, height, dupvars);
        programData elsed = opcodeify(node.args[2], thend.aux, height, dupvars);
        aux = elsed.aux;
        if (!ifd.outs)
            err("Condition of if/unless statement has arity 0", m);
        // Handle cases where one conditional outputs something
        // and the other does not
        int outs = (thend.outs && elsed.outs) ? 1 : 0;
        if (thend.outs > outs) thend.code = popwrap(thend.code);
        if (elsed.outs > outs) elsed.code = popwrap(elsed.code);
        Node nodelist[] = {
            ifd.code,
            token("NOT", m),
            token("$else"+symb, m), token("JUMPI", m),
            thend.code,
            token("$endif"+symb, m), token("JUMP", m),
            token("~else"+symb, m),
            elsed.code,
            token("~endif"+symb, m)
        };
        return pd(aux, multiToken(nodelist, 10, m), outs);
    }
    // While (rewritten to this in rewrites)
    else if (node.val == "until") {
        programData cond = opcodeify(node.args[0], aux, height, dupvars);
        programData action = opcodeify(node.args[1], cond.aux, height, dupvars);
        aux = action.aux;
        if (!cond.outs)
            err("Condition of while/until loop has arity 0", m);
        if (action.outs) action.code = popwrap(action.code);
        Node nodelist[] = {
            token("~beg"+symb, m),
            cond.code,
            token("$end"+symb, m), token("JUMPI", m),
            action.code,
            token("$beg"+symb, m), token("JUMP", m),
            token("~end"+symb, m)
        };
        return pd(aux, multiToken(nodelist, 8, m));
    }
    // Memory allocations
    else if (node.val == "alloc") {
        programData bytez = opcodeify(node.args[0], aux, height, dupvars);
        aux = bytez.aux;
        if (!bytez.outs)
            err("Alloc input has arity 0", m);
        aux.allocUsed = true;
        Node nodelist[] = {
            bytez.code,
            token("MSIZE", m), token("SWAP1", m), token("MSIZE", m),
            token("ADD", m), token("0", m), token("SWAP1", m),
            token("MSTORE", m)
        };
        return pd(aux, multiToken(nodelist, 8, m), 1);
    }
    // Array literals
    else if (node.val == "array_lit") {
        aux.allocUsed = true;
        std::vector<Node> nodes;
        if (!node.args.size()) {
            nodes.push_back(token("MSIZE", m));
            // NOTE(review): arity is left to pd()'s default here although
            // an MSIZE is emitted -- confirm the default matches
            return pd(aux, astnode("_", nodes, m));
        }
        nodes.push_back(token("MSIZE", m));
        nodes.push_back(token("0", m));
        nodes.push_back(token("MSIZE", m));
        nodes.push_back(token(unsignedToDecimal(node.args.size() * 32 - 1), m));
        nodes.push_back(token("ADD", m));
        nodes.push_back(token("MSTORE8", m));
        for (unsigned i = 0; i < node.args.size(); i++) {
            Metadata m2 = node.args[i].metadata;
            nodes.push_back(token("DUP1", m2));
            // Two extra items are on the stack while an element is
            // evaluated, hence height + 2
            programData sub = opcodeify(node.args[i], aux, height + 2, dupvars);
            if (!sub.outs)
                err("Array_lit item " + unsignedToDecimal(i) + " has zero arity", m2);
            aux = sub.aux;
            nodes.push_back(sub.code);
            nodes.push_back(token("SWAP1", m2));
            if (i > 0) {
                nodes.push_back(token(unsignedToDecimal(i * 32), m2));
                nodes.push_back(token("ADD", m2));
            }
            nodes.push_back(token("MSTORE", m2));
        }
        return pd(aux, astnode("_", nodes, m), 1);
    }
    // All other functions/operators
    else {
        std::vector<Node> subs2;
        int depth = opinputs(upperCase(node.val));
        // "debug" is not an opcode and is exempt from the arity checks
        if (node.val != "debug") {
            if (depth == -1)
                err("Not a function or opcode: "+node.val, m);
            if ((int)node.args.size() != depth)
                err("Invalid arity for "+node.val, m);
        }
        // Arguments are compiled last to first
        for (int i = node.args.size() - 1; i >= 0; i--) {
            programData sub = opcodeify(node.args[i],
                                        aux,
                                        height - i - 1 + node.args.size(),
                                        dupvars);
            aux = sub.aux;
            if (!sub.outs)
                err("Input "+unsignedToDecimal(i)+" has arity 0", sub.code.metadata);
            subs2.push_back(sub.code);
        }
        if (node.val == "debug") {
            subs2.push_back(token("DUP"+unsignedToDecimal(node.args.size()), m));
            for (int i = 0; i <= (int)node.args.size(); i++)
                subs2.push_back(token("POP", m));
        }
        else subs2.push_back(token(upperCase(node.val), m));
        int outdepth = node.val == "debug" ? 0 : opoutputs(upperCase(node.val));
        return pd(aux, astnode("_", subs2, m), outdepth);
    }
}
// Turns LLL tree into tree of code fragments programData opcodeify(Node node, programAux aux=Aux(), programVerticalAux vaux=verticalAux()) { std::string symb = "_"+mkUniqueToken(); Metadata m = node.metadata; // Numbers if (node.type == TOKEN) { return pd(aux, nodeToNumeric(node), 1); } else if (node.val == "ref" || node.val == "get" || node.val == "set") { std::string varname = node.args[0].val; // Determine reference to variable if (!aux.vars.count(node.args[0].val)) { aux.vars[node.args[0].val] = utd(aux.nextVarMem); aux.nextVarMem += 32; } Node varNode = tkn(aux.vars[varname], m); //std::cerr << varname << " " << printSimple(varNode) << "\n"; // Set variable if (node.val == "set") { programData sub = opcodeify(node.args[1], aux, vaux); if (!sub.outs) err("Value to set variable must have nonzero arity!", m); // What if we are setting a stack variable? if (vaux.dupvars.count(node.args[0].val)) { int h = vaux.height - vaux.dupvars[node.args[0].val]; if (h > 16) err("Too deep for stack variable (max 16)", m); Node nodelist[] = { sub.code, token("SWAP"+unsignedToDecimal(h), m), token("POP", m) }; return pd(sub.aux, multiToken(nodelist, 3, m), 0); } // Setting a memory variable else { Node nodelist[] = { sub.code, varNode, token("MSTORE", m), }; return pd(sub.aux, multiToken(nodelist, 3, m), 0); } } // Get variable else if (node.val == "get") { // Getting a stack variable if (vaux.dupvars.count(node.args[0].val)) { int h = vaux.height - vaux.dupvars[node.args[0].val]; if (h > 16) err("Too deep for stack variable (max 16)", m); return pd(aux, token("DUP"+unsignedToDecimal(h)), 1); } // Getting a memory variable else { Node nodelist[] = { varNode, token("MLOAD", m) }; return pd(aux, multiToken(nodelist, 2, m), 1); } } // Refer variable else if (node.val == "ref") { if (vaux.dupvars.count(node.args[0].val)) err("Cannot ref stack variable!", m); return pd(aux, varNode, 1); } } // Comments do nothing else if (node.val == "comment") { return pd(aux, astnode("_", m), 0); } 
// Custom operation sequence // eg. (ops bytez id msize swap1 msize add 0 swap1 mstore) == alloc if (node.val == "ops") { std::vector<Node> subs2; int depth = 0; for (unsigned i = 0; i < node.args.size(); i++) { std::string op = upperCase(node.args[i].val); if (node.args[i].type == ASTNODE || opinputs(op) == -1) { programVerticalAux vaux2 = vaux; vaux2.height = vaux.height - i - 1 + node.args.size(); programData sub = opcodeify(node.args[i], aux, vaux2); aux = sub.aux; depth += sub.outs; subs2.push_back(sub.code); } else { subs2.push_back(token(op, m)); depth += opoutputs(op) - opinputs(op); } } if (depth < 0 || depth > 1) err("Stack depth mismatch", m); return pd(aux, astnode("_", subs2, m), 0); } // Code blocks if (node.val == "lll" && node.args.size() == 2) { if (node.args[1].val != "0") aux.allocUsed = true; std::vector<Node> o; o.push_back(finalize(opcodeify(node.args[0]))); programData sub = opcodeify(node.args[1], aux, vaux); Node code = astnode("____CODE", o, m); Node nodelist[] = { token("$begincode"+symb+".endcode"+symb, m), token("DUP1", m), token("$begincode"+symb, m), sub.code, token("CODECOPY", m), token("$endcode"+symb, m), token("JUMP", m), token("~begincode"+symb, m), code, token("~endcode"+symb, m), token("JUMPDEST", m) }; return pd(sub.aux, multiToken(nodelist, 11, m), 1); } // Stack variables if (node.val == "with") { programData initial = opcodeify(node.args[1], aux, vaux); programVerticalAux vaux2 = vaux; vaux2.dupvars[node.args[0].val] = vaux.height; vaux2.height += 1; if (!initial.outs) err("Initial variable value must have nonzero arity!", m); programData sub = opcodeify(node.args[2], initial.aux, vaux2); Node nodelist[] = { initial.code, sub.code }; programData o = pd(sub.aux, multiToken(nodelist, 2, m), sub.outs); if (sub.outs) o.code.args.push_back(token("SWAP1", m)); o.code.args.push_back(token("POP", m)); return o; } // Seq of multiple statements if (node.val == "seq") { std::vector<Node> children; int lastOut = 0; for (unsigned i = 0; 
i < node.args.size(); i++) { programData sub = opcodeify(node.args[i], aux, vaux); aux = sub.aux; if (sub.outs == 1) { if (i < node.args.size() - 1) sub.code = popwrap(sub.code); else lastOut = 1; } children.push_back(sub.code); } return pd(aux, astnode("_", children, m), lastOut); } // 2-part conditional (if gets rewritten to unless in rewrites) else if (node.val == "unless" && node.args.size() == 2) { programData cond = opcodeify(node.args[0], aux, vaux); programData action = opcodeify(node.args[1], cond.aux, vaux); aux = action.aux; if (!cond.outs) err("Condition of if/unless statement has arity 0", m); if (action.outs) action.code = popwrap(action.code); Node nodelist[] = { cond.code, token("$endif"+symb, m), token("JUMPI", m), action.code, token("~endif"+symb, m), token("JUMPDEST", m) }; return pd(aux, multiToken(nodelist, 6, m), 0); } // 3-part conditional else if (node.val == "if" && node.args.size() == 3) { programData ifd = opcodeify(node.args[0], aux, vaux); programData thend = opcodeify(node.args[1], ifd.aux, vaux); programData elsed = opcodeify(node.args[2], thend.aux, vaux); aux = elsed.aux; if (!ifd.outs) err("Condition of if/unless statement has arity 0", m); // Handle cases where one conditional outputs something // and the other does not int outs = (thend.outs && elsed.outs) ? 
1 : 0; if (thend.outs > outs) thend.code = popwrap(thend.code); if (elsed.outs > outs) elsed.code = popwrap(elsed.code); Node nodelist[] = { ifd.code, token("ISZERO", m), token("$else"+symb, m), token("JUMPI", m), thend.code, token("$endif"+symb, m), token("JUMP", m), token("~else"+symb, m), token("JUMPDEST", m), elsed.code, token("~endif"+symb, m), token("JUMPDEST", m) }; return pd(aux, multiToken(nodelist, 12, m), outs); } // While (rewritten to this in rewrites) else if (node.val == "until") { programData cond = opcodeify(node.args[0], aux, vaux); programData action = opcodeify(node.args[1], cond.aux, vaux); aux = action.aux; if (!cond.outs) err("Condition of while/until loop has arity 0", m); if (action.outs) action.code = popwrap(action.code); Node nodelist[] = { token("~beg"+symb, m), token("JUMPDEST", m), cond.code, token("$end"+symb, m), token("JUMPI", m), action.code, token("$beg"+symb, m), token("JUMP", m), token("~end"+symb, m), token("JUMPDEST", m), }; return pd(aux, multiToken(nodelist, 10, m)); } // Memory allocations else if (node.val == "alloc") { programData bytez = opcodeify(node.args[0], aux, vaux); aux = bytez.aux; if (!bytez.outs) err("Alloc input has arity 0", m); aux.allocUsed = true; Node nodelist[] = { bytez.code, token("MSIZE", m), token("SWAP1", m), token("MSIZE", m), token("ADD", m), token("0", m), token("SWAP1", m), token("MSTORE", m) }; return pd(aux, multiToken(nodelist, 8, m), 1); } // All other functions/operators else { std::vector<Node> subs2; int depth = opinputs(upperCase(node.val)); if (depth == -1) err("Not a function or opcode: "+node.val, m); if ((int)node.args.size() != depth) err("Invalid arity for "+node.val, m); for (int i = node.args.size() - 1; i >= 0; i--) { programVerticalAux vaux2 = vaux; vaux2.height = vaux.height - i - 1 + node.args.size(); programData sub = opcodeify(node.args[i], aux, vaux2); aux = sub.aux; if (!sub.outs) err("Input "+unsignedToDecimal(i)+" has arity 0", sub.code.metadata); 
subs2.push_back(sub.code); } subs2.push_back(token(upperCase(node.val), m)); int outdepth = opoutputs(upperCase(node.val)); return pd(aux, astnode("_", subs2, m), outdepth); } }
// Turns LLL tree into tree of code fragments programData opcodeify(Node node, programAux aux=Aux()) { std::string symb = "_"+mkUniqueToken(); Metadata m = node.metadata; // Numbers if (node.type == TOKEN) { return pd(aux, nodeToNumeric(node)); } else if (node.val == "ref" || node.val == "get" || node.val == "set") { std::string varname = node.args[0].val; if (!aux.vars.count(varname)) { aux.vars[varname] = intToDecimal(aux.vars.size() * 32); } if (varname == "msg.data") aux.calldataUsed = true; // Set variable if (node.val == "set") { programData sub = opcodeify(node.args[1], aux); Node nodelist[] = { sub.code, token(aux.vars[varname], m), token("MSTORE", m), }; return pd(sub.aux, multiToken(nodelist, 3, m)); } // Get variable else if (node.val == "get") { Node nodelist[] = { token(aux.vars[varname], m), token("MLOAD", m) }; return pd(aux, multiToken(nodelist, 2, m)); } // Refer variable else return pd(aux, token(aux.vars[varname], m)); } // Code blocks if (node.val == "lll" && node.args.size() == 2) { if (node.args[1].val != "0") aux.allocUsed = true; std::vector<Node> o; o.push_back(finalize(opcodeify(node.args[0]))); programData sub = opcodeify(node.args[1], aux); Node code = astnode("____CODE", o, m); Node nodelist[] = { token("$begincode"+symb+".endcode"+symb, m), token("DUP", m), token("$begincode"+symb, m), sub.code, token("CODECOPY", m), token("$endcode"+symb, m), token("JUMP", m), token("~begincode"+symb, m), code, token("~endcode"+symb, m) }; return pd(sub.aux, multiToken(nodelist, 10, m)); } std::vector<Node> subs; for (unsigned i = 0; i < node.args.size(); i++) { programData sub = opcodeify(node.args[i], aux); aux = sub.aux; subs.push_back(sub.code); } // Debug if (node.val == "debug") { Node nodelist[] = { subs[0], token("DUP", m), token("POP", m), token("POP", m) }; return pd(aux, multiToken(nodelist, 4, m)); } // Seq of multiple statements if (node.val == "seq") { return pd(aux, astnode("_", subs, m)); } // 2-part conditional (if gets rewritten to 
unless in rewrites) else if (node.val == "unless" && node.args.size() == 2) { Node nodelist[] = { subs[0], token("$endif"+symb, m), token("JUMPI", m), subs[1], token("~endif"+symb, m) }; return pd(aux, multiToken(nodelist, 5, m)); } // 3-part conditional else if (node.val == "if" && node.args.size() == 3) { Node nodelist[] = { subs[0], token("NOT", m), token("$else"+symb, m), token("JUMPI", m), subs[1], token("$endif"+symb, m), token("JUMP", m), token("~else"+symb, m), subs[2], token("~endif"+symb, m) }; return pd(aux, multiToken(nodelist, 10, m)); } // While (rewritten to this in rewrites) else if (node.val == "until") { Node nodelist[] = { token("~beg"+symb, m), subs[0], token("$end"+symb, m), token("JUMPI", m), subs[1], token("$beg"+symb, m), token("JUMP", m), token("~end"+symb, m) }; return pd(aux, multiToken(nodelist, 8, m)); } // Memory allocations else if (node.val == "alloc") { aux.allocUsed = true; Node nodelist[] = { subs[0], token("MSIZE", m), token("SWAP", m), token("MSIZE", m), token("ADD", m), token("0", m), token("SWAP", m), token("MSTORE", m) }; return pd(aux, multiToken(nodelist, 8, m)); } // Array literals else if (node.val == "array_lit") { aux.allocUsed = true; std::vector<Node> nodes; if (!subs.size()) { nodes.push_back(token("MSIZE", m)); return pd(aux, astnode("_", nodes, m)); } nodes.push_back(token("MSIZE", m)); nodes.push_back(token("0", m)); nodes.push_back(token("MSIZE", m)); nodes.push_back(token(intToDecimal(subs.size() * 32 - 1), m)); nodes.push_back(token("ADD", m)); nodes.push_back(token("MSTORE8", m)); for (unsigned i = 0; i < subs.size(); i++) { nodes.push_back(token("DUP", m)); nodes.push_back(subs[i]); nodes.push_back(token("SWAP", m)); if (i > 0) { nodes.push_back(token(intToDecimal(i * 32), m)); nodes.push_back(token("ADD", m)); } nodes.push_back(token("MSTORE", m)); } return pd(aux, astnode("_", nodes, m)); } // All other functions/operators else { std::vector<Node> subs2; while (subs.size()) { subs2.push_back(subs.back()); 
subs.pop_back(); } subs2.push_back(token(upperCase(node.val), m)); return pd(aux, astnode("_", subs2, m)); } }
// Preprocess input containing functions // // localExterns is a map of the form, eg, // // { x: { foo: 0, bar: 1, baz: 2 }, y: { qux: 0, foo: 1 } ... } // // localExternSigs is a map of the form, eg, // // { x : { foo: iii, bar: iis, baz: ia }, y: { qux: i, foo: as } ... } // // Signifying that x.foo = 0, x.baz = 2, y.foo = 1, etc // and that x.foo has three integers as arguments, x.bar has two // integers and a variable-length string, and baz has an integer // and an array // // globalExterns is a one-level map, eg from above // // { foo: 1, bar: 1, baz: 2, qux: 0 } // // globalExternSigs is a one-level map, eg from above // // { foo: as, bar: iis, baz: ia, qux: i} // // Note that globalExterns and globalExternSigs may be ambiguous // Also, a null signature implies an infinite tail of integers preprocessResult preprocessInit(Node inp) { Metadata m = inp.metadata; if (inp.val != "seq") inp = astnode("seq", inp, m); std::vector<Node> empty = std::vector<Node>(); Node init = astnode("seq", empty, m); Node shared = astnode("seq", empty, m); std::vector<Node> any; std::vector<Node> functions; preprocessAux out = preprocessAux(); out.localExterns["self"] = std::map<std::string, int>(); int functionCount = 0; int storageDataCount = 0; for (unsigned i = 0; i < inp.args.size(); i++) { Node obj = inp.args[i]; // Functions if (obj.val == "def") { if (obj.args.size() == 0) err("Empty def", m); std::string funName = obj.args[0].val; // Init, shared and any are special functions if (funName == "init" || funName == "shared" || funName == "any") { if (obj.args[0].args.size()) err(funName+" cannot have arguments", m); } if (funName == "init") init = obj.args[1]; else if (funName == "shared") shared = obj.args[1]; else if (funName == "any") any.push_back(obj.args[1]); else { // Other functions functions.push_back(convFunction(obj, functionCount)); out.localExterns["self"][obj.args[0].val] = functionCount; out.localExternSigs["self"][obj.args[0].val] = 
getSignature(obj.args[0].args); functionCount++; } } // Extern declarations else if (obj.val == "extern") { std::string externName = obj.args[0].val; Node al = obj.args[1]; if (!out.localExterns.count(externName)) out.localExterns[externName] = std::map<std::string, int>(); for (unsigned i = 0; i < al.args.size(); i++) { if (al.args[i].val == ":") { std::string v = al.args[i].args[0].val; std::string sig = al.args[i].args[1].val; out.globalExterns[v] = i; out.globalExternSigs[v] = sig; out.localExterns[externName][v] = i; out.localExternSigs[externName][v] = sig; } else { std::string v = al.args[i].val; out.globalExterns[v] = i; out.globalExternSigs[v] = ""; out.localExterns[externName][v] = i; out.localExternSigs[externName][v] = ""; } } } // Custom macros else if (obj.val == "macro" || (obj.val == "fun" && obj.args[0].val == "macro")) { // Rules for valid macros: // // There are only four categories of valid macros: // // 1. a macro where the outer function is something // which is NOT an existing valid function/extern/datum // 2. a macro of the form set(c(x), d) where c must NOT // be an existing valid function/extern/datum // 3. something of the form access(c(x)), where c must NOT // be an existing valid function/extern/datum // 4. something of the form set(access(c(x)), d) where c must // NOT be an existing valid function/extern/datum // 5. 
something of the form with(c(x), d, e) where c must // NOT be an existing valid function/extern/datum bool valid = false; Node pattern; Node substitution; int priority; // Priority not set: default zero if (obj.val == "macro") { pattern = obj.args[0]; substitution = obj.args[1]; priority = 0; } // Specified priority else { pattern = obj.args[1]; substitution = obj.args[2]; if (obj.args[0].args.size()) priority = dtu(obj.args[0].args[0].val); else priority = 0; } if (opcode(pattern.val) < 0 && !isValidFunctionName(pattern.val)) valid = true; if (pattern.val == "set" && opcode(pattern.args[0].val) < 0 && !isValidFunctionName(pattern.args[0].val)) valid = true; if (pattern.val == "access" && opcode(pattern.args[0].val) < 0 && !isValidFunctionName(pattern.args[0].val)) if (pattern.val == "set" && pattern.args[0].val == "access" && opcode(pattern.args[0].args[0].val) < 0 && !isValidFunctionName(pattern.args[0].args[0].val)) valid = true; if (pattern.val == "with" && opcode(pattern.args[0].val) < 0 && !isValidFunctionName(pattern.args[0].val)) valid = true; if (valid) { if (!out.customMacros.count(priority)) out.customMacros[priority] = rewriteRuleSet(); out.customMacros[priority].addRule (rewriteRule(pattern, substitution)); } else warn("Macro does not fit valid template: "+printSimple(pattern), m); } // Variable types else if (obj.val == "type") { std::string typeName = obj.args[0].val; std::vector<Node> vars = obj.args[1].args; for (unsigned i = 0; i < vars.size(); i++) out.types[vars[i].val] = typeName; } // Storage variables/structures else if (obj.val == "data") { out.storageVars = getStorageVars(out.storageVars, obj.args[0], "", storageDataCount); storageDataCount += 1; } else any.push_back(obj); } // Set up top-level AST structure std::vector<Node> main; if (shared.args.size()) main.push_back(shared); if (init.args.size()) main.push_back(init); std::vector<Node> code; if (shared.args.size()) code.push_back(shared); for (unsigned i = 0; i < any.size(); i++) 
code.push_back(any[i]); for (unsigned i = 0; i < functions.size(); i++) code.push_back(functions[i]); Node codeNode; if (functions.size() > 0) { codeNode = astnode("with", token("__funid", m), astnode("byte", token("0", m), astnode("calldataload", token("0", m), m), m), astnode("seq", code, m), m); } else codeNode = astnode("seq", code, m); main.push_back(astnode("~return", token("0", m), astnode("lll", codeNode, token("0", m), m), m)); Node result; if (main.size() == 1) result = main[0]; else result = astnode("seq", main, inp.metadata); return preprocessResult(result, out); }