std::string generate_impl(std::string const & kernel_prefix, statements_container const & statements, std::vector<mapping_type> const & mappings, unsigned int simd_width) const { std::string process_str; utils::kernel_generation_stream stream; std::string init0, upper_bound0, inc0, init1, upper_bound1, inc1; stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << "," << p_.local_size_1 << ",1)))" << std::endl; generate_prototype(stream, kernel_prefix, "unsigned int M, unsigned int N,", mappings, statements); stream << "{" << std::endl; stream.inc_tab(); tree_parsing::process(stream, PARENT_NODE_TYPE, "scalar", "#scalartype #namereg = *#pointer;", statements, mappings); tree_parsing::process(stream, PARENT_NODE_TYPE, "matrix", "#pointer += $OFFSET{#start1, #start2};", statements, mappings); tree_parsing::process(stream, PARENT_NODE_TYPE, "vector", "#pointer += #start;", statements, mappings); fetching_loop_info(p_.fetching_policy, "M", stream, init0, upper_bound0, inc0, "get_global_id(0)", "get_global_size(0)"); stream << "for(unsigned int i = " << init0 << "; i < " << upper_bound0 << "; i += " << inc0 << ")" << std::endl; stream << "{" << std::endl; stream.inc_tab(); fetching_loop_info(p_.fetching_policy, "N", stream, init1, upper_bound1, inc1, "get_global_id(1)", "get_global_size(1)"); stream << "for(unsigned int j = " << init1 << "; j < " << upper_bound1 << "; j += " << inc1 << ")" << std::endl; stream << "{" << std::endl; stream.inc_tab(); process_str = utils::append_width("#scalartype",simd_width) + " #namereg = " + vload(simd_width, "$OFFSET{i*#stride1,j*#stride2}", "#pointer")+ ";"; tree_parsing::process(stream, PARENT_NODE_TYPE, "matrix", process_str, statements, mappings); tree_parsing::process(stream, PARENT_NODE_TYPE, "vector_diag", "#scalartype #namereg = ((i + ((#diag_offset<0)?#diag_offset:0))!=(j-((#diag_offset>0)?#diag_offset:0)))?0:#pointer[min(i*#stride, j*#stride)];", statements, mappings); std::map<std::string, std::string> accessors; accessors["matrix"] = "#namereg"; accessors["vector_diag"] = "#namereg"; accessors["scalar"] = "#namereg"; tree_parsing::evaluate(stream, PARENT_NODE_TYPE, accessors, statements, mappings); process_str = vstore(simd_width, "#namereg", "$OFFSET{i*#stride1,j*#stride2}", "#pointer")+";"; tree_parsing::process(stream, LHS_NODE_TYPE, "matrix", process_str, statements, mappings); stream.dec_tab(); stream << "}" << std::endl; stream.dec_tab(); stream << "}" << std::endl; stream.dec_tab(); stream << "}" << std::endl; return stream.str(); }
/** * \brief getVertices, get vertices for desired string, * this could either be created or reusing cached vertices * depends in the input parameters given * \note use releaseVertices when done * @param str, string that we want to get or create vertex buffer from * @param bUseCache, flag use cached vertexbuffer true=yes (reuse), false=no(recreate a new vertex buffer) * @param width, output result width of the whole string in vertex buffer. * @return glm::vert4* returns vertex buffer */ glm::vec4 *RenderText::getVertices(const char *str, bool bUseCache, float *width) { glm::vec4 *vertices = 0; int num = 0; if (bUseCache) { std::string key = str; num = key.size(); TextCacheTable::iterator it = mTextCache.find(key); if (it == mTextCache.end()) { // doesn't exists in tabler, cache it. vertices = new glm::vec4[6*num]; // create vertex array if(!vertices) // check if memory alocation was ok maPanic(1,"RenderText: Failed to allocate vertex buffer"); *width = mFont->BuildVertexArray(vertices, key.c_str(), mOPos.x, mOPos.y, mScaleX, mScaleY); // get vertex array from string, glm::vec2 scaleXZ(mScaleX,mScaleY); VertStore vstore(vertices,*width,scaleXZ); if ( (mTextCache.insert(TextCachePair(key,vstore))).second == false ) { // warn in log double insertion failed to insert this one. lprintfln("RenderText::TextCache insertion double entry detected of key=\"%s\"",key.c_str()); } } else // exists re use vertex buffer from table { VertStore &vstore = it->second; vertices = static_cast<glm::vec4 *>(vstore.mVertices); *width = vstore.mWidth; // may need to check current scale state or restore system with it. mScaleX = vstore.mScaleXZ.x; mScaleY = vstore.mScaleXZ.y; } } else // Not using cache create one { num = strlen(str); // get number chars vertices = new glm::vec4[6*num]; if(!vertices) maPanic(1,"RenderText: Failed to allocate vertex buffer"); *width = mFont->BuildVertexArray(vertices, str, mOPos.x, mOPos.y, mScaleX, mScaleY); // get vertex array from string, } return vertices; }
void getcol(int c, double* x, double** bb, int nth) { int ii = c/BL; int r = c%BL; int id = ii%nth; #if TILE_SCHEME == 0 int mynbl = (ii-id+nth-1)/nth; size_t t = mynbl*((2*id+2+nth*(mynbl-1))/2)*BL*BL; #else size_t t = 0; for(int I=id, i=id*BL; I<ii; I+=nth, i+=nth*BL) t += (i+BL+RJ-1)/RJ*RJ*BL; #endif double* b = bb[id] + t + r*RJ; for(int j=0; j<=c; j+=RJ){ vstore(x, vload(b)); x += RJ; b += RJ*BL; } }
static int crypt_all(int *pcount, struct db_salt *salt) { int count = *pcount; int index = 0; #ifdef _OPENMP #pragma omp parallel for for (index = 0; index < count; index += VWIDTH) #endif { int i; vtype a, b, c, d, e, f, g, h; vtype w[80], tmp1, tmp2; for (i = 0; i < 14; i += 2) { GATHER(tmp1, saved_key, i); GATHER(tmp2, saved_key, i + 1); vswap64(tmp1); vswap64(tmp2); w[i] = tmp1; w[i + 1] = tmp2; } GATHER(tmp1, saved_key, 14); vswap64(tmp1); w[14] = tmp1; GATHER(w[15], saved_key, 15); for (i = 16; i < 80; i++) R(i); a = vset1_epi64x(0x6a09e667f3bcc908ULL); b = vset1_epi64x(0xbb67ae8584caa73bULL); c = vset1_epi64x(0x3c6ef372fe94f82bULL); d = vset1_epi64x(0xa54ff53a5f1d36f1ULL); e = vset1_epi64x(0x510e527fade682d1ULL); f = vset1_epi64x(0x9b05688c2b3e6c1fULL); g = vset1_epi64x(0x1f83d9abfb41bd6bULL); h = vset1_epi64x(0x5be0cd19137e2179ULL); SHA512_STEP(a, b, c, d, e, f, g, h, 0, 0x428a2f98d728ae22ULL); SHA512_STEP(h, a, b, c, d, e, f, g, 1, 0x7137449123ef65cdULL); SHA512_STEP(g, h, a, b, c, d, e, f, 2, 0xb5c0fbcfec4d3b2fULL); SHA512_STEP(f, g, h, a, b, c, d, e, 3, 0xe9b5dba58189dbbcULL); SHA512_STEP(e, f, g, h, a, b, c, d, 4, 0x3956c25bf348b538ULL); SHA512_STEP(d, e, f, g, h, a, b, c, 5, 0x59f111f1b605d019ULL); SHA512_STEP(c, d, e, f, g, h, a, b, 6, 0x923f82a4af194f9bULL); SHA512_STEP(b, c, d, e, f, g, h, a, 7, 0xab1c5ed5da6d8118ULL); SHA512_STEP(a, b, c, d, e, f, g, h, 8, 0xd807aa98a3030242ULL); SHA512_STEP(h, a, b, c, d, e, f, g, 9, 0x12835b0145706fbeULL); SHA512_STEP(g, h, a, b, c, d, e, f, 10, 0x243185be4ee4b28cULL); SHA512_STEP(f, g, h, a, b, c, d, e, 11, 0x550c7dc3d5ffb4e2ULL); SHA512_STEP(e, f, g, h, a, b, c, d, 12, 0x72be5d74f27b896fULL); SHA512_STEP(d, e, f, g, h, a, b, c, 13, 0x80deb1fe3b1696b1ULL); SHA512_STEP(c, d, e, f, g, h, a, b, 14, 0x9bdc06a725c71235ULL); SHA512_STEP(b, c, d, e, f, g, h, a, 15, 0xc19bf174cf692694ULL); SHA512_STEP(a, b, c, d, e, f, g, h, 16, 0xe49b69c19ef14ad2ULL); SHA512_STEP(h, a, b, c, d, e, f, g, 17, 0xefbe4786384f25e3ULL); SHA512_STEP(g, h, a, b, c, d, e, f, 18, 0x0fc19dc68b8cd5b5ULL); SHA512_STEP(f, g, h, a, b, c, d, e, 19, 0x240ca1cc77ac9c65ULL); SHA512_STEP(e, f, g, h, a, b, c, d, 20, 0x2de92c6f592b0275ULL); SHA512_STEP(d, e, f, g, h, a, b, c, 21, 0x4a7484aa6ea6e483ULL); SHA512_STEP(c, d, e, f, g, h, a, b, 22, 0x5cb0a9dcbd41fbd4ULL); SHA512_STEP(b, c, d, e, f, g, h, a, 23, 0x76f988da831153b5ULL); SHA512_STEP(a, b, c, d, e, f, g, h, 24, 0x983e5152ee66dfabULL); SHA512_STEP(h, a, b, c, d, e, f, g, 25, 0xa831c66d2db43210ULL); SHA512_STEP(g, h, a, b, c, d, e, f, 26, 0xb00327c898fb213fULL); SHA512_STEP(f, g, h, a, b, c, d, e, 27, 0xbf597fc7beef0ee4ULL); SHA512_STEP(e, f, g, h, a, b, c, d, 28, 0xc6e00bf33da88fc2ULL); SHA512_STEP(d, e, f, g, h, a, b, c, 29, 0xd5a79147930aa725ULL); SHA512_STEP(c, d, e, f, g, h, a, b, 30, 0x06ca6351e003826fULL); SHA512_STEP(b, c, d, e, f, g, h, a, 31, 0x142929670a0e6e70ULL); SHA512_STEP(a, b, c, d, e, f, g, h, 32, 0x27b70a8546d22ffcULL); SHA512_STEP(h, a, b, c, d, e, f, g, 33, 0x2e1b21385c26c926ULL); SHA512_STEP(g, h, a, b, c, d, e, f, 34, 0x4d2c6dfc5ac42aedULL); SHA512_STEP(f, g, h, a, b, c, d, e, 35, 0x53380d139d95b3dfULL); SHA512_STEP(e, f, g, h, a, b, c, d, 36, 0x650a73548baf63deULL); SHA512_STEP(d, e, f, g, h, a, b, c, 37, 0x766a0abb3c77b2a8ULL); SHA512_STEP(c, d, e, f, g, h, a, b, 38, 0x81c2c92e47edaee6ULL); SHA512_STEP(b, c, d, e, f, g, h, a, 39, 0x92722c851482353bULL); SHA512_STEP(a, b, c, d, e, f, g, h, 40, 0xa2bfe8a14cf10364ULL); SHA512_STEP(h, a, b, c, d, e, f, g, 41, 0xa81a664bbc423001ULL); SHA512_STEP(g, h, a, b, c, d, e, f, 42, 0xc24b8b70d0f89791ULL); SHA512_STEP(f, g, h, a, b, c, d, e, 43, 0xc76c51a30654be30ULL); SHA512_STEP(e, f, g, h, a, b, c, d, 44, 0xd192e819d6ef5218ULL); SHA512_STEP(d, e, f, g, h, a, b, c, 45, 0xd69906245565a910ULL); SHA512_STEP(c, d, e, f, g, h, a, b, 46, 0xf40e35855771202aULL); SHA512_STEP(b, c, d, e, f, g, h, a, 47, 0x106aa07032bbd1b8ULL); SHA512_STEP(a, b, c, d, e, f, g, h, 48, 0x19a4c116b8d2d0c8ULL); SHA512_STEP(h, a, b, c, d, e, f, g, 49, 0x1e376c085141ab53ULL); SHA512_STEP(g, h, a, b, c, d, e, f, 50, 0x2748774cdf8eeb99ULL); SHA512_STEP(f, g, h, a, b, c, d, e, 51, 0x34b0bcb5e19b48a8ULL); SHA512_STEP(e, f, g, h, a, b, c, d, 52, 0x391c0cb3c5c95a63ULL); SHA512_STEP(d, e, f, g, h, a, b, c, 53, 0x4ed8aa4ae3418acbULL); SHA512_STEP(c, d, e, f, g, h, a, b, 54, 0x5b9cca4f7763e373ULL); SHA512_STEP(b, c, d, e, f, g, h, a, 55, 0x682e6ff3d6b2b8a3ULL); SHA512_STEP(a, b, c, d, e, f, g, h, 56, 0x748f82ee5defb2fcULL); SHA512_STEP(h, a, b, c, d, e, f, g, 57, 0x78a5636f43172f60ULL); SHA512_STEP(g, h, a, b, c, d, e, f, 58, 0x84c87814a1f0ab72ULL); SHA512_STEP(f, g, h, a, b, c, d, e, 59, 0x8cc702081a6439ecULL); SHA512_STEP(e, f, g, h, a, b, c, d, 60, 0x90befffa23631e28ULL); SHA512_STEP(d, e, f, g, h, a, b, c, 61, 0xa4506cebde82bde9ULL); SHA512_STEP(c, d, e, f, g, h, a, b, 62, 0xbef9a3f7b2c67915ULL); SHA512_STEP(b, c, d, e, f, g, h, a, 63, 0xc67178f2e372532bULL); SHA512_STEP(a, b, c, d, e, f, g, h, 64, 0xca273eceea26619cULL); SHA512_STEP(h, a, b, c, d, e, f, g, 65, 0xd186b8c721c0c207ULL); SHA512_STEP(g, h, a, b, c, d, e, f, 66, 0xeada7dd6cde0eb1eULL); SHA512_STEP(f, g, h, a, b, c, d, e, 67, 0xf57d4f7fee6ed178ULL); SHA512_STEP(e, f, g, h, a, b, c, d, 68, 0x06f067aa72176fbaULL); SHA512_STEP(d, e, f, g, h, a, b, c, 69, 0x0a637dc5a2c898a6ULL); SHA512_STEP(c, d, e, f, g, h, a, b, 70, 0x113f9804bef90daeULL); SHA512_STEP(b, c, d, e, f, g, h, a, 71, 0x1b710b35131c471bULL); SHA512_STEP(a, b, c, d, e, f, g, h, 72, 0x28db77f523047d84ULL); SHA512_STEP(h, a, b, c, d, e, f, g, 73, 0x32caab7b40c72493ULL); SHA512_STEP(g, h, a, b, c, d, e, f, 74, 0x3c9ebe0a15c9bebcULL); SHA512_STEP(f, g, h, a, b, c, d, e, 75, 0x431d67c49c100d4cULL); SHA512_STEP(e, f, g, h, a, b, c, d, 76, 0x4cc5d4becb3e42b6ULL); SHA512_STEP(d, e, f, g, h, a, b, c, 77, 0x597f299cfc657e2aULL); SHA512_STEP(c, d, e, f, g, h, a, b, 78, 0x5fcb6fab3ad6faecULL); SHA512_STEP(b, c, d, e, f, g, h, a, 79, 0x6c44198c4a475817ULL); vstore((vtype*) &crypt_key[0][index], a); vstore((vtype*) &crypt_key[1][index], b); vstore((vtype*) &crypt_key[2][index], c); vstore((vtype*) &crypt_key[3][index], d); vstore((vtype*) &crypt_key[4][index], e); vstore((vtype*) &crypt_key[5][index], f); vstore((vtype*) &crypt_key[6][index], g); vstore((vtype*) &crypt_key[7][index], h); } return count; }
/* * Compute the symbolic value of expression of 's', and update * anything it defines in the value table 'val'. If 'alter' is true, * do various optimizations. This code would be cleaner if symbolic * evaluation and code transformations weren't folded together. */ static void opt_stmt(struct stmt *s, int val[], int alter) { int op; int v; switch (s->code) { case BPF_LD|BPF_ABS|BPF_W: case BPF_LD|BPF_ABS|BPF_H: case BPF_LD|BPF_ABS|BPF_B: v = F(s->code, s->k, 0L); vstore(s, &val[A_ATOM], v, alter); break; case BPF_LD|BPF_IND|BPF_W: case BPF_LD|BPF_IND|BPF_H: case BPF_LD|BPF_IND|BPF_B: v = val[X_ATOM]; if (alter && vmap[v].is_const) { s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code); s->k += vmap[v].const_val; v = F(s->code, s->k, 0L); done = 0; } else v = F(s->code, s->k, v); vstore(s, &val[A_ATOM], v, alter); break; case BPF_LD|BPF_LEN: v = F(s->code, 0L, 0L); vstore(s, &val[A_ATOM], v, alter); break; case BPF_LD|BPF_IMM: v = K(s->k); vstore(s, &val[A_ATOM], v, alter); break; case BPF_LDX|BPF_IMM: v = K(s->k); vstore(s, &val[X_ATOM], v, alter); break; case BPF_LDX|BPF_MSH|BPF_B: v = F(s->code, s->k, 0L); vstore(s, &val[X_ATOM], v, alter); break; case BPF_ALU|BPF_NEG: if (alter && vmap[val[A_ATOM]].is_const) { s->code = BPF_LD|BPF_IMM; s->k = -vmap[val[A_ATOM]].const_val; val[A_ATOM] = K(s->k); } else val[A_ATOM] = F(s->code, val[A_ATOM], 0L); break; case BPF_ALU|BPF_ADD|BPF_K: case BPF_ALU|BPF_SUB|BPF_K: case BPF_ALU|BPF_MUL|BPF_K: case BPF_ALU|BPF_DIV|BPF_K: case BPF_ALU|BPF_AND|BPF_K: case BPF_ALU|BPF_OR|BPF_K: case BPF_ALU|BPF_LSH|BPF_K: case BPF_ALU|BPF_RSH|BPF_K: op = BPF_OP(s->code); if (alter) { if (s->k == 0) { /* don't optimize away "sub #0" * as it may be needed later to * fixup the generated math code */ if (op == BPF_ADD || op == BPF_LSH || op == BPF_RSH || op == BPF_OR) { s->code = NOP; break; } if (op == BPF_MUL || op == BPF_AND) { s->code = BPF_LD|BPF_IMM; val[A_ATOM] = K(s->k); break; } } if (vmap[val[A_ATOM]].is_const) { fold_op(s, val[A_ATOM], K(s->k)); val[A_ATOM] = K(s->k); break; } } val[A_ATOM] = F(s->code, val[A_ATOM], K(s->k)); break; case BPF_ALU|BPF_ADD|BPF_X: case BPF_ALU|BPF_SUB|BPF_X: case BPF_ALU|BPF_MUL|BPF_X: case BPF_ALU|BPF_DIV|BPF_X: case BPF_ALU|BPF_AND|BPF_X: case BPF_ALU|BPF_OR|BPF_X: case BPF_ALU|BPF_LSH|BPF_X: case BPF_ALU|BPF_RSH|BPF_X: op = BPF_OP(s->code); if (alter && vmap[val[X_ATOM]].is_const) { if (vmap[val[A_ATOM]].is_const) { fold_op(s, val[A_ATOM], val[X_ATOM]); val[A_ATOM] = K(s->k); } else { s->code = BPF_ALU|BPF_K|op; s->k = vmap[val[X_ATOM]].const_val; done = 0; val[A_ATOM] = F(s->code, val[A_ATOM], K(s->k)); } break; } /* * Check if we're doing something to an accumulator * that is 0, and simplify. This may not seem like * much of a simplification but it could open up further * optimizations. * XXX We could also check for mul by 1, etc. */ if (alter && vmap[val[A_ATOM]].is_const && vmap[val[A_ATOM]].const_val == 0) { if (op == BPF_ADD || op == BPF_OR) { s->code = BPF_MISC|BPF_TXA; vstore(s, &val[A_ATOM], val[X_ATOM], alter); break; } else if (op == BPF_MUL || op == BPF_DIV || op == BPF_AND || op == BPF_LSH || op == BPF_RSH) { s->code = BPF_LD|BPF_IMM; s->k = 0; vstore(s, &val[A_ATOM], K(s->k), alter); break; } else if (op == BPF_NEG) { s->code = NOP; break; } } val[A_ATOM] = F(s->code, val[A_ATOM], val[X_ATOM]); break; case BPF_MISC|BPF_TXA: vstore(s, &val[A_ATOM], val[X_ATOM], alter); break; case BPF_LD|BPF_MEM: v = val[s->k]; if (alter && vmap[v].is_const) { s->code = BPF_LD|BPF_IMM; s->k = vmap[v].const_val; done = 0; } vstore(s, &val[A_ATOM], v, alter); break; case BPF_MISC|BPF_TAX: vstore(s, &val[X_ATOM], val[A_ATOM], alter); break; case BPF_LDX|BPF_MEM: v = val[s->k]; if (alter && vmap[v].is_const) { s->code = BPF_LDX|BPF_IMM; s->k = vmap[v].const_val; done = 0; } vstore(s, &val[X_ATOM], v, alter); break; case BPF_ST: vstore(s, &val[s->k], val[A_ATOM], alter); break; case BPF_STX: vstore(s, &val[s->k], val[X_ATOM], alter); break; } }
// Generate function call. The function address is pushed first, then // all the parameters in call order. This function pops all the // parameters and the function address. void gfunc_call(int nb_args) { int size, align, r, args_size, i, func_call, v; Sym *func_sym; args_size = 0; for (i = 0; i < nb_args; i++) { if ((vtop->type.t & VT_BTYPE) == VT_STRUCT) { size = type_size(&vtop->type, &align); // Align to stack align size size = (size + 3) & ~3; // Allocate the necessary size on stack oad(0xec81, size); // sub $xxx, %esp // Generate structure store r = get_reg(RC_INT); o(0x89); // mov %esp, r o(0xe0 + r); vset(&vtop->type, r | VT_LVAL, 0); vswap(); vstore(); args_size += size; } else if (is_float(vtop->type.t)) { gv(RC_FLOAT); // Only one float register if ((vtop->type.t & VT_BTYPE) == VT_FLOAT) { size = 4; } else if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE) { size = 8; } else { size = 12; } oad(0xec81, size); // sub $xxx, %esp if (size == 12) { o(0x7cdb); } else { o(0x5cd9 + size - 4); // fstp[s|l] 0(%esp) } g(0x24); g(0x00); args_size += size; } else { // Simple type (currently always same size) // TODO: implicit cast? v = vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM); if (v == VT_CONST || v == (VT_CONST | VT_SYM)) { // Push constant if ((vtop->type.t & VT_BTYPE) == VT_LLONG) { size = 8; if (vtop->c.word[1] == (char) vtop->c.word[1]) { g(0x6a); // push imm8 g(vtop->c.word[1]); } else { g(0x68); // push imm32 gen_le32(vtop->c.word[1]); } } else { size = 4; } if ((v & VT_SYM) == 0 && vtop->c.i == (char) vtop->c.i) { g(0x6a); // push imm8 g(vtop->c.i); } else { g(0x68); // push imm32 gen_addr32(v, vtop->sym, vtop->c.i); } } else { r = gv(RC_INT); if ((vtop->type.t & VT_BTYPE) == VT_LLONG) { size = 8; o(0x50 + vtop->r2); // push r2 } else { size = 4; } o(0x50 + r); // push r } args_size += size; } vtop--; } save_regs(0); // Save used temporary registers func_sym = vtop->type.ref; func_call = FUNC_CALL(func_sym->r); // fast call case if ((func_call >= FUNC_FASTCALL1 && func_call <= FUNC_FASTCALL3) || func_call == FUNC_FASTCALLW) { int fastcall_nb_regs; uint8_t *fastcall_regs_ptr; if (func_call == FUNC_FASTCALLW) { fastcall_regs_ptr = fastcallw_regs; fastcall_nb_regs = 2; } else { fastcall_regs_ptr = fastcall_regs; fastcall_nb_regs = func_call - FUNC_FASTCALL1 + 1; } for (i = 0; i < fastcall_nb_regs; i++) { if (args_size <= 0) break; o(0x58 + fastcall_regs_ptr[i]); // pop r // TODO: incorrect for struct/floats args_size -= 4; } } gcall_or_jmp(0); if (args_size && func_call != FUNC_STDCALL) gadd_sp(args_size); vtop--; }
/* Generate function call. The function address is pushed first, then all the parameters in call order. This functions pops all the parameters and the function address. */ void gfunc_call(int nb_args) { int size, align, r, args_size, i, func_call; Sym *func_sym; args_size = 0; for(i = 0;i < nb_args; i++) { if ((vtop->type.t & VT_BTYPE) == VT_STRUCT) { size = type_size(&vtop->type, &align); /* align to stack align size */ size = (size + 3) & ~3; /* allocate the necessary size on stack */ oad(0xec81, size); /* sub $xxx, %esp */ /* generate structure store */ r = get_reg(RC_INT); o(0x89); /* mov %esp, r */ o(0xe0 + r); vset(&vtop->type, r | VT_LVAL, 0); vswap(); vstore(); args_size += size; } else if (is_float(vtop->type.t)) { gv(RC_FLOAT); /* only one float register */ if ((vtop->type.t & VT_BTYPE) == VT_FLOAT) size = 4; else if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE) size = 8; else size = 12; oad(0xec81, size); /* sub $xxx, %esp */ if (size == 12) o(0x7cdb); else o(0x5cd9 + size - 4); /* fstp[s|l] 0(%esp) */ g(0x24); g(0x00); args_size += size; } else { /* simple type (currently always same size) */ /* XXX: implicit cast ? */ r = gv(RC_INT); if ((vtop->type.t & VT_BTYPE) == VT_LLONG) { size = 8; o(0x50 + vtop->r2); /* push r */ } else { size = 4; } o(0x50 + r); /* push r */ args_size += size; } vtop--; } save_regs(0); /* save used temporary registers */ func_sym = vtop->type.ref; func_call = FUNC_CALL(func_sym->r); /* fast call case */ if ((func_call >= FUNC_FASTCALL1 && func_call <= FUNC_FASTCALL3) || func_call == FUNC_FASTCALLW) { int fastcall_nb_regs; uint8_t *fastcall_regs_ptr; if (func_call == FUNC_FASTCALLW) { fastcall_regs_ptr = fastcallw_regs; fastcall_nb_regs = 2; } else { fastcall_regs_ptr = fastcall_regs; fastcall_nb_regs = func_call - FUNC_FASTCALL1 + 1; } for(i = 0;i < fastcall_nb_regs; i++) { if (args_size <= 0) break; o(0x58 + fastcall_regs_ptr[i]); /* pop r */ /* XXX: incorrect for struct/floats */ args_size -= 4; } } gcall_or_jmp(0); if (args_size && func_call != FUNC_STDCALL) gadd_sp(args_size); vtop--; }
f128tuple histohit(f128tuple xyvec, const colorset pointcolors[4], const i32 th_id){ // don't plot the first 20 iterations if(threadHits[th_id]++ > 20){ f128 xarr = xyvec.x; f128 yarr = xyvec.y; // check to see if any points have escaped to infinity or are NaN // if they have, reset them and the threadHits counter to 0 if(vvalid(xarr.v) && vvalid(yarr.v)){ // scale the points from fractal-space to histogram-space f128 scaledX; scaledX.v = vadd( vmul( vadd(xarr.v, xoffsetvec), hwidShrunkvec), halfhwidvec); f128 scaledY; scaledY.v = vadd( vmul( vadd(yarr.v, yoffsetvec), hheiShrunkvec), halfhheivec); // extract each point and plot for (i32 i = 0; i < FLOATS_PER_VECTOR_REGISTER; i++){ const u32 ix = scaledX.f[i]; const u32 iy = scaledY.f[i]; if(ix < hwid && iy < hhei){ const u64 cell = ix + (iy * hwid); // lock the row omp_set_lock(&(locks[iy])); // load the existing data // the cache miss here takes up maybe 2/3s of the program's execution time __m128 histocolor = vload((float *)&(h[cell])); // add the new color to the old histocolor = vadd( histocolor, pointcolors[i].vec); // write back vstore((float *)&(h[cell]), histocolor); ++goodHits; // unlock the cell omp_unset_lock(&(locks[iy])); } else { ++missHits; } } } else { ++badHits; threadHits[th_id] = 0; xyvec.x = zerovec; xyvec.y = zerovec; } } return xyvec; }
void Environment::run(RunningContext *context) { const Instruction &ins = m_ipos; m_context = context; if (!context) { m_running = false; } else if (!m_running) { clear_vstore(); TRACE_PRINTF(TRACE_VM, TRACE_INFO, "Entering running state with %p\n", context); m_running = true; const SourcePos *last_pos = NULL; // used for debug output below. const Instruction *ipos; while (m_context && (ipos = m_context->next())) { size_t output = -1; size_t expected = -1; # define CHECK_STACK(in, out) TRACE_OUT(TRACE_VM, TRACE_SPAM) {\ assert(m_context->stack_count() >= (size_t)(in)); \ if ((out) != -1) { assert((long)(expected = m_context->stack_count() - (size_t)(in) + (size_t)(out)) >= 0); } \ tprintf("Stack info: before(%d), in(%d), out(%d), expected(%d)\n", (int)m_context->stack_count(), (int)(in), (int)(out), (int)expected); \ output = (out); \ } m_ipos = *ipos; TRACE_DO(TRACE_VM, TRACE_INFO) { const SourcePos &pos = m_context->m_instructions->source_pos(ipos); if (!last_pos || *last_pos != pos) tprintf("Source Position: %s:%d:%d (%s)\n", pos.file.c_str(), (int)pos.line_num, (int)pos.column, pos.annotation.c_str()); last_pos = &pos; } TRACE_OUT(TRACE_VM, TRACE_SPAM) { tprintf("Instruction: %s@%d\n", inspect(ins).c_str(), (int)(ipos - &*m_context->instructions().begin()) ); tprintf("Stack: %d deep", (int)m_context->stack_count()); const Value *it; for (it = m_context->stack_begin(); it != m_context->stack_pos(); it++) { tprintf("\n %s", inspect(*it).c_str()); } tprintf(" <--\n"); } switch(ins.instruction) { case NOP: CHECK_STACK(0, 0); break; case DEBUGGER: CHECK_STACK(0, 0); { String *action = ptr<String>(ins.arg1); if (*action == "interrupt") { DO_DEBUG __asm__("int3"); } else if (*action == "trace_enable") { trace_mute = false; } else if (*action == "trace_disable") { trace_mute = true; } } break; case POP: CHECK_STACK(1, 0); m_context->pop(); break; case PUSH: CHECK_STACK(0, 1); m_context->push(ins.arg1); break; case SWAP: { CHECK_STACK(2, 2); Value tmp1 = m_context->top(); m_context->pop(); Value tmp2 = m_context->top(); m_context->pop(); m_context->push(tmp1); m_context->push(tmp2); } break; case DUP_TOP: CHECK_STACK(1, 2); m_context->push(m_context->top()); break; case IS: { CHECK_STACK(1, 1); Value res = bvalue(ins.arg1 == m_context->top()); m_context->pop(); m_context->push(res); } break; case IS_EQ: { CHECK_STACK(2, 1); Value first = m_context->top(); m_context->pop(); Value second = m_context->top(); m_context->pop(); m_context->push(bvalue(first == second)); } break; case IS_NOT: { CHECK_STACK(1, 1); Value res = bvalue(ins.arg1 != m_context->top()); m_context->pop(); m_context->push(res); } break; case IS_NOT_EQ:{ CHECK_STACK(2, 1); Value first = m_context->top(); m_context->pop(); Value second = m_context->top(); m_context->pop(); m_context->push(bvalue(first != second)); } break; case JMP: CHECK_STACK(0, 0); GC::safe_point(); m_context->jump(ins.cache.machine_num); break; case JMP_IF: CHECK_STACK(1, 0); GC::safe_point(); if (is_truthy(m_context->top())) m_context->jump(ins.cache.machine_num); m_context->pop(); break; case JMP_IF_NOT: CHECK_STACK(1, 0); GC::safe_point(); if (!is_truthy(m_context->top())) m_context->jump(ins.cache.machine_num); m_context->pop(); break; case LEXICAL_CLEAN_SCOPE: { CHECK_STACK(0, 0); VariableFrame *frame = new_variable_frame(m_context->m_instructions); m_context->new_scope(frame); } break; case LEXICAL_LINKED_SCOPE: { CHECK_STACK(0, 0); VariableFrame *frame = new_variable_frame(m_context->m_instructions); frame->link_frame(m_context->m_lexicalvars); m_context->new_scope(frame); } break; case FRAME_GET: { CHECK_STACK(0, 1); size_t frameid = (size_t)ins.cache.machine_num; TRACE_PRINTF(TRACE_VM, TRACE_SPAM, "Getting frame var %s (%d)\n", ptr<String>(ins.arg1)->c_str(), (int)frameid); m_context->push(m_context->get_framevar(frameid)); } break; case FRAME_SET: { CHECK_STACK(1, 0); size_t frameid = (size_t)ins.cache.machine_num; TRACE_PRINTF(TRACE_VM, TRACE_SPAM, "Setting frame var %s (%d) to: %s\n", ptr<String>(ins.arg1)->c_str(), (int)frameid, inspect(m_context->top()).c_str()); m_context->set_framevar(frameid, m_context->top()); m_context->pop(); } break; case LOCAL_GET: { CHECK_STACK(0, 1); size_t localid = (size_t)ins.cache.machine_num; TRACE_PRINTF(TRACE_VM, TRACE_SPAM, "Getting local var %s (%d)\n", ptr<String>(ins.arg1)->c_str(), (int)localid); m_context->push(m_context->get_localvar(localid)); } break; case LOCAL_SET: { CHECK_STACK(1, 0); size_t localid = (size_t)ins.cache.machine_num; TRACE_PRINTF(TRACE_VM, TRACE_SPAM, "Setting local var %s (%d) to: %s\n", ptr<String>(ins.arg1)->c_str(), (int)localid, inspect(m_context->top()).c_str()); m_context->set_localvar(localid, m_context->top()); m_context->pop(); } break; case LEXICAL_GET: { CHECK_STACK(0, 1); size_t depth = ins.arg1.machine_num; size_t localid = (size_t)ins.cache.machine_num; TRACE_PRINTF(TRACE_VM, TRACE_SPAM, "lexical_get %u@%u: %i\n", (unsigned)localid, (unsigned)depth, type(m_context->get_lexicalvar(localid, depth))); if (depth == 0) m_context->push(m_context->get_lexicalvar(localid)); else m_context->push(m_context->get_lexicalvar(localid, depth)); } break; case LEXICAL_SET: { CHECK_STACK(1, 0); size_t depth = ins.arg1.machine_num; size_t localid = (size_t)ins.cache.machine_num; TRACE_PRINTF(TRACE_VM, TRACE_SPAM, "lexical_set %u@%u: %i\n", (unsigned)localid, (unsigned)depth, type(m_context->top())); if (depth == 0) m_context->set_lexicalvar(localid, m_context->top()); else m_context->set_lexicalvar(localid, depth, m_context->top()); m_context->pop(); } break; case CHANNEL_NEW: { CHECK_STACK(0, 1); const Value &octx = vstore(value(m_context)); RunnableContext *nctx = new_context(octx); nctx->jump(ins.cache.machine_num); m_context->push(value(nctx)); clear_vstore(); } break; case CHANNEL_SPECIAL: CHECK_STACK(0, 1); m_context->push(special_channel(*ptr<String>(ins.arg1))); break; case CHANNEL_SEND: { CHECK_STACK(3, -1); GC::safe_point(); const Value &val = vstore(m_context->top()); m_context->pop(); const Value &ret = vstore(m_context->top()); m_context->pop(); const Value &channel = vstore(m_context->top()); m_context->pop(); // A context that's been channel_sended from should never be returned to, // so invalidate it. m_context->invalidate(); channel_send(this, channel, val, ret); clear_vstore(); } break; case CHANNEL_CALL:{ CHECK_STACK(2, -1); GC::safe_point(); const Value &val = vstore(m_context->top()); m_context->pop(); const Value &channel = vstore(m_context->top()); m_context->pop(); const Value &ret = vstore(value(m_context)); channel_send(this, channel, val, ret); clear_vstore(); } break; case CHANNEL_RET:{ CHECK_STACK(2, -1); const Value &val = vstore(m_context->top()); m_context->pop(); const Value &channel = vstore(m_context->top()); m_context->pop(); // A context that's been channel_reted from should never be returned to, // so invalidate it. m_context->invalidate(); channel_send(this, channel, val, value(&no_return_ctx)); clear_vstore(); } break; case MESSAGE_NEW: { long long id = ins.cache.machine_num; long long sysarg_count = ins.arg2.machine_num, sysarg_counter = sysarg_count - 1; long long arg_count = ins.arg3.machine_num, arg_counter = arg_count - 1; CHECK_STACK(sysarg_count + arg_count, 1); Message *msg = new_message(id, sysarg_count, arg_count); while (arg_count > 0 && arg_counter >= 0) { msg->args()[arg_counter] = m_context->top(); m_context->pop(); --arg_counter; } while (sysarg_count > 0 && sysarg_counter >= 0) { msg->sysargs()[sysarg_counter] = m_context->top(); m_context->pop(); --sysarg_counter; } m_context->push(value(msg)); } break; case MESSAGE_SPLAT: { CHECK_STACK(2, 1); const Tuple &tuple = *ptr<Tuple>(m_context->top()); m_context->pop(); const Message &msg = *ptr<Message>(m_context->top()); m_context->pop(); Message *nmsg = new_message(msg.m_id, msg.sysarg_count(), msg.arg_count() + tuple.size()); std::copy(msg.sysargs(), msg.sysargs_end(), nmsg->sysargs()); std::copy(msg.args(), msg.args_end(), nmsg->args()); std::copy(tuple.begin(), tuple.end(), nmsg->args() + msg.arg_count()); m_context->push(value(nmsg)); } break; case MESSAGE_ADD: { long long count = ins.arg1.machine_num, counter = 0; CHECK_STACK(1 + count, 1); const Message &msg = *ptr<Message>(*(m_context->stack_pos() - 1 - count)); Message *nmsg = new_message(msg.m_id, msg.sysarg_count(), msg.arg_count() + count); std::copy(msg.sysargs(), msg.sysargs_end(), nmsg->sysargs()); std::copy(msg.args(), msg.args_end(), nmsg->args()); Message::iterator out = nmsg->args() + msg.arg_count(); while (count > 0 && counter < count) { *out++ = m_context->top(); m_context->pop(); ++counter; } m_context->pop(); // this is the message we pulled directly off the stack before. m_context->push(value(nmsg)); } break; case MESSAGE_COUNT: CHECK_STACK(1, 2); m_context->push(value((long long)ptr<Message>(m_context->top())->arg_count())); break; case MESSAGE_IS: CHECK_STACK(1, 2); m_context->push(bvalue(ptr<Message>(m_context->top())->m_id == (uint64_t)ins.cache.machine_num)); break; case MESSAGE_IS_PROTO: CHECK_STACK(1, 2); m_context->push(bvalue(ptr<Message>(m_context->top())->protocol_id() == (uint64_t)ins.cache.machine_num)); break; case MESSAGE_ID: { CHECK_STACK(1, 2); Message *msg = ptr<Message>(m_context->top()); m_context->push(value((long long)msg->m_id)); } break; case MESSAGE_SPLIT_ID: { CHECK_STACK(1, 3); Message *msg = ptr<Message>(m_context->top()); m_context->push(value((long long)msg->message_id())); m_context->push(value((long long)msg->protocol_id())); } break; case MESSAGE_CHECK: CHECK_STACK(1, 1); if (!is(m_context->top(), MESSAGE)) { m_context->pop(); m_context->push(False); } break; case MESSAGE_FORWARD: { CHECK_STACK(1, 1); long long id = ins.cache.machine_num; const Message &msg = *ptr<Message>(m_context->top()); Message *nmsg = new_message(id, msg.sysarg_count(), msg.arg_count() + 1); std::copy(msg.sysargs(), msg.sysargs_end(), nmsg->sysargs()); nmsg->args()[0] = value(new_string(msg.name())); std::copy(msg.args(), msg.args_end(), nmsg->args() + 1); m_context->pop(); m_context->push(value(nmsg)); break; } case MESSAGE_SYS_PREFIX: { long long count = ins.arg1.machine_num, counter = 0; CHECK_STACK(1 + count, 1); const Value *sdata = m_context->stack_pos() - 1 - count; const Message &msg = *ptr<Message>(*sdata++); Message *nmsg = new_message(msg.m_id, msg.sysarg_count() + count, msg.arg_count()); Message::iterator to = nmsg->sysargs(); while (counter++ < count) { *to++ = *sdata++; } std::copy(msg.sysargs(), msg.sysargs_end(), to); std::copy(msg.args(), msg.args_end(), nmsg->args()); while (--counter != 0) { m_context->pop(); } m_context->pop(); // message we read off stack before. m_context->push(value(nmsg)); break; } case MESSAGE_SYS_UNPACK: { long long count = ins.arg1.machine_num, pos = count - 1; CHECK_STACK(1, 1 + count); const Message *msg = ptr<Message>(m_context->top()); Message::const_iterator args = msg->sysargs(); long long len = (long long)msg->sysarg_count(); while (pos >= 0) { if (pos >= len) m_context->push(Undef); else m_context->push(args[pos]); pos -= 1; } } break; case MESSAGE_UNPACK: { long long first_count = ins.arg1.machine_num, first_counter = 0; long long splat = ins.arg2.machine_num; long long last_count = ins.arg3.machine_num, last_counter = 0; CHECK_STACK(1, 1 + first_count + last_count + (splat?1:0)); const Message *msg = ptr<Message>(m_context->top()); Message::const_iterator args = msg->args(); long long len = (long long)msg->arg_count(), pos = len - 1; while (last_counter < last_count && pos >= first_count) { if (pos >= len) m_context->push(Nil); else m_context->push(args[pos]); pos -= 1; last_counter += 1; } if (splat != 0) { if (pos >= first_count) { Tuple *splat_tuple = new_tuple(pos - first_count + 1); Tuple::iterator out = splat_tuple->end(); while (pos >= first_count) { --out; *out = args[pos]; pos -= 1; } m_context->push(value(splat_tuple)); } else { m_context->push(value(new_tuple(0))); } } pos = first_count - 1; while (first_counter < first_count && pos >= 0) { if (pos >= len) m_context->push(Nil); else m_context->push(args[pos]); pos -= 1; first_counter += 1; } } break; case STRING_COERCE: { const Value &coerce = ins.arg1; const Value &val = vstore(m_context->top()); if (is(val, STRING)) { CHECK_STACK(1, 2); // push a nil like we called the method. m_context->push(Nil); } else { CHECK_STACK(1, -1); m_context->pop(); channel_send(this, val, value(new_message(coerce)), value(m_context)); } } clear_vstore(); break; case STRING_NEW: { long long count = ins.arg1.machine_num, counter = 0; CHECK_STACK(count, 1); const Value *sp = m_context->stack_pos() - 1; size_t len = 0; while (count > 0 && counter < count) { assert(is(sp[-counter], STRING)); len += ptr<String>(sp[-counter])->length(); ++counter; } String *res = new_string(len); String::iterator out = res->begin(); counter = 0; while (count > 0 && counter < count) { const String *val = ptr<String>(m_context->top()); out = std::copy(val->begin(), val->end(), out); m_context->pop(); ++counter; } m_context->push(value(res)); } break; case TUPLE_NEW: { long long count = ins.arg1.machine_num, counter = 0; CHECK_STACK(count, 1); Tuple *tuple = new_tuple(count); Tuple::iterator out = tuple->begin(); while (count > 0 && counter < count) { *out++ = m_context->top(); m_context->pop(); ++counter; } m_context->push(value(tuple)); } break; case TUPLE_SPLAT: { CHECK_STACK(2, 1); const Tuple *tuple2 = ptr<Tuple>(m_context->top()); m_context->pop(); const Tuple *tuple1 = ptr<Tuple>(m_context->top()); m_context->pop(); Tuple *tuple = join_tuple(tuple1, tuple2); m_context->push(value(tuple)); } break; case TUPLE_UNPACK: { long long first_count = ins.arg1.machine_num, first_counter = 0; long long splat = ins.arg2.machine_num; long long last_count = ins.arg3.machine_num, last_counter = 0; CHECK_STACK(1, 1 + first_count + last_count + (splat?1:0)); const Tuple &tuple = *ptr<Tuple>(m_context->top()); long long len = tuple.size(), pos = tuple.size() - 1; while (last_counter < last_count && pos >= first_count) { if (pos >= len) m_context->push(Nil); else m_context->push(tuple[pos]); pos -= 1; last_counter += 1; } if (splat != 0) { if (pos >= first_count) { Tuple *splat_tuple = new_tuple(pos - first_count + 1); Tuple::iterator out = splat_tuple->end(); while (pos >= first_count) { --out; *out = tuple[pos]; pos -= 1; } m_context->push(value(splat_tuple)); } else { m_context->push(value(new_tuple(0))); } } pos = first_count - 1; while (first_counter < first_count && pos >= 0) { if (pos >= len) m_context->push(Nil); else m_context->push(tuple[pos]); pos -= 1; first_counter += 1; } } break; default: printf("Panic! Unknown instruction %d\n", ins.instruction); exit(1); } TRACE_OUT(TRACE_VM, TRACE_SPAM) { tprintf("Output: %d", (int)output); if ((long)output > 0) { const Value *it = std::max( m_context->stack_begin(), m_context->stack_pos() - output); for (; it != m_context->stack_pos(); it++) { tprintf("\n %s", inspect(*it).c_str()); } } tprintf(" <--\n"); tprintf("----------------\n"); }