/* driver */
/*
 * scale: emit a vcode routine ("foo", two pointer arguments) and, unless
 * `pixie` is set, call it as entry(a, b).
 *
 * The emitted routine first replaces each incoming pointer with the
 * pointer stored at offset 0 behind it, then walks (n * n) elements of
 * size `sizeof **a` starting there.  Each element read through the second
 * argument is multiplied by the immediate `c` and stored through the
 * first argument.  Two elements are handled per loop iteration and the
 * loop test sits at the bottom, so the body is emitted to run at least
 * once.
 *
 * When `strength_reduce` is set the multiply is emitted with cmuli(),
 * otherwise with v_mului().  When `disass` is set the finished code is
 * passed to v_dump().
 */
static void scale(int n, data_t **a, data_t **b, data_t c)
{
    static unsigned code_buf[1024];     /* storage handed to v_lambda */
    struct v_reg args[10];              /* argument registers filled in by v_lambda */
    struct v_reg out_ptr, in_ptr;       /* store cursor / load cursor */
    struct v_reg in_limit;              /* load cursor value at which the loop stops */
    struct v_reg elem0, elem1;          /* the two elements in flight per iteration */
    struct v_label loop_top;
    v_vptr entry;

    /* simple unroll */
    /* assert(n >= 4 && (n % 4) == 0); */
    v_lambda("foo", "%p%p", args, V_LEAF, code_buf);

    /* the two row pointers arrive as parameters; c is baked in as an immediate */
    out_ptr = args[0];
    in_ptr = args[1];

    if (!v_getreg(&in_limit, V_P, V_TEMP)) {
        v_fatal("scale: out of registers\n");
    }
    if (!v_getreg(&elem0, V_U, V_TEMP)) {
        v_fatal("scale: out of registers\n");
    }
    if (!v_getreg(&elem1, V_U, V_TEMP)) {
        v_fatal("scale: out of registers\n");
    }

    loop_top = v_genlabel();

    /*
     * Relies on contiguous memory: only the pointer at offset 0 of each
     * argument is fetched, and all n * n elements are reached from it.
     * The loads are issued raw (without interlocks).
     */
    v_raw_load(v_ldpi(in_ptr, in_ptr, (0)), 1);
    v_raw_load(v_ldpi(out_ptr, out_ptr, (0)), 1);
    v_addpi(in_limit, in_ptr, (n * n) * sizeof **a);

    v_label(loop_top);

    /* fetch two elements up front to get rid of load delay slots */
    v_raw_load(v_ldui(elem0, in_ptr, (0 * sizeof **a)), 1);
    v_raw_load(v_ldui(elem1, in_ptr, (1 * sizeof **a)), 1);

    /* the multiplies are strength reduced when requested */
    if (strength_reduce) {
        cmuli(elem0, elem0, c);
    } else {
        v_mului(elem0, elem0, c);
    }
    v_addpi(out_ptr, out_ptr, (2 * sizeof **a));

    if (strength_reduce) {
        cmuli(elem1, elem1, c);
    } else {
        v_mului(elem1, elem1, c);
    }

    /* out_ptr was already advanced, hence the negative store offsets */
    v_stui(elem0, out_ptr, -(2 * sizeof **a));
    v_addpi(in_ptr, in_ptr, (2 * sizeof **a));

    /* the second store is scheduled into the branch's delay slot */
    v_schedule_delay(
        v_bltp(in_ptr, in_limit, loop_top),
        v_stui(elem1, out_ptr, -(1 * sizeof **a))
    );

    entry = v_end().v;

    if (disass) {
        v_dump((void *)entry);
    }

    /* perform multiplication */
    if (!pixie) {
        entry(a, b);
    }
}
/* A predicate is represented as a sum-of-products, that is

   (A1 A2 ... ) OR (B1 B2 ...) OR ...

   where each element in a product (the A?'s and B?'s) is a simple
   predicate like v > 10.

   Predicates are represented in memory as an array of wk_term's,
   one term for each immediate, variable, operator, conjunction or
   disjunction. A single product is considered to be a group of
   contiguous wk_term's that are not WK_ORs. The whole mess is
   terminated by a WK_END. */

#include <vcode/vcode.h>
#include <xok/wk.h>
#include <xok/mmu.h>
#include <xok/sys_proto.h>
#include <xok/kerrno.h>
#include <xok/malloc.h>
#include <xok_include/assert.h>
#include <xok/printf.h>
#ifndef __CAP__
#include <xok/pmapP.h>
#else
#include <xok/pmap.h>
#endif

#define WK_MAX_CODE_BYTES 4096
#define OVERRUN_SAFETY 20

/* Bail out of wk_compile with -E_INVAL once the emission pointer v_ip
   gets within OVERRUN_SAFETY bytes of the end of the code buffer.
   Expands in place: relies on the locals `code` and `ret` and on the
   `error` label of the enclosing function. */
#define OVERRUN_CHECK \
{ \
  if (v_ip > code + WK_MAX_CODE_BYTES - OVERRUN_SAFETY) { \
    warn ("wk_compile: out of code space\n"); \
    ret = -E_INVAL; \
    goto error; \
  } \
}

static int next_pp; /* outside function so can be used by cleanup code */

/* Compile the sz terms in t into a vcode routine emitted into `code`
   (a buffer of WK_MAX_CODE_BYTES bytes).

   The emitted routine evaluates the products in order. Every comparison
   operator emits a branch that is taken when the comparison does NOT
   hold, skipping to the end of the current product. A product whose
   comparisons all hold returns the current tag (initially -1, changed by
   WK_TAG terms). If no product holds, the routine returns 0.

   Operands (WK_VAR / WK_IMM) are loaded alternately into two scratch
   registers, so every comparison sees the two most recent operands.

   Each WK_VAR term is validated before code is emitted for it: the
   capability t[i].wk_cap must resolve via env_getcap, the physical page
   number must be non-zero and below nppage, and the page must be either
   PP_KERNRO or a PP_USER page that passes ppage_acl_check. PP_USER pages
   are pinned and their page numbers appended to pred_pages.

   Returns 0 on success, -E_INVAL for a malformed predicate or when out
   of code space or page slots, or the negative value returned by
   env_getcap / ppage_acl_check.

   On every return path, including errors, pred_pages is terminated with
   a 0 entry and curenv->env_pred_pgs / curenv->env_pred are pointed at
   pred_pages / code, so the caller can release everything with wk_free. */
static int wk_compile (struct wk_term *t, int sz, char *code,
		       u_int *pred_pages) {
  int i;
  v_reg_t r1, r2, z, tag;
  v_label_t end_of_term;
  int start_term = 1;		/* non-zero while no product is open */
  int op1 = 1;			/* non-zero when the next operand goes to r1 */
  cap c;
  struct Ppage *pp;
  u_int ppn;
  int ret = 0;

  next_pp = 0;

  v_lambda ("", "", NULL, 1, code, WK_MAX_CODE_BYTES);
  if (!v_getreg (&r1, V_U, V_TEMP) ||
      !v_getreg (&r2, V_U, V_TEMP) ||
      !v_getreg (&z, V_U, V_TEMP) ||
      !v_getreg (&tag, V_U, V_TEMP))
    panic ("wk_compile: architecture doesn't have enough registers.");

  v_setu (tag, -1);
  v_setu (z, 0);		/* z holds 0 and serves as base register for loads */

  for (i = 0; i < sz; i++) {
    if (start_term) {
      /* opening a new product: its failing comparisons branch here */
      end_of_term = v_genlabel ();
      start_term = 0;
    }

    OVERRUN_CHECK;

    switch (t[i].wk_type) {
    case WK_VAR:
      /* keep one slot free for the terminating 0 entry */
      if (next_pp >= WK_MAX_PP-1) {
	warn ("wk_compile: too many pages in predicate\n");
	ret = -E_INVAL;
	goto error;
      }
      if ((ret = env_getcap (curenv, t[i].wk_cap, &c)) < 0) {
	goto error;
      }
      ppn = PGNO((u_int)t[i].wk_var);
      if (!ppn || ppn >= nppage) {
	printf ("at index %d\n", i);
	warn ("wk_compile: invalid physical page\n");
	ret = -E_INVAL;
	goto error;
      }
      pp = ppages_get(ppn);
      switch (Ppage_pp_status_get(pp)) {
      case PP_USER:
	if ((ret = ppage_acl_check(pp,&c,PP_ACL_LEN,0)) < 0) {
	  goto error;
	}
	ppage_pin (pp);
	pred_pages[next_pp++] = ppn;
	break;
      case PP_KERNRO:
	/* user can access pages that each env get's mapped r/o */
	break;
      default:
	printf ("at index %d\n", i);
	warn ("wk_compile: attempt to reference non PP_KERNRO or PP_USER page\n");
	ret = -E_INVAL;
	goto error;
      }
      if (op1) {
	v_ldui (r1, z, (int )ptov (t[i].wk_var));
	op1 = 0;
      } else {
	v_ldui (r2, z, (int )ptov (t[i].wk_var));
	op1 = 1;
      }
      break;

    case WK_IMM:
      if (op1) {
	v_setu (r1, t[i].wk_imm);
	op1 = 0;
      } else {
	v_setu (r2, t[i].wk_imm);
	op1 = 1;
      }
      break;

    case WK_TAG: {
      v_setu (tag, t[i].wk_tag);
      break;
    }

    case WK_OP: {
      /* each comparison emits the inverse branch: leave the product as
	 soon as one of its comparisons fails */
      switch (t[i].wk_op) {
      case WK_GT: {
	v_bleu (r1, r2, end_of_term);
	break;
      }
      case WK_GTE: {
	v_bltu (r1, r2, end_of_term);
	break;
      }
      case WK_LT: {
	v_bgeu (r1, r2, end_of_term);
	break;
      }
      case WK_LTE: {
	v_bgtu (r1, r2, end_of_term);
	break;
      }
      case WK_EQ: {
	v_bneu (r1, r2, end_of_term);
	break;
      }
      case WK_NEQ: {
	v_bequ (r1, r2, end_of_term);
	break;
      }
      case WK_OR: {
	/* every comparison of this product held: report success, then
	   bind the failure label so the next product starts here */
	v_retu (tag);
	v_label (end_of_term);
	start_term = 1;
	break;
      }
      default: {
	printf ("at index %d\n", i);
	warn ("wk_compile: invalid wk-pred instruction\n");
	ret = -E_INVAL;
	goto error;
      }
      }
      break;
    }

    default:
      printf ("at index %d\n", i);
      warn ("wk_compile: invalid wk-pred type\n");
      ret = -E_INVAL;
      goto error;
    }
  }

  /* end the last term */
  OVERRUN_CHECK;
  if (!start_term) {
    /* A product is still open: close it the same way WK_OR does.
       When no product is open (sz <= 0, or the last term was a WK_OR)
       end_of_term is either unset or already bound, so it must not be
       bound again; only the final "no product held" return is emitted. */
    v_retu (tag);
    v_label (end_of_term);
  }
  v_retui (0);
  v_end (NULL);

error:
  /* have to do this even on error so that our caller can just call
     wk_free to clean memory/ref counts up */
  pred_pages[next_pp] = 0;
  curenv->env_pred_pgs = pred_pages;
  curenv->env_pred = (Spred)code;
  return ret;
}