// sum_35: recursive routine expressed as a flat sequence of opcode-emitting calls.
// NOTE(review): the opcode helpers (new_env, ildc, load, getfield, ...) are defined
// outside this block. The annotations below assume conventional stack-machine
// semantics (ildc pushes an integer constant, load reads the slot whose index is on
// the stack, jz branches when the tested value is zero/false) — TODO confirm.
// Under that reading: returns 0 when slot 0 is null, otherwise
// get_element_29(slot 0) + sum_35(field 1 of slot 0).
entry sum_35() {
    new_env(1, 0);

    // Compare slot 0 with null; branch to label2 when the comparison yields zero.
    ildc(0); load(); null(); oequal(); jz(label2);

    // Fall-through path: return the constant 0.
    ildc(0); ret();
    jmp(label3);            // follows a ret(); looks unreachable

label2:
    // First operand: get_element_29 applied to slot 0.
    ildc(0); load(); call(get_element_29);
    // Second operand: recursive call on field 1 of slot 0.
    ildc(0); load(); ildc(1); getfield(); call(sum_35);
    // Add both operands and return the result.
    iadd(); ret();

label3:
    // Trailing "return null" epilogue; both paths above already end in ret().
    null(); ret();
}
// length_34: recursive routine expressed as a flat sequence of opcode-emitting calls.
// NOTE(review): opcode helper semantics are defined outside this block; the
// annotations assume a conventional stack machine (ildc pushes a constant, load reads
// the slot whose index is on the stack, jz branches on zero/false) — TODO confirm.
// Under that reading: returns 0 when slot 0 is null, otherwise
// 1 + length_34(field 1 of slot 0).
entry length_34() {
    new_env(1, 0);

    // Compare slot 0 with null; branch to label0 when the comparison yields zero.
    ildc(0); load(); null(); oequal(); jz(label0);

    // Fall-through path: return the constant 0.
    ildc(0); ret();
    jmp(label1);            // follows a ret(); looks unreachable

label0:
    // Push 1, then the result of the recursive call on field 1 of slot 0.
    ildc(1);
    ildc(0); load(); ildc(1); getfield(); call(length_34);
    // Add and return.
    iadd(); ret();

label1:
    // Trailing "return null" epilogue; both paths above already end in ret().
    null(); ret();
}
// print_36: loop over a linked structure, expressed as opcode-emitting calls.
// NOTE(review): opcode helper semantics are defined outside this block; the
// annotations assume a conventional stack machine (ildc pushes a constant, load/store
// access the slot whose index is on the stack, jz branches on zero/false) — TODO confirm.
// Under that reading: slot 1 starts as a copy of slot 0; while slot 1 is not null,
// print_7(get_element_29(slot 1)) and print_13(4) are called and slot 1 advances to
// its field 1; finally println_21(5) is called.
// The operands 4 and 5 are presumably indices of constants (e.g. separator strings)
// defined elsewhere — verify.
entry print_36() {
    new_env(1, 1);

    // slot 1 = slot 0
    ildc(1); ildc(0); load(); store(); pop();

label4:
    // Loop test: leave the loop (label5) when "slot 1 != null" yields zero.
    ildc(1); load(); null(); onotequal(); jz(label5);

    // print_7(get_element_29(slot 1)); result discarded.
    ildc(1); load(); call(get_element_29); call(print_7); pop();
    // print_13(4); result discarded.
    ildc(4); call(print_13); pop();
    // slot 1 = field 1 of slot 1
    ildc(1); ildc(1); load(); ildc(1); getfield(); store(); pop();
    jmp(label4);

label5:
    // println_21(5); result discarded, then return null.
    ildc(5); call(println_21); pop();
    null(); ret();
}
void divu1() { // 整数オーバーフローは発生しないはず enum { _1div, _end }; mov32_reg_mem(_ecx, _GPR_D(_rt, 0)); tst32_reg_reg(_ecx, _ecx); jz(_end); mov32_reg_mem(_eax, _GPR_D(_rs, 0)); xor32_reg_reg(_edx, _edx); div_reg(_ecx); mov32_reg_reg(_ecx, _edx); // 商をloに格納 cdq(); mov32_mem_reg((u32)&g_cpu->m_lo.d[2], _eax); mov32_mem_reg((u32)&g_cpu->m_lo.d[3], _edx); mov32_reg_reg(_eax, _ecx); // 余をhiに格納 cdq(); mov32_mem_reg((u32)&g_cpu->m_hi.d[2], _eax); mov32_mem_reg((u32)&g_cpu->m_hi.d[3], _edx); LABEL_DEF(_end); }
void cnbint( int i, const double pos[][3], const double vel[][3], const double mass[], int nnb, int list[], double f[3], double fdot[3]){ const int NBMAX = 512; assert(nnb <= NBMAX); float xbuf[NBMAX] __attribute__ ((aligned(16))); float ybuf[NBMAX] __attribute__ ((aligned(16))); float zbuf[NBMAX] __attribute__ ((aligned(16))); float vxbuf[NBMAX] __attribute__ ((aligned(16))); float vybuf[NBMAX] __attribute__ ((aligned(16))); float vzbuf[NBMAX] __attribute__ ((aligned(16))); float mbuf[NBMAX] __attribute__ ((aligned(16))); assert((unsigned long)xbuf % 16 == 0); double xi = pos[i][0]; double yi = pos[i][1]; double zi = pos[i][2]; float vxi = vel[i][0]; float vyi = vel[i][1]; float vzi = vel[i][2]; for(int k=0; k<nnb; k++){ int j = list[k]; #if 1 int jj = list[k+4]; __builtin_prefetch(pos[jj]); __builtin_prefetch(pos[jj]+2); __builtin_prefetch(vel[jj]); __builtin_prefetch(vel[jj]+2); __builtin_prefetch(&mass[jj]); #endif #if 0 xbuf[k] = pos[j][0] - xi; ybuf[k] = pos[j][1] - yi; zbuf[k] = pos[j][2] - zi; vxbuf[k] = vel[j][0] - vxi; vybuf[k] = vel[j][1] - vyi; vzbuf[k] = vel[j][2] - vzi; mbuf[k] = mass[j]; #else double xj = pos[j][0]; double yj = pos[j][1]; double zj = pos[j][2]; float vxj = vel[j][0]; float vyj = vel[j][1]; float vzj = vel[j][2]; float mj = mass[j]; xj -= xi; yj -= yi; zj -= zi; vxj -= vxi; vyj -= vyi; vzj -= vzi; xbuf[k] = xj; ybuf[k] = yj; zbuf[k] = zj; vxbuf[k] = vxj; vybuf[k] = vyj; vzbuf[k] = vzj; mbuf[k] = mj; #endif } for(int k=nnb; k%4; k++){ xbuf[k] = 16.0f; ybuf[k] = 16.0f; zbuf[k] = 16.0f; vxbuf[k] = 0.0f; vybuf[k] = 0.0f; vzbuf[k] = 0.0f; mbuf[k] = 0.0f; } v4df ax(0.0), ay(0.0), az(0.0); v4sf jx(0.0), jy(0.0), jz(0.0); for(int k=0; k<nnb; k+=4){ v4sf dx(xbuf + k); v4sf dy(ybuf + k); v4sf dz(zbuf + k); v4sf dvx(vxbuf + k); v4sf dvy(vybuf + k); v4sf dvz(vzbuf + k); v4sf mj(mbuf + k); v4sf r2 = dx*dx + dy*dy + dz*dz; v4sf rv = dx*dvx + dy*dvy + dz*dvz; rv *= v4sf(-3.0f); // v4sf rinv1 = r2.rsqrt() & v4sf(mask); #if 1 v4sf rinv1 = r2.rsqrt(); 
#else v4sf rinv1 = v4sf(v4df(1.0) / v4df(r2).sqrt()); #endif v4sf rinv2 = rinv1 * rinv1; rv *= rinv2; v4sf rinv3 = mj * rinv1 * rinv2; dx *= rinv3; ax += v4df(dx); dy *= rinv3; ay += v4df(dy); dz *= rinv3; az += v4df(dz); dvx *= rinv3; jx += dvx; dvy *= rinv3; jy += dvy; dvz *= rinv3; jz += dvz; dx *= rv; jx += dx; dy *= rv; jy += dy; dz *= rv; jz += dz; } f[0] = ax.sum(); f[1] = ay.sum(); f[2] = az.sum(); fdot[0] = (v4df(jx)).sum(); fdot[1] = (v4df(jy)).sum(); fdot[2] = (v4df(jz)).sum(); assert(f[0] == f[0]); assert(f[1] == f[1]); assert(f[2] == f[2]); assert(fdot[0] == fdot[0]); assert(fdot[1] == fdot[1]); assert(fdot[2] == fdot[2]); }
void cnbint( int i, const double pos[][3], const double vel[][3], const double mass[], int nnb, int list[], double f[3], double fdot[3]){ const int NBMAX = 512; assert(nnb <= NBMAX); double xbuf[NBMAX] __attribute__ ((aligned(16))); double ybuf[NBMAX] __attribute__ ((aligned(16))); double zbuf[NBMAX] __attribute__ ((aligned(16))); float vxbuf[NBMAX] __attribute__ ((aligned(16))); float vybuf[NBMAX] __attribute__ ((aligned(16))); float vzbuf[NBMAX] __attribute__ ((aligned(16))); float mbuf[NBMAX] __attribute__ ((aligned(16))); for(int k=0; k<nnb; k++){ int j = list[k]; xbuf[k] = pos[j][0]; ybuf[k] = pos[j][1]; zbuf[k] = pos[j][2]; vxbuf[k] = vel[j][0]; vybuf[k] = vel[j][1]; vzbuf[k] = vel[j][2]; mbuf[k] = mass[j]; } for(int k=nnb; k%4; k++){ xbuf[k] = 16.0; ybuf[k] = 16.0; zbuf[k] = 16.0; vxbuf[k] = 0.0f; vybuf[k] = 0.0f; vzbuf[k] = 0.0f; mbuf[k] = 0.0f; } v4df xi(pos[i][0]); v4df yi(pos[i][1]); v4df zi(pos[i][2]); v4sf vxi(vel[i][0]); v4sf vyi(vel[i][1]); v4sf vzi(vel[i][2]); v4df ax(0.0), ay(0.0), az(0.0); v4sf jx(0.0), jy(0.0), jz(0.0); for(int k=0; k<nnb; k+=4){ v4df xj(xbuf + k); v4df yj(ybuf + k); v4df zj(zbuf + k); v4sf vxj(vxbuf + k); v4sf vyj(vybuf + k); v4sf vzj(vzbuf + k); v4sf mj(mbuf + k); v4sf dx = v4sf::_v4sf(xj - xi); v4sf dy = v4sf::_v4sf(yj - yi); v4sf dz = v4sf::_v4sf(zj - zi); v4sf dvx = vxj - vxi; v4sf dvy = vyj - vyi; v4sf dvz = vzj - vzi; v4sf r2 = dx*dx + dy*dy + dz*dz; v4sf rv = dx*dvx + dy*dvy + dz*dvz; rv *= v4sf(-3.0f); // v4sf rinv1 = r2.rsqrt() & v4sf(mask); v4sf rinv1 = r2.rsqrt(); v4sf rinv2 = rinv1 * rinv1; rv *= rinv2; v4sf rinv3 = mj * rinv1 * rinv2; dx *= rinv3; ax += v4df(dx); dy *= rinv3; ay += v4df(dy); dz *= rinv3; az += v4df(dz); dvx *= rinv3; jx += dvx; dvy *= rinv3; jy += dvy; dvz *= rinv3; jz += dvz; dx *= rv; jx += dx; dy *= rv; jy += dy; dz *= rv; jz += dz; } f[0] = ax.sum(); f[1] = ay.sum(); f[2] = az.sum(); fdot[0] = (v4df(jx)).sum(); fdot[1] = (v4df(jy)).sum(); fdot[2] = (v4df(jz)).sum(); assert(f[0] == f[0]); 
assert(f[1] == f[1]); assert(f[2] == f[2]); assert(fdot[0] == fdot[0]); assert(fdot[1] == fdot[1]); assert(fdot[2] == fdot[2]); }
/*
 * Reads a Brainfuck program from stdin and emits the corresponding machine code
 * through the xJIT emitter helpers.
 *
 * The emitted routine saves rbx/rbp/r12, then keeps its three incoming arguments
 * in them: rdi -> rbx (putchar callback), rsi -> rbp (getchar callback),
 * rdx -> r12 (pointer to the current cell). Cells are accessed as dwords and the
 * pointer moves in steps of 4 bytes. Characters that are not one of the eight
 * commands are ignored.
 *
 * Runs of '+', '-', '>' and '<' are folded into one instruction using countc().
 * NOTE(review): countc() is defined elsewhere; it is assumed to return the length
 * of the run including the character already read — confirm.
 *
 * Loop labels are numbered in order of their '['; open loops are tracked on a
 * fixed-size stack. Loops nested deeper than MAX_LOOP_DEPTH and unmatched ']' are
 * reported on stderr and skipped rather than corrupting that stack. An unclosed
 * '[' at end of input is reported as well; its forward label stays undefined.
 */
void codegen(xJIT *xjit)
{
	enum { MAX_LOOP_DEPTH = 100 };
	xOperand *pputc, *pgetc, *stack;
	int c;              /* int, not char: EOF must stay distinct from every byte value */
	const void *code;   /* not referenced below; kept in case emitter macros expect it - TODO confirm */
	int lstack[MAX_LOOP_DEPTH], *lcur = lstack, lno = 0;
	int dropped = 0;    /* number of currently open '[' that were skipped as too deep */

	pputc = rbx;
	pgetc = rbp;
	stack = r12;

	/* prologue: preserve the registers used to hold the arguments */
	push(rbx);
	push(rbp);
	push(r12);
	mov(pputc, rdi); /* putchar */
	mov(pgetc, rsi); /* getchar */
	mov(stack, rdx); /* stack */

	while ((c = getchar()) != EOF) {
		switch (c) {
		case '+':
		case '-':
			{
				int cnt;
				cnt = countc(c);
				if (cnt == 1) {
					if (c == '+') {
						inc(dword(stack));
					} else {
						dec(dword(stack));
					}
				} else {
					addi(dword(stack), c == '+' ? cnt : -cnt);
				}
			}
			break;
		case '>':
		case '<':
			{
				int cnt;
				cnt = countc(c);
				addi(stack, 4 * (c == '>' ? cnt : -cnt));
			}
			break;
		case '.':
			{
				mov(rdi, dword(stack));
				call(pputc,XJIT_CALL_OPERAND);
			}
			break;
		case ',':
			{
				call(pgetc,XJIT_CALL_OPERAND);
				mov(dword(stack), eax);
			}
			break;
		case '[':
			{
				if (lcur == lstack + MAX_LOOP_DEPTH) {
					/* no room to remember this loop: skip it and its matching ']' */
					fprintf(stderr, "codegen: loops nested deeper than %d, '[' skipped\n", (int)MAX_LOOP_DEPTH);
					dropped++;
					break;
				}
				L(numl(lno, 'B')); /* backward label */
				mov(eax, dword(stack));
				test(eax, eax);
				jz(numl(lno, 'F'), XJIT_LABEL_NEAR);
				*lcur++ = lno++;
			}
			break;
		case ']':
			{
				int no;
				if (dropped > 0) {
					/* closes a '[' that was skipped above */
					dropped--;
					break;
				}
				if (lcur == lstack) {
					fprintf(stderr, "codegen: unmatched ']' skipped\n");
					break;
				}
				no = *--lcur;
				jmp(numl(no, 'B'), XJIT_JMP_LABEL);
				L(numl(no, 'F')); /* forward label */
			}
			break;
		default:
			break;
		}
	}

	if (lcur != lstack || dropped > 0) {
		fprintf(stderr, "codegen: unclosed '[' at end of input\n");
	}

	/* epilogue: restore the saved registers in reverse order */
	pop(r12);
	pop(rbp);
	pop(rbx);
	ret();
}
// main_40: driver routine expressed as a flat sequence of opcode-emitting calls.
// NOTE(review): opcode helper semantics are defined outside this block; the
// annotations assume a conventional stack machine (ildc pushes a constant, load/store
// access the slot whose index is on the stack, jz branches on zero/false, pop drops
// an unused result) — TODO confirm.
// Under that reading the routine
//   1. builds a list in slot 1 from List_28(new object, 0) plus push_front_30 of the
//      counter values 1..5,
//   2. prints it, its length_34 and its sum_35,
//   3. moves the elements one by one into a second list in slot 2 via
//      get_element_29 / pop_front_33 / push_front_30 until slot 1 is null,
//   4. prints the second list, its length and its sum.
// The operands passed to print_13 (6, 9, 11, 14) are presumably indices of constants
// defined elsewhere — verify.
entry main_40() {
    new_env(0, 3);

    // slot 1 = List_28(newobj(2), 0)
    ildc(1); newobj(2); ildc(0); call(List_28); store(); pop();

    // slot 0 = 1 (loop counter)
    ildc(0); ildc(1); store(); pop();

label6:
    // Loop test: leave the loop (label7) when "slot 0 < 6" yields zero.
    ildc(0); load(); ildc(6); ilt(); jz(label7);

    // slot 1 = push_front_30(slot 1, slot 0)
    ildc(1); ildc(1); load(); ildc(0); load(); call(push_front_30); store(); pop();

label8:
    // slot 0 = slot 0 + 1; both the old value and the store result are dropped.
    // label8 is not targeted by any jump in this block.
    ildc(0); dup(); load(); swap(); dup(); load(); ildc(1); iadd(); store(); pop(); pop();
    jmp(label6);

label7:
    // print_36(slot 1)
    ildc(1); load(); call(print_36); pop();
    // print_13(6)
    ildc(6); call(print_13); pop();
    // println_15(length_34(slot 1))
    ildc(1); load(); call(length_34); call(println_15); pop();
    // print_13(9)
    ildc(9); call(print_13); pop();
    // println_15(sum_35(slot 1))
    ildc(1); load(); call(sum_35); call(println_15); pop();

    // slot 2 = List_28(newobj(2), get_element_29(slot 1))
    ildc(2); newobj(2); ildc(1); load(); call(get_element_29); call(List_28); store(); pop();
    // slot 1 = pop_front_33(slot 1)
    ildc(1); ildc(1); load(); call(pop_front_33); store(); pop();

label9:
    // Loop test: leave the loop (label10) when "slot 1 != null" yields zero.
    ildc(1); load(); null(); onotequal(); jz(label10);

    // slot 2 = push_front_30(slot 2, get_element_29(slot 1))
    ildc(2); ildc(2); load(); ildc(1); load(); call(get_element_29); call(push_front_30); store(); pop();
    // slot 1 = pop_front_33(slot 1)
    ildc(1); ildc(1); load(); call(pop_front_33); store(); pop();
    jmp(label9);

label10:
    // print_36(slot 2)
    ildc(2); load(); call(print_36); pop();
    // print_13(11)
    ildc(11); call(print_13); pop();
    // println_15(length_34(slot 2))
    ildc(2); load(); call(length_34); call(println_15); pop();
    // print_13(14)
    ildc(14); call(print_13); pop();
    // println_15(sum_35(slot 2))
    ildc(2); load(); call(sum_35); call(println_15); pop();

    // return null
    null(); ret();
}