Exemplo n.º 1
0
/*
 * sum_35: stack-machine routine that calls itself recursively.
 *
 * NOTE(review): the opcodes used here (new_env, ildc, load, oequal, ...) are
 * defined outside this excerpt; the annotations below are a reading of the
 * instruction sequence and should be confirmed against the VM definition.
 *
 * Presumed behavior: if the value in slot 0 equals null, return 0; otherwise
 * return get_element_29(slot 0) + sum_35(field 1 of slot 0).
 */
entry sum_35() {
new_env(1, 0);
/* Test: slot 0 == null (presumably slot 0 holds the argument). */
ildc(0);
load();
null();
oequal();
/* Branch to label2 when the comparison result is zero (values differ). */
jz(label2);
/* Equal-to-null case: return the constant 0. */
ildc(0);
ret();
/* NOTE(review): if ret() leaves the routine, this jmp is never executed. */
jmp(label3);
label2:
/* First operand: get_element_29 applied to the value in slot 0. */
ildc(0);
load();
call(get_element_29);
/* Second operand: sum_35 applied to field 1 of the value in slot 0. */
ildc(0);
load();
ildc(1);
getfield();
call(sum_35);
/* Add both operands and return the result. */
iadd();
ret();
label3:
/* Fallback tail: return null. Both paths above end in ret() first. */
null();
ret();
}
Exemplo n.º 2
0
/*
 * length_34: stack-machine routine that calls itself recursively.
 *
 * NOTE(review): the opcodes used here are defined outside this excerpt; the
 * annotations are a reading of the instruction sequence and should be
 * confirmed against the VM definition.
 *
 * Presumed behavior: if the value in slot 0 equals null, return 0; otherwise
 * return 1 + length_34(field 1 of slot 0).
 */
entry length_34() {
new_env(1, 0);
/* Test: slot 0 == null (presumably slot 0 holds the argument). */
ildc(0);
load();
null();
oequal();
/* Branch to label0 when the comparison result is zero (values differ). */
jz(label0);
/* Equal-to-null case: return the constant 0. */
ildc(0);
ret();
/* NOTE(review): if ret() leaves the routine, this jmp is never executed. */
jmp(label1);
label0:
/* First operand: the constant 1. */
ildc(1);
/* Second operand: length_34 applied to field 1 of the value in slot 0. */
ildc(0);
load();
ildc(1);
getfield();
call(length_34);
/* Add both operands and return the result. */
iadd();
ret();
label1:
/* Fallback tail: return null. Both paths above end in ret() first. */
null();
ret();
}
Exemplo n.º 3
0
/*
 * print_36: stack-machine routine that walks a chain of objects and prints
 * one element per step.
 *
 * NOTE(review): the opcodes and the callees (get_element_29, print_7,
 * print_13, println_21) are defined outside this excerpt; the annotations are
 * a reading of the instruction sequence and should be confirmed.
 *
 * Presumed behavior: copy slot 0 into slot 1, then while slot 1 is not null
 * print its element followed by constant #4 and advance slot 1 to its
 * field 1; finally call println_21 with constant #5 and return null.
 */
entry print_36() {
new_env(1, 1);
/* slot 1 = slot 0 (presumably: cursor = argument). */
ildc(1);
ildc(0);
load();
store();
pop();
label4:
/* Loop condition: slot 1 != null; leave via label5 when the result is zero. */
ildc(1);
load();
null();
onotequal();
jz(label5);
/* print_7(get_element_29(slot 1)); the call's result is discarded. */
ildc(1);
load();
call(get_element_29);
call(print_7);
pop();
/* print_13(4) -- presumably 4 indexes a constant such as a separator string. */
ildc(4);
call(print_13);
pop();
/* slot 1 = field 1 of slot 1 (advance to the next node). */
ildc(1);
ildc(1);
load();
ildc(1);
getfield();
store();
pop();
jmp(label4);
label5:
/* println_21(5) -- presumably 5 indexes a constant; result discarded. */
ildc(5);
call(println_21);
pop();
/* Return null. */
null();
ret();
}
Exemplo n.º 4
0
/*
 * Emit code for an unsigned 32-bit divide of the rs operand by the rt operand,
 * storing the quotient in g_cpu->m_lo.d[2..3] and the remainder in
 * g_cpu->m_hi.d[2..3].
 *
 * NOTE(review): the emitter helpers (mov32_reg_mem, div_reg, cdq, ...) are
 * defined outside this excerpt; the register-level description below assumes
 * they emit the x86 instructions their names suggest -- confirm.
 */
void divu1() {
  // Integer overflow should not occur here.
  // _end is the local label used to skip the division; _1div is not
  // referenced in this function.
  enum { _1div, _end };

  // Load the divisor (rt) into ecx and skip everything when it is zero,
  // which leaves lo/hi unmodified in that case.
  mov32_reg_mem(_ecx, _GPR_D(_rt, 0));
  tst32_reg_reg(_ecx, _ecx);
  jz(_end);

  // Load the dividend (rs) into eax, clear edx, and divide by ecx.
  mov32_reg_mem(_eax, _GPR_D(_rs, 0));
  xor32_reg_reg(_edx, _edx);
  div_reg(_ecx);
  // Save edx (presumably the remainder) in ecx before cdq overwrites edx.
  mov32_reg_reg(_ecx, _edx);
  // Store the quotient in lo (d[2] = eax, d[3] = edx after cdq).
  cdq();
  mov32_mem_reg((u32)&g_cpu->m_lo.d[2], _eax);
  mov32_mem_reg((u32)&g_cpu->m_lo.d[3], _edx);

  mov32_reg_reg(_eax, _ecx);
  // Store the remainder in hi (d[2] = eax, d[3] = edx after cdq).
  cdq();
  mov32_mem_reg((u32)&g_cpu->m_hi.d[2], _eax);
  mov32_mem_reg((u32)&g_cpu->m_hi.d[3], _edx);
LABEL_DEF(_end);
}
Exemplo n.º 5
0
/*
 * Accumulate, for particle i, the sum over its nnb listed neighbours of
 *
 *     f    += m_j * dr / r^3
 *     fdot += m_j * dv / r^3  -  3 (dr . dv) / r^2 * m_j * dr / r^3
 *
 * where dr = pos[j] - pos[i] and dv = vel[j] - vel[i].
 *
 * Parameters:
 *   i     index of the particle the sums are computed for
 *   pos   positions, indexed by particle
 *   vel   velocities, indexed by particle
 *   mass  masses, indexed by particle
 *   nnb   number of valid entries in list[] (must be <= 512)
 *   list  indices of the neighbour particles
 *   f     output: the three components of the first sum
 *   fdot  output: the three components of the second sum
 *
 * Relative coordinates are staged in single-precision buffers and processed
 * four at a time; f is accumulated in v4df, fdot in v4sf.  The function
 * asserts that none of the results is NaN.
 */
void cnbint(
		int i,
		const double pos[][3],
		const double vel[][3],
		const double mass[],
		int nnb,
		int list[],
		double f[3],
		double fdot[3]){
	const int NBMAX = 512;
	assert(nnb <= NBMAX);

	float xbuf[NBMAX] __attribute__ ((aligned(16)));
	float ybuf[NBMAX] __attribute__ ((aligned(16)));
	float zbuf[NBMAX] __attribute__ ((aligned(16)));
	float vxbuf[NBMAX] __attribute__ ((aligned(16)));
	float vybuf[NBMAX] __attribute__ ((aligned(16)));
	float vzbuf[NBMAX] __attribute__ ((aligned(16)));
	float mbuf[NBMAX] __attribute__ ((aligned(16)));
	assert((unsigned long)xbuf % 16 == 0);

	double xi = pos[i][0];
	double yi = pos[i][1];
	double zi = pos[i][2];
	float vxi = vel[i][0];
	float vyi = vel[i][1];
	float vzi = vel[i][2];
	// Gather: store position and velocity of every neighbour relative to
	// particle i, plus its mass, into the staging buffers.
	for(int k=0; k<nnb; k++){
		int j = list[k];
#if 1
		// Prefetch the data of the neighbour four entries ahead.  The
		// look-ahead index is clamped to the last valid entry so that
		// list[] is never read beyond its nnb elements.
		int kpre = (k + 4 < nnb) ? k + 4 : nnb - 1;
		int jj = list[kpre];
		__builtin_prefetch(pos[jj]);
		__builtin_prefetch(pos[jj]+2);
		__builtin_prefetch(vel[jj]);
		__builtin_prefetch(vel[jj]+2);
		__builtin_prefetch(&mass[jj]);
#endif
#if 0
		xbuf[k] = pos[j][0] - xi;
		ybuf[k] = pos[j][1] - yi;
		zbuf[k] = pos[j][2] - zi;
		vxbuf[k] = vel[j][0] - vxi;
		vybuf[k] = vel[j][1] - vyi;
		vzbuf[k] = vel[j][2] - vzi;
		mbuf[k] = mass[j];
#else
		// Positions are subtracted in double precision before being
		// narrowed to float; velocities are subtracted in float.
		double xj = pos[j][0];
		double yj = pos[j][1];
		double zj = pos[j][2];
		float vxj = vel[j][0];
		float vyj = vel[j][1];
		float vzj = vel[j][2];
		float mj = mass[j];
		xj -= xi;
		yj -= yi;
		zj -= zi;
		vxj -= vxi;
		vyj -= vyi;
		vzj -= vzi;
		xbuf[k] = xj;
		ybuf[k] = yj;
		zbuf[k] = zj;
		vxbuf[k] = vxj;
		vybuf[k] = vyj;
		vzbuf[k] = vzj;
		mbuf[k] = mj;
#endif
	}
	// Pad up to the next multiple of four with zero-mass entries at a
	// non-zero offset, so the padded lanes have r2 != 0 and contribute 0.
	for(int k=nnb; k%4; k++){
		xbuf[k] = 16.0f;
		ybuf[k] = 16.0f;
		zbuf[k] = 16.0f;
		vxbuf[k] = 0.0f;
		vybuf[k] = 0.0f;
		vzbuf[k] = 0.0f;
		mbuf[k] = 0.0f;
	}

	v4df ax(0.0), ay(0.0), az(0.0);
	v4sf jx(0.0), jy(0.0), jz(0.0);

	// Process four neighbours per iteration.
	for(int k=0; k<nnb; k+=4){
		v4sf dx(xbuf + k);
		v4sf dy(ybuf + k);
		v4sf dz(zbuf + k);
		v4sf dvx(vxbuf + k);
		v4sf dvy(vybuf + k);
		v4sf dvz(vzbuf + k);
		v4sf mj(mbuf + k);

		v4sf r2 = dx*dx + dy*dy + dz*dz;
		v4sf rv = dx*dvx + dy*dvy + dz*dvz;
		rv *= v4sf(-3.0f);
		// v4sf rinv1 = r2.rsqrt() & v4sf(mask);
#if 1
		// presumably 1/sqrt(r2) per lane -- rsqrt() is defined elsewhere
		v4sf rinv1 = r2.rsqrt();
#else
		v4sf rinv1 = v4sf(v4df(1.0) / v4df(r2).sqrt());
#endif
		v4sf rinv2 = rinv1 * rinv1;
		rv *= rinv2;                        // rv = -3 (dr . dv) / r^2
		v4sf rinv3 = mj * rinv1 * rinv2;    // m / r^3
		 
		dx *= rinv3; ax += v4df(dx);
		dy *= rinv3; ay += v4df(dy);
		dz *= rinv3; az += v4df(dz);
		dvx *= rinv3; jx += dvx;
		dvy *= rinv3; jy += dvy;
		dvz *= rinv3; jz += dvz;
		dx *= rv; jx += dx;
		dy *= rv; jy += dy;
		dz *= rv; jz += dz;
	}
	// Reduce the four lanes of every accumulator to a scalar.
	f[0] = ax.sum();
	f[1] = ay.sum();
	f[2] = az.sum();
	fdot[0] = (v4df(jx)).sum();
	fdot[1] = (v4df(jy)).sum();
	fdot[2] = (v4df(jz)).sum();
	// x == x is false only for NaN.
	assert(f[0] == f[0]);
	assert(f[1] == f[1]);
	assert(f[2] == f[2]);
	assert(fdot[0] == fdot[0]);
	assert(fdot[1] == fdot[1]);
	assert(fdot[2] == fdot[2]);
}
Exemplo n.º 6
0
void cnbint(
		int i,
		const double pos[][3],
		const double vel[][3],
		const double mass[],
		int nnb,
		int list[],
		double f[3],
		double fdot[3]){
	const int NBMAX = 512;
	assert(nnb <= NBMAX);

	double xbuf[NBMAX] __attribute__ ((aligned(16)));
	double ybuf[NBMAX] __attribute__ ((aligned(16)));
	double zbuf[NBMAX] __attribute__ ((aligned(16)));
	float vxbuf[NBMAX] __attribute__ ((aligned(16)));
	float vybuf[NBMAX] __attribute__ ((aligned(16)));
	float vzbuf[NBMAX] __attribute__ ((aligned(16)));
	float mbuf[NBMAX] __attribute__ ((aligned(16)));
	for(int k=0; k<nnb; k++){
		int j = list[k];
		xbuf[k] = pos[j][0];
		ybuf[k] = pos[j][1];
		zbuf[k] = pos[j][2];
		vxbuf[k] = vel[j][0];
		vybuf[k] = vel[j][1];
		vzbuf[k] = vel[j][2];
		mbuf[k] = mass[j];
	}
	for(int k=nnb; k%4; k++){
		xbuf[k] = 16.0;
		ybuf[k] = 16.0;
		zbuf[k] = 16.0;
		vxbuf[k] = 0.0f;
		vybuf[k] = 0.0f;
		vzbuf[k] = 0.0f;
		mbuf[k] = 0.0f;
	}

	v4df xi(pos[i][0]);
	v4df yi(pos[i][1]);
	v4df zi(pos[i][2]);
	v4sf vxi(vel[i][0]);
	v4sf vyi(vel[i][1]);
	v4sf vzi(vel[i][2]);
	v4df ax(0.0), ay(0.0), az(0.0);
	v4sf jx(0.0), jy(0.0), jz(0.0);

	for(int k=0; k<nnb; k+=4){
		v4df xj(xbuf + k);
		v4df yj(ybuf + k);
		v4df zj(zbuf + k);
		v4sf vxj(vxbuf + k);
		v4sf vyj(vybuf + k);
		v4sf vzj(vzbuf + k);
		v4sf mj(mbuf + k);

		v4sf dx = v4sf::_v4sf(xj - xi);
		v4sf dy = v4sf::_v4sf(yj - yi);
		v4sf dz = v4sf::_v4sf(zj - zi);
		v4sf dvx = vxj - vxi;
		v4sf dvy = vyj - vyi;
		v4sf dvz = vzj - vzi;

		v4sf r2 = dx*dx + dy*dy + dz*dz;
		v4sf rv = dx*dvx + dy*dvy + dz*dvz;
		rv *= v4sf(-3.0f);
		// v4sf rinv1 = r2.rsqrt() & v4sf(mask);
		v4sf rinv1 = r2.rsqrt();
		v4sf rinv2 = rinv1 * rinv1;
		rv *= rinv2;
		v4sf rinv3 = mj * rinv1 * rinv2;
		 
		dx *= rinv3; ax += v4df(dx);
		dy *= rinv3; ay += v4df(dy);
		dz *= rinv3; az += v4df(dz);
		dvx *= rinv3; jx += dvx;
		dvy *= rinv3; jy += dvy;
		dvz *= rinv3; jz += dvz;
		dx *= rv; jx += dx;
		dy *= rv; jy += dy;
		dz *= rv; jz += dz;
	}
	f[0] = ax.sum();
	f[1] = ay.sum();
	f[2] = az.sum();
	fdot[0] = (v4df(jx)).sum();
	fdot[1] = (v4df(jy)).sum();
	fdot[2] = (v4df(jz)).sum();
	assert(f[0] == f[0]);
	assert(f[1] == f[1]);
	assert(f[2] == f[2]);
	assert(fdot[0] == fdot[0]);
	assert(fdot[1] == fdot[1]);
	assert(fdot[2] == fdot[2]);
}
Exemplo n.º 7
0
/*
 * Read a Brainfuck program from standard input and emit the equivalent
 * machine code through the xJIT emitter macros.
 *
 * The emitted function saves rbx, rbp and r12, then keeps its three incoming
 * arguments there: rdi -> rbx (putchar), rsi -> rbp (getchar) and
 * rdx -> r12 (pointer to the current cell).  Cells are 4 bytes wide.
 *
 * Runs of '+', '-', '>' and '<' are folded into one instruction using
 * countc().  Each '[' gets a numbered backward/forward label pair; the
 * numbers of the currently open loops are kept on a small fixed-size stack.
 *
 * Malformed input is reported on stderr and the offending bracket is ignored
 * instead of overrunning the label stack:
 *   - a ']' without a matching '[' emits nothing;
 *   - a '[' nested deeper than the label stack allows emits nothing, so the
 *     generated code is not a faithful translation in that case.
 *
 * xjit: the JIT context; it is not named in this function's own text and is
 *       presumably picked up implicitly by the emitter macros -- TODO confirm.
 */
void
codegen(xJIT *xjit)
{
  xOperand *pputc, *pgetc, *stack;
  int c;                          /* int, not char: must be able to hold EOF */
  const void *code;               /* not referenced below; kept as in the original */
  int lstack[100], *lcur = lstack, lno = 0;
  int *const lend = lstack + sizeof lstack / sizeof lstack[0];

  pputc = rbx;
  pgetc = rbp;
  stack = r12;

  push(rbx);
  push(rbp);
  push(r12);
  mov(pputc, rdi);                /* putchar */
  mov(pgetc, rsi);                /* getchar */
  mov(stack, rdx);                /* stack */

  while ((c = getchar()) != EOF) {
    switch (c) {
    case '+':
    case '-':
      {
        int cnt;

        /* Length of the run of identical characters, as reported by countc(). */
        cnt = countc(c);
        if (cnt == 1) {
          if (c == '+') {
            inc(dword(stack));
          }
          else {
            dec(dword(stack));
          }
        }
        else {
          addi(dword(stack), c == '+' ? cnt : -cnt);
        }
      }
      break;
    case '>':
    case '<':
      {
        int cnt;

        cnt = countc(c);
        /* Move the cell pointer by cnt cells of 4 bytes each. */
        addi(stack, 4 * (c == '>' ? cnt : -cnt));
      }
      break;
    case '.':
      {
        mov(rdi, dword(stack));
        call(pputc,XJIT_CALL_OPERAND);
      }
      break;
    case ',':
      {
        call(pgetc,XJIT_CALL_OPERAND);
        mov(dword(stack), eax);
      }
      break;
    case '[':
      {
        if (lcur == lend) {
          fprintf(stderr, "codegen: loops nested deeper than %d; '[' ignored\n",
                  (int)(lend - lstack));
          break;
        }
        L(numl(lno, 'B'));      /* backward label */
        mov(eax, dword(stack));
        test(eax, eax);
        jz(numl(lno, 'F'), XJIT_LABEL_NEAR);
        *lcur++ = lno++;
      }
      break;
    case ']':
      {
        int no;

        if (lcur == lstack) {
          fprintf(stderr, "codegen: unmatched ']' ignored\n");
          break;
        }
        no = *--lcur;
        jmp(numl(no, 'B'), XJIT_JMP_LABEL);
        L(numl(no, 'F'));       /* forward label */
      }
      break;
    default:
      /* Every other character is ignored. */
      break;
    }
  }

  pop(r12);
  pop(rbp);
  pop(rbx);
  ret();
}
Exemplo n.º 8
0
/*
 * main_40: stack-machine routine that builds two lists and prints each one
 * together with its length and sum.
 *
 * NOTE(review): the opcodes and the callees (List_28, push_front_30,
 * pop_front_33, get_element_29, print_13, println_15) are defined outside
 * this excerpt; the annotations are a reading of the instruction sequence and
 * should be confirmed against the VM definition.  Integer arguments passed to
 * print_13 presumably index string constants.
 */
entry main_40() {
new_env(0, 3);
/* slot 1 = List_28(new object, 0). */
ildc(1);
newobj(2);
ildc(0);
call(List_28);
store();
pop();
/* slot 0 = 1 (loop counter). */
ildc(0);
ildc(1);
store();
pop();
label6:
/* Loop condition: slot 0 < 6; leave via label7 when the result is zero. */
ildc(0);
load();
ildc(6);
ilt();
jz(label7);
/* slot 1 = push_front_30(slot 1, slot 0). */
ildc(1);
ildc(1);
load();
ildc(0);
load();
call(push_front_30);
store();
pop();
/* label8 is not the target of any jump within this routine. */
label8:
/* slot 0 = slot 0 + 1; the leftover stack values are discarded. */
ildc(0);
dup();
load();
swap();
dup();
load();
ildc(1);
iadd();
store();
pop();
pop();
jmp(label6);
label7:
/* print_36(slot 1). */
ildc(1);
load();
call(print_36);
pop();
/* print_13(6), then println_15(length_34(slot 1)). */
ildc(6);
call(print_13);
pop();
ildc(1);
load();
call(length_34);
call(println_15);
pop();
/* print_13(9), then println_15(sum_35(slot 1)). */
ildc(9);
call(print_13);
pop();
ildc(1);
load();
call(sum_35);
call(println_15);
pop();
/* slot 2 = List_28(new object, get_element_29(slot 1)). */
ildc(2);
newobj(2);
ildc(1);
load();
call(get_element_29);
call(List_28);
store();
pop();
/* slot 1 = pop_front_33(slot 1). */
ildc(1);
ildc(1);
load();
call(pop_front_33);
store();
pop();
label9:
/* Loop condition: slot 1 != null; leave via label10 when the result is zero. */
ildc(1);
load();
null();
onotequal();
jz(label10);
/* slot 2 = push_front_30(slot 2, get_element_29(slot 1)). */
ildc(2);
ildc(2);
load();
ildc(1);
load();
call(get_element_29);
call(push_front_30);
store();
pop();
/* slot 1 = pop_front_33(slot 1). */
ildc(1);
ildc(1);
load();
call(pop_front_33);
store();
pop();
jmp(label9);
label10:
/* print_36(slot 2). */
ildc(2);
load();
call(print_36);
pop();
/* print_13(11), then println_15(length_34(slot 2)). */
ildc(11);
call(print_13);
pop();
ildc(2);
load();
call(length_34);
call(println_15);
pop();
/* print_13(14), then println_15(sum_35(slot 2)). */
ildc(14);
call(print_13);
pop();
ildc(2);
load();
call(sum_35);
call(println_15);
pop();
/* Return null. */
null();
ret();
}