/*
 * Double-precision multiply helper.
 *
 * Resets the exception flags in env->fp_status to 0, multiplies t0 by t1
 * using that status block, then calls update_fpscr() with the helper's
 * return address before handing back the product.
 */
float64 helper_fmul_DT(CPUSH4State *env, float64 t0, float64 t1)
{
    float_status *status = &env->fp_status;
    float64 product;

    set_float_exception_flags(0, status);
    product = float64_mul(t0, t1, status);
    update_fpscr(env, GETPC());

    return product;
}
/*
 * Test driver: multiplies each a_input[i] by b_input[i], prints the operands,
 * the expected value and the computed value, and returns the number of
 * iterations whose result equals z_output[i].
 */
int main ()
{
    int main_result = 0;
    int i;

    for (i = 0; i < N; i++) {
        float64 x1 = a_input[i];
        float64 x2 = b_input[i];
        float64 result = float64_mul (x1, x2);

        /* Count matching vectors. */
        if (result == z_output[i]) {
            main_result++;
        }

        printf ("a_input=%016llx b_input=%016llx expected=%016llx output=%016llx\n",
                a_input[i], b_input[i], z_output[i], result);
    }

    return main_result;
}
/*
 * 64-bit FP multiply and subtract RR
 *
 * Stores (fregs[f2] * fregs[f3]) - fregs[f1] back into fregs[f1], using
 * env->fpu_status for both operations.
 */
void HELPER(msdbr)(CPUS390XState *env, uint32_t f1, uint32_t f3, uint32_t f2)
{
    float_status *status = &env->fpu_status;
    float64 product;

    HELPER_LOG("%s: f1 %d f2 %d f3 %d\n", __func__, f1, f2, f3);

    product = float64_mul(env->fregs[f2].d, env->fregs[f3].d, status);
    env->fregs[f1].d = float64_sub(product, env->fregs[f1].d, status);
}
/*
 * Double-precision multiply on raw 64-bit operands.
 *
 * t0 and t1 are reinterpreted as float64 through CPU_DoubleU, multiplied
 * using the file-scope env's fp_status, and the product is returned as its
 * 64-bit pattern.
 */
uint64_t helper_fmul_DT(uint64_t t0, uint64_t t1)
{
    CPU_DoubleU lhs;
    CPU_DoubleU rhs;

    lhs.ll = t0;
    rhs.ll = t1;

    lhs.d = float64_mul(lhs.d, rhs.d, &env->fp_status);

    return lhs.ll;
}
/*
 * Squares op1 and, unless the square is a NaN, forces the sign of the
 * result to bit 0 of op2. A NaN result is returned untouched.
 */
static float64 float64_ftsmul(float64 op1, uint64_t op2, float_status *stat)
{
    float64 squared = float64_mul(op1, op1, stat);

    if (float64_is_any_nan(squared)) {
        return squared;
    }

    return float64_set_sign(squared, op2 & 1);
}
/*
 * Series approximation of sin(rad).
 *
 * Starts with the term rad and repeatedly derives the next term as
 *     term * (-rad^2) / ((2k) * (2k + 1)),   k = 1, 2, ...
 * adding each one to the running sum. Iteration stops once the magnitude
 * of the latest term is no longer >= 0.00001.
 *
 * NOTE(review): the literal 3 passed as the last argument to the arithmetic
 * and comparison routines is forwarded unchanged; its meaning is defined by
 * those routines and is not visible here.
 */
float64 float64_sin(float64 rad)
{
    float64 sum = rad;
    float64 term = rad;
    float64 neg_rad_sq = float64_neg(float64_mul(rad, rad, 3));
    int k = 1;

    do {
        term = float64_div(float64_mul(term, neg_rad_sq, 3),
                           int32_to_float64((2 * k) * (2 * k + 1)), 3);
        sum = float64_add(sum, term, 3);
        k++;
    } while (float64_ge(float64_abs(term),
                        0x3ee4f8b588e368f1ULL /* 0.00001 */, 3));

    return sum;
}
/*
 * 64-bit FP multiplication RM
 *
 * Loads a 64-bit value from guest address a2, multiplies fregs[f1] by it
 * using env->fpu_status, and writes the product back to fregs[f1].
 */
void HELPER(mdb)(CPUS390XState *env, uint32_t f1, uint64_t a2)
{
    float64 v1 = env->fregs[f1].d;
    CPU_DoubleU v2;

    v2.ll = cpu_ldq_data(env, a2);
    /*
     * Log both operands as raw bit patterns in hex. The memory operand is
     * passed through its integer view (v2.ll) so the "0x" prefix is followed
     * by hex digits rather than a decimal rendering.
     */
    HELPER_LOG("%s: multiplying 0x%lx from f%d and 0x%lx\n", __func__,
               v1, f1, v2.ll);
    env->fregs[f1].d = float64_mul(v1, v2.d, &env->fpu_status);
}
/*
 * 64-bit FP multiply and add RM
 *
 * Loads a 64-bit value from guest address a2, multiplies it by fregs[f3],
 * and adds the product to fregs[f1] (result stored in fregs[f1]).
 */
void HELPER(madb)(CPUS390XState *env, uint32_t f1, uint64_t a2, uint32_t f3)
{
    float_status *status = &env->fpu_status;
    CPU_DoubleU mem_operand;
    float64 product;

    HELPER_LOG("%s: f1 %d a2 0x%lx f3 %d\n", __func__, f1, a2, f3);

    mem_operand.ll = cpu_ldq_data(env, a2);
    product = float64_mul(mem_operand.d, env->fregs[f3].d, status);
    env->fregs[f1].d = float64_add(env->fregs[f1].d, product, status);
}
/*
 * Double-precision multiply with a special case for zero times infinity.
 *
 * Both inputs first go through float64_squash_input_denormal(). If one
 * operand is zero and the other is infinity, the result is 2.0 carrying the
 * XOR of the two operand signs; otherwise it is the ordinary product.
 * fpstp is the float_status to use, passed as an untyped pointer.
 */
float64 HELPER(vfp_mulxd)(float64 a, float64 b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float64_squash_input_denormal(a, fpst);
    b = float64_squash_input_denormal(b, fpst);

    if ((float64_is_zero(a) && float64_is_infinity(b)) ||
        (float64_is_infinity(a) && float64_is_zero(b))) {
        /* 2.0 with the sign bit set to sign(A) XOR sign(B) */
        return make_float64((1ULL << 62) |
                            ((float64_val(a) ^ float64_val(b)) & (1ULL << 63)));
    }

    return float64_mul(a, b, fpst);
}
void *mult() { int i, main_result = 0; //#pragma omp parallel for num_threads(OMP_ACCEL) private(i) for (i = 0; i < mulN; i++) { float64 result; // tid = omp_get_thread_num(); float64 x1 = mul_a_input[i]; float64 x2 = mul_b_input[i]; result = float64_mul(x1, x2, 1); if (result != mul_z_output[i]) printf("result = %lld, expected = %lld\n", result, mul_z_output[i]); main_result += (result == mul_z_output[i]); // temp[tid] += (result == mul_z_output[i]); } printf("mult result = %d\n", main_result); pthread_exit((void *)main_result); }
int main () { int main_result; int i,m,n,a; m= 16; n= 8; main_result = 0; for (i = 0; i < N; i++) { buff[i*2+0] = a_input[i]; buff[i*2+1] = b_input[i]; } *((volatile unsigned int *)0xf000f008) = (unsigned int)(&buff[0]); *((volatile unsigned int *)0xf000f00C) = 2*N*8; for(i=0;i<N;i++) { a = float64_mul (&buff[2*i], m, &result[i], n); } for(i=0;i<N;i++) { //printf("%llx ",result[i]); main_result += (result[i] != z_output[i]); } // printf // ("a_input=%016llx b_input=%016llx expected=%016llx output=%016llx (%lf)\n", // a_input[i], b_input[i], z_output[i], result, // ullong_to_double (result)); printf ("%d\n", main_result); return main_result; }
/*
 * Test driver: multiplies each a_input[i] by b_input[i], prints the operands,
 * the expected value, the computed bit pattern and its ullong_to_double()
 * conversion, then prints and returns the number of results that differ
 * from z_output[i].
 */
int main ()
{
    int main_result = 0;
    int i;

    for (i = 0; i < N; i++) {
        float64 x1 = a_input[i];
        float64 x2 = b_input[i];
        float64 result = float64_mul (x1, x2);

        /* Count mismatching vectors. */
        if (result != z_output[i]) {
            main_result++;
        }

        printf ("a_input=%016llx b_input=%016llx expected=%016llx output=%016llx (%lf)\n",
                a_input[i], b_input[i], z_output[i], result,
                ullong_to_double (result));
    }

    printf ("%d\n", main_result);
    return main_result;
}
/*
 * 64-bit FP multiplication RR
 *
 * Replaces fregs[f1] with fregs[f1] * fregs[f2], using env->fpu_status.
 */
void HELPER(mdbr)(CPUS390XState *env, uint32_t f1, uint32_t f2)
{
    float64 lhs = env->fregs[f1].d;
    float64 rhs = env->fregs[f2].d;

    env->fregs[f1].d = float64_mul(lhs, rhs, &env->fpu_status);
}
/* Returns A * B by delegating to float64_mul(). */
float64 __muldf3(float64 A, float64 B)
{
    float64 product = float64_mul(A, B);

    return product;
}
unsigned int DoubleCPDO(const unsigned int opcode) { float64 rFm, rFn; unsigned int Fd, Fm, Fn, nRc = 1; //printk("DoubleCPDO(0x%08x)\n",opcode); Fm = getFm(opcode); if (CONSTANT_FM(opcode)) { rFm = getDoubleConstant(Fm); } else { switch (fpa11->fType[Fm]) { case typeSingle: rFm = float32_to_float64(fpa11->fpreg[Fm].fSingle); break; case typeDouble: rFm = fpa11->fpreg[Fm].fDouble; break; case typeExtended: // !! patb //printk("not implemented! why not?\n"); //!! ScottB // should never get here, if extended involved // then other operand should be promoted then // ExtendedCPDO called. break; default: return 0; } } if (!MONADIC_INSTRUCTION(opcode)) { Fn = getFn(opcode); switch (fpa11->fType[Fn]) { case typeSingle: rFn = float32_to_float64(fpa11->fpreg[Fn].fSingle); break; case typeDouble: rFn = fpa11->fpreg[Fn].fDouble; break; default: return 0; } } Fd = getFd(opcode); /* !! this switch isn't optimized; better (opcode & MASK_ARITHMETIC_OPCODE)>>24, sort of */ switch (opcode & MASK_ARITHMETIC_OPCODE) { /* dyadic opcodes */ case ADF_CODE: fpa11->fpreg[Fd].fDouble = float64_add(rFn,rFm); break; case MUF_CODE: case FML_CODE: fpa11->fpreg[Fd].fDouble = float64_mul(rFn,rFm); break; case SUF_CODE: fpa11->fpreg[Fd].fDouble = float64_sub(rFn,rFm); break; case RSF_CODE: fpa11->fpreg[Fd].fDouble = float64_sub(rFm,rFn); break; case DVF_CODE: case FDV_CODE: fpa11->fpreg[Fd].fDouble = float64_div(rFn,rFm); break; case RDF_CODE: case FRD_CODE: fpa11->fpreg[Fd].fDouble = float64_div(rFm,rFn); break; #if 0 case POW_CODE: fpa11->fpreg[Fd].fDouble = float64_pow(rFn,rFm); break; case RPW_CODE: fpa11->fpreg[Fd].fDouble = float64_pow(rFm,rFn); break; #endif case RMF_CODE: fpa11->fpreg[Fd].fDouble = float64_rem(rFn,rFm); break; #if 0 case POL_CODE: fpa11->fpreg[Fd].fDouble = float64_pol(rFn,rFm); break; #endif /* monadic opcodes */ case MVF_CODE: fpa11->fpreg[Fd].fDouble = rFm; break; case MNF_CODE: { unsigned int *p = (unsigned int*)&rFm; p[1] ^= 0x80000000; fpa11->fpreg[Fd].fDouble = 
rFm; } break; case ABS_CODE: { unsigned int *p = (unsigned int*)&rFm; p[1] &= 0x7fffffff; fpa11->fpreg[Fd].fDouble = rFm; } break; case RND_CODE: case URD_CODE: fpa11->fpreg[Fd].fDouble = int32_to_float64(float64_to_int32(rFm)); break; case SQT_CODE: fpa11->fpreg[Fd].fDouble = float64_sqrt(rFm); break; #if 0 case LOG_CODE: fpa11->fpreg[Fd].fDouble = float64_log(rFm); break; case LGN_CODE: fpa11->fpreg[Fd].fDouble = float64_ln(rFm); break; case EXP_CODE: fpa11->fpreg[Fd].fDouble = float64_exp(rFm); break; case SIN_CODE: fpa11->fpreg[Fd].fDouble = float64_sin(rFm); break; case COS_CODE: fpa11->fpreg[Fd].fDouble = float64_cos(rFm); break; case TAN_CODE: fpa11->fpreg[Fd].fDouble = float64_tan(rFm); break; case ASN_CODE: fpa11->fpreg[Fd].fDouble = float64_arcsin(rFm); break; case ACS_CODE: fpa11->fpreg[Fd].fDouble = float64_arccos(rFm); break; case ATN_CODE: fpa11->fpreg[Fd].fDouble = float64_arctan(rFm); break; #endif case NRM_CODE: break; default: { nRc = 0; } } if (0 != nRc) fpa11->fType[Fd] = typeDouble; return nRc; }
/*
 * Raises rFn to the power rFm, computed as exp(rFm * ln(rFn)) with the
 * float64_ln / float64_mul / float64_exp routines.
 */
float64 float64_pow(float64 rFn,float64 rFm)
{
    float64 log_base = float64_ln(rFn);
    float64 exponent = float64_mul(rFm, log_base);

    return float64_exp(exponent);
}