/*
 * Computes pi with the Gauss-Legendre (arithmetic-geometric mean) iteration
 * using the gmpxx wrapper and prints it to stdout.
 *
 * x0/y0 hold the arithmetic/geometric means, Z is the running correction
 * term, and n is the power-of-two weight applied to each correction.
 * Returns 0.
 */
int main(void)
{
    /* Number of decimal places printed. */
    const int output_digits = 100000;
    /* log2(10): bits needed to represent one decimal digit. */
    const double bits_per_digit = 3.3219280948873623;
    /* Extra bits that absorb the rounding error accumulated by the loop. */
    const unsigned long guard_bits = 64;

    /*
     * The working precision must cover every printed digit.  The previous
     * fixed value of 300000 bits is only about 90,300 decimal digits, so the
     * tail of the 100000-digit output was not meaningful.
     */
    mpf_set_default_prec((unsigned long)(output_digits * bits_per_digit) + 1 + guard_bits);

    mpf_class x0, y0, resA, resB, Z;
    x0 = 1;
    y0 = 0.5;
    Z = 0.25;
    mpf_sqrt(y0.get_mpf_t(), y0.get_mpf_t());   /* y0 = 1/sqrt(2) */

    int n = 1;
    /* Each pass performs two AGM steps, ping-ponging between the two pairs. */
    for (int i = 0; i < 8; i++) {
        agm(resA, resB, x0, y0);
        Z -= n * (resA - x0) * (resA - x0);
        n *= 2;

        agm(x0, y0, resA, resB);
        Z -= n * (x0 - resA) * (x0 - resA);
        n *= 2;
    }

    /* pi ~= a^2 / t once the two means have converged. */
    x0 = x0 * x0 / Z;
    gmp_printf("%.*Ff\n", output_digits, x0.get_mpf_t());
    return 0;
}
/*
 * One arithmetic-geometric-mean step.
 *
 *   rop1 <- (op1 + op2) / 2      arithmetic mean
 *   rop2 <- sqrt(op1 * op2)      geometric mean
 *
 * rop1 is written before the product is formed, exactly as before.
 */
void agm(mpf_class& rop1, mpf_class& rop2, const mpf_class& op1, const mpf_class& op2)
{
    rop1 = (op1 + op2) / 2;

    mpf_mul(rop2.get_mpf_t(), op1.get_mpf_t(), op2.get_mpf_t());
    mpf_sqrt(rop2.get_mpf_t(), rop2.get_mpf_t());
}
/*
 * Computes pi with the Gauss-Legendre (arithmetic-geometric mean) iteration
 * on raw mpf_t values and prints it to stdout.
 *
 * x0/y0 and resA/resB alternate as input and output of agm(); Z is the
 * running correction term and n its power-of-two weight.  Returns 0.
 */
int main (void)
{
    /* Number of decimal places printed. */
    const int output_digits = 100000;
    /* log2(10): bits needed to represent one decimal digit. */
    const double bits_per_digit = 3.3219280948873623;
    /* Extra bits that absorb the rounding error accumulated by the loop. */
    const unsigned long guard_bits = 64;

    mpf_t x0, y0, resA, resB, Z, var;
    int n = 1;

    /*
     * The working precision must cover every printed digit.  The previous
     * fixed value of 300000 bits is only about 90,300 decimal digits, so the
     * tail of the 100000-digit output was not meaningful.
     */
    mpf_set_default_prec ((unsigned long)(output_digits * bits_per_digit) + 1 + guard_bits);

    mpf_init_set_ui (x0, 1);
    mpf_init_set_d (y0, 0.5);
    mpf_sqrt (y0, y0);              /* y0 = 1/sqrt(2) */
    mpf_init (resA);
    mpf_init (resB);
    mpf_init_set_d (Z, 0.25);
    mpf_init (var);

    /* Each pass performs two AGM steps, ping-ponging between the two pairs. */
    for (int i = 0; i < 8; i++) {
        agm (x0, y0, resA, resB);
        mpf_sub (var, resA, x0);
        mpf_mul (var, var, var);
        mpf_mul_ui (var, var, n);
        mpf_sub (Z, Z, var);
        n += n;

        agm (resA, resB, x0, y0);
        mpf_sub (var, x0, resA);
        mpf_mul (var, var, var);
        mpf_mul_ui (var, var, n);
        mpf_sub (Z, Z, var);
        n += n;
    }

    /* pi ~= a^2 / t once the two means have converged. */
    mpf_mul (x0, x0, x0);
    mpf_div (x0, x0, Z);
    gmp_printf ("%.*Ff\n", output_digits, x0);

    mpf_clear (x0);
    mpf_clear (y0);
    mpf_clear (resA);
    mpf_clear (resB);
    mpf_clear (Z);
    mpf_clear (var);
    return 0;
}
/*
 * Stores the modulus of the complex value x in r:
 *
 *     r = sqrt(Re(x)^2 + Im(x)^2)
 *
 * The two squares are staged in mpfc_mpf_temp[0] and mpfc_mpf_temp[1], so
 * both scratch slots are overwritten by this call.
 */
void mpfc_abs(mpf_t r,mpfc_ptr x)
{
    /* The two squares are independent of each other. */
    mpf_mul(mpfc_mpf_temp[1], x->Im, x->Im);
    mpf_mul(mpfc_mpf_temp[0], x->Re, x->Re);

    mpf_add(r, mpfc_mpf_temp[0], mpfc_mpf_temp[1]);
    mpf_sqrt(r, r);
}
/*
 * Thread body computing
 *
 *     y1 = (1 - b) / (1 + b),   b = (1 - y0^4)^(1/4)
 *
 * from vars->y0.  The result is left in vars->y1; vars->aux_y1 and
 * vars->aux_y2 are used as scratch.  The function ends the calling thread
 * with pthread_exit(NULL), so the trailing return is never reached.
 */
void *calc_y(thr_gmpf_t *vars)
{
    /* b = fourth root of (1 - y0^4), built up in place in y1. */
    mpf_pow_ui(vars->y1, vars->y0, 4);
    mpf_ui_sub(vars->y1, 1, vars->y1);
    mpf_sqrt(vars->y1, vars->y1);
    mpf_sqrt(vars->y1, vars->y1);

    /* Denominator and numerator only read y1, so their order is free. */
    mpf_add_ui(vars->aux_y2, vars->y1, 1);      /* 1 + b */
    mpf_ui_sub(vars->aux_y1, 1, vars->y1);      /* 1 - b */

    mpf_div(vars->y1, vars->aux_y1, vars->aux_y2);

    pthread_exit(NULL);
    return NULL;
}
/*
 * Computes res = sqrt(a).
 *
 * Returns 0 on success and -1 when either pointer is NULL or when a is
 * negative.  res is left untouched on failure.
 */
int sg_big_float_sqrt(sg_big_float_t *a, sg_big_float_t *res)
{
    if (!a || !res)
        return -1;

    /*
     * mpf_sqrt raises a GMP error for a negative operand instead of
     * returning, so reject that case with this function's own error code.
     */
    if (mpf_sgn(a->mpf) < 0)
        return -1;

    mpf_sqrt(res->mpf, a->mpf);
    return 0;
}
/*
 * Returns (sqrt(square_) - minus) / div as a floating-point value.
 *
 * square_ is converted to mpf_class first; the square root is taken with
 * mpf_sqrt at that value's precision.
 */
mpf_class x_mpf(const mpz_class& square_, const mpz_class& minus, const mpz_class& div)
{
    mpf_class root = square_;

    /* mpf_sqrt accepts the same variable as source and destination. */
    mpf_sqrt(root.get_mpf_t(), root.get_mpf_t());

    root = (root - minus) / div;
    return root;
}
int main(void) { clock_t begin, end; mpf_set_default_prec(BITS_PER_DIGIT*DIGITS); //mpf_set_default_prec(4096); begin = clock(); mpf_init(x); mpf_init(y); mpf_init(p); mpf_init(aux1); mpf_init(aux2); mpf_init(sqrtx); mpf_init(invsqrtx); /* x = sqrt(2)*/ mpf_set_ui(x, 2); mpf_sqrt(x, x); /* y = sqrt(sqrt(2)) = sqrt(x)*/ mpf_sqrt(y, x); /* p = 2 + sqrt(2) = 2 + x*/ mpf_add_ui(p, x, 2); for (i=0; i<24; i++) { mpf_sqrt(sqrtx, x); mpf_ui_div(invsqrtx, 1, sqrtx); pthread_create(&t1, NULL, thread1, NULL); pthread_create(&t2, NULL, thread2, NULL); pthread_join(t1, NULL); pthread_join(t2, NULL); mpf_div(p, aux1, aux2); //Para ver os valores de pi a cada iteracao //mpf_out_str(stdout, 10, DIGITS, p); } mpf_out_str(stdout, 10, DIGITS, p); mpf_clear(x); mpf_clear(y); mpf_clear(p); mpf_clear(aux1); mpf_clear(aux2); mpf_clear(sqrtx); mpf_clear(invsqrtx); end = clock(); printf("Took %lfs\n", (double)(end-begin)/CLOCKS_PER_SEC); pthread_exit(0); }
// // The core Gauss - Legendre routine. // On input, 'bits' is the desired precision in bits. // On output, 'pi' contains the calculated value. // static void calculatePi( unsigned bits, mpf_class & pi ) { mpf_class lastPi( 0.0 ); mpf_class scratch; // variables per the formal Gauss - Legendre formulae mpf_class a; mpf_class b; mpf_class t; mpf_class x; mpf_class y; unsigned p = 1; // initial conditions a = 1; // b := 1 / sqrt( 2 ) mpf_sqrt_ui( b.get_mpf_t( ), 2 ); b = 1.0 / b; t = 0.25; for( ;; ) { x = ( a + b )/2; // y := sqrt( ab ) y = a * b; mpf_sqrt( y.get_mpf_t( ), y.get_mpf_t( ) ); // t := t - p * ( a - x )**2 scratch = a - x; scratch *= scratch; scratch *= p; t -= scratch; a = x; b = y; p <<= 1; // pi := ( ( a + b )**2 ) / 4t pi = a + b; pi *= pi; pi /= ( 4 * t ); // if pi == lastPi, within the requested precision, we're done if ( mpf_eq( pi.get_mpf_t( ), lastPi.get_mpf_t( ), bits ) ) { break; } lastPi = pi; } }
/*
 * Reads a decimal number from argv[1], computes its square root at
 * 10000-bit precision and prints the input, the root (200 decimal places)
 * and the root squared again.
 *
 * Returns 0 on success and 1 when the argument is missing, is not a valid
 * base-10 number, or is negative.
 */
int main (int argc, char *argv[])
{
    mpf_t sq_me, sq_out, test;
    int status = 0;

    /* argv[1] is only valid when an argument was actually supplied. */
    if (argc < 2) {
        fprintf (stderr, "usage: %s <non-negative decimal number>\n",
                 argc > 0 ? argv[0] : "sqrt");
        return 1;
    }

    mpf_set_default_prec (10000);
    mpf_init (sq_me);
    mpf_init (sq_out);
    mpf_init (test);

    if (mpf_set_str (sq_me, argv[1], 10) != 0) {
        /* mpf_set_str returns -1 when the string is not a valid number. */
        fprintf (stderr, "not a valid base-10 number: %s\n", argv[1]);
        status = 1;
    } else if (mpf_sgn (sq_me) < 0) {
        /* mpf_sqrt raises a GMP error for a negative operand. */
        fprintf (stderr, "cannot take the square root of a negative number: %s\n", argv[1]);
        status = 1;
    } else {
        mpf_sqrt (sq_out, sq_me);
        mpf_mul (test, sq_out, sq_out);

        gmp_printf ("Input: %Ff\n\n", sq_me);
        gmp_printf ("Square root: %.200Ff\n\n", sq_out);
        gmp_printf ("Re-squared: %Ff\n\n", test);
    }

    mpf_clear (sq_me);
    mpf_clear (sq_out);
    mpf_clear (test);
    return status;
}
// The Brent-Salamin algorithm int main(int argc, char* argv[]) { if (argc < 2) return -1; int n = (int)strtol(argv[1], NULL, 10); mpf_set_default_prec(1000); mpf_t a, b, t, c, sum; // a=1 mpf_init_set_ui(a, 1); mpf_init_set_ui(sum, 0); mpf_init(b); mpf_init(t); mpf_init(c); mpf_init(sum); // b=1/sqrt(2) mpf_sqrt_ui(b, 2); mpf_ui_div(b, 1, b); // n次迭代的误差小于\frac{2^{n+9}}{20^{2n+1}} for (int i = 1; i <= n; ++i) { // t=(a+b)/2 mpf_add(t, a, b); mpf_div_ui(t, t, 2); // b=sqrt(a*b); mpf_mul(b, a, b); mpf_sqrt(b, b); // a=t mpf_swap(t, a); mpf_mul(t, a, a); mpf_mul(c, b, b); mpf_sub(c, t, c); mpf_mul_2exp(c, c, i + 1); mpf_add(sum, sum, c); } mpf_mul(t, a, a); mpf_mul_ui(t, t, 4); mpf_ui_sub(sum, 1, sum); mpf_div(t, t, sum); mpf_out_str(stdout, 10, 0, t); printf("\n"); mpf_clear(a); mpf_clear(b); mpf_clear(t); mpf_clear(c); mpf_clear(sum); return 0; }
//------------------------------------------------------------------------------ // Name: //------------------------------------------------------------------------------ knumber_base *knumber_float::sqrt() { if(sign() < 0) { delete this; return new knumber_error(knumber_error::ERROR_UNDEFINED); } #ifdef KNUMBER_USE_MPFR mpfr_t mpfr; mpfr_init_set_f(mpfr, mpf_, rounding_mode); mpfr_sqrt(mpfr, mpfr, rounding_mode); mpfr_get_f(mpf_, mpfr, rounding_mode); mpfr_clear(mpfr); #else mpf_sqrt(mpf_, mpf_); #endif return this; }
/*
 * Runs 14 arithmetic-geometric-mean steps starting from x0 = sqrt(1/2) and
 * y0 = 1, then prints both members of the final pair to stdout.
 * Returns 0.
 */
int main (void)
{
    /* Number of decimal places printed for each value. */
    const int output_digits = 20000;
    /* log2(10): bits needed to represent one decimal digit. */
    const double bits_per_digit = 3.3219280948873623;
    /* Extra bits that absorb the rounding error accumulated by the loop. */
    const unsigned long guard_bits = 64;

    mpf_t x0, y0, resA, resB;

    /*
     * The working precision must cover every printed digit.  The previous
     * fixed value of 65568 bits is only about 19,740 decimal digits, fewer
     * than the 20000 that are printed.
     */
    mpf_set_default_prec ((unsigned long)(output_digits * bits_per_digit) + 1 + guard_bits);

    mpf_init_set_ui (y0, 1);
    mpf_init_set_d (x0, 0.5);
    mpf_sqrt (x0, x0);
    mpf_init (resA);
    mpf_init (resB);

    /* Two AGM steps per pass, ping-ponging between the two pairs. */
    for (int i = 0; i < 7; i++) {
        agm (x0, y0, resA, resB);
        agm (resA, resB, x0, y0);
    }

    gmp_printf ("%.*Ff\n", output_digits, x0);
    gmp_printf ("%.*Ff\n\n", output_digits, y0);

    mpf_clear (x0);
    mpf_clear (y0);
    mpf_clear (resA);
    mpf_clear (resB);
    return 0;
}
// Gaunt-type coefficient built from two 3j symbols:
//
//   w3j(lp,l1,l2;0,0,0) * w3j(lp,l1,l2;-mp,m1,m2)
//     * sqrt((2lp+1)(2l1+1)(2l2+1)) / sqrt(4*pi)
//
// with the sign flipped when |mp| is odd.  Returns R(0) when lp+l1+l2 is
// odd or when any |m| exceeds its l.
R gaunt(Int lp, Int l1, Int l2, Int mp, Int m1, Int m2)
{
    // Odd l-sum: coefficient is taken as zero.
    if ((lp + l1 + l2) % Int(2) == Int(1))
        return R(0);

    // Each projection must satisfy |m| <= l (tested in the original order).
    if (NewGaunt::iabs(mp) > lp)
        return R(0);
    if (NewGaunt::iabs(m1) > l1)
        return R(0);
    if (NewGaunt::iabs(m2) > l2)
        return R(0);

    mpf_t product, factor;
    mpf_init(product);
    mpf_init(factor);

    NewGaunt::w3j(product, lp, l1, l2, 0, 0, 0);
    NewGaunt::w3j(factor, lp, l1, l2, -mp, m1, m2);
    mpf_mul(product, product, factor);

    // Reuse 'factor' for sqrt((2lp+1)(2l1+1)(2l2+1)).
    mpf_set_si(factor, (2*lp+1)*(2*l1+1)*(2*l2+1));
    mpf_sqrt(factor, factor);
    mpf_mul(product, product, factor);

    R value;
    value = mpf_get_d(product) / sqrt(4.0*M_PI);
    if (NewGaunt::iabs(mp) % Int(2) == Int(1))
        value = -value;

    mpf_clear(product);
    mpf_clear(factor);
    return value;
}
//double log2(mpf_class l) { // mpf_class r = l; // mpf_class t,t1=l; // int y=1; // mpf_class resp; // do{ // mpf_sqrt(t.get_mpf_t(),t1.get_mpf_t()); // y*=2; // t1=t; // } while (t>2); // resp=y*log2(t.get_d()); // /*mpf_t a; // mpf_init(a); // mpf_class b(l); // //cout<<b<<"--"<<l<<endl; // mpf_div_2exp(a, b.get_mpf_t(), y); // double temp = mpf_get_d(a); // temp = pow(temp, 2); // temp = log(temp) / log(2); // temp = y + temp / 2; // mpf_clear(a); // // cout<<temp<<endl;*/ // return resp.get_d(); // //} mpf_class log2f(mpf_class l){ mpf_class r = l; mpf_class t,t1=l; int y=1; mpf_class resp; do{ mpf_sqrt(t.get_mpf_t(),t1.get_mpf_t()); y*=2; t1=t; } while (t>2); resp=y*log2(t.get_d()); /*mpf_t a; mpf_init(a); mpf_class b(l); //cout<<b<<"--"<<l<<endl; mpf_div_2exp(a, b.get_mpf_t(), y); double temp = mpf_get_d(a); temp = pow(temp, 2); temp = log(temp) / log(2); temp = y + temp / 2; mpf_clear(a); // cout<<temp<<endl;*/ return resp; }
int main() { pthread_t thread_a, thread_b; /* My threads*/ int i; FILE *filePi, *fileTime; clock_t start, end; double cpu_time_used; mpf_set_default_prec(BITS_PER_DIGIT * 11000000); /* Borwein Variable Initialization */ for(i=0; i<2; i++) for(j=0; j<2; j++) mpf_init(params[i][j]); mpf_init(real_pi); mpf_init(y0Aux); mpf_init(y0Aux2); mpf_init(a0Aux); mpf_init(a0Aux2); mpf_init(pi[0]); mpf_init(pi[1]); mpf_init_set_str(error, "1e-10000000", 10); /* Initial value setting */ mpf_sqrt_ui(params[A][0], 2.0); /* a0 = sqrt(2)*/ mpf_mul_ui(params[A][0], params[A][0], 4.0); /* a0 = 4 * sqrt(2) */ mpf_ui_sub(params[A][0], 6.0, params[A][0]); /* a0 = 6 - 4 * sqrt(2) */ mpf_sqrt_ui(params[Y][0], 2.0); /* y0 = sqrt(2) */ mpf_sub_ui(params[Y][0], params[Y][0], 1.0); /* y0 = sqrt(2) - 1 */ mpf_set_ui(pi[0], 0); mpf_set_ui(pi[1], 0); i = 1; j = 1; iteracoes = 0; x = 0; /* Load the reals digits of pi */ filePi = fopen("pi.txt", "r"); gmp_fscanf(filePi, "%Ff", real_pi); fclose(filePi); start = clock(); while(1){ /* y = ( 1 - (1 - y0 ^ 4) ^ 0.25 ) / ( 1 + ( 1 - y0 ^ 4) ^ 0.25 ) */ mpf_pow_ui(y0Aux, params[Y][0], 4); mpf_ui_sub(y0Aux, 1.0, y0Aux); mpf_sqrt(y0Aux, y0Aux); mpf_sqrt(y0Aux, y0Aux); mpf_add_ui(y0Aux2, y0Aux, 1.0); mpf_ui_sub(y0Aux, 1.0, y0Aux); mpf_div(params[Y][1], y0Aux, y0Aux2); /* a = a0 * ( 1 + params[Y][1] ) ^ 4 - 2 ^ ( 2 * i + 3 ) * params[Y][1] * ( 1 + params[Y][1] + params[Y][1] ^ 2 ) */ /* Threads creation */ pthread_create(&thread_a, NULL, calc_a, NULL); pthread_create(&thread_b, NULL, calc_b, NULL); pthread_join(thread_a, NULL); pthread_join(thread_b, NULL); /* 2 ^ ( 2 * i + 3 ) * params[Y][1] * ( 1 + params[Y][1] + params[Y][1] ^ 2 ) */ mpf_mul(a0Aux, a0Aux, a0Aux2); /*a0 * ( 1 + params[Y][1] ) ^ 4*/ mpf_add_ui(a0Aux2, params[Y][1], 1); mpf_pow_ui(a0Aux2, a0Aux2, 4); mpf_mul(a0Aux2, params[A][0], a0Aux2); /* form the entire expression */ mpf_sub(params[A][1], a0Aux2, a0Aux); mpf_set(params[A][0], params[A][1]); mpf_set(params[Y][0], params[Y][1]); 
mpf_ui_div(pi[j], 1, params[A][0]); gmp_printf("\nIteracao %d | pi = %.25Ff", iteracoes, pi[j]); /* Calculate the error */ mpf_sub(pi[(j+1)%2], real_pi, pi[j]); mpf_abs(pi[(j+1) % 2], pi[(j+1) % 2]); if(mpf_cmp(pi[(j+1)%2], error) < 0){ printf("\n%d iteracoes para alcancar 10 milhoes de digitos de corretos.", iteracoes); break; } j = (j+1) % 2; iteracoes++; i++; } end = clock(); cpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC; fileTime = fopen("execution_time.txt", "w"); fprintf(fileTime, "Execution time: %f\n", cpu_time_used); fclose(fileTime); /* Clean up*/ for(i=0; i<2; i++) for(j=0; j<2; j++) mpf_clear(params[i][j]); mpf_clear(pi[0]); mpf_clear(pi[1]); mpf_clear(real_pi); mpf_clear(error); return 0; }
/* Randomised self-check of mpf_sqrt.

   500 times: pick an input precision and a result precision (in limbs)
   in the range [min_prec, max_prec), fill x with mpf_random2, compute
   r = mpf_sqrt (x), and verify

     - r passes MPF_CHECK_FORMAT,
     - r has exactly r_prec limbs,
     - r^2 <= x            (r is not too big),
     - (r + ulp)^2 > x     (r is not too small).

   Any violation prints the operands and calls abort().  */
void check_rand2 (void)
{
  unsigned long max_prec = 20;
  unsigned long min_prec = __GMPF_BITS_TO_PREC (1);
  gmp_randstate_ptr rands = RANDS;
  unsigned long x_prec, r_prec;
  mpf_t x, r, s;
  int i;

  mpf_init (x);
  mpf_init (r);
  mpf_init (s);
  /* s holds r*r, so give it more than twice the largest result precision. */
  refmpf_set_prec_limbs (s, 2*max_prec+10);

  for (i = 0; i < 500; i++)
    {
      /* input precision */
      x_prec = gmp_urandomm_ui (rands, max_prec-min_prec) + min_prec;
      refmpf_set_prec_limbs (x, x_prec);

      /* result precision */
      r_prec = gmp_urandomm_ui (rands, max_prec-min_prec) + min_prec;
      refmpf_set_prec_limbs (r, r_prec);

      mpf_random2 (x, x_prec, 1000);

      mpf_sqrt (r, x);
      MPF_CHECK_FORMAT (r);

      /* Expect to prec limbs of result.  In the current implementation
         there's no stripping of low zero limbs in mpf_sqrt, so size should
         be exactly prec.  */
      if (SIZ(r) != r_prec)
        {
          printf ("mpf_sqrt wrong number of result limbs\n");
          mpf_trace (" x", x);
          mpf_trace (" r", r);
          printf (" r_prec=%lu\n", r_prec);
          printf (" SIZ(r) %ld\n", (long) SIZ(r));
          printf (" PREC(r) %ld\n", (long) PREC(r));
          abort ();
        }

      /* Must have r^2 <= x, since r has been truncated. */
      mpf_mul (s, r, r);
      if (! (mpf_cmp (s, x) <= 0))
        {
          printf ("mpf_sqrt result too big\n");
          mpf_trace (" x", x);
          printf (" r_prec=%lu\n", r_prec);
          mpf_trace (" r", r);
          mpf_trace (" s", s);
          abort ();
        }

      /* Must have (r+ulp)^2 > x, or else r is too small.
         Note that r itself is modified here, so this check has to come
         after the r^2 <= x check above.  */
      refmpf_add_ulp (r);
      mpf_mul (s, r, r);
      if (! (mpf_cmp (s, x) > 0))
        {
          printf ("mpf_sqrt result too small\n");
          mpf_trace (" x", x);
          printf (" r_prec=%lu\n", r_prec);
          mpf_trace (" r+ulp", r);
          mpf_trace (" s", s);
          abort ();
        }
    }

  mpf_clear (x);
  mpf_clear (r);
  mpf_clear (s);
}
/*
 * Pointer-based shim around mpf_sqrt: stores sqrt(*op) in *rop.
 * Both arguments must point at initialised mpf_t values.
 * (A function definition has external linkage by default, so no storage
 * class specifier is needed.)
 */
void _jl_mpf_sqrt(mpf_t* rop, mpf_t* op)
{
    mpf_sqrt(*rop, *op);
}
long int julia(const mpf_t x, const mpf_t xr, long int xres, const mpf_t y, const mpf_t yr, long int yres, mpf_t *c, int flag, long int max_iteration, float *iterations, int my_rank, int p, MPI_Comm comm) { double t0 = MPI_Wtime(); int i,j; //------------julia gmp const double maxRadius = 4.0; // double xi, yi, savex, savex2, savey, radius; mpf_t xi, yi, x_min, x_max, y_min, y_max, savex, savex2, savey, radius, xgap, ygap, savex_a, savex_b, savey_a, savey_b, tmp, tmp1; mpf_init(xi); mpf_init(yi); mpf_init(x_min); mpf_init(x_max); mpf_init(y_min); mpf_init(y_max); mpf_init(savex); mpf_init(savex2); mpf_init(savey); mpf_init(radius); mpf_init(xgap); mpf_init(ygap); mpf_init(savex_a); mpf_init(savex_b); mpf_init(savey_a); mpf_init(savey_b); mpf_init(tmp); mpf_init(tmp1); //double x_min = x - xr; mpf_sub(x_min, x, xr); //double x_max = x + xr; mpf_add(x_max, x, xr); //double y_min = y - yr; mpf_sub(y_min, y, yr); //double y_max = y + yr; mpf_add(y_max, y, yr); // spaceing between x and y points //double xgap = (x_max - x_min) / xres; mpf_sub(xgap, x_max, x_min); mpf_div_ui(xgap, xgap, xres); //double ygap = (y_max - y_min) / yres; mpf_sub(ygap, y_max, y_min); mpf_div_ui(ygap, ygap, yres); //---------------------------- long long int iteration; long long int total_number_iterations = 0; int k = 0; for (j = 0; j < yres; j++){ for (i = 0; i < xres; i++){ //xi = x_min + i * xgap; mpf_mul_ui(tmp, xgap, i); mpf_add(xi, x_min, tmp); //yi = y_min + j * ygap; mpf_mul_ui(tmp, ygap, j); mpf_add(yi, y_min, tmp); //flag betwee[n julia or mandelbrot //savex = flag * c[0] + (1 - flag) * xi; mpf_mul_ui(savex_a, c[0], flag); mpf_mul_ui(savex_b, xi, (1-flag)); mpf_add(savex, savex_a, savex_b); //savey = flag * c[1] + (1 - flag) * yi; mpf_mul_ui(savey_a, c[1], flag); mpf_mul_ui(savey_b, yi, (1-flag)); mpf_add(savey, savey_a, savey_b); //radius = 0; mpf_set_ui(radius, 0); iteration = 0; //while ((radius <= maxRadius) && (iteration < max_iteration)){ while ((mpf_cmp_d(radius, 
maxRadius)<=0) && (iteration < max_iteration)){ //savex2 = xi; mpf_add_ui(savex2, xi, 0); //xi = xi * xi - yi * yi + savex; mpf_mul(xi, xi, xi); mpf_mul(tmp, yi, yi); mpf_sub(xi, xi, tmp); mpf_add(xi, xi, savex); //yi = 2.0f * savex2 * yi + savey; mpf_mul_ui(tmp, savex2, 2); mpf_mul(yi, yi, tmp); mpf_add(yi, yi, savey); //radius = xi * xi + yi * yi; mpf_mul(tmp, xi, xi); mpf_mul(tmp1, yi, yi); mpf_add(radius, tmp, tmp1); iteration++; } total_number_iterations += iteration; float *p = iterations + k*xres + i; //if (radius > maxRadius){ if (mpf_cmp_d(radius, maxRadius)>0){ //float zn = sqrt(xi*xi + yi*yi); mpf_t zn; mpf_init(zn); mpf_mul(tmp, xi, xi); mpf_mul(tmp1, yi, yi); mpf_add(zn, tmp, tmp1); mpf_sqrt(zn, zn); double n = mpf_get_d(zn); //float nu = log(log(zn) / log(2))/log(2); double nu = log(log(n) / log(2))/log(2); //the point has escaped at iteration at any of the iterations 0,1,2,3... *p = iteration + 1 - nu; } else // zij stays within the region up to max_iteration { assert(iteration==max_iteration); *p = -1; } } k++; } //reduce max iteration count long long int total_reduced_iterations = -1; //printf("rank: %i, total_reduced_iterations: %i\n", my_rank, total_number_iterations); MPI_Reduce(&total_number_iterations, &total_reduced_iterations, 1, MPI_LONG_LONG_INT, MPI_SUM, 0, comm); double t4 = MPI_Wtime(); double max_reduced_time = -1; double total_time = t4 - t0; MPI_Reduce(&total_time, &max_reduced_time, 1, MPI_DOUBLE, MPI_MAX, 0, comm); printf("np: %i, time: %f , iterations: %lld\n",p, max_reduced_time, total_reduced_iterations); //clear //printf("proc: %i, total time: %lf sec, init: %lf sec, calc: %lf sec, collect: %lf\n", my_rank, t4-t0, t1-t0, t2-t1, t3-t2); return total_reduced_iterations; }
int main(void) { mp_float a, b, c, d, e; int err; mpf_init_multi(100, &a, &b, &c, &d, &e, NULL); mpf_const_d(&a, 1); draw(&a); mpf_const_d(&b, 2); draw(&b); mpf_const_d(&c, 3); draw(&c); mpf_const_d(&d, 4); draw(&d); mpf_add(&b, &c, &e); printf("2 + 3 == "); draw(&e); mpf_sub(&b, &c, &e); printf("2 - 3 =="); draw(&e); mpf_mul(&b, &c, &e); printf("2 * 3 == "); draw(&e); mpf_div(&b, &c, &e); printf("2 / 3 == "); draw(&e); mpf_add_d(&b, 3, &e); printf("2 + 3 == "); draw(&e); mpf_sub_d(&b, 3, &e); printf("2 - 3 =="); draw(&e); mpf_mul_d(&b, 3, &e); printf("2 * 3 == "); draw(&e); mpf_div_d(&b, 3, &e); printf("2 / 3 == "); draw(&e); mpf_const_d(&e, 0); mpf_add_d(&e, 1, &e); printf("0 + 1 == "); draw(&e); mpf_const_d(&e, 0); mpf_sub_d(&e, 1, &e); printf("0 - 1 == "); draw(&e); printf("\n"); mpf_invsqrt(&d, &e); printf("1/sqrt(4) == 1/2 == "); draw(&e); mpf_invsqrt(&c, &e); printf("1/sqrt(3) == "); draw(&e); mpf_inv(&a, &e); printf("1/1 == "); draw(&e); mpf_inv(&b, &e); printf("1/2 == "); draw(&e); mpf_inv(&c, &e); printf("1/3 == "); draw(&e); mpf_inv(&d, &e); printf("1/4 == "); draw(&e); printf("\n"); mpf_const_pi(&e); printf("Pi == "); draw(&e); printf("\n"); mpf_const_e(&e); printf("e == "); draw(&e); mpf_exp(&c, &e); printf("e^3 == "); draw(&e); mpf_sqrt(&e, &e); printf("sqrt(e^3) == "); draw(&e); mpf_sqr(&e, &e); printf("sqrt(e^3)^2 == "); draw(&e); printf("\n"); mpf_cos(&a, &e); printf("cos(1) == "); draw(&e); mpf_cos(&b, &e); printf("cos(2) == "); draw(&e); mpf_cos(&c, &e); printf("cos(3) == "); draw(&e); mpf_cos(&d, &e); printf("cos(4) == "); draw(&e); mpf_sin(&a, &e); printf("sin(1) == "); draw(&e); mpf_sin(&b, &e); printf("sin(2) == "); draw(&e); mpf_sin(&c, &e); printf("sin(3) == "); draw(&e); mpf_sin(&d, &e); printf("sin(4) == "); draw(&e); mpf_tan(&a, &e); printf("tan(1) == "); draw(&e); mpf_tan(&b, &e); printf("tan(2) == "); draw(&e); mpf_tan(&c, &e); printf("tan(3) == "); draw(&e); mpf_tan(&d, &e); printf("tan(4) == "); draw(&e); mpf_inv(&a, &e); 
mpf_atan(&e, &e); printf("atan(1/1) == "); draw(&e); mpf_inv(&b, &e); mpf_atan(&e, &e); printf("atan(1/2) == "); draw(&e); mpf_inv(&c, &e); mpf_atan(&e, &e); printf("atan(1/3) == "); draw(&e); mpf_inv(&d, &e); mpf_atan(&e, &e); printf("atan(1/4) == "); draw(&e); printf("\n"); #define lntest(x) if ((err = mpf_const_ln_d(&e, x)) != MP_OKAY) { printf("Failed ln(%3d), %d\n", x, err); } else { printf("ln(%3d) == ", x); draw(&e); }; lntest(0); lntest(1); lntest(2); lntest(4); lntest(8); lntest(17); lntest(1000); lntest(100000); lntest(250000); return 0; }
//Le but de la fonction est de calculer le développement en fractions continue jusqu'à un certain rang de racine carrée de kN et de stocker les couples (A_n-1,Q_n) comme décrit dans la section (à venir) cfrac expand(const mpz_t N, const long long unsigned int rang, const mpz_t k) { cfrac res; //Contient l'ensemble des A_n-1 et l'ensemble Q_n mpz_inits(res.N, res.k, res.g, NULL); //Initialisation des variables mpz_t* A = (mpz_t*)malloc((rang+1)*sizeof(mpz_t)); //Le tableau contenant les A_n-1 mpz_t* Q = (mpz_t*)malloc((rang+2)*sizeof(mpz_t)); //Le tableau contenant les Q_n mpz_t* P = (mpz_t*)malloc((rang+1)*sizeof(mpz_t)); //Éléments reliés au Q_n mpz_t* r = (mpz_t*)malloc((rang+1)*sizeof(mpz_t)); //Interviennent dans le calcul des A_n-1 & Q_n mpz_t* q = (mpz_t*)malloc(rang*sizeof(mpz_t)); //Idem mpz_t g, tempz; //Idem, tempz = variable à tout faire mpf_t sqrtkN, tempf, tempf2; //sqrtkN = sqrt(k*N), tempf = variable à tout faire //Valeurs d'initialisation de la boucle mpz_inits(g, A[0], Q[0], r[0], tempz, NULL); mpf_inits(tempf, sqrtkN, tempf2, NULL); //Initialisation des différentes valeurs "indépendantes" mpz_set(tempz, N); mpz_mul(tempz, tempz, k); mpf_set_z(sqrtkN, tempz); mpf_sqrt(sqrtkN, sqrtkN); mpf_floor(tempf, sqrtkN); mpz_set_f(g, tempf); //g = [sqrt(kN)] mpz_set(Q[0], k); mpz_mul(Q[0], Q[0], N); //Q_-1 = kN = Q[0] mpz_set(r[0], g); //r_-1 = r[0] mpz_set_ui(A[0], 1); //A_-1 = A[0] //Calcul de P_0 & Q_0 mpz_init_set_ui(Q[1], 1); //Q_0 = Q[1] mpz_init_set_ui(P[0], 0); //P_0 = P[0] for(long long int i = 0; i < rang; i++) { switch(i) { case 0: //Calcul de q_0 mpz_init_set(q[0], g); //q_0 = [(sqrt(kN) + P_0)/Q_0] avec P_0 = 0 et Q_0 = 1 //Calcul de A_0 mpz_init_set(A[1],A[0]); mpz_mul(A[1], A[1], q[0]); //A_0 = q_0*A_-1 mpz_mod(A[1], A[1], N); //On réduit mod N //Calcul de r_0 mpz_init_set_ui(r[1], 0); //r_0 = P_0 + g - q_0.Q_0 = 0 + g - g.1 = 0 //Calcul de P_1 mpz_init_set(P[1], g); //P_1 = g - r_0 = g - 0 = g //Calcul de Q_1 = Q[2] mpz_init_set(Q[2], r[1]); 
mpz_sub(Q[2], Q[2], r[0]); mpz_mul(Q[2], Q[2], q[0]); mpz_add(Q[2], Q[2], Q[0]); break; default: //Calcul q_i mpz_init(q[i]); mpf_set_z(tempf, P[i]); mpf_set_z(tempf2, Q[i+1]); mpf_add(tempf, tempf, sqrtkN); //sqrt(kN) + P_i mpf_div(tempf, tempf, tempf2); mpf_floor(tempf, tempf); //floor((sqrt(kN) + P_i)/Q_i) mpz_set_f(q[i], tempf); //Calcul de r_n = r[n+1] mpz_init(r[i+1]); mpz_submul(r[i+1], q[i], Q[i+1]); mpz_add(r[i+1], r[i+1], P[i]); mpz_add(r[i+1], r[i+1], g); //Calcul de A_n = A[n+1] mpz_init_set(A[i+1],A[i]); mpz_mul(A[i+1], A[i+1], q[i]); //A_i-1*q_i mpz_add(A[i+1], A[i+1], A[i-1]); //A_i-1*q_i + A_i-2 mpz_mod(A[i+1], A[i+1], N); //réduction modulo N //Calcul P_n+1 mpz_init_set(P[i+1], g); mpz_sub(P[i+1], P[i+1], r[i+1]); //P[n+1] = g - r_n = g - r[n+1] //Calcul Q_n+1 = Q[n+2] mpz_init_set(Q[i+2], r[i+1]); mpz_sub(Q[i+2], Q[i+2], r[i]); //(r_n - r_n-1) mpz_mul(Q[i+2], Q[i+2], q[i]); //q_n(r_n - r_n-1) mpz_add(Q[i+2], Q[i+2], Q[i]); //Q_n-1 + q_n(r_n - r_n-1) break; } } //Test de routine pour voir si le développement de la fraction continue s'est bien passé mpz_t tempsqrt; mpz_init(tempsqrt); mpz_set(tempsqrt, k); mpz_mul(tempsqrt, tempsqrt, N); mpz_sqrt(tempsqrt, tempsqrt); mpz_mul_ui(tempsqrt, tempsqrt, 2); for(int i = 1; i < rang; i++) //Éviter de commencer à i = 0 puisque cela représente Q_-1 qui n'intervient uniquement dans l'algo et non dans le développement en fraction continue { if(mpz_cmp(Q[i], tempsqrt) >= 0) //Si Q_n >= 2sqrt(kN) { res.rang = 0; mpz_clears(g, tempz, NULL); mpf_clears(sqrtkN, tempf, tempf2, NULL); return res; } } //Assignation des tableaux dans le résultat res.A = A; res.Q = Q; res.r = r; res.q = q; res.P = P; mpz_set(res.N, N); mpz_set(res.k, k); res.rang = rang; mpz_set(res.g, g); //Libération de mémoire mpz_clears(g, tempz, NULL); mpf_clears(sqrtkN, tempf, tempf2, NULL); return res; }
/**
 * void calculate_b()
 *
 * Description:
 *   Computes the value of b for the next iteration as the geometric mean
 *   of the current a and b:
 *       b_n[n_count+1] = sqrt(a_n[n_count] * b_n[n_count])
 *   Reads the file-scope a_n, b_n and n_count; n_count is not modified.
 *
 * Input parameters:
 *   -
 *
 * Return parameters:
 *   -
 */
void calculate_b(){
    mpf_mul(b_n[n_count+1], a_n[n_count], b_n[n_count]);
    mpf_sqrt(b_n[n_count+1], b_n[n_count+1]);
}
/*
 * Scans nonces for the M7M hash.
 *
 * pdata is an 80-byte block header (copied into data); the nonce lives in
 * word 19.  Starting at pdata[19], each candidate nonce is hashed until
 * fulltest_m7hash() accepts the result against ptarget, n reaches max_nonce,
 * or work_restart[thr_id].restart becomes set.
 *
 * On return pdata[19] holds the last nonce tried, *hashes_done the number of
 * nonces tried (n - first_nonce + 1), and the return value is the result of
 * fulltest_m7hash() for a found share, 0 otherwise.
 *
 * The arithmetic below defines the hash value, so statement order and the
 * exact GMP calls must not be changed.
 */
int scanhash_m7m_hash(int thr_id, uint32_t *pdata, const uint32_t *ptarget, uint64_t max_nonce, unsigned long *hashes_done)
{
    uint32_t data[32] __attribute__((aligned(128)));
    /* Start of the part of the header that is not covered by the midstate. */
    uint32_t *data_p64 = data + (M7_MIDSTATE_LEN / sizeof(data[0]));
    uint32_t hash[8] __attribute__((aligned(32)));
    uint8_t bhash[7][64] __attribute__((aligned(32)));
    /* n is pre-decremented because the loop increments before use. */
    uint32_t n = pdata[19] - 1;
    const uint32_t first_nonce = pdata[19];
    char data_str[161], hash_str[65], target_str[65];
    uint8_t *bdata = 0;
    mpz_t bns[8];
    int rc = 0;
    int bytes, nnNonce2;

    mpz_t product;
    mpz_init(product);

    for(int i=0; i < 8; i++){
        mpz_init(bns[i]);
    }

    memcpy(data, pdata, 80);

    sph_sha256_context ctx_final_sha256;

    sph_sha256_context ctx_sha256;
    sph_sha512_context ctx_sha512;
    sph_keccak512_context ctx_keccak;
    sph_whirlpool_context ctx_whirlpool;
    sph_haval256_5_context ctx_haval;
    sph_tiger_context ctx_tiger;
    sph_ripemd160_context ctx_ripemd;

    /* Midstates: feed the first M7_MIDSTATE_LEN bytes into each of the seven
       hash functions once, outside the nonce loop. */
    sph_sha256_init(&ctx_sha256);
    sph_sha256 (&ctx_sha256, data, M7_MIDSTATE_LEN);

    sph_sha512_init(&ctx_sha512);
    sph_sha512 (&ctx_sha512, data, M7_MIDSTATE_LEN);

    sph_keccak512_init(&ctx_keccak);
    sph_keccak512 (&ctx_keccak, data, M7_MIDSTATE_LEN);

    sph_whirlpool_init(&ctx_whirlpool);
    sph_whirlpool (&ctx_whirlpool, data, M7_MIDSTATE_LEN);

    sph_haval256_5_init(&ctx_haval);
    sph_haval256_5 (&ctx_haval, data, M7_MIDSTATE_LEN);

    sph_tiger_init(&ctx_tiger);
    sph_tiger (&ctx_tiger, data, M7_MIDSTATE_LEN);

    sph_ripemd160_init(&ctx_ripemd);
    sph_ripemd160 (&ctx_ripemd, data, M7_MIDSTATE_LEN);

    sph_sha256_context ctx2_sha256;
    sph_sha512_context ctx2_sha512;
    sph_keccak512_context ctx2_keccak;
    sph_whirlpool_context ctx2_whirlpool;
    sph_haval256_5_context ctx2_haval;
    sph_tiger_context ctx2_tiger;
    sph_ripemd160_context ctx2_ripemd;

    do {
        data[19] = ++n;
        nnNonce2 = (int)(data[19]/2);
        memset(bhash, 0, 7 * 64);

        /* Resume each hash from its midstate and finish it over the
           remaining 80 - M7_MIDSTATE_LEN bytes (which contain the nonce). */
        ctx2_sha256 = ctx_sha256;
        sph_sha256 (&ctx2_sha256, data_p64, 80 - M7_MIDSTATE_LEN);
        sph_sha256_close(&ctx2_sha256, (void*)(bhash[0]));

        ctx2_sha512 = ctx_sha512;
        sph_sha512 (&ctx2_sha512, data_p64, 80 - M7_MIDSTATE_LEN);
        sph_sha512_close(&ctx2_sha512, (void*)(bhash[1]));

        ctx2_keccak = ctx_keccak;
        sph_keccak512 (&ctx2_keccak, data_p64, 80 - M7_MIDSTATE_LEN);
        sph_keccak512_close(&ctx2_keccak, (void*)(bhash[2]));

        ctx2_whirlpool = ctx_whirlpool;
        sph_whirlpool (&ctx2_whirlpool, data_p64, 80 - M7_MIDSTATE_LEN);
        sph_whirlpool_close(&ctx2_whirlpool, (void*)(bhash[3]));

        ctx2_haval = ctx_haval;
        sph_haval256_5 (&ctx2_haval, data_p64, 80 - M7_MIDSTATE_LEN);
        sph_haval256_5_close(&ctx2_haval, (void*)(bhash[4]));

        ctx2_tiger = ctx_tiger;
        sph_tiger (&ctx2_tiger, data_p64, 80 - M7_MIDSTATE_LEN);
        sph_tiger_close(&ctx2_tiger, (void*)(bhash[5]));

        ctx2_ripemd = ctx_ripemd;
        sph_ripemd160 (&ctx2_ripemd, data_p64, 80 - M7_MIDSTATE_LEN);
        sph_ripemd160_close(&ctx2_ripemd, (void*)(bhash[6]));

        /* Turn the seven digests into big integers bns[0..6]. */
        for(int i=0; i < 7; i++){
            set_one_if_zero(bhash[i]);
            mpz_set_uint512(bns[i],bhash[i]);
        }

        /* bns[7] = sum of the seven digests. */
        mpz_set_ui(bns[7],0);
        for(int i=0; i < 7; i++){
            mpz_add(bns[7], bns[7], bns[i]);
        }

        /* product = (bns[0] * ... * bns[7])^2 */
        mpz_set_ui(product,1);
        for(int i=0; i < 8; i++){
            mpz_mul(product,product,bns[i]);
        }
        mpz_pow_ui(product, product, 2);

        /* First SHA-256 over the exported product.
           NOTE(review): the realloc result is assigned straight back to
           bdata and is not checked for NULL. */
        bytes = mpz_sizeinbase(product, 256);
        bdata = (uint8_t *)realloc(bdata, bytes);
        mpz_export((void *)bdata, NULL, -1, 1, 0, 0, product);

        sph_sha256_init(&ctx_final_sha256);
        sph_sha256 (&ctx_final_sha256, bdata, bytes);
        sph_sha256_close(&ctx_final_sha256, (void*)(hash));

        /* Working precision for the floating-point stage depends on the
           nonce through `digits`. */
        int digits=(int)((sqrt((double)(nnNonce2))*(1.+EPS))/9000+75);
        int iterations=20;
        mpf_set_default_prec((long int)(digits*BITS_PER_DIGIT+16));

        mpz_t magipi;
        mpz_t magisw;
        mpf_t magifpi;
        mpf_t mpa1, mpb1, mpt1, mpp1;
        mpf_t mpa2, mpb2, mpt2, mpp2;
        mpf_t mpsft;

        mpz_init(magipi);
        mpz_init(magisw);
        mpf_init(magifpi);
        mpf_init(mpsft);
        mpf_init(mpa1);
        mpf_init(mpb1);
        mpf_init(mpt1);
        mpf_init(mpp1);

        mpf_init(mpa2);
        mpf_init(mpb2);
        mpf_init(mpt2);
        mpf_init(mpp2);

        uint32_t usw_;
        usw_ = sw_(nnNonce2, SW_DIVS);
        if (usw_ < 1) usw_ = 1;
        mpz_set_ui(magisw, usw_);
        uint32_t mpzscale=mpz_size(magisw);

        for(int i=0; i < NM7M; i++){
            /* Clamp the scale to [1, 1000]. */
            if (mpzscale > 1000) {
                mpzscale = 1000;
            }
            else if (mpzscale < 1) {
                mpzscale = 1;
            }

            /* Start values: a = 1, b = 1/sqrt(2), t = 0.25 * mpzscale, p = 1. */
            mpf_set_ui(mpa1, 1);
            mpf_set_ui(mpb1, 2);
            mpf_set_d(mpt1, 0.25*mpzscale);
            mpf_set_ui(mpp1, 1);
            mpf_sqrt(mpb1, mpb1);
            mpf_ui_div(mpb1, 1, mpb1);
            mpf_set_ui(mpsft, 10);

            /* iterations + 1 passes.  The *1 variables are the current
               state, the *2 variables the next one; they are swapped at the
               end of every pass. */
            for(int j=0; j <= iterations; j++){
                mpf_add(mpa2, mpa1, mpb1);
                mpf_div_ui(mpa2, mpa2, 2);
                mpf_mul(mpb2, mpa1, mpb1);
                mpf_abs(mpb2, mpb2);
                mpf_sqrt(mpb2, mpb2);
                /* t2 = t1 - p1 * sqrt(|a1 - a2|) */
                mpf_sub(mpt2, mpa1, mpa2);
                mpf_abs(mpt2, mpt2);
                mpf_sqrt(mpt2, mpt2);
                mpf_mul(mpt2, mpt2, mpp1);
                mpf_sub(mpt2, mpt1, mpt2);
                mpf_mul_ui(mpp2, mpp1, 2);
                mpf_swap(mpa1, mpa2);
                mpf_swap(mpb1, mpb2);
                mpf_swap(mpt1, mpt2);
                mpf_swap(mpp1, mpp2);
            }

            /* magifpi = (a + b)^2 / 4 / |t| * 10^(digits/2), truncated to
               the integer magipi. */
            mpf_add(magifpi, mpa1, mpb1);
            mpf_pow_ui(magifpi, magifpi, 2);
            mpf_div_ui(magifpi, magifpi, 4);
            mpf_abs(mpt1, mpt1);
            mpf_div(magifpi, magifpi, mpt1);

            mpf_pow_ui(mpsft, mpsft, digits/2);
            mpf_mul(magifpi, magifpi, mpsft);

            mpz_set_f(magipi, magifpi);

            /* Fold magipi, magisw and the previous SHA-256 into product. */
            mpz_add(product,product,magipi);
            mpz_add(product,product,magisw);

            mpz_set_uint256(bns[0], (void*)(hash));
            mpz_add(bns[7], bns[7], bns[0]);

            mpz_mul(product,product,bns[7]);
            mpz_cdiv_q (product, product, bns[0]);
            if (mpz_sgn(product) <= 0) mpz_set_ui(product,1);

            /* Re-hash; the byte length becomes the scale of the next round.
               NOTE(review): realloc result unchecked here as well. */
            bytes = mpz_sizeinbase(product, 256);
            mpzscale=bytes;
            bdata = (uint8_t *)realloc(bdata, bytes);
            mpz_export(bdata, NULL, -1, 1, 0, 0, product);

            sph_sha256_init(&ctx_final_sha256);
            sph_sha256 (&ctx_final_sha256, bdata, bytes);
            sph_sha256_close(&ctx_final_sha256, (void*)(hash));
        }

        mpz_clear(magipi);
        mpz_clear(magisw);
        mpf_clear(magifpi);
        mpf_clear(mpsft);
        mpf_clear(mpa1);
        mpf_clear(mpb1);
        mpf_clear(mpt1);
        mpf_clear(mpp1);

        mpf_clear(mpa2);
        mpf_clear(mpb2);
        mpf_clear(mpt2);
        mpf_clear(mpp2);

        rc = fulltest_m7hash(hash, ptarget);
        if (rc) {
            if (opt_debug) {
                bin2hex(hash_str, (unsigned char *)hash, 32);
                bin2hex(target_str, (unsigned char *)ptarget, 32);
                bin2hex(data_str, (unsigned char *)data, 80);
                applog(LOG_DEBUG, "DEBUG: [%d thread] Found share!\ndata %s\nhash %s\ntarget %s", thr_id, data_str, hash_str, target_str);
            }

            /* Report the winning nonce back to the caller. */
            pdata[19] = data[19];

            goto out;
        }
    } while (n < max_nonce && !work_restart[thr_id].restart);

    pdata[19] = n;

out:
    for(int i=0; i < 8; i++){
        mpz_clear(bns[i]);
    }
    mpz_clear(product);
    free(bdata);

    *hashes_done = n - first_nonce + 1;
    return rc;
}
/*
 * One arithmetic-geometric-mean step:
 *
 *   out1 = (in1 + in2) / 2     arithmetic mean
 *   out2 = sqrt(in1 * in2)     geometric mean
 *
 * out1 is fully written before out2 is computed, so the outputs should be
 * distinct from the inputs.
 */
void agm (const mpf_t in1, const mpf_t in2, mpf_t out1, mpf_t out2)
{
    mpf_add (out1, in1, in2);
    mpf_div_ui (out1, out1, 2);
    mpf_mul (out2, in1, in2);
    mpf_sqrt (out2, out2);
}
static int fill_spougecache(size_t A, int accuracy, long eps) { int err, n; mp_float factrl, e, t1, t2, t3, pi; size_t start; if ((err = mpf_init_multi(eps, &factrl, &e, &t1, &t2, &t3, NULL)) != MP_OKAY) { return err; } err = MP_OKAY; if (spougecache_len < A || spougecache_eps < accuracy) { //puts("filling spougecache"); if ((err = mpf_const_d(&factrl, 1)) != MP_OKAY) { goto _ERR; } if ((err = mpf_const_e(&e)) != MP_OKAY) { goto _ERR; } if (spougecache_len != 0) { spougecache = realloc(spougecache, (A + 1) * sizeof(mp_float)); if (spougecache == NULL) { return MP_MEM; } start = spougecache_len; } else { spougecache = malloc((A + 1) * sizeof(mp_float)); if (spougecache == NULL) { return MP_MEM; } start = 1; if ((err = mpf_init(&pi, eps)) != MP_OKAY) { goto _ERR; } if ((err = mpf_const_pi(&pi)) != MP_OKAY) { goto _ERR; } pi.exp += 1; if ((err = mpf_init(&(spougecache[0]), eps)) != MP_OKAY) { goto _ERR; } if ((err = mpf_sqrt(&pi, &(spougecache[0]))) != MP_OKAY) { goto _ERR; } mpf_clear(&pi); } for (n = start; n < (int) A; n++) { // to avoid the more expensive exp(log(a-n)*(n-0.5)) // TODO: check if exp() is fast enough now if ((err = mpf_set_int(&t1, (int) (A - n))) != MP_OKAY) { goto _ERR; } if ((err = mpf_pow_d(&t1, (int) (n - 1), &t2)) != MP_OKAY) { goto _ERR; } if ((err = mpf_sqrt(&t1, &t1)) != MP_OKAY) { goto _ERR; } if ((err = mpf_mul(&t2, &t1, &t2)) != MP_OKAY) { goto _ERR; } if ((err = mpf_pow_d(&e, (int) (A - n), &t3)) != MP_OKAY) { goto _ERR; } if ((err = mpf_mul(&t2, &t3, &t2)) != MP_OKAY) { goto _ERR; } if ((err = mpf_init(&(spougecache[n]), eps)) != MP_OKAY) { goto _ERR; } if ((err = mpf_div(&t2, &factrl, &(spougecache[n]))) != MP_OKAY) { goto _ERR; } if ((err = mpf_set_int(&t1, -n)) != MP_OKAY) { goto _ERR; } if ((err = mpf_mul(&factrl, &t1, &factrl)) != MP_OKAY) { goto _ERR; } } spougecache_eps = accuracy; spougecache_len = A; } _ERR: mpf_clear_multi(&factrl, &e, &t1, &t2, &t3, NULL); return err; }
long int julia(const mpf_t x, const mpf_t xr, long int xres, const mpf_t y, const mpf_t yr, long int yres, mpf_t *c, int flag, long int max_iteration, float *iterations, int my_rank, int p, MPI_Comm comm) { double t0 = MPI_Wtime(); int i,j; // Find how many rows per process. int *rows; rows = (int*)malloc(sizeof(int)*p); for (i=0; i < p; i++) rows[i] = yres/p; for (i=0; i < yres % p; i++) rows[i]++; //allocate memory for each processor if(my_rank > 0){ iterations = (float*)malloc( sizeof(float) * xres * rows[my_rank]); assert(iterations); } //------------julia gmp const double maxRadius = 4.0; mpf_t xi, yi, x_min, x_max, y_min, y_max, savex, savex2, savey, radius, xgap, ygap, savex_a, savex_b, savey_a, savey_b, tmp, tmp1; mpf_init(xi); mpf_init(yi); mpf_init(x_min); mpf_init(x_max); mpf_init(y_min); mpf_init(y_max); mpf_init(savex); mpf_init(savex2); mpf_init(savey); mpf_init(radius); mpf_init(xgap); mpf_init(ygap); mpf_init(savex_a); mpf_init(savex_b); mpf_init(savey_a); mpf_init(savey_b); mpf_init(tmp); mpf_init(tmp1); //double x_min = x - xr; mpf_sub(x_min, x, xr); //double x_max = x + xr; mpf_add(x_max, x, xr); //double y_min = y - yr; mpf_sub(y_min, y, yr); //double y_max = y + yr; mpf_add(y_max, y, yr); // spaceing between x and y points //double xgap = (x_max - x_min) / xres; mpf_sub(xgap, x_max, x_min); mpf_div_ui(xgap, xgap, xres); //double ygap = (y_max - y_min) / yres; mpf_sub(ygap, y_max, y_min); mpf_div_ui(ygap, ygap, yres); //---------------------------- long long int iteration; long long int total_number_iterations = 0; int k = 0; for (j = my_rank; j < yres; j+=p){ if(my_rank==0) k = j; //needed for root for (i = 0; i < xres; i++){ //xi = x_min + i * xgap; mpf_mul_ui(tmp, xgap, i); mpf_add(xi, x_min, tmp); //yi = y_min + j * ygap; mpf_mul_ui(tmp, ygap, j); mpf_add(yi, y_min, tmp); //flag betwee[n julia or mandelbrot //savex = flag * c[0] + (1 - flag) * xi; mpf_mul_ui(savex_a, c[0], flag); mpf_mul_ui(savex_b, xi, (1-flag)); mpf_add(savex, savex_a, 
savex_b); //savey = flag * c[1] + (1 - flag) * yi; mpf_mul_ui(savey_a, c[1], flag); mpf_mul_ui(savey_b, yi, (1-flag)); mpf_add(savey, savey_a, savey_b); //radius = 0; mpf_set_ui(radius, 0); iteration = 0; //while ((radius <= maxRadius) && (iteration < max_iteration)){ while ((mpf_cmp_d(radius, maxRadius)<=0) && (iteration < max_iteration)){ //savex2 = xi; mpf_add_ui(savex2, xi, 0); //xi = xi * xi - yi * yi + savex; mpf_mul(xi, xi, xi); mpf_mul(tmp, yi, yi); mpf_sub(xi, xi, tmp); mpf_add(xi, xi, savex); //yi = 2.0f * savex2 * yi + savey; mpf_mul_ui(tmp, savex2, 2); mpf_mul(yi, yi, tmp); mpf_add(yi, yi, savey); //radius = xi * xi + yi * yi; mpf_mul(tmp, xi, xi); mpf_mul(tmp1, yi, yi); mpf_add(radius, tmp, tmp1); iteration++; } total_number_iterations += iteration; float *p = iterations + k*xres + i; //if (radius > maxRadius){ if (mpf_cmp_d(radius, maxRadius)>0){ //float zn = sqrt(xi*xi + yi*yi); mpf_t zn; mpf_init(zn); mpf_mul(tmp, xi, xi); mpf_mul(tmp1, yi, yi); mpf_add(zn, tmp, tmp1); mpf_sqrt(zn, zn); double n = mpf_get_d(zn); //float nu = log(log(zn) / log(2))/log(2); double nu = log(log(n) / log(2))/log(2); //the point has escaped at iteration at any of the iterations 0,1,2,3... 
*p = iteration + 1 - nu; } else // zij stays within the region up to max_iteration { assert(iteration==max_iteration); *p = -1; } } k++; } //collect various data MPI_Status status; if(my_rank == 0){ int i,j; for(i = 1; i < p; i++){ for(j = 0; j < rows[i]; j++){ MPI_Recv((iterations + (i + j * p) * xres), xres, MPI_FLOAT, i, 0, comm, &status); //MPI_Irecv((iterations + (i + j * p) * xres), xres, MPI_FLOAT, i, 0, comm, NULL); } } } else{ int i; for(i = 0; i < rows[my_rank]; i++) MPI_Send((iterations + i *xres), xres, MPI_FLOAT, 0, 0, comm); } //reduce max iteration count long long int total_reduced_iterations = -1; //printf("rank: %i, total_reduced_iterations: %i\n", my_rank, total_number_iterations); MPI_Reduce(&total_number_iterations, &total_reduced_iterations, 1, MPI_LONG_LONG_INT, MPI_SUM, 0, comm); double t4 = MPI_Wtime(); double max_reduced_time = -1; double total_time = t4 - t0; MPI_Reduce(&total_time, &max_reduced_time, 1, MPI_DOUBLE, MPI_MAX, 0, comm); if(my_rank == 0){ printf("np: %i, time: %f , iterations: %lld\n",p, max_reduced_time, total_reduced_iterations); //printf("%i\t%.2e\n", p, max_reduced_time); } //clear //printf("proc: %i, total time: %lf sec, init: %lf sec, calc: %lf sec, collect: %lf\n", my_rank, t4-t0, t1-t0, t2-t1, t3-t2); return total_reduced_iterations; }
void check_rand1 (int argc, char **argv) { mp_size_t size; mp_exp_t exp; int reps = 20000; int i; mpf_t x, y, y2; mp_size_t bprec = 100; mpf_t rerr, max_rerr, limit_rerr; if (argc > 1) { reps = strtol (argv[1], 0, 0); if (argc > 2) bprec = strtol (argv[2], 0, 0); } mpf_set_default_prec (bprec); mpf_init_set_ui (limit_rerr, 1); mpf_div_2exp (limit_rerr, limit_rerr, bprec); #if VERBOSE mpf_dump (limit_rerr); #endif mpf_init (rerr); mpf_init_set_ui (max_rerr, 0); mpf_init (x); mpf_init (y); mpf_init (y2); for (i = 0; i < reps; i++) { size = urandom () % SIZE; exp = urandom () % SIZE; mpf_random2 (x, size, exp); mpf_sqrt (y, x); MPF_CHECK_FORMAT (y); mpf_mul (y2, y, y); mpf_reldiff (rerr, x, y2); if (mpf_cmp (rerr, max_rerr) > 0) { mpf_set (max_rerr, rerr); #if VERBOSE mpf_dump (max_rerr); #endif if (mpf_cmp (rerr, limit_rerr) > 0) { printf ("ERROR after %d tests\n", i); printf (" x = "); mpf_dump (x); printf (" y = "); mpf_dump (y); printf (" y2 = "); mpf_dump (y2); printf (" rerr = "); mpf_dump (rerr); printf (" limit_rerr = "); mpf_dump (limit_rerr); printf ("in hex:\n"); mp_trace_base = 16; mpf_trace (" x ", x); mpf_trace (" y ", y); mpf_trace (" y2 ", y2); mpf_trace (" rerr ", rerr); mpf_trace (" limit_rerr", limit_rerr); abort (); } } } mpf_clear (limit_rerr); mpf_clear (rerr); mpf_clear (max_rerr); mpf_clear (x); mpf_clear (y); mpf_clear (y2); }
/*
 * Spectral test of the multiplier `a` modulo `m`, following algorithm S of
 * the Knuth section cited below.
 *
 * Results are written to rop[]: rop[0] receives sqrt(s) after steps S1-S3
 * (dimension t = 2), and every pass of the S4 loop stores sqrt(s) for the
 * next dimension in rop[ui_t - 1].  T is the highest dimension computed and
 * is silently clamped to GMP_SPECT_MAXT.
 *
 * rop[] entries are only assigned here, so they must already be initialised
 * by the caller.
 *
 * NOTE(review): ui_t starts at 1 and the S4 loop runs while ui_t + 1 != T,
 * so a T below 2 makes the loop index past the U/V arrays -- callers
 * presumably always pass T >= 2; confirm.
 */
void
spectral_test (mpf_t rop[], unsigned int T, mpz_t a, mpz_t m)
{
  /* Knuth "Seminumerical Algorithms, Third Edition", section 3.3.4
     (pp. 101-103). */

  /* v[t] = min { sqrt (x[1]^2 + ... + x[t]^2) |
     x[1] + a*x[2] + ... + pow (a, t-1) * x[t] is congruent to 0 (mod m) } */

  /* Variables. */
  unsigned int ui_t;
  unsigned int ui_i, ui_j, ui_k, ui_l;
  mpf_t f_tmp1, f_tmp2;
  mpz_t tmp1, tmp2, tmp3;
  mpz_t U[GMP_SPECT_MAXT][GMP_SPECT_MAXT],
    V[GMP_SPECT_MAXT][GMP_SPECT_MAXT],
    X[GMP_SPECT_MAXT],
    Y[GMP_SPECT_MAXT],
    Z[GMP_SPECT_MAXT];
  mpz_t h, hp, r, s, p, pp, q, u, v;

  /* GMP inits.  Every cell of U, V, X and Y starts at zero; the S4 loop
     relies on that when it adds a new row and column. */
  mpf_init (f_tmp1);
  mpf_init (f_tmp2);
  for (ui_i = 0; ui_i < GMP_SPECT_MAXT; ui_i++)
    {
      for (ui_j = 0; ui_j < GMP_SPECT_MAXT; ui_j++)
        {
          mpz_init_set_ui (U[ui_i][ui_j], 0);
          mpz_init_set_ui (V[ui_i][ui_j], 0);
        }
      mpz_init_set_ui (X[ui_i], 0);
      mpz_init_set_ui (Y[ui_i], 0);
      mpz_init (Z[ui_i]);
    }
  mpz_init (tmp1);
  mpz_init (tmp2);
  mpz_init (tmp3);
  mpz_init (h);
  mpz_init (hp);
  mpz_init (r);
  mpz_init (s);
  mpz_init (p);
  mpz_init (pp);
  mpz_init (q);
  mpz_init (u);
  mpz_init (v);

  /* Implementation inits. */
  if (T > GMP_SPECT_MAXT)
    T = GMP_SPECT_MAXT;                 /* FIXME: Lazy. */

  /* S1 [Initialize.] */
  ui_t = 2 - 1;                 /* NOTE: `t' in description == ui_t + 1
                                   for easy indexing */
  mpz_set (h, a);
  mpz_set (hp, m);
  mpz_set_ui (p, 1);
  mpz_set_ui (pp, 0);
  mpz_set (r, a);
  mpz_pow_ui (s, a, 2);
  mpz_add_ui (s, s, 1);         /* s = 1 + a^2 */

  /* S2 [Euclidean step.] */
  while (1)
    {
      /* Debug-only invariant check: |h*pp - hp*p| is expected to equal m. */
      if (g_debug > DEBUG_1)
        {
          mpz_mul (tmp1, h, pp);
          mpz_mul (tmp2, hp, p);
          mpz_sub (tmp1, tmp1, tmp2);
          if (mpz_cmpabs (m, tmp1))
            {
              printf ("***BUG***: h*pp - hp*p = ");
              mpz_out_str (stdout, 10, tmp1);
              printf ("\n");
            }
        }
      if (g_debug > DEBUG_2)
        {
          printf ("hp = ");
          mpz_out_str (stdout, 10, hp);
          printf ("\nh = ");
          mpz_out_str (stdout, 10, h);
          printf ("\n");
          fflush (stdout);
        }

      /* Guard against dividing by zero: q falls back to 1 when h == 0. */
      if (mpz_sgn (h))
        mpz_tdiv_q (q, hp, h);  /* q = floor(hp/h) */
      else
        mpz_set_ui (q, 1);

      if (g_debug > DEBUG_2)
        {
          printf ("q = ");
          mpz_out_str (stdout, 10, q);
          printf ("\n");
          fflush (stdout);
        }

      mpz_mul (tmp1, q, h);
      mpz_sub (u, hp, tmp1);    /* u = hp - q*h */

      mpz_mul (tmp1, q, p);
      mpz_sub (v, pp, tmp1);    /* v = pp - q*p */

      mpz_pow_ui (tmp1, u, 2);
      mpz_pow_ui (tmp2, v, 2);
      mpz_add (tmp1, tmp1, tmp2);
      /* Keep stepping only while u^2 + v^2 still improves on s. */
      if (mpz_cmp (tmp1, s) < 0)
        {
          mpz_set (s, tmp1);    /* s = u^2 + v^2 */
          mpz_set (hp, h);      /* hp = h */
          mpz_set (h, u);       /* h = u */
          mpz_set (pp, p);      /* pp = p */
          mpz_set (p, v);       /* p = v */
        }
      else
        break;
    }

  /* S3 [Compute v2.] */
  mpz_sub (u, u, h);
  mpz_sub (v, v, p);

  mpz_pow_ui (tmp1, u, 2);
  mpz_pow_ui (tmp2, v, 2);
  mpz_add (tmp1, tmp1, tmp2);
  if (mpz_cmp (tmp1, s) < 0)
    {
      mpz_set (s, tmp1);        /* s = u^2 + v^2 */
      mpz_set (hp, u);
      mpz_set (pp, v);
    }
  /* ui_t is still 1 here, so this stores the t = 2 result in rop[0]. */
  mpf_set_z (f_tmp1, s);
  mpf_sqrt (rop[ui_t - 1], f_tmp1);

  /* S4 [Advance t.] */
  mpz_neg (U[0][0], h);
  mpz_set (U[0][1], p);
  mpz_neg (U[1][0], hp);
  mpz_set (U[1][1], pp);

  mpz_set (V[0][0], pp);
  mpz_set (V[0][1], hp);
  mpz_neg (V[1][0], p);
  mpz_neg (V[1][1], h);
  /* Flip the sign of the whole 2x2 V block when pp is positive. */
  if (mpz_cmp_ui (pp, 0) > 0)
    {
      mpz_neg (V[0][0], V[0][0]);
      mpz_neg (V[0][1], V[0][1]);
      mpz_neg (V[1][0], V[1][0]);
      mpz_neg (V[1][1], V[1][1]);
    }

  while (ui_t + 1 != T)         /* S4 loop */
    {
      ui_t++;
      mpz_mul (r, a, r);
      mpz_mod (r, r, m);

      /* Add new row and column to U and V.  They are initialized with
         all elements set to zero, so clearing is not necessary. */

      mpz_neg (U[ui_t][0], r); /* U: First col in new row. */
      mpz_set_ui (U[ui_t][ui_t], 1); /* U: Last col in new row. */

      mpz_set (V[ui_t][ui_t], m); /* V: Last col in new row. */

      /* "Finally, for 1 <= i < t,
           set q = round (vi1 * r / m),
           vit = vi1*r - q*m,
           and Ut=Ut+q*Ui */

      for (ui_i = 0; ui_i < ui_t; ui_i++)
        {
          mpz_mul (tmp1, V[ui_i][0], r); /* tmp1=vi1*r */
          zdiv_round (q, tmp1, m); /* q=round(vi1*r/m) */
          mpz_mul (tmp2, q, m); /* tmp2=q*m */
          mpz_sub (V[ui_i][ui_t], tmp1, tmp2);

          for (ui_j = 0; ui_j <= ui_t; ui_j++) /* U[t] = U[t] + q*U[i] */
            {
              mpz_mul (tmp1, q, U[ui_i][ui_j]); /* tmp=q*uij */
              mpz_add (U[ui_t][ui_j], U[ui_t][ui_j], tmp1); /* utj = utj + q*uij */
            }
        }

      /* s = min (s, zdot (U[t], U[t]) */
      vz_dot (tmp1, U[ui_t], U[ui_t], ui_t + 1);
      if (mpz_cmp (tmp1, s) < 0)
        mpz_set (s, tmp1);

      ui_k = ui_t;
      ui_j = 0;                 /* WARNING: ui_j no longer a temp. */

      /* S5 [Transform.]  ui_k remembers the last j at which a row was
         changed; the loop ends once j has cycled back round to it. */
      if (g_debug > DEBUG_2)
        printf ("(t, k, j, q1, q2, ...)\n");
      do
        {
          if (g_debug > DEBUG_2)
            printf ("(%u, %u, %u", ui_t + 1, ui_k + 1, ui_j + 1);

          for (ui_i = 0; ui_i <= ui_t; ui_i++)
            {
              if (ui_i != ui_j)
                {
                  vz_dot (tmp1, V[ui_i], V[ui_j], ui_t + 1); /* tmp1=dot(Vi,Vj). */
                  mpz_abs (tmp2, tmp1);
                  mpz_mul_ui (tmp2, tmp2, 2); /* tmp2 = 2*abs(dot(Vi,Vj) */
                  vz_dot (tmp3, V[ui_j], V[ui_j], ui_t + 1); /* tmp3=dot(Vj,Vj). */

                  if (mpz_cmp (tmp2, tmp3) > 0)
                    {
                      zdiv_round (q, tmp1, tmp3); /* q=round(Vi.Vj/Vj.Vj) */
                      if (g_debug > DEBUG_2)
                        {
                          printf (", ");
                          mpz_out_str (stdout, 10, q);
                        }

                      for (ui_l = 0; ui_l <= ui_t; ui_l++)
                        {
                          mpz_mul (tmp1, q, V[ui_j][ui_l]);
                          mpz_sub (V[ui_i][ui_l], V[ui_i][ui_l], tmp1); /* Vi=Vi-q*Vj */
                          mpz_mul (tmp1, q, U[ui_i][ui_l]);
                          mpz_add (U[ui_j][ui_l], U[ui_j][ui_l], tmp1); /* Uj=Uj+q*Ui */
                        }

                      vz_dot (tmp1, U[ui_j], U[ui_j], ui_t + 1); /* tmp1=dot(Uj,Uj) */
                      if (mpz_cmp (tmp1, s) < 0) /* s = min(s,dot(Uj,Uj)) */
                        mpz_set (s, tmp1);
                      ui_k = ui_j;
                    }
                  else if (g_debug > DEBUG_2)
                    printf (", #"); /* 2|Vi.Vj| <= Vj.Vj */
                }
              else if (g_debug > DEBUG_2)
                printf (", *"); /* i == j */
            }

          if (g_debug > DEBUG_2)
            printf (")\n");

          /* S6 [Advance j.]  j wraps from t back to 0. */
          if (ui_j == ui_t)
            ui_j = 0;
          else
            ui_j++;
        }
      while (ui_j != ui_k);     /* S5 */

      /* From Knuth p. 104: "The exhaustive search in steps S8-S10
         reduces the value of s only rarely." */
#ifdef DO_SEARCH
      /* S7 [Prepare for search.] */
      /* Find minimum in (x[1], ..., x[t]) satisfying condition
         x[k]^2 <= f(y[1], ...,y[t]) * dot(V[k],V[k]) */

      ui_k = ui_t;
      if (g_debug > DEBUG_2)
        {
          printf ("searching...");
          /*for (f = 0; f < ui_t*/
          fflush (stdout);
        }

      /* Z[i] = floor (sqrt (floor (dot(V[i],V[i]) * s / m^2))); */
      mpz_pow_ui (tmp1, m, 2);
      mpf_set_z (f_tmp1, tmp1);
      mpf_set_z (f_tmp2, s);
      mpf_div (f_tmp1, f_tmp2, f_tmp1); /* f_tmp1 = s/m^2 */
      for (ui_i = 0; ui_i <= ui_t; ui_i++)
        {
          vz_dot (tmp1, V[ui_i], V[ui_i], ui_t + 1);
          mpf_set_z (f_tmp2, tmp1);
          mpf_mul (f_tmp2, f_tmp2, f_tmp1);
          f_floor (f_tmp2, f_tmp2);
          mpf_sqrt (f_tmp2, f_tmp2);
          mpz_set_f (Z[ui_i], f_tmp2);
        }

      /* S8 [Advance X[k].] */
      do
        {
          if (g_debug > DEBUG_2)
            {
              printf ("X[%u] = ", ui_k);
              mpz_out_str (stdout, 10, X[ui_k]);
              printf ("\tZ[%u] = ", ui_k);
              mpz_out_str (stdout, 10, Z[ui_k]);
              printf ("\n");
              fflush (stdout);
            }

          if (mpz_cmp (X[ui_k], Z[ui_k]))
            {
              mpz_add_ui (X[ui_k], X[ui_k], 1);
              for (ui_i = 0; ui_i <= ui_t; ui_i++)
                mpz_add (Y[ui_i], Y[ui_i], U[ui_k][ui_i]);

              /* S9 [Advance k.] */
              while (++ui_k <= ui_t)
                {
                  mpz_neg (X[ui_k], Z[ui_k]);
                  mpz_mul_ui (tmp1, Z[ui_k], 2);
                  for (ui_i = 0; ui_i <= ui_t; ui_i++)
                    {
                      mpz_mul (tmp2, tmp1, U[ui_k][ui_i]);
                      mpz_sub (Y[ui_i], Y[ui_i], tmp2);
                    }
                }
              vz_dot (tmp1, Y, Y, ui_t + 1);
              if (mpz_cmp (tmp1, s) < 0)
                mpz_set (s, tmp1);
            }
        }
      while (--ui_k);
#endif /* DO_SEARCH */
      /* Store sqrt(s) for the dimension just processed. */
      mpf_set_z (f_tmp1, s);
      mpf_sqrt (rop[ui_t - 1], f_tmp1);
#ifdef DO_SEARCH
      if (g_debug > DEBUG_2)
        printf ("done.\n");
#endif /* DO_SEARCH */
    } /* S4 loop */

  /* Clear GMP variables. */

  mpf_clear (f_tmp1);
  mpf_clear (f_tmp2);
  for (ui_i = 0; ui_i < GMP_SPECT_MAXT; ui_i++)
    {
      for (ui_j = 0; ui_j < GMP_SPECT_MAXT; ui_j++)
        {
          mpz_clear (U[ui_i][ui_j]);
          mpz_clear (V[ui_i][ui_j]);
        }
      mpz_clear (X[ui_i]);
      mpz_clear (Y[ui_i]);
      mpz_clear (Z[ui_i]);
    }
  mpz_clear (tmp1);
  mpz_clear (tmp2);
  mpz_clear (tmp3);
  mpz_clear (h);
  mpz_clear (hp);
  mpz_clear (r);
  mpz_clear (s);
  mpz_clear (p);
  mpz_clear (pp);
  mpz_clear (q);
  mpz_clear (u);
  mpz_clear (v);

  return;
}
/*
 * Evaluate and print
 *
 *     sqrt(f[n] * f[m]) * z^((m-n)/2)
 *         * sum_{j=0..n} (-1)^j * z^j / (f[j] * f[n-j] * f[j+m-n])
 *
 * where f = factoriales (presumably f[i] == i! -- confirm against the
 * caller), n and m are first reordered so that n <= m, and z is derived from
 * tau, q, k1, k2 and pi as shown in the body.
 *
 * Parameters:
 *   n, m        - indices; order does not matter.
 *   tau         - scale factor used when building z.
 *   q           - divisor used when building z.
 *   k1, k2      - integers whose squares enter z.
 *   factoriales - table indexed up to max(n, m).
 *   pi          - value z is multiplied by.
 *
 * Output: one line "n m k1 k2 value" on stdout.  Nothing is returned; all
 * temporaries are released before returning.
 */
void omega(long int n, long int m, double tau, long int q, long int k1, long int k2, mpf_t *factoriales, mpf_t pi)
{
    mpf_t aux, aux2, aux3, sqrf, acum, Ltau, z, zelev;
    long int j;
    long int n_in;
    unsigned long int qsqr;

    /*
     * Set n = min(n,m), and m = max(n,m)
     */
    n_in = n;
    n = min(n,m);
    m = max(n_in,m);

    /*
     * The sqrt(n!m!) pre-factor
     */
    mpf_init(aux);
    mpf_init(sqrf);
    mpf_mul(aux, factoriales[n], factoriales[m]);
    mpf_sqrt(sqrf, aux);

    /*
     * Computation of z
     */
    mpf_init(aux2);
    mpf_init_set_d(Ltau, tau);
    mpf_init(z);
    mpf_init(zelev);

    /* Despite its name, qsqr holds q itself, not q squared. */
    qsqr = (unsigned long int) q;

    mpf_set_d(aux, (double) pow((double) k1, (double) 2));
    mpf_mul(aux, aux, Ltau);
    mpf_set_d(aux2, (double) pow((double) k2, (double) 2));
    /*
     * NOTE(review): aux was just multiplied by Ltau and is divided by it
     * again here, so tau cancels and aux2 (k2^2) is never scaled.  This
     * looks like the division was meant for aux2 -- confirm against the
     * intended formula before changing; behaviour is left as it was.
     */
    mpf_div(aux, aux, Ltau);
    mpf_add(aux, aux, aux2);
    mpf_div_ui(aux, aux, qsqr);
    mpf_mul(z, aux, pi);

    /* zelev = z^((m-n)/2); an odd difference needs a square root. */
    if (((m - n) % 2) == 0) {
        mpf_pow_ui(zelev, z, (unsigned long int) (m - n) / 2);
    } else {
        mpf_pow_ui(zelev, z, m - n);
        mpf_sqrt(zelev, zelev);
    }

    /*
     * The loop: alternating sum over j
     */
    mpf_init(acum);
    mpf_init(aux3);
    mpf_set_ui(acum, (unsigned long int) 0);

    for (j = 0; j <= n; j++) {
        mpf_mul(aux, factoriales[j], factoriales[n - j]);
        mpf_mul(aux2, aux, factoriales[j + m - n]);
        mpf_pow_ui(aux3, z, j);
        mpf_div(aux, aux3, aux2);
        if ((j % 2) == 0)
            mpf_set(aux2, aux);
        else
            mpf_neg(aux2, aux);
        mpf_add(acum, acum, aux2);
    }

    mpf_mul(aux, acum, sqrf);
    mpf_mul(aux, aux, zelev);

    /* n, m, k1 and k2 are long int, so they need the `l` length modifier. */
    gmp_printf("%4ld %4ld %4ld %4ld %20.20Fe\n", n, m, k1, k2, aux);

    mpf_clear(aux);
    mpf_clear(aux2);
    mpf_clear(aux3);
    mpf_clear(sqrf);
    mpf_clear(acum);
    mpf_clear(Ltau);
    mpf_clear(z);
    mpf_clear(zelev);
}
/*
 * Compute a Wigner 3j symbol (per the function name) into w.
 *
 * When the arguments fail any of the checks below -- m1 + m2 + m3 != 0,
 * |mi| > ji for some i, or j3 outside [|j1 - j2|, j1 + j2] -- w is set to 0
 * and nothing is allocated.  Otherwise
 *
 *     w = sqrt(r * delta_sq) * i
 *
 * where delta_sq, i and r come from w3j_Delta_sq, w3j_intterm and
 * w3j_sqrt_sq, and i is negated when |j1 - j2 - m3| is odd.
 *
 * w must already be initialised by the caller.
 */
void w3j(mpf_t w, long j1, long j2, long j3, long m1, long m2, long m3)
{
    mpq_t delta_sq, r;
    mpz_t i;
    mpf_t h;

    /*
     * Reject invalid argument combinations before allocating anything, so
     * these paths cannot leak and always leave a defined value in w.
     */
    if ((m1 + m2 + m3 != 0)
        || (iabs(m1) > j1) || (iabs(m2) > j2) || (iabs(m3) > j3)
        || (j3 < iabs(j1 - j2)) || ((j1 + j2) < j3)) {
        mpf_set_ui(w, 0);
        return;
    }

    mpq_init(delta_sq);
    mpq_init(r);
    mpz_init(i);
    mpf_init(h);
    mpq_set_si(r, 0, 1);

    w3j_Delta_sq(delta_sq, j1, j2, j3);
    w3j_intterm(i, j1, j2, j3, m1, m2, m3);

    /* Sign factor: negate the integer term when |j1 - j2 - m3| is odd. */
    if (iabs(j1 - j2 - m3) % 2 == 1)
        mpz_neg(i, i);

    w3j_sqrt_sq(r, j1, j2, j3, m1, m2, m3);

    /* w = sqrt(r * delta_sq), taken after converting the rational to a float. */
    mpq_mul(r, r, delta_sq);
    mpf_set_q(w, r);
    mpf_sqrt(w, w);

    /* Multiply by the (signed) integer term. */
    mpf_set_z(h, i);
    mpf_mul(w, w, h);

    mpf_clear(h);
    mpz_clear(i);
    mpq_clear(r);
    mpq_clear(delta_sq);
}