/*------------------------------------------------------------------------*/
/* Fill in the per-polynomial sieve parameters for every entry of the
   current batch, then start the lattice sieve.

   For each batch entry the loop computes

     trans_N    = N * mult * high_coeff^(degree-1)
     trans_m0   = integer part of the degree-th root of trans_N
     poly->m0   = integer part of the degree-th root of (N / high_coeff)
     sieve_size = coeff_max / m0 * p_size_max * p_size_max / degree

   where mult is degree^degree for degree 4, 5 or 6 and 0 for any other
   degree. poly->m0 is shared by the whole batch, so on return it holds
   the value computed for the last batch entry. */
static void
search_coeffs_core(msieve_obj *obj, poly_search_t *poly, uint32 deadline)
{
	uint32 degree = poly->degree;
	uint32 num_poly = poly->num_poly;
	uint32 mult;
	uint32 idx;

	if (degree == 4)
		mult = 4 * 4 * 4 * 4;
	else if (degree == 5)
		mult = 5 * 5 * 5 * 5 * 5;
	else if (degree == 6)
		mult = 6 * 6 * 6 * 6 * 6 * 6;
	else
		mult = 0;

	for (idx = 0; idx < num_poly; idx++) {
		curr_poly_t *curr = &poly->batch[idx];
		uint32 remaining = degree - 1;

		/* trans_N = N * mult * high_coeff^(degree-1) */
		mpz_mul_ui(curr->trans_N, poly->N, (mp_limb_t)mult);
		while (remaining != 0) {
			mpz_mul(curr->trans_N, curr->trans_N,
					curr->high_coeff);
			remaining--;
		}
		mpz_root(curr->trans_m0, curr->trans_N, (mp_limb_t)degree);

		/* m0 for the untransformed problem */
		mpz_tdiv_q(poly->m0, poly->N, curr->high_coeff);
		mpz_root(poly->m0, poly->m0, (mp_limb_t)degree);

		/* the sieve size is kept both as a double and as an mpz */
		curr->sieve_size = curr->coeff_max / mpz_get_d(poly->m0) *
					curr->p_size_max * curr->p_size_max / degree;
		mpz_set_d(curr->mp_sieve_size, curr->sieve_size);
	}

	sieve_lattice(obj, poly, 2000, 2001, 100000, num_poly * deadline);
}
/* Equality of integers with up to 53 bits.

   Builds the values 1, 3, 7, 15, ... (all bits set) and, for as long as
   each one survives an mpz -> double -> mpz round trip unchanged, hands
   the value and its negation to check_one together with the expected
   comparison results. */
void
check_onebits (void)
{
  mpz_t   ones, roundtrip;
  double  as_double;
  int     nbits;

  mpz_init (roundtrip);
  mpz_init_set_ui (ones, 0L);

  nbits = 0;
  while (nbits < 512)
    {
      /* append one more 1 bit: ones = 2*ones + 1 */
      mpz_mul_2exp (ones, ones, 1);
      mpz_add_ui (ones, ones, 1L);

      as_double = mpz_get_d (ones);
      mpz_set_d (roundtrip, as_double);

      /* stop if any truncation is occurring */
      if (mpz_cmp (ones, roundtrip) != 0)
        break;

      /* positive operand */
      check_one ("check_onebits", ones, as_double, 0, 0);
      check_one ("check_onebits", ones, -as_double, 1, 0);

      /* negative operand, restored afterwards */
      mpz_neg (ones, ones);
      check_one ("check_onebits", ones, as_double, -1, 0);
      check_one ("check_onebits", ones, -as_double, 0, 0);
      mpz_neg (ones, ones);

      nbits++;
    }

  mpz_clear (roundtrip);
  mpz_clear (ones);
}
// Convert a double-valued Z_NR into an integer Obj.
//
// The value is passed through a temporary GMP integer: mpz_set_d stores
// the integer part of the double, INT_mpz turns that into an Obj, and the
// temporary is released before returning.
template<> Obj GET_INTOBJ(Z_NR<double> &v)
{
  const double value = v.getData();

  mpz_t asInteger;
  // Initial size request only; it is given in bits.
  mpz_init2 (asInteger, 8*sizeof(double)+1);
  mpz_set_d (asInteger, value);

  Obj result = INT_mpz (asInteger);
  mpz_clear (asInteger);
  return result;
}
/* Initialize DEST and set it from the double VAL.

   DEST starts out as zero with a one-limb buffer obtained from the
   current GMP allocation function; mpz_set_d then stores the value. */
void
mpz_init_set_d (mpz_ptr dest, double val)
{
  mp_ptr limbs;

  limbs = (mp_ptr) (*__gmp_allocate_func) (BYTES_PER_MP_LIMB);

  /* empty value backed by a single allocated limb */
  dest->_mp_size = 0;
  dest->_mp_d = limbs;
  dest->_mp_alloc = 1;

  mpz_set_d (dest, val);
}
void check_data (void) { static const struct { double d; mp_size_t want_size; mp_limb_t want_data[2]; } data[] = { { 0.0, 0 }, { 1.0, 1, { 1 } }, { -1.0, -1, { 1 } }, { 123.0, 1, { 123 } }, { -123.0, -1, { 123 } }, }; mpz_t z; int i; for (i = 0; i < numberof (data); i++) { mpz_init (z); mpz_set_d (z, data[i].d); MPZ_CHECK_FORMAT (z); if (z->_mp_size != data[i].want_size || refmpn_cmp_allowzero (z->_mp_d, data[i].want_data, ABS (data[i].want_size)) != 0) { printf ("mpz_set_d wrong on data[%d]\n", i); bad: d_trace (" d ", data[i].d); printf (" got size %ld\n", (long) z->_mp_size); printf (" want size %ld\n", (long) data[i].want_size); mpn_trace (" got z", z->_mp_d, z->_mp_size); mpn_trace (" want z", data[i].want_data, data[i].want_size); abort(); } mpz_clear (z); mpz_init_set_d (z, data[i].d); MPZ_CHECK_FORMAT (z); if (z->_mp_size != data[i].want_size || refmpn_cmp_allowzero (z->_mp_d, data[i].want_data, ABS (data[i].want_size)) != 0) { printf ("mpz_init_set_d wrong on data[%d]\n", i); goto bad; } mpz_clear (z); } }
// Construct a vertex from its centre (x, y, z) and radius.
//
// Floating-point state:
//   Coordinates / NormCoordinates  slots 1..3 hold x, y, z
//   Radius / BackRadius            the given radius
//   Weight / BackWeight            x^2 + y^2 + z^2 - radius^2
//
// Exact integer state (GMP), built from the values multiplied by `scale`
// and converted with mpz_set_d:
//   V[1..3]  scaled coordinates
//   V[4]     V[1]^2 + V[2]^2 + V[3]^2 - (scaled radius)^2
//
// All remaining bookkeeping fields are reset to their initial values.
Vertex::Vertex(double x, double y, double z, double radius,int index,double scale)
{
    this->Index = index;

    this->NormCoordinates[1] = x;
    this->Coordinates[1] = this->NormCoordinates[1];
    this->NormCoordinates[2] = y;
    this->Coordinates[2] = this->NormCoordinates[2];
    this->NormCoordinates[3] = z;
    this->Coordinates[3] = this->NormCoordinates[3];

    this->BackRadius = radius;
    this->Radius = this->BackRadius;

    this->BackWeight = pow(x,2) + pow(y,2) + pow(z,2) - pow(radius,2);
    this->Weight = this->BackWeight;

    // Scaled integer coordinates.
    for (int axis = 1; axis <= 3; ++axis)
    {
        mpz_init(this->V[axis]);
        mpz_set_d(this->V[axis], this->Coordinates[axis]*scale);
    }

    // V[4] starts as -(scaled radius)^2, then each squared coordinate is
    // added; the arithmetic is exact, so the summation order is irrelevant.
    mpz_t scaledRadius;
    mpz_init(scaledRadius);
    mpz_set_d(scaledRadius, this->Radius*(scale));

    mpz_init(this->V[4]);
    mpz_mul(this->V[4], scaledRadius, scaledRadius);
    mpz_neg(this->V[4], this->V[4]);
    for (int axis = 3; axis >= 1; --axis)
    {
        mpz_addmul(this->V[4], this->V[axis], this->V[axis]);
    }
    mpz_clear(scaledRadius);

    // Bookkeeping defaults.
    this->Redinfo = 0;
    this->ranValue = 0;
    this->Hull = -1;
    this->AlphaStatus = -1;
    this->Coef = 1.0;
    this->Rho = 0;
    this->Mu1 = 0;
    this->Mu2 = 0;
    this->Repeats = -1;
    this->ufKey = -1;
    this->valid = true;
    selected = 0;
}
// Construct from a long double.
//
// Values whose magnitude is below 1 share the container's constant zero
// object (its reference count is bumped).  Anything else gets freshly
// allocated storage and is assigned through mpz_set_d, which means the
// value is first narrowed to double.
GmpInt::GmpInt(long double value)
{
    const long double magnitude = value >= 0.0L ? value : -value;

    if(!(magnitude < 1.0L))
    {
        mData = gmpIntDataContainer().allocateGmpIntData
            (gIntDefaultNumberOfBits, false);
        mpz_set_d(mData->mInteger, double(value));
        return;
    }

    // Integer part is zero: reuse the shared zero constant.
    mData = gmpIntDataContainer().const_0();
    ++(mData->mRefCount);
}
/* Multiply the product held in the cascade C by the double N.

   val[0] collects the factors.  Once it grows beyond CASCADE_THRES limbs
   it is pushed up the cascade: it moves into the first empty level, or is
   multiplied into each occupied level on the way (emptying the level
   below each time).  If every level was occupied, the cascade is extended
   by one entry which receives the accumulated product.

   T is scratch space handed on to mpz_mul_d.  Returns C.  Exits the
   program if the cascade cannot be enlarged. */
static mul_casc *
mulcascade_mul_d (mul_casc *c, const double n, ATTRIBUTE_UNUSED mpz_t t)
{
  unsigned int level;

  /* an empty bottom level simply takes the value of n */
  if (mpz_sgn (c->val[0]) == 0)
    {
      mpz_set_d (c->val[0], n);
      return c;
    }

  mpz_mul_d (c->val[0], c->val[0], n, t);
  if (mpz_size (c->val[0]) <= CASCADE_THRES)
    return c;

  /* bottom level is too big, carry it upwards */
  level = 1;
  while (level < c->size)
    {
      const int was_empty = (mpz_sgn (c->val[level]) == 0);

      if (was_empty)
        mpz_set (c->val[level], c->val[level-1]);
      else
        mpz_mul (c->val[level], c->val[level], c->val[level-1]);
      mpz_set_ui (c->val[level-1], 0);

      if (was_empty)
        return c;
      level++;
    }

  /* Allocate more space for cascade */
  level = c->size++;
  c->val = (mpz_t*) realloc (c->val, c->size * sizeof (mpz_t));
  if (c->val == NULL)
    {
      fprintf (stderr, "Cannot allocate memory in mulcascade_mul_d\n");
      exit (1);
    }
  mpz_init (c->val[level]);
  mpz_swap (c->val[level], c->val[level-1]);

  return c;
}
/* Try mpz_set_d on values 2^i+1, while such a value fits a double.

   The doubles are volatile so that each intermediate is really stored as
   a double; the loop ends as soon as adding 1.0 to 2^i is no longer
   exact.  Any disagreement between mpz_set_d and the integer computed
   with mpz arithmetic is reported and aborts. */
void
check_2n_plus_1 (void)
{
  volatile double  pow2, value, gap;
  mpz_t  expected, actual;
  int    bits;

  mpz_init (expected);
  mpz_init (actual);

  pow2 = 1.0;
  mpz_set_ui (expected, 2L);  /* gives 3 on first step */

  bits = 1;
  while (bits < 500)
    {
      mpz_mul_2exp (expected, expected, 1L);
      mpz_sub_ui (expected, expected, 1L);  /* expected = 2^bits+1 */

      pow2 *= 2.0;  /* pow2 = 2^bits */
      value = pow2 + 1.0;
      gap = value - pow2;
      if (gap != 1.0)
        break;   /* rounding occurred, stop now */

      mpz_set_d (actual, value);
      MPZ_CHECK_FORMAT (actual);
      if (mpz_cmp (actual, expected) != 0)
        {
          printf ("mpz_set_d wrong on 2^%d+1\n", bits);
          d_trace (" d ", value);
          mpz_trace (" got ", actual);
          mpz_trace (" want ", expected);
          abort ();
        }

      bits++;
    }

  mpz_clear (expected);
  mpz_clear (actual);
}
/* Degree-6 root sieve, (x,y) stage.

   Steps performed:
     1. bound the y range via compute_line_size in direction (0,1,0);
        return at once if the range is empty
     2. choose the lattice primes for this stage and precompute the
        constants used to combine a residue class modulo
        xy->lattice_size with one modulo xyz->lattice_size
     3. run find_hits for every xyz lattice, collecting candidate planes
        in a local heap
     4. sort the planes and, for each one whose score is at least 90% of
        the score of the first sorted plane, set up the shifted
        polynomial, the y range and the combined x/y residue classes,
        then run the x sieve

   The loop over planes stops early when MSIEVE_FLAG_STOP_SIEVING is set
   in obj->flags. */
void sieve_xy_run_deg6(root_sieve_t *rs)
{
	uint32 i;

	sieve_xyz_t *xyz = &rs->xyzdata;
	int64 z_base = xyz->z_base;

	sieve_xy_t *xy = &rs->xydata;
	sieve_prime_t *lattice_primes = xy->lattice_primes;
	uint32 num_lattice_primes;

	msieve_obj *obj = rs->data->obj;

	double direction[3] = {0, 1, 0};
	double line_min, line_max;
	uint16 cutoff_score;
	xydata_t xydata[MAX_CRT_FACTORS];

	plane_heap_t plane_heap;

	uint64 inv_xy;
	uint64 inv_xyz;

	compute_line_size(rs->max_norm, &rs->apoly,
			rs->dbl_p, rs->dbl_d, direction,
			-10000, 10000, &line_min, &line_max);
	if (line_min > line_max)
		return;

	num_lattice_primes = xy->num_lattice_primes =
				find_lattice_primes(rs->primes,
					rs->num_primes, xyz->lattice_size,
					lattice_primes, &xy->lattice_size,
					line_max - line_min);

	/* combination constants:
	     mp_lattice_size = xy_size * xyz_size
	     crt0 = inv_xy * xyz_size
	     crt1 = xy_size * inv_xyz
	   NOTE(review): mp_modinv_2(a, b) presumably returns the inverse
	   of a modulo b -- confirm against its definition */

	inv_xy = mp_modinv_2(xyz->lattice_size, xy->lattice_size);
	inv_xyz = mp_modinv_2(xy->lattice_size, xyz->lattice_size);
	uint64_2gmp(xy->lattice_size, xy->tmp1);
	uint64_2gmp(inv_xy, xy->tmp2);
	uint64_2gmp(xyz->lattice_size, xy->tmp3);
	uint64_2gmp(inv_xyz, xy->tmp4);
	mpz_mul(xy->mp_lattice_size, xy->tmp1, xy->tmp3);
	mpz_mul(xy->crt0, xy->tmp2, xy->tmp3);
	mpz_mul(xy->crt1, xy->tmp1, xy->tmp4);
	xy->dbl_lattice_size = mpz_get_d(xy->mp_lattice_size);

	/* collect candidate planes from every xyz lattice */

	xydata_alloc(lattice_primes, num_lattice_primes,
			xyz->lattice_size, xydata);

	plane_heap.num_entries = 0;

	for (i = 0; i < xyz->num_lattices; i++) {

		lattice_t *curr_lattice_xyz = xyz->lattices + i;

		xydata_init(xydata, num_lattice_primes,
				curr_lattice_xyz, z_base);

		find_hits(rs, xydata, num_lattice_primes,
				i, &plane_heap);
	}

	xydata_free(xydata, num_lattice_primes);

	/* nothing was collected: entries[] was never written, so do not
	   derive a cutoff from entries[0] (the loop below would not have
	   run in this case anyway) */

	if (plane_heap.num_entries == 0)
		return;

	qsort(plane_heap.entries, plane_heap.num_entries,
			sizeof(plane_t), compare_planes);

	/* NOTE(review): using entries[0] as the reference and breaking at
	   the first score below the cutoff suggests compare_planes sorts
	   by decreasing score -- confirm */

	cutoff_score = 0.9 * plane_heap.entries[0].plane.score;

	for (i = 0; i < plane_heap.num_entries; i++) {

		plane_t *curr_plane = plane_heap.entries + i;
		lattice_t *lattice_xy = &curr_plane->plane;
		lattice_t *lattice_xyz = xyz->lattices +
					curr_plane->which_lattice_xyz;

		if (lattice_xy->score < cutoff_score)
			break;

		line_min = xyz->y_line_min[curr_plane->which_z_block];
		line_max = xyz->y_line_max[curr_plane->which_z_block];
		z_base = xyz->z_base + curr_plane->which_z_block *
					xyz->lattice_size;

		/* polynomial shifted to the start of this z block */

		xy->apoly = rs->apoly;
		xy->apoly.coeff[3] += z_base * rs->dbl_p;
		xy->apoly.coeff[2] -= z_base * rs->dbl_d;

		/* y_base = line_min truncated to a multiple of the
		   combined lattice size */

		mpz_set_d(xy->tmp1, line_min);
		mpz_tdiv_q(xy->y_base, xy->tmp1, xy->mp_lattice_size);
		mpz_mul(xy->y_base, xy->y_base, xy->mp_lattice_size);
		xy->y_blocks = (line_max - line_min) / xy->dbl_lattice_size;

		/* combined residue class for x ... */

		uint64_2gmp(lattice_xy->x, xy->tmp1);
		uint64_2gmp(lattice_xyz->x, xy->tmp2);
		mpz_mul(xy->resclass_x, xy->tmp1, xy->crt0);
		mpz_addmul(xy->resclass_x, xy->tmp2, xy->crt1);

		/* ... and for y */

		uint64_2gmp(lattice_xy->y, xy->tmp1);
		uint64_2gmp(lattice_xyz->y, xy->tmp2);
		mpz_mul(xy->resclass_y, xy->tmp1, xy->crt0);
		mpz_addmul(xy->resclass_y, xy->tmp2, xy->crt1);

		mpz_tdiv_r(xy->resclass_x, xy->resclass_x,
				xy->mp_lattice_size);
		mpz_tdiv_r(xy->resclass_y, xy->resclass_y,
				xy->mp_lattice_size);

		xy->curr_score = lattice_xyz->score + lattice_xy->score;
		rs->curr_z = z_base + lattice_xyz->z;

		sieve_x_run_deg6(rs);

		if (obj->flags & MSIEVE_FLAG_STOP_SIEVING)
			break;
	}
}
/* Input: p is the initial generator (sigma), if 0, generate it at random. N is the number to factor B1 is the stage 1 bound B2 is the stage 2 bound B1done is the stage 1 limit to which supplied residue has already been computed k is the number of blocks for stage 2 verbose is the verbosity level Output: f is the factor found, p is the residue at end of stage 1 Return value: non-zero iff a factor is found (1 for stage 1, 2 for stage 2) */ int pm1 (mpz_t f, mpz_t p, mpz_t N, mpz_t go, double *B1done, double B1, mpz_t B2min_parm, mpz_t B2_parm, double B2scale, unsigned long k, const int S, int verbose, int repr, int use_ntt, FILE *os, FILE *es, char *chkfilename, char *TreeFilename, double maxmem, gmp_randstate_t rng, int (*stop_asap)(void)) { int youpi = ECM_NO_FACTOR_FOUND; int base2 = 0; int Nbits, smallbase; int po2 = 0; /* Whether we should use power-of-2 poly degree */ long st; mpmod_t modulus; mpres_t x; mpz_t B2min, B2; /* Local B2, B2min to avoid changing caller's values */ unsigned long dF; root_params_t root_params; faststage2_param_t faststage2_params; /* If stage2_variant != 0, we use the new fast stage 2 */ const int stage2_variant = (S == 1 || S == ECM_DEFAULT_S); set_verbose (verbose); ECM_STDOUT = (os == NULL) ? stdout : os; ECM_STDERR = (es == NULL) ? stdout : es; /* if n is even, return 2 */ if (mpz_divisible_2exp_p (N, 1)) { mpz_set_ui (f, 2); return ECM_FACTOR_FOUND_STEP1; } st = cputime (); if (mpz_cmp_ui (p, 0) == 0) pm1_random_seed (p, N, rng); mpz_init_set (B2min, B2min_parm); mpz_init_set (B2, B2_parm); /* Set default B2. 
See ecm.c for comments */ if (ECM_IS_DEFAULT_B2(B2)) { if (stage2_variant == 0) mpz_set_d (B2, B2scale * pow (B1 * PM1_COST, DEFAULT_B2_EXPONENT)); else mpz_set_d (B2, B2scale * pow (B1 * PM1FS2_COST, PM1FS2_DEFAULT_B2_EXPONENT)); } /* set B2min */ if (mpz_sgn (B2min) < 0) mpz_set_d (B2min, B1); if (repr != ECM_MOD_DEFAULT && repr != ECM_MOD_NOBASE2) { if (repr == ECM_MOD_MODMULN) mpmod_init_MODMULN (modulus, N); else if (repr == ECM_MOD_REDC) mpmod_init_REDC (modulus, N); else if (abs (repr) > 16) { if (mpmod_init_BASE2 (modulus, repr, N) == ECM_ERROR) return ECM_ERROR; } else mpmod_init_MPZ (modulus, N); } else /* automatic choice */ { /* Find a good arithmetic for this number */ Nbits = mpz_sizeinbase (N, 2); base2 = (repr == 0) ? isbase2 (N, BASE2_THRESHOLD) : 0; smallbase = mpz_fits_uint_p (p); /* TODO: make dependent on Nbits and base2 */ if (base2) { mpmod_init_BASE2 (modulus, base2, N); } else if (mpz_size (N) <= 2 * POWM_THRESHOLD && smallbase && B1 <= 1e6) /* Below POWM_THRESHOLD, mpz_powm uses MODMULN reduction, too, but without special code for small bases which makes our MODMULN faster. Above POWM_THRESHOLD mpz_powm uses faster mod reduction, at about 2*POWM_THRESHOLD it catches up with our smallbase-MODMULN and then is faster until REDC takes over. */ { outputf (OUTPUT_VERBOSE, "Using MODMULN\n"); mpmod_init_MODMULN (modulus, N); } else if (Nbits > 50000 || (Nbits > 3500 && smallbase)) { outputf (OUTPUT_VERBOSE, "Using REDC\n"); mpmod_init_REDC (modulus, N); } else { outputf (OUTPUT_VERBOSE, "Using mpz_powm\n"); mpmod_init_MPZ (modulus, N); } } /* Determine parameters (polynomial degree etc.) 
*/ if (stage2_variant != 0) { long P_ntt, P_nontt; const unsigned long lmax = 1UL<<28; /* An upper bound */ unsigned long lmax_NTT, lmax_noNTT; faststage2_param_t params_ntt, params_nontt, *better_params; mpz_init (faststage2_params.m_1); faststage2_params.l = 0; mpz_init (params_ntt.m_1); params_ntt.l = 0; mpz_init (params_nontt.m_1); params_nontt.l = 0; /* Find out what the longest transform length is we can do at all. If no maxmem is given, the non-NTT can theoretically do any length. */ lmax_NTT = 0; if (use_ntt) { unsigned long t; /* See what transform length the NTT can handle (due to limited primes and limited memory) */ t = mpzspm_max_len (N); lmax_NTT = MIN (lmax, t); if (maxmem != 0.) { t = pm1fs2_maxlen (double_to_size (maxmem), N, use_ntt); lmax_NTT = MIN (lmax_NTT, t); } outputf (OUTPUT_DEVVERBOSE, "NTT can handle lmax <= %lu\n", lmax_NTT); /* FIXME: if both ntt and no-ntt are tried, but finally ntt is preferred, the last B2 bound computed is that of no-ntt, which is thus wrong */ P_ntt = choose_P (B2min, B2, lmax_NTT, k, ¶ms_ntt, B2min, B2, 1, ECM_PM1); if (P_ntt != ECM_ERROR) outputf (OUTPUT_DEVVERBOSE, "Parameters for NTT: P=%lu, l=%lu\n", params_ntt.P, params_ntt.l); } else P_ntt = 0; /* or GCC complains about uninitialized var */ /* See what transform length the non-NTT code can handle */ lmax_noNTT = lmax; if (maxmem != 0.) 
{ unsigned long t; t = pm1fs2_maxlen (double_to_size (maxmem), N, 0); lmax_noNTT = MIN (lmax_noNTT, t); outputf (OUTPUT_DEVVERBOSE, "non-NTT can handle lmax <= %lu\n", lmax_noNTT); } if (use_ntt != 2) P_nontt = choose_P (B2min, B2, lmax_noNTT, k, ¶ms_nontt, B2min, B2, 0, ECM_PM1); else P_nontt = ECM_ERROR; if (P_nontt != ECM_ERROR) outputf (OUTPUT_DEVVERBOSE, "Parameters for non-NTT: P=%lu, l=%lu\n", params_nontt.P, params_nontt.l); if (((!use_ntt || P_ntt == ECM_ERROR) && P_nontt == ECM_ERROR) || (use_ntt == 2 && P_ntt == ECM_ERROR)) { outputf (OUTPUT_ERROR, "Error: cannot choose suitable P value for your stage 2 " "parameters.\nTry a shorter B2min,B2 interval.\n"); mpz_clear (faststage2_params.m_1); mpz_clear (params_ntt.m_1); mpz_clear (params_nontt.m_1); return ECM_ERROR; } /* Now decide wether to take NTT or non-NTT. How to choose the better one is not an easy question. It will depend on the speed ratio between NTT/non-NTT code, their difference in memory use and available memory. For now, we choose the one that uses a longer transform length. FIXME: Write something not brain-dead here */ if (use_ntt == 0 || P_ntt == ECM_ERROR || (use_ntt == 1 && params_nontt.l > params_ntt.l)) { better_params = ¶ms_nontt; use_ntt = 0; } else { better_params = ¶ms_ntt; use_ntt = 1; } faststage2_params.P = better_params->P; faststage2_params.s_1 = better_params->s_1; faststage2_params.s_2 = better_params->s_2; faststage2_params.l = better_params->l; mpz_set (faststage2_params.m_1, better_params->m_1); mpz_clear (params_ntt.m_1); mpz_clear (params_nontt.m_1); if (maxmem != 0.) outputf (OUTPUT_VERBOSE, "Using lmax = %lu with%s NTT which takes " "about %luMB of memory\n", faststage2_params.l, (use_ntt) ? 
"" : "out", pm1fs2_memory_use (faststage2_params.l, N, use_ntt)/1048576); } else { mpz_init (root_params.i0); root_params.d2 = 0; /* Enable automatic choice of d2 */ if (use_ntt || (modulus->repr == ECM_MOD_BASE2 && modulus->Fermat > 0)) po2 = 1; if (bestD (&root_params, &k, &dF, B2min, B2, po2, use_ntt, maxmem, (TreeFilename != NULL), modulus) == ECM_ERROR) { youpi = ECM_ERROR; goto clear_and_exit; } root_params.S = S; /* Set default degree for Brent-Suyama extension */ if (root_params.S == ECM_DEFAULT_S) { if (modulus->repr == ECM_MOD_BASE2 && modulus->Fermat > 0) { /* For Fermat numbers, default is 2 (no Brent-Suyama) */ root_params.S = 2; } else { mpz_t t; mpz_init (t); mpz_sub (t, B2, B2min); if (mpz_cmp_d (t, 3.5e5) < 0) /* B1 < 50000 */ root_params.S = -4; /* Dickson polys give a slightly better chance of success */ else if (mpz_cmp_d (t, 1.1e7) < 0) /* B1 < 500000 */ root_params.S = -6; else if (mpz_cmp_d (t, 1.25e8) < 0) /* B1 < 3000000 */ root_params.S = 12; /* but for S>6, S-th powers are faster thanks to invtrick */ else if (mpz_cmp_d (t, 7.e9) < 0) /* B1 < 50000000 */ root_params.S = 24; else if (mpz_cmp_d (t, 1.9e10) < 0) /* B1 < 100000000 */ root_params.S = 48; else if (mpz_cmp_d (t, 5.e11) < 0) /* B1 < 1000000000 */ root_params.S = 60; else root_params.S = 120; mpz_clear (t); } } /* We need Suyama's power even and at least 2 for P-1 stage 2 to work correctly */ if (root_params.S & 1) root_params.S *= 2; /* FIXME: Is this what the user would expect? */ } /* Print B1, B2, polynomial and x0 */ print_B1_B2_poly (OUTPUT_NORMAL, ECM_PM1, B1, *B1done, B2min_parm, B2min, B2, (stage2_variant == 0) ? 
root_params.S : 1, p, 0, NULL); /* If we do a stage 2, print its parameters */ if (mpz_cmp (B2, B2min) >= 0) { if (stage2_variant != 0) outputf (OUTPUT_VERBOSE, "P = %lu, l = %lu, s_1 = %lu, k = s_2 = %lu, " "m_1 = %Zd\n", faststage2_params.P, faststage2_params.l, faststage2_params.s_1,faststage2_params.s_2, faststage2_params.m_1); else outputf (OUTPUT_VERBOSE, "dF=%lu, k=%lu, d=%lu, d2=%lu, i0=%Zd\n", dF, k, root_params.d1, root_params.d2, root_params.i0); } if (test_verbose (OUTPUT_VERBOSE)) { if (mpz_sgn (B2min_parm) >= 0) { outputf (OUTPUT_VERBOSE, "Can't compute success probabilities for B1 <> B2min\n"); } else { rhoinit (256, 10); print_prob (B1, B2, dF, k, (stage2_variant == 0) ? root_params.S : 1, go); } } mpres_init (x, modulus); mpres_set_z (x, p, modulus); st = cputime (); if (B1 > *B1done) youpi = pm1_stage1 (f, x, modulus, B1, B1done, go, stop_asap, chkfilename); st = elltime (st, cputime ()); outputf (OUTPUT_NORMAL, "Step 1 took %ldms\n", st); if (test_verbose (OUTPUT_RESVERBOSE)) { mpz_t tx; mpz_init (tx); mpres_get_z (tx, x, modulus); outputf (OUTPUT_RESVERBOSE, "x=%Zd\n", tx); mpz_clear (tx); } if (stop_asap != NULL && (*stop_asap) ()) goto clear_and_exit; if (youpi == ECM_NO_FACTOR_FOUND && mpz_cmp (B2, B2min) >= 0) { if (stage2_variant != 0) { if (use_ntt) youpi = pm1fs2_ntt (f, x, modulus, &faststage2_params); else youpi = pm1fs2 (f, x, modulus, &faststage2_params); } else youpi = stage2 (f, &x, modulus, dF, k, &root_params, ECM_PM1, use_ntt, TreeFilename, stop_asap); } if (test_verbose (OUTPUT_VERBOSE)) { if (mpz_sgn (B2min_parm) < 0) rhoinit (1, 0); /* Free memory of rhotable */ } clear_and_exit: mpres_get_z (p, x, modulus); mpres_clear (x, modulus); mpmod_clear (modulus); if (stage2_variant != 0) mpz_clear (faststage2_params.m_1); else mpz_clear (root_params.i0); mpz_clear (B2); mpz_clear (B2min); return youpi; }
/* Input: p is the initial generator (sigma), if 0 generate it at random. n is the number to factor B1 is the stage 1 bound B2 is the stage 2 bound k is the number of blocks for stage 2 verbose is the verbosity level Output: p is the factor found Return value: non-zero iff a factor is found (1 for stage 1, 2 for stage 2) */ int pp1 (mpz_t f, mpz_t p, mpz_t n, mpz_t go, double *B1done, double B1, mpz_t B2min_parm, mpz_t B2_parm, double B2scale, unsigned long k, const int S, int verbose, int repr, int use_ntt, FILE *os, FILE *es, char *chkfilename, char *TreeFilename, double maxmem, gmp_randstate_t rng, int (*stop_asap)(void)) { int youpi = ECM_NO_FACTOR_FOUND; int po2 = 0; /* Whether we should use power-of-2 poly degree */ long st; mpres_t a; mpmod_t modulus; mpz_t B2min, B2; /* Local B2, B2min to avoid changing caller's values */ unsigned long dF; root_params_t root_params; faststage2_param_t faststage2_params; const int stage2_variant = (S == 1 || S == ECM_DEFAULT_S); int twopass = 0; set_verbose (verbose); ECM_STDOUT = (os == NULL) ? stdout : os; ECM_STDERR = (es == NULL) ? stdout : es; /* if n is even, return 2 */ if (mpz_divisible_2exp_p (n, 1)) { mpz_set_ui (f, 2); return ECM_FACTOR_FOUND_STEP1; } st = cputime (); if (mpz_cmp_ui (p, 0) == 0) pm1_random_seed (p, n, rng); mpz_init_set (B2min, B2min_parm); mpz_init_set (B2, B2_parm); /* Set default B2. See ecm.c for comments */ if (ECM_IS_DEFAULT_B2(B2)) { if (stage2_variant == 0) mpz_set_d (B2, B2scale * pow (B1 * PP1_COST, DEFAULT_B2_EXPONENT)); else mpz_set_d (B2, B2scale * pow (B1 * PP1FS2_COST, PM1FS2_DEFAULT_B2_EXPONENT)); } /* set B2min */ if (mpz_sgn (B2min) < 0) mpz_set_d (B2min, B1); mpmod_init (modulus, n, repr); if (use_ntt) po2 = 1; if (stage2_variant != 0) { long P; const unsigned long lmax = 1UL<<28; /* An upper bound */ unsigned long lmax_NTT, lmax_noNTT; mpz_init (faststage2_params.m_1); faststage2_params.l = 0; /* Find out what the longest transform length is we can do at all. 
If no maxmem is given, the non-NTT can theoretically do any length. */ lmax_NTT = 0; if (use_ntt) { unsigned long t, t2 = 0; /* See what transform length that the NTT can handle (due to limited primes and limited memory) */ t = mpzspm_max_len (n); lmax_NTT = MIN (lmax, t); if (maxmem != 0.) { t = pp1fs2_maxlen (double_to_size (maxmem), n, use_ntt, 0); t = MIN (t, lmax_NTT); /* Maybe the two pass variant lets us use a longer transform */ t2 = pp1fs2_maxlen (double_to_size (maxmem), n, use_ntt, 1); t2 = MIN (t2, lmax_NTT); if (t2 > t) { t = t2; twopass = 1; } lmax_NTT = t; } outputf (OUTPUT_DEVVERBOSE, "NTT can handle lmax <= %lu\n", lmax_NTT); } /* See what transform length that the non-NTT code can handle */ lmax_noNTT = lmax; if (maxmem != 0.) { unsigned long t; t = pp1fs2_maxlen (double_to_size (maxmem), n, 0, 0); lmax_noNTT = MIN (lmax_noNTT, t); outputf (OUTPUT_DEVVERBOSE, "non-NTT can handle lmax <= %lu\n", lmax_noNTT); } P = choose_P (B2min, B2, MAX(lmax_noNTT, lmax_NTT), k, &faststage2_params, B2min, B2, use_ntt, ECM_PP1); if (P == ECM_ERROR) { outputf (OUTPUT_ERROR, "Error: cannot choose suitable P value for your stage 2 " "parameters.\nTry a shorter B2min,B2 interval.\n"); mpz_clear (faststage2_params.m_1); return ECM_ERROR; } /* See if the selected parameters let us use NTT or not */ if (faststage2_params.l > lmax_NTT) use_ntt = 0; if (maxmem != 0.) 
{ unsigned long MB; char *s; if (!use_ntt) s = "out"; else if (twopass) s = " two pass"; else s = " one pass"; MB = pp1fs2_memory_use (faststage2_params.l, n, use_ntt, twopass) / 1048576; outputf (OUTPUT_VERBOSE, "Using lmax = %lu with%s NTT which takes " "about %luMB of memory\n", faststage2_params.l, s, MB); } } else { mpz_init (root_params.i0); root_params.d2 = 0; /* Enable automatic choice of d2 */ if (bestD (&root_params, &k, &dF, B2min, B2, po2, use_ntt, maxmem, (TreeFilename != NULL), modulus) == ECM_ERROR) { youpi = ECM_ERROR; goto clear_and_exit; } /* Set default degree for Brent-Suyama extension */ root_params.S = S; if (root_params.S == ECM_DEFAULT_S) { if (modulus->repr == ECM_MOD_BASE2 && modulus->Fermat > 0) { /* For Fermat numbers, default is 1 (no Brent-Suyama) */ root_params.S = 1; } else { mpz_t t; mpz_init (t); mpz_sub (t, B2, B2min); root_params.S = choose_S (t); mpz_clear (t); } } } /* Print B1, B2, polynomial and x0 */ print_B1_B2_poly (OUTPUT_NORMAL, ECM_PP1, B1, *B1done, B2min_parm, B2min, B2, (stage2_variant == 0) ? root_params.S : 1, p, 0, NULL); /* If we do a stage 2, print its parameters */ if (mpz_cmp (B2, B2min) >= 0) { if (stage2_variant != 0) outputf (OUTPUT_VERBOSE, "P = %lu, l = %lu, s_1 = %lu, k = s_2 = %lu, " "m_1 = %Zd\n", faststage2_params.P, faststage2_params.l, faststage2_params.s_1,faststage2_params.s_2, faststage2_params.m_1); else outputf (OUTPUT_VERBOSE, "dF=%lu, k=%lu, d=%lu, d2=%lu, i0=%Zd\n", dF, k, root_params.d1, root_params.d2, S == 1 ? 
faststage2_params.m_1 : root_params.i0); } mpres_init (a, modulus); mpres_set_z (a, p, modulus); /* since pp1_mul_prac takes an ecm_uint, we have to check that B1 <= ECM_UINT_MAX */ if (B1 > (double) ECM_UINT_MAX) { outputf (OUTPUT_ERROR, "Error, maximal step1 bound for P+1 is %lu\n", ECM_UINT_MAX); youpi = ECM_ERROR; goto clear_and_exit; } if (B1 > *B1done) youpi = pp1_stage1 (f, a, modulus, B1, B1done, go, stop_asap, chkfilename); outputf (OUTPUT_NORMAL, "Step 1 took %ldms\n", elltime (st, cputime ())); if (test_verbose (OUTPUT_RESVERBOSE)) { mpz_t t; mpz_init (t); mpres_get_z (t, a, modulus); outputf (OUTPUT_RESVERBOSE, "x=%Zd\n", t); mpz_clear (t); } mpres_get_z (p, a, modulus); if (stop_asap != NULL && (*stop_asap) ()) goto clear_and_exit; if (youpi == ECM_NO_FACTOR_FOUND && mpz_cmp (B2, B2min) >= 0) { if (stage2_variant != 0) { if (use_ntt) youpi = pp1fs2_ntt (f, a, modulus, &faststage2_params, twopass); else youpi = pp1fs2 (f, a, modulus, &faststage2_params); } else youpi = stage2 (f, &a, modulus, dF, k, &root_params, ECM_PP1, use_ntt, TreeFilename, stop_asap); } if (youpi > 0 && test_verbose (OUTPUT_NORMAL)) pp1_check_factor (p, f); /* tell user if factor was found by P-1 */ clear_and_exit: mpres_clear (a, modulus); mpmod_clear (modulus); if (stage2_variant != 0) mpz_clear (faststage2_params.m_1); else mpz_clear (root_params.i0); mpz_clear (B2); mpz_clear (B2min); return youpi; }
void testmain (int argc, char **argv) { unsigned i; mpz_t x; for (i = 0; values[i].s; i++) { char *s; mpz_init_set_d (x, values[i].d); s = mpz_get_str (NULL, 16, x); if (strcmp (s, values[i].s) != 0) { fprintf (stderr, "mpz_set_d failed:\n" "d = %.20g\n" "s = %s\n" "r = %s\n", values[i].d, s, values[i].s); abort (); } testfree (s); mpz_clear (x); } mpz_init (x); for (i = 0; i < COUNT; i++) { /* Use volatile, to avoid extended precision in floating point registers, e.g., on m68k and 80387. */ volatile double d, f; unsigned long m; int e; mini_rrandomb (x, GMP_LIMB_BITS); m = mpz_get_ui (x); mini_urandomb (x, 8); e = mpz_get_ui (x) - 100; d = ldexp ((double) m, e); mpz_set_d (x, d); f = mpz_get_d (x); if (f != floor (d)) { fprintf (stderr, "mpz_set_d/mpz_get_d failed:\n"); goto dumperror; } if ((f == d) ? (mpz_cmp_d (x, d) != 0) : (mpz_cmp_d (x, d) >= 0)) { fprintf (stderr, "mpz_cmp_d (x, d) failed:\n"); goto dumperror; } f = d + 1.0; if (f > d && ! (mpz_cmp_d (x, f) < 0)) { fprintf (stderr, "mpz_cmp_d (x, f) failed:\n"); goto dumperror; } d = - d; mpz_set_d (x, d); f = mpz_get_d (x); if (f != ceil (d)) { fprintf (stderr, "mpz_set_d/mpz_get_d failed:\n"); dumperror: dump ("x", x); fprintf (stderr, "m = %lx, e = %i\n", m, e); fprintf (stderr, "d = %.15g\n", d); fprintf (stderr, "f = %.15g\n", f); fprintf (stderr, "f - d = %.5g\n", f - d); abort (); } if ((f == d) ? (mpz_cmp_d (x, d) != 0) : (mpz_cmp_d (x, d) <= 0)) { fprintf (stderr, "mpz_cmp_d (x, d) failed:\n"); goto dumperror; } f = d - 1.0; if (f < d && ! (mpz_cmp_d (x, f) > 0)) { fprintf (stderr, "mpz_cmp_d (x, f) failed:\n"); goto dumperror; } } mpz_clear (x); }
/*-------------------------------------------------------------------*/ void alg_square_root(msieve_obj *obj, mp_poly_t *mp_alg_poly, mp_t *n, mp_t *c, signed_mp_t *m1, signed_mp_t *m0, abpair_t *rlist, uint32 num_relations, uint32 check_q, mp_t *sqrt_a) { /* external interface for computing the algebraic square root */ uint32 i; gmp_poly_t alg_poly; gmp_poly_t d_alg_poly; gmp_poly_t prod; gmp_poly_t alg_sqrt; relation_prod_t prodinfo; double log2_prodsize; mpz_t q; /* initialize */ mpz_init(q); gmp_poly_init(&alg_poly); gmp_poly_init(&d_alg_poly); gmp_poly_init(&prod); gmp_poly_init(&alg_sqrt); /* convert the algebraic poly to arbitrary precision */ for (i = 0; i < mp_alg_poly->degree; i++) { signed_mp_t *coeff = mp_alg_poly->coeff + i; mp2gmp(&coeff->num, alg_poly.coeff[i]); if (coeff->sign == NEGATIVE) mpz_neg(alg_poly.coeff[i], alg_poly.coeff[i]); } alg_poly.degree = mp_alg_poly->degree - 1; /* multiply all the relations together */ prodinfo.monic_poly = &alg_poly; prodinfo.rlist = rlist; prodinfo.c = c; logprintf(obj, "multiplying %u relations\n", num_relations); multiply_relations(&prodinfo, 0, num_relations - 1, &prod); logprintf(obj, "multiply complete, coefficients have about " "%3.2lf million bits\n", (double)mpz_sizeinbase(prod.coeff[0], 2) / 1e6); /* perform a sanity check on the result */ i = verify_product(&prod, rlist, num_relations, check_q, c, mp_alg_poly); free(rlist); if (i == 0) { logprintf(obj, "error: relation product is incorrect\n"); goto finished; } /* multiply by the square of the derivative of alg_poly; this will guarantee that the square root of prod actually is an element of the number field defined by alg_poly. If we didn't do this, we run the risk of the main Newton iteration not converging */ gmp_poly_monic_derivative(&alg_poly, &d_alg_poly); gmp_poly_mul(&d_alg_poly, &d_alg_poly, &alg_poly, 0); gmp_poly_mul(&prod, &d_alg_poly, &alg_poly, 1); /* pick the initial small prime to start the Newton iteration. 
To save both time and memory, choose an initial prime such that squaring it a large number of times will produce a value just a little larger than we need to calculate the square root. Note that contrary to what some authors write, pretty much any starting prime is okay. The Newton iteration has a division by 2, so that 2 must be invertible mod the prime (this is guaranteed for odd primes). Also, the Newton iteration will fail if both square roots have the same value mod the prime; however, even a 16-bit prime makes this very unlikely */ i = mpz_size(prod.coeff[0]); log2_prodsize = (double)GMP_LIMB_BITS * (i - 2) + log(mpz_getlimbn(prod.coeff[0], (mp_size_t)(i-1)) * pow(2.0, (double)GMP_LIMB_BITS) + mpz_getlimbn(prod.coeff[0], (mp_size_t)(i-2))) / M_LN2 + 10000; while (log2_prodsize > 31.5) log2_prodsize *= 0.5; mpz_set_d(q, (uint32)pow(2.0, log2_prodsize) + 1); /* get the initial inverse square root */ if (!get_initial_inv_sqrt(obj, mp_alg_poly, &prod, &alg_sqrt, q)) { goto finished; } /* compute the actual square root */ if (get_final_sqrt(obj, &alg_poly, &prod, &alg_sqrt, q)) convert_to_integer(&alg_sqrt, n, c, m1, m0, sqrt_a); finished: gmp_poly_clear(&prod); gmp_poly_clear(&alg_sqrt); gmp_poly_clear(&alg_poly); gmp_poly_clear(&d_alg_poly); mpz_clear(q); }
/** set the time of the present event (double format)
 *
 * Stores the given time unchanged in _dTime and stores in _timeOfEvent
 * the same time expressed in units of _tick, rounded to an integral
 * value with rint().
 *
 * \param time the new time
 */
inline void setTime(double time)
{
  _dTime = time;
  // integer tick count: rint() rounds the quotient before the conversion
  // to the GMP integer
  mpz_set_d(_timeOfEvent, rint(_dTime / _tick));
};
/*-------------------------------------------------------------------------*/
/* Degree-5 root sieve, (x,y) stage, for one lattice size.

   rs            root sieve state; rs->xydata is filled in here
   lattice_size  size of the lattice used in the y direction
   line_min,
   line_max      range of y values to cover (both are reused further
                 down as the running x range)

   The y range is cut into blocks of lattice_size. For every block the
   usable x range is computed with compute_line_size and stored in
   xy->x_line_min / xy->x_line_max; blocks with an empty x range are
   trimmed from both ends before the x sieve is started. */
void sieve_xy_run_deg5(root_sieve_t *rs, uint64 lattice_size,
			double line_min, double line_max)
{
	uint32 i, j;
	sieve_xy_t *xy = &rs->xydata;
	hit_t hitlist[MAX_CRT_FACTORS];
	uint32 num_lattice_primes;
	uint32 num_lattices;
	uint32 y_blocks;
	int64 curr_y;
	double direction[3] = {0, 1, 0};

	/* keep the lattice size as integer, double and mpz */

	xy->lattice_size = lattice_size;
	xy->dbl_lattice_size = (double)lattice_size;
	uint64_2gmp(lattice_size, xy->mp_lattice_size);

	/* y_base is a multiple of lattice_size, placed one block
	   before line_min / lattice_size */

	mpz_set_d(xy->y_base, line_min / lattice_size - 1);
	mpz_mul(xy->y_base, xy->y_base, xy->mp_lattice_size);

	/* grow the per-block x range arrays if this call needs more
	   blocks than the count currently stored in xy->y_blocks */

	y_blocks = (line_max - line_min) / lattice_size + 1;
	if (y_blocks > xy->y_blocks) {
		xy->x_line_min = (double *)xrealloc(xy->x_line_min,
						y_blocks * sizeof(double));
		xy->x_line_max = (double *)xrealloc(xy->x_line_max,
						y_blocks * sizeof(double));
	}
	xy->y_blocks = y_blocks;

	/* the stored lattice count is reset first, so the size
	   comparisons below succeed for any nonzero num_lattices */

	xy->num_lattices = 0;
	if (lattice_size == 1) {

		/* trivial lattice: no primes, one all-zero entry */

		num_lattice_primes = xy->num_lattice_primes = 0;
		num_lattices = 1;
		if (num_lattices > xy->num_lattices) {
			xy->lattices = (lattice_t *)xrealloc(xy->lattices,
					num_lattices * sizeof(lattice_t));
		}
		memset(xy->lattices, 0, sizeof(lattice_t));
	}
	else {
		num_lattice_primes = xy->num_lattice_primes =
				find_lattice_primes(rs->primes,
					rs->num_primes, lattice_size,
					xy->lattice_primes);

		find_hits(xy->lattice_primes,
				num_lattice_primes, hitlist);

		/* the number of lattices is the product of the root
		   counts of all lattice primes */

		for (i = 0, num_lattices = 1;
				i < num_lattice_primes; i++) {
			num_lattices *= hitlist[i].num_roots;
		}

		if (num_lattices > xy->num_lattices) {
			xy->lattices = (lattice_t *)xrealloc(xy->lattices,
					num_lattices * sizeof(lattice_t));
		}

		compute_lattices(hitlist, num_lattice_primes,
				xy->lattices, lattice_size,
				num_lattices, 2);
	}
	xy->num_lattices = num_lattices;

	/* from here on line_min / line_max describe the x range;
	   the search direction becomes (1,0,0) */

	line_min = -10000;
	line_max = 10000;
	direction[0] = 1;
	direction[1] = 0;
	direction[2] = 0;
	curr_y = gmp2int64(xy->y_base);
	for (i = 0; i < y_blocks; i++) {

		/* polynomial shifted to the current y value */

		dpoly_t apoly = rs->apoly;

		apoly.coeff[2] += rs->dbl_p * curr_y;
		apoly.coeff[1] -= rs->dbl_d * curr_y;

		/* the previous block's range is passed in as the
		   starting range and overwritten with the new one */

		compute_line_size(rs->max_norm, &apoly,
				rs->dbl_p, rs->dbl_d, direction,
				line_min, line_max, &line_min, &line_max);

		if (line_min >= line_max) {

			/* empty block: record it as 0/0 and restart
			   from the default range for the next one */

			xy->x_line_min[i] = 0;
			xy->x_line_max[i] = 0;
			line_min = -10000;
			line_max = 10000;
		}
		else {
			xy->x_line_min[i] = line_min;
			xy->x_line_max[i] = line_max;
		}

		curr_y += lattice_size;
	}

	/* drop trailing blocks whose min and max are equal */

	for (i = y_blocks; i; i--) {
		if (xy->x_line_min[i-1] != xy->x_line_max[i-1])
			break;
	}
	y_blocks = i;

	/* count leading blocks whose min and max are equal; i is
	   the number of blocks to skip */

	for (i = 0; i < y_blocks; i++) {
		if (xy->x_line_min[i] != xy->x_line_max[i])
			break;
	}

	/* advance y_base past the skipped blocks and shift the
	   remaining entries to the front of the arrays */

	mpz_addmul_ui(xy->y_base, xy->mp_lattice_size, i);
	y_blocks -= i;
	if (i > 0) {
		for (j = 0; j < y_blocks; j++) {
			xy->x_line_min[j] = xy->x_line_min[j+i];
			xy->x_line_max[j] = xy->x_line_max[j+i];
		}
	}
	xy->y_blocks = y_blocks;

	/* disabled debug output */
#if 0
	printf("\n%.0lf %u %u\n", (double)lattice_size,
			y_blocks, num_lattices);
#endif

	sieve_x_run_deg5(rs);
}
// Construct the wrapper from a double.
//
// The wrapped GMP integer is initialised and assigned in a single step;
// the conversion keeps only the integer part of `op`.  The wrapper is
// marked valid.
vanilla::int_object::gmp_mpz_wrapper::gmp_mpz_wrapper(double op)
    : _mpz(), _valid(true)
{
    mpz_init_set_d(_mpz, op);
}