// We can ignore scale(i) here, because it factors out. efloat_t DParrayConstrained::path_P(const vector<int>& g_path) const { const int I = size()-1; int i=I; int l=g_path.size()-1; int state2 = g_path[l]; vector<double> transition(nstates()); efloat_t Pr=1.0; while (l>0) { transition.resize(states(i).size()); for(int s1=0; s1<transition.size(); s1++) { int state1 = states(i)[s1]; transition[s1] = (*this)(i,state1)*GQ(state1,state2); } int state1 = g_path[l-1]; int s1 = find_index(states(i),state1); assert(s1 != -1); double p = choose_P(s1,transition); assert(p > 0.0); if (di(state1)) i--; l--; state2 = state1; Pr *= p; assert(Pr > 0.0); } // In column 0, all states are allowed: // - silent states can't contradict anything // - non-silent states reprent the start state. transition.resize(states(0).size()); // include probability of choosing 'Start' vs ---+ ! for(int state1=0; state1<nstates(); state1++) transition[state1] = (*this)(0,state1) * GQ(state1,state2); // Get the probability that the previous state was 'Start' double p=0.0; for(int state1=0; state1<nstates(); state1++) if (not silent(state1)) p += choose_P(state1,transition); Pr *= p; assert(Pr > 0.0); return Pr; }
// We can ignore scale(i) here, because it factors out. efloat_t DParray::path_P(const vector<int>& g_path) const { const int I = size()-1; int i=I; int l=g_path.size()-1; int state2 = g_path[l]; vector<double> transition(nstates()); efloat_t Pr=1.0; while (l>0) { for(int state1=0; state1<nstates(); state1++) transition[state1] = (*this)(i,state1)*GQ(state1,state2); int state1 = g_path[l-1]; double p = choose_P(state1,transition); assert(p > 0.0); if (di(state1)) i--; l--; state2 = state1; Pr *= p; } // include probability of choosing 'Start' vs ---+ ! for(int state1=0; state1<nstates(); state1++) transition[state1] = (*this)(0,state1) * GQ(state1,state2); // Get the probability that the previous state was 'Start' double p=0.0; for(int state1=0; state1<nstates(); state1++) if (not silent(state1)) p += choose_P(state1,transition); Pr *= p; assert(Pr > 0.0); return Pr; }
/* Input: p is the initial generator (sigma), if 0, generate it at random. N is the number to factor B1 is the stage 1 bound B2 is the stage 2 bound B1done is the stage 1 limit to which supplied residue has already been computed k is the number of blocks for stage 2 verbose is the verbosity level Output: f is the factor found, p is the residue at end of stage 1 Return value: non-zero iff a factor is found (1 for stage 1, 2 for stage 2) */ int pm1 (mpz_t f, mpz_t p, mpz_t N, mpz_t go, double *B1done, double B1, mpz_t B2min_parm, mpz_t B2_parm, double B2scale, unsigned long k, const int S, int verbose, int repr, int use_ntt, FILE *os, FILE *es, char *chkfilename, char *TreeFilename, double maxmem, gmp_randstate_t rng, int (*stop_asap)(void)) { int youpi = ECM_NO_FACTOR_FOUND; int base2 = 0; int Nbits, smallbase; int po2 = 0; /* Whether we should use power-of-2 poly degree */ long st; mpmod_t modulus; mpres_t x; mpz_t B2min, B2; /* Local B2, B2min to avoid changing caller's values */ unsigned long dF; root_params_t root_params; faststage2_param_t faststage2_params; /* If stage2_variant != 0, we use the new fast stage 2 */ const int stage2_variant = (S == 1 || S == ECM_DEFAULT_S); set_verbose (verbose); ECM_STDOUT = (os == NULL) ? stdout : os; ECM_STDERR = (es == NULL) ? stdout : es; /* if n is even, return 2 */ if (mpz_divisible_2exp_p (N, 1)) { mpz_set_ui (f, 2); return ECM_FACTOR_FOUND_STEP1; } st = cputime (); if (mpz_cmp_ui (p, 0) == 0) pm1_random_seed (p, N, rng); mpz_init_set (B2min, B2min_parm); mpz_init_set (B2, B2_parm); /* Set default B2. See ecm.c for comments */ if (ECM_IS_DEFAULT_B2(B2)) { if (stage2_variant == 0) mpz_set_d (B2, B2scale * pow (B1 * PM1_COST, DEFAULT_B2_EXPONENT)); else mpz_set_d (B2, B2scale * pow (B1 * PM1FS2_COST, PM1FS2_DEFAULT_B2_EXPONENT)); } /* set B2min */ if (mpz_sgn (B2min) < 0) mpz_set_d (B2min, B1); if (repr != ECM_MOD_DEFAULT && repr != ECM_MOD_NOBASE2) { if (repr == ECM_MOD_MODMULN) mpmod_init_MODMULN (modulus, N); else if (repr == ECM_MOD_REDC) mpmod_init_REDC (modulus, N); else if (abs (repr) > 16) { if (mpmod_init_BASE2 (modulus, repr, N) == ECM_ERROR) return ECM_ERROR; } else mpmod_init_MPZ (modulus, N); } else /* automatic choice */ { /* Find a good arithmetic for this number */ Nbits = mpz_sizeinbase (N, 2); base2 = (repr == 0) ? isbase2 (N, BASE2_THRESHOLD) : 0; smallbase = mpz_fits_uint_p (p); /* TODO: make dependent on Nbits and base2 */ if (base2) { mpmod_init_BASE2 (modulus, base2, N); } else if (mpz_size (N) <= 2 * POWM_THRESHOLD && smallbase && B1 <= 1e6) /* Below POWM_THRESHOLD, mpz_powm uses MODMULN reduction, too, but without special code for small bases which makes our MODMULN faster. Above POWM_THRESHOLD mpz_powm uses faster mod reduction, at about 2*POWM_THRESHOLD it catches up with our smallbase-MODMULN and then is faster until REDC takes over. */ { outputf (OUTPUT_VERBOSE, "Using MODMULN\n"); mpmod_init_MODMULN (modulus, N); } else if (Nbits > 50000 || (Nbits > 3500 && smallbase)) { outputf (OUTPUT_VERBOSE, "Using REDC\n"); mpmod_init_REDC (modulus, N); } else { outputf (OUTPUT_VERBOSE, "Using mpz_powm\n"); mpmod_init_MPZ (modulus, N); } } /* Determine parameters (polynomial degree etc.) */ if (stage2_variant != 0) { long P_ntt, P_nontt; const unsigned long lmax = 1UL<<28; /* An upper bound */ unsigned long lmax_NTT, lmax_noNTT; faststage2_param_t params_ntt, params_nontt, *better_params; mpz_init (faststage2_params.m_1); faststage2_params.l = 0; mpz_init (params_ntt.m_1); params_ntt.l = 0; mpz_init (params_nontt.m_1); params_nontt.l = 0; /* Find out what the longest transform length is we can do at all. If no maxmem is given, the non-NTT can theoretically do any length. */ lmax_NTT = 0; if (use_ntt) { unsigned long t; /* See what transform length the NTT can handle (due to limited primes and limited memory) */ t = mpzspm_max_len (N); lmax_NTT = MIN (lmax, t); if (maxmem != 0.) { t = pm1fs2_maxlen (double_to_size (maxmem), N, use_ntt); lmax_NTT = MIN (lmax_NTT, t); } outputf (OUTPUT_DEVVERBOSE, "NTT can handle lmax <= %lu\n", lmax_NTT); /* FIXME: if both ntt and no-ntt are tried, but finally ntt is preferred, the last B2 bound computed is that of no-ntt, which is thus wrong */ P_ntt = choose_P (B2min, B2, lmax_NTT, k, ¶ms_ntt, B2min, B2, 1, ECM_PM1); if (P_ntt != ECM_ERROR) outputf (OUTPUT_DEVVERBOSE, "Parameters for NTT: P=%lu, l=%lu\n", params_ntt.P, params_ntt.l); } else P_ntt = 0; /* or GCC complains about uninitialized var */ /* See what transform length the non-NTT code can handle */ lmax_noNTT = lmax; if (maxmem != 0.) { unsigned long t; t = pm1fs2_maxlen (double_to_size (maxmem), N, 0); lmax_noNTT = MIN (lmax_noNTT, t); outputf (OUTPUT_DEVVERBOSE, "non-NTT can handle lmax <= %lu\n", lmax_noNTT); } if (use_ntt != 2) P_nontt = choose_P (B2min, B2, lmax_noNTT, k, ¶ms_nontt, B2min, B2, 0, ECM_PM1); else P_nontt = ECM_ERROR; if (P_nontt != ECM_ERROR) outputf (OUTPUT_DEVVERBOSE, "Parameters for non-NTT: P=%lu, l=%lu\n", params_nontt.P, params_nontt.l); if (((!use_ntt || P_ntt == ECM_ERROR) && P_nontt == ECM_ERROR) || (use_ntt == 2 && P_ntt == ECM_ERROR)) { outputf (OUTPUT_ERROR, "Error: cannot choose suitable P value for your stage 2 " "parameters.\nTry a shorter B2min,B2 interval.\n"); mpz_clear (faststage2_params.m_1); mpz_clear (params_ntt.m_1); mpz_clear (params_nontt.m_1); return ECM_ERROR; } /* Now decide wether to take NTT or non-NTT. How to choose the better one is not an easy question. It will depend on the speed ratio between NTT/non-NTT code, their difference in memory use and available memory. For now, we choose the one that uses a longer transform length. FIXME: Write something not brain-dead here */ if (use_ntt == 0 || P_ntt == ECM_ERROR || (use_ntt == 1 && params_nontt.l > params_ntt.l)) { better_params = ¶ms_nontt; use_ntt = 0; } else { better_params = ¶ms_ntt; use_ntt = 1; } faststage2_params.P = better_params->P; faststage2_params.s_1 = better_params->s_1; faststage2_params.s_2 = better_params->s_2; faststage2_params.l = better_params->l; mpz_set (faststage2_params.m_1, better_params->m_1); mpz_clear (params_ntt.m_1); mpz_clear (params_nontt.m_1); if (maxmem != 0.) outputf (OUTPUT_VERBOSE, "Using lmax = %lu with%s NTT which takes " "about %luMB of memory\n", faststage2_params.l, (use_ntt) ? "" : "out", pm1fs2_memory_use (faststage2_params.l, N, use_ntt)/1048576); } else { mpz_init (root_params.i0); root_params.d2 = 0; /* Enable automatic choice of d2 */ if (use_ntt || (modulus->repr == ECM_MOD_BASE2 && modulus->Fermat > 0)) po2 = 1; if (bestD (&root_params, &k, &dF, B2min, B2, po2, use_ntt, maxmem, (TreeFilename != NULL), modulus) == ECM_ERROR) { youpi = ECM_ERROR; goto clear_and_exit; } root_params.S = S; /* Set default degree for Brent-Suyama extension */ if (root_params.S == ECM_DEFAULT_S) { if (modulus->repr == ECM_MOD_BASE2 && modulus->Fermat > 0) { /* For Fermat numbers, default is 2 (no Brent-Suyama) */ root_params.S = 2; } else { mpz_t t; mpz_init (t); mpz_sub (t, B2, B2min); if (mpz_cmp_d (t, 3.5e5) < 0) /* B1 < 50000 */ root_params.S = -4; /* Dickson polys give a slightly better chance of success */ else if (mpz_cmp_d (t, 1.1e7) < 0) /* B1 < 500000 */ root_params.S = -6; else if (mpz_cmp_d (t, 1.25e8) < 0) /* B1 < 3000000 */ root_params.S = 12; /* but for S>6, S-th powers are faster thanks to invtrick */ else if (mpz_cmp_d (t, 7.e9) < 0) /* B1 < 50000000 */ root_params.S = 24; else if (mpz_cmp_d (t, 1.9e10) < 0) /* B1 < 100000000 */ root_params.S = 48; else if (mpz_cmp_d (t, 5.e11) < 0) /* B1 < 1000000000 */ root_params.S = 60; else root_params.S = 120; mpz_clear (t); } } /* We need Suyama's power even and at least 2 for P-1 stage 2 to work correctly */ if (root_params.S & 1) root_params.S *= 2; /* FIXME: Is this what the user would expect? */ } /* Print B1, B2, polynomial and x0 */ print_B1_B2_poly (OUTPUT_NORMAL, ECM_PM1, B1, *B1done, B2min_parm, B2min, B2, (stage2_variant == 0) ? root_params.S : 1, p, 0, NULL); /* If we do a stage 2, print its parameters */ if (mpz_cmp (B2, B2min) >= 0) { if (stage2_variant != 0) outputf (OUTPUT_VERBOSE, "P = %lu, l = %lu, s_1 = %lu, k = s_2 = %lu, " "m_1 = %Zd\n", faststage2_params.P, faststage2_params.l, faststage2_params.s_1,faststage2_params.s_2, faststage2_params.m_1); else outputf (OUTPUT_VERBOSE, "dF=%lu, k=%lu, d=%lu, d2=%lu, i0=%Zd\n", dF, k, root_params.d1, root_params.d2, root_params.i0); } if (test_verbose (OUTPUT_VERBOSE)) { if (mpz_sgn (B2min_parm) >= 0) { outputf (OUTPUT_VERBOSE, "Can't compute success probabilities for B1 <> B2min\n"); } else { rhoinit (256, 10); print_prob (B1, B2, dF, k, (stage2_variant == 0) ? root_params.S : 1, go); } } mpres_init (x, modulus); mpres_set_z (x, p, modulus); st = cputime (); if (B1 > *B1done) youpi = pm1_stage1 (f, x, modulus, B1, B1done, go, stop_asap, chkfilename); st = elltime (st, cputime ()); outputf (OUTPUT_NORMAL, "Step 1 took %ldms\n", st); if (test_verbose (OUTPUT_RESVERBOSE)) { mpz_t tx; mpz_init (tx); mpres_get_z (tx, x, modulus); outputf (OUTPUT_RESVERBOSE, "x=%Zd\n", tx); mpz_clear (tx); } if (stop_asap != NULL && (*stop_asap) ()) goto clear_and_exit; if (youpi == ECM_NO_FACTOR_FOUND && mpz_cmp (B2, B2min) >= 0) { if (stage2_variant != 0) { if (use_ntt) youpi = pm1fs2_ntt (f, x, modulus, &faststage2_params); else youpi = pm1fs2 (f, x, modulus, &faststage2_params); } else youpi = stage2 (f, &x, modulus, dF, k, &root_params, ECM_PM1, use_ntt, TreeFilename, stop_asap); } if (test_verbose (OUTPUT_VERBOSE)) { if (mpz_sgn (B2min_parm) < 0) rhoinit (1, 0); /* Free memory of rhotable */ } clear_and_exit: mpres_get_z (p, x, modulus); mpres_clear (x, modulus); mpmod_clear (modulus); if (stage2_variant != 0) mpz_clear (faststage2_params.m_1); else mpz_clear (root_params.i0); mpz_clear (B2); mpz_clear (B2min); return youpi; }
/* Input: p is the initial generator (sigma), if 0 generate it at random. n is the number to factor B1 is the stage 1 bound B2 is the stage 2 bound k is the number of blocks for stage 2 verbose is the verbosity level Output: p is the factor found Return value: non-zero iff a factor is found (1 for stage 1, 2 for stage 2) */ int pp1 (mpz_t f, mpz_t p, mpz_t n, mpz_t go, double *B1done, double B1, mpz_t B2min_parm, mpz_t B2_parm, double B2scale, unsigned long k, const int S, int verbose, int repr, int use_ntt, FILE *os, FILE *es, char *chkfilename, char *TreeFilename, double maxmem, gmp_randstate_t rng, int (*stop_asap)(void)) { int youpi = ECM_NO_FACTOR_FOUND; int po2 = 0; /* Whether we should use power-of-2 poly degree */ long st; mpres_t a; mpmod_t modulus; mpz_t B2min, B2; /* Local B2, B2min to avoid changing caller's values */ unsigned long dF; root_params_t root_params; faststage2_param_t faststage2_params; const int stage2_variant = (S == 1 || S == ECM_DEFAULT_S); int twopass = 0; set_verbose (verbose); ECM_STDOUT = (os == NULL) ? stdout : os; ECM_STDERR = (es == NULL) ? stdout : es; /* if n is even, return 2 */ if (mpz_divisible_2exp_p (n, 1)) { mpz_set_ui (f, 2); return ECM_FACTOR_FOUND_STEP1; } st = cputime (); if (mpz_cmp_ui (p, 0) == 0) pm1_random_seed (p, n, rng); mpz_init_set (B2min, B2min_parm); mpz_init_set (B2, B2_parm); /* Set default B2. See ecm.c for comments */ if (ECM_IS_DEFAULT_B2(B2)) { if (stage2_variant == 0) mpz_set_d (B2, B2scale * pow (B1 * PP1_COST, DEFAULT_B2_EXPONENT)); else mpz_set_d (B2, B2scale * pow (B1 * PP1FS2_COST, PM1FS2_DEFAULT_B2_EXPONENT)); } /* set B2min */ if (mpz_sgn (B2min) < 0) mpz_set_d (B2min, B1); mpmod_init (modulus, n, repr); if (use_ntt) po2 = 1; if (stage2_variant != 0) { long P; const unsigned long lmax = 1UL<<28; /* An upper bound */ unsigned long lmax_NTT, lmax_noNTT; mpz_init (faststage2_params.m_1); faststage2_params.l = 0; /* Find out what the longest transform length is we can do at all. If no maxmem is given, the non-NTT can theoretically do any length. */ lmax_NTT = 0; if (use_ntt) { unsigned long t, t2 = 0; /* See what transform length that the NTT can handle (due to limited primes and limited memory) */ t = mpzspm_max_len (n); lmax_NTT = MIN (lmax, t); if (maxmem != 0.) { t = pp1fs2_maxlen (double_to_size (maxmem), n, use_ntt, 0); t = MIN (t, lmax_NTT); /* Maybe the two pass variant lets us use a longer transform */ t2 = pp1fs2_maxlen (double_to_size (maxmem), n, use_ntt, 1); t2 = MIN (t2, lmax_NTT); if (t2 > t) { t = t2; twopass = 1; } lmax_NTT = t; } outputf (OUTPUT_DEVVERBOSE, "NTT can handle lmax <= %lu\n", lmax_NTT); } /* See what transform length that the non-NTT code can handle */ lmax_noNTT = lmax; if (maxmem != 0.) { unsigned long t; t = pp1fs2_maxlen (double_to_size (maxmem), n, 0, 0); lmax_noNTT = MIN (lmax_noNTT, t); outputf (OUTPUT_DEVVERBOSE, "non-NTT can handle lmax <= %lu\n", lmax_noNTT); } P = choose_P (B2min, B2, MAX(lmax_noNTT, lmax_NTT), k, &faststage2_params, B2min, B2, use_ntt, ECM_PP1); if (P == ECM_ERROR) { outputf (OUTPUT_ERROR, "Error: cannot choose suitable P value for your stage 2 " "parameters.\nTry a shorter B2min,B2 interval.\n"); mpz_clear (faststage2_params.m_1); return ECM_ERROR; } /* See if the selected parameters let us use NTT or not */ if (faststage2_params.l > lmax_NTT) use_ntt = 0; if (maxmem != 0.) { unsigned long MB; char *s; if (!use_ntt) s = "out"; else if (twopass) s = " two pass"; else s = " one pass"; MB = pp1fs2_memory_use (faststage2_params.l, n, use_ntt, twopass) / 1048576; outputf (OUTPUT_VERBOSE, "Using lmax = %lu with%s NTT which takes " "about %luMB of memory\n", faststage2_params.l, s, MB); } } else { mpz_init (root_params.i0); root_params.d2 = 0; /* Enable automatic choice of d2 */ if (bestD (&root_params, &k, &dF, B2min, B2, po2, use_ntt, maxmem, (TreeFilename != NULL), modulus) == ECM_ERROR) { youpi = ECM_ERROR; goto clear_and_exit; } /* Set default degree for Brent-Suyama extension */ root_params.S = S; if (root_params.S == ECM_DEFAULT_S) { if (modulus->repr == ECM_MOD_BASE2 && modulus->Fermat > 0) { /* For Fermat numbers, default is 1 (no Brent-Suyama) */ root_params.S = 1; } else { mpz_t t; mpz_init (t); mpz_sub (t, B2, B2min); root_params.S = choose_S (t); mpz_clear (t); } } } /* Print B1, B2, polynomial and x0 */ print_B1_B2_poly (OUTPUT_NORMAL, ECM_PP1, B1, *B1done, B2min_parm, B2min, B2, (stage2_variant == 0) ? root_params.S : 1, p, 0, NULL); /* If we do a stage 2, print its parameters */ if (mpz_cmp (B2, B2min) >= 0) { if (stage2_variant != 0) outputf (OUTPUT_VERBOSE, "P = %lu, l = %lu, s_1 = %lu, k = s_2 = %lu, " "m_1 = %Zd\n", faststage2_params.P, faststage2_params.l, faststage2_params.s_1,faststage2_params.s_2, faststage2_params.m_1); else outputf (OUTPUT_VERBOSE, "dF=%lu, k=%lu, d=%lu, d2=%lu, i0=%Zd\n", dF, k, root_params.d1, root_params.d2, S == 1 ? faststage2_params.m_1 : root_params.i0); } mpres_init (a, modulus); mpres_set_z (a, p, modulus); /* since pp1_mul_prac takes an ecm_uint, we have to check that B1 <= ECM_UINT_MAX */ if (B1 > (double) ECM_UINT_MAX) { outputf (OUTPUT_ERROR, "Error, maximal step1 bound for P+1 is %lu\n", ECM_UINT_MAX); youpi = ECM_ERROR; goto clear_and_exit; } if (B1 > *B1done) youpi = pp1_stage1 (f, a, modulus, B1, B1done, go, stop_asap, chkfilename); outputf (OUTPUT_NORMAL, "Step 1 took %ldms\n", elltime (st, cputime ())); if (test_verbose (OUTPUT_RESVERBOSE)) { mpz_t t; mpz_init (t); mpres_get_z (t, a, modulus); outputf (OUTPUT_RESVERBOSE, "x=%Zd\n", t); mpz_clear (t); } mpres_get_z (p, a, modulus); if (stop_asap != NULL && (*stop_asap) ()) goto clear_and_exit; if (youpi == ECM_NO_FACTOR_FOUND && mpz_cmp (B2, B2min) >= 0) { if (stage2_variant != 0) { if (use_ntt) youpi = pp1fs2_ntt (f, a, modulus, &faststage2_params, twopass); else youpi = pp1fs2 (f, a, modulus, &faststage2_params); } else youpi = stage2 (f, &a, modulus, dF, k, &root_params, ECM_PP1, use_ntt, TreeFilename, stop_asap); } if (youpi > 0 && test_verbose (OUTPUT_NORMAL)) pp1_check_factor (p, f); /* tell user if factor was found by P-1 */ clear_and_exit: mpres_clear (a, modulus); mpmod_clear (modulus); if (stage2_variant != 0) mpz_clear (faststage2_params.m_1); else mpz_clear (root_params.i0); mpz_clear (B2); mpz_clear (B2min); return youpi; }