int check_input(uint64 highlimit, uint64 lowlimit, uint32 num_sp, uint32 *sieve_p, soe_staticdata_t *sdata, mpz_t offset) { int i; sdata->orig_hlimit = highlimit; sdata->orig_llimit = lowlimit; //the wrapper should handle this, but just in case we are called //directly and not via the wrapper... if (highlimit - lowlimit < 1000000) highlimit = lowlimit + 1000000; if ((highlimit - lowlimit) > 1000000000000ULL) { printf("range too big\n"); return 1; } if (highlimit > 4000000000000000000ULL) { printf("input too high\n"); return 1; } //set sieve primes in the local data structure to the ones that were passed in sdata->sieve_p = sieve_p; if (offset == NULL) { //see if we were provided enough primes to do the job sdata->pbound = (uint64)(sqrt((int64)(highlimit))); if (sieve_p[num_sp - 1] < sdata->pbound) { printf("not enough sieving primes\n"); exit(1); } //find the highest index that we'll need. Much of the rest of the code is //sensitive to this. Note that this could be slow for large numbers of //sieve primes... could replace with a binary search. for (i=0; i<num_sp; i++) { // stop when we have enough for this input if (sieve_p[i] > sdata->pbound) break; } sdata->pboundi = i; sdata->offset = NULL; sdata->sieve_range = 0; } else { // for ranges with offsets, don't worry if we don't have enough // primes, but still check to see if we have too many. mpz_t tmpz; mpz_init(tmpz); mpz_add_ui(tmpz, offset, highlimit); mpz_sqrt(tmpz, tmpz); if (mpz_cmp_ui(tmpz, sieve_p[num_sp - 1]) < 0) { // then we were passed too many. truncate the input list. sdata->pbound = mpz_get_64(tmpz); for (i=0; i<num_sp; i++) { // stop when we have enough for this input if (sieve_p[i] > sdata->pbound) break; } sdata->pboundi = i; } else { // use all of 'em. sdata->pbound = sieve_p[num_sp - 1]; sdata->pboundi = num_sp; } sdata->offset = offset; mpz_clear(tmpz); sdata->sieve_range = 1; } return 0; }
/* Allocates the rel_t object, and adds it to the list in the polygroup if it factored or was a partial. * It determines the factors by trial division, and it also adds the factors to the linked list in the * rel_t. If it didn't factor and wasn't a partial, the relation is freed. */ void construct_relation (mpz_t qx, int32_t x, poly_t *p, nsieve_t *ns){ ns->tdiv_ct ++; rel_t *rel = (rel_t *)(malloc(sizeof(rel_t))); if (rel == NULL){ printf ("Malloc failed\n"); exit(1); } rel->poly = p; rel->x = x; rel->cofactor = 1; rel->factors = NULL; if (mpz_cmp_ui (qx, 0) < 0){ fl_add (rel, 0); } mpz_abs(qx, qx); uint64_t q = 0; int i; while (mpz_divisible_ui_p(qx, 2)){ // handle 2 separately mpz_divexact_ui(qx, qx, 2); fl_add (rel, 1); } for (i=1; i < ns->fb_len; i++){ // instead of doing a multi-precision divisiblilty test, we can use the get_offset method to // detect if 'x' is in the arithmetic progression of sieve values divisible by ns->fb[i]. if (get_offset (ns->fb[i], i, x, 0, p, p->group, ns) == 0 || get_offset (ns->fb[i], i, x, 1, p, p->group, ns) == 0){ mpz_divexact_ui(qx, qx, ns->fb[i]); fl_add (rel, i+1); // add to the factor list // If the result of the division fits in 64 bits, ditch the arbitrary precision. if (mpz_fits_64 (qx)) goto fixedprec_tdiv; while (mpz_divisible_ui_p (qx, ns->fb[i])){ // the sieve doesn't tell us mpz_divexact_ui(qx, qx, ns->fb[i]); // how many times the factor divided fl_add (rel, i+1); if (mpz_fits_64 (qx)){ goto fixedprec_tdiv; } } } } fixedprec_tdiv: q = mpz_get_64 (qx); if (q < ns->fb[i] * ns->fb[i]){ // q must be prime if (q < ns->fb_bound){ // if it's less than the factor base bound, it had better be in the FB. fl_add (rel, fb_lookup (q, ns)); // look it up and add it to the list. goto add_rel; } if (q < ns->lp_bound) { // in this case we have a partial relation. rel->cofactor = q; goto add_rel; } } while (i < ns->fb_len){ // continue the trial division while (q % ns->fb[i] == 0){ // it is no longer efficient to compute offsets here (that q /= ns->fb[i]; // calculation involved mods!) fl_add (rel, i+1); if (q < ns->fb[i] * ns->fb[i]){ if (q < ns->fb_bound){ fl_add (rel, fb_lookup (q, ns)); goto add_rel; } if (q < ns->lp_bound) { rel->cofactor = q; goto add_rel; } } } i++; } // if we're here, we weren't able to do anything with this relation. // rel_free (rel); return; add_rel: // add the relation to the list in the poly_group_t we're working with. if (p->group->nrels < PG_REL_STORAGE){ p->group->relns[ p->group->nrels ] = rel; p->group->nrels ++; return; } else { // if we ran out of space, just let it go. // rel_free(rel); return; } }
void trial_divide_Q_siqs(uint32 report_num, uint8 parity, uint32 poly_id, uint32 bnum, static_conf_t *sconf, dynamic_conf_t *dconf) { //we have flagged this sieve offset as likely to produce a relation //nothing left to do now but check and see. uint64 q64, f64; int j,it; uint32 prime; int smooth_num; uint32 *fb_offsets; uint32 polya_factors[20]; sieve_fb *fb; uint32 offset, block_loc; fb_offsets = &dconf->fb_offsets[report_num][0]; smooth_num = dconf->smooth_num[report_num]; block_loc = dconf->reports[report_num]; #ifdef QS_TIMING gettimeofday(&qs_timing_start, NULL); #endif offset = (bnum << sconf->qs_blockbits) + block_loc; if (parity) fb = dconf->fb_sieve_n; else fb = dconf->fb_sieve_p; #ifdef USE_YAFU_TDIV z32_to_mpz(&dconf->Qvals32[report_num], dconf->Qvals[report_num]); #endif //check for additional factors of the a-poly factors //make a separate list then merge it with fb_offsets it=0; //max 20 factors allocated for - should be overkill for (j = 0; (j < dconf->curr_poly->s) && (it < 20); j++) { //fbptr = fb + dconf->curr_poly->qlisort[j]; //prime = fbptr->prime; prime = fb[dconf->curr_poly->qlisort[j]].prime; while ((mpz_tdiv_ui(dconf->Qvals[report_num],prime) == 0) && (it < 20)) { mpz_tdiv_q_ui(dconf->Qvals[report_num], dconf->Qvals[report_num], prime); polya_factors[it++] = dconf->curr_poly->qlisort[j]; } } //check if it completely factored by looking at the unfactored portion in tmp //if ((mpz_size(dconf->Qvals[report_num]) == 1) && //(mpz_get_64(dconf->Qvals[report_num]) < (uint64)sconf->large_prime_max)) if ((mpz_size(dconf->Qvals[report_num]) == 1) && (mpz_cmp_ui(dconf->Qvals[report_num], sconf->large_prime_max) < 0)) { uint32 large_prime[2]; large_prime[0] = (uint32)mpz_get_ui(dconf->Qvals[report_num]); //Q->val[0]; large_prime[1] = 1; //add this one if (sconf->is_tiny) { // we need to encode both the a_poly and b_poly index // in poly_id poly_id |= (sconf->total_poly_a << 16); buffer_relation(offset,large_prime,smooth_num+1, fb_offsets,poly_id,parity,dconf,polya_factors,it); } else buffer_relation(offset,large_prime,smooth_num+1, fb_offsets,poly_id,parity,dconf,polya_factors,it); #ifdef QS_TIMING gettimeofday (&qs_timing_stop, NULL); qs_timing_diff = my_difftime (&qs_timing_start, &qs_timing_stop); TF_STG6 += ((double)qs_timing_diff->secs + (double)qs_timing_diff->usecs / 1000000); free(qs_timing_diff); #endif return; } if (sconf->use_dlp == 0) return; //quick check if Q is way too big for DLP (more than 64 bits) if (mpz_sizeinbase(dconf->Qvals[report_num], 2) >= 64) return; q64 = mpz_get_64(dconf->Qvals[report_num]); if ((q64 > sconf->max_fb2) && (q64 < sconf->large_prime_max2)) { //quick prime check: compute 2^(residue-1) mod residue. uint64 res; //printf("%llu\n",q64); #if BITS_PER_DIGIT == 32 mpz_set_64(dconf->gmptmp1, q64); mpz_set_64(dconf->gmptmp2, 2); mpz_set_64(dconf->gmptmp3, q64-1); mpz_powm(dconf->gmptmp1, dconf->gmptmp2, dconf->gmptmp3, dconf->gmptmp1); res = mpz_get_64(dconf->gmptmp1); #else spModExp(2, q64 - 1, q64, &res); #endif //if equal to 1, assume it is prime. this may be wrong sometimes, but we don't care. //more important to quickly weed out probable primes than to spend more time to be //more sure. if (res == 1) { #ifdef QS_TIMING gettimeofday (&qs_timing_stop, NULL); qs_timing_diff = my_difftime (&qs_timing_start, &qs_timing_stop); TF_STG6 += ((double)qs_timing_diff->secs + (double)qs_timing_diff->usecs / 1000000); free(qs_timing_diff); #endif dconf->dlp_prp++; return; } //try to find a double large prime #ifdef HAVE_CUDA { uint32 large_prime[2] = {1,1}; // remember the residue and the relation it is associated with dconf->buf_id[dconf->num_squfof_cand] = dconf->buffered_rels; dconf->squfof_candidates[dconf->num_squfof_cand++] = q64; // buffer the relation buffer_relation(offset,large_prime,smooth_num+1, fb_offsets,poly_id,parity,dconf,polya_factors,it); } #else dconf->attempted_squfof++; mpz_set_64(dconf->gmptmp1, q64); f64 = sp_shanks_loop(dconf->gmptmp1, sconf->obj); if (f64 > 1 && f64 != q64) { uint32 large_prime[2]; large_prime[0] = (uint32)f64; large_prime[1] = (uint32)(q64 / f64); if (large_prime[0] < sconf->large_prime_max && large_prime[1] < sconf->large_prime_max) { //add this one dconf->dlp_useful++; buffer_relation(offset,large_prime,smooth_num+1, fb_offsets,poly_id,parity,dconf,polya_factors,it); } } else { dconf->failed_squfof++; //printf("squfof failure: %" PRIu64 "\n", q64); } #endif } else dconf->dlp_outside_range++; #ifdef QS_TIMING gettimeofday (&qs_timing_stop, NULL); qs_timing_diff = my_difftime (&qs_timing_start, &qs_timing_stop); TF_STG6 += ((double)qs_timing_diff->secs + (double)qs_timing_diff->usecs / 1000000); free(qs_timing_diff); #endif return; }
uint64 *sieve_to_depth(uint32 *seed_p, uint32 num_sp, mpz_t lowlimit, mpz_t highlimit, int count, int num_witnesses, uint64 *num_p) { //public interface to a routine which will sieve a range of integers //with the supplied primes and either count or compute the values //that survive. Basically, it is just the sieve, but with no //guareentees that what survives the sieving is prime. The idea is to //remove cheap composites. uint64 retval, i, range, tmpl, tmph; uint64 *values = NULL; mpz_t tmpz; mpz_t *offset; if (mpz_cmp(highlimit, lowlimit) <= 0) { printf("error: lowlimit must be less than highlimit\n"); *num_p = 0; return values; } offset = (mpz_t *)malloc(sizeof(mpz_t)); mpz_init(tmpz); mpz_init(*offset); mpz_set(*offset, lowlimit); mpz_sub(tmpz, highlimit, lowlimit); range = mpz_get_64(tmpz); if (count) { //this needs to be a range of at least 1e6 if (range < 1000000) { //go and get a new range. tmpl = 0; tmph = 1000000; //since this is a small range, we need to //find a bigger range and count them. values = GetPRIMESRange(seed_p, num_sp, offset, tmpl, tmph, &retval); *num_p = 0; //count how many are in the original range of interest for (i = 0; i < retval; i++) { mpz_add_ui(tmpz, *offset, values[i]); if ((mpz_cmp(tmpz, lowlimit) >= 0) && (mpz_cmp(highlimit, tmpz) >= 0)) (*num_p)++; } free(values); values = NULL; } else { //check for really big ranges uint64 maxrange = 100000000000ULL; if (range > maxrange) { uint32 num_ranges = (uint32)(range / maxrange); uint64 remainder = range % maxrange; uint32 j; *num_p = 0; tmpl = 0; tmph = tmpl + maxrange; for (j = 0; j < num_ranges; j++) { *num_p += spSOE(seed_p, num_sp, offset, tmpl, &tmph, 1, NULL); if (VFLAG > 1) printf("so far, found %" PRIu64 " primes\n",*num_p); tmpl += maxrange; tmph = tmpl + maxrange; } if (remainder > 0) { tmph = tmpl + remainder; *num_p += spSOE(seed_p, num_sp, offset, tmpl, &tmph, 1, NULL); } if (VFLAG > 1) printf("so far, found %" PRIu64 " primes\n",*num_p); } else { //we're in a sweet spot already, just get the requested range *num_p = spSOE(seed_p, num_sp, offset, 0, &range, 1, NULL); } } } else { //this needs to be a range of at least 1e6 if (range < 1000000) { //there is slack built into the sieve limit, so go ahead and increase //the size of the interval to make it at least 1e6. tmpl = 0; tmph = tmpl + 1000000; //since this is a small range, we need to //find a bigger range and count them. values = GetPRIMESRange(seed_p, num_sp, offset, tmpl, tmph, &retval); *num_p = 0; for (i = 0; i < retval; i++) { mpz_add_ui(tmpz, *offset, values[i]); if ((mpz_cmp(tmpz, lowlimit) >= 0) && (mpz_cmp(highlimit, tmpz) >= 0)) (*num_p)++; } } else { //we don't need to mess with the requested range, //so GetPRIMESRange will return the requested range directly //and the count will be in NUM_P values = GetPRIMESRange(seed_p, num_sp, offset, 0, range, num_p); } if (num_witnesses > 0) { int pchar = 0; thread_soedata_t *thread_data; //an array of thread data objects uint32 lastid; int j; //allocate thread data structure thread_data = (thread_soedata_t *)malloc(THREADS * sizeof(thread_soedata_t)); // conduct PRP tests on all surviving values if (VFLAG > 0) printf("starting PRP tests with %d witnesses on %" PRIu64 " surviving candidates\n", num_witnesses, *num_p); // start the threads for (i = 0; i < THREADS - 1; i++) start_soe_worker_thread(thread_data + i, 0); start_soe_worker_thread(thread_data + i, 1); range = *num_p / THREADS; lastid = 0; // divvy up the range for (j = 0; j < THREADS; j++) { thread_soedata_t *t = thread_data + j; t->startid = lastid; t->stopid = t->startid + range; lastid = t->stopid; if (VFLAG > 2) printf("thread %d computing PRPs from %u to %u\n", (int)i, t->startid, t->stopid); } // the last one gets any leftover if (thread_data[THREADS-1].stopid != (uint32)*num_p) thread_data[THREADS-1].stopid = (uint32)*num_p; // allocate space for stuff in the threads if (THREADS == 1) { thread_data[0].ddata.primes = values; } else { for (j = 0; j < THREADS; j++) { thread_soedata_t *t = thread_data + j; mpz_init(t->tmpz); mpz_init(t->offset); mpz_init(t->lowlimit); mpz_init(t->highlimit); mpz_set(t->offset, *offset); mpz_set(t->lowlimit, lowlimit); mpz_set(t->highlimit, highlimit); t->current_line = (uint64)num_witnesses; t->ddata.primes = (uint64 *)malloc((t->stopid - t->startid) * sizeof(uint64)); for (i = t->startid; i < t->stopid; i++) t->ddata.primes[i - t->startid] = values[i]; } } // now run with the threads for (j = 0; j < THREADS; j++) { thread_soedata_t *t = thread_data + j; if (j == (THREADS - 1)) { t->linecount = 0; for (i = t->startid; i < t->stopid; i++) { if (((i & 128) == 0) && (VFLAG > 0)) { int k; for (k = 0; k<pchar; k++) printf("\b"); pchar = printf("progress: %d%%",(int)((double)i / (double)(*num_p) * 100.0)); fflush(stdout); } mpz_add_ui(tmpz, *offset, t->ddata.primes[i - t->startid]); if ((mpz_cmp(tmpz, lowlimit) >= 0) && (mpz_cmp(highlimit, tmpz) >= 0)) { if (mpz_probab_prime_p(tmpz, num_witnesses)) t->ddata.primes[t->linecount++] = t->ddata.primes[i - t->startid]; } } } else { t->command = SOE_COMPUTE_PRPS; #if defined(WIN32) || defined(_WIN64) SetEvent(t->run_event); #else pthread_cond_signal(&t->run_cond); pthread_mutex_unlock(&t->run_lock); #endif } } //wait for each thread to finish for (i = 0; i < THREADS; i++) { thread_soedata_t *t = thread_data + i; if (i < (THREADS - 1)) { #if defined(WIN32) || defined(_WIN64) WaitForSingleObject(t->finish_event, INFINITE); #else pthread_mutex_lock(&t->run_lock); while (t->command != SOE_COMMAND_WAIT) pthread_cond_wait(&t->run_cond, &t->run_lock); #endif } } //stop the worker threads for (i=0; i<THREADS - 1; i++) stop_soe_worker_thread(thread_data + i, 0); // combine results and free stuff if (THREADS == 1) { retval = thread_data[0].linecount; } else { retval = 0; for (i=0; i<THREADS; i++) { thread_soedata_t *t = thread_data + i; for (j=0; j < t->linecount; j++) values[retval++] = t->ddata.primes[j]; free(t->ddata.primes); mpz_clear(t->tmpz); mpz_clear(t->offset); mpz_clear(t->lowlimit); mpz_clear(t->highlimit); } } free(thread_data); if (VFLAG > 0) { int k; for (k = 0; k<pchar; k++) printf("\b"); } *num_p = retval; if (VFLAG > 0) printf("found %" PRIu64 " PRPs\n", *num_p); } // now dump the requested range of primes to a file, or the // screen, both, or neither, depending on the state of a couple // global configuration variables if (PRIMES_TO_FILE) { FILE *out; if (num_witnesses > 0) out = fopen("prp_values.dat", "w"); else out = fopen("sieved_values.dat","w"); if (out == NULL) { printf("fopen error: %s\n", strerror(errno)); printf("can't open file for writing\n"); } else { for (i = 0; i < *num_p; i++) { mpz_add_ui(tmpz, *offset, values[i]); if ((mpz_cmp(tmpz, lowlimit) >= 0) && (mpz_cmp(highlimit, tmpz) >= 0)) gmp_fprintf(out,"%Zd\n",tmpz); } fclose(out); } } if (PRIMES_TO_SCREEN) { for (i = 0; i < *num_p; i++) { mpz_add_ui(tmpz, *offset, values[i]); if ((mpz_cmp(tmpz, lowlimit) >= 0) && (mpz_cmp(highlimit, tmpz) >= 0)) gmp_printf("%Zd\n",tmpz); } printf("\n"); } } mpz_clear(tmpz); mpz_clear(*offset); free(offset); return values; }