DWORD WINAPI soe_worker_thread_main(LPVOID thread_data) { #else void *soe_worker_thread_main(void *thread_data) { #endif thread_soedata_t *t = (thread_soedata_t *)thread_data; while(1) { uint32 i; /* wait forever for work to do */ #if defined(WIN32) || defined(_WIN64) WaitForSingleObject(t->run_event, INFINITE); #else pthread_mutex_lock(&t->run_lock); while (t->command == SOE_COMMAND_WAIT) { pthread_cond_wait(&t->run_cond, &t->run_lock); } #endif /* do work */ if (t->command == SOE_COMMAND_SIEVE_AND_COUNT) { t->sdata.lines[t->current_line] = (uint8 *)malloc(t->sdata.numlinebytes * sizeof(uint8)); sieve_line(t); t->linecount = count_line(&t->sdata, t->current_line); free(t->sdata.lines[t->current_line]); } else if (t->command == SOE_COMMAND_SIEVE_AND_COMPUTE) { sieve_line(t); } else if (t->command == SOE_COMPUTE_ROOTS) { if (VFLAG > 2) printf("starting root computation over %u to %u\n", t->startid, t->stopid); if (t->sdata.sieve_range == 0) { for (i = t->startid; i < t->stopid; i++) { uint32 inv; uint32 prime = t->sdata.sieve_p[i]; inv = modinv_1(t->sdata.prodN, prime); t->sdata.root[i] = prime - inv; t->sdata.lower_mod_prime[i - t->sdata.bucket_start_id] = (t->sdata.lowlimit + 1) % prime; } } else { mpz_t tmpz; //mpz_t t1, t2; mpz_init(tmpz); //experiment for custom ranges that can be expressed as base^exp + range //mpz_init(t1); //mpz_init(t2); mpz_add_ui(tmpz, *t->sdata.offset, t->sdata.lowlimit + 1); for (i = t->startid; i < t->stopid; i++) { uint32 inv; uint32 prime = t->sdata.sieve_p[i]; inv = modinv_1(t->sdata.prodN, prime); t->sdata.root[i] = prime - inv; t->sdata.lower_mod_prime[i - t->sdata.bucket_start_id] = mpz_tdiv_ui(tmpz, prime); //mpz_set_ui(t2,prime); //mpz_set_ui(t1, 10); //mpz_powm_ui(t1, t1, 999999, t2); //t->sdata.lower_mod_prime[i - t->sdata.bucket_start_id] = mpz_get_ui(t1); } //mpz_clear(t1); //mpz_clear(t2); } } else if (t->command == SOE_COMPUTE_PRIMES) { t->linecount = 0; for (i = t->startid; i < t->stopid; i+=8) { t->linecount = 
compute_8_bytes(&t->sdata, t->linecount, t->ddata.primes, i, NULL); } } else if (t->command == SOE_COMPUTE_PRPS) { t->linecount = 0; for (i = t->startid; i < t->stopid; i++) { mpz_add_ui(t->tmpz, t->offset, t->ddata.primes[i - t->startid]); if ((mpz_cmp(t->tmpz, t->lowlimit) >= 0) && (mpz_cmp(t->highlimit, t->tmpz) >= 0)) { if (mpz_probab_prime_p(t->tmpz, t->current_line)) t->ddata.primes[t->linecount++] = t->ddata.primes[i - t->startid]; } } } else if (t->command == SOE_COMMAND_END) break; /* signal completion */ t->command = SOE_COMMAND_WAIT; #if defined(WIN32) || defined(_WIN64) SetEvent(t->finish_event); #else pthread_cond_signal(&t->run_cond); pthread_mutex_unlock(&t->run_lock); #endif } #if defined(WIN32) || defined(_WIN64) return 0; #else return NULL; #endif }
/*
 * firstRoots - compute the initial sieve roots and the root update values
 * of every factor base prime for the current polynomial.
 *
 * sconf: static job data; supplies the factor base, the modular square
 *        roots (modsqrt_array), the number of sieve blocks and the index
 *        of the first sieved small prime.
 * dconf: per-thread job data; supplies the current polynomial (a, b, s),
 *        the Bl values and receives the results: update_data first roots,
 *        the compressed positive/negative root arrays, rootupdates /
 *        sm_rootupdates and the large prime bucket bookkeeping.
 *
 * The factor base is processed in five index ranges, each with its own
 * arithmetic and storage:
 *   [start_prime, sieve_small_fb_start)   tiny primes, not sieved
 *   [sieve_small_fb_start, fb_15bit_B)    16-bit first roots and updates
 *   [fb_15bit_B, med_B)
 *   [med_B, large_B)                      bucket sieved (FILL_ONE_PRIME_LOOP_*)
 *   [large_B, B)                          bucket sieved (FILL_ONE_PRIME_*)
 *
 * NOTE(review): COMPUTE_FIRST_ROOTS, CHECK_NEW_SLICE and the FILL_ONE_PRIME_*
 * macros are defined elsewhere and appear to use this function's locals by
 * name (root1, root2, prime, bmodp, bnum, bptr, room, logp, interval,
 * bound_index, bound_val, check_bound, k, ...); do not rename or remove
 * locals here without checking the macro definitions.
 */
void firstRoots(static_conf_t *sconf, dynamic_conf_t *dconf) {
    //the roots are computed using a and b as follows:
    //(+/-t - b)(a)^-1 mod p
    //where the t values are the roots to t^2 = N mod p, found by shanks_tonelli
    //when constructing the factor base.
    //assume b > t

    //unpack stuff from the job data structures
    siqs_poly *poly = dconf->curr_poly;
    fb_list *fb = sconf->factor_base;
    uint32 start_prime = 2;
    int *rootupdates = dconf->rootupdates;
    update_t update_data = dconf->update_data;
    sieve_fb_compressed *fb_p = dconf->comp_sieve_p;
    sieve_fb_compressed *fb_n = dconf->comp_sieve_n;
    lp_bucket *lp_bucket_p = dconf->buckets;
    uint32 *modsqrt = sconf->modsqrt_array;

    //locals
    //several of these are not referenced directly below; presumably they are
    //consumed by the macros - confirm before removing any of them
    uint32 i, interval;
    uint8 logp;
    int root1, root2, prime, amodp, bmodp, inv, x, bnum,j,numblocks;
    int s = poly->s;
    int bound_index = 0, k;
    uint32 bound_val = fb->med_B;
    uint32 *bptr, *sliceptr_p, *sliceptr_n;
    uint32 *numptr_p, *numptr_n;
    int check_bound = BUCKET_ALLOC/2 - 1, room;

    numblocks = sconf->num_blocks;
    interval = numblocks << BLOCKBITS;

    if (lp_bucket_p->list != NULL) {
        lp_bucket_p->fb_bounds[0] = fb->med_B;

        //the second (negative side) region starts numblocks entries
        //(scaled by BUCKET_BITS for the list) after the first
        sliceptr_p = lp_bucket_p->list;
        sliceptr_n = lp_bucket_p->list + (numblocks << BUCKET_BITS);

        numptr_p = lp_bucket_p->num;
        numptr_n = lp_bucket_p->num + numblocks;

        //reset lp_buckets
        for (i=0;i< (2*numblocks*lp_bucket_p->alloc_slices) ;i++)
            numptr_p[i] = 0;

        lp_bucket_p->num_slices = 0;
    } else {
        //no bucket storage available: leave the bucket pointers unset
        sliceptr_p = NULL;
        sliceptr_n = NULL;
        numptr_p = NULL;
        numptr_n = NULL;
    }

    //range 1: tiny primes.  Uses the precomputed 32-bit reciprocal
    //(small_inv / correction) stored in fb->tinylist.
    for (i=start_prime;i<sconf->sieve_small_fb_start;i++) {
        uint64 q64, tmp, t2;

        prime = fb->tinylist->prime[i];
        root1 = modsqrt[i];
        root2 = prime - root1;

        amodp = (int)mpz_tdiv_ui(poly->mpz_poly_a,prime);
        bmodp = (int)mpz_tdiv_ui(poly->mpz_poly_b,prime);

        //find a^-1 mod p = inv(a mod p) mod p
        inv = modinv_1(amodp,prime);

        //NOTE(review): macro defined elsewhere; presumably subtracts bmodp
        //from root1/root2 modulo prime, as testfirstRoots does inline - confirm
        COMPUTE_FIRST_ROOTS

        // reuse integer inverse of prime that we've calculated for use
        // in trial division stage
        // inv * root1 % prime
        t2 = (uint64)inv * (uint64)root1;
        tmp = t2 + (uint64)fb->tinylist->correction[i];
        q64 = tmp * (uint64)fb->tinylist->small_inv[i];
        tmp = q64 >> 32;
        root1 = t2 - tmp * prime;

        // inv * root2 % prime
        t2 = (uint64)inv * (uint64)root2;
        tmp = t2 + (uint64)fb->tinylist->correction[i];
        q64 = tmp * (uint64)fb->tinylist->small_inv[i];
        tmp = q64 >> 32;
        root2 = t2 - tmp * prime;

        //we don't sieve these primes, so ordering doesn't matter
        update_data.firstroots1[i] = root1;
        update_data.firstroots2[i] = root2;
        fb_p->root1[i] = (uint16)root1;
        fb_p->root2[i] = (uint16)root2;
        fb_n->root1[i] = (uint16)(prime - root2);
        fb_n->root2[i] = (uint16)(prime - root1);

        //if we were sieving, this would double count the location on the
        //positive side. but since we're not, it's easier to check for inclusion
        //on the progression if we reset the negative root to zero if it is == prime
        if (fb_n->root1[i] == prime)
            fb_n->root1[i] = 0;
        if (fb_n->root2[i] == prime)
            fb_n->root2[i] = 0;

        //for this factor base prime, compute the rootupdate value for all s
        //Bl values. the inverse of a mod p is held in inv
        //the rootupdate value is given by 2*Bj*a^-1
        //Bl[j] now holds 2*Bl
        for (j=0;j<s;j++) {
            x = (int)mpz_tdiv_ui(dconf->Bl[j],prime);

            // x * inv % prime
            t2 = (uint64)inv * (uint64)x;
            tmp = t2 + (uint64)fb->tinylist->correction[i];
            q64 = tmp * (uint64)fb->tinylist->small_inv[i];
            tmp = q64 >> 32;
            x = t2 - tmp * prime;

            //row j of a table with fb->B entries per row
            rootupdates[(j)*fb->B+i] = x;
        }
    }

    //range 2: primes up to fb_15bit_B.  First roots and root updates are
    //also stored in 16-bit form (sm_firstroots*, sm_rootupdates).
    for (i=sconf->sieve_small_fb_start;i<fb->fb_15bit_B;i++) {
        uint64 small_inv, correction;
        uint64 q64, tmp, t2;

        prime = fb->list->prime[i];
        root1 = modsqrt[i];
        root2 = prime - root1;

        // compute integer inverse of prime for use in mod operations in this
        // function.
        //small_inv approximates 2^48 / prime; correction and the increment
        //choose between the rounded-down and rounded-up reciprocal
        small_inv = ((uint64)1 << 48) / (uint64)prime;
        if (floor((double)((uint64)1 << 48) / (double)prime + 0.5) == (double)small_inv) {
            correction = 1;
        } else {
            correction = 0;
            small_inv++;
        }

        amodp = (int)mpz_tdiv_ui(poly->mpz_poly_a,prime);
        bmodp = (int)mpz_tdiv_ui(poly->mpz_poly_b,prime);

        //find a^-1 mod p = inv(a mod p) mod p
        inv = modinv_1(amodp,prime);

        COMPUTE_FIRST_ROOTS

        // inv * root1 % prime
        t2 = (uint64)inv * (uint64)root1;
        tmp = t2 + correction;
        q64 = tmp * small_inv;
        tmp = q64 >> 48;
        root1 = t2 - tmp * prime;

        // inv * root2 % prime
        t2 = (uint64)inv * (uint64)root2;
        tmp = t2 + correction;
        q64 = tmp * small_inv;
        tmp = q64 >> 48;
        root2 = t2 - tmp * prime;

        //store the roots in ascending order (smaller root first)
        if (root2 < root1) {
            update_data.sm_firstroots1[i] = (uint16)root2;
            update_data.sm_firstroots2[i] = (uint16)root1;

            fb_p->root1[i] = (uint16)root2;
            fb_p->root2[i] = (uint16)root1;
            fb_n->root1[i] = (uint16)(prime - root1);
            fb_n->root2[i] = (uint16)(prime - root2);
        } else {
            update_data.sm_firstroots1[i] = (uint16)root1;
            update_data.sm_firstroots2[i] = (uint16)root2;

            fb_p->root1[i] = (uint16)root1;
            fb_p->root2[i] = (uint16)root2;
            fb_n->root1[i] = (uint16)(prime - root2);
            fb_n->root2[i] = (uint16)(prime - root1);
        }

        //for this factor base prime, compute the rootupdate value for all s
        //Bl values. the inverse of a mod p is held in inv
        //the rootupdate value is given by 2*Bj*a^-1
        //Bl[j] now holds 2*Bl
        for (j=0;j<s;j++) {
            x = (int)mpz_tdiv_ui(dconf->Bl[j],prime);

            // x * inv % prime
            t2 = (uint64)inv * (uint64)x;
            tmp = t2 + correction;
            q64 = tmp * small_inv;
            tmp = q64 >> 48;
            x = t2 - tmp * prime;

            rootupdates[(j)*fb->B+i] = x;
            dconf->sm_rootupdates[(j)*fb->B+i] = (uint16)x;
        }
    }

    //printf("prime[15bit-1] = %u\n", fb_p->prime[fb->fb_15bit_B-1]);

    //range 3: primes from fb_15bit_B up to med_B.  Same reciprocal arithmetic
    //as range 2, but first roots go to the full-width firstroots arrays.
    for (i=fb->fb_15bit_B;i<fb->med_B;i++) {
        uint64 small_inv, correction;
        uint64 q64, tmp, t2;

        prime = fb->list->prime[i];
        root1 = modsqrt[i];
        root2 = prime - root1;

        // compute integer inverse of prime for use in mod operations in this
        // function.
        small_inv = ((uint64)1 << 48) / (uint64)prime;
        if (floor((double)((uint64)1 << 48) / (double)prime + 0.5) == (double)small_inv) {
            correction = 1;
        } else {
            correction = 0;
            small_inv++;
        }

        amodp = (int)mpz_tdiv_ui(poly->mpz_poly_a,prime);
        bmodp = (int)mpz_tdiv_ui(poly->mpz_poly_b,prime);

        //find a^-1 mod p = inv(a mod p) mod p
        inv = modinv_1(amodp,prime);

        COMPUTE_FIRST_ROOTS

        // inv * root1 % prime
        t2 = (uint64)inv * (uint64)root1;
        tmp = t2 + correction;
        q64 = tmp * small_inv;
        tmp = q64 >> 48;
        root1 = t2 - tmp * prime;

        // inv * root2 % prime
        t2 = (uint64)inv * (uint64)root2;
        tmp = t2 + correction;
        q64 = tmp * small_inv;
        tmp = q64 >> 48;
        root2 = t2 - tmp * prime;

        //store the roots in ascending order (smaller root first)
        if (root2 < root1) {
            update_data.firstroots1[i] = root2;
            update_data.firstroots2[i] = root1;

            fb_p->root1[i] = (uint16)root2;
            fb_p->root2[i] = (uint16)root1;
            fb_n->root1[i] = (uint16)(prime - root1);
            fb_n->root2[i] = (uint16)(prime - root2);
        } else {
            update_data.firstroots1[i] = root1;
            update_data.firstroots2[i] = root2;

            fb_p->root1[i] = (uint16)root1;
            fb_p->root2[i] = (uint16)root2;
            fb_n->root1[i] = (uint16)(prime - root2);
            fb_n->root2[i] = (uint16)(prime - root1);
        }

        //for this factor base prime, compute the rootupdate value for all s
        //Bl values. the inverse of a mod p is held in inv
        //the rootupdate value is given by 2*Bj*a^-1
        //Bl[j] now holds 2*Bl
        for (j=0;j<s;j++) {
            x = (int)mpz_tdiv_ui(dconf->Bl[j],prime);

            // x * inv % prime
            t2 = (uint64)inv * (uint64)x;
            tmp = t2 + correction;
            q64 = tmp * small_inv;
            tmp = q64 >> 48;
            x = t2 - tmp * prime;

            rootupdates[(j)*fb->B+i] = x;
        }
    }

    //range 4: primes from med_B up to large_B, placed into the large prime
    //buckets.  check_bound and logp are set here and not read directly below;
    //presumably CHECK_NEW_SLICE / FILL_ONE_PRIME_* use them - confirm.
    check_bound = fb->med_B + BUCKET_ALLOC/2;
    logp = fb->list->logprime[fb->med_B-1];
    for (i=fb->med_B;i<fb->large_B;i++) {
        //uint64 small_inv, correction;
        //uint64 q64, tmp, t2;

        CHECK_NEW_SLICE(i);

        prime = fb->list->prime[i];
        root1 = modsqrt[i];
        root2 = prime - root1;

        amodp = (int)mpz_tdiv_ui(poly->mpz_poly_a,prime);
        bmodp = (int)mpz_tdiv_ui(poly->mpz_poly_b,prime);

        //find a^-1 mod p = inv(a mod p) mod p
        inv = modinv_1(amodp,prime);

        COMPUTE_FIRST_ROOTS

        //plain 64-bit modular multiplication, no precomputed reciprocal
        root1 = (uint32)((uint64)inv * (uint64)root1 % (uint64)prime);
        root2 = (uint32)((uint64)inv * (uint64)root2 % (uint64)prime);

        update_data.firstroots1[i] = root1;
        update_data.firstroots2[i] = root2;

        FILL_ONE_PRIME_LOOP_P(i);

        //the negative-side roots are rebuilt from the saved first roots;
        //presumably the _LOOP_P macro above modifies root1/root2 - confirm
        root1 = (prime - update_data.firstroots1[i]);
        root2 = (prime - update_data.firstroots2[i]);

        FILL_ONE_PRIME_LOOP_N(i);

        //for this factor base prime, compute the rootupdate value for all s
        //Bl values. the inverse of a mod p is held in inv
        //the rootupdate value is given by 2*Bj*a^-1
        //Bl[j] now holds 2*Bl
        for (j=0;j<s;j++) {
            x = (int)mpz_tdiv_ui(dconf->Bl[j], prime);
            x = (int)((int64)x * (int64)inv % (int64)prime);
            rootupdates[(j)*fb->B+i] = x;
        }
    }

    //range 5: primes from large_B to the end of the factor base.  Same as
    //range 4 but with the non-LOOP fill macros, and the negative-side roots
    //are derived directly from root1/root2.
    logp = fb->list->logprime[fb->large_B-1];
    for (i=fb->large_B;i<fb->B;i++) {
        CHECK_NEW_SLICE(i);

        prime = fb->list->prime[i];
        root1 = modsqrt[i];
        root2 = prime - root1;

        amodp = (int)mpz_tdiv_ui(poly->mpz_poly_a,prime);
        bmodp = (int)mpz_tdiv_ui(poly->mpz_poly_b,prime);

        //find a^-1 mod p = inv(a mod p) mod p
        inv = modinv_1(amodp,prime);

        COMPUTE_FIRST_ROOTS

        root1 = (uint32)((uint64)inv * (uint64)root1 % (uint64)prime);
        root2 = (uint32)((uint64)inv * (uint64)root2 % (uint64)prime);

        update_data.firstroots1[i] = root1;
        update_data.firstroots2[i] = root2;

        FILL_ONE_PRIME_P(i);

        root1 = (prime - root1);
        root2 = (prime - root2);

        FILL_ONE_PRIME_N(i);

        //for this factor base prime, compute the rootupdate value for all s
        //Bl values. the inverse of a mod p is held in inv
        //the rootupdate value is given by 2*Bj*a^-1
        //Bl[j] now holds 2*Bl
        //s is the number of primes in 'a'
        for (j=0;j<s;j++) {
            x = (int)mpz_tdiv_ui(dconf->Bl[j], prime);
            x = (int)((int64)x * (int64)inv % (int64)prime);
            rootupdates[(j)*fb->B+i] = x;
        }
    }

    //record how many slices are in use; bound_index is not modified directly
    //in this function, presumably CHECK_NEW_SLICE advances it - confirm
    if (lp_bucket_p->list != NULL)
        lp_bucket_p->num_slices = bound_index + 1;

    return;
}
/*
 * getRoots - precompute the per-prime constants used to locate sieve offsets.
 *
 * sdata:       shared sieve data; root[] (and, for bucket sieved primes,
 *              lower_mod_prime[]) are filled in.
 * thread_data: array of THREADS thread descriptors.  Each one receives a
 *              copy of *sdata plus the [startid, stopid) range of bucket
 *              sieved primes it is responsible for.
 *
 * Sieving requires the offset of each sieve prime in each block.  Because
 * residue classes are sieved, the offset is found with the extended
 * euclidean algorithm, ax + by = gcd(a,b), where a = prime, b = prodN and
 * therefore gcd = 1; y is then the multiplicative inverse of prodN modulo
 * prime.  That value is constant, so root[i] = prime - (prodN^-1 mod prime)
 * is computed once here.
 *
 * For bucket sieved primes the residue mod p of the first sieve location in
 * the first residue class is stored as well (lower_mod_prime), which pulls a
 * division out of the critical loop that finds offsets.
 *
 * Primes in [startprime, bucket_start_id) are handled serially; primes in
 * [bucket_start_id, pboundi) are split evenly across THREADS workers, with
 * the last share computed in the calling thread.
 */
void getRoots(soe_staticdata_t *sdata, thread_soedata_t *thread_data)
{
    int prime, prodN;
    uint64 startprime;
    uint64 i;
    int j;
    uint32 range, lastid;

    //timing
    double t;
    struct timeval tstart, tstop;
    TIME_DIFF * difference;

    prodN = (int)sdata->prodN;
    startprime = sdata->startprime;

    gettimeofday(&tstart, NULL);

    // linear sieve primes: only the root is needed
    for (i = startprime; i < sdata->bucket_start_id; i++)
    {
        uint32 inv;

        prime = sdata->sieve_p[i];

        //solve prodN ^ -1 % p
        inv = modinv_1(prodN, prime);
        sdata->root[i] = prime - inv;
    }

    gettimeofday(&tstop, NULL);
    difference = my_difftime(&tstart, &tstop);
    t = ((double)difference->secs + (double)difference->usecs / 1000000);
    free(difference);

    if (VFLAG > 2)
        printf("time to compute linear sieve roots = %1.2f\n", t);

    gettimeofday(&tstart, NULL);

    // start the threads; the final descriptor is the master (this thread)
    for (i = 0; i < THREADS - 1; i++)
        start_soe_worker_thread(thread_data + i, 0);

    start_soe_worker_thread(thread_data + i, 1);

    range = (sdata->pboundi - sdata->bucket_start_id) / THREADS;
    lastid = sdata->bucket_start_id;

    // divvy up the primes
    for (j = 0; j < THREADS; j++)
    {
        thread_soedata_t *t = thread_data + j;

        t->sdata = *sdata;
        t->startid = lastid;
        t->stopid = t->startid + range;
        lastid = t->stopid;
    }

    // the last one gets any leftover
    if (thread_data[THREADS-1].stopid != sdata->pboundi)
        thread_data[THREADS-1].stopid = sdata->pboundi;

    // now run with the threads
    for (j = 0; j < THREADS; j++)
    {
        thread_soedata_t *t = thread_data + j;

        if (j == (THREADS - 1))
        {
            if (VFLAG > 2)
                printf("starting root computation over %u to %u\n",
                    t->startid, t->stopid);

            // run in the current thread
            // bucket sieved primes need more data
            if (sdata->sieve_range == 0)
            {
                for (i = t->startid; i < t->stopid; i++)
                {
                    uint32 inv;

                    prime = t->sdata.sieve_p[i];

                    //solve prodN ^ -1 % p
                    inv = modinv_1(prodN, prime);
                    t->sdata.root[i] = prime - inv;

                    //residue of the first sieve location; only used by
                    //bucket sieved primes
                    t->sdata.lower_mod_prime[i - t->sdata.bucket_start_id] =
                        (t->sdata.lowlimit + 1) % prime;
                }
            }
            else
            {
                // offset range: first sieve location is offset + lowlimit + 1
                mpz_t tmpz;

                mpz_init(tmpz);
                mpz_add_ui(tmpz, *t->sdata.offset, t->sdata.lowlimit + 1);

                for (i = t->startid; i < t->stopid; i++)
                {
                    uint32 inv;

                    prime = t->sdata.sieve_p[i];

                    //solve prodN ^ -1 % p
                    inv = modinv_1(prodN, prime);
                    t->sdata.root[i] = prime - inv;

                    //residue of the first sieve location; only used by
                    //bucket sieved primes
                    t->sdata.lower_mod_prime[i - t->sdata.bucket_start_id] =
                        mpz_tdiv_ui(tmpz, prime);
                }

                // release the temporary; it was previously leaked on every call
                mpz_clear(tmpz);
            }
        }
        else
        {
            // hand the range to a worker and wake it up
            t->command = SOE_COMPUTE_ROOTS;
#if defined(WIN32) || defined(_WIN64)
            SetEvent(t->run_event);
#else
            // NOTE(review): unlocking here assumes run_lock is already held
            // by this thread on entry (taken in start_soe_worker_thread or
            // by the wait loop of a previous job) - confirm
            pthread_cond_signal(&t->run_cond);
            pthread_mutex_unlock(&t->run_lock);
#endif
        }
    }

    //wait for each thread to finish
    for (i = 0; i < THREADS; i++)
    {
        thread_soedata_t *t = thread_data + i;

        if (i < (THREADS - 1))
        {
#if defined(WIN32) || defined(_WIN64)
            WaitForSingleObject(t->finish_event, INFINITE);
#else
            pthread_mutex_lock(&t->run_lock);
            while (t->command != SOE_COMMAND_WAIT)
                pthread_cond_wait(&t->run_cond, &t->run_lock);
#endif
        }
    }

    //stop the worker threads
    for (i = 0; i < THREADS - 1; i++)
        stop_soe_worker_thread(thread_data + i, 0);

    gettimeofday(&tstop, NULL);
    difference = my_difftime(&tstart, &tstop);
    t = ((double)difference->secs + (double)difference->usecs / 1000000);
    free(difference);

    if (VFLAG > 2)
        printf("time to compute bucket sieve roots = %1.2f\n", t);

#ifdef INPLACE_BUCKET
    gettimeofday(&tstart, NULL);

    // inplace primes have special requirements because they operate on
    // the normal number line, and not in residue space
    // NOTE(review): this loop continues from whatever value i was left at by
    // the thread shutdown loop above (THREADS - 1), while index is computed
    // relative to inplace_start_id.  It looks like i was meant to start at
    // sdata->inplace_start_id - confirm before enabling INPLACE_BUCKET.
    for (; i < sdata->pboundi; i++)
    {
        uint64 starthit;
        uint32 startclass;
        uint64 startbit;
        uint32 rclass, bnum, rclassid;
        uint32 index = i - sdata->inplace_start_id;
        int a;

        // copy the prime into the special data structure
        //sdata->inplace_data[index].prime = sdata->sieve_p[i];

        // pull some computations involving a division out of the inner loop.
        // we need to know what prime/prodN and prime%prodN are.
        sdata->inplace_data[index].p_div = sdata->sieve_p[i] / prodN;
        rclass = sdata->sieve_p[i] % prodN;
        rclassid = resID_mod30[rclass];
        sdata->inplace_data[index].p_mod = rclass;

        // now compute the starting hit in our sieve interval...
        starthit = (sdata->lowlimit / sdata->sieve_p[i] + 1) * sdata->sieve_p[i];

        // ... that is in one of our residue classes
        startclass = starthit % prodN;

        // using a lookup table
        startclass = next_mod30[rclassid][startclass];
        starthit += ((uint64)sdata->sieve_p[i] * (uint64)(startclass >> 8));
        startclass = startclass & 0xff;

        // the starting accumulated error is equal to the starting class
        sdata->inplace_data[index].eacc = startclass;

        // now compute the starting bit and block location for this starting hit
        startbit = (starthit - sdata->lowlimit - (uint64)startclass) / (uint64)prodN;

        // sanity check
        if (((starthit - sdata->lowlimit - (uint64)startclass) % (uint64)prodN) != 0)
            printf("starting bit is invalid!\n");

        sdata->inplace_data[index].bitloc = startbit & FLAGSIZEm1;
        bnum = startbit >> FLAGBITS;

        // finally, add the prime to a linked list
        // if the next hit is within our interval
        if (bnum < sdata->blocks)
        {
            //then reassign this prime to its next hit
            if (sdata->inplace_ptrs[bnum][resID_mod30[startclass]] == -1)
            {
                // this is the first hit in this block and rclass, so set the pointer
                // to this prime, and set next_pid = 0 so that we know to stop here
                // when we sieve
                sdata->inplace_ptrs[bnum][resID_mod30[startclass]] = index;
                sdata->inplace_data[index].next_pid = 0;
            }
            else
            {
                // add this prime to a linked list within the inplace sieve array.
                // this is done by first setting the next id to the current prime
                // at the end of the list
                sdata->inplace_data[index].next_pid =
                    sdata->inplace_ptrs[bnum][resID_mod30[startclass]];

                // and then setting the end of the list to this prime
                sdata->inplace_ptrs[bnum][resID_mod30[startclass]] = index;
            }
        }
    }

    gettimeofday(&tstop, NULL);
    difference = my_difftime(&tstart, &tstop);
    t = ((double)difference->secs + (double)difference->usecs / 1000000);
    free(difference);

    if (VFLAG > 2)
        printf("time to compute inplace sieve roots = %1.2f\n", t);
#endif

    return;
}
/*
 * testfirstRoots - estimate how many large prime bucket slices to allocate.
 *
 * The roots are computed using a and b as (+/-t - b)(a)^-1 mod p, where the
 * t values are the roots of t^2 = N mod p found when the factor base was
 * constructed (assumes b > t).  They are computed as if they were going to
 * be used, but nothing is saved: only roots landing in the first block are
 * counted, to size the buckets.
 *
 * sconf: static job data (factor base, modular square roots).
 * dconf: per-thread job data; dconf->buckets->alloc_slices is the result.
 */
void testfirstRoots(static_conf_t *sconf, dynamic_conf_t *dconf)
{
    //unpack stuff from the job data
    siqs_poly *cur_poly = dconf->curr_poly;
    fb_list *fbase = sconf->factor_base;
    lp_bucket *buckets = dconf->buckets;
    uint32 *sqrt_mod_p = sconf->modsqrt_array;

    uint32 idx;
    int block0_hits = 0;            //roots seen in block 0 for the current slice
    int slice_start = fbase->med_B; //factor base index where the current slice began

    buckets->alloc_slices = 1;

    for (idx = fbase->med_B; idx < fbase->B; idx++)
    {
        int p = fbase->list->prime[idx];
        int r1 = sqrt_mod_p[idx];
        int r2 = p - r1;
        int a_mod_p = (int)mpz_tdiv_ui(cur_poly->mpz_poly_a, p);
        int b_mod_p = (int)mpz_tdiv_ui(cur_poly->mpz_poly_b, p);

        //find a^-1 mod p = inv(a mod p) mod p
        int a_inv = modinv_1(a_mod_p, p);

        //(+/-t - b) mod p
        r1 -= b_mod_p;
        if (r1 < 0)
            r1 += p;

        r2 -= b_mod_p;
        if (r2 < 0)
            r2 += p;

        //... times a^-1 mod p
        r1 = (uint32)((uint64)a_inv * (uint64)r1 % (uint64)p);
        r2 = (uint32)((uint64)a_inv * (uint64)r2 % (uint64)p);

        //each root only needs checking once, because the next step of
        //prime will be into a different bucket
        if ((r1 >> BLOCKBITS) == 0)
            block0_hits++;

        if ((r2 >> BLOCKBITS) == 0)
            block0_hits++;

        if ((uint32)block0_hits > (double)BUCKET_ALLOC * 0.75)
        {
            //we want to allocate more slices than we will probably need
            //assume alloc/2 is a safe amount of slack
            buckets->alloc_slices++;
            block0_hits = 0;
        }

        if (idx - slice_start == 65536)
        {
            //when primes are really big, we may cross this boundary
            //before the buckets fill up
            buckets->alloc_slices++;
            block0_hits = 0;
            slice_start = idx;
        }
    }

    // extra cushion - may increase the memory usage a bit, but in very
    // rare circumstances not enough slices allocated causes crashes.
    buckets->alloc_slices++;

    return;
}