Esempio n. 1
0
DWORD WINAPI soe_worker_thread_main(LPVOID thread_data) {
#else
void *soe_worker_thread_main(void *thread_data) {
#endif
	/*
	 * Entry point of a sieve worker thread.
	 *
	 * thread_data points to this worker's thread_soedata_t.  The thread
	 * loops: wait for a command, execute it, set the command back to
	 * SOE_COMMAND_WAIT and signal completion.  The loop ends when
	 * SOE_COMMAND_END is received.
	 *
	 * Commands handled:
	 *   SOE_COMMAND_SIEVE_AND_COUNT   - allocate a line buffer, sieve it,
	 *                                   store the count in linecount, free it
	 *   SOE_COMMAND_SIEVE_AND_COMPUTE - sieve the current line only
	 *   SOE_COMPUTE_ROOTS             - fill root[] and lower_mod_prime[]
	 *                                   for ids in [startid, stopid)
	 *   SOE_COMPUTE_PRIMES            - call compute_8_bytes over
	 *                                   [startid, stopid) in steps of 8
	 *   SOE_COMPUTE_PRPS              - keep only the candidates inside
	 *                                   [lowlimit, highlimit] that pass
	 *                                   mpz_probab_prime_p
	 *
	 * Returns 0 (Windows) or NULL (pthreads).
	 */
	thread_soedata_t *t = (thread_soedata_t *)thread_data;

	while(1) {
		uint32 i;

		/* wait forever for work to do */
#if defined(WIN32) || defined(_WIN64)
		WaitForSingleObject(t->run_event, INFINITE);
#else
		pthread_mutex_lock(&t->run_lock);
		while (t->command == SOE_COMMAND_WAIT) {
			pthread_cond_wait(&t->run_cond, &t->run_lock);
		}
#endif
		/* do work */

		if (t->command == SOE_COMMAND_SIEVE_AND_COUNT)
		{
			/* the line buffer only lives for the duration of this command.
			 * NOTE(review): the allocation result is not checked here. */
			t->sdata.lines[t->current_line] = 
				(uint8 *)malloc(t->sdata.numlinebytes * sizeof(uint8));
			sieve_line(t);
			t->linecount = count_line(&t->sdata, t->current_line);
			free(t->sdata.lines[t->current_line]);
		}
		else if (t->command == SOE_COMMAND_SIEVE_AND_COMPUTE)
		{
			sieve_line(t);
		}
		else if (t->command == SOE_COMPUTE_ROOTS)
		{
			if (VFLAG > 2)
				printf("starting root computation over %u to %u\n", t->startid, t->stopid);

			if (t->sdata.sieve_range == 0)
			{
				/* residue of the first sieve location is taken directly
				 * from lowlimit */
				for (i = t->startid; i < t->stopid; i++)
				{
					uint32 inv;
					uint32 prime = t->sdata.sieve_p[i];

					/* root = -(prodN^-1) mod prime */
					inv = modinv_1(t->sdata.prodN, prime);
					t->sdata.root[i] = prime - inv;

					t->sdata.lower_mod_prime[i - t->sdata.bucket_start_id] = 
						(t->sdata.lowlimit + 1) % prime;
				}
			}
			else
			{
				/* the first sieve location is offset + lowlimit + 1, which
				 * needs multi-precision arithmetic */
				mpz_t tmpz;
				mpz_init(tmpz);

				mpz_add_ui(tmpz, *t->sdata.offset, t->sdata.lowlimit + 1);
				for (i = t->startid; i < t->stopid; i++)
				{
					uint32 inv;
					uint32 prime = t->sdata.sieve_p[i];

					inv = modinv_1(t->sdata.prodN, prime);
					t->sdata.root[i] = prime - inv;

					t->sdata.lower_mod_prime[i - t->sdata.bucket_start_id] = 
						mpz_tdiv_ui(tmpz, prime);
				}

				/* release the temporary; it was previously leaked on
				 * every SOE_COMPUTE_ROOTS command */
				mpz_clear(tmpz);
			}

		}
		else if (t->command == SOE_COMPUTE_PRIMES)
		{
			t->linecount = 0;

			/* compute_8_bytes returns the updated running count */
			for (i = t->startid; i < t->stopid; i+=8)
			{
				t->linecount = compute_8_bytes(&t->sdata, t->linecount, t->ddata.primes, i, NULL);		
			}
		}
		else if (t->command == SOE_COMPUTE_PRPS)
		{
			/* survivors are compacted to the front of ddata.primes; the
			 * write index (linecount) never passes the read index */
			t->linecount = 0;
			for (i = t->startid; i < t->stopid; i++)
			{
				mpz_add_ui(t->tmpz, t->offset, t->ddata.primes[i - t->startid]);
				if ((mpz_cmp(t->tmpz, t->lowlimit) >= 0) && (mpz_cmp(t->highlimit, t->tmpz) >= 0))
				{
					/* current_line is passed as the repetition count */
					if (mpz_probab_prime_p(t->tmpz, t->current_line))
						t->ddata.primes[t->linecount++] = t->ddata.primes[i - t->startid];
				}
			}
		}
		else if (t->command == SOE_COMMAND_END)
		{
#if !defined(WIN32) && !defined(_WIN64)
			/* do not exit the thread while still owning run_lock: a mutex
			 * left locked by a terminated thread can never be released and
			 * must not be destroyed */
			pthread_mutex_unlock(&t->run_lock);
#endif
			break;
		}

		/* signal completion */

		t->command = SOE_COMMAND_WAIT;
#if defined(WIN32) || defined(_WIN64)
		SetEvent(t->finish_event);
#else
		pthread_cond_signal(&t->run_cond);
		pthread_mutex_unlock(&t->run_lock);
#endif
	}

#if defined(WIN32) || defined(_WIN64)
	return 0;
#else
	return NULL;
#endif
}
Esempio n. 2
0
void firstRoots(static_conf_t *sconf, dynamic_conf_t *dconf)
{
	//Compute the initial sieve roots of the current polynomial for every
	//factor base prime from index start_prime up to fb->B, and the per-prime
	//root update values for each of the poly->s entries of dconf->Bl.
	//
	//sconf supplies the factor base, the modular square roots and the block
	//count; dconf supplies the current polynomial and receives the results
	//(root arrays, rootupdates and, if allocated, the large prime buckets).
	//
	//The factor base is walked in five consecutive index ranges:
	//  [start_prime, sieve_small_fb_start) : tiny primes, fb->tinylist data
	//  [sieve_small_fb_start, fb_15bit_B)  : roots also stored in sm_* arrays
	//  [fb_15bit_B, med_B)                 : roots stored in firstroots*
	//  [med_B, large_B)                    : FILL_ONE_PRIME_LOOP_* macros
	//  [large_B, B)                        : FILL_ONE_PRIME_* macros
	//
	//NOTE(review): COMPUTE_FIRST_ROOTS, CHECK_NEW_SLICE and the FILL_ONE_PRIME*
	//macros are defined outside this view; they presumably read and write the
	//locals declared below by name, so locals that look unused here (bptr,
	//bnum, k, room, bound_val, interval, ...) should not be renamed or removed
	//without checking the macro definitions.

	//the roots are computed using a and b as follows:
	//(+/-t - b)(a)^-1 mod p
	//where the t values are the roots to t^2 = N mod p, found by shanks_tonelli
	//when constructing the factor base.
	//assume b > t

	//unpack stuff from the job data structures
	siqs_poly *poly = dconf->curr_poly;
	fb_list *fb = sconf->factor_base;
	//first factor base index handled; indices 0 and 1 are never visited here
	uint32 start_prime = 2;
	int *rootupdates = dconf->rootupdates;
	//by-value copy of the struct; presumably its members are pointers so the
	//writes below land in dconf's storage - TODO confirm
	update_t update_data = dconf->update_data;
	sieve_fb_compressed *fb_p = dconf->comp_sieve_p;
	sieve_fb_compressed *fb_n = dconf->comp_sieve_n;
	lp_bucket *lp_bucket_p = dconf->buckets;
	uint32 *modsqrt = sconf->modsqrt_array;

	//locals
	uint32 i, interval;
	uint8 logp;
	int root1, root2, prime, amodp, bmodp, inv, x, bnum,j,numblocks;
	int s = poly->s;
	int bound_index = 0, k;
	uint32 bound_val = fb->med_B;
	uint32 *bptr, *sliceptr_p, *sliceptr_n;
	uint32 *numptr_p, *numptr_n;
	//this initial value is replaced before the med_B loop further down
	int check_bound = BUCKET_ALLOC/2 - 1, room;

	numblocks = sconf->num_blocks;
	interval = numblocks << BLOCKBITS;

	if (lp_bucket_p->list != NULL)
	{
		//buckets are allocated: the first slice starts at med_B, the
		//negative-side entries follow the numblocks positive-side entries
		lp_bucket_p->fb_bounds[0] = fb->med_B;

		sliceptr_p = lp_bucket_p->list;
		sliceptr_n = lp_bucket_p->list + (numblocks << BUCKET_BITS);

		numptr_p = lp_bucket_p->num;
		numptr_n = lp_bucket_p->num + numblocks;
		//reset lp_buckets
		//(clears the counts of both sides for every allocated slice)
		for (i=0;i< (2*numblocks*lp_bucket_p->alloc_slices) ;i++)
			numptr_p[i] = 0;

		lp_bucket_p->num_slices = 0;
	}
	else
	{
		sliceptr_p = NULL;
		sliceptr_n = NULL;
		numptr_p = NULL;
		numptr_n = NULL;
	}

	//range 1: tiny primes, using the data in fb->tinylist
	for (i=start_prime;i<sconf->sieve_small_fb_start;i++)
	{
		uint64 q64, tmp, t2;

		prime = fb->tinylist->prime[i];
		root1 = modsqrt[i]; 
		root2 = prime - root1; 

		amodp = (int)mpz_tdiv_ui(poly->mpz_poly_a,prime);
		bmodp = (int)mpz_tdiv_ui(poly->mpz_poly_b,prime);

		//find a^-1 mod p = inv(a mod p) mod p
		inv = modinv_1(amodp,prime);

		//macro defined elsewhere; presumably applies the "- b" step to
		//root1 and root2 - TODO confirm
		COMPUTE_FIRST_ROOTS
	
		// reuse integer inverse of prime that we've calculated for use
		// in trial division stage
		// inv * root1 % prime
		// (quotient = ((t2 + correction) * small_inv) >> 32, then
		//  remainder = t2 - quotient * prime)
		t2 = (uint64)inv * (uint64)root1;
		tmp = t2 + (uint64)fb->tinylist->correction[i];
		q64 = tmp * (uint64)fb->tinylist->small_inv[i];
		tmp = q64 >> 32; 
		root1 = t2 - tmp * prime;

		// inv * root2 % prime
		t2 = (uint64)inv * (uint64)root2;
		tmp = t2 + (uint64)fb->tinylist->correction[i];
		q64 = tmp * (uint64)fb->tinylist->small_inv[i];
		tmp = q64 >> 32; 
		root2 = t2 - tmp * prime;
	
		//we don't sieve these primes, so ordering doesn't matter
		update_data.firstroots1[i] = root1;
		update_data.firstroots2[i] = root2;

		fb_p->root1[i] = (uint16)root1;
		fb_p->root2[i] = (uint16)root2;
		fb_n->root1[i] = (uint16)(prime - root2);
		fb_n->root2[i] = (uint16)(prime - root1);
		//if we were sieving, this would double count the location on the 
		//positive side.  but since we're not, its easier to check for inclusion
		//on the progression if we reset the negative root to zero if it is == prime
		if (fb_n->root1[i] == prime)
			fb_n->root1[i] = 0;
		if (fb_n->root2[i] == prime)
			fb_n->root2[i] = 0;

		//for this factor base prime, compute the rootupdate value for all s
		//Bl values.  amodp holds a^-1 mod p
		//the rootupdate value is given by 2*Bj*amodp
		//Bl[j] now holds 2*Bl
		//(the table is laid out as s rows of fb->B entries)
		for (j=0;j<s;j++)
		{
			x = (int)mpz_tdiv_ui(dconf->Bl[j],prime);
			
			// x * inv % prime
			t2 = (uint64)inv * (uint64)x;
			tmp = t2 + (uint64)fb->tinylist->correction[i];
			q64 = tmp * (uint64)fb->tinylist->small_inv[i];
			tmp = q64 >> 32; 
			x = t2 - tmp * prime;

			rootupdates[(j)*fb->B+i] = x;
		}
	}

	//range 2: roots are stored sorted (smaller first) and additionally
	//written to the 16-bit sm_firstroots / sm_rootupdates arrays
	for (i=sconf->sieve_small_fb_start;i<fb->fb_15bit_B;i++)
	{
		uint64 small_inv, correction;
		uint64 q64, tmp, t2;

		prime = fb->list->prime[i];
		root1 = modsqrt[i]; 
		root2 = prime - root1; 

		// compute integer inverse of prime for use in mod operations in this
		// function.
		// small_inv starts as floor(2^48 / prime); if rounding to nearest
		// gives the same value, correction = 1, otherwise small_inv is
		// bumped by one and correction = 0.
		small_inv = ((uint64)1 << 48) / (uint64)prime;
		if (floor((double)((uint64)1 << 48) / (double)prime + 0.5) ==
						(double)small_inv) {
			correction = 1;
		}
		else {
			correction = 0;
			small_inv++;
		}

		amodp = (int)mpz_tdiv_ui(poly->mpz_poly_a,prime);
		bmodp = (int)mpz_tdiv_ui(poly->mpz_poly_b,prime);

		//find a^-1 mod p = inv(a mod p) mod p
		inv = modinv_1(amodp,prime);

		COMPUTE_FIRST_ROOTS

		// inv * root1 % prime
		// (same reciprocal reduction as above, with a 48-bit shift)
		t2 = (uint64)inv * (uint64)root1;
		tmp = t2 + correction;
		q64 = tmp * small_inv;
		tmp = q64 >> 48; 
		root1 = t2 - tmp * prime;

		// inv * root2 % prime
		t2 = (uint64)inv * (uint64)root2;
		tmp = t2 + correction;
		q64 = tmp * small_inv;
		tmp = q64 >> 48; 
		root2 = t2 - tmp * prime;

		//store the smaller root first; the negative-side roots are the
		//mirrored values prime - root
		if (root2 < root1)
		{
			update_data.sm_firstroots1[i] = (uint16)root2;
			update_data.sm_firstroots2[i] = (uint16)root1;

			fb_p->root1[i] = (uint16)root2;
			fb_p->root2[i] = (uint16)root1;
			fb_n->root1[i] = (uint16)(prime - root1);
			fb_n->root2[i] = (uint16)(prime - root2);
		}
		else
		{
			update_data.sm_firstroots1[i] = (uint16)root1;
			update_data.sm_firstroots2[i] = (uint16)root2;

			fb_p->root1[i] = (uint16)root1;
			fb_p->root2[i] = (uint16)root2;
			fb_n->root1[i] = (uint16)(prime - root2);
			fb_n->root2[i] = (uint16)(prime - root1);
		}

		//for this factor base prime, compute the rootupdate value for all s
		//Bl values.  amodp holds a^-1 mod p
		//the rootupdate value is given by 2*Bj*amodp
		//Bl[j] now holds 2*Bl
		for (j=0;j<s;j++)
		{
			x = (int)mpz_tdiv_ui(dconf->Bl[j],prime);

			// x * inv % prime
			t2 = (uint64)inv * (uint64)x;
			tmp = t2 + correction;
			q64 = tmp * small_inv;
			tmp = q64 >> 48; 
			x = t2 - tmp * prime;

			rootupdates[(j)*fb->B+i] = x;
			dconf->sm_rootupdates[(j)*fb->B+i] = (uint16)x;
		}
	}

	//range 3: same computation as range 2, but the roots go to the
	//firstroots arrays and only rootupdates is filled
	//printf("prime[15bit-1] = %u\n", fb_p->prime[fb->fb_15bit_B-1]);
	for (i=fb->fb_15bit_B;i<fb->med_B;i++)
	{
		uint64 small_inv, correction;
		uint64 q64, tmp, t2;

		prime = fb->list->prime[i];
		root1 = modsqrt[i]; 
		root2 = prime - root1; 

		// compute integer inverse of prime for use in mod operations in this
		// function.
		small_inv = ((uint64)1 << 48) / (uint64)prime;
		if (floor((double)((uint64)1 << 48) / (double)prime + 0.5) ==
						(double)small_inv) {
			correction = 1;
		}
		else {
			correction = 0;
			small_inv++;
		}

		amodp = (int)mpz_tdiv_ui(poly->mpz_poly_a,prime);
		bmodp = (int)mpz_tdiv_ui(poly->mpz_poly_b,prime);

		//find a^-1 mod p = inv(a mod p) mod p
		inv = modinv_1(amodp,prime);

		COMPUTE_FIRST_ROOTS

		// inv * root1 % prime
		t2 = (uint64)inv * (uint64)root1;
		tmp = t2 + correction;
		q64 = tmp * small_inv;
		tmp = q64 >> 48; 
		root1 = t2 - tmp * prime;

		// inv * root2 % prime
		t2 = (uint64)inv * (uint64)root2;
		tmp = t2 + correction;
		q64 = tmp * small_inv;
		tmp = q64 >> 48; 
		root2 = t2 - tmp * prime;

		if (root2 < root1)
		{
			update_data.firstroots1[i] = root2;
			update_data.firstroots2[i] = root1;

			fb_p->root1[i] = (uint16)root2;
			fb_p->root2[i] = (uint16)root1;
			fb_n->root1[i] = (uint16)(prime - root1);
			fb_n->root2[i] = (uint16)(prime - root2);
		}
		else
		{
			update_data.firstroots1[i] = root1;
			update_data.firstroots2[i] = root2;

			fb_p->root1[i] = (uint16)root1;
			fb_p->root2[i] = (uint16)root2;
			fb_n->root1[i] = (uint16)(prime - root2);
			fb_n->root2[i] = (uint16)(prime - root1);
		}

		//for this factor base prime, compute the rootupdate value for all s
		//Bl values.  amodp holds a^-1 mod p
		//the rootupdate value is given by 2*Bj*amodp
		//Bl[j] now holds 2*Bl
		for (j=0;j<s;j++)
		{
			x = (int)mpz_tdiv_ui(dconf->Bl[j],prime);

			// x * inv % prime
			t2 = (uint64)inv * (uint64)x;
			tmp = t2 + correction;
			q64 = tmp * small_inv;
			tmp = q64 >> 48; 
			x = t2 - tmp * prime;

			rootupdates[(j)*fb->B+i] = x;
		}
	}

	//range 4: primes from med_B on are handed to the bucket macros; plain
	//64-bit modular reduction is used from here on
	check_bound = fb->med_B + BUCKET_ALLOC/2;
	logp = fb->list->logprime[fb->med_B-1];
	for (i=fb->med_B;i<fb->large_B;i++)
	{
		//uint64 small_inv, correction;
		//uint64 q64, tmp, t2;

		//macro defined elsewhere; presumably starts a new bucket slice when
		//needed using check_bound / bound_index / logp - TODO confirm
		CHECK_NEW_SLICE(i);

		prime = fb->list->prime[i];
		root1 = modsqrt[i];
		root2 = prime - root1; 

		amodp = (int)mpz_tdiv_ui(poly->mpz_poly_a,prime);
		bmodp = (int)mpz_tdiv_ui(poly->mpz_poly_b,prime);

		//find a^-1 mod p = inv(a mod p) mod p
		inv = modinv_1(amodp,prime);

		COMPUTE_FIRST_ROOTS

		root1 = (uint32)((uint64)inv * (uint64)root1 % (uint64)prime);
		root2 = (uint32)((uint64)inv * (uint64)root2 % (uint64)prime);
		
		update_data.firstroots1[i] = root1;
		update_data.firstroots2[i] = root2;

		FILL_ONE_PRIME_LOOP_P(i);

		//the roots are re-derived from the stored values rather than from
		//root1/root2 - presumably because the macro above modifies them
		root1 = (prime - update_data.firstroots1[i]);
		root2 = (prime - update_data.firstroots2[i]);

		FILL_ONE_PRIME_LOOP_N(i);

		//for this factor base prime, compute the rootupdate value for all s
		//Bl values.  amodp holds a^-1 mod p
		//the rootupdate value is given by 2*Bj*amodp
		//Bl[j] now holds 2*Bl
		for (j=0;j<s;j++)
		{
			x = (int)mpz_tdiv_ui(dconf->Bl[j], prime);
			x = (int)((int64)x * (int64)inv % (int64)prime);

			rootupdates[(j)*fb->B+i] = x;
		}

	}

	//range 5: the remaining primes up to fb->B, using the non-LOOP variants
	//of the fill macros
	logp = fb->list->logprime[fb->large_B-1];
	for (i=fb->large_B;i<fb->B;i++)
	{
		CHECK_NEW_SLICE(i);

		prime = fb->list->prime[i];
		root1 = modsqrt[i];
		root2 = prime - root1; 

		amodp = (int)mpz_tdiv_ui(poly->mpz_poly_a,prime);
		bmodp = (int)mpz_tdiv_ui(poly->mpz_poly_b,prime);

		//find a^-1 mod p = inv(a mod p) mod p
		inv = modinv_1(amodp,prime);

		COMPUTE_FIRST_ROOTS
	
		root1 = (uint32)((uint64)inv * (uint64)root1 % (uint64)prime);
		root2 = (uint32)((uint64)inv * (uint64)root2 % (uint64)prime);

		update_data.firstroots1[i] = root1;
		update_data.firstroots2[i] = root2;

		FILL_ONE_PRIME_P(i);

		//mirror the roots for the negative side
		root1 = (prime - root1);
		root2 = (prime - root2);

		FILL_ONE_PRIME_N(i);

		//for this factor base prime, compute the rootupdate value for all s
		//Bl values.  amodp holds a^-1 mod p
		//the rootupdate value is given by 2*Bj*amodp
		//Bl[j] now holds 2*Bl
		//s is the number of primes in 'a'
		for (j=0;j<s;j++)
		{
			x = (int)mpz_tdiv_ui(dconf->Bl[j], prime);
			x = (int)((int64)x * (int64)inv % (int64)prime);
			rootupdates[(j)*fb->B+i] = x;
		}
	}

	//record how many slices ended up in use (bound_index is the index of the
	//last one; it is only changed inside the macros, if at all)
	if (lp_bucket_p->list != NULL)
		lp_bucket_p->num_slices = bound_index + 1;
	

	return;
}
Esempio n. 3
0
void getRoots(soe_staticdata_t *sdata, thread_soedata_t *thread_data)
{
    // Compute the per-prime sieve constants used to find block offsets.
    //
    // For every sieve prime p with id in [startprime, pboundi) this stores
    //     sdata->root[id] = p - (prodN^-1 mod p)
    // The inverse comes from the extended euclidean algorithm
    // (a*x + b*y = gcd(a,b) with a = p, b = prodN, gcd = 1), it is constant
    // for the whole sieve, so it is computed once here.
    //
    // For ids >= bucket_start_id (the bucket sieved primes) it additionally
    // stores the residue mod p of the first sieve location in
    // lower_mod_prime[id - bucket_start_id], pulling a division out of the
    // critical loop that finds offsets for bucket sieved primes.
    //
    // The bucket range is split evenly over THREADS: the first THREADS-1
    // pieces are sent to worker threads as SOE_COMPUTE_ROOTS commands, the
    // last piece (plus any remainder) is computed in the calling thread.
    //
    // sdata       - shared sieve data; root[] and lower_mod_prime[] are written
    // thread_data - array of THREADS per-thread structures

    int prodN;
    uint64 startprime;
    uint64 i;
    int j;
    uint32 range, lastid;

    //timing
    double t;
    struct timeval tstart, tstop;
    TIME_DIFF *	difference;

    prodN = (int)sdata->prodN;
    startprime = sdata->startprime;

    gettimeofday(&tstart, NULL);

    // primes below the bucket range only need the root
    for (i=startprime; i<sdata->bucket_start_id; i++)
    {
        // unsigned, matching the worker thread's version of this loop
        uint32 prime = sdata->sieve_p[i];
        uint32 inv;

        //solve prodN ^ -1 % p
        inv = modinv_1(prodN,prime);
        sdata->root[i] = prime - inv;
    }

    gettimeofday(&tstop, NULL);

    difference = my_difftime(&tstart, &tstop);
    t = ((double)difference->secs + (double)difference->usecs / 1000000);
    free(difference);

    if (VFLAG > 2)
        printf("time to compute linear sieve roots = %1.2f\n", t);

    gettimeofday(&tstart, NULL);

    // start the threads; the last slot is started with the second
    // argument set to 1 and its work is run in the current thread below
    for (i = 0; i < THREADS - 1; i++)
        start_soe_worker_thread(thread_data + i, 0);

    start_soe_worker_thread(thread_data + i, 1);

    range = (sdata->pboundi - sdata->bucket_start_id) / THREADS;
    lastid = sdata->bucket_start_id;

    // divvy up the primes
    for (j = 0; j < THREADS; j++)
    {
        thread_soedata_t *td = thread_data + j;

        td->sdata = *sdata;
        td->startid = lastid;
        td->stopid = td->startid + range;
        lastid = td->stopid;
    }

    // the last one gets any leftover
    if (thread_data[THREADS-1].stopid != sdata->pboundi)
        thread_data[THREADS-1].stopid = sdata->pboundi;

    // now run with the threads
    for (j = 0; j < THREADS; j++)
    {
        thread_soedata_t *td = thread_data + j;

        if (j == (THREADS - 1))
        {
            if (VFLAG > 2)
                printf("starting root computation over %u to %u\n", td->startid, td->stopid);

            // run in the current thread
            // bucket sieved primes need more data
            if (sdata->sieve_range == 0)
            {
                for (i = td->startid; i < td->stopid; i++)
                {
                    // prime must be unsigned: as a signed int a prime
                    // >= 2^31 would be sign-extended in the modulus below
                    uint32 prime = td->sdata.sieve_p[i];
                    uint32 inv;

                    //solve prodN ^ -1 % p
                    inv = modinv_1(prodN, prime);
                    td->sdata.root[i] = prime - inv;

                    // residue of the first sieve location in the first
                    // residue class; only used by bucket sieved primes
                    td->sdata.lower_mod_prime[i - td->sdata.bucket_start_id] =
                        (td->sdata.lowlimit + 1) % prime;
                }
            }
            else
            {
                // the first sieve location is offset + lowlimit + 1, which
                // needs multi-precision arithmetic
                mpz_t tmpz;
                mpz_init(tmpz);

                mpz_add_ui(tmpz, *td->sdata.offset, td->sdata.lowlimit + 1);
                for (i = td->startid; i < td->stopid; i++)
                {
                    uint32 prime = td->sdata.sieve_p[i];
                    uint32 inv;

                    //solve prodN ^ -1 % p
                    inv = modinv_1(prodN,prime);
                    td->sdata.root[i] = prime - inv;

                    td->sdata.lower_mod_prime[i - td->sdata.bucket_start_id] =
                        mpz_tdiv_ui(tmpz, prime);
                }

                // release the temporary; it was previously leaked
                mpz_clear(tmpz);
            }
        }
        else
        {
            // hand the piece to the worker and wake it up
            td->command = SOE_COMPUTE_ROOTS;

#if defined(WIN32) || defined(_WIN64)
            SetEvent(td->run_event);
#else
            pthread_cond_signal(&td->run_cond);
            pthread_mutex_unlock(&td->run_lock);
#endif
        }
    }


    //wait for each thread to finish
    for (i = 0; i < THREADS; i++)
    {
        thread_soedata_t *td = thread_data + i;

        if (i < (THREADS - 1))
        {
#if defined(WIN32) || defined(_WIN64)
            WaitForSingleObject(td->finish_event, INFINITE);
#else
            // run_lock stays held by this thread once the worker is idle
            pthread_mutex_lock(&td->run_lock);
            while (td->command != SOE_COMMAND_WAIT)
                pthread_cond_wait(&td->run_cond, &td->run_lock);
#endif
        }
    }

    //stop the worker threads
    for (i=0; i<THREADS - 1; i++)
        stop_soe_worker_thread(thread_data + i, 0);

    gettimeofday(&tstop, NULL);

    difference = my_difftime(&tstart, &tstop);
    t = ((double)difference->secs + (double)difference->usecs / 1000000);
    free(difference);

    if (VFLAG > 2)
        printf("time to compute bucket sieve roots = %1.2f\n", t);

#ifdef INPLACE_BUCKET
    gettimeofday(&tstart, NULL);

    // inplace primes have special requirements because they operate on
    // the normal number line, and not in residue space
    //
    // NOTE(review): i is not re-initialized here, so the loop starts from the
    // value left by the worker shutdown loop above while index is computed
    // relative to inplace_start_id - presumably the intent was to start at
    // sdata->inplace_start_id; confirm before enabling INPLACE_BUCKET.
    for (; i < sdata->pboundi; i++)
    {
        uint64 starthit;
        uint32 startclass;
        uint64 startbit;
        uint32 rclass, bnum, rclassid;
        uint32 index = i - sdata->inplace_start_id;

        // pull some computations involving a division out of the inner loop.
        // we need to know what prime/prodN and prime%prodN are.
        sdata->inplace_data[index].p_div =
            sdata->sieve_p[i] / prodN;
        rclass = sdata->sieve_p[i] % prodN;
        rclassid = resID_mod30[rclass];
        sdata->inplace_data[index].p_mod = rclass;

        // now compute the starting hit in our sieve interval...
        starthit = (sdata->lowlimit / sdata->sieve_p[i] + 1) * sdata->sieve_p[i];

        // ... that is in one of our residue classes
        startclass = starthit % prodN;

        // using a lookup table
        // (the entry packs a multiplier in the bits above 8 and the
        // resulting class in the low 8 bits)
        startclass = next_mod30[rclassid][startclass];

        starthit += ((uint64)sdata->sieve_p[i] * (uint64)(startclass >> 8));
        startclass = startclass & 0xff;

        // the starting accumulated error is equal to the starting class
        sdata->inplace_data[index].eacc = startclass;

        // now compute the starting bit and block location for this starting hit
        startbit = (starthit - sdata->lowlimit - (uint64)startclass) / (uint64)prodN;

        // sanity check
        if (((starthit - sdata->lowlimit - (uint64)startclass) % (uint64)prodN) != 0)
            printf("starting bit is invalid!\n");

        sdata->inplace_data[index].bitloc = startbit & FLAGSIZEm1;
        bnum = startbit >> FLAGBITS;

        // finally, add the prime to a linked list
        // if the next hit is within our interval
        if (bnum < sdata->blocks)
        {
            //then reassign this prime to its next hit
            if (sdata->inplace_ptrs[bnum][resID_mod30[startclass]] == -1)
            {
                // this is the first hit in this block and rclass, so set the pointer
                // to this prime, and set next_pid = 0 so that we know to stop here
                // when we sieve
                sdata->inplace_ptrs[bnum][resID_mod30[startclass]] = index;
                sdata->inplace_data[index].next_pid = 0;
            }
            else
            {
                // add this prime to a linked list within the inplace sieve array.
                // this is done by first setting the next id to the current prime
                // at the end of the list
                sdata->inplace_data[index].next_pid = sdata->inplace_ptrs[bnum][resID_mod30[startclass]];

                // and then setting the end of the list to this prime
                sdata->inplace_ptrs[bnum][resID_mod30[startclass]] = index;
            }
        }

    }

    gettimeofday(&tstop, NULL);

    difference = my_difftime(&tstart, &tstop);
    t = ((double)difference->secs + (double)difference->usecs / 1000000);
    free(difference);

    if (VFLAG > 2)
        printf("time to compute inplace sieve roots = %1.2f\n", t);

#endif

    return;
}
Esempio n. 4
0
void testfirstRoots(static_conf_t *sconf, dynamic_conf_t *dconf)
{
	//Estimate how many large prime bucket slices must be allocated and
	//store the result in dconf->buckets->alloc_slices.
	//
	//the roots are computed using a and b as follows:
	//(+/-t - b)(a)^-1 mod p
	//where the t values are the roots to t^2 = N mod p, found by shanks_tonelli
	//when constructing the factor base.
	//assume b > t
	//
	//compute the roots as if we were actually going to use this, but don't save
	//anything.  We are just trying to determine the size needed for each large 
	//prime bucket by sieving over just the first bucket
	//
	//sconf supplies the factor base and the modular square roots; dconf
	//supplies the current polynomial.  Only alloc_slices is modified.

	//unpack stuff from the job data
	siqs_poly *poly = dconf->curr_poly;
	fb_list *fb = sconf->factor_base;
	lp_bucket *lp_bucket_p = dconf->buckets;
	uint32 *modsqrt = sconf->modsqrt_array;

	uint32 i;
	int lpnum = 0;					//roots seen in block 0 for the current slice
	int last_bound = fb->med_B;		//fb index where the current slice began

	lp_bucket_p->alloc_slices = 1;

	for (i = fb->med_B; i < fb->B; i++)
	{
		int prime = fb->list->prime[i];
		int root1 = modsqrt[i];
		int root2 = prime - root1;
		int amodp = (int)mpz_tdiv_ui(poly->mpz_poly_a, prime);
		int bmodp = (int)mpz_tdiv_ui(poly->mpz_poly_b, prime);

		//find a^-1 mod p = inv(a mod p) mod p
		int inv = modinv_1(amodp, prime);

		//(+/-t - b) mod p
		root1 -= bmodp;
		if (root1 < 0)
			root1 += prime;

		root2 -= bmodp;
		if (root2 < 0)
			root2 += prime;

		//... times a^-1 mod p
		root1 = (uint32)((uint64)inv * (uint64)root1 % (uint64)prime);
		root2 = (uint32)((uint64)inv * (uint64)root2 % (uint64)prime);

		//just need to do this once, because the next step of prime will be 
		//into a different bucket
		if ((root1 >> BLOCKBITS) == 0)
			lpnum++;

		//repeat for the other root
		if ((root2 >> BLOCKBITS) == 0)
			lpnum++;

		if ((uint32)lpnum > (double)BUCKET_ALLOC * 0.75)
		{
			//we want to allocate more slices than we will probably need
			//assume alloc/2 is a safe amount of slack
			lp_bucket_p->alloc_slices++;
			lpnum = 0;
		}

		if (i - last_bound == 65536)
		{
			//when prime are really big, we may cross this boundary
			//before the buckets fill up
			lp_bucket_p->alloc_slices++;
			lpnum = 0;
			last_bound = i;
		}
	}

	// extra cushion - may increase the memory usage a bit, but in very
	// rare circumstances not enough slices allocated causes crashes.
	lp_bucket_p->alloc_slices++;

	return;
}