Exemplo n.º 1
0
/* assuming slaves (workers)) are all homogenous, let them all do the calculations
 regarding primes sieving, calculating the smoothness base and the modular roots */
int main(int argc, char **argv) {
	MPI_Init(&argc, &argv);
	MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
	MPI_Comm_size(MPI_COMM_WORLD, &mpi_group_size);
	int len;
	MPI_Get_processor_name(processor_name, &len);

	gettimeofday(&start_global, NULL);
	print_lib_version();

	mpz_init(N);
	mpz_t B;
	mpz_init(B);

	unsigned long int uBase;
	int64_t nb_primes;
	modular_root_t *modular_roots;

	uint64_t i, j;

	if (argc < 2) {
		PRINT(my_rank, "usage: %s Number_to_factorize\n", argv[0]);
		exit(2);
	}

	if (mpz_init_set_str(N, argv[1], 10) == -1) {
		PRINT(my_rank, "Cannot load N %s\n", argv[1]);
		exit(2);
	}

	mpz_t sqrtN, rem;
	mpz_init(sqrtN);
	mpz_init(rem);
	mpz_sqrtrem(sqrtN, rem, N);

	if (mpz_cmp_ui(rem, 0) != 0) /* if not perfect square, calculate the ceiling */
		mpz_add_ui(sqrtN, sqrtN, 1);
	else /* N is a perfect square, factored! */
	{
		PRINT(my_rank, "\n<<<[FACTOR]>>> %s\n", mpz_get_str(NULL, 10, sqrtN));
		return 0;
	}

	if (mpz_probab_prime_p(N, 10) > 0) /* don't bother factoring */
	{
		PRINT(my_rank, "N:%s is prime\n", mpz_get_str(NULL, 10, N));
		exit(0);
	}

	OPEN_LOG_FILE("freq");

//--------------------------------------------------------
//  calculate the smoothness base for the given N
//--------------------------------------------------------
	get_smoothness_base(B, N); /* if N is too small, the program will surely fail, please consider a pen and paper instead */
	uBase = mpz_get_ui(B);
	PRINT(my_rank, "n: %s\tBase: %s\n",
			mpz_get_str(NULL, 10, N), mpz_get_str(NULL, 10, B));

//--------------------------------------------------------
// sieve primes that are less than the smoothness base using Eratosthenes sieve
//--------------------------------------------------------
	START_TIMER();
	nb_primes = sieve_primes_up_to((int64_t) (uBase));

	PRINT(my_rank, "\tPrimes found %" PRId64 " [Smoothness Base %lu]\n",
			nb_primes, uBase);
	STOP_TIMER_PRINT_TIME("\tEratosthenes Sieving done");

//--------------------------------------------------------
// fill the primes array with primes to which n is a quadratic residue
//--------------------------------------------------------
	START_TIMER();
	primes = calloc(nb_primes, sizeof(int64_t));
	nb_qr_primes = fill_primes_with_quadratic_residue(primes, N);

	/*for(i=0; i<nb_qr_primes; i++)
	 PRINT(my_rank, "%" PRId64 "\n", primes[i]);*/

	PRINT(my_rank, "\tN-Quadratic primes found %" PRId64 "\n", nb_qr_primes);
	STOP_TIMER_PRINT_TIME("\tQuadratic prime filtering done");

//--------------------------------------------------------
// calculate modular roots
//--------------------------------------------------------
	START_TIMER();
	modular_roots = calloc(nb_qr_primes, sizeof(modular_root_t));
	mpz_t tmp, r1, r2;
	mpz_init(tmp);
	mpz_init(r1);
	mpz_init(r2);

	for (i = 0; i < nb_qr_primes; i++) {
		mpz_set_ui(tmp, (unsigned long) primes[i]);
		mpz_sqrtm(r1, N, tmp); /* calculate the modular root */
		mpz_neg(r2, r1); /* -q mod n */
		mpz_mod(r2, r2, tmp);

		modular_roots[i].root1 = mpz_get_ui(r1);
		modular_roots[i].root2 = mpz_get_ui(r2);
	}
	mpz_clear(tmp);
	mpz_clear(r1);
	mpz_clear(r2);
	STOP_TIMER_PRINT_TIME("Modular roots calculation done");

//--------------------------------------------------------
//         ***** initialize the matrix *****
//--------------------------------------------------------
	if (my_rank == 0) /* only the master have the matrix */
	{
		START_TIMER();
		init_matrix(&matrix, nb_qr_primes + NB_VECTORS_OFFSET, nb_qr_primes);
		mpz_init2(tmp_matrix_row, nb_qr_primes);
		STOP_TIMER_PRINT_TIME("Matrix initialized");
	}

//--------------------------------------------------------
// [Sieving] - everyones sieves including the master
//--------------------------------------------------------
	START_TIMER();

	mpz_t x, sieving_index, next_sieving_index, relative_start, global_step;
	unsigned long ui_index, SIEVING_STEP = 50000; /* we sieve for 50000 elements at each loop */
	int LOCAL_SIEVING_ROUNDS = 10; /* number of iterations a worker sieves before communicating results to the master */
	unsigned long sieving_round = 0;
	unsigned long nb_big_rounds = 0;

	uint64_t p_pow;
	smooth_number_t *x_squared;

	x_squared = calloc(SIEVING_STEP, sizeof(smooth_number_t));

	if (my_rank == 0)
		smooth_numbers = calloc(nb_qr_primes + NB_VECTORS_OFFSET,
				sizeof(smooth_number_t));
	else
		temp_slaves_smooth_numbers = calloc(500, sizeof(smooth_number_t));
	/* TODO: this is not properly correct, using a linkedlist is better to keep track of temporary
	 * smooth numbers at the slaves nodes however it's pretty rare to find 500 smooth numbers in
	 * 50000 * 10 interval. */

	mpz_init_set(x, sqrtN);
	mpz_init(global_step);
	mpz_init(relative_start);
	mpz_init(sieving_index);
	mpz_init(next_sieving_index);

	mpz_t p;
	mpz_init(p);
	mpz_t str;
	mpz_init_set(str, sieving_index);
	PRINT(my_rank, "\n[%s] Sieving ...\n", processor_name);

//--------------------------------------------------------
// Init before sieving
//--------------------------------------------------------
	for (i = 0; i < SIEVING_STEP; i++) {
		mpz_init(x_squared[i].value_x);
		mpz_init(x_squared[i].value_x_squared);

		mpz_init2(x_squared[i].factors_vect, nb_qr_primes);
		mpz_add_ui(x, x, 1);
	}

	int nb_smooth_per_round = 0;
	char s[512];

//--------------------------------------------------------
// WHILE smooth numbers found less than the primes in the smooth base + NB_VECTORS_OFFSET for master
// Or master asked for more smooth numbers from slaves
//--------------------------------------------------------
	while (1) {
		mpz_set_ui(global_step, nb_big_rounds); /* calculates the coordinate where the workers start sieving from */
		mpz_mul_ui(global_step, global_step, (unsigned long) mpi_group_size);
		mpz_mul_ui(global_step, global_step, SIEVING_STEP);
		mpz_mul_ui(global_step, global_step, LOCAL_SIEVING_ROUNDS);
		mpz_add(global_step, global_step, sqrtN);

		mpz_set_ui(relative_start, SIEVING_STEP);
		mpz_mul_ui(relative_start, relative_start, LOCAL_SIEVING_ROUNDS);
		mpz_mul_ui(relative_start, relative_start, (unsigned long) my_rank);
		mpz_add(relative_start, relative_start, global_step);

		mpz_set(sieving_index, relative_start);
		mpz_set(next_sieving_index, relative_start);

		for (sieving_round = 0; sieving_round < LOCAL_SIEVING_ROUNDS; /* each slave sieves for LOCAL_SIEVING_ROUNDS rounds */
		sieving_round++) {
			nb_smooth_per_round = 0;
			mpz_set(x, next_sieving_index); /* sieve numbers from sieving_index to sieving_index + sieving_step */
			mpz_set(sieving_index, next_sieving_index);

			if (my_rank == 0) {
				printf("\r");
				printf(
						"\t\tSieving at: %s30 <--> Smooth numbers found: %" PRId64 "/%" PRId64 "",
						mpz_get_str(NULL, 10, sieving_index),
						nb_global_smooth_numbers_found, nb_qr_primes);
				fflush(stdout);
			}

			for (i = 0; i < SIEVING_STEP; i++) {
				mpz_set(x_squared[i].value_x, x);

				mpz_pow_ui(x_squared[i].value_x_squared, x, 2); /* calculate value_x_squared <- x²-n */
				mpz_sub(x_squared[i].value_x_squared,
						x_squared[i].value_x_squared, N);

				mpz_clear(x_squared[i].factors_vect);
				mpz_init2(x_squared[i].factors_vect, nb_qr_primes); /* reconstruct a new fresh 0ed vector of size nb_qr_primes bits */

				mpz_add_ui(x, x, 1);
			}
			mpz_set(next_sieving_index, x);

//--------------------------------------------------------
// eliminate factors in the x_squared array, those who are 'destructed' to 1 are smooth
//--------------------------------------------------------
			for (i = 0; i < nb_qr_primes; i++) {
				mpz_set_ui(p, (unsigned long) primes[i]);
				mpz_set(x, sieving_index);

				/* get the first multiple of p that is directly larger that sieving_index
				 * Quadratic SIEVING: all elements from this number and in positions multiples of root1 and root2
				 * are also multiples of p */
				get_sieving_start_index(x, x, p, modular_roots[i].root1);
				mpz_set(str, x);
				mpz_sub(x, x, sieving_index); /* x contains index of first number that is divisible by p */

				for (j = mpz_get_ui(x); j < SIEVING_STEP; j += primes[i]) {
					p_pow = mpz_remove(x_squared[j].value_x_squared,
							x_squared[j].value_x_squared, p); /* eliminate all factors of p */

					if (p_pow & 1) /* mark bit if odd power of p exists in this x_squared[j] */
					{
						mpz_setbit(x_squared[j].factors_vect, i);
					}

					if (mpz_cmp_ui(x_squared[j].value_x_squared, 1) == 0) {
						save_smooth_number(x_squared[j]);
						nb_smooth_per_round++;
					}
					/* sieve next element located p steps from here */
				}

				/* same goes for root2 */
				if (modular_roots[i].root2 == modular_roots[i].root1)
					continue;

				mpz_set(x, sieving_index);

				get_sieving_start_index(x, x, p, modular_roots[i].root2);
				mpz_set(str, x);
				mpz_sub(x, x, sieving_index);

				for (j = mpz_get_ui(x); j < SIEVING_STEP; j += primes[i]) {
					p_pow = mpz_remove(x_squared[j].value_x_squared,
							x_squared[j].value_x_squared, p);

					if (p_pow & 1) {
						mpz_setbit(x_squared[j].factors_vect, i);
					}

					if (mpz_cmp_ui(x_squared[j].value_x_squared, 1) == 0) {
						save_smooth_number(x_squared[j]);
						nb_smooth_per_round++;
					}
				}
			}
		}

		if (my_rank == 0) /* master gathers smooth numbers from slaves */
		{
			gather_smooth_numbers();
			notify_slaves();
		} else /* slaves send their smooth numbers to master */
		{
			send_smooth_numbers_to_master();
			nb_global_smooth_numbers_found = get_server_notification();
		}

		if (nb_global_smooth_numbers_found >= nb_qr_primes + NB_VECTORS_OFFSET)
			break;

		nb_big_rounds++;
	}

	STOP_TIMER_PRINT_TIME("\nSieving DONE");

	if (my_rank == 0) {
		uint64_t t = 0;

//--------------------------------------------------------
//the matrix ready, start Gauss elimination. The Matrix is filled on the call of save_smooth_number()
//--------------------------------------------------------
		START_TIMER();
		gauss_elimination(&matrix);
		STOP_TIMER_PRINT_TIME("\nGauss elimination done");

		uint64_t row_index = nb_qr_primes + NB_VECTORS_OFFSET - 1; /* last row in the matrix */
		int nb_linear_relations = 0;
		mpz_t linear_relation_z, solution_z;
		mpz_init(linear_relation_z);
		mpz_init(solution_z);

		get_matrix_row(linear_relation_z, &matrix, row_index--); /* get the last few rows in the Gauss eliminated matrix*/
		while (mpz_cmp_ui(linear_relation_z, 0) == 0) {
			nb_linear_relations++;
			get_matrix_row(linear_relation_z, &matrix, row_index--);
		}

		PRINT(my_rank, "\tLinear dependent relations found : %d\n",
				nb_linear_relations);

//--------------------------------------------------------
// Factor
//--------------------------------------------------------
		//We use the last linear relation to reconstruct our solution
		START_TIMER();
		PRINT(my_rank, "%s", "\nFactorizing..\n");
		mpz_t solution_X, solution_Y;
		mpz_init(solution_X);
		mpz_init(solution_Y);

		/* we start testing from the first linear relation encountered in the matrix */
		for (j = nb_linear_relations; j > 0; j--) {
			PRINT(my_rank, "Trying %d..\n", nb_linear_relations - j + 1);
			mpz_set_ui(solution_X, 1);
			mpz_set_ui(solution_Y, 1);

			get_identity_row(solution_z, &matrix,
					nb_qr_primes + NB_VECTORS_OFFSET - j + 1);

			for (i = 0; i < nb_qr_primes; i++) {
				if (mpz_tstbit(solution_z, i)) {
					mpz_mul(solution_X, solution_X, smooth_numbers[i].value_x);
					mpz_mod(solution_X, solution_X, N); /* reduce x to modulo N */

					mpz_mul(solution_Y, solution_Y,
							smooth_numbers[i].value_x_squared);
					/*TODO: handling huge stuff here, there is no modulo N like in the solution_X case!
					 * eliminate squares as long as you go*/
				}
			}

			mpz_sqrt(solution_Y, solution_Y);
			mpz_mod(solution_Y, solution_Y, N); /* y = sqrt(MUL(xi²-n)) mod N */

			mpz_sub(solution_X, solution_X, solution_Y);

			mpz_gcd(solution_X, solution_X, N);

			if (mpz_cmp(solution_X, N) != 0 && mpz_cmp_ui(solution_X, 1) != 0) /* factor can be 1 or N, try another relation */
				break;
		}
		mpz_cdiv_q(solution_Y, N, solution_X);

		PRINT(my_rank, "\n>>>>>>>>>>> FACTORED %s =\n",
				mpz_get_str(NULL, 10, N));
		PRINT(
				my_rank,
				"\tFactor 1: %s \n\tFactor 2: %s",
				mpz_get_str(NULL, 10, solution_X), mpz_get_str(NULL, 10, solution_Y));

		sprintf(s, "\n>>>>>>>>>>> FACTORED %s =\n", mpz_get_str(NULL, 10, N));
		APPEND_TO_LOG_FILE(s);
		sprintf(s, "\tFactor 1: %s \n\tFactor 2: %s",
				mpz_get_str(NULL, 10, solution_X),
				mpz_get_str(NULL, 10, solution_Y));
		APPEND_TO_LOG_FILE(s);

		gettimeofday(&end_global, NULL);
		timersub(&end_global, &start_global, &elapsed);
		sprintf(s, "****** TOTAL TIME: %.3f ms\n",
				elapsed.tv_sec * 1000 + elapsed.tv_usec / (double) 1000);
		APPEND_TO_LOG_FILE(s);

		STOP_TIMER_PRINT_TIME("\nFactorizing done");
	}

	PRINT(my_rank, "%s", "\nCleaning memory..\n");

	/********************** clear the x_squared array **********************/
	for (i = 0; i < SIEVING_STEP; i++) {
		mpz_clear(x_squared[i].value_x);
		mpz_clear(x_squared[i].value_x_squared);
		//free(x_squared[i].factors_exp);
		mpz_clear(x_squared[i].factors_vect);
	}
	free(x_squared);
	/********************** clear the x_squared array **********************/

	free(modular_roots);
	/********************** clear the smooth_numbers array **********************/
	if (my_rank == 0) {
		for (i = 0; i < nb_qr_primes + NB_VECTORS_OFFSET; i++) {
			mpz_clear(smooth_numbers[i].value_x);
			mpz_clear(smooth_numbers[i].value_x_squared);
			mpz_clear(smooth_numbers[i].factors_vect);
			//free(smooth_numbers[i].factors_exp);
		}
		free(smooth_numbers);
	} else {
		for (i = 0; i < 500; i++) {
			mpz_clear(temp_slaves_smooth_numbers[i].value_x);
			mpz_clear(temp_slaves_smooth_numbers[i].value_x_squared);
			mpz_clear(temp_slaves_smooth_numbers[i].factors_vect);
		}
		free(temp_slaves_smooth_numbers);
	}
	/********************** clear the smooth_numbers array **********************/

	free(primes);
	/********************** clear mpz _t **********************/mpz_clear(B);
	mpz_clear(N);
	sqrtN, rem;
	mpz_clear(x);
	mpz_clear(sieving_index);
	mpz_clear(next_sieving_index);
	mpz_clear(p);
	mpz_clear(str);
	/********************** clear mpz _t **********************/

	free_matrix(&matrix);

	gettimeofday(&end_global, NULL);
	timersub(&end_global, &start_global, &elapsed);
	PRINT(my_rank, "****** TOTAL TIME: %.3f ms\n",
			elapsed.tv_sec * 1000 + elapsed.tv_usec / (double) 1000);
	show_mem_usage();

	MPI_Finalize();

	return 0;
}
Exemplo n.º 2
0
int main(int argc, char **argv) {
	gettimeofday(&start_global, NULL);
	print_lib_version();

	mpz_init(N);
	mpz_t B;
	mpz_init(B);

	unsigned long int uBase;
	int64_t nb_primes;
	modular_root_t *modular_roots;

	uint64_t i, j;

	if (mpz_init_set_str(N, argv[1], 10) == -1) {
		printf("Cannot load N %s\n", argv[1]);
		exit(2);
	}

	mpz_t sqrtN, rem;
	mpz_init(sqrtN);
	mpz_init(rem);
	mpz_sqrtrem(sqrtN, rem, N);

	if (mpz_cmp_ui(rem, 0) != 0) /* if not perfect square, calculate the ceiling */
		mpz_add_ui(sqrtN, sqrtN, 1);
	else /* N is a perfect square, factored! */
	{
		printf("\n<<<[FACTOR]>>> %s\n", mpz_get_str(NULL, 10, sqrtN));
		return 0;
	}

	if (mpz_probab_prime_p(N, 10) > 0) /* don't bother factoring */
	{
		printf("N:%s is prime\n", mpz_get_str(NULL, 10, N));
		exit(0);
	}

	OPEN_LOG_FILE("freq");

//--------------------------------------------------------
//  calculate the smoothness base for the given N
//--------------------------------------------------------
	get_smoothness_base(B, N); /* if N is too small, the program will surely fail, please consider a pen and paper instead */
	uBase = mpz_get_ui(B);
	printf("n: %s\tBase: %s\n", mpz_get_str(NULL, 10, N),
			mpz_get_str(NULL, 10, B));

//--------------------------------------------------------
// sieve primes that are less than the smoothness base using Eratosthenes sieve
//--------------------------------------------------------
	START_TIMER();
	nb_primes = sieve_primes_up_to((int64_t) (uBase));

	printf("\nPrimes found %" PRId64 " [Smoothness Base %lu]\n", nb_primes,
			uBase);
	STOP_TIMER_PRINT_TIME("\tEratosthenes Sieving done");

//--------------------------------------------------------
// fill the primes array with primes to which n is a quadratic residue
//--------------------------------------------------------
	START_TIMER();
	primes = calloc(nb_primes, sizeof(int64_t));
	nb_qr_primes = fill_primes_with_quadratic_residue(primes, N);

	/*for(i=0; i<nb_qr_primes; i++)
	 printf("%" PRId64 "\n", primes[i]);*/

	printf("\nN-Quadratic primes found %" PRId64 "\n", nb_qr_primes);
	STOP_TIMER_PRINT_TIME("\tQuadratic prime filtering done");

//--------------------------------------------------------
// calculate modular roots
//--------------------------------------------------------
	START_TIMER();
	modular_roots = calloc(nb_qr_primes, sizeof(modular_root_t));
	mpz_t tmp, r1, r2;
	mpz_init(tmp);
	mpz_init(r1);
	mpz_init(r2);

	for (i = 0; i < nb_qr_primes; i++) {
		mpz_set_ui(tmp, (unsigned long) primes[i]);
		mpz_sqrtm(r1, N, tmp); /* calculate the modular root */
		mpz_neg(r2, r1); /* -q mod n */
		mpz_mod(r2, r2, tmp);

		modular_roots[i].root1 = mpz_get_ui(r1);
		modular_roots[i].root2 = mpz_get_ui(r2);
	}
	mpz_clear(tmp);
	mpz_clear(r1);
	mpz_clear(r2);
	STOP_TIMER_PRINT_TIME("\nModular roots calculation done");

	/*for(i=0; i<nb_qr_primes; i++)
	 {
	 printf("[%10" PRId64 "-> roots: %10u - %10u]\n", primes[i], modular_roots[i].root1, modular_roots[i].root2);
	 }*/

//--------------------------------------------------------
//         ***** initialize the matrix *****
//--------------------------------------------------------
	START_TIMER();
	init_matrix(&matrix, nb_qr_primes + NB_VECTORS_OFFSET, nb_qr_primes);
	mpz_init2(tmp_matrix_row, nb_qr_primes);
	STOP_TIMER_PRINT_TIME("\nMatrix initialized");

//--------------------------------------------------------
// [Sieving]
//--------------------------------------------------------
	START_TIMER();

	mpz_t x, sieving_index, next_sieving_index;
	unsigned long ui_index, SIEVING_STEP = 50000; /* we sieve for 50000 elements at each loop */
	uint64_t p_pow;
	smooth_number_t *x_squared;

	x_squared = calloc(SIEVING_STEP, sizeof(smooth_number_t));
	smooth_numbers = calloc(nb_qr_primes + NB_VECTORS_OFFSET,
			sizeof(smooth_number_t));

	mpz_init_set(x, sqrtN);
	mpz_init_set(sieving_index, x);
	mpz_init_set(next_sieving_index, x);

	mpz_t p;
	mpz_init(p);
	mpz_t str;
	mpz_init_set(str, sieving_index);
	printf("\nSieving ...\n");

//--------------------------------------------------------
// Init before sieving
//--------------------------------------------------------
	for (i = 0; i < SIEVING_STEP; i++) {
		mpz_init(x_squared[i].value_x);
		mpz_init(x_squared[i].value_x_squared);

		/* the factors_exp array is used to keep track of exponents */
		//x_squared[i].factors_exp = calloc(nb_qr_primes, sizeof(uint64_t));
		/* we use directly the exponents vector modulo 2 to preserve space */mpz_init2(
				x_squared[i].factors_vect, nb_qr_primes);
		mpz_add_ui(x, x, 1);
	}

	int nb_smooth_per_round = 0;
	char s[512];

//--------------------------------------------------------
// WHILE smooth numbers found less than the primes in the smooth base + NB_VECTORS_OFFSET
//--------------------------------------------------------
	while (nb_smooth_numbers_found < nb_qr_primes + NB_VECTORS_OFFSET) {
		nb_smooth_per_round = 0;
		mpz_set(x, next_sieving_index); /* sieve numbers from sieving_index to sieving_index + sieving_step */
		mpz_set(sieving_index, next_sieving_index);

		printf("\r");
		printf(
				"\t\tSieving at: %s30 <--> Smooth numbers found: %" PRId64 "/%" PRId64 "",
				mpz_get_str(NULL, 10, sieving_index), nb_smooth_numbers_found,
				nb_qr_primes);
		fflush(stdout);

		for (i = 0; i < SIEVING_STEP; i++) {
			mpz_set(x_squared[i].value_x, x);

			mpz_pow_ui(x_squared[i].value_x_squared, x, 2); /* calculate value_x_squared <- x²-n */
			mpz_sub(x_squared[i].value_x_squared, x_squared[i].value_x_squared,
					N);

			mpz_clear(x_squared[i].factors_vect);
			mpz_init2(x_squared[i].factors_vect, nb_qr_primes); /* reconstruct a new fresh 0ed vector of size nb_qr_primes bits */

			mpz_add_ui(x, x, 1);
		}
		mpz_set(next_sieving_index, x);

//--------------------------------------------------------
// eliminate factors in the x_squared array, those who are 'destructed' to 1 are smooth
//--------------------------------------------------------

		for (i = 0; i < nb_qr_primes; i++) {
			mpz_set_ui(p, (unsigned long) primes[i]);
			mpz_set(x, sieving_index);

			/* get the first multiple of p that is directly larger that sieving_index
			 * Quadratic SIEVING: all elements from this number and in positions multiples of root1 and root2
			 * are also multiples of p */
			get_sieving_start_index(x, x, p, modular_roots[i].root1);
			mpz_set(str, x);
			mpz_sub(x, x, sieving_index); /* x contains index of first number that is divisible by p */

			for (j = mpz_get_ui(x); j < SIEVING_STEP; j += primes[i]) {
				p_pow = mpz_remove(x_squared[j].value_x_squared,
						x_squared[j].value_x_squared, p); /* eliminate all factors of p */

				if (p_pow & 1) /* mark bit if odd power of p exists in this x_squared[j] */
				{
					mpz_setbit(x_squared[j].factors_vect, i);
				}

				if (mpz_cmp_ui(x_squared[j].value_x_squared, 1) == 0) {
					save_smooth_number(x_squared[j]);
					nb_smooth_per_round++;
				}
				/* sieve next element located p steps from here */
			}

			/* same goes for root2 */
			if (modular_roots[i].root2 == modular_roots[i].root1)
				continue;

			mpz_set(x, sieving_index);

			get_sieving_start_index(x, x, p, modular_roots[i].root2);
			mpz_set(str, x);
			mpz_sub(x, x, sieving_index);

			for (j = mpz_get_ui(x); j < SIEVING_STEP; j += primes[i]) {
				p_pow = mpz_remove(x_squared[j].value_x_squared,
						x_squared[j].value_x_squared, p);

				if (p_pow & 1) {
					mpz_setbit(x_squared[j].factors_vect, i);
				}

				if (mpz_cmp_ui(x_squared[j].value_x_squared, 1) == 0) {
					save_smooth_number(x_squared[j]);
					nb_smooth_per_round++;
				}
			}
		}
		//printf("\tSmooth numbers found %" PRId64 "\n", nb_smooth_numbers_found);
		/*sprintf(s, "[start: %s - end: %s - step: %" PRId64 "] nb_smooth_per_round: %d",
		 mpz_get_str(NULL, 10, sieving_index),
		 mpz_get_str(NULL, 10, next_sieving_index),
		 SIEVING_STEP,
		 nb_smooth_per_round);
		 APPEND_TO_LOG_FILE(s);*/
	}

	STOP_TIMER_PRINT_TIME("\nSieving DONE");

	uint64_t t = 0;

//--------------------------------------------------------
//the matrix ready, start Gauss elimination. The Matrix is filled on the call of save_smooth_number()
//--------------------------------------------------------
	START_TIMER();
	gauss_elimination(&matrix);
	STOP_TIMER_PRINT_TIME("\nGauss elimination done");
	//print_matrix_matrix(&matrix);
	//print_matrix_identity(&matrix);

	uint64_t row_index = nb_qr_primes + NB_VECTORS_OFFSET - 1; /* last row in the matrix */
	int nb_linear_relations = 0;
	mpz_t linear_relation_z, solution_z;
	mpz_init(linear_relation_z);
	mpz_init(solution_z);

	get_matrix_row(linear_relation_z, &matrix, row_index--); /* get the last few rows in the Gauss eliminated matrix*/
	while (mpz_cmp_ui(linear_relation_z, 0) == 0) {
		nb_linear_relations++;
		get_matrix_row(linear_relation_z, &matrix, row_index--);
	}

	printf("\tLinear dependent relations found : %d\n", nb_linear_relations);

//--------------------------------------------------------
// Factor
//--------------------------------------------------------
	//We use the last linear relation to reconstruct our solution
	START_TIMER();
	printf("\nFactorizing..\n");
	mpz_t solution_X, solution_Y;
	mpz_init(solution_X);
	mpz_init(solution_Y);

	/* we start testing from the first linear relation encountered in the matrix */
	for (j = nb_linear_relations; j > 0; j--) {
		printf("Trying %d..\n", nb_linear_relations - j + 1);
		mpz_set_ui(solution_X, 1);
		mpz_set_ui(solution_Y, 1);

		get_identity_row(solution_z, &matrix,
				nb_qr_primes + NB_VECTORS_OFFSET - j + 1);

		for (i = 0; i < nb_qr_primes; i++) {
			if (mpz_tstbit(solution_z, i)) {
				mpz_mul(solution_X, solution_X, smooth_numbers[i].value_x);
				mpz_mod(solution_X, solution_X, N); /* reduce x to modulo N */

				mpz_mul(solution_Y, solution_Y,
						smooth_numbers[i].value_x_squared);
				/*TODO: handling huge stuff here, there is no modulo N like in the solution_X case!
				 * eliminate squares as long as you go*/
			}
		}

		mpz_sqrt(solution_Y, solution_Y);
		mpz_mod(solution_Y, solution_Y, N); /* y = sqrt(MUL(xi²-n)) mod N */

		mpz_sub(solution_X, solution_X, solution_Y);

		mpz_gcd(solution_X, solution_X, N);

		if (mpz_cmp(solution_X, N) != 0 && mpz_cmp_ui(solution_X, 1) != 0) /* factor can be 1 or N, try another relation */
			break;
	}
	mpz_cdiv_q(solution_Y, N, solution_X);

	printf("\n>>>>>>>>>>> FACTORED %s =\n", mpz_get_str(NULL, 10, N));
	printf("\tFactor 1: %s \n\tFactor 2: %s", mpz_get_str(NULL, 10, solution_X),
			mpz_get_str(NULL, 10, solution_Y));

	/*sprintf(s, "\n>>>>>>>>>>> FACTORED %s =\n", mpz_get_str(NULL, 10, N));
	 APPEND_TO_LOG_FILE(s);
	 sprintf(s, "\tFactor 1: %s \n\tFactor 2: %s", mpz_get_str(NULL, 10, solution_X), mpz_get_str(NULL, 10, solution_Y));
	 APPEND_TO_LOG_FILE(s);

	 gettimeofday(&end_global, NULL);
	 timersub(&end_global, &start_global, &elapsed);
	 sprintf(s, "****** TOTAL TIME: %.3f ms\n", elapsed.tv_sec * 1000 + elapsed.tv_usec / (double) 1000);
	 APPEND_TO_LOG_FILE(s);*/

	STOP_TIMER_PRINT_TIME("\nFactorizing done");

	printf("Cleaning memory..\n");

	/********************** clear the x_squared array **********************/
	for (i = 0; i < SIEVING_STEP; i++) {
		mpz_clear(x_squared[i].value_x);
		mpz_clear(x_squared[i].value_x_squared);
		//free(x_squared[i].factors_exp);
		mpz_clear(x_squared[i].factors_vect);
	}
	free(x_squared);
	/********************** clear the x_squared array **********************/

	free(modular_roots);
	/********************** clear the smooth_numbers array **********************/
	for (i = 0; i < nb_qr_primes + NB_VECTORS_OFFSET; i++) {
		mpz_clear(smooth_numbers[i].value_x);
		mpz_clear(smooth_numbers[i].value_x_squared);
		//free(smooth_numbers[i].factors_exp);
	}
	free(smooth_numbers);
	/********************** clear the smooth_numbers array **********************/

	free(primes);
	/********************** clear mpz _t **********************/mpz_clear(B);
	mpz_clear(N);
	sqrtN, rem;
	mpz_clear(x);
	mpz_clear(sieving_index);
	mpz_clear(next_sieving_index);
	mpz_clear(p);
	mpz_clear(str);
	/********************** clear mpz _t **********************/

	free_matrix(&matrix);

	gettimeofday(&end_global, NULL);
	timersub(&end_global, &start_global, &elapsed);
	printf("****** TOTAL TIME: %.3f ms\n",
			elapsed.tv_sec * 1000 + elapsed.tv_usec / (double) 1000);
	show_mem_usage();
	return 0;
}
/******************************************************************************
 * appMain
 ******************************************************************************/
Int main(Int argc, Char *argv[])
{
    UInt32 framesize;
    Memory_AllocParams memParams = Memory_DEFAULTPARAMS;
    
    printf("******************************************************************************\n");
    printf("Sample application for testing kernels in C6Accel started.\n");
    printf("******************************************************************************\n");
    /* This call must be made before the Memory_xxx() functions as it is required for the tracing functions
     in all the codec engine APIs that are used*/
    CERuntime_init();

    /* Reset timeObj used for benchmarking*/
    Time_reset(&sTime);

    /* Create call generates a C6ACCEL handle */
    hC6 = C6accel_create(engineName, NULL,algName, NULL);

    /*Check for failure*/
    if ( hC6 == NULL)
       {printf("%s: C6accel_create() failed \n",progName);
        goto end;
    }

    /* Create buffers for use by algorithms */

    /* Want to use cached & contiguous memory to get best performance from cortex when it also uses the buffers.*/
    memParams.flags = Memory_CACHED;
    memParams.type = Memory_CONTIGHEAP;

    /* Size all buffers for 6 bytes, to cope with worst case 16 bit 422Planar*/
    framesize = (MAX_WIDTH * MAX_HEIGHT * sizeof(Int32)*3/2);

    /* Create 16bit buffers for use by algorithms*/
    pSrcBuf_16bpp = Memory_alloc(framesize, &memParams);
    if (pSrcBuf_16bpp == NULL) {
        goto end;
    }
    else {
       Memory_cacheWbInv(pSrcBuf_16bpp, framesize);
    }
    
      pOutBuf_16bpp = Memory_alloc(framesize, &memParams);
    if (pOutBuf_16bpp == NULL) {
        goto end;
    }
    else {
       Memory_cacheWbInv(pOutBuf_16bpp, framesize);
    }
    
    pRefBuf_16bpp = Memory_alloc(framesize, &memParams);
    if (pRefBuf_16bpp == NULL) {
        goto end;
    }
    else {
       Memory_cacheWbInv(pRefBuf_16bpp, framesize);
    }

      pWorkingBuf_16bpp = Memory_alloc(framesize, &memParams);
    if (pWorkingBuf_16bpp == NULL) {
        goto end;
    }
    else {
       Memory_cacheWbInv(pWorkingBuf_16bpp, framesize);
    }
    
    pWorkingBuf2_16bpp = Memory_alloc(framesize, &memParams);
    if (pWorkingBuf2_16bpp == NULL) {
        goto end;
    }
    else {
       Memory_cacheWbInv(pWorkingBuf2_16bpp, framesize);
    }
  
   #ifdef DEVICE_FLOAT
   pWorkingBuf3_16bpp = Memory_alloc(framesize, &memParams);
    if (pWorkingBuf3_16bpp == NULL) {
        goto end;
    }
    else {
       Memory_cacheWbInv(pWorkingBuf3_16bpp, framesize);
    }
    #endif
    
    /* open file for csv output*/
    OPEN_LOG_FILE("benchmarking.txt");
   
    /* Call test functions for kernels in C6accel*/
    LOG_STRING("IMGLib Functions\n");
    LOG_STRING("640x480 8bit/pixel b/w Test Image \n");
    
    printf("-----------------------------------------------------------------------------\n");
    printf("Test for Image processing functions in C6Accel: \n");
    printf("-----------------------------------------------------------------------------\n");

    c6accel_test_IMG_histogram(hC6,WIDTH,HEIGHT);
    c6accel_test_IMG_median(hC6,WIDTH,HEIGHT);
    c6accel_test_IMG_conv(hC6,WIDTH,HEIGHT);
    c6accel_test_IMG_corr(hC6,WIDTH,HEIGHT);
    c6accel_test_IMG_sobel(hC6,WIDTH,HEIGHT);
    c6accel_test_IMG_muls(hC6,WIDTH,HEIGHT);
    c6accel_test_IMG_adds(hC6,WIDTH,HEIGHT);
    c6accel_test_IMG_subs(hC6,WIDTH,HEIGHT);
    LOG_STRING("800x600 YUYV Test Image \n");
    c6accel_test_IMG_YC_demux(hC6,YUV_WIDTH,YUV_HEIGHT);
    c6accel_test_IMG_YUV422PLtoYUV422SP(hC6,2,16,16,16, 8);
    c6accel_test_IMG_YUV422SPtoYUV422ILE( hC6,2,16,16,32);
    c6accel_test_IMG_YUV422SPtoYUV420PL(hC6,2,16,16,16, 8);
    LOG_STRING("DSPLib Functions\n");
    LOG_STRING("64k sample FFT \n");
    
    printf("-----------------------------------------------------------------------------\n");
    printf("Test for Fixed point Signal processing functions in C6Accel \n");
    printf("-----------------------------------------------------------------------------\n");
    c6accel_test_DSP_FFT(hC6,N);
    c6accel_test_DSP_IFFT(hC6,N);
    c6accel_test_DSP_AUTOCOR(hC6,Nx,Nr);
    c6accel_test_DSP_DOTPROD(hC6,Nr);

    /* Implementation of this function limits
    the rows and columns of matrices to be multiples of 4 and r1 >8 */
    c6accel_test_DSP_MATMUL(hC6,ROW1,COL1,COL2,SHIFT);
    c6accel_test_DSP_FIR(hC6,NOUT,NCOEFF);
    c6accel_test_DSP_IIR(hC6,NXIN,NCOEFF);

  // No need to use these on floating point devices
  #ifndef DEVICE_FLOAT
    LOG_STRING_P1("MATH kernels tested with size of data block %d \n", NMAX+1);
    printf("-----------------------------------------------------------------------------\n");
    printf("Test for Fixed point Math functions in C6Accel\n");
    printf("-----------------------------------------------------------------------------\n");
    c6accel_test_MATH_RTSARITH(hC6,NMAX);
    c6accel_test_MATH_RTSCONV(hC6,NMAX);
    c6accel_test_MATH_IQCONV(hC6,NMAX,GLOBAL_Q, Q1);
    c6accel_test_MATH_IQMATH(hC6,NMAX,GLOBAL_Q);
    c6accel_test_MATH_IQARITH(hC6,NMAX,GLOBAL_Q);
    c6accel_test_MATH_IQTRIG(hC6,NMAX,GLOBAL_Q);
  #endif
  
  #ifdef DEVICE_FLOAT
  /*Test function calls for floating point kernels in C6accel*/ 
    printf("-----------------------------------------------------------------------------\n");
    printf("Test for Floating point Math Functions in C6Accel \n");
    printf("-----------------------------------------------------------------------------\n");
    c6accel_test_MATH_RTSARITH(hC6,NMAX);
    c6accel_test_MATH_RTSCONV(hC6,NMAX); 
    c6accel_test_MATH_RTSFLT(hC6,BUFSIZE) ;
    c6accel_test_MATH_RTSFLTDP(hC6,BUFSIZE) ;
    
    printf("-----------------------------------------------------------------------------\n");
    printf("Test for Floating point Signal processing Functions in C6accel\n");
    printf("-----------------------------------------------------------------------------\n");
    c6accel_test_DSPF_sp_fftSPxSP(hC6, Npt, rad, 0, Npt);
    c6accel_test_DSPF_VECMUL(hC6,  BUFSIZE );
    c6accel_test_DSPF_VECRECIP(hC6,  NumX );
    c6accel_test_DSPF_VECSUM_SQ(hC6,  Nelements );
    c6accel_test_DSPF_W_VEC(hC6, Mfactor,   BUFSIZE );
    c6accel_test_DSPF_DOTPRODFXNS(hC6,  Nelements);
    c6accel_test_DSPF_MATFXNS(hC6,  16,  16,  16 );
    c6accel_test_DSPF_MAT_MUL_CPLX(hC6,  4,  8,  8 ); 
    c6accel_test_DSPF_MAT_TRANS(hC6,  NumR,  NumR );
    c6accel_test_DSPF_AUTOCOR(hC6,NumX,NumR);
    c6accel_test_DSPF_CONVOL(hC6,NumH,NumR); 
    //c6accel_test_DSPF_IIR(hC6,  NumX);
    c6accel_test_DSPF_FIR(hC6, 128,  4);
    c6accel_test_DSPF_sp_ifftSPxSP(hC6, Npt, rad, 0, Npt);
    c6accel_test_DSPF_BIQUAD(hC6,  BUFSIZE);
  #endif   

    CLOSE_LOG_FILE();

end:
     // Tear down C6ACCEL
    if (hC6)
       C6accel_delete(hC6);

    if(pSrcBuf_16bpp)
        Memory_free(pSrcBuf_16bpp, framesize, &memParams);

    if(pOutBuf_16bpp)
        Memory_free(pOutBuf_16bpp, framesize, &memParams);

    if(pRefBuf_16bpp)
        Memory_free(pRefBuf_16bpp, framesize, &memParams);

    if(pWorkingBuf_16bpp)
        Memory_free(pWorkingBuf_16bpp, framesize, &memParams);

    if(pWorkingBuf2_16bpp)
        Memory_free(pWorkingBuf2_16bpp, framesize, &memParams);

    #ifdef DEVICE_FLOAT
    if(pWorkingBuf3_16bpp)
        Memory_free(pWorkingBuf3_16bpp, framesize, &memParams);
    #endif

    printf("******************************************************************************\n");
    printf("All tests done.\n");
    printf("******************************************************************************\n");
    printf("\n");

   return (0);
}