/* assuming slaves (workers)) are all homogenous, let them all do the calculations regarding primes sieving, calculating the smoothness base and the modular roots */ int main(int argc, char **argv) { MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); MPI_Comm_size(MPI_COMM_WORLD, &mpi_group_size); int len; MPI_Get_processor_name(processor_name, &len); gettimeofday(&start_global, NULL); print_lib_version(); mpz_init(N); mpz_t B; mpz_init(B); unsigned long int uBase; int64_t nb_primes; modular_root_t *modular_roots; uint64_t i, j; if (argc < 2) { PRINT(my_rank, "usage: %s Number_to_factorize\n", argv[0]); exit(2); } if (mpz_init_set_str(N, argv[1], 10) == -1) { PRINT(my_rank, "Cannot load N %s\n", argv[1]); exit(2); } mpz_t sqrtN, rem; mpz_init(sqrtN); mpz_init(rem); mpz_sqrtrem(sqrtN, rem, N); if (mpz_cmp_ui(rem, 0) != 0) /* if not perfect square, calculate the ceiling */ mpz_add_ui(sqrtN, sqrtN, 1); else /* N is a perfect square, factored! */ { PRINT(my_rank, "\n<<<[FACTOR]>>> %s\n", mpz_get_str(NULL, 10, sqrtN)); return 0; } if (mpz_probab_prime_p(N, 10) > 0) /* don't bother factoring */ { PRINT(my_rank, "N:%s is prime\n", mpz_get_str(NULL, 10, N)); exit(0); } OPEN_LOG_FILE("freq"); //-------------------------------------------------------- // calculate the smoothness base for the given N //-------------------------------------------------------- get_smoothness_base(B, N); /* if N is too small, the program will surely fail, please consider a pen and paper instead */ uBase = mpz_get_ui(B); PRINT(my_rank, "n: %s\tBase: %s\n", mpz_get_str(NULL, 10, N), mpz_get_str(NULL, 10, B)); //-------------------------------------------------------- // sieve primes that are less than the smoothness base using Eratosthenes sieve //-------------------------------------------------------- START_TIMER(); nb_primes = sieve_primes_up_to((int64_t) (uBase)); PRINT(my_rank, "\tPrimes found %" PRId64 " [Smoothness Base %lu]\n", nb_primes, uBase); STOP_TIMER_PRINT_TIME("\tEratosthenes Sieving done"); //-------------------------------------------------------- // fill the primes array with primes to which n is a quadratic residue //-------------------------------------------------------- START_TIMER(); primes = calloc(nb_primes, sizeof(int64_t)); nb_qr_primes = fill_primes_with_quadratic_residue(primes, N); /*for(i=0; i<nb_qr_primes; i++) PRINT(my_rank, "%" PRId64 "\n", primes[i]);*/ PRINT(my_rank, "\tN-Quadratic primes found %" PRId64 "\n", nb_qr_primes); STOP_TIMER_PRINT_TIME("\tQuadratic prime filtering done"); //-------------------------------------------------------- // calculate modular roots //-------------------------------------------------------- START_TIMER(); modular_roots = calloc(nb_qr_primes, sizeof(modular_root_t)); mpz_t tmp, r1, r2; mpz_init(tmp); mpz_init(r1); mpz_init(r2); for (i = 0; i < nb_qr_primes; i++) { mpz_set_ui(tmp, (unsigned long) primes[i]); mpz_sqrtm(r1, N, tmp); /* calculate the modular root */ mpz_neg(r2, r1); /* -q mod n */ mpz_mod(r2, r2, tmp); modular_roots[i].root1 = mpz_get_ui(r1); modular_roots[i].root2 = mpz_get_ui(r2); } mpz_clear(tmp); mpz_clear(r1); mpz_clear(r2); STOP_TIMER_PRINT_TIME("Modular roots calculation done"); //-------------------------------------------------------- // ***** initialize the matrix ***** //-------------------------------------------------------- if (my_rank == 0) /* only the master have the matrix */ { START_TIMER(); init_matrix(&matrix, nb_qr_primes + NB_VECTORS_OFFSET, nb_qr_primes); mpz_init2(tmp_matrix_row, nb_qr_primes); STOP_TIMER_PRINT_TIME("Matrix initialized"); } //-------------------------------------------------------- // [Sieving] - everyones sieves including the master //-------------------------------------------------------- START_TIMER(); mpz_t x, sieving_index, next_sieving_index, relative_start, global_step; unsigned long ui_index, SIEVING_STEP = 50000; /* we sieve for 50000 elements at each loop */ int LOCAL_SIEVING_ROUNDS = 10; /* number of iterations a worker sieves before communicating results to the master */ unsigned long sieving_round = 0; unsigned long nb_big_rounds = 0; uint64_t p_pow; smooth_number_t *x_squared; x_squared = calloc(SIEVING_STEP, sizeof(smooth_number_t)); if (my_rank == 0) smooth_numbers = calloc(nb_qr_primes + NB_VECTORS_OFFSET, sizeof(smooth_number_t)); else temp_slaves_smooth_numbers = calloc(500, sizeof(smooth_number_t)); /* TODO: this is not properly correct, using a linkedlist is better to keep track of temporary * smooth numbers at the slaves nodes however it's pretty rare to find 500 smooth numbers in * 50000 * 10 interval. */ mpz_init_set(x, sqrtN); mpz_init(global_step); mpz_init(relative_start); mpz_init(sieving_index); mpz_init(next_sieving_index); mpz_t p; mpz_init(p); mpz_t str; mpz_init_set(str, sieving_index); PRINT(my_rank, "\n[%s] Sieving ...\n", processor_name); //-------------------------------------------------------- // Init before sieving //-------------------------------------------------------- for (i = 0; i < SIEVING_STEP; i++) { mpz_init(x_squared[i].value_x); mpz_init(x_squared[i].value_x_squared); mpz_init2(x_squared[i].factors_vect, nb_qr_primes); mpz_add_ui(x, x, 1); } int nb_smooth_per_round = 0; char s[512]; //-------------------------------------------------------- // WHILE smooth numbers found less than the primes in the smooth base + NB_VECTORS_OFFSET for master // Or master asked for more smooth numbers from slaves //-------------------------------------------------------- while (1) { mpz_set_ui(global_step, nb_big_rounds); /* calculates the coordinate where the workers start sieving from */ mpz_mul_ui(global_step, global_step, (unsigned long) mpi_group_size); mpz_mul_ui(global_step, global_step, SIEVING_STEP); mpz_mul_ui(global_step, global_step, LOCAL_SIEVING_ROUNDS); mpz_add(global_step, global_step, sqrtN); mpz_set_ui(relative_start, SIEVING_STEP); mpz_mul_ui(relative_start, relative_start, LOCAL_SIEVING_ROUNDS); mpz_mul_ui(relative_start, relative_start, (unsigned long) my_rank); mpz_add(relative_start, relative_start, global_step); mpz_set(sieving_index, relative_start); mpz_set(next_sieving_index, relative_start); for (sieving_round = 0; sieving_round < LOCAL_SIEVING_ROUNDS; /* each slave sieves for LOCAL_SIEVING_ROUNDS rounds */ sieving_round++) { nb_smooth_per_round = 0; mpz_set(x, next_sieving_index); /* sieve numbers from sieving_index to sieving_index + sieving_step */ mpz_set(sieving_index, next_sieving_index); if (my_rank == 0) { printf("\r"); printf( "\t\tSieving at: %s30 <--> Smooth numbers found: %" PRId64 "/%" PRId64 "", mpz_get_str(NULL, 10, sieving_index), nb_global_smooth_numbers_found, nb_qr_primes); fflush(stdout); } for (i = 0; i < SIEVING_STEP; i++) { mpz_set(x_squared[i].value_x, x); mpz_pow_ui(x_squared[i].value_x_squared, x, 2); /* calculate value_x_squared <- x²-n */ mpz_sub(x_squared[i].value_x_squared, x_squared[i].value_x_squared, N); mpz_clear(x_squared[i].factors_vect); mpz_init2(x_squared[i].factors_vect, nb_qr_primes); /* reconstruct a new fresh 0ed vector of size nb_qr_primes bits */ mpz_add_ui(x, x, 1); } mpz_set(next_sieving_index, x); //-------------------------------------------------------- // eliminate factors in the x_squared array, those who are 'destructed' to 1 are smooth //-------------------------------------------------------- for (i = 0; i < nb_qr_primes; i++) { mpz_set_ui(p, (unsigned long) primes[i]); mpz_set(x, sieving_index); /* get the first multiple of p that is directly larger that sieving_index * Quadratic SIEVING: all elements from this number and in positions multiples of root1 and root2 * are also multiples of p */ get_sieving_start_index(x, x, p, modular_roots[i].root1); mpz_set(str, x); mpz_sub(x, x, sieving_index); /* x contains index of first number that is divisible by p */ for (j = mpz_get_ui(x); j < SIEVING_STEP; j += primes[i]) { p_pow = mpz_remove(x_squared[j].value_x_squared, x_squared[j].value_x_squared, p); /* eliminate all factors of p */ if (p_pow & 1) /* mark bit if odd power of p exists in this x_squared[j] */ { mpz_setbit(x_squared[j].factors_vect, i); } if (mpz_cmp_ui(x_squared[j].value_x_squared, 1) == 0) { save_smooth_number(x_squared[j]); nb_smooth_per_round++; } /* sieve next element located p steps from here */ } /* same goes for root2 */ if (modular_roots[i].root2 == modular_roots[i].root1) continue; mpz_set(x, sieving_index); get_sieving_start_index(x, x, p, modular_roots[i].root2); mpz_set(str, x); mpz_sub(x, x, sieving_index); for (j = mpz_get_ui(x); j < SIEVING_STEP; j += primes[i]) { p_pow = mpz_remove(x_squared[j].value_x_squared, x_squared[j].value_x_squared, p); if (p_pow & 1) { mpz_setbit(x_squared[j].factors_vect, i); } if (mpz_cmp_ui(x_squared[j].value_x_squared, 1) == 0) { save_smooth_number(x_squared[j]); nb_smooth_per_round++; } } } } if (my_rank == 0) /* master gathers smooth numbers from slaves */ { gather_smooth_numbers(); notify_slaves(); } else /* slaves send their smooth numbers to master */ { send_smooth_numbers_to_master(); nb_global_smooth_numbers_found = get_server_notification(); } if (nb_global_smooth_numbers_found >= nb_qr_primes + NB_VECTORS_OFFSET) break; nb_big_rounds++; } STOP_TIMER_PRINT_TIME("\nSieving DONE"); if (my_rank == 0) { uint64_t t = 0; //-------------------------------------------------------- //the matrix ready, start Gauss elimination. The Matrix is filled on the call of save_smooth_number() //-------------------------------------------------------- START_TIMER(); gauss_elimination(&matrix); STOP_TIMER_PRINT_TIME("\nGauss elimination done"); uint64_t row_index = nb_qr_primes + NB_VECTORS_OFFSET - 1; /* last row in the matrix */ int nb_linear_relations = 0; mpz_t linear_relation_z, solution_z; mpz_init(linear_relation_z); mpz_init(solution_z); get_matrix_row(linear_relation_z, &matrix, row_index--); /* get the last few rows in the Gauss eliminated matrix*/ while (mpz_cmp_ui(linear_relation_z, 0) == 0) { nb_linear_relations++; get_matrix_row(linear_relation_z, &matrix, row_index--); } PRINT(my_rank, "\tLinear dependent relations found : %d\n", nb_linear_relations); //-------------------------------------------------------- // Factor //-------------------------------------------------------- //We use the last linear relation to reconstruct our solution START_TIMER(); PRINT(my_rank, "%s", "\nFactorizing..\n"); mpz_t solution_X, solution_Y; mpz_init(solution_X); mpz_init(solution_Y); /* we start testing from the first linear relation encountered in the matrix */ for (j = nb_linear_relations; j > 0; j--) { PRINT(my_rank, "Trying %d..\n", nb_linear_relations - j + 1); mpz_set_ui(solution_X, 1); mpz_set_ui(solution_Y, 1); get_identity_row(solution_z, &matrix, nb_qr_primes + NB_VECTORS_OFFSET - j + 1); for (i = 0; i < nb_qr_primes; i++) { if (mpz_tstbit(solution_z, i)) { mpz_mul(solution_X, solution_X, smooth_numbers[i].value_x); mpz_mod(solution_X, solution_X, N); /* reduce x to modulo N */ mpz_mul(solution_Y, solution_Y, smooth_numbers[i].value_x_squared); /*TODO: handling huge stuff here, there is no modulo N like in the solution_X case! * eliminate squares as long as you go*/ } } mpz_sqrt(solution_Y, solution_Y); mpz_mod(solution_Y, solution_Y, N); /* y = sqrt(MUL(xi²-n)) mod N */ mpz_sub(solution_X, solution_X, solution_Y); mpz_gcd(solution_X, solution_X, N); if (mpz_cmp(solution_X, N) != 0 && mpz_cmp_ui(solution_X, 1) != 0) /* factor can be 1 or N, try another relation */ break; } mpz_cdiv_q(solution_Y, N, solution_X); PRINT(my_rank, "\n>>>>>>>>>>> FACTORED %s =\n", mpz_get_str(NULL, 10, N)); PRINT( my_rank, "\tFactor 1: %s \n\tFactor 2: %s", mpz_get_str(NULL, 10, solution_X), mpz_get_str(NULL, 10, solution_Y)); sprintf(s, "\n>>>>>>>>>>> FACTORED %s =\n", mpz_get_str(NULL, 10, N)); APPEND_TO_LOG_FILE(s); sprintf(s, "\tFactor 1: %s \n\tFactor 2: %s", mpz_get_str(NULL, 10, solution_X), mpz_get_str(NULL, 10, solution_Y)); APPEND_TO_LOG_FILE(s); gettimeofday(&end_global, NULL); timersub(&end_global, &start_global, &elapsed); sprintf(s, "****** TOTAL TIME: %.3f ms\n", elapsed.tv_sec * 1000 + elapsed.tv_usec / (double) 1000); APPEND_TO_LOG_FILE(s); STOP_TIMER_PRINT_TIME("\nFactorizing done"); } PRINT(my_rank, "%s", "\nCleaning memory..\n"); /********************** clear the x_squared array **********************/ for (i = 0; i < SIEVING_STEP; i++) { mpz_clear(x_squared[i].value_x); mpz_clear(x_squared[i].value_x_squared); //free(x_squared[i].factors_exp); mpz_clear(x_squared[i].factors_vect); } free(x_squared); /********************** clear the x_squared array **********************/ free(modular_roots); /********************** clear the smooth_numbers array **********************/ if (my_rank == 0) { for (i = 0; i < nb_qr_primes + NB_VECTORS_OFFSET; i++) { mpz_clear(smooth_numbers[i].value_x); mpz_clear(smooth_numbers[i].value_x_squared); mpz_clear(smooth_numbers[i].factors_vect); //free(smooth_numbers[i].factors_exp); } free(smooth_numbers); } else { for (i = 0; i < 500; i++) { mpz_clear(temp_slaves_smooth_numbers[i].value_x); mpz_clear(temp_slaves_smooth_numbers[i].value_x_squared); mpz_clear(temp_slaves_smooth_numbers[i].factors_vect); } free(temp_slaves_smooth_numbers); } /********************** clear the smooth_numbers array **********************/ free(primes); /********************** clear mpz _t **********************/mpz_clear(B); mpz_clear(N); sqrtN, rem; mpz_clear(x); mpz_clear(sieving_index); mpz_clear(next_sieving_index); mpz_clear(p); mpz_clear(str); /********************** clear mpz _t **********************/ free_matrix(&matrix); gettimeofday(&end_global, NULL); timersub(&end_global, &start_global, &elapsed); PRINT(my_rank, "****** TOTAL TIME: %.3f ms\n", elapsed.tv_sec * 1000 + elapsed.tv_usec / (double) 1000); show_mem_usage(); MPI_Finalize(); return 0; }
int main(int argc, char **argv) { gettimeofday(&start_global, NULL); print_lib_version(); mpz_init(N); mpz_t B; mpz_init(B); unsigned long int uBase; int64_t nb_primes; modular_root_t *modular_roots; uint64_t i, j; if (mpz_init_set_str(N, argv[1], 10) == -1) { printf("Cannot load N %s\n", argv[1]); exit(2); } mpz_t sqrtN, rem; mpz_init(sqrtN); mpz_init(rem); mpz_sqrtrem(sqrtN, rem, N); if (mpz_cmp_ui(rem, 0) != 0) /* if not perfect square, calculate the ceiling */ mpz_add_ui(sqrtN, sqrtN, 1); else /* N is a perfect square, factored! */ { printf("\n<<<[FACTOR]>>> %s\n", mpz_get_str(NULL, 10, sqrtN)); return 0; } if (mpz_probab_prime_p(N, 10) > 0) /* don't bother factoring */ { printf("N:%s is prime\n", mpz_get_str(NULL, 10, N)); exit(0); } OPEN_LOG_FILE("freq"); //-------------------------------------------------------- // calculate the smoothness base for the given N //-------------------------------------------------------- get_smoothness_base(B, N); /* if N is too small, the program will surely fail, please consider a pen and paper instead */ uBase = mpz_get_ui(B); printf("n: %s\tBase: %s\n", mpz_get_str(NULL, 10, N), mpz_get_str(NULL, 10, B)); //-------------------------------------------------------- // sieve primes that are less than the smoothness base using Eratosthenes sieve //-------------------------------------------------------- START_TIMER(); nb_primes = sieve_primes_up_to((int64_t) (uBase)); printf("\nPrimes found %" PRId64 " [Smoothness Base %lu]\n", nb_primes, uBase); STOP_TIMER_PRINT_TIME("\tEratosthenes Sieving done"); //-------------------------------------------------------- // fill the primes array with primes to which n is a quadratic residue //-------------------------------------------------------- START_TIMER(); primes = calloc(nb_primes, sizeof(int64_t)); nb_qr_primes = fill_primes_with_quadratic_residue(primes, N); /*for(i=0; i<nb_qr_primes; i++) printf("%" PRId64 "\n", primes[i]);*/ printf("\nN-Quadratic primes found %" PRId64 "\n", nb_qr_primes); STOP_TIMER_PRINT_TIME("\tQuadratic prime filtering done"); //-------------------------------------------------------- // calculate modular roots //-------------------------------------------------------- START_TIMER(); modular_roots = calloc(nb_qr_primes, sizeof(modular_root_t)); mpz_t tmp, r1, r2; mpz_init(tmp); mpz_init(r1); mpz_init(r2); for (i = 0; i < nb_qr_primes; i++) { mpz_set_ui(tmp, (unsigned long) primes[i]); mpz_sqrtm(r1, N, tmp); /* calculate the modular root */ mpz_neg(r2, r1); /* -q mod n */ mpz_mod(r2, r2, tmp); modular_roots[i].root1 = mpz_get_ui(r1); modular_roots[i].root2 = mpz_get_ui(r2); } mpz_clear(tmp); mpz_clear(r1); mpz_clear(r2); STOP_TIMER_PRINT_TIME("\nModular roots calculation done"); /*for(i=0; i<nb_qr_primes; i++) { printf("[%10" PRId64 "-> roots: %10u - %10u]\n", primes[i], modular_roots[i].root1, modular_roots[i].root2); }*/ //-------------------------------------------------------- // ***** initialize the matrix ***** //-------------------------------------------------------- START_TIMER(); init_matrix(&matrix, nb_qr_primes + NB_VECTORS_OFFSET, nb_qr_primes); mpz_init2(tmp_matrix_row, nb_qr_primes); STOP_TIMER_PRINT_TIME("\nMatrix initialized"); //-------------------------------------------------------- // [Sieving] //-------------------------------------------------------- START_TIMER(); mpz_t x, sieving_index, next_sieving_index; unsigned long ui_index, SIEVING_STEP = 50000; /* we sieve for 50000 elements at each loop */ uint64_t p_pow; smooth_number_t *x_squared; x_squared = calloc(SIEVING_STEP, sizeof(smooth_number_t)); smooth_numbers = calloc(nb_qr_primes + NB_VECTORS_OFFSET, sizeof(smooth_number_t)); mpz_init_set(x, sqrtN); mpz_init_set(sieving_index, x); mpz_init_set(next_sieving_index, x); mpz_t p; mpz_init(p); mpz_t str; mpz_init_set(str, sieving_index); printf("\nSieving ...\n"); //-------------------------------------------------------- // Init before sieving //-------------------------------------------------------- for (i = 0; i < SIEVING_STEP; i++) { mpz_init(x_squared[i].value_x); mpz_init(x_squared[i].value_x_squared); /* the factors_exp array is used to keep track of exponents */ //x_squared[i].factors_exp = calloc(nb_qr_primes, sizeof(uint64_t)); /* we use directly the exponents vector modulo 2 to preserve space */mpz_init2( x_squared[i].factors_vect, nb_qr_primes); mpz_add_ui(x, x, 1); } int nb_smooth_per_round = 0; char s[512]; //-------------------------------------------------------- // WHILE smooth numbers found less than the primes in the smooth base + NB_VECTORS_OFFSET //-------------------------------------------------------- while (nb_smooth_numbers_found < nb_qr_primes + NB_VECTORS_OFFSET) { nb_smooth_per_round = 0; mpz_set(x, next_sieving_index); /* sieve numbers from sieving_index to sieving_index + sieving_step */ mpz_set(sieving_index, next_sieving_index); printf("\r"); printf( "\t\tSieving at: %s30 <--> Smooth numbers found: %" PRId64 "/%" PRId64 "", mpz_get_str(NULL, 10, sieving_index), nb_smooth_numbers_found, nb_qr_primes); fflush(stdout); for (i = 0; i < SIEVING_STEP; i++) { mpz_set(x_squared[i].value_x, x); mpz_pow_ui(x_squared[i].value_x_squared, x, 2); /* calculate value_x_squared <- x²-n */ mpz_sub(x_squared[i].value_x_squared, x_squared[i].value_x_squared, N); mpz_clear(x_squared[i].factors_vect); mpz_init2(x_squared[i].factors_vect, nb_qr_primes); /* reconstruct a new fresh 0ed vector of size nb_qr_primes bits */ mpz_add_ui(x, x, 1); } mpz_set(next_sieving_index, x); //-------------------------------------------------------- // eliminate factors in the x_squared array, those who are 'destructed' to 1 are smooth //-------------------------------------------------------- for (i = 0; i < nb_qr_primes; i++) { mpz_set_ui(p, (unsigned long) primes[i]); mpz_set(x, sieving_index); /* get the first multiple of p that is directly larger that sieving_index * Quadratic SIEVING: all elements from this number and in positions multiples of root1 and root2 * are also multiples of p */ get_sieving_start_index(x, x, p, modular_roots[i].root1); mpz_set(str, x); mpz_sub(x, x, sieving_index); /* x contains index of first number that is divisible by p */ for (j = mpz_get_ui(x); j < SIEVING_STEP; j += primes[i]) { p_pow = mpz_remove(x_squared[j].value_x_squared, x_squared[j].value_x_squared, p); /* eliminate all factors of p */ if (p_pow & 1) /* mark bit if odd power of p exists in this x_squared[j] */ { mpz_setbit(x_squared[j].factors_vect, i); } if (mpz_cmp_ui(x_squared[j].value_x_squared, 1) == 0) { save_smooth_number(x_squared[j]); nb_smooth_per_round++; } /* sieve next element located p steps from here */ } /* same goes for root2 */ if (modular_roots[i].root2 == modular_roots[i].root1) continue; mpz_set(x, sieving_index); get_sieving_start_index(x, x, p, modular_roots[i].root2); mpz_set(str, x); mpz_sub(x, x, sieving_index); for (j = mpz_get_ui(x); j < SIEVING_STEP; j += primes[i]) { p_pow = mpz_remove(x_squared[j].value_x_squared, x_squared[j].value_x_squared, p); if (p_pow & 1) { mpz_setbit(x_squared[j].factors_vect, i); } if (mpz_cmp_ui(x_squared[j].value_x_squared, 1) == 0) { save_smooth_number(x_squared[j]); nb_smooth_per_round++; } } } //printf("\tSmooth numbers found %" PRId64 "\n", nb_smooth_numbers_found); /*sprintf(s, "[start: %s - end: %s - step: %" PRId64 "] nb_smooth_per_round: %d", mpz_get_str(NULL, 10, sieving_index), mpz_get_str(NULL, 10, next_sieving_index), SIEVING_STEP, nb_smooth_per_round); APPEND_TO_LOG_FILE(s);*/ } STOP_TIMER_PRINT_TIME("\nSieving DONE"); uint64_t t = 0; //-------------------------------------------------------- //the matrix ready, start Gauss elimination. The Matrix is filled on the call of save_smooth_number() //-------------------------------------------------------- START_TIMER(); gauss_elimination(&matrix); STOP_TIMER_PRINT_TIME("\nGauss elimination done"); //print_matrix_matrix(&matrix); //print_matrix_identity(&matrix); uint64_t row_index = nb_qr_primes + NB_VECTORS_OFFSET - 1; /* last row in the matrix */ int nb_linear_relations = 0; mpz_t linear_relation_z, solution_z; mpz_init(linear_relation_z); mpz_init(solution_z); get_matrix_row(linear_relation_z, &matrix, row_index--); /* get the last few rows in the Gauss eliminated matrix*/ while (mpz_cmp_ui(linear_relation_z, 0) == 0) { nb_linear_relations++; get_matrix_row(linear_relation_z, &matrix, row_index--); } printf("\tLinear dependent relations found : %d\n", nb_linear_relations); //-------------------------------------------------------- // Factor //-------------------------------------------------------- //We use the last linear relation to reconstruct our solution START_TIMER(); printf("\nFactorizing..\n"); mpz_t solution_X, solution_Y; mpz_init(solution_X); mpz_init(solution_Y); /* we start testing from the first linear relation encountered in the matrix */ for (j = nb_linear_relations; j > 0; j--) { printf("Trying %d..\n", nb_linear_relations - j + 1); mpz_set_ui(solution_X, 1); mpz_set_ui(solution_Y, 1); get_identity_row(solution_z, &matrix, nb_qr_primes + NB_VECTORS_OFFSET - j + 1); for (i = 0; i < nb_qr_primes; i++) { if (mpz_tstbit(solution_z, i)) { mpz_mul(solution_X, solution_X, smooth_numbers[i].value_x); mpz_mod(solution_X, solution_X, N); /* reduce x to modulo N */ mpz_mul(solution_Y, solution_Y, smooth_numbers[i].value_x_squared); /*TODO: handling huge stuff here, there is no modulo N like in the solution_X case! * eliminate squares as long as you go*/ } } mpz_sqrt(solution_Y, solution_Y); mpz_mod(solution_Y, solution_Y, N); /* y = sqrt(MUL(xi²-n)) mod N */ mpz_sub(solution_X, solution_X, solution_Y); mpz_gcd(solution_X, solution_X, N); if (mpz_cmp(solution_X, N) != 0 && mpz_cmp_ui(solution_X, 1) != 0) /* factor can be 1 or N, try another relation */ break; } mpz_cdiv_q(solution_Y, N, solution_X); printf("\n>>>>>>>>>>> FACTORED %s =\n", mpz_get_str(NULL, 10, N)); printf("\tFactor 1: %s \n\tFactor 2: %s", mpz_get_str(NULL, 10, solution_X), mpz_get_str(NULL, 10, solution_Y)); /*sprintf(s, "\n>>>>>>>>>>> FACTORED %s =\n", mpz_get_str(NULL, 10, N)); APPEND_TO_LOG_FILE(s); sprintf(s, "\tFactor 1: %s \n\tFactor 2: %s", mpz_get_str(NULL, 10, solution_X), mpz_get_str(NULL, 10, solution_Y)); APPEND_TO_LOG_FILE(s); gettimeofday(&end_global, NULL); timersub(&end_global, &start_global, &elapsed); sprintf(s, "****** TOTAL TIME: %.3f ms\n", elapsed.tv_sec * 1000 + elapsed.tv_usec / (double) 1000); APPEND_TO_LOG_FILE(s);*/ STOP_TIMER_PRINT_TIME("\nFactorizing done"); printf("Cleaning memory..\n"); /********************** clear the x_squared array **********************/ for (i = 0; i < SIEVING_STEP; i++) { mpz_clear(x_squared[i].value_x); mpz_clear(x_squared[i].value_x_squared); //free(x_squared[i].factors_exp); mpz_clear(x_squared[i].factors_vect); } free(x_squared); /********************** clear the x_squared array **********************/ free(modular_roots); /********************** clear the smooth_numbers array **********************/ for (i = 0; i < nb_qr_primes + NB_VECTORS_OFFSET; i++) { mpz_clear(smooth_numbers[i].value_x); mpz_clear(smooth_numbers[i].value_x_squared); //free(smooth_numbers[i].factors_exp); } free(smooth_numbers); /********************** clear the smooth_numbers array **********************/ free(primes); /********************** clear mpz _t **********************/mpz_clear(B); mpz_clear(N); sqrtN, rem; mpz_clear(x); mpz_clear(sieving_index); mpz_clear(next_sieving_index); mpz_clear(p); mpz_clear(str); /********************** clear mpz _t **********************/ free_matrix(&matrix); gettimeofday(&end_global, NULL); timersub(&end_global, &start_global, &elapsed); printf("****** TOTAL TIME: %.3f ms\n", elapsed.tv_sec * 1000 + elapsed.tv_usec / (double) 1000); show_mem_usage(); return 0; }
/****************************************************************************** * appMain ******************************************************************************/ Int main(Int argc, Char *argv[]) { UInt32 framesize; Memory_AllocParams memParams = Memory_DEFAULTPARAMS; printf("******************************************************************************\n"); printf("Sample application for testing kernels in C6Accel started.\n"); printf("******************************************************************************\n"); /* This call must be made before the Memory_xxx() functions as it is required for the tracing functions in all the codec engine APIs that are used*/ CERuntime_init(); /* Reset timeObj used for benchmarking*/ Time_reset(&sTime); /* Create call generates a C6ACCEL handle */ hC6 = C6accel_create(engineName, NULL,algName, NULL); /*Check for failure*/ if ( hC6 == NULL) {printf("%s: C6accel_create() failed \n",progName); goto end; } /* Create buffers for use by algorithms */ /* Want to use cached & contiguous memory to get best performance from cortex when it also uses the buffers.*/ memParams.flags = Memory_CACHED; memParams.type = Memory_CONTIGHEAP; /* Size all buffers for 6 bytes, to cope with worst case 16 bit 422Planar*/ framesize = (MAX_WIDTH * MAX_HEIGHT * sizeof(Int32)*3/2); /* Create 16bit buffers for use by algorithms*/ pSrcBuf_16bpp = Memory_alloc(framesize, &memParams); if (pSrcBuf_16bpp == NULL) { goto end; } else { Memory_cacheWbInv(pSrcBuf_16bpp, framesize); } pOutBuf_16bpp = Memory_alloc(framesize, &memParams); if (pOutBuf_16bpp == NULL) { goto end; } else { Memory_cacheWbInv(pOutBuf_16bpp, framesize); } pRefBuf_16bpp = Memory_alloc(framesize, &memParams); if (pRefBuf_16bpp == NULL) { goto end; } else { Memory_cacheWbInv(pRefBuf_16bpp, framesize); } pWorkingBuf_16bpp = Memory_alloc(framesize, &memParams); if (pWorkingBuf_16bpp == NULL) { goto end; } else { Memory_cacheWbInv(pWorkingBuf_16bpp, framesize); } pWorkingBuf2_16bpp = Memory_alloc(framesize, &memParams); if (pWorkingBuf2_16bpp == NULL) { goto end; } else { Memory_cacheWbInv(pWorkingBuf2_16bpp, framesize); } #ifdef DEVICE_FLOAT pWorkingBuf3_16bpp = Memory_alloc(framesize, &memParams); if (pWorkingBuf3_16bpp == NULL) { goto end; } else { Memory_cacheWbInv(pWorkingBuf3_16bpp, framesize); } #endif /* open file for csv output*/ OPEN_LOG_FILE("benchmarking.txt"); /* Call test functions for kernels in C6accel*/ LOG_STRING("IMGLib Functions\n"); LOG_STRING("640x480 8bit/pixel b/w Test Image \n"); printf("-----------------------------------------------------------------------------\n"); printf("Test for Image processing functions in C6Accel: \n"); printf("-----------------------------------------------------------------------------\n"); c6accel_test_IMG_histogram(hC6,WIDTH,HEIGHT); c6accel_test_IMG_median(hC6,WIDTH,HEIGHT); c6accel_test_IMG_conv(hC6,WIDTH,HEIGHT); c6accel_test_IMG_corr(hC6,WIDTH,HEIGHT); c6accel_test_IMG_sobel(hC6,WIDTH,HEIGHT); c6accel_test_IMG_muls(hC6,WIDTH,HEIGHT); c6accel_test_IMG_adds(hC6,WIDTH,HEIGHT); c6accel_test_IMG_subs(hC6,WIDTH,HEIGHT); LOG_STRING("800x600 YUYV Test Image \n"); c6accel_test_IMG_YC_demux(hC6,YUV_WIDTH,YUV_HEIGHT); c6accel_test_IMG_YUV422PLtoYUV422SP(hC6,2,16,16,16, 8); c6accel_test_IMG_YUV422SPtoYUV422ILE( hC6,2,16,16,32); c6accel_test_IMG_YUV422SPtoYUV420PL(hC6,2,16,16,16, 8); LOG_STRING("DSPLib Functions\n"); LOG_STRING("64k sample FFT \n"); printf("-----------------------------------------------------------------------------\n"); printf("Test for Fixed point Signal processing functions in C6Accel \n"); printf("-----------------------------------------------------------------------------\n"); c6accel_test_DSP_FFT(hC6,N); c6accel_test_DSP_IFFT(hC6,N); c6accel_test_DSP_AUTOCOR(hC6,Nx,Nr); c6accel_test_DSP_DOTPROD(hC6,Nr); /* Implementation of this function limits the rows and columns of matrices to be multiples of 4 and r1 >8 */ c6accel_test_DSP_MATMUL(hC6,ROW1,COL1,COL2,SHIFT); c6accel_test_DSP_FIR(hC6,NOUT,NCOEFF); c6accel_test_DSP_IIR(hC6,NXIN,NCOEFF); // No need to use these on floating point devices #ifndef DEVICE_FLOAT LOG_STRING_P1("MATH kernels tested with size of data block %d \n", NMAX+1); printf("-----------------------------------------------------------------------------\n"); printf("Test for Fixed point Math functions in C6Accel\n"); printf("-----------------------------------------------------------------------------\n"); c6accel_test_MATH_RTSARITH(hC6,NMAX); c6accel_test_MATH_RTSCONV(hC6,NMAX); c6accel_test_MATH_IQCONV(hC6,NMAX,GLOBAL_Q, Q1); c6accel_test_MATH_IQMATH(hC6,NMAX,GLOBAL_Q); c6accel_test_MATH_IQARITH(hC6,NMAX,GLOBAL_Q); c6accel_test_MATH_IQTRIG(hC6,NMAX,GLOBAL_Q); #endif #ifdef DEVICE_FLOAT /*Test function calls for floating point kernels in C6accel*/ printf("-----------------------------------------------------------------------------\n"); printf("Test for Floating point Math Functions in C6Accel \n"); printf("-----------------------------------------------------------------------------\n"); c6accel_test_MATH_RTSARITH(hC6,NMAX); c6accel_test_MATH_RTSCONV(hC6,NMAX); c6accel_test_MATH_RTSFLT(hC6,BUFSIZE) ; c6accel_test_MATH_RTSFLTDP(hC6,BUFSIZE) ; printf("-----------------------------------------------------------------------------\n"); printf("Test for Floating point Signal processing Functions in C6accel\n"); printf("-----------------------------------------------------------------------------\n"); c6accel_test_DSPF_sp_fftSPxSP(hC6, Npt, rad, 0, Npt); c6accel_test_DSPF_VECMUL(hC6, BUFSIZE ); c6accel_test_DSPF_VECRECIP(hC6, NumX ); c6accel_test_DSPF_VECSUM_SQ(hC6, Nelements ); c6accel_test_DSPF_W_VEC(hC6, Mfactor, BUFSIZE ); c6accel_test_DSPF_DOTPRODFXNS(hC6, Nelements); c6accel_test_DSPF_MATFXNS(hC6, 16, 16, 16 ); c6accel_test_DSPF_MAT_MUL_CPLX(hC6, 4, 8, 8 ); c6accel_test_DSPF_MAT_TRANS(hC6, NumR, NumR ); c6accel_test_DSPF_AUTOCOR(hC6,NumX,NumR); c6accel_test_DSPF_CONVOL(hC6,NumH,NumR); //c6accel_test_DSPF_IIR(hC6, NumX); c6accel_test_DSPF_FIR(hC6, 128, 4); c6accel_test_DSPF_sp_ifftSPxSP(hC6, Npt, rad, 0, Npt); c6accel_test_DSPF_BIQUAD(hC6, BUFSIZE); #endif CLOSE_LOG_FILE(); end: // Tear down C6ACCEL if (hC6) C6accel_delete(hC6); if(pSrcBuf_16bpp) Memory_free(pSrcBuf_16bpp, framesize, &memParams); if(pOutBuf_16bpp) Memory_free(pOutBuf_16bpp, framesize, &memParams); if(pRefBuf_16bpp) Memory_free(pRefBuf_16bpp, framesize, &memParams); if(pWorkingBuf_16bpp) Memory_free(pWorkingBuf_16bpp, framesize, &memParams); if(pWorkingBuf2_16bpp) Memory_free(pWorkingBuf2_16bpp, framesize, &memParams); #ifdef DEVICE_FLOAT if(pWorkingBuf3_16bpp) Memory_free(pWorkingBuf3_16bpp, framesize, &memParams); #endif printf("******************************************************************************\n"); printf("All tests done.\n"); printf("******************************************************************************\n"); printf("\n"); return (0); }