/*
 * Program entry point: solves a linear system with Gauss-Jordan elimination
 * using an OpenMP thread team.
 *
 * Usage: <program> <number_of_threads>
 *
 * The augmented matrix A and its dimension `size` are loaded through
 * Lab3LoadInput(); the solution vector x is written with Lab3SaveOutput()
 * together with the elapsed time measured by GET_TIME around the parallel
 * region.
 *
 * Returns EXIT_SUCCESS on completion, EXIT_FAILURE when the thread count
 * argument is missing or is not a positive integer.
 */
int main(int argc, char* argv[])
{
    /* Without this early exit argv[1] would be read even when it is absent. */
    if (argc != 2) {
        fprintf(stderr, "Usage: Requires number of threads.");
        return EXIT_FAILURE;
    }

    thread_count = atoi(argv[1]);
    /* atoi() yields 0 for non-numeric input; num_threads() needs a positive value. */
    if (thread_count <= 0) {
        fprintf(stderr, "Usage: number of threads must be a positive integer.\n");
        return EXIT_FAILURE;
    }

    Lab3LoadInput(&A, &size);
    x = CreateVec(size);

    double start, end;
    int i = 0;

    GET_TIME(start);
#   pragma omp parallel num_threads(thread_count) \
        shared(A)
    {
        /* Both elimination phases are invoked by every thread of the team. */
        gaussian_elimination();
        jordan_elimination();

        /* Each unknown is the augmented column divided by the diagonal entry. */
#       pragma omp for
        for (i = 0; i < size; ++i) {
            x[i] = A[i][size] / A[i][i];
        }
    }
    GET_TIME(end);

    Lab3SaveOutput(x, size, end-start);
    printf("time is: %e\n", end-start);

    DestroyVec(x);
    DestroyMat(A, size);
    return EXIT_SUCCESS;
}
// Computes the homography that maps the four `src` points onto the four
// `dst` points and stores it in `homography` as a 4x4 matrix laid out the
// way OpenGL expects it (transposed).
//
// Reference: Multiple View Geometry in Computer Vision 2ed,
//            Hartley R. and Zisserman A.
//
// With x' = xH, H being a 3 by 3 matrix, converting to inhomogeneous
// coordinates yields two equations per point pair:
//
//   x' * (h31*x + h32*y + h33) = h11*x + h12*y + h13
//   y' * (h31*x + h32*y + h33) = h21*x + h22*y + h23
//
// H is scale independent, so h33 is fixed to 1, which leaves 8 unknowns
// (h11 .. h32) and 8 equations for the 4 point pairs.
void fux_anysurface::findHomography(ofPoint src[4], ofPoint dst[4], float homography[16])
{
    // Augmented 8x9 system; rows 2k and 2k+1 hold the x' and y' equations
    // of point pair k, the ninth column is the right-hand side.
    float system[8][9];

    for (int pt = 0; pt < 4; ++pt) {
        const ofPoint &from = src[pt];
        const ofPoint &to   = dst[pt];

        float *eqX = system[2 * pt];
        float *eqY = system[2 * pt + 1];

        eqX[0] = -from.x;
        eqX[1] = -from.y;
        eqX[2] = -1;
        eqX[3] = 0;
        eqX[4] = 0;
        eqX[5] = 0;
        eqX[6] = from.x*to.x;
        eqX[7] = from.y*to.x;
        eqX[8] = -to.x;

        eqY[0] = 0;
        eqY[1] = 0;
        eqY[2] = 0;
        eqY[3] = -from.x;
        eqY[4] = -from.y;
        eqY[5] = -1;
        eqY[6] = from.x*to.y;
        eqY[7] = from.y*to.y;
        eqY[8] = -to.y;
    }

    // After elimination the last column of row k holds the k-th unknown,
    // in the order h11 h12 h13 h21 h22 h23 h31 h32.
    gaussian_elimination(&system[0][0], 9);

    // Scatter the 3x3 result into the transposed 4x4 matrix: the 3x3
    // indices 0,1,2 land on 4x4 indices 0,1,3; everything else stays zero.
    //
    //   h11 h21 0 h31
    //   h12 h22 0 h32
    //   0   0   0 0
    //   h13 h23 0 h33
    static const int slot[3] = { 0, 1, 3 };

    for (int k = 0; k < 16; ++k)
        homography[k] = 0;

    for (int r = 0; r < 3; ++r) {
        for (int c = 0; c < 3; ++c) {
            const int term = 3 * r + c;   // position in h11..h33
            homography[4 * slot[c] + slot[r]] = (term < 8) ? system[term][8] : 1;
        }
    }
}
// {{{ linear_algebra() void linear_algebra( GNFS::Polynomial &polynomial, GNFS::Target &target, FactorBase &fb, Matrix &matrix, const std::vector<int> &av, const std::vector<int> &bv) { NTL::ZZ aZ; NTL::ZZ bZ; NTL::ZZ pZ; NTL::ZZ valZ; NTL::ZZ numZ; int i; int j; int k; int u = polynomial.d*target.t; // Initialize sM for(j = 0; j <= matrix.sM.NumCols()-1; j++) { // Initialize row for(k = 0; k < matrix.sM.NumCols()-1; k++) matrix.sM[k][j] = 0; // Set the first column aZ = av[j]; bZ = bv[j]; valZ = aZ + bZ * polynomial.m; if(valZ < 0) { valZ *= -1; matrix.sM[0][j] = 1; } // Set a RFB row i = 0; while(i < target.t && valZ != 1) { pZ = fb.RFB[i]; if(valZ % pZ == 0) { if(matrix.sM[1+i][j]==0) matrix.sM[1+i][j]=1; else matrix.sM[1+i][j]=0; valZ = valZ / pZ; } else i++; } // Set a AFB row valZ = algebraic_norm(polynomial, av[j], bv[j]); if(valZ < 0) valZ *= -1; i = 0; while(i<u && valZ!=1) { pZ = fb.AFB[i]; if(valZ % pZ == 0) { numZ = fb.AFBr[i]; //while((aZ + bZ * numZ) % pZ != 0 && i<target.t) // TODO while((aZ + bZ * numZ) % pZ != 0) // TODO { pZ = fb.AFB[++i]; numZ = fb.AFBr[i]; } if(matrix.sM[1+target.t+i][j]==0) matrix.sM[1+target.t+i][j]=1; else matrix.sM[1+target.t+i][j]=0; valZ = valZ / pZ; } else i++; } // Set a QCB row for(i=0; i<target.digits; i++) { numZ = fb.QCB[i]; valZ = fb.QCBs[i]; valZ = aZ + bZ * valZ; if(Legendre(valZ, numZ) != 1) { matrix.sM[1+target.t+u+i][j]=1; } } } std::cout << "\tSize: " << matrix.sM.NumRows() << "x" << matrix.sM.NumCols() << std::endl; gaussian_elimination(matrix.sM); matrix.sfreeCols = get_freecols(matrix.sM); }
/*
 * Quadratic sieve driven over MPI: rank 0 acts as master and collects the
 * complete factorizations, every other rank sieves its own intervals.
 *
 * Parameters:
 *   N          number to factor
 *   n          size of the prime sieve; primes are searched below n
 *   interval   length of the interval handed to smart_sieve() per round
 *   max_fact   extra factorizations collected beyond the factor base size
 *   block_size forwarded to smart_sieve()
 *   m          output: receives a factor of N
 *   print_fact forwarded to master()
 *
 * Returns:
 *   NUM_PRIMO                   N passes mpz_probab_prime_p(N, 25)
 *   OK / IM_A_SLAVE             trial division found a factor (rank 0 gets
 *                               OK, the other ranks IM_A_SLAVE)
 *   IM_A_SLAVE                  on every non-zero rank once sieving stops
 *   OK                          rank 0, factorization() reported at least
 *                               one non-trivial factor
 *   SOLO_FATTORIZZAZIONI_BANALI rank 0, only trivial factorizations found
 *
 * NOTE(review): exponents, As, M and exponents_mpz are created through
 * init_* helpers whose matching release functions are not visible here, so
 * they are still not released by this function.
 */
unsigned long quadratic_sieve(mpz_t N,
			      unsigned int n,
			      unsigned interval,
			      unsigned int max_fact,
			      unsigned int block_size,
			      mpz_t m,
			      unsigned int print_fact) {
  double t1, t2;

  int rank;
  int comm_size;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &comm_size);

  /* Probabilistic primality check before doing any work */
  if(mpz_probab_prime_p(N, 25)) {
    return NUM_PRIMO;
  }

  /* Integer square root of N */
  mpz_t s;
  mpz_init(s);
  mpz_sqrt(s, N);

  t1 = MPI_Wtime();

  /* Find the primes with the sieve of Eratosthenes */
  unsigned int * primes = malloc(sizeof(unsigned int) * n);
  eratosthenes_sieve(primes, n);

  /* Compact the primes to the front of the array, in place.
     The write index never overtakes the read index. */
  unsigned int n_all_primes = 0;
  for(unsigned int i = 2; i < n; ++i)
    if(primes[i] == 1) {
      primes[n_all_primes++] = i;
    }

  /* Trial division is done by every rank; the slaves return IM_A_SLAVE
     while the master reports the factor */
  unsigned int simple_factor = trivial_fact(N, primes, n_all_primes);
  if(simple_factor != 0) {
    mpz_set_ui(m, simple_factor);
    free(primes);
    mpz_clear(s);
    return rank == 0 ? OK : IM_A_SLAVE;
  }

  /* Factor base and solutions of x^2 = N mod p.
     factor_base aliases primes: the base is built in the same buffer. */
  pair * solutions = malloc(sizeof(pair) * n_all_primes);
  unsigned int * factor_base = primes;

  unsigned n_primes = base_fattori(N, s, factor_base, solutions,
				   primes, n_all_primes);
  t2 = MPI_Wtime();
  double t_base = t2 - t1;
  if(rank == 0)
    printf("#Dimensione base di fattori: %u\n", n_primes);

  /* Exponent vectors over Z */
  unsigned int ** exponents;
  /* Vector of the (Ai + s) values */
  mpz_t * As;
  /* Sieving: collect the complete factorizations */
  unsigned int n_fatt;

  t1 = MPI_Wtime();
  if(rank == 0){
    init_matrix(& exponents, n_primes + max_fact, n_primes);
    init_vector_mpz(& As, n_primes + max_fact);

    /* Master procedure receiving the complete factorizations */
    n_fatt = master(n_primes, max_fact, exponents, As, comm_size, print_fact);
  } else {
    /* Offsets are computed in unsigned long so that the product cannot
       wrap in unsigned int before GMP sees it. */
    const unsigned long first  = (unsigned long) interval * (unsigned long) (rank - 1);
    const unsigned long stride = (unsigned long) interval * (unsigned long) (comm_size - 1);

    mpz_t begin;
    mpz_init(begin);
    mpz_set_ui(begin, first);

    int stop_flag = 0;
    do {
      stop_flag = smart_sieve(N, factor_base, n_primes, solutions,
			      begin, interval,
			      block_size, max_fact);
      mpz_add_ui(begin, begin, stride);
    } while(!stop_flag);

    mpz_clear(begin);
    free(solutions);
    free(primes);
    mpz_clear(s);
    return IM_A_SLAVE;
  }
  t2 = MPI_Wtime();
  double t_sieve = t2 - t1;
  printf("#Numero fattorizzazioni complete trovate: %u\n", n_fatt);

  t1 = MPI_Wtime();
  /* Exponent matrix over Z_2, packed into blocks of bits */
  word ** M;
  /* Number of bit blocks per row */
  unsigned long n_blocchi = n_primes / N_BITS + 1;
  /* Fill in the exponents mod 2 */
  init_matrix_l(& M, n_fatt, n_blocchi);
  for(unsigned int i = 0; i < n_fatt; ++i)
    for(unsigned int j = 0; j < n_primes; ++j) {
      unsigned int a = get_matrix(exponents, i, j);
      set_k_i(M, i, j, a);
    }

  /* Per-row info on M (rightmost bit and number of set bits) */
  struct row_stats * wt = malloc(sizeof(struct row_stats) * n_fatt);
  for(unsigned int i = 0; i < n_fatt; ++i)
    get_wt_k(M, i, n_primes, & wt[i]);

  /* Summed exponents may overflow during elimination, so they are
     converted to mpz first */
  mpz_t ** exponents_mpz;
  mpz_t temp;
  mpz_init_set_ui(temp, 2);

  init_matrix_mpz(& exponents_mpz, n_fatt, n_primes);
  for(unsigned i = 0; i < n_fatt; ++i)
    for(unsigned j = 0; j < n_primes; ++j) {
      unsigned int a = get_matrix(exponents, i, j);
      mpz_set_ui(temp, a);
      set_matrix_mpz(exponents_mpz, i, j, temp);
    }

  /* Gaussian elimination */
  gaussian_elimination(exponents_mpz, M, As, N,
		       n_fatt, n_primes, n_blocchi, wt);
  t2 = MPI_Wtime();
  double t_gauss = t2 - t1;

  /* m receives a non-trivial factor of N */
  unsigned int n_fact_non_banali = factorization(N, factor_base,
						 M, exponents_mpz,
						 As, wt, n_fatt,
						 n_primes, m);

  printf("#time_base time_sieve time_gauss time_totale\n");
  printf("%.6f ", t_base);
  printf("%.6f ", t_sieve);
  printf("%.6f ", t_gauss);
  printf("%.6f\n", t_base + t_gauss + t_sieve);

  /* Release what this function allocated itself */
  mpz_clear(temp);
  free(wt);
  free(solutions);
  free(primes);
  mpz_clear(s);

  if(n_fact_non_banali > 0) {
    return OK;
  }
  else {
    return SOLO_FATTORIZZAZIONI_BANALI;
  }
}