/* Test driver for ffs: compares it with the reference naive() on small
   values and checks hand-computed results for shifted bit patterns.
   The assertions expect ffs to return one plus the index of the lowest
   set bit.  */
int
main (int argc, char *argv[])
{
  int i;
  int x;

  /* Sweep a window of small values on both sides of zero.  */
  for (x = -128; x <= 128; ++x)
    {
      ASSERT (ffs (x) == naive (x));
    }

  /* A single bit at position i, and every bit from position i upward.  */
  for (i = 0; i < NBITS; ++i)
    {
      ASSERT (ffs (1U << i) == naive (1U << i));
      ASSERT (ffs (1U << i) == i + 1);
      ASSERT (ffs (-1U << i) == i + 1);
    }

  /* Two-bit-wide odd patterns; one fewer shift fits in the word.  */
  for (i = 0; i < NBITS - 1; ++i)
    {
      ASSERT (ffs (3U << i) == i + 1);
      ASSERT (ffs (-3U << i) == i + 1);
    }

  /* Three-bit-wide odd patterns; two fewer shifts fit in the word.  */
  for (i = 0; i < NBITS - 2; ++i)
    {
      ASSERT (ffs (5U << i) == i + 1);
      ASSERT (ffs (-5U << i) == i + 1);
      ASSERT (ffs (7U << i) == i + 1);
      ASSERT (ffs (-7U << i) == i + 1);
    }

  return 0;
}
/* Test driver for ffsl: compares it with the reference naive() on small
   values and checks hand-computed results for shifted bit patterns.
   The assertions expect ffsl to return one plus the index of the lowest
   set bit.  */
int
main (int argc, char *argv[])
{
  long int x;
  int i;

  /* Sweep small values on both sides of zero.  The sweep uses the
     long-typed variable so the argument has the width of the function
     under test (x was previously declared but never used).  */
  for (x = -128; x <= 128; x++)
    ASSERT (ffsl (x) == naive (x));

  /* A single bit at position i, and every bit from position i upward.  */
  for (i = 0; i < NBITS; i++)
    {
      ASSERT (ffsl (1UL << i) == naive (1UL << i));
      ASSERT (ffsl (1UL << i) == i + 1);
      ASSERT (ffsl (-1UL << i) == i + 1);
    }

  /* Two-bit-wide odd patterns; one fewer shift fits in the word.  */
  for (i = 0; i < NBITS - 1; i++)
    {
      ASSERT (ffsl (3UL << i) == i + 1);
      ASSERT (ffsl (-3UL << i) == i + 1);
    }

  /* Three-bit-wide odd patterns; two fewer shifts fit in the word.  */
  for (i = 0; i < NBITS - 2; i++)
    {
      ASSERT (ffsl (5UL << i) == i + 1);
      ASSERT (ffsl (-5UL << i) == i + 1);
      ASSERT (ffsl (7UL << i) == i + 1);
      ASSERT (ffsl (-7UL << i) == i + 1);
    }

  return 0;
}
/* Test driver for integer_length_l: compares it with the reference
   naive() on small values and checks hand-computed results for shifted
   bit patterns.  The assertions expect the result to be one plus the
   index of the highest set bit.  */
int
main (int argc, char *argv[])
{
  int i;
  unsigned long x;

  /* Sweep the small values 0..256.  */
  for (x = 0; x <= 256; ++x)
    {
      ASSERT (integer_length_l (x) == naive (x));
    }

  /* A single bit at position i has length i + 1; a negated pattern keeps
     the top bit set and therefore has the full length NBITS.  */
  for (i = 0; i < NBITS; ++i)
    {
      ASSERT (integer_length_l (1UL << i) == naive (1UL << i));
      ASSERT (integer_length_l (1UL << i) == i + 1);
      ASSERT (integer_length_l (-1UL << i) == NBITS);
    }

  /* 3 occupies two bits.  */
  for (i = 0; i < NBITS - 1; ++i)
    {
      ASSERT (integer_length_l (3UL << i) == i + 2);
    }

  for (i = 0; i < NBITS - 2; ++i)
    {
      ASSERT (integer_length_l (-3UL << i) == NBITS);
    }

  /* 5 and 7 occupy three bits.  */
  for (i = 0; i < NBITS - 2; ++i)
    {
      ASSERT (integer_length_l (5UL << i) == i + 3);
      ASSERT (integer_length_l (7UL << i) == i + 3);
    }

  for (i = 0; i < NBITS - 3; ++i)
    {
      ASSERT (integer_length_l (-5UL << i) == NBITS);
      ASSERT (integer_length_l (-7UL << i) == NBITS);
    }

  return 0;
}
/*
 * Driver: runs naive() and then dynamic() on the same two inputs,
 * arr1 with arguments (5, 3) and arr2 with arguments (3, 2).
 * Both arrays are passed as flat int pointers.
 */
int main(void)
{
    int *first = (int *)arr1;
    int *second = (int *)arr2;

    naive(first, 5, 3);
    naive(second, 3, 2);

    dynamic(first, 5, 3);
    dynamic(second, 3, 2);

    return 0;
}
int main(int argc, char **argv){ FILE *fp; if(argc != 2) { printf("Niepoprawna liczba argumentów!\n"); exit(0); } N = atoi(argv[1]); int **A, **B, **C; A = calloc(N, sizeof(int*)); B = calloc(N, sizeof(int*)); C = calloc(N, sizeof(int*)); for(i = 0; i<N; i++){ A[i] = calloc(N, sizeof(int)); B[i] = calloc(N, sizeof(int)); C[i] = calloc(N, sizeof(int)); } gsl_matrix *matrix1 = gsl_matrix_calloc(N, N); gsl_matrix *matrix2 = gsl_matrix_calloc(N, N); gsl_matrix *result = gsl_matrix_calloc(N, N); CBLAS_TRANSPOSE_t TransA = CblasNoTrans; srand(time(NULL)); fp = fopen("result.txt", "a"); //fill matrix for(i=0; i<N; i++){ for(j=0; j<N; j++){ tmp = rand() % 100; A[i][j] = tmp; gsl_matrix_set(matrix1, j, k, tmp); tmp = rand() % 100; B[i][j] = tmp; gsl_matrix_set(matrix2, j, k, tmp); } } for(l = 0; l < 20; l++){ //algorithms start = clock(); naive(A,B,C); fprintf(fp, "%d,alg1,%f\n", N, (double)(clock() - start)/CLOCKS_PER_SEC); //printf("Algorytm 1: %g [s]\n", (double)(clock() - start)/CLOCKS_PER_SEC); start = clock(); ver2(A,B,C); fprintf(fp, "%d,alg2,%f\n", N, (double)(clock() - start)/CLOCKS_PER_SEC); //printf("Algorytm 2: %g [s]\n", (double)(clock() - start)/CLOCKS_PER_SEC); start = clock(); gsl_blas_dgemm (TransA, TransA, 1, matrix1, matrix2, 1, result); fprintf(fp, "%d,blas,%f\n", N, (double)(clock() - start)/CLOCKS_PER_SEC); //printf("Algorytm 3: %g [s]\n", (double)(clock() - start)/CLOCKS_PER_SEC); } return 0; }
/// Worker loop: repeatedly claims the next BLOCKSIZE-wide block of candidates
/// through caret.fetch_add() and adds up naive(c) for every candidate c in it.
///
/// Once the claimed offset is at or beyond `range`, the worker prints a
/// summary line, adds its local total to `primes` and returns that total.
///
/// @return the sum of naive(c) over all candidates processed by this call
unsigned body()
{
    unsigned found( 0 );   // running sum of naive() results
    unsigned blocks( 0 );  // number of blocks this call processed

    for( ;; )
    {
        const long offset = caret.fetch_add( BLOCKSIZE );
        if( offset >= range )
        {
            std::cout << "Thread finished. " << blocks << " blocks. " << found << " numbers.\n";
            primes += found;
            return found;
        }
        ++blocks;

        // Clamp the end of the block to `range`: when `range` is not a multiple
        // of BLOCKSIZE the final block would otherwise run past the limit and
        // count candidates that lie outside [0, range).
        long limit = offset + BLOCKSIZE;
        if( limit > range )
        {
            limit = range;
        }

        for( long c = offset; c < limit; ++c )
        {
            found += naive( c );
        }
    }
}
/* convenience function for Matrix Matrix Multiplication @param A input matrix @param B input matrix (COLUMN MAJOR) @param C output matrix */ inline void MMM(Matrix& A, Matrix& B, Matrix& C){ /*int LD = A.dimRows; if (LD<A.dimCols) LD = A.dimCols; if (LD<B.dimCols) LD = B.dimCols; LD--; LD |= LD >> 1; LD |= LD >> 2; LD |= LD >> 4; LD |= LD >> 8; LD |= LD >> 16; LD++;*/ double* temp = (double*) aligned_alloc(ALIGNMENT, sizeof(double) * LD * LD); Matrix BT(temp, temp, B.getDimN(), B.getDimM(), 0, 0); Matrix P( (double*) aligned_alloc(ALIGNMENT, sizeof(double) * LD * LD), nullptr, A.getDimM(), A.getDimM(), 0, 0); Matrix PS( (double*) aligned_alloc(ALIGNMENT, sizeof(double) * LD * LD), nullptr, A.getDimM(), A.getDimM(), 0, 0); Matrix S( (double*) aligned_alloc(ALIGNMENT, sizeof(double) * LD * LD), nullptr, A.getDimM(), A.getDimM(), 0, 0); Matrix T(nullptr, (double*) aligned_alloc(ALIGNMENT, sizeof(double) * LD * LD), A.getDimM(), A.getDimM(), 0, 0); A.dimRows = LD - PADDING; transpose(B, BT); if (A.dimRows<TRUNCATION_POINT){ naive(A, BT, C); } else { strassen(A, BT, C, P, PS, S, T); } free(BT.data); free(P.data); free(PS.data); free(S.data); free(T.dataT); }
/* Strassen implementation of Matrix Matrix Multiplication @param A input row major matrix @param B input COLUMN MAJOR matrix @param C output row major matrix @param P temporary row major matrix @param Ps temporary row major matrix @param S temporary row major matrix @param T temporary COLUMN MAJOR matrix */ void strassen(Matrix& A, Matrix& B, Matrix& C, Matrix& P, Matrix& Ps, Matrix& S, Matrix& T){ //get matrix dimensions and calculate size of blocks int dim = A.getDimM(); //equal for all matrix dimensions int dim2 = dim * 0.5; //std::cout << dim << "\t" << dim2 << std::endl; //get blocks Matrix A1 = A.getSubMatrix(0, 0, dim2, dim2); Matrix A2 = A.getSubMatrix(0, dim2, dim2, dim2); Matrix A3 = A.getSubMatrix(dim2, 0, dim2, dim2); Matrix A4 = A.getSubMatrix(dim2, dim2, dim2, dim2); Matrix B1 = B.getSubMatrix(0, 0, dim2, dim2); Matrix B2 = B.getSubMatrix(0, dim2, dim2, dim2); Matrix B3 = B.getSubMatrix(dim2, 0, dim2, dim2); Matrix B4 = B.getSubMatrix(dim2, dim2, dim2, dim2); Matrix C1 = C.getSubMatrix(0, 0, dim2, dim2); Matrix C2 = C.getSubMatrix(0, dim2, dim2, dim2); Matrix C3 = C.getSubMatrix(dim2, 0, dim2, dim2); Matrix C4 = C.getSubMatrix(dim2, dim2, dim2, dim2); Matrix S1 = S.getSubMatrix(0, 0, dim2, dim2); Matrix S2 = S.getSubMatrix(0, dim2, dim2, dim2); Matrix S3 = S.getSubMatrix(dim2, 0, dim2, dim2); Matrix S4 = S.getSubMatrix(dim2, dim2, dim2, dim2); Matrix T1 = T.getSubMatrix(0, 0, dim2, dim2); Matrix T2 = T.getSubMatrix(0, dim2, dim2, dim2); Matrix T3 = T.getSubMatrix(dim2, 0, dim2, dim2); Matrix T4 = T.getSubMatrix(dim2, dim2, dim2, dim2); Matrix P1 = P.getSubMatrix(0, 0, dim2, dim2); Matrix P2 = P.getSubMatrix(0, dim2, dim2, dim2); Matrix P3 = P.getSubMatrix(dim2, 0, dim2, dim2); Matrix P4 = P.getSubMatrix(dim2, dim2, dim2, dim2); Matrix P5 = Ps.getSubMatrix(0, 0, dim2, dim2); Matrix P6 = Ps.getSubMatrix(0, dim2, dim2, dim2); Matrix P7 = Ps.getSubMatrix(dim2, 0, dim2, dim2); //Matrix P8 = Ps.getSubMatrix(dim2, dim2, dim2, dim2); //std::cout << 
"submatrices" << std::endl; //compute temporary S and T matrices for (int i=0; i<dim2; ++i){ for (int j=0; j<dim2; j+=4){ //std::cout << "S" << std::endl; __m256d* pA1 = A1.get(i, j); __m256d* pA2 = A2.get(i, j); __m256d* pA3 = A3.get(i, j); __m256d* pA4 = A4.get(i, j); __m256d* pS2 = S2.get(i, j); S1.set(i, j, (*pA3)+(*pA4)); S2.set(i, j, (*pA3)+(*pA4)-(*pA1)); S3.set(i, j, (*pA1)-(*pA3)); S4.set(i, j, (*pA2)-(*pS2)); pA1++; pA2++; pA3++; pA4++; pS2++; //S1(i, j) = A3(i, j) + A4(i, j); //S2(i, j) = S1(i, j) - A1(i, j); //S3(i, j) = A1(i ,j) - A3(i, j); //S4(i, j) = A2(i, j) - S2(i, j); //std::cout << "T" << std::endl; __m256d* pB1 = B1.getT(j, i); __m256d* pB2 = B2.getT(j, i); __m256d* pB3 = B3.getT(j, i); __m256d* pB4 = B4.getT(j, i); __m256d* pT2 = T2.getT(j, i); T1.setT(j, i, (*pB2)-(*pB1)); T2.setT(j, i, (*pB4)-((*pB2)-(*pB1))); T3.setT(j, i, (*pB4)-(*pB2)); T4.setT(j, i, (*pB3)-(*pT2)); pB1++; pB2++; pB3++; pB4++; pT2++; //T1.T(j, i) = B2.T(j, i) - B1.T(j, i); //T2.T(j, i) = B4.T(j, i) - T1.T(j, i); //T3.T(j, i) = B4.T(j ,i) - B2.T(j, i); //T4.T(j, i) = B3.T(j, i) - T2.T(j, i); } } //calculate products if (dim2<TRUNCATION_POINT) { naive(A1, B1, P1); naive(A2, B3, P2); naive(S1, T1, P3); naive(S2, T2, P4); naive(S3, T3, P5); naive(S4, B4, P6); naive(A4, T4, P7); } else { strassen(A1, B1, P1, C1, C2, C3, B2); strassen(A2, B3, P2, C1, C2, C3, B2); strassen(S1, T1, P3, C1, C2, C3, B2); strassen(S2, T2, P4, C1, C2, C3, B2); strassen(S3, T3, P5, C1, C2, C3, B2); strassen(S4, B4, P6, C1, C2, C3, B2); strassen(A4, T4, P7, C1, C2, C3, B2); } //assemble final matrix for (int i=0; i<dim2; ++i){ for (int j=0; j<dim2; j+=4){ __m256d* pP1 = P1.get(i, j); __m256d* pP2 = P2.get(i, j); __m256d* pP3 = P3.get(i, j); __m256d* pP4 = P4.get(i, j); __m256d* pP5 = P5.get(i, j); __m256d* pP6 = P6.get(i, j); __m256d* pP7 = P7.get(i, j); C1.set(i, j, (*pP1) + (*pP2)); C3.set(i, j, (*pP1) + (*pP4) + (*pP5) + (*pP7)); C4.set(i, j, (*pP1) + (*pP4) + (*pP5) + (*pP3)); C2.set(i, j, (*pP1) + 
(*pP4) + (*pP3) + (*pP6)); //C1(i, j) = P1(i, j) + P2(i, j); //C3(i, j) = P1(i, j) + P4(i, j) + P5(i, j) + P7(i, j); //C4(i, j) = P1(i, j) + P4(i, j) + P5(i, j) + P3(i, j); //C2(i, j) = P1(i, j) + P3(i, j) + P4(i, j) + P6(i, j); pP1++; pP2++; pP3++; pP4++; pP5++; pP6++; pP7++; } } }
/*
 * Divide-and-conquer search for the smallest distance between two points.
 *
 * Sets with fewer than 100 points are handed to naive(). Larger sets are
 * split at the value returned by select_y() into the points with ys < y
 * ("aboves") and the points with ys >= y ("belows"). Each half is solved
 * recursively, and the smaller result ("border") is then improved by
 * checking every pair that straddles the split and lies within `border`
 * of it in y.
 *
 * If a split makes no progress (one half contains every point), the whole
 * set is handed to naive() instead of recursing on an identical input.
 *
 * Returns the smallest distance found.
 */
float closestpoints(Points ps) {
    if (ps.size < 100) {
        return naive(ps);
    }

    float y = select_y(ps);

    /* Partition into the two halves; each buffer can hold every point. */
    Points aboves = alloc_points(ps.size);
    int aboves_size = 0;
    Points belows = alloc_points(ps.size);
    int belows_size = 0;

    for (int i = 0; i != ps.size; ++i) {
        if (ps.ys[i] < y) {
            aboves.xs[aboves_size] = ps.xs[i];
            aboves.ys[aboves_size] = ps.ys[i];
            aboves_size++;
        } else if (ps.ys[i] >= y) {
            belows.xs[belows_size] = ps.xs[i];
            belows.ys[belows_size] = ps.ys[i];
            belows_size++;
        }
    }
    aboves.size = aboves_size;
    belows.size = belows_size;

    /* A half that still holds the complete input would make the recursion
       repeat the same call without end; solve such a set directly. */
    if (aboves_size == ps.size || belows_size == ps.size) {
        free_points(aboves);
        free_points(belows);
        return naive(ps);
    }

    float above_ = closestpoints(aboves);
    float below_ = closestpoints(belows);

    free_points(aboves);
    free_points(belows);

    float border = minf(above_, below_);

    /* Collect the points of each half that lie within `border` of the split. */
    Points aboveB = alloc_points(ps.size);
    int aboveB_size = 0;
    Points belowB = alloc_points(ps.size);
    int belowB_size = 0;

    for (int i = 0; i != ps.size; ++i) {
        if (ps.ys[i] < y && ps.ys[i] >= (y-border)) {
            aboveB.xs[aboveB_size] = ps.xs[i];
            aboveB.ys[aboveB_size] = ps.ys[i];
            aboveB_size++;
        }
        if (ps.ys[i] >= y && ps.ys[i] < (y+border)) {
            belowB.xs[belowB_size] = ps.xs[i];
            belowB.ys[belowB_size] = ps.ys[i];
            belowB_size++;
        }
    }
    aboveB.size = aboveB_size;
    belowB.size = belowB_size;

    /* Compare every straddling pair against the best distance so far. */
    float best = border;
    for (int i = 0; i != aboveB.size; ++i) {
        for (int j = 0; j != belowB.size; ++j) {
            float d = dist( aboveB.xs[i], aboveB.ys[i]
                          , belowB.xs[j], belowB.ys[j] );
            best = minf(best, d);
        }
    }

    free_points(aboveB);
    free_points(belowB);

    return best;
}