/* Returns YES if multiplying the number by every factor from 2 to 9
 * preserves its digit sum (summa()), otherwise NO. */
int proverka(int chislo)
{
    int i, _chislo, found;

    found = YES;
    for (i = 2; i <= 9 && found; i++) {
        _chislo = chislo * i;
        if (summa(_chislo) != summa(chislo))
            found = NO;
    }
    return found;
}
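/*
 * A minimal sketch of the helpers proverka() relies on, assuming summa()
 * is the decimal digit sum of a non-negative number and YES/NO are plain
 * boolean macros. This is an illustration, not the original project's code.
 */
#define YES 1
#define NO  0

int summa(int chislo)
{
    int s = 0;
    /* add up the decimal digits (assumes a non-negative argument) */
    while (chislo != 0) {
        s += chislo % 10;
        chislo /= 10;
    }
    return s;
}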
int main()
{
    int mas[N][M];
    double x;

    getmas(mas);   // Ask the user to enter the array
    //printmas(mas);
    sum(mas);      // Sum the columns that contain no negative elements
    printf("\nMinimum among the sums of the elements of the diagonals parallel to the secondary diagonal of the matrix = %d", pobmin(mas));

    printf("The program computes the sum of the terms of the infinite series for a given x to a precision of epsilon = 0.00001: 2 * Sigma n=0..infinity (x^(2n+1))/(2n+1)\nEnter x: ");
    scanf("%lf", &x);
    printf("\nSum of the series terms = %lf", summa(x));
    return 0;
}
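/*
 * A minimal sketch of the summa() helper assumed by the main() above:
 * it sums 2 * Sigma_{n=0..inf} x^(2n+1)/(2n+1) until the next term drops
 * below eps = 0.00001 (the series converges for |x| < 1). Illustrative
 * only; the original implementation may differ.
 */
#include <math.h>

double summa(double x)
{
    const double eps = 0.00001;
    double term = x;   /* n = 0 term: x^1 / 1 */
    double s = 0.0;
    int n = 0;

    while (fabs(term) >= eps) {
        s += term;
        n++;
        term = pow(x, 2 * n + 1) / (2 * n + 1);
    }
    return 2.0 * s;
}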
int compute()
{
    // Assuming that the processes form a square grid
    int n_procs_row = (int) sqrt(number_of_processes);
    int n_procs_col = n_procs_row;
    if (n_procs_col * n_procs_row != number_of_processes) {
        std::cerr << "number of processes must be a perfect square!" << std::endl;
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    // Create a 2D Cartesian communicator over the process grid
    const int n_dims = 2;
    int dims[n_dims] = {n_procs_row, n_procs_col};
    int periods[n_dims] = {0, 0};
    int reorder = 0;
    MPI_Comm comm_cart;
    MPI_Cart_create(MPI_COMM_WORLD, n_dims, dims, periods, reorder, &comm_cart);

    // Block sizes; the global dimensions must be divisible by the grid dimensions
    int m_block = m / n_procs_row;
    int n_block = n / n_procs_col;
    int k_block = k / n_procs_col;
    if (m_block * n_procs_row != m) {
        std::cerr << "m must be divisible by n_procs_row" << std::endl;
        MPI_Abort(MPI_COMM_WORLD, 1);
    }
    if (n_block * n_procs_col != n) {
        std::cerr << "n must be divisible by n_procs_col" << std::endl;
        MPI_Abort(MPI_COMM_WORLD, 1);
    }
    if (k_block * n_procs_col != k) {
        std::cerr << "k must be divisible by n_procs_col" << std::endl;
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    double *A_local = (double *) calloc(m_block * n_block, sizeof(double));
    double *B_local = (double *) calloc(n_block * k_block, sizeof(double));
    double *C_local = (double *) calloc(m_block * k_block, sizeof(double));
    double *A = (double *) calloc(m * n, sizeof(double));
    double *B = (double *) calloc(n * k, sizeof(double));

    initMatrices(A_local, B_local, C_local, m_block, n_block, comm_cart);

    /** testing the scatter function */
    if (rank == 0) {
        double *A_1 = (double *) calloc(n * k, sizeof(double));
        double *B_1 = (double *) calloc(n * k, sizeof(double));
        double *C_1 = (double *) calloc(n * k, sizeof(double));
        initMatrices(A_1, B_1, C_1, n, n, comm_cart);
        distributeSquareMatrix(A_1, n, C_1);
        free(A_1);
        free(B_1);
        free(C_1);
    }

    // Collect the full A and B for the later validation step
    gatherMatrix(m_block, n_block, A_local, m, n, A);
    gatherMatrix(n_block, k_block, B_local, n, k, B);

    /*if (rank == 3) {
        std::cout << "A" << std::endl;
        printMatrix(m_block, m_block, A_local);
        std::cout << "B" << std::endl;
        printMatrix(m_block, m_block, B_local);
        std::cout << "C" << std::endl;
        printMatrix(m_block, m_block, C_local);
    }*/

    double start_time, end_time;
    start_time = MPI_Wtime();
    summa(comm_cart, m_block, n_block, k_block, A_local, B_local, C_local);
    end_time = MPI_Wtime();
    getTimes(start_time, end_time);

    // C is m x k: gather the distributed result and compare against a naive product
    double *C = (double *) calloc(m * k, sizeof(double));
    double *C_naive = (double *) calloc(m * k, sizeof(double));
    gatherMatrix(m_block, k_block, C_local, m, k, C);
    if (rank == 0) {
        multMatricesLineByLine(m, n, k, A, B, C_naive);
        double eps = validate(m, k, C, C_naive);
        if (eps > 1e-4) {
            std::cerr << "ERROR: Invalid matrix -> eps = " << eps << std::endl;
            MPI_Abort(MPI_COMM_WORLD, 1);
        } else {
            std::cout << "Valid matrix" << std::endl;
        }
    }

    free(A_local);
    free(B_local);
    free(C_local);
    free(A);
    free(B);
    free(C);
    free(C_naive);
    return 0;
}
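/*
 * A minimal sketch of what the summa() kernel invoked in compute() above
 * might look like, assuming the square Cartesian grid created there and
 * row-major block storage. The names row_comm, col_comm, A_panel and
 * B_panel are illustrative; this is not the project's actual implementation.
 */
#include <mpi.h>
#include <stdlib.h>
#include <string.h>

void summa(MPI_Comm comm_cart, int m_block, int n_block, int k_block,
           double *A_local, double *B_local, double *C_local)
{
    int rank, coords[2];
    MPI_Comm_rank(comm_cart, &rank);
    MPI_Cart_coords(comm_cart, rank, 2, coords);

    /* Sub-communicators: processes in the same grid row / same grid column. */
    MPI_Comm row_comm, col_comm;
    int keep_col[2] = {0, 1};   /* vary the column index -> same grid row    */
    int keep_row[2] = {1, 0};   /* vary the row index    -> same grid column */
    MPI_Cart_sub(comm_cart, keep_col, &row_comm);
    MPI_Cart_sub(comm_cart, keep_row, &col_comm);

    int grid_size;              /* number of blocks along one grid dimension */
    MPI_Comm_size(row_comm, &grid_size);

    double *A_panel = (double *) malloc(m_block * n_block * sizeof(double));
    double *B_panel = (double *) malloc(n_block * k_block * sizeof(double));

    for (int step = 0; step < grid_size; step++) {
        /* The owning column/row copies its block, then broadcasts it. */
        if (coords[1] == step)
            memcpy(A_panel, A_local, m_block * n_block * sizeof(double));
        if (coords[0] == step)
            memcpy(B_panel, B_local, n_block * k_block * sizeof(double));
        MPI_Bcast(A_panel, m_block * n_block, MPI_DOUBLE, step, row_comm);
        MPI_Bcast(B_panel, n_block * k_block, MPI_DOUBLE, step, col_comm);

        /* Local update: C_local += A_panel * B_panel (row-major blocks). */
        for (int i = 0; i < m_block; i++)
            for (int p = 0; p < n_block; p++)
                for (int j = 0; j < k_block; j++)
                    C_local[i * k_block + j] +=
                        A_panel[i * n_block + p] * B_panel[p * k_block + j];
    }

    free(A_panel);
    free(B_panel);
    MPI_Comm_free(&row_comm);
    MPI_Comm_free(&col_comm);
}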
/**
 * Creates random A, B, and C matrices and uses summa() to
 * calculate the product. Output of summa() is compared
 * to CC, the true solution.
 **/
bool random_matrix_test(int m, int n, int k, int px, int py, int panel_size) {

  int proc = 0, passed_test = 0, group_passed = 0;
  int num_procs = px * py;
  int rank = 0;

  double *A, *B, *C, *CC, *A_block, *B_block, *C_block, *CC_block;
  A = NULL;
  B = NULL;
  C = NULL;
  CC = NULL;

  MPI_Comm_rank(MPI_COMM_WORLD, &rank); /* Get process id */

  if (rank == 0) {
    /* Allocate matrices */
    A = random_matrix(m, k);
    B = random_matrix(k, n);
    C = zeros_matrix(m, n);

    /* Stores the solution */
    CC = zeros_matrix(m, n);

    /*
     * Solve the problem locally and store the
     * solution in CC
     */
    local_mm(m, n, k, 1.0, A, m, B, k, 0.0, CC, m);
  }

  /*
   * Allocate memory for matrix blocks
   */
  A_block = malloc(sizeof(double) * (m * k) / num_procs);
  assert(A_block);

  B_block = malloc(sizeof(double) * (k * n) / num_procs);
  assert(B_block);

  C_block = malloc(sizeof(double) * (m * n) / num_procs);
  assert(C_block);

  CC_block = malloc(sizeof(double) * (m * n) / num_procs);
  assert(CC_block);

  /* Distribute the matrices */
  distribute_matrix(px, py, m, k, A, A_block, rank);
  distribute_matrix(px, py, k, n, B, B_block, rank);
  distribute_matrix(px, py, m, n, C, C_block, rank);
  distribute_matrix(px, py, m, n, CC, CC_block, rank);

  if (rank == 0) {
    /*
     * Blocks of A, B, C, and CC have been distributed to
     * each of the processes, now we can safely deallocate the
     * matrices
     */
    deallocate_matrix(A);
    deallocate_matrix(B);
    deallocate_matrix(C);
    deallocate_matrix(CC);
  }

#ifdef DEBUG
  /* flush output and synchronize the processes */
  fflush(stdout);
  sleep(1);
  MPI_Barrier(MPI_COMM_WORLD);
#endif

  /*
   * Call SUMMA
   */
  summa(m, n, k, A_block, B_block, C_block, px, py, 1);

#ifdef DEBUG
  /* flush output and synchronize the processes */
  fflush(stdout);
  sleep(1);
  MPI_Barrier(MPI_COMM_WORLD);
#endif

#ifdef DEBUG
  /* Verify each C_block sequentially */
  for (proc = 0; proc < num_procs; proc++) {

    if (rank == proc) {
      bool isCorrect = verify_matrix_bool(m / px, n / py, C_block, CC_block);

      if (isCorrect) {
        printf("CBlock on rank=%d is correct\n", rank);
        fflush(stdout);
      } else {
        printf("**\tCBlock on rank=%d is wrong\n", rank);
        printf("CBlock on rank=%d is\n", rank);
        print_matrix(m / px, n / py, C_block);
        printf("CBlock on rank=%d should be\n", rank);
        print_matrix(m / px, n / py, CC_block);
        printf("**\n\n");
        fflush(stdout);
        passed_test = 1;
        sleep(1);
      }
    }
    MPI_Barrier(MPI_COMM_WORLD); /* keep all processes synchronized */
  } /* proc */
#else
  /* Each process verifies its C_block in parallel */
  if (verify_matrix_bool(m / px, n / py, C_block, CC_block) == false) {
    passed_test = 1;
  }
#endif

  /* Free A_block, B_block, C_block, and CC_block */
  free(A_block);
  free(B_block);
  free(C_block);
  free(CC_block);

  /*
   * passed_test == 0 if the process PASSED the test
   * passed_test == 1 if the process FAILED the test
   *
   * therefore an MPI_Reduce of passed_test counts the
   * number of processes that failed
   *
   * After the MPI_Reduce/MPI_Bcast, group_passed == 0 means every process passed
   */
  MPI_Reduce(&passed_test, &group_passed, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
  MPI_Bcast(&group_passed, 1, MPI_INT, 0, MPI_COMM_WORLD);

  if (rank == 0 && group_passed == 0) {
    printf("random_matrix_test m=%d n=%d k=%d px=%d py=%d pb=%d............PASSED\n",
           m, n, k, px, py, panel_size);
  }

  if (rank == 0 && group_passed != 0) {
    printf("random_matrix_test m=%d n=%d k=%d px=%d py=%d pb=%d............FAILED\n",
           m, n, k, px, py, panel_size);
  }

  /* If group_passed == 0 then every process passed the test */
  if (group_passed == 0) {
    return true;
  } else {
    return false;
  }
}
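/*
 * A minimal sketch of the local_mm() helper used above to compute the
 * reference solution CC, assuming a column-major, BLAS-style interface
 * C := alpha*A*B + beta*C with leading dimensions lda/ldb/ldc (matching
 * the call local_mm(m, n, k, 1.0, A, m, B, k, 0.0, CC, m)). Illustrative
 * only; the project's actual implementation may differ.
 */
void local_mm(const int m, const int n, const int k,
              const double alpha,
              const double *A, const int lda,
              const double *B, const int ldb,
              const double beta,
              double *C, const int ldc)
{
  int row, col, kk;
  for (col = 0; col < n; col++) {
    for (row = 0; row < m; row++) {
      double dot = 0.0;
      /* dot product of row `row` of A with column `col` of B */
      for (kk = 0; kk < k; kk++) {
        dot += A[row + kk * lda] * B[kk + col * ldb];
      }
      C[row + col * ldc] = alpha * dot + beta * C[row + col * ldc];
    }
  }
}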
int main(int argc, char *argv[])
{
    int i = 0, nRow, nCol = NY + 2, myid, nProcs, NlocX, rest;
    float *temp, *temp_new;
    double tinit, tstart, tstop, tio;
    char filename_0[25];
    char filename_1[25];

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &myid);
    MPI_Comm_size(MPI_COMM_WORLD, &nProcs);

    /* Split the domain among the processes; the last process takes the remainder */
    NlocX = NY / nProcs;
    rest = NY % nProcs;
    if (myid == nProcs - 1)
        NlocX += rest;

    for (i = 0; i < 25; i++) {
        filename_0[i] = 0;
        filename_1[i] = 0;
    }
    sprintf(filename_0, "transport-mpi_%d.dat", myid);

    printf("I am process %d and my dimension is %d x %d. Filename will be %s\n",
           myid, NlocX, NY, filename_0);
    printf("-----------------------\n");

    nRow = NlocX + 2;
    temp     = (float *) malloc(nRow * nCol * sizeof(float));
    temp_new = (float *) malloc(nRow * nCol * sizeof(float));

    /* Serial reference path, currently disabled (the "0 &&" guard keeps it from running) */
    if (0 && myid == 0) {
        tinit = seconds();
        init_transport(temp, myid);
        update_boundaries_PBC(temp);
#ifdef __DEBUG
        float before = summa(NX, NY, temp);
        printf(" sum temp before: %f\n", before);
#endif
        tstop = seconds();
        printf("\ninitialization done\n");
        printf("cpu time in seconds %.3g\n", tstop - tinit);

        tstart = seconds();
        save_gnuplot("transport-serial.dat", temp);
        tstop = seconds();
        tio = tstop - tstart;

        tstart = seconds();
        for (i = 1; i <= STEPS; ++i) {
            evolve(DT, temp, temp_new);
            update_boundaries_PBC(temp);
        }
        tstop = seconds();
        printf("\nevolution done\n");
        printf("cpu time in seconds %.3g\n", tstop - tstart);
#ifdef __DEBUG
        float after = summa(NX, NY, temp);
        printf(" sum temp after: %f\n", after);
#endif
        tstart = seconds();
        save_gnuplot("transport-end-serial.dat", temp);
        tstop = seconds();
        tio = tio + tstop - tstart;
        printf("\nsave_data done\n");
        printf("IO time in seconds %.3g\n", tio);
        printf("\ntotal cpu time in seconds %.3g\n", tstop - tinit);
    }

    free(temp);
    free(temp_new);
    MPI_Finalize();
    return 0;
}
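/*
 * A plausible implementation of the seconds() wall-clock timer used above,
 * based on gettimeofday(); the actual helper in this code base may differ.
 */
#include <sys/time.h>

double seconds(void)
{
    struct timeval tmp;
    gettimeofday(&tmp, (struct timezone *) 0);
    /* seconds plus microseconds as a double */
    return tmp.tv_sec + ((double) tmp.tv_usec) * 1.0e-6;
}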