int main(int argc, char **argv) { int my_rank=0; /* My process rank */ int p; /* The number of processes */ //clock time recording variables double start_time,end_time,comm_time_start,comm_time_end,total_comm_time=0.0; /* Let the system do what it needs to start up MPI */ MPI_Init(&argc, &argv); /* Get my process rank */ MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); /* Find out how many processes are being used */ MPI_Comm_size(MPI_COMM_WORLD, &p); float dataA[N][N], dataB[N][N]; complex A[N][N], B[N][N]; //create MPI type for complex datatype MPI_Aint displ[2]; displ[0] = 0; displ[1] = sizeof(float); MPI_Datatype mpi_complex,types[2]; types[0] = MPI_FLOAT; types[1] = MPI_FLOAT; int block_len[2]; block_len[0]= 1; block_len[1]= 1; MPI_Type_struct(2, block_len, displ, types,&mpi_complex); MPI_Type_commit(&mpi_complex); //create 4 communicators by splitting the default one MPI_Comm new_comm; /* Find out how many processes are being used */ int color = my_rank / 2; MPI_Comm_split(MPI_COMM_WORLD, color, my_rank, &new_comm); int row_rank, row_size; MPI_Comm_rank(new_comm, &row_rank); MPI_Comm_size(new_comm, &row_size); //we will read the files by corresponding processes if(my_rank==0) { readFromFile("data/1_im1", dataA); convertToComplex(A, dataA); } else if(my_rank==2) { readFromFile("data/1_im2", dataB); convertToComplex(B, dataB); } if(my_rank==6) { //Start clock and record the start time. start_time = MPI_Wtime(); } int local_n = N/row_size; int rows_per_process = N/row_size; complex local_A[N/row_size][N]; complex local_B[N/row_size][N]; //broadcast the number of ros each process will work on MPI_Bcast(&rows_per_process , 1, MPI_INT, 0,MPI_COMM_WORLD); //process 0 and 1 work on Matrix A if(my_rank<2) { if(row_rank==0) { comm_time_start=MPI_Wtime(); } MPI_Scatter(&A[0][0], N*rows_per_process, mpi_complex, &local_A, N*rows_per_process, mpi_complex, 0, new_comm); if(row_rank==0) { comm_time_end=MPI_Wtime(); total_comm_time+=comm_time_end-comm_time_start; } c_rowwise_fft2d(&local_A[0][0],rows_per_process); if(row_rank==0) { comm_time_start=MPI_Wtime(); } MPI_Gather(&local_A, N*rows_per_process, mpi_complex, &A[0][0], N*rows_per_process, mpi_complex, 0, new_comm); if(row_rank==0) { comm_time_end=MPI_Wtime(); total_comm_time+=comm_time_end-comm_time_start; } if(row_rank==0) transpose(A); if(row_rank==0) { comm_time_start=MPI_Wtime(); } MPI_Scatter(&A[0][0], N*rows_per_process, mpi_complex, &local_A, N*rows_per_process, mpi_complex, 0, new_comm); if(row_rank==0) { comm_time_end=MPI_Wtime(); total_comm_time+=comm_time_end-comm_time_start; } c_rowwise_fft2d(&local_A[0][0],rows_per_process); if(row_rank==0) { comm_time_start=MPI_Wtime(); } MPI_Gather(&local_A, N*rows_per_process, mpi_complex, &A[0][0], N*rows_per_process, mpi_complex, 0, new_comm); if(row_rank==0) { comm_time_end=MPI_Wtime(); total_comm_time+=comm_time_end-comm_time_start; } MPI_Barrier(new_comm); if(row_rank==0) { transpose(A); comm_time_start=MPI_Wtime(); //send the intermediate matrix to process 4 MPI_Send(A, N*N, mpi_complex, 4,11,MPI_COMM_WORLD); comm_time_end=MPI_Wtime(); total_comm_time+=comm_time_end-comm_time_start; } } if(my_rank>=2 && my_rank<4) //process 2 and 3 work on Matrix B { //printf(" %d ",rows_per_process); if(row_rank==0) { comm_time_start=MPI_Wtime(); } MPI_Scatter(&B[0][0], N*rows_per_process, mpi_complex, &local_B, N*rows_per_process, mpi_complex, 0, new_comm); if(row_rank==0) { comm_time_end=MPI_Wtime(); total_comm_time+=comm_time_end-comm_time_start; } c_rowwise_fft2d(&local_B[0][0],rows_per_process); if(row_rank==0) { comm_time_start=MPI_Wtime(); } MPI_Gather(&local_B, N*local_n, mpi_complex, &B[0][0], N*rows_per_process, mpi_complex, 0, new_comm); if(row_rank==0) { comm_time_end=MPI_Wtime(); total_comm_time+=comm_time_end-comm_time_start; } if(row_rank==0) transpose(B); if(row_rank==0) { comm_time_start=MPI_Wtime(); } MPI_Scatter(&B[0][0], N*rows_per_process, mpi_complex, &local_B, N*rows_per_process, mpi_complex, 0, new_comm); if(row_rank==0) { comm_time_end=MPI_Wtime(); total_comm_time+=comm_time_end-comm_time_start; } c_rowwise_fft2d(&local_B[0][0],rows_per_process); if(row_rank==0) { comm_time_start=MPI_Wtime(); } MPI_Gather(&local_B, N*local_n, mpi_complex, &B[0][0], N*rows_per_process, mpi_complex, 0, new_comm); if(row_rank==0) { comm_time_end=MPI_Wtime(); total_comm_time+=comm_time_end-comm_time_start; } MPI_Barrier(new_comm); if(row_rank==0) { transpose(B); //lets send intermediate matrix B to process 4 MPI_Send(B, N*N, mpi_complex, 4, 12,MPI_COMM_WORLD); } } if(my_rank==4) //process 4 multiplies the two matrices { MPI_Status status; comm_time_start=MPI_Wtime(); MPI_Recv(A, N*N, mpi_complex, 0, 11, MPI_COMM_WORLD, &status); MPI_Recv(B, N*N, mpi_complex, 2, 12, MPI_COMM_WORLD, &status); comm_time_end=MPI_Wtime(); total_comm_time+=comm_time_end-comm_time_start; mmul_point(A,B); if(row_rank==0) { comm_time_start=MPI_Wtime(); MPI_Send(A, N*N, mpi_complex, 6, 13,MPI_COMM_WORLD); comm_time_end=MPI_Wtime(); total_comm_time+=comm_time_end-comm_time_start; } } if(my_rank>=6 && my_rank<8) //process 6 and 7 does inverse transform on result { MPI_Status status; if(row_rank==0) { comm_time_start=MPI_Wtime(); MPI_Recv(A, N*N, mpi_complex, 4, 13, MPI_COMM_WORLD, &status); comm_time_end=MPI_Wtime(); total_comm_time+=comm_time_end-comm_time_start; } if(row_rank==0) { comm_time_start=MPI_Wtime(); } MPI_Scatter(&A[0][0], N*rows_per_process, mpi_complex, &local_A, N*rows_per_process, mpi_complex, 0, new_comm); if(row_rank==0) { comm_time_end=MPI_Wtime(); total_comm_time+=comm_time_end-comm_time_start; } c_rowwise_inv_fft2d(&local_A[0][0],rows_per_process); if(row_rank==0) { comm_time_start=MPI_Wtime(); } MPI_Gather(&local_A, N*rows_per_process, mpi_complex, &A[0][0], N*rows_per_process, mpi_complex, 0, new_comm); if(row_rank==0) { comm_time_end=MPI_Wtime(); total_comm_time+=comm_time_end-comm_time_start; } if(row_rank==0) transpose(A); if(row_rank==0) { comm_time_start=MPI_Wtime(); } MPI_Scatter(&A[0][0], N*rows_per_process, mpi_complex, &local_A, N*rows_per_process, mpi_complex, 0, new_comm); if(row_rank==0) { comm_time_end=MPI_Wtime(); total_comm_time+=comm_time_end-comm_time_start; } c_rowwise_inv_fft2d(&local_A[0][0],rows_per_process); if(row_rank==0) { comm_time_start=MPI_Wtime(); } MPI_Gather(&local_A, N*rows_per_process, mpi_complex, &A[0][0], N*rows_per_process, mpi_complex, 0, new_comm); if(row_rank==0) { comm_time_end=MPI_Wtime(); total_comm_time+=comm_time_end-comm_time_start; } if(row_rank==0) { transpose(A); //Stop clock as operation is finished. end_time = MPI_Wtime(); //print the execution time for performance analysis purpose. printf("\n\nThe total execution time as recorded on process 0 = %f seconds!!\n!",end_time-start_time); convertToReal(dataA,A); writeToFile("final_output.txt", dataA); } } MPI_Barrier(MPI_COMM_WORLD); //get a total of communication cost recorded by all processes MPI_Reduce(&total_comm_time, &total_comm_time, 1, MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD); if(my_rank ==0) { printf("\n\nThe total Communication time = %f seconds!!\n!",total_comm_time); } MPI_Comm_free(&new_comm); MPI_Finalize(); return 0; }
int main(int argc, char **argv) { int my_rank=0; /* My process rank */ int p; /* The number of processes */ //clock time recording variables double start_time,end_time,comm_time_start,comm_time_end,total_comm_time=0.0; /* Let the system do what it needs to start up MPI */ MPI_Init(&argc, &argv); /* Get my process rank */ MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); /* Find out how many processes are being used */ MPI_Comm_size(MPI_COMM_WORLD, &p); float dataA[N][N], dataB[N][N]; complex A[N][N], B[N][N]; MPI_Aint displ[2]; displ[0] = 0; displ[1] = sizeof(float); MPI_Datatype mpi_complex,types[2]; types[0] = MPI_FLOAT; types[1] = MPI_FLOAT; int block_len[2]; block_len[0]= 1; block_len[1]= 1; MPI_Type_struct(2, block_len, displ, types,&mpi_complex); MPI_Type_commit(&mpi_complex); if(my_rank==0) { readFromFile("data/1_im1", dataA); readFromFile("data/1_im2", dataB); convertToComplex(A, dataA); convertToComplex(B, dataB); //Start clock and record the start time. start_time = MPI_Wtime(); } int local_n = N/p; int rows_per_process = N/p; complex local_A[N/p][N]; MPI_Bcast(&rows_per_process , 1, MPI_INT, 0,MPI_COMM_WORLD); //scatter A matrix aross all processes startCommTimer(my_rank,&comm_time_start); MPI_Scatter(&A[0][0], N*rows_per_process, mpi_complex, &local_A, N*local_n, mpi_complex, 0, MPI_COMM_WORLD); stopCommTimer(my_rank,&total_comm_time,comm_time_start); c_rowwise_fft2d(&local_A[0][0],local_n); startCommTimer(my_rank,&comm_time_start); MPI_Gather(&local_A, N*local_n, mpi_complex, &A[0][0], N*rows_per_process, mpi_complex, 0, MPI_COMM_WORLD); stopCommTimer(my_rank,&total_comm_time,comm_time_start); complex local_B[N/p][N]; //scatter A matrix aross all processes startCommTimer(my_rank,&comm_time_start); MPI_Scatter(&B[0][0], N*rows_per_process, mpi_complex, &local_B, N*local_n, mpi_complex, 0, MPI_COMM_WORLD); stopCommTimer(my_rank,&total_comm_time,comm_time_start); c_rowwise_fft2d(&local_B[0][0],local_n); startCommTimer(my_rank,&comm_time_start); MPI_Gather(&local_B, N*local_n, mpi_complex, &B[0][0], N*rows_per_process, mpi_complex, 0, MPI_COMM_WORLD); stopCommTimer(my_rank,&total_comm_time,comm_time_start); if(my_rank==0) { transpose(A); transpose(B); } //scatter A matrix aross all processes for inverse transform startCommTimer(my_rank,&comm_time_start); MPI_Scatter(&A[0][0], N*rows_per_process, mpi_complex, &local_A, N*local_n, mpi_complex, 0, MPI_COMM_WORLD); stopCommTimer(my_rank,&total_comm_time,comm_time_start); c_rowwise_fft2d(&local_A[0][0],local_n); startCommTimer(my_rank,&comm_time_start); MPI_Gather(&local_A, N*local_n, mpi_complex, &A[0][0], N*rows_per_process, mpi_complex, 0, MPI_COMM_WORLD); stopCommTimer(my_rank,&total_comm_time,comm_time_start); startCommTimer(my_rank,&comm_time_start); MPI_Scatter(&B[0][0], N*rows_per_process, mpi_complex, &local_B, N*local_n, mpi_complex, 0, MPI_COMM_WORLD); stopCommTimer(my_rank,&total_comm_time,comm_time_start); c_rowwise_fft2d(&local_B[0][0],local_n); startCommTimer(my_rank,&comm_time_start); MPI_Gather(&local_B, N*local_n, mpi_complex, &B[0][0], N*rows_per_process, mpi_complex, 0, MPI_COMM_WORLD); stopCommTimer(my_rank,&total_comm_time,comm_time_start); if(my_rank==0) { transpose(A); transpose(B); mmul_point(A,B); } startCommTimer(my_rank,&comm_time_start); MPI_Scatter(&A[0][0], N*rows_per_process, mpi_complex, &local_A, N*local_n, mpi_complex, 0, MPI_COMM_WORLD); stopCommTimer(my_rank,&total_comm_time,comm_time_start); c_rowwise_inv_fft2d(&local_A[0][0],local_n); startCommTimer(my_rank,&comm_time_start); MPI_Gather(&local_A, N*local_n, mpi_complex, &A[0][0], N*rows_per_process, mpi_complex, 0, MPI_COMM_WORLD); stopCommTimer(my_rank,&total_comm_time,comm_time_start); if(my_rank==0) { transpose(A); } startCommTimer(my_rank,&comm_time_start); MPI_Scatter(&A[0][0], N*rows_per_process, mpi_complex, &local_A, N*local_n, mpi_complex, 0, MPI_COMM_WORLD); stopCommTimer(my_rank,&total_comm_time,comm_time_start); c_rowwise_inv_fft2d(&local_A[0][0],local_n); startCommTimer(my_rank,&comm_time_start); MPI_Gather(&local_A, N*local_n, mpi_complex, &A[0][0], N*rows_per_process, mpi_complex, 0, MPI_COMM_WORLD); stopCommTimer(my_rank,&total_comm_time,comm_time_start); if(my_rank==0) { transpose(A); //Stop clock as operation is finished. end_time = MPI_Wtime(); //print the execution time for performance analysis purpose. printf("\n\nThe total execution time as recorded on process 0 = %f seconds!!\n!",end_time-start_time); printf("\n\nThe total communication time = %f seconds!!\n!",total_comm_time); convertToReal(dataA,A); } writeToFile("final_output.txt", dataA); MPI_Finalize(); return 0; }
void CMLcdPixDefault_Noai::updatePixel(int start_x, int start_y, enum UpdateRegion updateRegion) { int x, y; int end_y, end_x; int place_x, place_y; start_x = convertToReal( start_x ); start_y = convertToReal( start_y ); if(updateRegion == PART) { end_x = getRealWidth102(); end_y = getRealHeight64(); } else { end_y = getRealHeight240(); end_x = getRealWidth320(); } for( y = 0; y < end_y; y++ ) { place_y = start_y + y; for( x = 0; x < end_x; x++ ) { place_x = start_x + x; switch(LcdBlRvMgr::getPixelStatus( convertFromReal( place_x ), convertFromReal( place_y ))) { case ON: fgPixelData[y][x] = pixelData[place_y][place_x]; break; case OFF: fgPixelData[y][x] = 0; break; case REVERSE: fgPixelData[y][x] = 255 - pixelData[place_y][place_x]; break; } } } #if 0 int x, y; int end_y, end_x; if(updateRegion == PART) { start_x = convertToReal( start_x ); start_y = convertToReal( start_y ); end_x = start_x + getRealWidth102(); end_y = start_y + getRealHeight64(); } else { end_y = getRealHeight240(); end_x = getRealWidth320(); } for( y = start_y; y < end_y; y++ ) { for( x = start_x; x < end_x; x++ ) { switch(LcdBlRvMgr::getPixelStatus( convertFromReal( x ), convertFromReal( y ))) { case ON: fgPixelData[y - start_y ][x - start_x ] = pixelData[y][x]; break; case OFF: fgPixelData[y - start_y ][x - start_x ] = 0; break; case REVERSE: fgPixelData[y - start_y ][x - start_x ] = 255 - (int)pixelData[y][x]; break; } } } #endif }