//removes the lowest energy vertical seam from the image
void removeVerticalSeam() {
    double energies[3];
    double min_energy;
    int prev_x;
    int prev_y;

    // split up work between processes
    double *my_path_costs;
    double *my_previous_x;
    double *my_previous_y;
    double *temp_path_costs;
    double *temp_previous_x;
    double *temp_previous_y;
    int my_cols = current_width / numprocs;
    int low_cols = my_cols;
    int extra_cols = current_width % numprocs;
    int start;
    int x_offset;
    int recv_cols;
    double left_end_cost, right_end_cost, temp_end_cost;

    if (rank < extra_cols) {
        my_cols++;
        start = rank * my_cols;
    } else {
        start = (extra_cols * (my_cols + 1)) + ((rank - extra_cols) * my_cols);
    }
    //printf("%d %d %d\n", rank, start, my_cols);

    my_path_costs = (double *) malloc(my_cols * current_height * sizeof(double));
    my_previous_x = (double *) malloc(my_cols * current_height * sizeof(double));
    my_previous_y = (double *) malloc(my_cols * current_height * sizeof(double));

    //find the lowest cost seam by computing the lowest cost paths to each pixel
    for (int y = 0; y < current_height; y++) {
        //compute the path costs for my columns
        for (int x = start; x < start + my_cols; x++) {
            //printf("%d %d %d %d %d\n", rank, x, y, (x - start) * current_height + y, my_cols * current_height);
            if (y == 0) {
                path_costs[x * initial_height] = image_energy[x * initial_height];
                my_path_costs[(x - start) * current_height + y] = path_costs[x * initial_height];
                previous_x[x * initial_height] = -1;
                my_previous_x[(x - start) * current_height + y] = previous_x[x * initial_height];
                previous_y[x * initial_height] = -1;
                my_previous_y[(x - start) * current_height + y] = previous_y[x * initial_height];
            } else {
                //the pixel directly above
                energies[1] = path_costs[x * initial_height + y - 1];
                //pixel above to the left
                if (x != 0) {
                    energies[0] = path_costs[(x - 1) * initial_height + y - 1];
                } else {
                    energies[0] = DBL_MAX;
                }
                //pixel above to the right
                if (x != current_width - 1) {
                    energies[2] = path_costs[(x + 1) * initial_height + y - 1];
                } else {
                    energies[2] = DBL_MAX;
                }

                //find the one with the least path cost
                min_energy = energies[0];
                prev_x = x - 1;
                prev_y = y - 1;
                if (energies[1] < min_energy) {
                    min_energy = energies[1];
                    prev_x = x;
                }
                if (energies[2] < min_energy) {
                    min_energy = energies[2];
                    prev_x = x + 1;
                }

                //set the minimum path cost for this pixel
                path_costs[x * initial_height + y] = min_energy + image_energy[x * initial_height + y];
                my_path_costs[(x - start) * current_height + y] = path_costs[x * initial_height + y];

                //set the previous pixel on the minimum path's coordinates for this pixel
                previous_x[x * initial_height + y] = prev_x;
                my_previous_x[(x - start) * current_height + y] = previous_x[x * initial_height + y];
                previous_y[x * initial_height + y] = prev_y;
                my_previous_y[(x - start) * current_height + y] = previous_y[x * initial_height + y];
            }
        }

        //send path cost needed to neighboring processes
        if (numprocs > 1) {
            if (rank != numprocs - 1) {
                //send rightmost cost to following process
                right_end_cost = path_costs[(start + my_cols - 1) * initial_height + y];
                MPI_Send(&right_end_cost, 1, MPI_DOUBLE, rank + 1, 0, MPI_COMM_WORLD);
                //receive following process's leftmost cost
                MPI_Recv(&temp_end_cost, 1, MPI_DOUBLE, rank + 1, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
                path_costs[(start + my_cols) * initial_height + y] = temp_end_cost;
            }
            if (rank != 0) {
                //send leftmost cost to preceding process
                left_end_cost = path_costs[start * initial_height + y];
                MPI_Send(&left_end_cost, 1, MPI_DOUBLE, rank - 1, 0, MPI_COMM_WORLD);
                //receive preceding process's rightmost cost
                MPI_Recv(&temp_end_cost, 1, MPI_DOUBLE, rank - 1, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
                path_costs[(start - 1) * initial_height + y] = temp_end_cost;
            }
        }
    }

    //update path costs and previous for all processes
    for (int i = 0; i < numprocs; i++) {
        if (rank == i) {
            continue;
        }
        if (i < extra_cols) {
            x_offset = i * (low_cols + 1);
            recv_cols = (low_cols + 1);
        } else {
            x_offset = (extra_cols * (low_cols + 1)) + ((i - extra_cols) * low_cols);
            recv_cols = low_cols;
        }
        //printf("%d %d\n", low_cols, extra_cols);
        //printf("%d %d %d\n", rank, x_offset, recv_cols);

        temp_path_costs = (double *) malloc(recv_cols * current_height * sizeof(double));
        temp_previous_x = (double *) malloc(recv_cols * current_height * sizeof(double));
        temp_previous_y = (double *) malloc(recv_cols * current_height * sizeof(double));
        MPI_Sendrecv(my_path_costs, my_cols * current_height, MPI_DOUBLE, i, 0,
                     temp_path_costs, recv_cols * current_height, MPI_DOUBLE, i, 0,
                     MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        MPI_Sendrecv(my_previous_x, my_cols * current_height, MPI_DOUBLE, i, 1,
                     temp_previous_x, recv_cols * current_height, MPI_DOUBLE, i, 1,
                     MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        MPI_Sendrecv(my_previous_y, my_cols * current_height, MPI_DOUBLE, i, 2,
                     temp_previous_y, recv_cols * current_height, MPI_DOUBLE, i, 2,
                     MPI_COMM_WORLD, MPI_STATUS_IGNORE);

        //the received buffers are laid out column by column, i.e. element (x, y) sits at
        //(x - x_offset) * current_height + y, so x and y must be recovered from j by
        //dividing/modding by current_height (not by recv_cols)
        for (int j = 0; j < recv_cols * current_height; j++) {
            int x = x_offset + j / current_height;
            int y = j % current_height;
            //printf("%d %d %d %d %d\n", rank, x, y, x * initial_height + y, recv_cols * current_height);
            //printf("%d\n", initial_height * initial_width);
            path_costs[x * initial_height + y] = temp_path_costs[(x - x_offset) * current_height + y];
            previous_x[x * initial_height + y] = temp_previous_x[(x - x_offset) * current_height + y];
            previous_y[x * initial_height + y] = temp_previous_y[(x - x_offset) * current_height + y];
        }
        free(temp_path_costs);
        free(temp_previous_x);
        free(temp_previous_y);
    }
    free(my_path_costs);
    free(my_previous_x);
    free(my_previous_y);
    //printf("here\n");

    //find the x coordinate where the lowest cost seam ends at the bottom of the current image
    int x_coord = 0;
    for (int x = 0; x < current_width; x++) {
        if (path_costs[x * initial_height + current_height - 1] < path_costs[x_coord * initial_height + current_height - 1]) {
            x_coord = x;
        }
    }
    //printf("here\n");

    //delete the seam from the bottom up
    for (int y = current_height - 1; y >= 0; y--) {
        //delete this pixel by copying over it and all those following to the right
        for (int x = x_coord; x < current_width - 1; x++) {
            image[x * initial_height + y] = image[(x + 1) * initial_height + y];
        }
        //next pixel
        //printf("%d\n", x_coord * initial_height + y);
        x_coord = previous_x[x_coord * initial_height + y];
        //printf("%d %d\n", rank, x_coord);
    }

    //decrease the current width of the image
    current_width--;
}
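/* Design note, not from the original code: the all-pairs MPI_Sendrecv exchange above gives
 * every rank a copy of every other rank's column block, which could also be expressed as one
 * MPI_Allgatherv per array followed by the same unpacking loop. A minimal sketch for the path
 * costs only (previous_x and previous_y would be gathered the same way); counts, displs and
 * all_path_costs are hypothetical names, and the fragment would sit where the exchange loop is. */
int *counts = (int *) malloc(numprocs * sizeof(int));
int *displs = (int *) malloc(numprocs * sizeof(int));
for (int i = 0; i < numprocs; i++) {
    int cols_i = current_width / numprocs + (i < extra_cols ? 1 : 0);  //columns owned by rank i
    counts[i] = cols_i * current_height;
    displs[i] = (i == 0) ? 0 : displs[i - 1] + counts[i - 1];
}
double *all_path_costs = (double *) malloc((size_t) current_width * current_height * sizeof(double));
MPI_Allgatherv(my_path_costs, my_cols * current_height, MPI_DOUBLE,
               all_path_costs, counts, displs, MPI_DOUBLE, MPI_COMM_WORLD);
/* all_path_costs[displs[i] + c * current_height + y] now holds rank i's path cost for its c-th
 * local column at row y, and can be copied into path_costs exactly as in the unpacking loop above. */
free(all_path_costs);
free(counts);
free(displs);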
int main(int argc, char** argv) {
    MPI_Init(&argc, &argv);

    int size, rank;
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    printf("I am process number - %d\n", rank);

    int rowsA = 9;
    int columnA = 4;
    int columnB = 9;
    int rowsB = columnA;

    TMatrix matrixC = createMatrix(rowsA, columnB);

    if (rank == 0) {
        double start, end;
        TMatrix matrixA = createMatrix(rowsA, columnA);
        TMatrix matrixB = createMatrix(rowsB, columnB);
        fillSimpleMatrix(matrixA.data, matrixA.rows, matrixA.columns);
        fillSimpleMatrix(matrixB.data, matrixB.rows, matrixB.columns);
        printMatrix(matrixA);
        printf("\n");
        printMatrix(matrixB);
        printf("\n");

        int numberOfSlaves = size - 1;
        start = MPI_Wtime();
        int rowsPerWorker = rowsA / numberOfSlaves;
        int remainingRows = rowsA % numberOfSlaves;
        int offsetRow = 0;
        int messageType = FROM_MASTER;

        for (int destination = 1; destination <= numberOfSlaves; destination++) {
            int rows = (destination <= remainingRows) ? rowsPerWorker + 1 : rowsPerWorker;
            MPI_Send((void *)&offsetRow, 1, MPI_INT, destination, messageType, MPI_COMM_WORLD);
            MPI_Send((void *)&rows, 1, MPI_INT, destination, messageType, MPI_COMM_WORLD);
            //advance by whole rows (offsetRow * columnA doubles), not by offsetRow elements
            double* temp = matrixA.data + offsetRow * columnA;
            MPI_Send((void *)temp, rows * columnA, MPI_DOUBLE, destination, messageType, MPI_COMM_WORLD);
            MPI_Send((void *)matrixB.data, rowsB * columnB, MPI_DOUBLE, destination, messageType, MPI_COMM_WORLD);
            offsetRow += rows;
        }

        messageType = FROM_SLAVE;
        for (int source = 1; source <= numberOfSlaves; source++) {
            int rowOffset;
            MPI_Status status;
            MPI_Recv((void*)&rowOffset, 1, MPI_INT, source, messageType, MPI_COMM_WORLD, &status);
            int rows;
            MPI_Recv((void*)&rows, 1, MPI_INT, source, messageType, MPI_COMM_WORLD, &status);
            double* temp = (double*)malloc(rows * columnB * sizeof(double));
            MPI_Recv((void*)temp, rows * columnB, MPI_DOUBLE, source, FROM_SLAVE, MPI_COMM_WORLD, &status);
            for (int j = 0; j < rows * columnB; j++) {
                matrixC.data[rowOffset * columnB + j] = temp[j];
            }
            free(temp);
        }

        printMatrix(matrixC);
        printf("\n");
        end = MPI_Wtime();
        printf("time: %.4f\n", end - start);
    } else {
        int offsetRow;
        MPI_Status status;
        MPI_Recv((void*)&offsetRow, 1, MPI_INT, 0, FROM_MASTER, MPI_COMM_WORLD, &status);
        int rows;
        MPI_Recv((void*)&rows, 1, MPI_INT, 0, FROM_MASTER, MPI_COMM_WORLD, &status);
        printf("Process - %d, Offset - %d, Rows - %d\n", rank, offsetRow, rows);

        TMatrix aMatrix = createMatrix(rows, columnA);
        MPI_Recv((void*)aMatrix.data, rows * columnA, MPI_DOUBLE, 0, FROM_MASTER, MPI_COMM_WORLD, &status);
        TMatrix bMatrix = createMatrix(rowsB, columnB);
        MPI_Recv((void*)bMatrix.data, rowsB * columnB, MPI_DOUBLE, 0, FROM_MASTER, MPI_COMM_WORLD, &status);

        double* c = (double*)malloc(rows * columnB * sizeof(double));
        for (int i = 0; i < rows; i++) {
            for (int j = 0; j < columnB; j++) {
                c[i * columnB + j] = 0.0;
                for (int k = 0; k < columnA; k++) {
                    //B is rowsB x columnB in row-major order, so B(k, j) is data[k * columnB + j]
                    c[i * columnB + j] += aMatrix.data[i * columnA + k] * bMatrix.data[k * columnB + j];
                }
            }
        }
        MPI_Send((void*)&offsetRow, 1, MPI_INT, 0, FROM_SLAVE, MPI_COMM_WORLD);
        MPI_Send((void*)&rows, 1, MPI_INT, 0, FROM_SLAVE, MPI_COMM_WORLD);
        MPI_Send((void*)c, rows * columnB, MPI_DOUBLE, 0, FROM_SLAVE, MPI_COMM_WORLD);
    }

    MPI_Finalize();
    return 0;
}
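/* The program above relies on a TMatrix type and a few helpers that are not shown.
 * A minimal sketch of what they might look like, assuming a flat row-major layout and
 * arbitrary tag values -- the real definitions may differ. */
#define FROM_MASTER 1
#define FROM_SLAVE  2

typedef struct {
    int rows;
    int columns;
    double *data;
} TMatrix;

TMatrix createMatrix(int rows, int columns) {
    TMatrix m = { rows, columns, (double *) calloc((size_t) rows * columns, sizeof(double)) };
    return m;
}

void fillSimpleMatrix(double *data, int rows, int columns) {
    for (int i = 0; i < rows; i++)
        for (int j = 0; j < columns; j++)
            data[i * columns + j] = (double) (i + j);   /* arbitrary test values */
}

void printMatrix(TMatrix m) {
    for (int i = 0; i < m.rows; i++) {
        for (int j = 0; j < m.columns; j++)
            printf("%6.1f ", m.data[i * m.columns + j]);
        printf("\n");
    }
}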
int main(int argc, char *argv[]) { //precision to work to sscanf(argv[1], "%lf", &PRECISION); fill_array(); long int sum, partial_sum; MPI_Status status; int ID, root_process, ierr, i, num_procs, an_id, sender, length, flag; ierr = MPI_Init(&argc, &argv); root_process = 0; /* find out MY process ID, and how many processes were started. */ ierr = MPI_Comm_rank(MPI_COMM_WORLD, &ID); ierr = MPI_Comm_size(MPI_COMM_WORLD, &num_procs); THREAD_NUM = num_procs-1; int INNER_ARRAY_SIZE = n-2; int TOTAL_NUMS = (INNER_ARRAY_SIZE)*(INNER_ARRAY_SIZE); //splits total amount of numbers in matrix into number of threads beng used, and finds the remainder int NUMS_PER_THREAD = (int)floor(TOTAL_NUMS / THREAD_NUM); int NUMS_REMAINDER = (TOTAL_NUMS % THREAD_NUM); if(ID == root_process) { // ROOT PROCESS //copies original array into a temporary array memcpy(temparr, arr, sizeof(arr)); matrix_work(num_procs, INNER_ARRAY_SIZE, NUMS_PER_THREAD, NUMS_REMAINDER, TOTAL_NUMS); while(get_diff()!=0){ memcpy(arr, temparr, sizeof(arr)); matrix_work(num_procs, INNER_ARRAY_SIZE, NUMS_PER_THREAD, NUMS_REMAINDER, TOTAL_NUMS); } flag=1; for(an_id = 1; an_id < num_procs; an_id++) { ierr = MPI_Send(&flag, 1, MPI_INT, an_id, send_data_tag, MPI_COMM_WORLD); } } else { // SLAVE PROCESS //while program not done, calculate averages from range in array and send back to root process while(get_flag()==0){ ierr = MPI_Recv(&(arr[0][0]), n*n, MPI_DOUBLE, root_process, send_data_tag, MPI_COMM_WORLD, &status); //Calculates the start and end numbers in the matrix for the thread to work on int START_NUM = 0 + ((ID-1)*NUMS_PER_THREAD); int END_NUM = (ID*NUMS_PER_THREAD)-1; if (ID == THREAD_NUM){ END_NUM = END_NUM+ NUMS_REMAINDER; } int range = END_NUM - START_NUM; double localnums[range]; //loops through numbers from start to end that need to be worked on and overwrites the temp array int i; int count = 0; for(i = START_NUM; i <= END_NUM; i++) { //get the x & y values for corresponding number in the matrix int x = (i%INNER_ARRAY_SIZE)+1; int y = ((i-(i%INNER_ARRAY_SIZE))/INNER_ARRAY_SIZE)+1; //get four surrounding numbers and average them //above double a = arr[y-1][x]; //right double b = arr[y][x+1]; //below double c = arr[y+1][x]; //left double d = arr[y][x-1]; double average = (a+b+c+d)/4; localnums[count] = average; count++; } //and finally, send array of new numbers back to the root process ierr = MPI_Send(&count, 1, MPI_INT, root_process, return_data_tag, MPI_COMM_WORLD); ierr = MPI_Send(&localnums, count, MPI_DOUBLE, root_process, return_data_tag, MPI_COMM_WORLD); } } ierr = MPI_Finalize(); }
double timeStepper::computeDt(int &numReads, int &numWrites) {
  // Time step control
  array minSpeedTemp, maxSpeedTemp;
  array minSpeed, maxSpeed;
  elemOld->computeMinMaxCharSpeeds(directions::X1, minSpeedTemp, maxSpeedTemp, numReads, numWrites);
  minSpeedTemp = minSpeedTemp / XCoords->dX1;
  maxSpeedTemp = maxSpeedTemp / XCoords->dX1;
  maxSpeed = af::max(maxSpeedTemp, af::abs(minSpeedTemp));

  if (params::dim > 1) {
    elemOld->computeMinMaxCharSpeeds(directions::X2, minSpeedTemp, maxSpeedTemp, numReads, numWrites);
    minSpeedTemp = minSpeedTemp / XCoords->dX2;
    maxSpeedTemp = maxSpeedTemp / XCoords->dX2;
    maxSpeed += af::max(maxSpeedTemp, af::abs(minSpeedTemp));
  }
  if (params::dim > 2) {
    elemOld->computeMinMaxCharSpeeds(directions::X3, minSpeedTemp, maxSpeedTemp, numReads, numWrites);
    minSpeedTemp = minSpeedTemp / XCoords->dX3;
    maxSpeedTemp = maxSpeedTemp / XCoords->dX3;
    maxSpeed += af::max(maxSpeedTemp, af::abs(minSpeedTemp));
  }

  array maxInvDt_af = af::max(af::max(af::max(maxSpeed, 2), 1), 0);
  double maxInvDt = maxInvDt_af.host<double>()[0];

  /* Use MPI to find the global maximum of maxInvDt (i.e. the minimum dt) over all processors */
  if (world_rank == 0) {
    double temp;
    for (int i = 1; i < world_size; i++) {
      MPI_Recv(&temp, 1, MPI_DOUBLE, i, i, PETSC_COMM_WORLD, MPI_STATUS_IGNORE);
      if (maxInvDt < temp) {
        maxInvDt = temp;
      }
    }
  } else {
    MPI_Send(&maxInvDt, 1, MPI_DOUBLE, 0, world_rank, PETSC_COMM_WORLD);
  }
  MPI_Barrier(PETSC_COMM_WORLD);
  MPI_Bcast(&maxInvDt, 1, MPI_DOUBLE, 0, PETSC_COMM_WORLD);
  MPI_Barrier(PETSC_COMM_WORLD);

  double newDt = params::CourantFactor / maxInvDt;
  if (newDt > params::maxDtIncrement * dt) {
    newDt = params::maxDtIncrement * dt;
  }
  dt = newDt;

  return dt;   //the function is declared to return a double
}
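/* Side note, not part of the original code: the receive loop plus broadcast above simply
 * computes the global maximum of maxInvDt. A minimal sketch of the equivalent single
 * collective, assuming the same communicator and variable names as above. */
double globalMaxInvDt;
MPI_Allreduce(&maxInvDt, &globalMaxInvDt, 1, MPI_DOUBLE, MPI_MAX, PETSC_COMM_WORLD);
maxInvDt = globalMaxInvDt;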
int MTestTestIntercomm(MPI_Comm comm) { int local_size, remote_size, rank, **bufs, *bufmem, rbuf[2], j; int errs = 0, wrank, nsize; char commname[MPI_MAX_OBJECT_NAME + 1]; MPI_Request *reqs; MPI_Comm_rank(MPI_COMM_WORLD, &wrank); MPI_Comm_size(comm, &local_size); MPI_Comm_remote_size(comm, &remote_size); MPI_Comm_rank(comm, &rank); MPI_Comm_get_name(comm, commname, &nsize); MTestPrintfMsg(1, "Testing communication on intercomm '%s', remote_size=%d\n", commname, remote_size); reqs = (MPI_Request *) malloc(remote_size * sizeof(MPI_Request)); if (!reqs) { printf("[%d] Unable to allocated %d requests for testing intercomm %s\n", wrank, remote_size, commname); errs++; return errs; } bufs = (int **) malloc(remote_size * sizeof(int *)); if (!bufs) { printf("[%d] Unable to allocated %d int pointers for testing intercomm %s\n", wrank, remote_size, commname); errs++; return errs; } bufmem = (int *) malloc(remote_size * 2 * sizeof(int)); if (!bufmem) { printf("[%d] Unable to allocated %d int data for testing intercomm %s\n", wrank, 2 * remote_size, commname); errs++; return errs; } /* Each process sends a message containing its own rank and the * rank of the destination with a nonblocking send. Because we're using * nonblocking sends, we need to use different buffers for each isend */ /* NOTE: the send buffer access restriction was relaxed in MPI-2.2, although * it doesn't really hurt to keep separate buffers for our purposes */ for (j = 0; j < remote_size; j++) { bufs[j] = &bufmem[2 * j]; bufs[j][0] = rank; bufs[j][1] = j; MPI_Isend(bufs[j], 2, MPI_INT, j, 0, comm, &reqs[j]); } MTestPrintfMsg(2, "isends posted, about to recv\n"); for (j = 0; j < remote_size; j++) { MPI_Recv(rbuf, 2, MPI_INT, j, 0, comm, MPI_STATUS_IGNORE); if (rbuf[0] != j) { printf("[%d] Expected rank %d but saw %d in %s\n", wrank, j, rbuf[0], commname); errs++; } if (rbuf[1] != rank) { printf("[%d] Expected target rank %d but saw %d from %d in %s\n", wrank, rank, rbuf[1], j, commname); errs++; } } if (errs) fflush(stdout); MTestPrintfMsg(2, "my recvs completed, about to waitall\n"); MPI_Waitall(remote_size, reqs, MPI_STATUSES_IGNORE); free(reqs); free(bufs); free(bufmem); return errs; }
void sync_display(void)
{
#ifdef CONF_MPI
    MPI_Status stat;
    int size = 0;
#endif
    int rank = 0;

    char name[MAXNAME];
    int i = 0;

    gethostname(name, MAXNAME);

#ifdef CONF_MPI
    assert_mpi(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
    assert_mpi(MPI_Comm_size(MPI_COMM_WORLD, &size));

    if (rank)
    {
        /* Send the name to the root, receive the host index. */

        MPI_Send(name, MAXNAME, MPI_BYTE, 0, 0, MPI_COMM_WORLD);
        MPI_Recv(&i, 4, MPI_BYTE, 0, 0, MPI_COMM_WORLD, &stat);
    }
    else
    {
        int j, k;

        /* Find a host definition for the root. */

        i = find_display(name);

        /* Receive a name from each client, send a host definition index. */

        for (j = 1; j < size; ++j)
        {
            MPI_Recv(name, MAXNAME, MPI_BYTE, j, 0, MPI_COMM_WORLD, &stat);
            k = find_display(name);
            MPI_Send(&k, 4, MPI_BYTE, j, 0, MPI_COMM_WORLD);
        }
    }
#else
    i = find_display(name);
#endif

    /* If no host definition was found, create a default. */

    if (i == 0)
    {
        i = add_host(DEFAULT_NAME, DEFAULT_X, DEFAULT_Y, DEFAULT_W, DEFAULT_H);
        add_tile(i, DEFAULT_X, DEFAULT_Y, DEFAULT_W, DEFAULT_H);

        host[i].flags = HOST_FRAMED;
    }

    /* Note the indexed host definition as current. */

    current_host = host[i];

    /* Position the server window, if necessary. */

    if (rank || (current_host.flags & HOST_FRAMED) == 0)
        set_window_pos(current_host.win_x, current_host.win_y);
}
unsigned int master(unsigned int base_dim, unsigned int max_fact,
                    unsigned int** exponents, mpz_t* As,
                    int comm_size, unsigned int print_fact) {

  unsigned int fact_count = 0;
  MPI_Status status;
  int count;
  int source;

  /* Buffer for receiving the exponent vectors */
  unsigned int* buffer_exp;
  /* Buffer for receiving (A + s) */
  unsigned char buffer_As[BUFFER_DIM];

  init_vector(&buffer_exp, base_dim);

  double t1 = MPI_Wtime();
  double t2;

  int fact_per_rank[comm_size];
  for (int i = 0; i < comm_size; ++i)
    fact_per_rank[i] = 0;

  while (fact_count < max_fact + base_dim) {
    /* Receive the exponent vector */
    MPI_Recv(buffer_exp, base_dim, MPI_UNSIGNED, MPI_ANY_SOURCE, ROW_TAG, MPI_COMM_WORLD, &status);
    source = status.MPI_SOURCE;

    for (unsigned int i = 0; i < base_dim; ++i)
      set_matrix(exponents, fact_count, i, buffer_exp[i]);

    /* Receive the mpz holding (A + s) */
    MPI_Recv(buffer_As, BUFFER_DIM, MPI_UNSIGNED_CHAR, source, AS_TAG, MPI_COMM_WORLD, &status);
    MPI_Get_count(&status, MPI_UNSIGNED_CHAR, &count);
    mpz_import(As[fact_count], count, 1, 1, 1, 0, buffer_As);

    ++fact_count;
    ++fact_per_rank[source];

    if (fact_count % print_fact == 0) {
      t2 = MPI_Wtime() - t1;
      printf("#%d/%d in %.6f seconds\n", fact_count, max_fact + base_dim, t2);
    }
  }

  /* Send '1' to the slaves to signal termination */
  char stop_signal = '1';
  for (unsigned int i = 1; i < comm_size; ++i)
    MPI_Send(&stop_signal, 1, MPI_CHAR, i, 0, MPI_COMM_WORLD);
  printf("#Sending stop_signal\n");

  printf("#Factorizations per rank:\n#");
  for (int i = 1; i < comm_size; ++i)
    printf("%d \t", i);
  printf("\n#");
  for (int i = 1; i < comm_size; ++i)
    printf("%d \t", fact_per_rank[i]);
  printf("\n");

  return fact_count;
}
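/* Hypothetical worker-side counterpart (a sketch only -- the real slave code is not part of
 * this snippet): it serializes (A + s) with mpz_export using the same order/size/endian
 * parameters that the master's mpz_import expects, and reuses ROW_TAG, AS_TAG and BUFFER_DIM
 * from above. The function name and signature are assumptions for illustration. */
void send_relation(const unsigned int* exponents, unsigned int base_dim, const mpz_t As) {
  unsigned char buffer_As[BUFFER_DIM];
  size_t count = 0;

  /* exponent vector first, then the serialized (A + s), matching the master's two receives */
  MPI_Send((void*)exponents, base_dim, MPI_UNSIGNED, 0, ROW_TAG, MPI_COMM_WORLD);
  mpz_export(buffer_As, &count, 1, 1, 1, 0, As);
  MPI_Send(buffer_As, (int)count, MPI_UNSIGNED_CHAR, 0, AS_TAG, MPI_COMM_WORLD);
}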
int TransmitProteinsToChildProcesses() { int numProteins = (int) proteins.size(); vector< simplethread_handle_t > workerHandles; int sourceProcess, batchSize; bool IsFinished = false; Timer searchTime( true ); float totalSearchTime = 0.01f; float lastUpdate = 0.0f; int i = 0; int numChildrenFinished = 0; while( numChildrenFinished < g_numChildren ) { #ifdef MPI_DEBUG cout << g_hostString << " is listening for a child process to offer to search some proteins." << endl; #endif // Listen for a process requesting proteins. // Extract the number of CPUs available on the process. MPI_Recv( &sourceProcess, 1, MPI_INT, MPI_ANY_SOURCE, 0xFF, MPI_COMM_WORLD, &st ); int sourceCPUs = 0; MPI_Recv( &sourceCPUs, 1, MPI_INT, sourceProcess, 0xFF, MPI_COMM_WORLD, &st ); int pOffset = i; // Scale the batchSize with the number of cpus in the requested process. batchSize = min( numProteins-i, g_rtConfig->ProteinBatchSize*sourceCPUs ); stringstream packStream; binary_oarchive packArchive( packStream ); try { packArchive & pOffset; string proteinStream; for( int j = i; j < i + batchSize; ++j ) { proteinStream += ">" + proteins[j].getName() + " " + proteins[j].getDescription() + "\n" + proteins[j].getSequence() + "\n"; } packArchive & proteinStream; } catch( exception& e ) { cerr << g_hostString << " had an error: " << e.what() << endl; exit(1); } #ifdef MPI_DEBUG cout << "Process #" << sourceProcess << " has " << sourceCPUs << " cpus. Sending " << batchSize << " proteins." << endl; #endif if( i < numProteins ) { MPI_Ssend( &batchSize, 1, MPI_INT, sourceProcess, 0x99, MPI_COMM_WORLD ); #ifdef MPI_DEBUG cout << g_hostString << " is sending " << batchSize << " proteins." << endl; Timer sendTime(true); #endif string pack = packStream.str(); int len = (int) pack.length(); MPI_Send( &len, 1, MPI_INT, sourceProcess, 0x00, MPI_COMM_WORLD ); MPI_Send( (void*) pack.c_str(), len, MPI_CHAR, sourceProcess, 0x01, MPI_COMM_WORLD ); #ifdef MPI_DEBUG cout << g_hostString << " finished sending " << batchSize << " proteins; " << sendTime.End() << " seconds elapsed." << endl; #endif i += batchSize; } else { batchSize = 0; MPI_Ssend( &batchSize, 1, MPI_INT, sourceProcess, 0x99, MPI_COMM_WORLD ); #ifdef MPI_DEBUG cout << "Process #" << sourceProcess << " has been informed that all proteins have been searched." << endl; #endif ++numChildrenFinished; } totalSearchTime = searchTime.TimeElapsed(); if( !IsFinished && ( ( totalSearchTime - lastUpdate > g_rtConfig->StatusUpdateFrequency ) || i+1 == numProteins ) ) { if( i+1 == numProteins ) IsFinished = true; float proteinsPerSec = float(i+1) / totalSearchTime; bpt::time_duration estimatedTimeRemaining(0, 0, round((numProteins - i) / proteinsPerSec)); cout << "Searched " << i << " of " << numProteins << " proteins; " << round(proteinsPerSec) << " per second, " << format_date_time("%H:%M:%S", bpt::time_duration(0, 0, round(totalSearchTime))) << " elapsed, " << format_date_time("%H:%M:%S", estimatedTimeRemaining) << " remaining." << endl; lastUpdate = totalSearchTime; } } return 0; }
int ReceiveResultsFromChildProcesses(bool firstBatch = false) { int numSpectra; int sourceProcess; Timer ResultsTime( true ); float totalResultsTime = 0.01f; float lastUpdate = 0.0f; for( int p=0; p < g_numChildren; ++p ) { MPI_Recv( &sourceProcess, 1, MPI_INT, MPI_ANY_SOURCE, 0xEE, MPI_COMM_WORLD, &st ); #ifdef MPI_DEBUG cout << g_hostString << " is receiving search results." << endl; Timer receiveTime(true); #endif string pack; int len; MPI_Recv( &len, 1, MPI_INT, sourceProcess, 0x00, MPI_COMM_WORLD, &st ); pack.resize( len ); MPI_Recv( (void*) pack.data(), len, MPI_CHAR, sourceProcess, 0x01, MPI_COMM_WORLD, &st ); stringstream compressedStream( pack ); stringstream packStream; boost::iostreams::filtering_ostream decompressorStream; decompressorStream.push( boost::iostreams::zlib_decompressor() ); decompressorStream.push( packStream ); boost::iostreams::copy( compressedStream, decompressorStream ); decompressorStream.reset(); binary_iarchive packArchive( packStream ); try { SearchStatistics childSearchStats; packArchive & numSpectra; packArchive & childSearchStats; if(firstBatch) { searchStatistics = searchStatistics + childSearchStats; } else { searchStatistics.numPeptidesGenerated += childSearchStats.numPeptidesGenerated; searchStatistics.numVariantsGenerated += childSearchStats.numVariantsGenerated; searchStatistics.numComparisonsDone += childSearchStats.numComparisonsDone; searchStatistics.numPeptidesSkipped += childSearchStats.numPeptidesSkipped; } //cout << g_hostString << " is unpacking results for " << numSpectra << " spectra." << endl; for( SpectraList::iterator sItr = spectra.begin(); sItr != spectra.end(); ++sItr ) { Spectrum* childSpectrum = new Spectrum; Spectrum* rootSpectrum = *sItr; packArchive & *childSpectrum; rootSpectrum->numTargetComparisons += childSpectrum->numTargetComparisons; rootSpectrum->numDecoyComparisons += childSpectrum->numDecoyComparisons; rootSpectrum->processingTime += childSpectrum->processingTime; rootSpectrum->resultsByCharge.resize(childSpectrum->resultsByCharge.size()); for (size_t z=0; z < childSpectrum->resultsByCharge.size(); ++z) { Spectrum::SearchResultSetType& rootResults = rootSpectrum->resultsByCharge[z]; Spectrum::SearchResultSetType& childResults = childSpectrum->resultsByCharge[z]; BOOST_FOREACH(const Spectrum::SearchResultPtr& result, childResults) rootResults.add( result ); if (childResults.bestFullySpecificTarget().get()) rootResults.add(childResults.bestFullySpecificTarget()); if (childResults.bestFullySpecificDecoy().get()) rootResults.add(childResults.bestFullySpecificDecoy()); if (childResults.bestSemiSpecificTarget().get()) rootResults.add(childResults.bestSemiSpecificTarget()); if (childResults.bestSemiSpecificDecoy().get()) rootResults.add(childResults.bestSemiSpecificDecoy()); if (childResults.bestNonSpecificTarget().get()) rootResults.add(childResults.bestNonSpecificTarget()); if (childResults.bestNonSpecificDecoy().get()) rootResults.add(childResults.bestNonSpecificDecoy()); } for(flat_map<int,int>::iterator itr = childSpectrum->mvhScoreDistribution.begin(); itr != childSpectrum->mvhScoreDistribution.end(); ++itr) rootSpectrum->mvhScoreDistribution[(*itr).first] += (*itr).second; for(flat_map<int,int>::iterator itr = childSpectrum->mzFidelityDistribution.begin(); itr != childSpectrum->mzFidelityDistribution.end(); ++itr) rootSpectrum->mzFidelityDistribution[(*itr).first] += (*itr).second; rootSpectrum->scoreHistogram += childSpectrum->scoreHistogram; delete childSpectrum; } //cout << g_hostString << " is finished unpacking 
results." << endl; } catch( exception& e ) { cerr << g_hostString << " had an error: " << e.what() << endl; exit(1); } #ifdef MPI_DEBUG cout << g_hostString << " finished receiving " << numSpectra << " search results; " << receiveTime.End() << " seconds elapsed."; #endif totalResultsTime = ResultsTime.TimeElapsed(); if( ( totalResultsTime - lastUpdate > g_rtConfig->StatusUpdateFrequency ) || p+1 == g_numChildren ) { float nodesPerSec = float(p+1) / totalResultsTime; float estimatedTimeRemaining = float(g_numChildren-p-1) / nodesPerSec; cout << "Received results from " << p+1 << " of " << g_numChildren << " worker nodes; " << nodesPerSec << " per second, " << estimatedTimeRemaining << " seconds remaining." << endl; lastUpdate = totalResultsTime; } } return 0; }
void two_d_partitioning(MPI_Comm *comm_new, float *A, int local_rank, int num_procs) { MPI_Status status; int k, i, j, startingRow, endingRow, numRows, startingColumn, endingColumn, numColumns; int n_startingRow, n_startingColumn, n_local_coords[2]; //long double determinant; double start, end, dt; int p = (int) sqrt(num_procs); int dis, left_rank, right_rank, up_rank, down_rank; MPI_Request req; numRows = n / p; numColumns = numRows; startingRow = local_coords[1] * numRows; endingRow = startingRow + numRows; startingColumn = local_coords[0] * numRows; endingColumn = startingColumn + numColumns; start = MPI_Wtime(); for( k = 0; k < n; k++ ) { float Akk[1]; int local_k = k % numRows; // Send A(k,k) to the right start = MPI_Wtime(); if( k >= startingColumn && k < endingColumn && k >= startingRow && k < endingRow ) { send_to(comm_new, 0, A, 1, local_k, local_k, numRows); Akk[0] = A[local_k * numRows + local_k]; } else if( k < startingColumn && k >= startingRow && k < endingRow ) { receive_from_left(comm_new, 0, Akk, 1, 0, 0, numRows, k); } end = MPI_Wtime(); dt = end - start; comm_time += dt; // Now calculate the row start = MPI_Wtime(); if( k >= startingColumn && k < endingColumn && k >= startingRow && k < endingRow ) { for( j = local_k + 1; j < numColumns; j++ ) { A[local_k * numRows + j] /= Akk[0]; } } else if( k >= startingRow && k < endingRow && k < startingColumn ) { for( j = 0; j < numColumns; j++ ) { A[local_k * numRows + j] /= Akk[0]; } } end = MPI_Wtime(); dt = end - start; proc_time += dt; // Now calculate the box int m, bOutside = 1; float top_row[numRows]; start = MPI_Wtime(); // k is West of this Partition if( k >= startingRow && k < endingRow & k < startingColumn ) { send_to(comm_new, 1, A, numColumns, local_k, 0, numRows); for( m = 0; m < numColumns; m++ ) { top_row[m] = A[local_k * numRows + m]; } bOutside = -1; } // k is in this BOX else if( k >= startingRow && k < endingRow && k >= startingColumn && k < endingColumn ) { int size = numColumns - (local_k + 1); if( size != 0 ) { send_to(comm_new, 1, A, size, local_k, local_k + 1, numRows); for( m = 0; m < size; m++ ) { top_row[m] = A[local_k * numRows + local_k + 1 + m]; } bOutside = -1; } } // k is NW of this box else if( k < startingRow && k < startingColumn ) { int sender_row = k / numRows; int sender_column = k / numColumns; int sender_rank = local_coords[0] * sqrt(num_procs) + sender_row; MPI_Recv(top_row, numColumns, MPI_FLOAT, sender_rank, 0, *comm_new, &status); bOutside = -1; } // k is N of this box else if( k < startingRow && k >= startingColumn && k < endingColumn ) { int sender_row = k / numRows; int sender_column = k / numColumns; int sender_rank = sender_column * sqrt(num_procs) + sender_row; int size = numColumns - (local_k + 1); if( size != 0 ) { //top_row = (float *)malloc(sizeof(float) * numberToReceive); //printf("%d Waiting to receive from:%d\n", local_rank, sender_rank); MPI_Recv(top_row, size, MPI_FLOAT, sender_rank, 0, *comm_new, &status); bOutside = -1; } } float left_row[numRows]; // k is N of this Box if( k >= startingColumn && k < endingColumn & k < startingRow ) { for(m = 0; m < numRows; m++ ) { left_row[m] = A[m * numColumns + local_k]; } send_to(comm_new, 0, left_row, numRows, 0, 0, 0); bOutside = -1; } // k is IN this box else if( k >= startingRow && k < endingRow && k >= startingColumn && k < endingColumn ) { //int local_k = k % numRows; int size = numColumns - (local_k + 1); if( size != 0 ) { for(m = 0; m < size; m++ ) { left_row[m] = A[(local_k + 1) * numColumns + local_k]; } send_to(comm_new, 
0, left_row, size, 0, 0, 0); bOutside = -1; } } // k is SW from this box else if( k < startingRow && k < startingColumn ) { int sender_row = k / numRows; int sender_column = k / numColumns; int sender_rank = sender_column * sqrt(num_procs) + local_coords[1]; MPI_Recv(left_row, numColumns, MPI_FLOAT, sender_rank, 0, *comm_new, &status); bOutside = -1; } // k is W of this box else if( k < startingColumn && k >= startingRow && k < endingRow ) { int sender_row = k / numRows; int sender_column = k / numColumns; int sender_rank = sender_column * sqrt(num_procs) + local_coords[1]; int local_k = k % numRows; int numberToReceive = numColumns - (local_k + 1); if( numberToReceive != 0 ) { MPI_Recv(left_row, numberToReceive, MPI_FLOAT, sender_rank, 0, *comm_new, &status); bOutside = -1; } } end = MPI_Wtime(); dt = end - start; comm_time += dt; // Now process the box if( bOutside < 0 ) { start = MPI_Wtime(); process_row_and_column(A, left_row, top_row, k, startingRow, endingRow, startingColumn, endingColumn, numRows, numColumns, local_k); end = MPI_Wtime(); dt = end - start; proc_time += dt; } } // end for float determinant[1]; float result[1]; determinant[0] = 1; if( local_coords[0] == local_coords[1] ) { start = MPI_Wtime(); for(i = 0; i < numRows; i++ ) { determinant[0] *= A[i * numRows + i]; } end = MPI_Wtime(); dt = end - start; proc_time += dt; } start = MPI_Wtime(); MPI_Reduce(determinant, result, 1, MPI_FLOAT, MPI_PROD, 0, *comm_new); end = MPI_Wtime(); dt = end - start; comm_time += dt; if( !computerStats && local_rank == 0 ) { printf("Determinant is %f\n", result[0]); } }
int TransmitUnpreparedSpectraToChildProcesses() { int numSpectra = (int) spectra.size(); int sourceProcess, batchSize; bool IsFinished = false; Timer PrepareTime( true ); float totalPrepareTime = 0.01f; float lastUpdate = 0.0f; int i = 0; int numChildrenFinished = 0; while( numChildrenFinished < g_numChildren ) { stringstream packStream; binary_oarchive packArchive( packStream ); // For every batch, listen for a worker process that is ready to receive it #ifdef MPI_DEBUG cout << g_hostString << " is listening for a child process to offer to prepare some spectra." << endl; #endif if( i < numSpectra ) { batchSize = min( numSpectra-i, g_rtConfig->SpectraBatchSize ); try { packArchive & batchSize; SpectraList::iterator sItr = spectra.begin(); advance( sItr, i ); for( int j = i; j < i + batchSize; ++j, ++sItr ) { packArchive & **sItr; } } catch( exception& e ) { cerr << g_hostString << " had an error: " << e.what() << endl; exit(1); } i += batchSize; } else { batchSize = 0; packArchive & batchSize; #ifdef MPI_DEBUG cout << "Process #" << sourceProcess << " has been informed that preparation is complete." << endl; #endif ++numChildrenFinished; } MPI_Recv( &sourceProcess, 1, MPI_INT, MPI_ANY_SOURCE, 0xFF, MPI_COMM_WORLD, &st ); stringstream compressedStream; boost::iostreams::filtering_ostream compressorStream; compressorStream.push( boost::iostreams::zlib_compressor() ); compressorStream.push( compressedStream ); boost::iostreams::copy( packStream, compressorStream ); compressorStream.reset(); string pack = compressedStream.str(); int len = (int) pack.length(); MPI_Send( &len, 1, MPI_INT, sourceProcess, 0x00, MPI_COMM_WORLD ); MPI_Send( (void*) pack.c_str(), len, MPI_CHAR, sourceProcess, 0x01, MPI_COMM_WORLD ); totalPrepareTime = PrepareTime.TimeElapsed(); if( !IsFinished && ( ( totalPrepareTime - lastUpdate > g_rtConfig->StatusUpdateFrequency ) || i == numSpectra ) ) { if( i == numSpectra ) IsFinished = true; float spectraPerSec = float(i) / totalPrepareTime; float estimatedTimeRemaining = float(numSpectra-i) / spectraPerSec; cout << "Prepared " << i << " of " << numSpectra << " spectra; " << spectraPerSec << " per second, " << estimatedTimeRemaining << " seconds remaining." << endl; lastUpdate = totalPrepareTime; } } return 0; }
int main(int argc, char **argv)
{
    int proc_id, n_procs, i, envios;
    int buffer0[SIZE0], buffer1[SIZE1], buffer2[SIZE2], buffer3[SIZE3], buffer4[SIZE4];
    int buffer5[SIZE5], buffer6[SIZE6], buffer7[SIZE7], buffer8[SIZE8], buffer9[SIZE9];
    MPI_Status estado;

    /* Start the MPI environment */
    MPI_Init(&argc, &argv);

    /* Get the rank and size of the comm world */
    MPI_Comm_rank(MPI_COMM_WORLD, &proc_id);
    MPI_Comm_size(MPI_COMM_WORLD, &n_procs);

    if (n_procs != 2) {
        printf("## This program requires exactly 2 processes\n");
        exit(EXIT_FAILURE);
    }

    /* Synchronize all processes */
    MPI_Barrier(MPI_COMM_WORLD);

    /* Only the master prints the initial information */
    if (proc_id == MASTER) {
        system("clear");
        printf("## RTT measurement / master-slave send\n\n");
        printf("## Total processors: %d\n", n_procs);
        obtener_info_sist();

        /* Read the number of sends from stdin */
        do {
            printf("%s", MENU_ENVIOS);
            scanf("%d", &envios);
        } while ((envios != 1) && (envios != 2) && (envios != 3));

        switch (envios) {
            case 1: envios = 100;   break;
            case 2: envios = 1000;  break;
            case 3: envios = 10000; break;
            default: break;
        }

        /* Send the value to the slave (the count is 1 MPI_INT, not sizeof(envios)) */
        MPI_Send(&envios, 1, MPI_INT, ESCLAVO, 0, MPI_COMM_WORLD);

        printf("## Starting the run with %d sends;\n", envios);
        printf("## Data sizes:\n");
        printf("## \tSIZE0 = %d\n", SIZE0);
        printf("## \tSIZE1 = %d\n", SIZE1);
        printf("## \tSIZE2 = %d\n", SIZE2);
        printf("## \tSIZE3 = %d\n", SIZE3);
        printf("## \tSIZE4 = %d\n", SIZE4);
        printf("## \tSIZE5 = %d\n", SIZE5);
        printf("## \tSIZE6 = %d\n", SIZE6);
        printf("## \tSIZE7 = %d\n", SIZE7);
        printf("## \tSIZE8 = %d\n", SIZE8);
        printf("## \tSIZE9 = %d\n", SIZE9);
        printf("\n");
    }

    MPI_Barrier(MPI_COMM_WORLD);

    if (proc_id == MASTER) {
        printf("## | size (b) | sends  | t_total (sec) | t/send (s)   | kB/s       |\n");
        printf("## |----------|--------|---------------|--------------|------------|\n");
        master_func(SIZE0, envios, estado);
        master_func(SIZE1, envios, estado);
        master_func(SIZE2, envios, estado);
        master_func(SIZE3, envios, estado);
        master_func(SIZE4, envios, estado);
        master_func(SIZE5, envios, estado);
        master_func(SIZE6, envios, estado);
        master_func(SIZE7, envios, estado);
        master_func(SIZE8, envios, estado);
        master_func(SIZE9, envios, estado);
        printf("## |__________|________|_______________|______________|____________|\n");
    } else {
        /* First, receive the number of sends from the master (count is 1 MPI_INT) */
        MPI_Recv(&envios, 1, MPI_INT, MASTER, 0, MPI_COMM_WORLD, &estado);

        /* Now do the receive-and-echo loop with that count, once per buffer size */
        for (i = 0; i < envios; ++i) {
            MPI_Recv(buffer0, SIZE0, MPI_INT, MASTER, 0, MPI_COMM_WORLD, &estado);
            buffer0[0] += 1;
            MPI_Send(buffer0, SIZE0, MPI_INT, MASTER, 0, MPI_COMM_WORLD);
        }
        for (i = 0; i < envios; ++i) {
            MPI_Recv(buffer1, SIZE1, MPI_INT, MASTER, 0, MPI_COMM_WORLD, &estado);
            buffer1[0] += 1;
            MPI_Send(buffer1, SIZE1, MPI_INT, MASTER, 0, MPI_COMM_WORLD);
        }
        for (i = 0; i < envios; ++i) {
            MPI_Recv(buffer2, SIZE2, MPI_INT, MASTER, 0, MPI_COMM_WORLD, &estado);
            buffer2[0] += 1;
            MPI_Send(buffer2, SIZE2, MPI_INT, MASTER, 0, MPI_COMM_WORLD);
        }
        for (i = 0; i < envios; ++i) {
            MPI_Recv(buffer3, SIZE3, MPI_INT, MASTER, 0, MPI_COMM_WORLD, &estado);
            buffer3[0] += 1;
            MPI_Send(buffer3, SIZE3, MPI_INT, MASTER, 0, MPI_COMM_WORLD);
        }
        for (i = 0; i < envios; ++i) {
            MPI_Recv(buffer4, SIZE4, MPI_INT, MASTER, 0, MPI_COMM_WORLD, &estado);
            buffer4[0] += 1;
            MPI_Send(buffer4, SIZE4, MPI_INT, MASTER, 0, MPI_COMM_WORLD);
        }
        for (i = 0; i < envios; ++i) {
            MPI_Recv(buffer5, SIZE5, MPI_INT, MASTER, 0, MPI_COMM_WORLD, &estado);
            buffer5[0] += 1;
            MPI_Send(buffer5, SIZE5, MPI_INT, MASTER, 0, MPI_COMM_WORLD);
        }
        for (i = 0; i < envios; ++i) {
            MPI_Recv(buffer6, SIZE6, MPI_INT, MASTER, 0, MPI_COMM_WORLD, &estado);
            buffer6[0] += 1;
            MPI_Send(buffer6, SIZE6, MPI_INT, MASTER, 0, MPI_COMM_WORLD);
        }
        for (i = 0; i < envios; ++i) {
            MPI_Recv(buffer7, SIZE7, MPI_INT, MASTER, 0, MPI_COMM_WORLD, &estado);
            buffer7[0] += 1;
            MPI_Send(buffer7, SIZE7, MPI_INT, MASTER, 0, MPI_COMM_WORLD);
        }
        for (i = 0; i < envios; ++i) {
            MPI_Recv(buffer8, SIZE8, MPI_INT, MASTER, 0, MPI_COMM_WORLD, &estado);
            buffer8[0] += 1;
            MPI_Send(buffer8, SIZE8, MPI_INT, MASTER, 0, MPI_COMM_WORLD);
        }
        for (i = 0; i < envios; ++i) {
            MPI_Recv(buffer9, SIZE9, MPI_INT, MASTER, 0, MPI_COMM_WORLD, &estado);
            buffer9[0] += 1;
            MPI_Send(buffer9, SIZE9, MPI_INT, MASTER, 0, MPI_COMM_WORLD);
        }
    }

    /* Shut down the MPI environment */
    MPI_Finalize();
    exit(EXIT_SUCCESS);
}
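/* master_func is called above but not shown. A purely hypothetical sketch of what it could look
 * like, with the signature and table formatting assumed from the call sites and the header row:
 * a ping-pong loop that times `envios` round trips of `size` ints with the slave. */
void master_func(int size, int envios, MPI_Status estado) {
    int *buffer = (int *) calloc(size, sizeof(int));   /* contents do not matter for timing */
    double t0 = MPI_Wtime();

    for (int i = 0; i < envios; ++i) {
        MPI_Send(buffer, size, MPI_INT, ESCLAVO, 0, MPI_COMM_WORLD);
        MPI_Recv(buffer, size, MPI_INT, ESCLAVO, 0, MPI_COMM_WORLD, &estado);
    }

    double t_total = MPI_Wtime() - t0;
    double bytes = (double) size * sizeof(int) * envios * 2;   /* round trip */
    printf("## | %8d | %6d | %13.6f | %12.9f | %10.2f |\n",
           size, envios, t_total, t_total / envios, bytes / (1024.0 * t_total));
    free(buffer);
}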
//*************************************************************************************************************** int Bellerophon::getChimeras() { try { //create breaking points vector<int> midpoints; midpoints.resize(iters, window); for (int i = 1; i < iters; i++) { midpoints[i] = midpoints[i-1] + increment; } #ifdef USE_MPI int pid, numSeqsPerProcessor; MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are MPI_Comm_size(MPI_COMM_WORLD, &processors); numSeqsPerProcessor = iters / processors; //each process hits this only once unsigned long long startPos = pid * numSeqsPerProcessor; if(pid == processors - 1){ numSeqsPerProcessor = iters - pid * numSeqsPerProcessor; } lines.push_back(linePair(startPos, numSeqsPerProcessor)); //fill pref with scores driverChimeras(midpoints, lines[0]); if (m->control_pressed) { return 0; } //each process must send its parts back to pid 0 if (pid == 0) { //receive results for (int j = 1; j < processors; j++) { vector<string> MPIBestSend; for (int i = 0; i < numSeqs; i++) { if (m->control_pressed) { return 0; } MPI_Status status; //receive string int length; MPI_Recv(&length, 1, MPI_INT, j, 2001, MPI_COMM_WORLD, &status); char* buf = new char[length]; MPI_Recv(&buf[0], length, MPI_CHAR, j, 2001, MPI_COMM_WORLD, &status); string temp = buf; if (temp.length() > length) { temp = temp.substr(0, length); } delete buf; MPIBestSend.push_back(temp); } fillPref(j, MPIBestSend); if (m->control_pressed) { return 0; } } }else { //takes best window for each sequence and turns Preference to string that can be parsed by pid 0. //played with this a bit, but it may be better to try user-defined datatypes with set string lengths?? vector<string> MPIBestSend = getBestWindow(lines[0]); pref.clear(); //send your result to parent for (int i = 0; i < numSeqs; i++) { if (m->control_pressed) { return 0; } int bestLength = MPIBestSend[i].length(); char* buf = new char[bestLength]; memcpy(buf, MPIBestSend[i].c_str(), bestLength); MPI_Send(&bestLength, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD); MPI_Send(buf, bestLength, MPI_CHAR, 0, 2001, MPI_COMM_WORLD); delete buf; } MPIBestSend.clear(); } MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case #else //divide breakpoints between processors #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) if(processors == 1){ lines.push_back(linePair(0, iters)); //fill pref with scores driverChimeras(midpoints, lines[0]); }else{ int numSeqsPerProcessor = iters / processors; for (int i = 0; i < processors; i++) { unsigned long long startPos = i * numSeqsPerProcessor; if(i == processors - 1){ numSeqsPerProcessor = iters - i * numSeqsPerProcessor; } lines.push_back(linePair(startPos, numSeqsPerProcessor)); } createProcesses(midpoints); } #else lines.push_back(linePair(0, iters)); ///fill pref with scores driverChimeras(midpoints, lines[0]); #endif #endif return 0; } catch(exception& e) { m->errorOut(e, "Bellerophon", "getChimeras"); exit(1); } }
//removes the lowest energy vertical seam from the image void removeHorizontalSeam() { double energies[3]; double min_energy; int prev_x; int prev_y; // split up work between processes double *my_path_costs; double *my_previous_x; double *my_previous_y; double *temp_path_costs; double *temp_previous_x; double *temp_previous_y; int my_rows = current_height / numprocs; int low_rows = my_rows; int extra_rows = current_height % numprocs; int start; int y_offset; int recv_rows; double top_end_cost, bottom_end_cost, temp_end_cost; if (rank < extra_rows) { my_rows++; start = rank * my_rows; } else { start = (extra_rows * (my_rows + 1)) + ((rank - extra_rows) * my_rows); } my_path_costs = (double *) malloc(my_rows * current_width * sizeof(double)); my_previous_x = (double *) malloc(my_rows * current_width * sizeof(double)); my_previous_y = (double *) malloc(my_rows * current_width * sizeof(double)); //find the lowest cost seam by computing the lowest cost paths to each pixel for (int x = 0; x < current_width; x++) { //compute the path costs for my rows for (int y = start; y < start + my_rows; y++) { if (x == 0) { path_costs[x * initial_height + y] = image_energy[x * initial_height + y]; my_path_costs[(y - start) * current_width + x] = path_costs[x * initial_height + y]; previous_x[x * initial_height + y] = -1; my_previous_x[(y - start) * current_width + x] = previous_x[x * initial_height + y]; previous_y[x * initial_height + y] = -1; my_previous_y[(y - start) * current_width + x] = previous_y[x * initial_height + y]; } else { //the pixel directly left energies[1] = path_costs[(x - 1) * initial_height + y]; //pixel left and above if (y != 0) { energies[0] = path_costs[(x - 1) * initial_height + y - 1]; } else { energies[0] = DBL_MAX; } //pixel left and below if (y != current_height - 1) { energies[2] = path_costs[(x - 1) * initial_height + y + 1]; } else { energies[2] = DBL_MAX; } //find the one with the least path cost min_energy = energies[0]; prev_x = x - 1; prev_y = y - 1; if (energies[1] < min_energy) { min_energy = energies[1]; prev_y = y; } if (energies[2] < min_energy) { min_energy = energies[2]; prev_y = y + 1; } //set the minimum path cost for this pixel path_costs[x * initial_height + y] = min_energy + image_energy[x * initial_height + y]; my_path_costs[(y - start) * current_width + x] = path_costs[x * initial_height + y]; //set the previous pixel on the minimum path's coordinates for this pixel previous_x[x * initial_height + y] = prev_x; my_previous_x[(y - start) * current_width + x] = previous_x[x * initial_height + y]; previous_y[x * initial_height + y] = prev_y; my_previous_y[(y - start) * current_width + x] = previous_y[x * initial_height + y]; } } //send path cost needed to neighboring processes if (numprocs > 1) { if (rank != numprocs - 1) { //send bottom most cost to following process bottom_end_cost = path_costs[x * initial_height + (start + my_rows - 1)]; MPI_Send(&bottom_end_cost, 1, MPI_DOUBLE, rank + 1, 0, MPI_COMM_WORLD); //receive following process's top most cost MPI_Recv(&temp_end_cost, 1, MPI_DOUBLE, rank + 1, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); path_costs[x * initial_height + (start + my_rows)] = temp_end_cost; } if (rank != 0) { //send top most cost to preceding process top_end_cost = path_costs[x * initial_height + start]; MPI_Send(&top_end_cost, 1, MPI_DOUBLE, rank - 1, 0, MPI_COMM_WORLD); //receive preceding process's bottom most cost MPI_Recv(&temp_end_cost, 1, MPI_DOUBLE, rank - 1, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); path_costs[x * initial_height + (start 
- 1)] = temp_end_cost; } } } //update paths costs for all processes for (int i = 0; i < numprocs; i++) { if (rank == i) { continue; } if (i < extra_rows) { y_offset = i * (low_rows + 1); recv_rows = low_rows + 1; } else { y_offset = (extra_rows * (low_rows + 1)) + ((i - extra_rows) * low_rows); recv_rows = low_rows; } //printf("%d %d\n", low_rows, extra_rows); //printf("%d %d %d\n", rank, y_offset, recv_rows); temp_path_costs = (double *) malloc(recv_rows * current_width * sizeof(double)); temp_previous_x = (double *) malloc(recv_rows * current_width * sizeof(double)); temp_previous_y = (double *) malloc(recv_rows * current_width * sizeof(double)); MPI_Sendrecv(my_path_costs, my_rows * current_width, MPI_DOUBLE, i, 0, temp_path_costs, recv_rows * current_width, MPI_DOUBLE, i, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); MPI_Sendrecv(my_previous_x, my_rows * current_width, MPI_DOUBLE, i, 1, temp_previous_x, recv_rows * current_width, MPI_DOUBLE, i, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE); MPI_Sendrecv(my_previous_y, my_rows * current_width, MPI_DOUBLE, i, 2, temp_previous_y, recv_rows * current_width, MPI_DOUBLE, i, 2, MPI_COMM_WORLD, MPI_STATUS_IGNORE); for (int j = 0; j < recv_rows; j++) { int x = j / recv_rows; int y = y_offset + (j % recv_rows); //printf("%d %d %d %d %d\n", rank, x, y, x * initial_height + y, recv_rows * current_width); //printf("%d\n", initial_height * initial_width); path_costs[x * initial_height + y] = temp_path_costs[(y - y_offset) * current_width + x]; previous_x[x * initial_height + y] = temp_previous_x[(y - y_offset) * current_width + x]; previous_y[x * initial_height + y] = temp_previous_y[(y - y_offset) * current_width + x]; } free(temp_path_costs); free(temp_previous_x); free(temp_previous_y); } free(my_path_costs); free(my_previous_x); free(my_previous_y); //find the ycoord the lowest cost seam starts at the right of the current image int y_coord = 0; for (int y = 0; y < current_height; y++) { if (path_costs[(current_width - 1) * initial_height + y] < path_costs[(current_width - 1) * initial_height + y_coord]) { y_coord = y; } } //delete the seam from right to left for (int x = current_width - 1; x >= 0; x--) { //delete this pixel by copying over it and all those following to the bottom for (int y = y_coord; y < current_height - 1; y++) { image[x * initial_height + y] = image[x * initial_height + y + 1]; } //next pixel y_coord = previous_y[x * initial_height + y_coord]; } //decrease the current height of the image current_height--; }
int32_t main(int32_t argc, char *argv[])
{
    int32_t rankID = 0, nRanks = 1;
    char rankName[MAX_STRING_LENGTH];
    gethostname(rankName, MAX_STRING_LENGTH);

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rankID);
    MPI_Comm_size(MPI_COMM_WORLD, &nRanks);

    char *target_path = argv[1], *target = NULL;
    int64_t target_length = 0;
    target_length = get_filesize(target_path);
    if (target_length < 0) {
        printf("\nError: Cannot read target file [ %s ]\n", target_path);
        MPI_Finalize();
        exit(-1);
    }

    if (rankID == 0) {
        printf("--------------------------------------------------\n");
        printf("- Read target: [ %s ]\n", target_path);
        target = (char*)malloc(sizeof(char)*target_length);
        double read_time = 0;
        read_time -= MPI_Wtime();
        read_targetfile(target, target_length, target_path);
        read_time += MPI_Wtime();
        printf("- Target length: %ld (read time: %lf secs)\n", target_length, read_time);
        printf("--------------------------------------------------\n");
    }

    char *pattern = argv[2];
    int64_t pattern_length = 0;
    if (pattern == NULL) {
        printf("\nError: Cannot read pattern [ %s ]\n", pattern);
        free(target);
        MPI_Finalize();
        exit(-1);
    }
    pattern_length = strlen(pattern);
    if (rankID == 0) {
        printf("- Pattern: [ %s ]\n", pattern);
        printf("- Pattern length: %ld\n", pattern_length);
        printf("--------------------------------------------------\n");
    }

    int32_t* BCS = (int32_t*)malloc(ALPHABET_LEN * sizeof(int32_t));
    int32_t* GSS = (int32_t*)malloc(pattern_length * sizeof(int32_t));
    make_BCS(BCS, pattern, pattern_length);
    make_GSS(GSS, pattern, pattern_length);

    int64_t found_count = 0;
    double search_time = 0;
    if (rankID == 0) {
        search_time -= MPI_Wtime();
    }
    // DO NOT EDIT UPPER CODE //
    //==============================================================================================================//

    int64_t mpi_found_count = 0;
    char* chunk = NULL;

    if (argv[3] == NULL) {
        printf("\nError: Check chunk size [ %s ]\n", argv[3]);
        free(target);
        free(BCS);
        free(GSS);
        MPI_Finalize();
        exit(-1);
    }

    if (rankID == 0) printf("\ttarget_length = %ld\n", target_length);

    int64_t nChunksPerRank = atoi(argv[3]);
    //decide how many chunks of the string each rank gets
    int64_t nTotalChunks = (nRanks-1) * nChunksPerRank;
    //rank 0 does not search, hence nRanks - 1; the string is split into nTotalChunks chunks in total
    if (rankID == 0) printf("\tnTotalChunks = %ld\n", nTotalChunks);

    int64_t overlap_length = (pattern_length - 1) * (nTotalChunks - 1);
    //the last chunk has no overlap, hence nTotalChunks - 1;
    //in the worst case the first character of the pattern lands in one chunk and the
    //remaining characters in the next one, hence pattern_length - 1 per boundary
    if (rankID == 0) printf("\toverlap_length = %ld\n", overlap_length);

    int64_t quotient = (target_length + overlap_length) / nTotalChunks;
    //to guard against that worst case, each chunk is extended by pattern_length - 1, so the
    //total becomes target_length + overlap_length; dividing by nTotalChunks gives the size of
    //the nChunksPerRank chunks handed to each rank
    if (rankID == 0) printf("\tquotient = %ld\n", quotient);

    int64_t remainder = (target_length + overlap_length) - (quotient * nTotalChunks);
    //the division may not be exact, so the remainder is handled separately
    if (rankID == 0) printf("\tremainder = %ld\n\n", remainder);

    int64_t chunkID = 0;
    int64_t* chunk_length = (int64_t*)malloc((nTotalChunks+1)*sizeof(int64_t));
    int64_t* chunk_start_idx = (int64_t*)malloc((nTotalChunks+1)*sizeof(int64_t));
    //one extra entry (nTotalChunks + 1) is allocated for the remainder/termination case

    int64_t i;
    for (i = 0; i < nTotalChunks; i++) chunk_length[i] = quotient;
    for (i = 0; i < remainder; i++) chunk_length[i] += 1;

    chunk_start_idx[0] = 0;
    for (i = 1; i < nTotalChunks; i++)
        chunk_start_idx[i] = chunk_start_idx[i-1] + chunk_length[i-1] - (pattern_length-1);
    //by subtracting (pattern_length - 1) at the end, every chunk except the first starts
    //(pattern_length - 1) characters before the end of the previous chunk, so every chunk
    //except the first always contains the previous chunk's last (pattern_length - 1) characters
    chunk_start_idx[nTotalChunks] = 0;
    chunk_length[nTotalChunks] = 0;
    //to mark that the chunks are exhausted, the (nTotalChunks + 1)-th entry's
    //start index and length are both set to 0

    MPI_Request MPI_req[2];
    MPI_Status MPI_stat[2];
    int32_t MPI_tag = 0;
    int32_t request_rankID = -1;

    if (rankID == 0) {
        int64_t nFinishRanks = 0;
        while (nFinishRanks < nRanks-1) {
            MPI_Recv(&request_rankID, 1, MPI_INT32_T, MPI_ANY_SOURCE, MPI_tag, MPI_COMM_WORLD, &MPI_stat[0]);
            //rank 0 waits for a request from the workers;
            //MPI_stat[0] tells it which rank is ready to start working
            MPI_Isend(&target[chunk_start_idx[chunkID]], chunk_length[chunkID], MPI_CHAR, request_rankID,
                      chunkID, MPI_COMM_WORLD, &MPI_req[1]);
            //chunk_length per chunk was computed above; note that chunkID is passed as the tag;
            //the whole file (target) lives on rank 0, so the master picks the start position for the
            //requesting rank and sends it only chunk_length[chunkID] characters of the target;
            //posting MPI_req[1] asks that worker to start searching
            printf("\trequest_rankID = %d\n", request_rankID);
            printf("\tchunkID = %ld\n", chunkID);
            printf("\tchunk_start_idx[chunkID] = %ld\n", chunk_start_idx[chunkID]);
            printf("\ttarget[chunk_start_idx[chunkID] = %c\n\n", target[chunk_start_idx[chunkID]]);
            if (chunkID < nTotalChunks) chunkID++;
            else nFinishRanks++;
        }
    } else {
        chunk = (char *)malloc(chunk_length[0] * sizeof(char));
        //chunk is a character buffer
        int64_t chunk_found_count = 0;
        int64_t call_count = 0;
        while (chunkID < nTotalChunks) {
            //chunkID is still 0 when the loop is entered
            MPI_Isend(&rankID, 1, MPI_INT32_T, 0, MPI_tag, MPI_COMM_WORLD, &MPI_req[0]);
            //tell rank 0 which rank is asking for work; MPI_req[0] marks this rank as ready to search
            MPI_Recv(chunk, chunk_length[0], MPI_CHAR, 0, MPI_ANY_TAG, MPI_COMM_WORLD, &MPI_stat[1]);
            //chunk sizes differ by at most one (the remainder was spread one per chunk),
            //so the receive count is fixed at chunk_length[0]
            chunkID = MPI_stat[1].MPI_TAG;
            printf("\trank = %d chunk = %s chunkID = %ld\n", rankID, chunk, chunkID);
            if (chunkID < nTotalChunks) {
                chunk_found_count = do_search(chunk, target_length, 0, chunk_length[chunkID],
                                              pattern, pattern_length, BCS, GSS);
                if (chunk_found_count < 0) {   //check the value just returned, not found_count
                    free(chunk);
                    free(BCS);
                    free(GSS);
                    free(chunk_length);
                    free(chunk_start_idx);
                    MPI_Finalize();
                    exit(-1);
                }
                mpi_found_count += chunk_found_count;
                call_count++;
            }
        }
        printf("- [%02d: %s] call_count: %ld\n", rankID, rankName, call_count);
    }

    MPI_Reduce(&mpi_found_count, &found_count, 1, MPI_INT64_T, MPI_SUM, 0, MPI_COMM_WORLD);
    //sum every rank's mpi_found_count into found_count on rank 0
    free(chunk);
    free(chunk_length);
    free(chunk_start_idx);

    //==============================================================================================================//
    // DO NOT EDIT LOWER CODE //
    if (rankID == 0) {
        search_time += MPI_Wtime();
        printf("- Found_count: %ld\n", found_count);
        printf("--------------------------------------------------\n");
        printf("- Time: %lf secs\n", search_time);
        printf("--------------------------------------------------\n");
    }
    free(target);
    free(BCS);
    free(GSS);

    MPI_Finalize();
    return 0;
}
void master(int argc, char *argv[], int *nprocs) { FILE *fp; fp = OpenFile(argc, argv); int N, logical_time = 0; int vector_time[N]; int pid, pid1, pid2; int i; int exec_count; int send_count; int recv_count; int end_count; char *read_track; char strline[999]; // each line of the string fgets(strline,sizeof(strline),fp); // get the first line N = atoi(strline); // reads number of processes as N if((*nprocs) != N + 1) // number of processes doesn't match { fprintf(stderr, "We need %d processes for this simulation.\nTherefore, usage: mpirun -np %d %s %s\n", N+1, N+1, argv[0], argv[1]); fclose(fp); exit(0); } MPI_Status status; printf("-----There are %d processes in the system-----\n", N); // read the file from second line until EOF while(fgets(strline,sizeof(strline),fp) != NULL) { if(strline[strlen(strline)-1] == '\n') strline[strlen(strline)-1] = 0; // event type, which is delimited by space read_track = strtok(strline, " "); if (strcmp (read_track,"exec") == 0) { // exec event // read the process number read_track = strtok (NULL," "); // assign it to pid pid = atoi(read_track); // inform the pid to have exec event MPI_Send(NULL, 0, MPI_INT, pid, 11, MPI_COMM_WORLD); //tag 11 for exec event // wait for the acknowledgement from pid MPI_Recv(&logical_time, 1, MPI_INT, pid, 100, MPI_COMM_WORLD, &status); //tag 100 for acknowledgement // print out the msg and logical time printf( "Execution event in process %d\n", pid ); printf( "Logical time at process %d is %d\n", pid, logical_time ); } else if (strcmp (read_track,"send") == 0) { // send event // read the sending process number read_track = strtok (NULL," "); // assign it to pid1 pid1 = atoi(read_track); // read the recving process number read_track = strtok (NULL,"\""); // assign it to pid2 pid2 = atoi(read_track); // read the message read_track = strtok (NULL,"\""); // inform the pid1 to have send event MPI_Send(&pid2, 1, MPI_INT, pid1, 22, MPI_COMM_WORLD); // tag 22 for send event // inform the pid2 to have recv event MPI_Send(&pid1, 1, MPI_INT, pid2, 33, MPI_COMM_WORLD); // tag 33 for recv event // wait for the acknowledgement from pid1 MPI_Recv(&logical_time, 1, MPI_INT, pid1, 100, MPI_COMM_WORLD, &status); // tag 100 for acknowledgement // print out the msg and logical time for sending printf( "Message sent from process %d to process %d: %s\n", pid1, pid2, read_track ); printf( "Logical time at sender process %d is %d\n", pid1, logical_time ); // wait for the acknowledgement from pid2 MPI_Recv(&logical_time, 1, MPI_INT, pid2, 100, MPI_COMM_WORLD, &status); // tag 100 for acknowledgement // print out the msg and logical time for recving printf( "Message received from process %d by process %d: %s\n", pid1, pid2, read_track ); printf( "Logical time at receiver process %d is %d\n", pid2, logical_time ); } else if (strcmp (read_track,"end") == 0) { //end event printf("\n-----Logical time will be printed in ascending task ID order...-----\n"); // collect the logical time for every slaves for(i = 1; i <= N; i++) { // inform each slave to send back the final logical time MPI_Send(NULL, 0, MPI_INT, i, 99, MPI_COMM_WORLD); // tag 99 for end event // wait for the acknowledgement from each slave MPI_Recv(&logical_time, 1, MPI_INT, i, 99, MPI_COMM_WORLD, &status); // tag 99 for end event // print them in ascending task ID order printf("task ID: %d --- logical time: %d\n", i, logical_time); } break; } else { // other event handler printf("This simulation only supports two event types: exec and send\n"); fclose(fp); exit(0); } } //end while 
fclose(fp); }
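The exec/send/recv events driven by this master assume the slaves maintain Lamport logical clocks: increment on a local (exec) event and on send, and take max(local, piggybacked stamp) + 1 on receive. A tiny illustrative sketch of those update rules; the helper names are hypothetical and this is not the actual slave code.

#include <stdio.h>

/* Lamport clock update rules -- illustrative helpers only */
static int lamport_exec(int clock) { return clock + 1; }                                  /* internal (exec) event          */
static int lamport_send(int clock, int *stamp) { *stamp = clock + 1; return clock + 1; }  /* stamp travels with the message */
static int lamport_recv(int clock, int stamp) { return (clock > stamp ? clock : stamp) + 1; }

int main(void) {
    int a = 0, b = 0, stamp;
    a = lamport_exec(a);            /* exec at A: a becomes 1 */
    a = lamport_send(a, &stamp);    /* send A->B: a becomes 2, stamp = 2 */
    b = lamport_recv(b, stamp);     /* recv at B: b becomes max(0, 2) + 1 = 3 */
    printf("A=%d B=%d\n", a, b);
    return 0;
}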
int main(int argc, char *argv[]) { int size, rank; int N; struct timeval *start_time = NULL, *stop_time = NULL; double seconds = 0; int i, j; char *buffer; int check; srandom(RAND_SEED); MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); if (0 == rank) { start_time = (struct timeval *) malloc(sizeof(struct timeval)); stop_time = (struct timeval *) malloc(sizeof(struct timeval)); } for (N = N_START; N <= N_STOP; N = N_NEXT) { buffer = malloc(sizeof(char) * N); if (0 == rank) { for (j = 0; j < N; j++) { buffer[j] = (char) (random() % 256); } if (-1 == gettimeofday(start_time, NULL)) { printf("couldn't set start_time on node 0!\n"); MPI_Abort(MPI_COMM_WORLD, GETTIMEOFDAY_ERROR); exit(EXIT_FAILURE); } } for (i = 0; i < ITER; i++) { MPI_Bcast(buffer, N, MPI_BYTE, 0, MPI_COMM_WORLD); if (0 == rank) { for (j = 1; j < size; j++) { MPI_Recv(&check, 1, MPI_INT, MPI_ANY_SOURCE, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } } else { MPI_Send(&rank, 1, MPI_INT, 0, 1, MPI_COMM_WORLD); } } if (0 == rank) { if (-1 == gettimeofday(stop_time, NULL)) { printf("couldn't set start_time on node 0!\n"); MPI_Abort(MPI_COMM_WORLD, GETTIMEOFDAY_ERROR); exit(EXIT_FAILURE); } seconds = (double) (stop_time->tv_sec - start_time->tv_sec) + (double) (stop_time->tv_usec - start_time->tv_usec) / ONE_MILLION; } free(buffer); if (0 == rank) { printf("N: %10d, iter: %d, time: %10f s, avg rate: %12f Mbps\n", N, ITER, seconds, ((double) N * ITER * 8) / (1024.0 * 1024.0 * seconds)); } } if (0 == rank) { free(start_time); free(stop_time); } MPI_Finalize(); return 0; }
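The reported rate above is bits moved over wall-clock time. A minimal sketch of the same measurement using MPI_Wtime instead of gettimeofday, with a barrier standing in for the per-rank acknowledgement messages; the message size is a hypothetical constant, so this is a simplified skeleton rather than a drop-in replacement.

#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char *argv[]) {
    int rank;
    const int N = 1 << 20;       /* hypothetical message size in bytes */
    const int ITER = 100;
    char *buffer;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    buffer = malloc(N);

    double t0 = MPI_Wtime();
    for (int i = 0; i < ITER; i++)
        MPI_Bcast(buffer, N, MPI_BYTE, 0, MPI_COMM_WORLD);
    MPI_Barrier(MPI_COMM_WORLD);          /* wait for every rank before stopping the clock */
    double seconds = MPI_Wtime() - t0;

    if (rank == 0)
        printf("N: %d, iter: %d, time: %f s, avg rate: %f Mbps\n",
               N, ITER, seconds, ((double) N * ITER * 8) / (1024.0 * 1024.0 * seconds));

    free(buffer);
    MPI_Finalize();
    return 0;
}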
void LU_decomp(struct problem *info, struct fmatrix *X, int *reorder, MPI_Datatype pivot_type, MPI_Op best_pivot_op) { MPI_Request req_spiv, req_sa, req_sm; MPI_Status status; number_type *m = malloc(info->blksz * sizeof(*m)); int diag; for (diag = 0; diag < info->n; diag++) { /* we do partial pivoting, so the proc with the pivot is on this column: */ int pivot_h = diag / info->blksz; int r, c, i; double start_time = MPI_Wtime(); double start_time2; struct pivot pivot = { -1, 0. }; /* choose pivot across the column */ if (info->coords[HDIM] == pivot_h) { /* column with pivot in block */ int pivot_c = diag % info->blksz; /* Argo doesn't want aliasing in allreduce */ struct pivot pivot_cand = { -1, 0. }; for (i = 0; i < info->blksz; i++) { if (reorder[i] > diag && fabs(CELL(X, i, pivot_c)) > fabs(pivot_cand.value)) { pivot_cand.row = info->blksz*info->coords[VDIM] + i; pivot_cand.value = CELL(X, i, pivot_c); } } start_time2 = MPI_Wtime(); MPI_Allreduce(&pivot_cand, &pivot, 1, pivot_type, best_pivot_op, info->vcomm); pivot_allr_time += MPI_Wtime() - start_time2; } /* broadcast pivot choice across row towards the right */ start_time2 = MPI_Wtime(); pipeline_right(info, pivot_h, &pivot, 1, pivot_type, 45, &req_spiv); pivot_bcast_time += MPI_Wtime() - start_time2; pivot_time += MPI_Wtime() - start_time; /* find rank of proc with pivot on the vertical communicator */ int pivot_v = pivot.row / info->blksz; /* fill in reorder */ if (info->coords[VDIM] == pivot_v) { reorder[pivot.row % info->blksz] = diag; } /* calculate and distribute the ms */ for (r = 0; r < info->blksz; r++) { if (reorder[r] > diag) { if (info->coords[HDIM] == pivot_h) { int pivot_c = diag % info->blksz; m[r] = CELL(X, r, pivot_c) / pivot.value; CELL(X, r, pivot_c) = m[r]; } /* broadcast m towards right */ start_time = MPI_Wtime(); pipeline_right(info, pivot_h, &m[r], 1, MPI_number_type, 64, &req_sm); m_bcast_time += MPI_Wtime() - start_time; } } /* distribute the pivot row and eliminate */ int startc = 0; if (info->coords[HDIM] == pivot_h) startc = (diag+1) % info->blksz; if (info->coords[HDIM] < pivot_h) startc = info->blksz; /* elimination */ for (c = startc; c < info->blksz; c++) { number_type a; if (info->coords[VDIM] == pivot_v) { a = CELL(X, pivot.row % info->blksz, c); } start_time = MPI_Wtime(); int up = (info->coords[VDIM]+info->sqp-1)%info->sqp; int down = (info->coords[VDIM]+1)%info->sqp; if (info->coords[VDIM] != pivot_v) { MPI_Recv(&a, 1, MPI_number_type, up, 78, info->vcomm, &status); } if (down != pivot_v) { MPI_Isend(&a, 1, MPI_number_type, down, 78, info->vcomm, &req_sa); } a_bcast_time += MPI_Wtime() - start_time; for (r = 0; r < info->blksz; r++) { if (reorder[r] > diag) { CELL(X, r,c) -= m[r]*a; } } if (down != pivot_v) MPI_Wait(&req_sa, &status); } } }
int main(int argc, char** argv) { //Number of CPUs int numProcs; //Processor ID int rank; //The status of our receiver MPI_Status status; //Init MPI, Starts the parallelization sort of. MPI_Init(&argc, &argv); //Finds out how many CPUs are in our network MPI_Comm_size(MPI_COMM_WORLD, &numProcs); //Determines the rank of a process MPI_Comm_rank(MPI_COMM_WORLD, &rank); //Height and width of image will be passed in. int height = atoi(argv[1]); int width = atoi(argv[2]); Complex num; struct timeval start; double time = 0.0; //Mandelbrot Set will have lie in this plane. //X range float realMax = 2.0; float realMin = -2.0; //Y range float imagMax = 2.0; float imagMin = -2.0; //Scale the image so that it can be seen at the give resolution. float scaleX = (realMax - realMin) / width; float scaleY = (imagMax - imagMin) / height; //Number of slaves int numGroups = numProcs - 1; //Number of remaining rows after even partitions for slave. int remainder = height % numGroups; //How height those partitions are. int grpHeight = (height - remainder) / numGroups; //The area of our partition int partArea = grpHeight * width; //Image array unsigned int* image = (unsigned int *) malloc(sizeof(unsigned int) * height * width); unsigned int* buffer = (unsigned int *) malloc(sizeof(unsigned int) * (width + 10)); int DATA_TAG = 0; int TERMINATE = 1; MPI_Barrier(MPI_COMM_WORLD); if (rank == 0) { int count = 0; int row = 0; //Starting the clock gettimeofday(&start, NULL); for (int proc = 1; proc < numProcs; proc++) { MPI_Send(&row, 1, MPI_INT, proc, DATA_TAG, MPI_COMM_WORLD); count++; row++; } do { MPI_Recv(buffer, width, MPI_UNSIGNED, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status); count--; if (row < height) { MPI_Send(&row, 1, MPI_INT, status.MPI_SOURCE, DATA_TAG, MPI_COMM_WORLD); count++; row++; } else { MPI_Send(&row, 1, MPI_INT, status.MPI_SOURCE, TERMINATE, MPI_COMM_WORLD); } for (int x = 0; x < width; x++) { image[status.MPI_TAG * width + x] = buffer[x]; } } while (count > 0); //Stop the clock time = getElapsed(&start); //Output result printf("%d cores %dx%d: %fs\n", numProcs, height, width, time); //Calculate I/O time //gettimeofday(&start, NULL); //Display the set //writeImage("Static.ppm", image, height, width); //Stop the clock // time = getElapsed(&start); //Output result //printf("Runtime for file I/O: %fs\n", time); } else { int row; MPI_Recv(&row, 1, MPI_INT, 0, DATA_TAG, MPI_COMM_WORLD, &status); //printf("Slave: %d Receive Init", rank); while (status.MPI_TAG != TERMINATE) { num.imag = imagMin + ((float) row * scaleY); for (int x = 0; x < width; x++) { //Initialize Complex based on position. num.real = realMin + ((float) x * scaleX); //Calculates the color of the current pixel. buffer[x] = calPixel(num); } MPI_Send(buffer, width, MPI_UNSIGNED, 0, row, MPI_COMM_WORLD); //printf("Slave: %d Send row %d\n", rank, row); //Send only partition worked on MPI_Recv(&row, 1, MPI_INT, 0, MPI_ANY_TAG, MPI_COMM_WORLD, &status); //printf("Slave: %d Recv row %d\n", rank, row); } } free(buffer); free(image); MPI_Finalize(); return 0; }
int main(int argc, char** argv) { const int PING_PONG_LIMIT = 10; double t_start, t_end, t_total, tLoop, t_tick; double MPI_Wtime(void); int tests, maxTest = 10, i; int k[9] = {1,4,16,64,256,1024,4096,16384,65536}; // Initialize the MPI environment MPI_Init(NULL, NULL); // Find out rank, size int world_rank; MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); int world_size; MPI_Comm_size(MPI_COMM_WORLD, &world_size); // We are assuming at least 2 processes for this task if (world_size != 2) { fprintf(stderr, "World size must be two for %s\n", argv[0]); MPI_Abort(MPI_COMM_WORLD, 1); } // Get the name of the processor char processor_name[MPI_MAX_PROCESSOR_NAME]; int name_len; MPI_Get_processor_name(processor_name, &name_len); // Initialize the outer loop for 2^k where k = 2,4,6,8,10,12,14,16,18 for (int p = 0; p < sizeof(k)/sizeof(k[0]); p++) { int A[k[p]]; // Vector of integers // Populate A with ints (4 bytes each) for (i = 0; i < k[p]; i++) { A[i] = i; } // This is for loop timing tLoop = 1.0e10; for (tests = 0; tests < maxTest; tests++) { // begin timing t_start = MPI_Wtime(); // t_tick = MPI_Wtick(); int ping_pong_count = 0; int partner_rank = (world_rank + 1) % 2; while (ping_pong_count < PING_PONG_LIMIT) { if (world_rank == ping_pong_count % 2) { // Increment the ping pong count before you send it ping_pong_count++; MPI_Send(&ping_pong_count, k[p], MPI_INT, partner_rank, 0, MPI_COMM_WORLD); printf("World rank %d sent and incremented ping_pong_count %d to partner rank %d\n", world_rank, ping_pong_count, partner_rank); printf("The size of A is: %lu\n", sizeof(A)); printf("P is: %lu\n", sizeof(k)); } else { MPI_Recv(&ping_pong_count, k[p], MPI_INT, partner_rank, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); printf("World rank %d received ping_pong_count %d from partner rank %d\n", world_rank, ping_pong_count, partner_rank); } } } t_end = MPI_Wtime(); t_total = t_end - t_start; if (t_total < tLoop) tLoop = t_total; printf("That took %f seconds\n", tLoop); printf("Number of processes in MPI_COMM_WORLD: %d\n", world_size); printf("Name of processor %s\n", processor_name); printf("The size of A is: %lu\n", sizeof(A)); printf("P is: %lu\n", sizeof(k)); } // ends the outer loop for m MPI_Finalize(); // return 0; }
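For comparison, here is a minimal ping-pong timing skeleton that bounces an actual k-element buffer and stops the clock inside the trial loop, so the kept minimum really is the fastest trial. It is an illustrative sketch (run with exactly two ranks; the buffer length is hypothetical), not a drop-in replacement for the code above.

#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char *argv[]) {
    const int PING_PONG_LIMIT = 10;
    int rank;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    int n = 1024;                        /* hypothetical message length in ints */
    int *buf = malloc(n * sizeof(int));
    int partner = (rank + 1) % 2;        /* assumes exactly two ranks */

    double best = 1.0e10;
    for (int trial = 0; trial < 10; trial++) {
        MPI_Barrier(MPI_COMM_WORLD);
        double t0 = MPI_Wtime();
        for (int count = 0; count < PING_PONG_LIMIT; count++) {
            if (rank == count % 2)
                MPI_Send(buf, n, MPI_INT, partner, 0, MPI_COMM_WORLD);
            else
                MPI_Recv(buf, n, MPI_INT, partner, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        }
        double elapsed = MPI_Wtime() - t0;   /* stop the clock per trial */
        if (elapsed < best) best = elapsed;
    }
    if (rank == 0)
        printf("best of 10 trials for %d ints: %f s\n", n, best);

    free(buf);
    MPI_Finalize();
    return 0;
}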
//--------------------------------------------------------------------------- // mpi_slaving: //--------------------------------------------------------------------------- int mpi_slaving() { // Declare local variables MPI_Datatype MPI_SLAVEINF; /* Dataype for MPI communications */ slaveinfo sl_info; /* Information structure for slaves */ MPI_Status *status; /* Recv status handler */ int used_size; /* # of processors used */ long sfN; /* # of pixels in subfield */ float *data = NULL; /* subfield data burst */ float *winH = NULL; /* apodisation window for SR calc. */ float *winF = NULL; /* apodisation window for phase rec. */ float *mask = NULL; /* DiffLim Mask */ float *pc = NULL; /* Phase Consistency */ /* for reallocation: needs to be NULL */ int *shifts = NULL; /* shifts for KT Cross Spectra */ /* for reallocation: needs to be NULL */ int maxk; /* number of shifts */ // Triple correlation part long *index = NULL; /* index list for bispectrum vectors */ long bs_cnt; /* # of vectors used */ float *bsc = NULL; /* complex non-red/red bispectrum */ float *wc = NULL; /* complex non-red/red bs weights */ float *p1 = NULL; /* phase matrix for iterativ reconstr. */ float *aphs = NULL; /* average of low frequencies */ // General part float *phs = NULL; /* Phase of reconstructed image */ float *amp = NULL; /* Amplitude of reconstructed image */ int c = GO; /* helpers */ long i; // set up status variable for sends and receives status = (MPI_Status *) malloc(sizeof(MPI_Status)); // Set Slaveinfo datatype for MPI sends and receives mpi_setslavetype(&MPI_SLAVEINF); // Receive number of jobs ... MPI_Bcast(&used_size, 1, MPI_INT, 0, MPI_COMM_WORLD); // ...and if used_size = NOGO, we are quitting the slave! if (used_size == NOGO) { // free memory MPI_Type_free(&MPI_SLAVEINF); free(status); // returning NOGO quits the slaves in entry.c return NOGO; } // ...and decide if I am needed if (used_size > proc_id) { // work until TAG says NOGO while (GO) { // Receive the data information MPI_Recv(&sl_info, 1, MPI_SLAVEINF, 0, MPI_ANY_TAG, MPI_COMM_WORLD, status); // Break if shutdown signal was sent if (status->MPI_TAG == NOGO) break; if (status->MPI_TAG == TC) { /* First time allocation of memory */ if (c) { // find number of pixels in subfield sfN = sl_info.sfsizex * sl_info.sfsizey; // allocate memory for data data = (float *) malloc(sfN * sl_info.nrofframes * sizeof(float)); winH = (float *) malloc(sfN * sizeof(float)); winF = (float *) malloc(sfN * sizeof(float)); pc = (float *) malloc(sfN * sizeof(float)); /* Initialise hamming window, fractional hamming & mask */ hanming(winH, sl_info.sfsizex, sl_info.sfsizey, 0.5); frachamming(winF, sl_info.sfsizex, sl_info.sfsizey, sl_info.limApod, 0, NULL); // mask is no longer used... 
mask = ellmask(sl_info.sfsizex, sl_info.sfsizey, NULL, sl_info.rad_x, sl_info.rad_y); // allocate appropriate memory index = bs_init(&bs_cnt, sl_info); bsc = (float *) malloc(2 * bs_cnt * sizeof(float)); wc = (float *) malloc(bs_cnt * sizeof(float)); // Allocate memory for reconstructed amplitudes & phases amp = (float *) malloc(sfN * sizeof(float)); phs = (float *) malloc(2 * sfN * sizeof(float)); p1 = (float *) malloc(2 * sfN * sizeof(float)); // set flag so as to not allocate new memory later c = NOGO; } // initialise amps, phases & phase consistency to zero memset(bsc, 0.0, 2 * bs_cnt * sizeof(float)); memset(wc, 0.0, bs_cnt * sizeof(float)); memset(amp, 0.0, sfN * sizeof(float)); memset(phs, 0.0, 2 * sfN * sizeof(float)); memset(p1, 0.0, 2 * sfN * sizeof(float)); memset(pc, 0.0, sfN * sizeof(float)); // finally receive the data MPI_Recv(data, sfN * sl_info.nrofframes, MPI_FLOAT, 0, MPI_ANY_TAG, MPI_COMM_WORLD, status); // compute mean of burst //mean(data, sl_info.sfsizex, sl_info.sfsizey, sl_info.nrofframes, temp); /* compute the position of the 'good' average phase parts 3 is an argument here because we deleted: ((int) (alpha[0] * i_rad) > 3 ? (int) (alpha[0] * i_rad) : 3), */ init_shift(sl_info.sfsizex / 2, sl_info.sfsizey / 2, 3, &maxk, &shifts); // create bi-spectrum/weights in non-redundant matrix aphs = bs_ave(data, winF, sl_info, index, bs_cnt, bsc, wc, amp, maxk, shifts); // set snr-threshold bs_snrt(wc, bs_cnt, &sl_info); // phase reconstruction // init phase matrices phs_init(phs, p1, pc, sl_info, maxk, shifts, aphs); // recursive approach rpr(phs, p1, pc, index, bs_cnt, bsc, wc, sl_info); // iterative approach for (i = 0; i < sl_info.max_it; i++) { iwlspr(phs, p1, pc, bsc, wc, index, bs_cnt, sl_info, maxk); if (chkphase(sl_info, phs) < 1.0e-5) break; } // Send back info, so master knows, which subfield burst this was // MPI_TAG will indicate whether we are at the end of the process MPI_Send(&sl_info, 1, MPI_SLAVEINF, 0, NOGO, MPI_COMM_WORLD); // Send back processed data MPI_Send(amp, sfN, MPI_FLOAT, 0, NOGO, MPI_COMM_WORLD); // send back phase MPI_Send(phs, 2 * sfN, MPI_FLOAT, 0, NOGO, MPI_COMM_WORLD); // free some memory free(shifts); shifts = NULL; free(aphs); aphs = NULL; } } // free memory if (data != NULL) free(data); if (winH != NULL) free(winH); if (winF != NULL) free(winF); if (mask != NULL) free(mask); if (pc != NULL) free(pc); if (shifts != NULL) free(shifts); if (amp != NULL) free(amp); if (phs != NULL) free(phs); if (index != NULL) free(index); if (p1 != NULL) free(p1); if (bsc != NULL) free(bsc); if (wc != NULL) free(wc); free(status); MPI_Type_free(&MPI_SLAVEINF); // idle until new data or shutdown Broadcast is sent return GO; } else { // free memory free(status); MPI_Type_free(&MPI_SLAVEINF); // idle until new data or shutdown Broadcast is sent return GO; } }
void run_pagerank_csr_mpi(char *file, char *dir) { t_list = (t_time_list*)malloc(sizeof(t_time_list)); t_list->list = (t_time_elem*)malloc(sizeof(t_time_elem)*10); init_timer(t_list, 10); lint i=0,j=0; t_buff *buf = (t_buff*)malloc(sizeof(t_buff)); buf->size=buff_size; buf->buff_idx = (sint**)malloc(sizeof(sint*)*num_threads); buf->double_buffer = (double***)malloc(sizeof(double**)*num_threads); buf->int_buffer = (lint***)malloc(sizeof(lint**)*num_threads); init_t_buff(buf, buf->size); //data structure for from_msg lint **idx_start = (lint**)_mm_malloc(sizeof(lint*)*num_nodes, 64); lint **idx_end = (lint**)_mm_malloc(sizeof(lint*)*num_nodes, 64); for(i=0;i<num_nodes;i++){ idx_start[i] = (lint*)_mm_malloc(sizeof(lint)*num_threads, 64); idx_end[i] = (lint*)_mm_malloc(sizeof(lint)*num_threads, 64); } //double rand_jump = RDM_JMP/(u/t_msouble)graph->v_size; double rand_jump = RDM_JMP; double purpose_jump = 1-RDM_JMP; lint iter=0; /* Initialize mpi */ int *argc = (int*)malloc(sizeof(int)); char ***argv = (char***)malloc(sizeof(char**)); //MPI_Init(argc, argv); sint myrank, size; MPI_Comm_rank (MPI_COMM_WORLD, &myrank); MPI_Comm_size (MPI_COMM_WORLD, &size); // init files char f_buf[100]; sprintf(f_buf, "%slog/%s_log_%d_%d",dir, file, myrank, num_nodes); FILE *lg = fopen(f_buf, "w"); //if((lg = fopen(f_buf, "w"))==NULL){ // printf("the file %s does not exists!\n", f_buf); // ERROR_PRINT(); //} //////////////////////////////// lint ii, jj; lint upper_bound, lower_bound; /* reading graph, init some parameters */ t_csr *gs = (t_csr*)malloc(sizeof(t_csr)); sprintf(f_buf, "%s%s_sd_%d_%d", data_extend, file, myrank, num_nodes); if(bin==FALSE){ scan_csr_idx(gs, f_buf, dir, SEND); read_graph_csr(gs, f_buf, dir, SEND); } else{ read_csr_bin(gs, f_buf,dir, SEND, myrank); } t_csr *gr = (t_csr*)malloc(sizeof(t_csr)); sprintf(f_buf, "%s%s_rc_%d_%d", data_extend, file, myrank, num_nodes); if(bin==FALSE){ scan_csr_idx(gr, f_buf, dir, RECV); read_graph_csr(gr, f_buf, dir, RECV); } else{ read_csr_bin(gr, f_buf,dir, RECV, myrank); } lint *distrib = (lint*)malloc(sizeof(lint)*num_nodes); if(bin==FALSE){ read_recip(file, dir, distrib, gr, myrank); if(myrank==0) printf("%1.10f\n", gr->vet_info[0].recip); } else{ read_recip_bin(file, dir, distrib, gr, myrank); } init_node_id(gs, distrib, num_nodes); init_weight(gr); t_msg *msg = (t_msg*)malloc(sizeof(t_msg)); lower_bound = gr->offset-OFFSET; upper_bound = gr->v_size+gr->offset-OFFSET; //printf("%d %d %d \n", myrank, lower_bound, upper_bound); double *diff_pr = (double*)_mm_malloc(sizeof(double)*gr->v_size, 64); double *diff_tmp = (double*)_mm_malloc(sizeof(double)*gs->e_size, 64); lint *pidx_tmp = (lint*)malloc(sizeof(lint)*gs->e_size); lint *diff_assign = (lint*)malloc(sizeof(lint)*gs->e_size); lint *idx_tmp = (lint*)malloc(sizeof(lint)*gs->e_size); lint *node_idx_tmp = (lint*)malloc(sizeof(lint)*gs->e_size); lint *bin_idx = (lint*)malloc(sizeof(lint)*(1+HASH_BINS)); for(i=0;i<gr->v_size;i++){ diff_pr[i]=gr->vet_info[i].weight * gr->vet_info[i].recip; } sprintf(f_buf, "%sparex/%s_pidx_%d_%d", dir, file, myrank, num_nodes); FILE *reader_pidx; if((reader_pidx=fopen(f_buf, "r")) == NULL){ printf("the file %s does not exists!\n", f_buf); ERROR_PRINT(); } for(i =0; i< gs->e_size; i++){ lint pval; fscanf(reader_pidx, "%lld\n", &pval); pidx_tmp[i] = pval; } MPI_Barrier(MPI_COMM_WORLD); for(j=0; j<gs->e_size; j++){ diff_assign[j] = j; idx_tmp[j] = gs->edge_idx[j]; node_idx_tmp[j] = gs->edge_info[j].node_id; } //for(i=0;i<gs->e_size; i++) // printf("%lld %lld\n", idx_tmp[i], 
pidx_tmp[i]); //read pidx_tmp //partition the msg partition_msg(diff_assign, pidx_tmp, idx_tmp, bin_idx, node_idx_tmp, gs->e_size); reverse_diff_assign(diff_assign, gs->e_size); //sprintf(f_buf, "data/rand/log/2_20_l_%d", myrank); //FILE *l = fopen(f_buf, "w"); //for(i=0;i<gs->e_size; i++) // fprintf(l, "%lld\n", diff_assign[i]); //fprintf(l, "\n"); //fclose(l); //printf("finished partition\n"); MPI_Barrier(MPI_COMM_WORLD); lint **send_ub = (lint**)malloc(sizeof(lint*)*num_nodes); for(i=0;i<num_nodes;i++) send_ub[i] = (lint*)malloc(sizeof(lint)*num_nodes); if(bin==FALSE) read_send_ub(file, dir, send_ub); else read_send_ub_bin(file, dir, send_ub); //if(myrank==0){ // printf("the send ub is\n"); // for(i=0;i<num_nodes; i++){ // for(j=0;j<num_nodes; j++) // printf("%lld ", send_ub[i][j]); // printf("\n"); // } //} init_msg(msg, send_ub, myrank); for(ii=0;ii<gr->v_size;ii++) gr->vet_info[ii].wb = rand_jump; /* major computation */ #ifdef USE_OMP omp_set_num_threads(num_threads); #endif sint terminate = FALSE; sint term_buf[10]; sint term[num_nodes]; while(terminate == FALSE){ for(ii=0;ii<num_nodes;ii++) term[ii]=TRUE; iter++; terminate = TRUE; if(myrank==0){ DPRINTF(1, "ITER %lld ", iter); for(ii=0;ii<10;ii++){ DPRINTF(1, "%f|%f ", gr->vet_info[ii].wb, gr->vet_info[ii].recip); } //DPRINTF(1, "%f|%f ", gr->vet_info[47245].wb, gr->vet_info[53765].recip); DPRINTF(1, "\n"); } tic_sr(t_list, 0); #ifdef USE_OMP #pragma omp parallel for shared (gs, gr) #endif for(ii=0;ii<gr->v_size;ii++){ for(jj=(ii==0?0:gr->vet_idx[ii-1]); jj<gr->vet_idx[ii]; jj++){ lint source = ii; lint target = gr->edge_idx[jj]; if(target>= lower_bound && target<upper_bound){ gr->vet_info[source].wb+=purpose_jump*diff_pr[target - gr->offset + OFFSET]; // prefetch next target value lint target_next = gr->edge_idx[jj+prefetch_dis]; _mm_prefetch((char *)(diff_pr + target_next), _MM_HINT_T0); } } } toc_sr(t_list, 0); for(ii=0;ii<num_nodes;ii++) msg->idx_recv[ii]=0; DPRINTF(3, "node %d, on iter %lld, finished local computation!\n", myrank, iter); //communication send sint num_comm = log(num_nodes)/log(2); sint ic, it; sint bucket[num_nodes]; sint role[num_nodes]; tic_sr(t_list, 1); #ifdef USE_SORTMSG to_msg_no_buf(diff_pr, msg, gs, myrank); reduce_msg(msg, myrank, send_ub); #else assign_diff(diff_tmp, diff_assign, diff_pr, gs, myrank); set_msg(msg, send_ub, myrank); to_msg_reduce(diff_tmp, msg, pidx_tmp, idx_tmp, bin_idx, node_idx_tmp); #endif toc_sr(t_list, 1); MPI_Request request_send_int[num_nodes]; MPI_Request request_send_double[num_nodes]; MPI_Request request_recv_int[num_nodes]; MPI_Request request_recv_double[num_nodes]; MPI_Status status; tic_sr(t_list, 2); sint msg_iter=1; #ifdef USE_SYNC for(it=0;it<msg_iter;it++){ for(ic=0; ic<num_comm; ic++){ compute_bucket_role(bucket, role, ic); if(role[myrank]==SEND_FIRST){ for(ii=0; ii<num_nodes; ii++){ if(bucket[ii]!=bucket[myrank] || role[ii]!=RECV_FIRST){ continue; } //odd send to even then recv from even lint send_avg = send_ub[myrank][ii]/msg_iter+1; lint send_start = send_avg*it; lint send_size = (send_ub[myrank][ii]-send_start)>=send_avg?send_avg:(send_ub[myrank][ii]-send_start); //printf("myrank %d, to %lld msg_iter %d avg:%lld start:%lld size:%lld ub: %lld\n", myrank, ii, it, send_avg, send_start, send_size, send_ub[myrank][ii]); MPI_Send(msg->double_send[ii]+send_start, send_size, MPI_DOUBLE, ii, 2, MPI_COMM_WORLD ); MPI_Send(msg->int_send[ii]+send_start, send_size, MPI_LONG_LONG, ii, 0, MPI_COMM_WORLD); lint recv_avg = send_ub[ii][myrank]/msg_iter+1; lint recv_start = 
recv_avg*it; lint recv_size = (send_ub[ii][myrank]-recv_start)>=recv_avg?recv_avg:(send_ub[ii][myrank]-recv_start); MPI_Recv(msg->double_recv[ii]+recv_start, recv_size, MPI_DOUBLE, ii, 2, MPI_COMM_WORLD, MPI_STATUS_IGNORE); MPI_Recv(msg->int_recv[ii]+recv_start, recv_size, MPI_LONG_LONG, ii, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); msg->idx_recv[ii]=send_ub[ii][myrank]; //if(iter==1 && myrank==0 && ii==1) // printf("send ub %lld %lld\n", send_ub[ii][myrank], recv_size); //if(myrank==0 && ii==1 && iter ==1) // for(i=0;i<recv_size;i++) // printf("%d %lld\n", i+recv_start, msg->int_recv[ii][i+recv_start]); } } else if(role[myrank]==RECV_FIRST){ for(ii=0; ii<num_nodes; ii++){ if(bucket[ii]!=bucket[myrank] || role[ii]!=SEND_FIRST){ continue; } //even recv from odd then send to odd lint recv_avg = send_ub[ii][myrank]/msg_iter+1; lint recv_start = recv_avg*it; lint recv_size = (send_ub[ii][myrank]-recv_start)>=recv_avg?recv_avg:(send_ub[ii][myrank]-recv_start); //if(myrank==1 && ii==0) //printf("myrank %d, to %lld msg_iter %d avg:%lld start:%lld size:%lld ub: %lld\n", myrank, ii, it, recv_avg, recv_start, recv_size, send_ub[ii][myrank]); MPI_Recv(msg->double_recv[ii]+recv_start, recv_size, MPI_DOUBLE, ii, 2, MPI_COMM_WORLD, MPI_STATUS_IGNORE); MPI_Recv(msg->int_recv[ii]+recv_start, recv_size, MPI_LONG_LONG, ii, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); lint send_avg = send_ub[myrank][ii]/msg_iter+1; lint send_start = send_avg*it; lint send_size = (send_ub[myrank][ii]-send_start)>=send_avg?send_avg:(send_ub[myrank][ii]-send_start); MPI_Send(msg->double_send[ii]+send_start, send_size, MPI_DOUBLE, ii, 2, MPI_COMM_WORLD); MPI_Send(msg->int_send[ii]+send_start, send_size, MPI_LONG_LONG, ii, 0, MPI_COMM_WORLD); msg->idx_recv[ii]=send_ub[ii][myrank]; } } //MPI_Barrier(MPI_COMM_WORLD); } } #else for(it=0;it<msg_iter;it++){ for(ii=0;ii<num_nodes;ii++){ if(ii==myrank) continue; lint send_avg = send_ub[myrank][ii]/msg_iter+1; lint send_start = send_avg*it; lint send_size = (send_ub[myrank][ii]-send_start)>=send_avg?send_avg:(send_ub[myrank][ii]-send_start); MPI_Isend(msg->double_send[ii]+send_start, send_size, MPI_DOUBLE, ii, 2, MPI_COMM_WORLD, &(request_send_double[ii])); MPI_Isend(msg->int_send[ii]+send_start, send_size, MPI_LONG_LONG, ii, 0, MPI_COMM_WORLD, &(request_send_int[ii])); } } for(it=0;it<msg_iter;it++){ for(ii=0;ii<num_nodes;ii++){ if(ii==myrank) continue; lint recv_avg = send_ub[ii][myrank]/msg_iter+1; lint recv_start = recv_avg*it; lint recv_size = (send_ub[ii][myrank]-recv_start)>=recv_avg?recv_avg:(send_ub[ii][myrank]-recv_start); MPI_Irecv(msg->double_recv[ii]+recv_start, recv_size, MPI_DOUBLE, ii, 2, MPI_COMM_WORLD, &(request_recv_double[ii])); MPI_Irecv(msg->int_recv[ii]+recv_start, recv_size, MPI_LONG_LONG, ii, 0, MPI_COMM_WORLD, &(request_recv_int[ii])); msg->idx_recv[ii]=send_ub[ii][myrank]; } //printf("come here\n"); } //printf("before log\n"); for(ii=0;ii<num_nodes;ii++){ if(ii == myrank) continue; MPI_Wait(&(request_send_double[ii]), &status); MPI_Wait(&(request_recv_double[ii]), &status); MPI_Wait(&(request_send_int[ii]), &status); MPI_Wait(&(request_recv_int[ii]), &status); } //printf("see log!\n"); //for(ii=0;ii<num_nodes;ii++){ //} #endif //if(iter==1){ // sprintf(f_buf, "/home/yinzhaom/SNY/data/log/log_%d", myrank); // FILE *logfile = fopen(f_buf, "w"); // int node, vet; // for(node=0;node<num_nodes;node++){ // if(node==myrank) // continue; // fprintf(logfile, "from node %d\n", node); // for(vet=0;vet<send_ub[node][myrank];vet++) // fprintf(logfile, "%d %lld %lf\n", vet, 
msg->int_recv[node][vet], msg->double_recv[node][vet]); // } //} //printf("finished msg passing of rank %d\n", myrank); toc_sr(t_list, 2); if(myrank==0){ for (jj=0;jj<num_nodes;jj++) DPRINTF(3, "%lld ", msg->idx_recv[jj]); DPRINTF(3, "\n"); } /////////////communication recv//////////////////// tic_sr(t_list, 3); from_msg(msg, gr, myrank,send_ub, purpose_jump, idx_start, idx_end); toc_sr(t_list, 3); //printf("FINIHSED from msg of rank %d\n", myrank); //if(iter==1) // pp_msg(msg, gr, myrank,send_ub, purpose_jump); //////////////////check termination////////////// tic_sr(t_list, 4); for(ii=0;ii<gr->v_size;ii++){ diff_pr[ii]=(gr->vet_info[ii].wb - gr->vet_info[ii].weight)*gr->vet_info[ii].recip; } for(ii=0;ii<gr->v_size;ii++){ if(fabs(diff_pr[ii])>THRESH) terminate = FALSE; gr->vet_info[ii].weight = gr->vet_info[ii].wb; } term_buf[0]=terminate; for(ii=0; ii<num_nodes; ii++){ if(ii==myrank) continue; MPI_Send(term_buf, 10, MPI_INT, ii, 1, MPI_COMM_WORLD); } for(ii=0; ii<num_nodes; ii++){ if(ii==myrank) continue; MPI_Recv(term_buf, 10, MPI_INT, ii, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE); term[ii]=term_buf[0]; } term[myrank] = terminate; for(ii=0;ii<num_nodes;ii++) if(term[ii]==FALSE) terminate = FALSE; if(terminate == TRUE) break; toc_sr(t_list, 4); //printf("finished check termination of rank %d\n", myrank); } if(myrank==MASTER){ DPRINTF(1, "Final pagerank values: \n"); for(ii=0;ii<10;ii++){ DPRINTF(1, "%lld|%lf ", (ii+gr->offset), gr->vet_info[ii].weight); } DPRINTF(1, "\n"); } if(myrank==MASTER){ printf("iterations taken: %lld\n", iter); lint recv_size=0; for(i=0;i<num_nodes;i++){ if(i==myrank) continue; recv_size += send_ub[i][myrank]; } print_result(t_list, iter, gs->e_size, recv_size); } /* Shut down mpi */ MPI_Finalize(); //fclose(lg); }
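The termination check above exchanges one flag per peer with explicit MPI_Send/MPI_Recv pairs; the same decision can be made with a single collective. A hedged sketch of that alternative, assuming the same 0/1 flag convention used above:

#include <mpi.h>

/* Returns nonzero only when every rank passed local_done != 0.
   Illustrative alternative to the pairwise term-flag exchange above. */
static int all_ranks_done(int local_done, MPI_Comm comm) {
    int global_done = 0;
    MPI_Allreduce(&local_done, &global_done, 1, MPI_INT, MPI_LAND, comm);
    return global_done;
}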
int IMPI_Gateway_export(int *src_comm_lrank, int *dest_grank, int *tag, size_t *length, void **buffer) { int gateway_flag = 0; int i; int iprobe_flag; size_t recv_msgcount; static int my_comm_host_rank; static int send_context; static int procs_on_metahost; static int *meta_header_sent; static char *router_msg[1]; static size_t router_msg_size[1]; static size_t *meta_msg_i_size; static Meta_Header **meta_msg_i; static MPI_Status recv_status; struct MPIR_COMMUNICATOR *comm_host_ptr; static int firstcall=1; if(firstcall) { /* set up buffers */ router_msg_size[0] = INIT_ROUTER_BUFFER_SIZE; router_msg[0] = (char *)malloc( INIT_ROUTER_BUFFER_SIZE * sizeof(char)); if( router_msg[0]==NULL ) exit(-1); comm_host_ptr = MPIR_GET_COMM_PTR( MPI_COMM_HOST ); my_comm_host_rank = comm_host_ptr->local_rank; send_context = comm_host_ptr->send_context; procs_on_metahost = 3; meta_msg_i = (Meta_Header **) malloc( procs_on_metahost * sizeof( Meta_Header * ) ); meta_msg_i_size = (int *) malloc( procs_on_metahost * sizeof( int ) ); meta_header_sent = (int *) malloc( procs_on_metahost * sizeof( int ) ); for( i = 0; i < procs_on_metahost; i++ ) { if( !(MPIR_meta_cfg.isRouter[i]) ) { meta_msg_i[i] = (Meta_Header *) malloc( INIT_ROUTER_BUFFER_SIZE * sizeof(char) ); meta_msg_i_size[i] = INIT_ROUTER_BUFFER_SIZE * sizeof(char); meta_header_sent[i] = 0; } } firstcall = 0; DBG("Check_gateway --> first call"); } /* receive messages from the mpi-processes of the localhost and route them to the according host */ MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_HOST, &iprobe_flag, &recv_status); DBG2("Iprobe is %d / Sender is %d", iprobe_flag, recv_status.MPI_SOURCE); if (!iprobe_flag) { /* No gateway message available...*/ return 0; } else { if(buffer==NULL) return 1; /* <-- this is just a check-gateway call */ /* get the size of the message to be received */ MPI_Get_count(&recv_status, MPI_BYTE, &recv_msgcount); if( (recv_status.MPI_TAG == MPIR_SEPARATE_MSG_TAG) && (meta_header_sent[recv_status.MPI_SOURCE]) ) { DBG2("SEPARATE_MSG_TAG %d %d",recv_msgcount,recv_msgcount-sizeof(Meta_Header)); /* a meta header was sent and this is the message belonging to that header */ meta_msg_i[recv_status.MPI_SOURCE] = (Meta_Header * )IMPI_adjustbuffer( (char *)(meta_msg_i[recv_status.MPI_SOURCE]), meta_msg_i_size[recv_status.MPI_SOURCE], recv_msgcount + sizeof( Meta_Header ) ); if ( meta_msg_i_size[recv_status.MPI_SOURCE] < recv_msgcount + sizeof( Meta_Header ) ) meta_msg_i_size[recv_status.MPI_SOURCE] = recv_msgcount + sizeof( Meta_Header ); } else { DBG2("No SEPARATE_MSG_TAG %d %d",recv_msgcount,recv_msgcount-sizeof(Meta_Header)); router_msg[0] = IMPI_adjustbuffer(router_msg[0], router_msg_size[0], recv_msgcount); if( router_msg_size[0] < recv_msgcount ) router_msg_size[0] = recv_msgcount; } if( (recv_status.MPI_TAG == MPIR_SEPARATE_MSG_TAG) && (meta_header_sent[recv_status.MPI_SOURCE]) ) { /* we receive this message in the buffer for the sender process, directly after the meta header */ MPI_Recv( meta_msg_i[recv_status.MPI_SOURCE] + 1, recv_msgcount, MPI_BYTE, recv_status.MPI_SOURCE, MPI_ANY_TAG, MPI_COMM_HOST, &recv_status); DBG("Local message received"); } else { DBG2("Going to recv the router message (%d) from %d", recv_msgcount, recv_status.MPI_SOURCE); MPI_Recv(router_msg[0], recv_msgcount, MPI_BYTE, recv_status.MPI_SOURCE, MPI_ANY_TAG, MPI_COMM_HOST, &recv_status); DBG("Router message received"); } /* check type of message - command message or MPI message ? 
*/ switch (recv_status.MPI_TAG) { case MPIR_SEPARATE_MSG_TAG: { DBG("MPIR_SEPARATE_MSG_TAG"); /* | IMPI: this connection mapping would correspond to the IMPI host mapping, but currently we | maintain only one IMPI host per IMPI client! */ // conn = get_conn_for_dest( meta_msg_i[recv_status.MPI_SOURCE]->msg.MPI.dest_grank ); DBG4("Gateway-msg for [a%d] from [m%d], tag %d, MPI size %d", meta_msg_i[recv_status.MPI_SOURCE]->msg.MPI.dest_grank, meta_msg_i[recv_status.MPI_SOURCE]->msg.MPI.src_comm_lrank, meta_msg_i[recv_status.MPI_SOURCE]->msg.MPI.tag, meta_msg_i[recv_status.MPI_SOURCE]->msg.MPI.count ); *(dest_grank) = meta_msg_i[recv_status.MPI_SOURCE]->msg.MPI.dest_grank; *(src_comm_lrank) = meta_msg_i[recv_status.MPI_SOURCE]->msg.MPI.src_comm_lrank; *(tag) = meta_msg_i[recv_status.MPI_SOURCE]->msg.MPI.tag; *(length) = meta_msg_i[recv_status.MPI_SOURCE]->msg.MPI.count; *(buffer) = (void*)(meta_msg_i[recv_status.MPI_SOURCE] + 1); gateway_flag = 1; meta_header_sent[recv_status.MPI_SOURCE] = 0; break; } case MPIR_SEPARATE_META_HEADER_TAG: { DBG("MPIR_SEPARATE_META_HEADER_TAG"); /* | this is a short message containing the meta data for a nonblocking msg | that comes later on, therefore we must save the data here */ DBG2("memcpy (%d) in buffer of %d", recv_msgcount, recv_status.MPI_SOURCE); memcpy( meta_msg_i[recv_status.MPI_SOURCE], router_msg[0], recv_msgcount ); meta_header_sent[recv_status.MPI_SOURCE] = 1; DBG("Header stored"); break; } case MPIR_ROUTMSG_TAG: { DBG("MPIR_ROUTMSG_TAG"); switch (((Meta_Header *)router_msg[0])->msg.Rout.command) { case FINALIZE: { break; } default: { /* ERROR: got router message with wrong command */ break; } } break; } default: { /* ERROR: got message with wrong tag */ } } } /* if (!iprobe_flag) ... */ DBG1("Leaving Check_gateway with %d", gateway_flag); return gateway_flag; }
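The gateway's probe-then-receive logic boils down to a standard idiom: poll with MPI_Iprobe, size the buffer with MPI_Get_count, then receive. A compact self-contained sketch of that idiom; the function name and the lack of error handling are illustrative simplifications.

#include <mpi.h>
#include <stdlib.h>

/* Poll for any pending message on comm; if one is waiting, receive it into a
   freshly allocated buffer sized from the probed status. Returns NULL when
   nothing is pending. Illustrative sketch only. */
static char *poll_any_message(MPI_Comm comm, int *nbytes, int *source, int *tag) {
    int flag = 0;
    MPI_Status status;
    MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, comm, &flag, &status);
    if (!flag) return NULL;
    MPI_Get_count(&status, MPI_BYTE, nbytes);
    *source = status.MPI_SOURCE;
    *tag = status.MPI_TAG;
    char *buf = malloc(*nbytes);
    MPI_Recv(buf, *nbytes, MPI_BYTE, *source, *tag, comm, MPI_STATUS_IGNORE);
    return buf;
}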
void solve(int ite){ /** iterations for each time step ***/ for(int t=0;t<ite;t++){ /** Compute the total force acting on each of the particles * that interact with one another ***/ int source = (myid+1)%nproc; int dest = (myid+nproc-1)%nproc; double loc_pos[num_per_thread][DIM]; double tmp_pos[num_per_thread][DIM]; double loc_masses[num_per_thread]; double tmp_masses[num_per_thread]; double loc_forces[N][DIM]; double tmp_forces[N][DIM]; if(myid==0){ for(int i=1; i<nproc; i++){ for(int j=i, k=0 ; k<num_per_thread ;k++,j+=nproc){ loc_pos[k][X] = pos[j][X]; loc_pos[k][Y] = pos[j][Y]; loc_masses[k] = masses[j]; } MPI_Send(loc_pos, num_per_thread*DIM, MPI_DOUBLE, i, 1, MPI_COMM_WORLD); MPI_Send(loc_masses, num_per_thread, MPI_DOUBLE, i, 2, MPI_COMM_WORLD); } for(int j=0, k=0 ; k<num_per_thread ;k++,j+=nproc){ loc_pos[k][X] = pos[j][X]; loc_pos[k][Y] = pos[j][Y]; loc_masses[k] = masses[j]; } } else{ MPI_Recv(loc_pos, num_per_thread*DIM, MPI_DOUBLE, 0, 1, MPI_COMM_WORLD, &status); MPI_Recv(loc_masses, num_per_thread, MPI_DOUBLE, 0, 2, MPI_COMM_WORLD, &status); } for(int i=0; i<num_per_thread ;i++){ loc_forces[i][X] = 0; loc_forces[i][Y] = 0; tmp_forces[i][X] = 0; tmp_forces[i][Y] = 0; tmp_pos[i][X] = loc_pos[i][X]; tmp_pos[i][Y] = loc_pos[i][Y]; tmp_masses[i] = loc_masses[i]; } //Compute forces due to interactions among local particles for(int q=0; q<num_per_thread-1 ;q++){ for(int k=q+1; k<num_per_thread ;k++){ double x_diff = loc_pos[q][X]-tmp_pos[k][X]; double y_diff = loc_pos[q][Y]-tmp_pos[k][Y]; double dist = sqrt(x_diff*x_diff + y_diff*y_diff); double dist_cubed = dist*dist*dist; if(dist_cubed<0) dist_cubed = -1*dist_cubed; double force_qkx = G*loc_masses[q]*tmp_masses[k]/dist_cubed * x_diff; double force_qky = G*loc_masses[q]*tmp_masses[k]/dist_cubed * y_diff; loc_forces[q][X] -= force_qkx; loc_forces[q][Y] -= force_qky; tmp_forces[k][X] += force_qkx; tmp_forces[k][Y] += force_qky; } } for(int phase=1; phase<nproc; phase++){ MPI_Sendrecv_replace(tmp_masses, num_per_thread, MPI_DOUBLE, dest, 1, source, 1, MPI_COMM_WORLD, &status); MPI_Sendrecv_replace(tmp_pos, num_per_thread*DIM, MPI_DOUBLE, dest, 2, source, 2, MPI_COMM_WORLD, &status); MPI_Sendrecv_replace(tmp_forces, num_per_thread*DIM, MPI_DOUBLE, dest, 3, source, 3, MPI_COMM_WORLD, &status); for(int q=0; q<num_per_thread-1 ;q++){ for(int k=q+1; k<num_per_thread ;k++){ double x_diff = loc_pos[q][X]-tmp_pos[k][X]; double y_diff = loc_pos[q][Y]-tmp_pos[k][Y]; double dist = sqrt(x_diff*x_diff + y_diff*y_diff); double dist_cubed = dist*dist*dist; if(dist_cubed<0) dist_cubed = -1*dist_cubed; double force_qkx = G*loc_masses[q]*tmp_masses[k]/dist_cubed * x_diff; double force_qky = G*loc_masses[q]*tmp_masses[k]/dist_cubed * y_diff; loc_forces[q][X] -= force_qkx; loc_forces[q][Y] -= force_qky; tmp_forces[k][X] += force_qkx; tmp_forces[k][Y] += force_qky; } } } MPI_Sendrecv_replace(tmp_masses, num_per_thread, MPI_DOUBLE, dest, 1, source, 1, MPI_COMM_WORLD, &status); MPI_Sendrecv_replace(tmp_pos, num_per_thread*DIM, MPI_DOUBLE, dest, 2, source, 2, MPI_COMM_WORLD, &status); MPI_Sendrecv_replace(tmp_forces, num_per_thread*DIM, MPI_DOUBLE, dest, 3, source, 3, MPI_COMM_WORLD, &status); for(int k=0; k<num_per_thread ;k++){ loc_forces[k][X] += tmp_forces[k][X]; loc_forces[k][Y] += tmp_forces[k][Y]; } if(myid==0){ for(int k=0 ; k<num_per_thread ;k++){ forces[k*nproc][X] = loc_forces[k][X]; forces[k*nproc][Y] = loc_forces[k][Y]; } for(int i=1; i<nproc; i++){ MPI_Recv(loc_forces, num_per_thread*DIM, MPI_DOUBLE, i, 1, MPI_COMM_WORLD, &status); for(int k=0 ; k<num_per_thread ;k++){ forces[k*nproc+i][X] = loc_forces[k][X]; forces[k*nproc+i][Y] = loc_forces[k][Y]; } } } else{ MPI_Send(loc_forces, num_per_thread*DIM, MPI_DOUBLE, 0, 1, MPI_COMM_WORLD); } double newVel[num_per_thread][DIM]; double bVel[num_per_thread][DIM]; double newPos[num_per_thread][DIM]; MPI_Scatter(pos, num_per_thread*DIM, MPI_DOUBLE, newPos, num_per_thread*DIM, MPI_DOUBLE, 0, MPI_COMM_WORLD); MPI_Scatter(vel, num_per_thread*DIM, MPI_DOUBLE, newVel, num_per_thread*DIM, MPI_DOUBLE, 0, MPI_COMM_WORLD); MPI_Scatter(masses, num_per_thread, MPI_DOUBLE, loc_masses, num_per_thread, MPI_DOUBLE, 0, MPI_COMM_WORLD); MPI_Scatter(vel, num_per_thread*DIM, MPI_DOUBLE, bVel, num_per_thread*DIM, MPI_DOUBLE, 0, MPI_COMM_WORLD); MPI_Scatter(forces, num_per_thread*DIM, MPI_DOUBLE, loc_forces, num_per_thread*DIM, MPI_DOUBLE, 0, MPI_COMM_WORLD); //printf("before: %f from %d\n",newPos[1][0],myid); for(int i=0; i<num_per_thread ;i++){ newPos[i][X] += delta_t*bVel[i][X]; newPos[i][Y] += delta_t*bVel[i][Y]; newVel[i][X] += delta_t/loc_masses[i]*loc_forces[i][X]; newVel[i][Y] += delta_t/loc_masses[i]*loc_forces[i][Y]; } //printf("after: %f from %d\n",newPos[1][0],myid); MPI_Gather(newPos, num_per_thread*DIM, MPI_DOUBLE, pos, num_per_thread*DIM, MPI_DOUBLE, 0, MPI_COMM_WORLD); MPI_Gather(newVel, num_per_thread*DIM, MPI_DOUBLE, vel, num_per_thread*DIM, MPI_DOUBLE, 0, MPI_COMM_WORLD); MPI_Barrier(MPI_COMM_WORLD); /** Compute the position and velocity of each * particle *** for(int q=0;q<N;q++){ pos[q][X] = newPos[q][X]; pos[q][Y] = newPos[q][Y]; vel[q][X] = newVel[q][X]; vel[q][Y] = newVel[q][Y]; }*/ } }
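The phase loop above circulates the tmp_* buffers around a ring with MPI_Sendrecv_replace so that, after nproc-1 shifts, every rank has seen every other rank's particles. A minimal sketch of that rotation in isolation, using one hypothetical double per rank instead of whole particle blocks:

#include <mpi.h>
#include <stdio.h>

int main(int argc, char *argv[]) {
    int rank, nproc;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nproc);

    /* one hypothetical value per rank; the real code rotates whole particle blocks */
    double slot = (double)rank;
    int source = (rank + 1) % nproc;           /* receive from the right */
    int dest   = (rank + nproc - 1) % nproc;   /* send to the left       */

    for (int phase = 1; phase < nproc; phase++) {
        MPI_Sendrecv_replace(&slot, 1, MPI_DOUBLE, dest, 0, source, 0,
                             MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        printf("rank %d, phase %d: holding data that started on rank %.0f\n",
               rank, phase, slot);
    }
    MPI_Finalize();
    return 0;
}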
int main (int argc, char ** argv) { int rank, size, i, j, k, token; MPI_Comm comm = MPI_COMM_WORLD; MPI_Status status; enum ADIOS_READ_METHOD method = ADIOS_READ_METHOD_BP; ADIOS_SELECTION * sel; void * data = NULL; uint64_t start[3], count[3], step = 0; MPI_Init (&argc, &argv); MPI_Comm_rank (comm, &rank); MPI_Comm_size (comm, &size); adios_read_init_method (method, comm, "verbose=3"); adios_logger_open ("log_read_as_file_C", rank); /* adios_read_open_file() allows for seeing all timesteps in the file */ ADIOS_FILE * f = adios_read_open_file ("global_array_time_C.bp", method, comm); if (f == NULL) { log_error ("%s\n", adios_errmsg()); return -1; } ADIOS_VARINFO * v = adios_inq_var (f, "temperature"); // read in two timesteps data = malloc (2 * v->dims[0] * v->dims[1] * sizeof (double)); if (data == NULL) { log_error ("malloc failed.\n"); return -1; } // read in timestep 'rank' (up to 12) step = rank % 13; start[0] = 0; count[0] = v->dims[0]; start[1] = 0; count[1] = v->dims[1]; /* Read a subset of the temperature array */ sel = adios_selection_boundingbox (v->ndim, start, count); /* 2 steps from 'step' */ adios_schedule_read (f, sel, "temperature", step, 2, data); adios_perform_reads (f, 1); if (rank == 0) log_test ("Array size of temperature [0:%lld,0:%lld]\n", v->dims[0], v->dims[1]); if (rank > 0) { MPI_Recv (&token, 1, MPI_INT, rank-1, 0, comm, &status); } log_test("------------------------------------------------\n"); log_test("rank=%d: \n", rank); for (i = 0; i < 2; i++) { log_test ("step %lld = [\n", step+i); for (j = 0; j < v->dims[0]; j++) { log_test (" ["); for (k = 0; k < v->dims[1]; k++) { log_test ("%g ", ((double *)data) [ i * v->dims[0] * v->dims[1] + j * v->dims[1] + k]); } log_test ("]\n"); } log_test ("]\n"); } log_test ("\n"); if (rank < size-1) { MPI_Send (&token, 1, MPI_INT, rank+1, 0, comm); } free (data); adios_free_varinfo (v); adios_read_close (f); MPI_Barrier (comm); adios_read_finalize_method (method); adios_logger_close(); MPI_Finalize (); return 0; }
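The token hand-off around the print loop above is what serialises the per-rank output. Stripped down to just the ordering idiom, an illustrative sketch looks like this:

#include <mpi.h>
#include <stdio.h>

int main(int argc, char *argv[]) {
    int rank, size, token = 0;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    /* wait for the token from the previous rank, print, then pass the token on */
    if (rank > 0)
        MPI_Recv(&token, 1, MPI_INT, rank - 1, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    printf("rank %d reporting in order\n", rank);
    fflush(stdout);
    if (rank < size - 1)
        MPI_Send(&token, 1, MPI_INT, rank + 1, 0, MPI_COMM_WORLD);

    MPI_Finalize();
    return 0;
}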
/* Guassian Elimination algorithm using MPI */ void gaussElimination() { MPI_Status status; MPI_Request request; int row, col, i, norm; float multiplier; /* Array with the row size and number of rows that each processor will handle */ int * first_row_A_array = (int*) malloc ( p * sizeof(int) ); int * n_of_rows_A_array = (int*) malloc ( p * sizeof(int) ); int * first_row_B_array = (int*) malloc ( p * sizeof(int) ); int * n_of_rows_B_array = (int*) malloc ( p * sizeof(int) ); for ( i = 0; i < p; i++ ) { first_row_A_array[i] = 0; n_of_rows_A_array[i] = 0; first_row_B_array[i] = 0; n_of_rows_B_array[i] = 0; } /* Main loop. After every iteration, a new column will have all 0 values down the [norm] index */ for (norm = 0; norm < N-1; norm++) { /* --------------------------------------- */ /* Broadcasting of common values */ /* -------------------------------------- */ /* Broadcast the A[norm] row and B[norm], important values of this iteration */ MPI_Bcast( &A[ N*norm ], N, MPI_FLOAT, SOURCE, MPI_COMM_WORLD ); MPI_Bcast( &B[norm], 1, MPI_FLOAT, SOURCE, MPI_COMM_WORLD ); /* --------------------------------------- */ /* Calculation of number of rows to operate */ /* -------------------------------------- */ /* subset of rows of this iteration */ int subset = N - 1 - norm; /* number that indicates the step as a float */ float step = ((float)subset ) / (p); /* First and last rows that this process will work into for this iteration */ int first_row = norm + 1 + ceil( step * (my_rank) ); int last_row = norm + 1 + floor( step * (my_rank+1) ); if ( last_row >= N ) last_row = N-1; int number_of_rows = last_row - first_row +1; /*printf("\nProcess number %d of %d says in iteration %d that a=%d, b=%d and n=%d\n", my_rank+1, p, norm+1,first_row,last_row,number_of_rows) ;*/ /* --------------------------------------- */ /* Send data from process 0 to others */ /* -------------------------------------- */ if ( my_rank == SOURCE ) { for ( i = 1; i < p; i++ ) { /* We send to each process the amount of data that they are going to handle */ int first_row_rmte = norm + 1 + ceil( step * (i) ); int last_row_rmte = norm + 1 + floor( step * (i+1) ); if( last_row_rmte >= N ) last_row_rmte = N -1; int number_of_rows_rmte = last_row_rmte - first_row_rmte +1; /* In case this process isn't assigned any task, continue. 
This happens when there are more processors than rows */ //if( number_of_rows_rmte < 1 || first_row_rmte >= N ) continue; if ( number_of_rows_rmte < 0 ) number_of_rows_rmte = 0; if ( first_row_rmte >= N ) { number_of_rows_rmte = 0; first_row_rmte = N-1; }; first_row_A_array[i] = first_row_rmte * N; first_row_B_array[i] = first_row_rmte; n_of_rows_A_array[i] = number_of_rows_rmte * N; n_of_rows_B_array[i] = number_of_rows_rmte ; //MPI_Isend( &A[first_row_rmte * N], N * number_of_rows_rmte, MPI_FLOAT, i,0, MPI_COMM_WORLD, &request); //MPI_Isend( &B[first_row_rmte], number_of_rows_rmte, MPI_FLOAT, i,0, MPI_COMM_WORLD, &request); } } /* Receiver side */ /* else { if ( number_of_rows > 0 && first_row < N) { //MPI_Recv( &A[first_row * N], N * number_of_rows, MPI_FLOAT, SOURCE, 0, MPI_COMM_WORLD, &status); //MPI_Recv( &B[first_row], number_of_rows, MPI_FLOAT, SOURCE, 0, MPI_COMM_WORLD, &status); } }*/ MPI_Scatterv( &A[0], // send buffer n_of_rows_A_array, // array with number of elements in each chunk first_row_A_array, // array with pointers to initial element of each chunk MPI_FLOAT, // type of elements to send &A[first_row * N], // receive buffer N * number_of_rows, // number of elements to receive MPI_FLOAT, // type of elements to receive SOURCE, // who sends MPI_COMM_WORLD ); MPI_Scatterv( &B[0], n_of_rows_B_array, first_row_B_array, MPI_FLOAT, &B[first_row], number_of_rows, MPI_FLOAT, SOURCE, MPI_COMM_WORLD ); /*printf("\nProcess %d: Iteration number %d of %d\n", my_rank, norm+1, N-1); print_A();*/ /* --------------------------------------- */ /* Gaussian elimination */ /* The arrays only have the needed values */ /* -------------------------------------- */ if ( number_of_rows > 0 && first_row < N) { /* Similar code than in the sequential case */ for (row = first_row; row <= last_row; row++) { multiplier = A[N*row + norm] / A[norm + N*norm]; for (col = norm; col < N; col++) { A[col+N*row] -= A[N*norm + col] * multiplier; } B[row] -= B[norm] * multiplier; } } /* --------------------------------------- */ /* Send back the results */ /* -------------------------------------- */ /* Sender side */ if ( my_rank != SOURCE ) { if ( number_of_rows > 0 && first_row < N) { MPI_Isend( &A[first_row * N], N * number_of_rows, MPI_FLOAT, SOURCE,0, MPI_COMM_WORLD, &request); MPI_Isend( &B[first_row], number_of_rows, MPI_FLOAT, SOURCE,0, MPI_COMM_WORLD, &request); } } /* Receiver side */ else { for ( i = 1; i < p; i++ ) { // In case this process isn't assigned any task, continue. This happens when there are more processors than rows if( n_of_rows_B_array[i] < 1 || first_row_B_array[i] >= N) continue; MPI_Recv( &A[ first_row_A_array[i] ], n_of_rows_A_array[i] , MPI_FLOAT, i,0, MPI_COMM_WORLD, &status ); MPI_Recv( &B[ first_row_B_array[i] ], n_of_rows_B_array[i] , MPI_FLOAT, i,0, MPI_COMM_WORLD, &status ); } } /* MPI_Gatherv( &A[first_row * N], // send buffer N * number_of_rows, // number of elements to send MPI_FLOAT, // type of elements to send &A[0], // receive buffer n_of_rows_A_array, // array with number of elements in each chunk first_row_A_array, // array with pointers to initial element of each chunk, in the reception buffer MPI_FLOAT, // type of elements to receive SOURCE, // who receives MPI_COMM_WORLD ); MPI_Gatherv( &B[first_row], number_of_rows, MPI_FLOAT, &B[0], n_of_rows_B_array, first_row_B_array, MPI_FLOAT, SOURCE, MPI_COMM_WORLD ); */ } }
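MPI_Scatterv above relies on per-rank element counts and displacements that only the root has to fill consistently. A small illustrative helper for a plain row-block split of an N-column matrix; it mirrors the idea, not the exact ceil/floor split computed per iteration above.

/* Fill Scatterv counts/displacements for distributing `rows` rows of an
   N-column matrix over p ranks, giving the first (rows % p) ranks one
   extra row. Counts and displacements are in elements, not rows.
   Illustrative helper only. */
static void rowblock_counts(int rows, int N, int p, int *counts, int *displs) {
    int offset = 0;
    for (int i = 0; i < p; i++) {
        int r = rows / p + (i < rows % p ? 1 : 0);
        counts[i] = r * N;
        displs[i] = offset * N;
        offset += r;
    }
}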
/// Recreate the shared nodes. An alternate incorrect version can be enabled by undefining CORRECT_COORD_COMPARISON void ParFUM_recreateSharedNodes(int meshid, int dim, MPI_Comm newComm) { #define CORRECT_COORD_COMPARISON MPI_Comm comm = newComm; int rank, nParts; int send_count=0; // sanity check int recv_count=0; // sanity check MPI_Comm_rank(comm, &rank); MPI_Comm_size(comm, &nParts); #if SUPER_FAST_SPECIFIC_TORUS #define TORUSY 15 #define TORUSZ 15 CkPrintf("rank %d is manually configuring the IDXL lists to make the shared node generation fast\n"); FEM_Mesh *mesh = (FEM_chunk::get("ParFUM_recreateSharedNodes"))->lookup(meshid,"ParFUM_recreateSharedNodes"); IDXL_Side &shared = mesh->node.shared; int low = (rank-1+nParts) % nParts; int high = (rank+1) % nParts; IDXL_List &list1 = shared.addList(low); IDXL_List &list2 = shared.addList(high); int nodesInPlane = TORUSY * TORUSZ; int numNodes = FEM_Mesh_get_length(meshid,FEM_NODE); // vp - 1 for(int j=0;j<nodesInPlane;j++){ list1.push_back(j); } // vp + 1 for(int j=0;j<nodesInPlane;j++){ list2.push_back(numNodes - nodesInPlane +j); } return; #else // Shared data will be temporarily stored in the following structure int *sharedNodeCounts; // sharedCounts[i] = number of nodes shared with rank i int **sharedNodeLists; // sharedNodes[i] is the list of nodes shared with rank i // Initialize shared data sharedNodeCounts = (int *)malloc(nParts*sizeof(int)); sharedNodeLists = (int **)malloc(nParts*sizeof(int *)); for (int i=0; i<nParts; i++) { sharedNodeLists[i] = NULL; sharedNodeCounts[i] = 0; } // Get local node count and coordinates int numNodes; int coord_msg_tag=42, sharedlist_msg_tag=43; double *nodeCoords; numNodes = FEM_Mesh_get_length(meshid,FEM_NODE); nodeCoords = (double *)malloc(dim*numNodes*sizeof(double)); FEM_Mesh_become_get(meshid); FEM_Mesh_data(meshid,FEM_NODE,FEM_COORD, nodeCoords, 0, numNodes,FEM_DOUBLE, dim); //MPI_Barrier(MPI_COMM_WORLD); if (rank==0) CkPrintf("Extracted node data...\n"); // Begin exchange of node coordinates to determine shared nodes // FIX ME: compute bounding box, only exchange when bounding boxes collide /// The highest partition # to which I send my coordinates(wraps around) int sendUpperBound; if(nParts %2==0){ sendUpperBound = rank + (nParts/2) - (rank%2); } else { sendUpperBound = rank + (nParts/2) ; } /// The lowest partition # to which I send my coordinates(wraps around) int sendLowerBound; if(nParts %2==0){ sendLowerBound = rank - (nParts/2) + ((rank+1)%2); } else { sendLowerBound = rank - (nParts/2); } // Special case optimization for when the mesh is generated in such a way that only neighboring partitions share nodes // look for command line argument #ifdef SHARED_NODES_ONLY_NEIGHBOR //#warning "ParFUM_recreateSharedNodes only allows adjacent partitions(rank +/- 1) to have shared nodes" sendUpperBound = rank + 1; sendLowerBound = rank - 1; #endif for (int i=rank+1; i<=sendUpperBound; i++) { //send nodeCoords to rank i MPI_Send(nodeCoords, dim*numNodes, MPI_DOUBLE, i%nParts, coord_msg_tag, comm); send_count ++; // printf("[%d] Sending %d doubles to rank %d \n",rank,dim*numNodes,i%nParts); } // Receive coordinates from the appropriate number of other partitions // These can be received in any order for (int i=sendLowerBound; i<rank; i++) { std::vector<int> remoteSharedNodes, localSharedNodes; double *recvNodeCoords; MPI_Status status; int source, length; // Probe for a coordinate message from any source; extract source and msg length MPI_Probe(MPI_ANY_SOURCE, coord_msg_tag, comm, &status); source = 
status.MPI_SOURCE; length = status.MPI_LENGTH/sizeof(double); // printf("[%d] Receiving %d doubles from rank %d \n",rank,length,source); recv_count ++; // Receive whatever data was available according to probe recvNodeCoords = (double *)malloc(length*sizeof(double)); MPI_Recv((void*)recvNodeCoords, length, MPI_DOUBLE, source, coord_msg_tag, comm, &status); // Match coords between local nodes and received coords int recvNodeCount = length/dim; // PERFORM THE NODE COMPARISONS #ifdef SHARED_NODES_ONLY_NEIGHBOR int borderNodes = BORDERNODES; //#warning "Only the first and last BORDERNODES nodes on each partition are candidates for being shared nodes" // indices are inclusive int myBottomLow = 0; int myBottomHigh = borderNodes; int myTopLow = numNodes - borderNodes; int myTopHigh = numNodes-1; int recvBottomLow = 0; int recvBottomHigh = borderNodes; int recvTopLow = recvNodeCount - borderNodes; int recvTopHigh = recvNodeCount-1; CkPrintf("[%d] rank=%d myBottomLow=%d myBottomHigh=%d myTopLow=%d myTopHigh=%d recvBottomLow=%d recvBottomHigh=%d recvTopLow=%d recvTopHigh=%d\n", CkMyPe(), rank, myBottomLow, myBottomHigh, myTopLow, myTopHigh, recvBottomLow, recvBottomHigh, recvTopLow, recvTopHigh); // make sure the top region is non-negative if(myTopLow < 0) myTopLow = 0; if(recvTopLow < 0) recvTopLow = 0; // make the two regions be non-overlapping if(myBottomHigh >= myTopLow) myTopLow = myTopLow-1; if(recvBottomHigh >= recvTopLow) recvTopLow = recvTopLow-1; for (int j=myBottomLow; j<=myBottomHigh; j++) { for (int k=recvBottomLow; k<=recvBottomHigh; k++) { if (coordEqual(&nodeCoords[j*dim], &recvNodeCoords[k*dim], dim)) { localSharedNodes.push_back(j); remoteSharedNodes.push_back(k); break; } } } for (int j=myTopLow; j<=myBottomHigh; j++) { for (int k=recvTopLow; k<=recvTopHigh; k++) { if (coordEqual(&nodeCoords[j*dim], &recvNodeCoords[k*dim], dim)) { localSharedNodes.push_back(j); remoteSharedNodes.push_back(k); break; } } } for (int j=myTopLow; j<=myTopHigh; j++) { for (int k=recvBottomLow; k<=recvBottomHigh; k++) { if (coordEqual(&nodeCoords[j*dim], &recvNodeCoords[k*dim], dim)) { localSharedNodes.push_back(j); remoteSharedNodes.push_back(k); break; } } } for (int j=myBottomLow; j<=myTopHigh; j++) { for (int k=recvTopLow; k<=recvTopHigh; k++) { if (coordEqual(&nodeCoords[j*dim], &recvNodeCoords[k*dim], dim)) { localSharedNodes.push_back(j); remoteSharedNodes.push_back(k); break; } } } #else // CkPrintf("Comparing %d nodes with %d received nodes\n", numNodes, recvNodeCount); for (int j=0; j<numNodes; j++) { for (int k=0; k<recvNodeCount; k++) { if (coordEqual(&nodeCoords[j*dim], &recvNodeCoords[k*dim], dim)) { localSharedNodes.push_back(j); remoteSharedNodes.push_back(k); //printf("[%d] found local node %d to match with remote node %d \n",rank,j,k); break; } } } #endif // Copy local nodes that were shared with source into the data structure int *localSharedNodeList = (int *)malloc(localSharedNodes.size()*sizeof(int)); for (int m=0; m<localSharedNodes.size(); m++) { localSharedNodeList[m] = localSharedNodes[m]; } sharedNodeCounts[source] = localSharedNodes.size(); sharedNodeLists[source] = localSharedNodeList; // do not delete localSharedNodeList as a pointer to it is stored // Send remote nodes that were shared with this partition to remote partition MPI_Send((int *)&remoteSharedNodes[0], remoteSharedNodes.size(), MPI_INT, source, sharedlist_msg_tag, comm); free(recvNodeCoords); } for (int i=rank+1; i<=sendUpperBound; i++) { // recv shared node lists (from the partitions in any order) int 
*sharedNodes; MPI_Status status; int source, length; // Probe for a shared node list from any source; extract source and msg length MPI_Probe(MPI_ANY_SOURCE, sharedlist_msg_tag, comm, &status); source = status.MPI_SOURCE; length = status.MPI_LENGTH/sizeof(int); // Recv the shared node list the probe revealed was available sharedNodes = (int *)malloc(length*sizeof(int)); MPI_Recv((void*)sharedNodes, length, MPI_INT, source, sharedlist_msg_tag, comm, &status); // Store the shared node list in the data structure sharedNodeCounts[source] = length; sharedNodeLists[source] = sharedNodes; // don't delete sharedNodes! we kept a pointer to it! } if (rank==0) CkPrintf("Received new shared node lists...\n"); // IMPLEMENT ME: use sharedNodeLists and sharedNodeCounts to move shared node data // to IDXL FEM_Mesh *mesh = (FEM_chunk::get("ParFUM_recreateSharedNodes"))->lookup(meshid,"ParFUM_recreateSharedNodes"); IDXL_Side &shared = mesh->node.shared; for(int i=0;i<nParts;i++){ if(i == rank) continue; if(sharedNodeCounts[i] != 0){ IDXL_List &list = shared.addList(i); for(int j=0;j<sharedNodeCounts[i];j++){ list.push_back(sharedNodeLists[i][j]); } } } MPI_Barrier(MPI_COMM_WORLD); if (rank==0) CkPrintf("Recreation of shared nodes complete...\n"); //printf("After recreating shared nodes %d \n",rank); //shared.print(); #ifdef SHARED_NODES_ONLY_NEIGHBOR CkAssert(send_count + recv_count == 2); #else CkAssert(send_count + recv_count == nParts-1); #endif // Clean up free(nodeCoords); free(sharedNodeCounts); for (int i=0; i<nParts; i++) { if (sharedNodeLists[i]) free(sharedNodeLists[i]); } free(sharedNodeLists); #endif // normal mode, not super fast mesh specific one }
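The coordinate and shared-list exchanges above read the message length straight out of the status object; in standard MPI the portable way to size a receive after MPI_Probe is MPI_Get_count. A small sketch of that idiom (the function name is illustrative):

#include <mpi.h>
#include <stdlib.h>

/* Probe for an incoming coordinate message and size the receive buffer
   from the probed status -- the portable MPI_Get_count idiom. */
static double *recv_coords(MPI_Comm comm, int tag, int *ndoubles, int *source) {
    MPI_Status status;
    MPI_Probe(MPI_ANY_SOURCE, tag, comm, &status);
    MPI_Get_count(&status, MPI_DOUBLE, ndoubles);   /* length in doubles */
    *source = status.MPI_SOURCE;
    double *coords = malloc(*ndoubles * sizeof(double));
    MPI_Recv(coords, *ndoubles, MPI_DOUBLE, *source, tag, comm, MPI_STATUS_IGNORE);
    return coords;
}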
/* ************************************************************************ */
static void calculate_gaussseidel (struct calculation_arguments const* arguments, struct calculation_results *results, struct options* options)
{
    int i, j;               /* local variables for loops */
    int m1, m2;             /* used as indices for old and new matrices */
    double star;            /* four times center value minus 4 neighbor values */
    double residuum;        /* residuum of current iteration */
    double maxresiduum;     /* maximum residuum value of a slave in iteration */
    double maxresiduumbuf;  /* holds the preceding maxresiduum */
    int termflag;           /* once the last rank is accurate enough to terminate by precision,
                               it communicates this information using this flag to the next process above it */
    int termflag2;          /* this flag is set and sent down starting from rank 0 once rank 0
                               receives termflag = 1 from a process beneath it */

    int const N = arguments->N;
    int const N_global = arguments->N_global;
    double const h = arguments->h;
    int const nproc = arguments->nproc;
    int const rank = arguments->rank;

    int term_iteration = options->term_iteration;

    termflag = 0;
    termflag2 = 0;

    /* initialize m1 and m2 depending on algorithm */
    if (options->method == METH_JACOBI)
    {
        m1 = 0;
        m2 = 1;
    }
    else
    {
        m1 = 0;
        m2 = 0;
    }

    while (term_iteration > 0)
    {
        double** Matrix_Out = arguments->Matrix[m1];
        double** Matrix_In = arguments->Matrix[m2];

        maxresiduum = 0;
        maxresiduumbuf = 0;

        if (rank > 0)
        {
            // receive communication line from above
            MPI_Recv(Matrix_Out[0], N_global + 1, MPI_DOUBLE, rank - 1, rank - 1 + results->stat_iteration, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
            // receive preceding maxresiduum
            MPI_Recv(&maxresiduumbuf, 1, MPI_DOUBLE, rank - 1, rank - 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
            // receive final termination flag from above (termflag2 is an int, so it travels as MPI_INT)
            MPI_Recv(&termflag2, 1, MPI_INT, rank - 1, rank - 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        }

        // in the initial run the first process must not receive values
        if (results->stat_iteration > 0)
        {
            if (rank != nproc - 1)
            {
                // receive communication line from below
                MPI_Recv(Matrix_Out[N], N_global + 1, MPI_DOUBLE, rank + 1, rank + 1 + results->stat_iteration - 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
                // receive preliminary termflag from the rank below
                MPI_Recv(&termflag, 1, MPI_INT, rank + 1, rank + 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
            }
        }

        /* over all rows */
        for (i = 1; i < N; i++)
        {
            /* over all columns */
            for (j = 1; j < N_global; j++)
            {
                star = 0.25 * (Matrix_In[i-1][j] + Matrix_In[i][j-1] + Matrix_In[i][j+1] + Matrix_In[i+1][j]);

                if (options->inf_func == FUNC_FPISIN)
                {
                    star += (0.25 * TWO_PI_SQUARE * h * h) * sin((PI * h) * ((double)i + arguments->offset)) * sin((PI * h) * (double)j);
                }

                if (options->termination == TERM_PREC || term_iteration == 1)
                {
                    residuum = Matrix_In[i][j] - star;
                    residuum = (residuum < 0) ? -residuum : residuum;
                    maxresiduum = (residuum < maxresiduum) ? maxresiduum : residuum;
                    maxresiduum = (maxresiduumbuf < maxresiduum) ? maxresiduum : maxresiduumbuf;
                }

                Matrix_Out[i][j] = star;
            }
        }

        // in the last iteration the values must not get sent upwards; this lets the pipeline drain
        if (term_iteration > 1 && termflag2 != 1)
        {
            if (rank > 0)
            {
                // send communication line upwards
                MPI_Send(Matrix_Out[1], N_global + 1, MPI_DOUBLE, rank - 1, rank + results->stat_iteration, MPI_COMM_WORLD);
                // send preliminary termination flag upwards
                MPI_Send(&termflag, 1, MPI_INT, rank - 1, rank, MPI_COMM_WORLD);
            }
        }

        if (rank != nproc - 1)
        {
            // send communication line downwards
            MPI_Send(Matrix_Out[N - 1], N_global + 1, MPI_DOUBLE, rank + 1, rank + results->stat_iteration, MPI_COMM_WORLD);
            // send maxresiduum down the procs
            MPI_Send(&maxresiduum, 1, MPI_DOUBLE, rank + 1, rank, MPI_COMM_WORLD);
            // send final termflag downwards (as MPI_INT, matching the receive above)
            MPI_Send(&termflag2, 1, MPI_INT, rank + 1, rank, MPI_COMM_WORLD);
        }

        /* exchange m1 and m2 */
        i = m1;
        m1 = m2;
        m2 = i;

        results->stat_iteration++;
        results->stat_precision = maxresiduum;

        if (termflag2 == 1)
            term_iteration = 0;

        // if we receive the termflag at the top of the process stack (rank 0), set termflag2 to 1
        // so the stack terminates
        if (rank == 0)
            if (termflag == 1)
            {
                termflag2 = 1;
            }

        /* check for stopping calculation, depending on termination method */
        if (options->termination == TERM_PREC)
        {
            if (rank == nproc - 1)
            {
                if (maxresiduum < options->term_precision && termflag != 1)
                {
                    termflag = 1;
                }
            }
        }
        else if (options->termination == TERM_ITER)
        {
            term_iteration--;
        }
    }

    results->m = m2;
}
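The function above pipelines rows from rank to rank and forwards the running maxresiduum and the termination flags along the chain. For a non-pipelined (Jacobi-style) row decomposition, the per-iteration boundary traffic reduces to a ghost-row exchange, and a global residuum would typically be obtained with MPI_Allreduce. The sketch below shows only that simpler halo exchange; the dimensions LOCAL_ROWS and COLS are made-up placeholders, and this is not the pipelined scheme used above.

#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

#define LOCAL_ROWS 4   /* interior rows owned by each rank (illustrative) */
#define COLS 8         /* row length including both boundary columns (illustrative) */

int main(int argc, char **argv) {
    int rank, nproc;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nproc);

    /* LOCAL_ROWS interior rows plus one ghost row above and one below */
    double (*m)[COLS] = calloc(LOCAL_ROWS + 2, sizeof *m);

    int up   = (rank > 0)         ? rank - 1 : MPI_PROC_NULL;
    int down = (rank < nproc - 1) ? rank + 1 : MPI_PROC_NULL;

    /* Exchange ghost rows: send the first interior row up and receive the ghost row
       from below, then send the last interior row down and receive the ghost row
       from above. MPI_PROC_NULL turns the calls at the domain edges into no-ops. */
    MPI_Sendrecv(m[1], COLS, MPI_DOUBLE, up, 0,
                 m[LOCAL_ROWS + 1], COLS, MPI_DOUBLE, down, 0,
                 MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    MPI_Sendrecv(m[LOCAL_ROWS], COLS, MPI_DOUBLE, down, 1,
                 m[0], COLS, MPI_DOUBLE, up, 1,
                 MPI_COMM_WORLD, MPI_STATUS_IGNORE);

    /* ... relax the interior rows here, then repeat the exchange each iteration ... */

    if (rank == 0) printf("halo exchange complete on %d ranks\n", nproc);
    free(m);
    MPI_Finalize();
    return 0;
}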
int main(int argc, char **argv)
{
    int rank, size, version, subversion, namelen, universe_size, jugadorMano, repartidor, sizeMazo, sizeDescartadas;
    char processor_name[MPI_MAX_PROCESSOR_NAME], worker_program[100];
    MPI_Comm juego_comm;
    Carta mazo[N_CARTAS_MAZO];
    Carta mano0[N_CARTAS_MANO];
    Carta mano1[N_CARTAS_MANO];
    Carta mano2[N_CARTAS_MANO];
    Carta mano3[N_CARTAS_MANO];
    Carta manoJugadorHumano[N_CARTAS_MANO];
    char *caras[] = {"As", "Dos", "Tres", "Cuatro", "Cinco", "Seis", "Siete", "Sota", "Caballo", "Rey"};
    char *palos[] = {"Oros", "Copas", "Espadas", "Bastos"};
    char *lancesEtiquetas[] = {"Grande", "Chica", "Pares", "Juego", "Al punto"};
    int valores[] = {1, 1, 10, 4, 5, 6, 7, 10, 10, 10};
    int equivalencias[] = {1, 1, 10, 4, 5, 6, 7, 8, 9, 10};
    int piedras[N_PAREJAS] = {0, 0};
    int apuestas[N_LANCES + 1] = {0, 0, 0, 0, 0};
    int jugadorHumano = 99;
    int pareja1[6]; // miembro 1 de pareja - miembro 2 de pareja - piedras - rondas - juegos - vacas
    int pareja2[6];
    int ronda = 0;

    // inicialización de contadores
    pareja1[0] = 0; pareja1[1] = 2;
    pareja2[0] = 1; pareja2[1] = 3;
    int l = 0;
    for (l = 2; l < 6; l++) { pareja1[l] = 0; pareja2[l] = 0; }

    int N_PUNTOS_JUEGO = 40;
    int N_JUEGOS_VACA = 3;
    int N_VACAS_PARTIDA = 3;
    int N_PARTIDAS = 1;

    srand(time(NULL)); /* randomize */
    sizeMazo = crearMazo(mazo, caras, palos, valores, equivalencias); /* llena el mazo de cartas */
    sizeDescartadas = 0;
    int ordago = 0;
    char modo = 'Z'; // 'A' automático, 'I' interactivo

    printf("Introduzca el número de partidas (1): \n"); fflush(stdout);
    scanf(" %d", &N_PARTIDAS); getchar(); fflush(stdout); fflush(stdin);
    printf("Introduzca el número de vacas (3): \n"); fflush(stdout);
    scanf(" %d", &N_VACAS_PARTIDA); getchar(); fflush(stdout); fflush(stdin);
    printf("Introduzca el número de juegos (3): \n"); fflush(stdout);
    scanf(" %d", &N_JUEGOS_VACA); getchar(); fflush(stdout); fflush(stdin);
    printf("Introduzca el número de puntos por juego (40): \n"); fflush(stdout);
    scanf(" %d", &N_PUNTOS_JUEGO); getchar(); fflush(stdout); fflush(stdin);

    printf("Introduzca el modo de juego (A:automático, I:interactivo): \n");
    while (modo != 'A' && modo != 'I' && modo != 'a' && modo != 'i') {
        modo = getchar();
        if (modo == 'A' || modo == 'I' || modo == 'a' || modo == 'i') {
            break;
        } else {
            getchar();
            printf("Introduzca una A o una I\n");
        }
    }
    printf("Comenzando partida en modo %c\n", modo);

    if (modo == 'I' || modo == 'i') {
        jugadorHumano = rand() % N_JUGADORES; /* identificador válido entre 0 y N_JUGADORES - 1 */
        //jugadorHumano = 3;
        printf("El identificador para el jugador humano es: %d\n", jugadorHumano);
    }

    printf("[maestro] Tamaño del mazo %d\n", sizeMazo);
    //printMazo(mazo); /*Imprime el mazo*/
    printf("\n");
    barajarMazo(mazo); /*Baraja el mazo*/
    printf("\n");

    MPI_Init(&argc, &argv);               /* starts MPI */
    MPI_Comm_rank(MPI_COMM_WORLD, &rank); /* get current process id */
    MPI_Comm_size(MPI_COMM_WORLD, &size); /* get number of processes */
    MPI_Get_processor_name(processor_name, &namelen);
    MPI_Get_version(&version, &subversion);
    printf("[maestro] Iniciado proceso maestro %d de %d en %s ejecutando MPI %d.%d\n", rank, size, processor_name, version, subversion);
    if (size != 1)
        printf("[maestro] Error: sólo debería estar ejecutándose el proceso maestro, pero hay %d procesos ejecutándose\n", size);

    /* Fijar el total de procesos a ejecutar incluyendo el maestro */
    universe_size = 5;
    strcpy(worker_program, "./jugador");
    printf("[maestro] Generando %d procesos ejecutando %s\n", universe_size - 1, worker_program);
    MPI_Comm_spawn(worker_program, MPI_ARGV_NULL, universe_size - 1, MPI_INFO_NULL, 0,
MPI_COMM_SELF, &juego_comm, MPI_ERRCODES_IGNORE); printf("[maestro] Ejecutado proceso maestro con identificador %d de un total de %d\n", rank, size); /* PRIMER INTERCAMBIO DE INFORMACIÓN: maestro a jugadores */ MPI_Bcast(&sizeMazo, 1, MPI_INT, MPI_ROOT, juego_comm);/*Envío del tamaño del mazo */ MPI_Bcast(&sizeDescartadas, 1, MPI_INT, MPI_ROOT, juego_comm);/*Envío del tamaño del mazo de descartadas*/ int corte; /* jugador que realizará el corte */ int N = 0, M = N_JUGADORES - 1; /* valores del intervalo */ corte = M + rand() / (RAND_MAX / (N - M + 1) + 1); /* proceso aleatorio de entre los existentes */ MPI_Bcast(&corte, 1, MPI_INT, MPI_ROOT, juego_comm); /* envío del id de proceso que realizará el corte a todos*/ MPI_Bcast(&modo, 1, MPI_CHAR, MPI_ROOT, juego_comm); MPI_Bcast(&jugadorHumano, 1, MPI_INT, MPI_ROOT, juego_comm); /* envío del id de jugador humano en caso de modo manual*/ enviarMazo(mazo, corte, juego_comm, N_CARTAS_MAZO); /* envío del mazo al jugador que va a cortar la baraja*/ MPI_Recv(&repartidor, 1, MPI_INT, corte, 0, juego_comm, MPI_STATUS_IGNORE); /**************************************************************/ /* Comienzan rondas /**************************************************************/ while ((pareja1[5] != 1 && pareja2[5] != 1)) { //mientras no haya una partida ganada... printf("INICIANDO RONDA %d\n", ronda); if (ronda != 0) { Carta mazo[N_CARTAS_MAZO]; sizeMazo = crearMazo(mazo, caras, palos, valores, equivalencias); /* llena el mazo de cartas */ printf("ATENCION Tamaño mazo: %d\n", sizeMazo); sizeDescartadas = 0; int ordago = 0; barajarMazo(mazo); /*Baraja el mazo*/ } printf("[maestro] El jugador repartidor es: %d\n", repartidor); //int mano = add_mod(postre, 1, 4); //printf("[maestro] El jugador mano es: %d\n", mano); MPI_Bcast(&repartidor, 1, MPI_INT, MPI_ROOT, juego_comm); //envío del repartidor a todos los procesos MPI_Bcast(&sizeMazo, 1, MPI_INT, MPI_ROOT, juego_comm); /* envío del mazo al jugador que va a repartir */ enviarMazo(mazo, repartidor, juego_comm, N_CARTAS_MAZO); /* e/s auxiliar reparto de cartas */ int i = 0; for (i = 0; i <= (N_CARTAS_MANO * N_JUGADORES - 1); i++) { int buffer[3]; MPI_Recv(&buffer, 3, MPI_INT, repartidor, 0, juego_comm, MPI_STATUS_IGNORE); printf("[repartidor %d] Repartida carta %d al jugador %d\n", repartidor, buffer[0], buffer[1]); int siguiente = buffer[1]; MPI_Recv(&buffer, 3, MPI_INT, siguiente, 0, juego_comm, MPI_STATUS_IGNORE); printf("[jugador %d] Jugador %d recibe carta %d \n", buffer[0], buffer[0], buffer[1]); } MPI_Recv(&sizeMazo, 1, MPI_INT, repartidor, 0, juego_comm, MPI_STATUS_IGNORE); recibirMazo(mazo, repartidor, juego_comm, N_CARTAS_MAZO, MPI_STATUS_IGNORE); printf("[maestro] tamaño del mazo: %d\n", sizeMazo); MPI_Bcast(&sizeMazo, 1, MPI_INT, MPI_ROOT, juego_comm); //envío del tamaño del mazo a resto de procesos int siguienteJugador = add_mod(repartidor, 1, 4); jugadorMano = 99; int turno = 0; int turnoDescartes = 1; int bufferSnd[3] = {99, siguienteJugador, turno}; int bufferRcv[3] = {99, siguienteJugador, turno}; int descarte = 99; int contador = 0; // si jugamos con humano, hay que recibir su mano while (jugadorMano == 99) { if ((turno % 4) == 0 && (turno != 0) && (turnoDescartes == 1)) { //turno de descartes // recibe identificador de carta a descartar int descarteHumano = 99; for (i = 0; i < N_CARTAS_MANO; i++) { if ((modo == 'I' || modo == 'i') && (siguienteJugador == jugadorHumano)) { printf("¿Desea descartar %s de %s? 
(S/N)\n", manoJugadorHumano[i].cara, manoJugadorHumano[i].palo); char c; scanf(" %c", &c); if (c == 'S' || c == 's') { descarteHumano = 1; } else { descarteHumano = 0; } MPI_Send(&descarteHumano, 1, MPI_INT, jugadorHumano, 0, juego_comm); } MPI_Recv(&descarte, 1, MPI_INT, siguienteJugador, 0, juego_comm, MPI_STATUS_IGNORE); if (descarte != 99 && descarte != 98) { marcarDescarte(mazo, N_CARTAS_MAZO, descarte); repartirCarta(mazo[N_CARTAS_MAZO - sizeMazo], siguienteJugador, juego_comm); mazo[N_CARTAS_MAZO - sizeMazo].estado = 1; sizeMazo--; } } MPI_Recv(&bufferRcv[1], 1, MPI_INT, siguienteJugador, 0, juego_comm, MPI_STATUS_IGNORE); siguienteJugador = bufferRcv[1]; contador++; if (contador == N_JUGADORES) { turnoDescartes = 0; contador = 0; } MPI_Bcast(&turnoDescartes, 1, MPI_INT, MPI_ROOT, juego_comm); } else { // mus corrido turnoDescartes = 1; MPI_Bcast(&turnoDescartes, 1, MPI_INT, MPI_ROOT, juego_comm); int mus = 99; if ((modo == 'I' || modo == 'i') && siguienteJugador == jugadorHumano) { char c = 'Z'; printf("[maestro] Mano actual del jugador %d\n", jugadorHumano); recibirMazo(manoJugadorHumano, jugadorHumano, juego_comm, N_CARTAS_MANO, MPI_STATUS_IGNORE); printMazo(manoJugadorHumano, N_CARTAS_MANO); printf("¿Hay mus? (S:mus, N:no mus):\n"); fflush(stdout); scanf(" %c", &c); getchar(); if (c == 'S' || c == 's') { mus = 0; } else { mus = 1; } fflush(stdout); fflush(stdin); MPI_Send(&mus, 1, MPI_INT, jugadorHumano, 0, juego_comm); }; MPI_Recv(&bufferRcv[0], 1, MPI_INT, siguienteJugador, 0, juego_comm, MPI_STATUS_IGNORE); MPI_Recv(&bufferRcv[1], 1, MPI_INT, siguienteJugador, 0, juego_comm, MPI_STATUS_IGNORE); MPI_Recv(&bufferRcv[2], 1, MPI_INT, siguienteJugador, 0, juego_comm, MPI_STATUS_IGNORE); jugadorMano = bufferRcv[0]; if (jugadorMano != 99) { printf("[maestro] Mus cortado por jugador: %d\n", jugadorMano); } turno++; if (jugadorMano == 99 || jugadorMano == 0 || jugadorMano == 1 || jugadorMano == 2 || jugadorMano == 3) { bufferSnd[0] = jugadorMano; } else { jugadorMano = 99; } if (bufferRcv[1] == 0 || bufferRcv[1] == 1 || bufferRcv[1] == 2 || bufferRcv[1] == 3) { siguienteJugador = bufferRcv[1]; bufferSnd[1] = siguienteJugador; } else { siguienteJugador = add_mod(siguienteJugador, 1, 4); bufferSnd[1] = siguienteJugador; } if (jugadorMano != 99) { siguienteJugador = jugadorMano; bufferSnd[1] = jugadorMano; } bufferSnd[2] = turno; MPI_Bcast(&bufferSnd, 3, MPI_INT, MPI_ROOT, juego_comm); } MPI_Bcast(&siguienteJugador, 1, MPI_INT, MPI_ROOT, juego_comm); } printf("[maestro] La mano es: %d\n", jugadorMano); int conteos[10]; int paresBuf[25]; int juegoBuf[5]; for (i = 0; i < 10; i++) { conteos[i] = 0; } int rbuf[50]; int rbufInv[50]; int envite[2]; int enviteAnterior[2]; envite[0] = 0; envite[1] = 0; enviteAnterior[0] = 0; enviteAnterior[1] = 0; int envites[10]; int enviteContraria[2]; enviteContraria[0] = 99; enviteContraria[1] = 0; int lances[N_LANCES]; int tienenPares[N_JUGADORES + 1]; int tienenJuego[N_JUGADORES + 1]; int hayPares = 0; int hayJuego = 0; //int pareja; //1 es pareja mano, 0 es pareja postre int j = 0; for (j = 0; j < N_LANCES + 1; j++) { /* envites */ //automático if (j == 2) { // ver si hay pares // recupera pares de los jugadores MPI_Gather(conteos, 1, MPI_INT, tienenPares, 1, MPI_INT, MPI_ROOT, juego_comm); // comprueba que alguno es distinto de cero for (i = 0; i < N_JUGADORES; i++) { if (tienenPares[i] != 0) { hayPares = 1; } } printf("HAY PARES: %d\n", hayPares); } if (j == 3) { // ver si hay juego // recupera pares de los jugadores MPI_Gather(conteos, 1, MPI_INT, 
tienenJuego, 1, MPI_INT, MPI_ROOT, juego_comm); // comprueba que alguno es distinto de cero for (i = 0; i < N_JUGADORES; i++) { if (tienenJuego[i] != 0) { hayJuego = 1; } } printf("HAY JUEGO: %d\n", hayJuego); MPI_Bcast(&hayJuego, 1, MPI_INT, MPI_ROOT, juego_comm); } // recibir envite de la mano if ((j == 0) || (j == 1) || ((j == 2) && (hayPares == 1)) || ((j == 3) && (hayJuego == 1)) || ((j == 4) && (hayJuego == 0))) { printf("[maestro] Se juega este lance\n"); if ((modo == 'I' || modo == 'i') && (siguienteJugador == jugadorHumano) && (jugadorHumano == jugadorMano)) { int e = 0; printf("[maestro] Mano actual del jugador %d\n", jugadorHumano); recibirMazo(manoJugadorHumano, jugadorHumano, juego_comm, N_CARTAS_MANO, MPI_STATUS_IGNORE); printMazo(manoJugadorHumano, N_CARTAS_MANO); printf("Introduzca envite a %s: (0:no, 2: sí, >2: más)\n", lancesEtiquetas[j]); fflush(stdout); scanf(" %d", &e); getchar(); fflush(stdout); fflush(stdin); MPI_Send(&e, 1, MPI_INT, jugadorHumano, 0, juego_comm); } } MPI_Recv(&envite, 2, MPI_INT, jugadorMano, 0, juego_comm, MPI_STATUS_IGNORE); printf("[maestro]: Lance %d\n", j); enviteAnterior[0] = envite[0]; enviteAnterior[1] = envite[1]; printf("[maestro]: Envite de la mano: %d\n", envite[0]); // enviar envite a todos los jugadores MPI_Bcast(&envite, 2, MPI_INT, MPI_ROOT, juego_comm); if ((modo == 'I' || modo == 'i') && jugadorHumano != jugadorMano) { int e = 0; int humanoTienePares = 0; MPI_Recv(&humanoTienePares, 1, MPI_INT, jugadorHumano, 0, juego_comm, MPI_STATUS_IGNORE); if (humanoTienePares != 0) { printf("[maestro] Mano actual del jugador %d\n", jugadorHumano); recibirMazo(manoJugadorHumano, jugadorHumano, juego_comm, N_CARTAS_MANO, MPI_STATUS_IGNORE); printMazo(manoJugadorHumano, N_CARTAS_MANO); printf("Introduzca envite a %s: (0:no, 2: sí, >2: más)\n", lancesEtiquetas[j]); fflush(stdout); scanf(" %d", &e); getchar(); fflush(stdout); fflush(stdin); MPI_Send(&e, 1, MPI_INT, jugadorHumano, 0, juego_comm); } } //rondas de envites hasta que se igualen o no se acepten // se garantiza que no va a haber repeticiones porque se van a rajar // recibir respuesta de todos los jugadores: de la pareja contraria, prima la más alta MPI_Gather(conteos, 10, MPI_INT, envites, 2, MPI_INT, MPI_ROOT, juego_comm); printf("[maestro] recibe envites\n"); if (ocurrenciasArray(envites, 8, 99) == 1) { ordago = 1; printf("¡¡¡ÓRDAGO!!!\n"); MPI_Bcast(&ordago, 1, MPI_INT, MPI_ROOT, juego_comm); break; }//ordago MPI_Bcast(&ordago, 1, MPI_INT, MPI_ROOT, juego_comm); apuestas[j] = calcularEnvite(envites, enviteAnterior, jugadorMano, piedras); printf("PIEDRAS MANO: %d\n", piedras[1]); printf("PIEDRAS POSTRE: %d\n", piedras[0]); // enviar respuesta de pareja contraria a todos los jugadores MPI_Bcast(&enviteContraria, 2, MPI_INT, MPI_ROOT, juego_comm); if (j == 3 && hayJuego == 1) { break; //no se juega al punto } } // } // almacenar envites /* Recepción de datos para evaluar las manos de los jugadores */ MPI_Gather(conteos, 10, MPI_INT, rbuf, 10, MPI_INT, MPI_ROOT, juego_comm); MPI_Gather(conteos, 10, MPI_INT, rbufInv, 10, MPI_INT, MPI_ROOT, juego_comm); MPI_Gather(conteos, 5, MPI_INT, paresBuf, 5, MPI_INT, MPI_ROOT, juego_comm); MPI_Gather(conteos, 1, MPI_INT, juegoBuf, 1, MPI_INT, MPI_ROOT, juego_comm); /*cálculo de manos*/ lances[0] = calculaGrande(rbuf, jugadorMano); lances[1] = calculaChica(rbufInv); lances[2] = calcularPares(paresBuf, jugadorMano); lances[3] = calcularJuego(juegoBuf, jugadorMano); printf("Mejor mano a grande: jugador %d\n", lances[0]); printf("Mejor mano a chica: 
jugador %d\n", lances[1]); printf("Mejor mano a pares: jugador %d\n", lances[2]); printf("Mejor mano a juego: jugador %d\n", lances[3]); if (ordago == 1) { printf("Ganador del lance %s y juego: jugador %d\n ", lancesEtiquetas[j], lances[j]); } /* for (i = 0; i < N_JUGADORES; i++) { if (paresBuf[5 * i] != 99) { printf("[jugador %d] Duples de la misma carta: %s\n", i, caras[paresBuf[5 * i]]); } else if (paresBuf[5 * i + 2] == 2) { printf("[jugador %d] DUPLES PAREJAS DE %s Y %s\n", i, caras[paresBuf[5 * i + 3]], caras[paresBuf[5 * i + 4]]); } else if (paresBuf[5 * i + 1] != 99) { printf("[jugador %d] MEDIAS DE: %s\n", i, caras[paresBuf[5 * i + 1]]); } else if (paresBuf[5 * i + 2] == 1) { printf("[jugador %d] PAREJA DE %s\n", i, caras[paresBuf[5 * i + 3]]); } } */ for (i = 0; i < N_JUGADORES; i++) { printf("JUEGO DE JUGADOR %d: %d\n", i, juegoBuf[i]); } for (i = 0; i <= N_LANCES; i++) { printf("APUESTA LANCE %d: %d\n", i, apuestas[i]); if (apuestas[i] != 0) { piedras[queParejaSoy(lances[i], jugadorMano)] += apuestas[i]; } } printf("PIEDRAS MANO: %d\n", piedras[1]); printf("PIEDRAS POSTRE: %d\n", piedras[0]); if (enQueParejaEstoy(jugadorMano) == 1) { pareja1[2] = piedras[1]; pareja2[2] = piedras[0]; } else { pareja1[2] = piedras[0]; pareja2[2] = piedras[1]; } pareja1[3] = pareja1[2] / N_PUNTOS_JUEGO; pareja2[3] = pareja2[2] / N_PUNTOS_JUEGO; pareja1[4] = pareja1[3] / N_JUEGOS_VACA; pareja2[4] = pareja2[3] / N_JUEGOS_VACA; pareja1[5] = pareja1[4] / N_VACAS_PARTIDA; pareja2[5] = pareja2[4] / N_VACAS_PARTIDA; recibirMazo(mano0, 0, juego_comm, N_CARTAS_MANO, MPI_STATUS_IGNORE); recibirMazo(mano1, 1, juego_comm, N_CARTAS_MANO, MPI_STATUS_IGNORE); recibirMazo(mano2, 2, juego_comm, N_CARTAS_MANO, MPI_STATUS_IGNORE); recibirMazo(mano3, 3, juego_comm, N_CARTAS_MANO, MPI_STATUS_IGNORE); printf("MANO DEL JUGADOR 0:\n"); printMazo(mano0, N_CARTAS_MANO); printf("MANO DEL JUGADOR 1:\n"); printMazo(mano1, N_CARTAS_MANO); printf("MANO DEL JUGADOR 2:\n"); printMazo(mano2, N_CARTAS_MANO); printf("MANO DEL JUGADOR 3:\n"); printMazo(mano3, N_CARTAS_MANO); printf("PUNTUACIONES:\n"); printf("PAREJA 1\n"); printf("RONDA: %d\n", pareja1[2]); printf("JUEGO: %d\n", pareja1[3]); printf("VACA: %d\n", pareja1[4]); printf("PARTIDA: %d\n", pareja1[5]); printf("PAREJA 2\n"); printf("RONDA: %d\n", pareja2[2]); printf("JUEGO: %d\n", pareja2[3]); printf("VACA: %d\n", pareja2[4]); printf("PARTIDA: %d\n", pareja2[5]); /**************************************************************/ /* Terminan rondas /**************************************************************/ repartidor = jugadorMano; ronda++; if ((pareja1[5] == 1) || pareja2[5] == 1) { // fin de partida int finPartida = 1; MPI_Bcast(&finPartida, 1, MPI_INT, MPI_ROOT, juego_comm); } else { //sigue partida int finPartida = 0; MPI_Bcast(&finPartida, 1, MPI_INT, MPI_ROOT, juego_comm); } } MPI_Comm_disconnect(&juego_comm); MPI_Finalize(); return 0; }
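The master above relies on the dynamic-process pattern: it spawns the "./jugador" workers with MPI_Comm_spawn and then broadcasts game state over the resulting intercommunicator, passing MPI_ROOT on its side while the workers pass root 0. The sketch below isolates just that pattern in a single executable that spawns copies of itself; the worker count and the payload are illustrative assumptions, not values from the game.

#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv) {
    MPI_Comm parent, inter;
    int value;

    MPI_Init(&argc, &argv);
    MPI_Comm_get_parent(&parent);

    if (parent == MPI_COMM_NULL) {
        /* Master side: spawn 4 workers and broadcast over the intercommunicator.
           The single spawning rank passes MPI_ROOT as the broadcast root. */
        MPI_Comm_spawn(argv[0], MPI_ARGV_NULL, 4, MPI_INFO_NULL, 0,
                       MPI_COMM_SELF, &inter, MPI_ERRCODES_IGNORE);
        value = 40; /* illustrative payload, e.g. points per game */
        MPI_Bcast(&value, 1, MPI_INT, MPI_ROOT, inter);
        MPI_Comm_disconnect(&inter);
    } else {
        /* Worker side: the broadcast root is rank 0 of the remote (master) group. */
        MPI_Bcast(&value, 1, MPI_INT, 0, parent);
        printf("worker received %d\n", value);
        MPI_Comm_disconnect(&parent);
    }

    MPI_Finalize();
    return 0;
}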
int main(int argc, char **argv)
{
    int my_id;                   /* process id */
    int p;                       /* number of processes */
    char *message_s, *message_r; /* storage for the message */
    int i, max_msgs, msg_size;
    MPI_Status status;           /* return status for receive */
    double elapsed_time_sec;
    double bandwidth;
    double startTime = 0;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &my_id);
    MPI_Comm_size(MPI_COMM_WORLD, &p);

    /* need both arguments and an even number of processes so every sender has a partner */
    if (argc < 3 || (sscanf(argv[1], "%d", &max_msgs) < 1) || (sscanf(argv[2], "%d", &msg_size) < 1) || (p % 2 != 0)) {
        fprintf(stderr, "need msg count and msg size as params, and an even number of processes\n");
        goto EXIT;
    }

    message_s = (char *)malloc(msg_size);
    message_r = (char *)malloc(msg_size);

    /* don't start timer until everybody is ok */
    MPI_Barrier(MPI_COMM_WORLD);

    if (my_id < p / 2) {
        /* lower half of the ranks drive the ping-pong and time it */
        startTime = MPI_Wtime();
        for (i = 0; i < max_msgs; i++) {
            MPI_Send(message_s, msg_size, MPI_CHAR, my_id + p / 2, 0, MPI_COMM_WORLD);
            MPI_Recv(message_r, msg_size, MPI_CHAR, my_id + p / 2, 0, MPI_COMM_WORLD, &status);
        }
        MPI_Barrier(MPI_COMM_WORLD);
        elapsed_time_sec = MPI_Wtime() - startTime;
        fprintf(stdout, "Totaltime: %8.3f s\n", elapsed_time_sec);
        elapsed_time_sec /= 2;        /* we want the one-way (ping) time, not the round trip */
        elapsed_time_sec /= max_msgs; /* time for each message */
        bandwidth = msg_size / elapsed_time_sec; /* bandwidth */
        fprintf(stdout, "%5d %7d\t ", max_msgs, msg_size);
        fprintf(stdout, "%8.3f us\t %8.3f MB/sec\n", elapsed_time_sec * 1e6, bandwidth / 1e6);
    } else {
        /* upper half echoes every message straight back */
        for (i = 0; i < max_msgs; i++) {
            MPI_Recv(message_r, msg_size, MPI_CHAR, my_id - p / 2, 0, MPI_COMM_WORLD, &status);
            MPI_Send(message_s, msg_size, MPI_CHAR, my_id - p / 2, 0, MPI_COMM_WORLD);
        }
        MPI_Barrier(MPI_COMM_WORLD);
    }

    free(message_s);
    free(message_r);

EXIT:
    MPI_Finalize();
    return 0;
}
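The benchmark above times a single (message count, message size) pair chosen on the command line. A common refinement, sketched below with the same send/echo structure, is to sweep message sizes in one run and to add a short warm-up loop before timing so connection setup does not distort the average; the size range and warm-up count here are illustrative assumptions, not part of the original program. Run it with exactly two ranks.

#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char **argv) {
    int rank, p, iters = 1000, warmup = 100;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &p);
    if (p != 2) {
        if (rank == 0) fprintf(stderr, "run with exactly 2 ranks\n");
        MPI_Finalize();
        return 1;
    }

    for (int size = 1; size <= (1 << 20); size <<= 1) {   /* 1 B ... 1 MiB */
        char *buf = malloc(size);
        int peer = 1 - rank;
        double t0 = 0.0;
        /* warm-up iterations first, then the timed window starts at i == warmup */
        for (int i = 0; i < warmup + iters; i++) {
            if (i == warmup) { MPI_Barrier(MPI_COMM_WORLD); t0 = MPI_Wtime(); }
            if (rank == 0) {
                MPI_Send(buf, size, MPI_CHAR, peer, 0, MPI_COMM_WORLD);
                MPI_Recv(buf, size, MPI_CHAR, peer, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
            } else {
                MPI_Recv(buf, size, MPI_CHAR, peer, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
                MPI_Send(buf, size, MPI_CHAR, peer, 0, MPI_COMM_WORLD);
            }
        }
        if (rank == 0) {
            double oneway = (MPI_Wtime() - t0) / iters / 2.0; /* seconds per one-way message */
            printf("%8d bytes  %10.3f us  %8.3f MB/s\n", size, oneway * 1e6, size / oneway / 1e6);
        }
        free(buf);
    }

    MPI_Finalize();
    return 0;
}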