void
exec_fish(int map_lock, char *river)
{
    char *fish_line = &river[(RIVER_HEIGHT - 1) * RIVER_WIDTH];
    int pos = RIVER_WIDTH / 2;

    lock_map(map_lock);
    SETFISHBIT(fish_line[pos]);
    unlock_map(map_lock);

    while (1) {
        usleep(FISH_CYCLE);
        lock_map(map_lock);
        move_fish(river, &pos, fish_line);
        unlock_map(map_lock);
    }
}
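/*
 * lock_map() and unlock_map() are defined elsewhere.  As an illustrative
 * sketch only (not the program's actual implementation), and assuming that
 * map_lock is a System V semaphore id protecting the shared river map, they
 * could be thin wrappers around semop(), roughly like this:
 *
 *     #include <sys/types.h>
 *     #include <sys/ipc.h>
 *     #include <sys/sem.h>
 *     #include <stdio.h>
 *
 *     static void map_sem_op(int sem_id, int delta)
 *     {
 *         struct sembuf op;
 *         op.sem_num = 0;       // single semaphore guarding the map
 *         op.sem_op  = delta;   // -1 acquires, +1 releases
 *         op.sem_flg = 0;
 *         if (semop(sem_id, &op, 1) < 0)
 *             perror("semop");
 *     }
 *
 *     void lock_map(int map_lock)   { map_sem_op(map_lock, -1); }
 *     void unlock_map(int map_lock) { map_sem_op(map_lock, +1); }
 *
 * Whatever the real primitives are, exec_fish relies on them to make the
 * read-modify-write of the shared map atomic with respect to other workers.
 */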
int
main(int argc, char **argv)
{
    double sum_total_timer, total_timer = 0.0;
    double sum_gather_timer, gather_timer = 0.0;
    double sum_mpi_timer, mpi_timer = 0.0;
    double curr_time;
    double output_time;
    double dt = 0.0;
    double local_max_norm = 0.1;
    double max_norm = 0;
    int steps;
    int *fish_off;
    int *n_fish_split;

    MPI_Init(&argc, &argv);

#ifdef TRACE_WITH_VAMPIR
    VT_symdef(TRACE_LOCAL_COMP, "Local computation", "Computation");
    VT_symdef(TRACE_FISH_GATHER, "Gathering to 0", "Communication");
    VT_symdef(TRACE_MAX_NORM, "Collecting max norm", "Communication");
    VT_symdef(TRACE_OUTPUT, "Output", "Output");
#endif

    MPI_Comm_size(comm, &n_proc);
    MPI_Comm_rank(comm, &rank);
    make_fishtype(&fishtype);

    get_options(argc, argv);
    srand48(clock());

    //MPI_Allreduce (&local_max_norm, &max_norm, 1, MPI_DOUBLE, MPI_MAX, comm);
    //printf("local_max_norm = %g, max_norm = %g\n", local_max_norm, max_norm);

#ifdef TRACE_WITH_VAMPIR
    VT_traceoff();
#endif

    if (output_filename) {
        outputp = 1;
        if (0 == rank) {
            output_fp = fopen(output_filename, "w");
            if (output_fp == NULL) {
                printf("Could not open %s for output\n", output_filename);
                exit(1);
            }
            fprintf(output_fp, "n_fish: %d\n", n_fish);
        }
    }

    fish_off = malloc((n_proc + 1) * sizeof(int));
    n_fish_split = malloc(n_proc * sizeof(int));

    // Split the fish across processors.
    // fish_off: offset index of the first fish owned by each processor.
    // n_fish_split: the number of fish on each processor.
    // ALL FUNCTIONALITY OF split_fish SHOULD BE DONE AFTER init_fish
    //split_fish (n_proc, fish_off, n_fish_split);
    //n_local_fish = n_fish_split[rank];

    /* All fish are generated on proc 0 to ensure the same random numbers.
       (Yes, the circle case could be parallelized.  Feel free to do it.) */

    // Split the physical box into a row x column grid of processors and
    // compute this rank's bounds within it.
    row = (int) sqrt((double) n_proc);
    column = n_proc / row;

    double rowSep = WALL_SEP / row;
    double columnSep = WALL_SEP / column;
    int rowIndex = rank / column;
    int columnIndex = rank % column;
    topBound = rowSep * rowIndex;
    bottomBound = topBound + rowSep;
    leftBound = columnSep * columnIndex;
    rightBound = leftBound + columnSep;
    assert(n_proc % row == 0);

    // One array per processor, each holding the fish initially assigned to
    // that rank.
    fish_t fishProc[n_proc][n_fish];
    int n_fish_proc[n_proc];
    int k;
    for (k = 0; k < n_proc; k++)
        n_fish_proc[k] = 0;

    init_fish(rank, fish_off, n_fish_split, row, column, fishProc, n_fish_proc);

    // Distribute the initial conditions to all processes.
    if (rank == 0) {
        local_fish = fishProc[0];
        n_local_fish = n_fish_proc[0];

        // The functionality of MPI_Scatterv is done here with Isends.
        int mesTag = 0;
        // One request per destination; the original code passed an
        // uninitialized MPI_Request pointer to MPI_Isend.
        MPI_Request req[n_proc];
        for (k = 1; k < n_proc; ++k) {
            //printf("n_fish_proc[%d], %d\n", k, n_fish_proc[k]);
            MPI_Isend(fishProc[k], n_fish_proc[k], fishtype, k, mesTag,
                      comm, &req[k]);
        }
        MPI_Waitall(n_proc - 1, &req[1], MPI_STATUSES_IGNORE);
    } else {
        MPI_Status status;
        // Processors with rank != 0 receive their initial fish.
        MPI_Recv(local_fish, n_fish, fishtype, 0, MPI_ANY_TAG, comm, &status);
        MPI_Get_count(&status, fishtype, &n_local_fish);
    }
    printf("rank[%d], n_local_fish = %d\n", rank, n_local_fish);

    //MPI_Scatterv (fish, n_fish_split, fish_off, fishtype,
    //              local_fish, n_local_fish, fishtype,
    //              0, comm);

#ifdef TRACE_WITH_VAMPIR
    tracingp = 1;
    VT_traceon();
#endif

    start_mpi_timer(&total_timer);

    for (output_time = 0.0, curr_time = 0.0, steps = 0;
         curr_time <= end_time && steps < max_steps;
         curr_time += dt, ++steps) {

#ifdef TRACE_WITH_VAMPIR
        if (steps >= STEPS_TO_TRACE) {
            tracingp = 0;
            VT_traceoff();
        }
#endif

        trace_begin(TRACE_FISH_GATHER);
        start_mpi_timer(&gather_timer);
        start_mpi_timer(&mpi_timer);

        /* Pull in all the fish.  Obviously, this is not a good idea.
           You will be greatly expanding this one line...

           However, feel free to waste memory when producing output.
           If you're dumping fish to a file, go ahead and do an
           Allgatherv _in the output steps_ if you want.  Or you could
           pipeline dumping the fish.

           MPI_Allgatherv (local_fish, n_local_fish, fishtype,
                           fish, n_fish_split, fish_off, fishtype, comm);
        */

        //MPI_Request* sendReq, recvReq;
        // Set aside buffers for fish received from other processes.
        /*
        for (j = 0; j < NUM_NEIGHBOR; ++j) {
            //FIXME: which neighbors do not exist?
            if (rankNeighbor[j] >= 0) {
                MPI_Isend(local_fish, n_local_fish, fishtype,
                          rankNeighbor[j], MPI_ANY_TAG, comm, &sendReqArray);
                MPI_Irecv(impact_fish, n_fish, fishtype,
                          rankNeighbor[NUM_NEIGHBOR - j], MPI_ANY_TAG, comm,
                          &sendReqArray);
                MPI_Wait(recvReq, MPI_STATUS_IGNORE);
                interact_fish_mpi(local_fish, n_local_fish,
                                  impact_fish, sizeof(impact_fish));
            }
        }
        */

        // Per-step communication plan:
        //   get migrate fish -> send migrate fish -> receive migrate fish
        //   -> update local fish
        //   get impact fish -> send impact fish -> receive impact fish
        //   -> interact impact fish -> interact local fish -> move

        MPI_Request sendReqArray[NUM_NEIGHBOR];
        MPI_Request recvReqArray[NUM_NEIGHBOR];

        fish_t receive_impact_fish[NUM_NEIGHBOR][n_fish];
        int n_receive_impact_fish[NUM_NEIGHBOR];
        fish_t receive_migrate_fish[NUM_NEIGHBOR][n_fish];
        int n_receive_migrate_fish[NUM_NEIGHBOR];

        int n_send_impact_fish[NUM_NEIGHBOR];
        fish_t *send_impact_fish[NUM_NEIGHBOR];
        int n_send_migrate_fish[NUM_NEIGHBOR];
        fish_t *send_migrate_fish[NUM_NEIGHBOR];

        // Fish that have crossed this rank's boundary and must migrate away.
        get_interacting_fish(local_fish, n_local_fish,
                             send_migrate_fish, n_send_migrate_fish, 1);

        int tmp;
        for (tmp = 0; tmp < NUM_NEIGHBOR; tmp++) {
            printf("rank[%d], iter[%d] ------- get [%d] migrate fish for neig[%d]. \n",
                   rank, iter, n_send_migrate_fish[tmp], tmp);
        }

        Isend_receive_fish(send_migrate_fish, n_send_migrate_fish,
                           receive_migrate_fish, n_fish,
                           sendReqArray, recvReqArray);
        wait_for_fish(recvReqArray, n_receive_migrate_fish);

        // FIXME: updating the local fish list with migrated fish is not
        // implemented yet.
        //update_local_fish();

        // Fish near the boundary whose forces affect neighboring ranks.
        get_interacting_fish(local_fish, n_local_fish,
                             send_impact_fish, n_send_impact_fish, 0);
        for (tmp = 0; tmp < NUM_NEIGHBOR; tmp++) {
            printf("rank[%d], iter[%d] ------- get [%d] impact fish for neig[%d]. \n",
                   rank, iter, n_send_impact_fish[tmp], tmp);
        }

        Isend_receive_fish(send_impact_fish, n_send_impact_fish,
                           receive_impact_fish, n_fish,
                           sendReqArray, recvReqArray);
        wait_for_fish(recvReqArray, n_receive_impact_fish);

        int index;
        for (index = 0; index < NUM_NEIGHBOR; index++) {
            if (n_receive_impact_fish[index] > 0) {
                interact_fish_mpi(local_fish, n_local_fish,
                                  receive_impact_fish[index],
                                  n_receive_impact_fish[index]);
            }
        }

        // Make sure we are sending and receiving the same number of messages.
        //assert(dbg == 0);

        // Interact the fish in this rank's own pocket with each other.
        printf("rank[%d], iter[%d] ------- interact [%d] local fishes\n",
               rank, iter, n_local_fish);
        interact_fish_mpi(local_fish, n_local_fish, local_fish, n_local_fish);
        printf("rank[%d], iter[%d] ------- finished interact local fish\n",
               rank, iter);

        stop_mpi_timer(&gather_timer);
        stop_mpi_timer(&mpi_timer);
        trace_end(TRACE_FISH_GATHER);

        /* We only output once every output_interval time unit, at most.
           Without that restriction, we can easily create a huge output
           file.  Printing a record for ten fish takes about 300 bytes, so
           for every 1000 steps, we could dump 300K of info.  Now scale the
           number of fish by 1000... */
        trace_begin(TRACE_OUTPUT);
        if (outputp && curr_time >= output_time) {
            if (0 == rank)
                output_fish(output_fp, curr_time, dt, fish, n_fish);
            output_time = curr_time + output_interval;
        }
        trace_end(TRACE_OUTPUT);

        trace_begin(TRACE_LOCAL_COMP);
        //interact_fish (local_fish, n_local_fish, fish, n_fish);
        local_max_norm = compute_norm(local_fish, n_local_fish);
        trace_end(TRACE_LOCAL_COMP);

        trace_begin(TRACE_MAX_NORM);
        start_mpi_timer(&mpi_timer);
        printf("rank[%d], iter[%d] ------- Allreduce max_norm, \n", rank, iter);
        MPI_Allreduce(&local_max_norm, &max_norm, 1, MPI_DOUBLE, MPI_MAX, comm);
        printf("rank[%d], iter[%d] ------- local_max_norm: %g, max_norm: %g\n",
               rank, iter, local_max_norm, max_norm);
        stop_mpi_timer(&mpi_timer);
        trace_end(TRACE_MAX_NORM);

        trace_begin(TRACE_LOCAL_COMP);
        dt = max_norm_change / max_norm;
        dt = f_max(dt, min_dt);
        dt = f_min(dt, max_dt);

        printf("rank[%d], iter[%d] ------- moving [%d] local_fish, \n",
               rank, iter, n_local_fish);
        move_fish(local_fish, n_local_fish, dt);
        printf("rank[%d], iter[%d] ------- finished moving.\n", rank, iter);
        trace_end(TRACE_LOCAL_COMP);

        iter++;
    }

    stop_mpi_timer(&total_timer);

#ifdef TRACE_WITH_VAMPIR
    VT_traceoff();
#endif

    if (outputp) {
        MPI_Allgatherv(local_fish, n_local_fish, fishtype,
                       fish, n_fish_split, fish_off, fishtype, comm);
        if (0 == rank) {
            output_fish(output_fp, curr_time, dt, fish, n_fish);
            printf("\tEnded at %g (%g), %d (%d) steps\n",
                   curr_time, end_time, steps, max_steps);
        }
    }

    printf("rank[%d], ------- 39, \n", rank);
    MPI_Reduce(&total_timer, &sum_total_timer, 1, MPI_DOUBLE, MPI_SUM, 0, comm);
    printf("rank[%d], ------- 40, \n", rank);
    MPI_Reduce(&gather_timer, &sum_gather_timer, 1, MPI_DOUBLE, MPI_SUM, 0, comm);
    printf("rank[%d], ------- 41, \n", rank);
    MPI_Reduce(&mpi_timer, &sum_mpi_timer, 1, MPI_DOUBLE, MPI_SUM, 0, comm);
    printf("rank[%d], ------- 42, \n", rank);

    if (0 == rank) {
        printf("Number of PEs: %d\n"
               "Time taken on 0: %g (avg. %g)\n"
               "Time in gathers on 0: %g (avg %g)\n"
               "Time in MPI on 0: %g (avg %g)\n",
               n_proc,
               total_timer, sum_total_timer / n_proc,
               gather_timer, sum_gather_timer / n_proc,
               mpi_timer, sum_mpi_timer / n_proc);
    }

    printf("rank[%d], ------- 43, \n", rank);
    MPI_Barrier(comm);
    printf("rank[%d], ------- 44, \n", rank);
    MPI_Finalize();
    printf("rank[%d], ------- done!!, \n", rank);
    return 0;
}
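/*
 * Isend_receive_fish() and wait_for_fish() are called in the main loop above
 * but defined elsewhere.  The sketch below shows one way they could be
 * written, inferred only from the call sites in main().  It assumes the
 * globals comm, fishtype, NUM_NEIGHBOR, and rankNeighbor[] (neighbor ranks,
 * negative when a neighbor does not exist) that appear elsewhere in this
 * file; the tag FISH_MSG_TAG is made up for the example.  The receive buffer
 * is addressed as a flat array of NUM_NEIGHBOR * max_fish fish, matching the
 * fish_t [NUM_NEIGHBOR][n_fish] buffers passed in by main().  Note that
 * main() never waits on sendReqArray; a Waitall on the send requests would
 * normally be needed before the send buffers are reused.
 */
#define FISH_MSG_TAG 17   /* hypothetical message tag for this sketch */

void
Isend_receive_fish(fish_t *send_fish[], int n_send_fish[],
                   void *receive_fish, int max_fish,
                   MPI_Request *send_reqs, MPI_Request *recv_reqs)
{
    fish_t *recv_base = (fish_t *) receive_fish;
    int j;

    for (j = 0; j < NUM_NEIGHBOR; ++j) {
        if (rankNeighbor[j] < 0) {
            /* No neighbor in this direction: post nothing. */
            send_reqs[j] = MPI_REQUEST_NULL;
            recv_reqs[j] = MPI_REQUEST_NULL;
            continue;
        }
        MPI_Isend(send_fish[j], n_send_fish[j], fishtype,
                  rankNeighbor[j], FISH_MSG_TAG, comm, &send_reqs[j]);
        MPI_Irecv(recv_base + (size_t) j * max_fish, max_fish, fishtype,
                  rankNeighbor[j], FISH_MSG_TAG, comm, &recv_reqs[j]);
    }
}

/* Wait for every posted receive and record how many fish actually arrived. */
void
wait_for_fish(MPI_Request *recv_reqs, int *n_received)
{
    int j;
    MPI_Status status;

    for (j = 0; j < NUM_NEIGHBOR; ++j) {
        if (recv_reqs[j] == MPI_REQUEST_NULL) {
            n_received[j] = 0;
            continue;
        }
        MPI_Wait(&recv_reqs[j], &status);
        MPI_Get_count(&status, fishtype, &n_received[j]);
    }
}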