/*
 * Generate this thread's contiguous slice of the pseudo-random key
 * sequence in key_array[].
 *
 * myId : zero-based thread index in [0, NUM_THREADS).
 *
 * Each thread jumps the linear-congruential generator ahead to the
 * start of its own sub-stream (4 randoms are consumed per key), so the
 * combined output is identical to a serial generation of all keys.
 */
void create_seq( int myId )
{
    double seed = 314159265.00;     /* NPB-standard LCG seed       */
    double mult = 1220703125.00;    /* NPB-standard LCG multiplier */
    double s, sum4;
    INT_TYPE idx, scale, span;
    int lo, hi;

    /* Ceiling division: every thread owns a span of at most `span`
       consecutive keys; the last thread's range is clipped. */
    span = (NUM_KEYS + NUM_THREADS - 1) / NUM_THREADS;
    lo = span * myId;
    hi = lo + span;
    if ( hi > NUM_KEYS ) {
        hi = NUM_KEYS;
    }

    /* Advance the generator state to this thread's starting position:
       four random numbers are drawn per generated key. */
    s = find_my_seed( myId, NUM_THREADS, (long)4*NUM_KEYS, seed, mult );

    scale = MAX_KEY/4;
    for (idx = lo; idx < hi; idx++) {
        /* Sum of four deviates scaled by MAX_KEY/4 — presumably each
           randlc() value lies in [0,1) so keys land in [0, MAX_KEY);
           matches the NPB reference generator. */
        sum4  = randlc(&s, &mult);
        sum4 += randlc(&s, &mult);
        sum4 += randlc(&s, &mult);
        sum4 += randlc(&s, &mult);
        key_array[idx] = scale*sum4;
    }
}
/*
 * Generate the full pseudo-random key sequence in key_array[], with
 * each OpenMP thread filling one contiguous chunk of keys.
 *
 * seed : initial seed for the linear-congruential generator.
 * a    : generator multiplier.
 */
void create_seq( double seed, double a )
{
    double x, s;
    INT_TYPE i, k;

#pragma omp parallel private(x,s,i,k)
    {
        INT_TYPE k1, k2;
        double an = a;          /* thread-private copy of the multiplier
                                   (randlc writes through its pointer args) */
        int myid, num_procs;
        INT_TYPE mq;

#ifdef _OPENMP
        myid = omp_get_thread_num();
        num_procs = omp_get_num_threads();
#else
        /* Serial fallback when built without OpenMP. */
        myid = 0;
        num_procs = 1;
#endif

        /* Ceiling division: mq keys per thread; last chunk is clipped
           so indices never run past NUM_KEYS. */
        mq = (NUM_KEYS + num_procs - 1) / num_procs;
        k1 = mq * myid;
        k2 = k1 + mq;
        if ( k2 > NUM_KEYS ) k2 = NUM_KEYS;

        /* NOTE(review): every thread stores 0 into what appears to be a
           shared global KS — all writers store the same value, so this
           looks like a benign race, but confirm against KS's declaration. */
        KS = 0;
        /* Jump the generator ahead to this thread's sub-stream start:
           four random numbers are consumed per key. */
        s = find_my_seed( myid, num_procs, (long)4*NUM_KEYS, seed, a );

        k = MAX_KEY/4;
        for (i=k1; i<k2; i++)
        {
            /* Sum of four deviates scaled by MAX_KEY/4 — presumably
               randlc() returns values in [0,1), giving keys in
               [0, MAX_KEY); matches the NPB reference generator. */
            x = randlc(&s, &an);
            x += randlc(&s, &an);
            x += randlc(&s, &an);
            x += randlc(&s, &an);
            key_array[i] = k*x;
        }
    } /*omp parallel*/
}
/*
 * IS benchmark driver (MPI version): initializes MPI, generates the
 * key sequence, runs MAX_ITERATIONS timed ranking iterations, verifies
 * the result, and prints the benchmark report from rank 0.
 */
int main( int argc, char **argv )
{
    int             i, iteration, itemp;
    double          timecounter, maxtime;

/*  Initialize MPI */
    MPI_Init( &argc, &argv );
    MPI_Comm_rank( MPI_COMM_WORLD, &my_rank );
    MPI_Comm_size( MPI_COMM_WORLD, &comm_size );

/*  Initialize the verification arrays if a valid class
    (copies the per-class reference index/rank tables into the
    working test arrays; an unknown CLASS leaves them untouched). */
    for( i=0; i<TEST_ARRAY_SIZE; i++ )
        switch( CLASS )
        {
            case 'S':
                test_index_array[i] = S_test_index_array[i];
                test_rank_array[i] = S_test_rank_array[i];
                break;
            case 'A':
                test_index_array[i] = A_test_index_array[i];
                test_rank_array[i] = A_test_rank_array[i];
                break;
            case 'W':
                test_index_array[i] = W_test_index_array[i];
                test_rank_array[i] = W_test_rank_array[i];
                break;
            case 'B':
                test_index_array[i] = B_test_index_array[i];
                test_rank_array[i] = B_test_rank_array[i];
                break;
            case 'C':
                test_index_array[i] = C_test_index_array[i];
                test_rank_array[i] = C_test_rank_array[i];
                break;
            case 'D':
                test_index_array[i] = D_test_index_array[i];
                test_rank_array[i] = D_test_rank_array[i];
                break;
        };

/*  Printout initial NPB info; the presence of a "timer.flag" file on
    rank 0 enables the optional section timers. */
    if( my_rank == 0 )
    {
        FILE *fp;
        printf( "\n\n NAS Parallel Benchmarks 3.3 -- IS Benchmark\n\n" );
        printf( " Size: %ld (class %c)\n", (long)TOTAL_KEYS*MIN_PROCS, CLASS );
        printf( " Iterations: %d\n", MAX_ITERATIONS );
        printf( " Number of processes: %d\n", comm_size );

        fp = fopen("timer.flag", "r");
        timeron = 0;
        if (fp)
        {
            timeron = 1;
            fclose(fp);
        }
    }

/*  Check that actual and compiled number of processors agree */
    if( comm_size != NUM_PROCS )
    {
        if( my_rank == 0 )
            printf( "\n ERROR: compiled for %d processes\n"
                    " Number of active processes: %d\n"
                    " Exiting program!\n\n", NUM_PROCS, comm_size );
        MPI_Finalize();
        exit( 1 );
    }

/*  Check to see whether total number of processes is within bounds.
    This could in principle be checked in setparams.c, but it is more
    convenient to do it here */
    if( comm_size < MIN_PROCS || comm_size > MAX_PROCS)
    {
        if( my_rank == 0 )
            printf( "\n ERROR: number of processes %d not within range %d-%d"
                    "\n Exiting program!\n\n", comm_size, MIN_PROCS, MAX_PROCS);
        MPI_Finalize();
        exit( 1 );
    }

    /* Share rank 0's timer.flag decision with all ranks. */
    MPI_Bcast(&timeron, 1, MPI_INT, 0, MPI_COMM_WORLD);

#ifdef  TIMING_ENABLED
    for( i=1; i<=T_LAST; i++ ) timer_clear( i );
#endif

/*  Generate random number sequence and subsequent keys on all procs.
    The seed is jumped ahead per rank so each process generates its own
    disjoint sub-stream. */
    create_seq( find_my_seed( my_rank,
                              comm_size,
                              4*(long)TOTAL_KEYS*MIN_PROCS,
                              314159265.00,      /* Random number gen seed */
                              1220703125.00 ),   /* Random number gen mult */
                1220703125.00 );                 /* Random number gen mult */

/*  Do one interation for free (i.e., untimed) to guarantee
    initialization of all data and code pages and respective tables */
    rank( 1 );

/*  Start verification counter */
    passed_verification = 0;

    if( my_rank == 0 && CLASS != 'S' ) printf( "\n   iteration\n" );

/*  Initialize timer  */
    timer_clear( 0 );

/*  Initialize separate communication, computation timing */
#ifdef  TIMING_ENABLED
    for( i=1; i<=T_LAST; i++ ) timer_clear( i );
#endif

/*  Start timer  */
    timer_start( 0 );

/*  This is the main iteration: rank() performs the timed key ranking
    (and, on test iterations, partial verification). */
    for( iteration=1; iteration<=MAX_ITERATIONS; iteration++ )
    {
        if( my_rank == 0 && CLASS != 'S' ) printf( "        %d\n", iteration );
        rank( iteration );
    }

/*  Stop timer, obtain time for processors */
    timer_stop( 0 );

    timecounter = timer_read( 0 );

/*  End of timing, obtain maximum time of all processors */
    MPI_Reduce( &timecounter,
                &maxtime,
                1,
                MPI_DOUBLE,
                MPI_MAX,
                0,
                MPI_COMM_WORLD );

/*  This tests that keys are in sequence: sorting of last ranked key seq
    occurs here, but is an untimed operation                             */
    full_verify();

/*  Obtain verification counter sum: itemp holds the local count because
    MPI_Reduce send and receive buffers must not alias. */
    itemp = passed_verification;
    MPI_Reduce( &itemp,
                &passed_verification,
                1,
                MPI_INT,
                MPI_SUM,
                0,
                MPI_COMM_WORLD );

/*  The final printout: expected total is 5 partial checks per iteration
    plus one full_verify success per process. */
    if( my_rank == 0 )
    {
        if( passed_verification != 5*MAX_ITERATIONS + comm_size )
            passed_verification = 0;
        c_print_results( "IS",
                         CLASS,
                         (int)(TOTAL_KEYS),
                         MIN_PROCS,
                         0,
                         MAX_ITERATIONS,
                         NUM_PROCS,
                         comm_size,
                         maxtime,
                         ((double) (MAX_ITERATIONS)*TOTAL_KEYS*MIN_PROCS)
                                                      /maxtime/1000000.,
                         "keys ranked",
                         passed_verification,
                         NPBVERSION,
                         COMPILETIME,
                         MPICC,
                         CLINK,
                         CMPI_LIB,
                         CMPI_INC,
                         CFLAGS,
                         CLINKFLAGS );
    }

#ifdef  TIMING_ENABLED
    /* Optional per-section timing report: min/max/avg across ranks. */
    if (timeron)
    {
        double    t1[T_LAST+1], tmin[T_LAST+1], tsum[T_LAST+1], tmax[T_LAST+1];
        char      t_recs[T_LAST+1][9];

        for( i=0; i<=T_LAST; i++ )
            t1[i] = timer_read( i );

        MPI_Reduce( t1,
                    tmin,
                    T_LAST+1,
                    MPI_DOUBLE,
                    MPI_MIN,
                    0,
                    MPI_COMM_WORLD );
        MPI_Reduce( t1,
                    tsum,
                    T_LAST+1,
                    MPI_DOUBLE,
                    MPI_SUM,
                    0,
                    MPI_COMM_WORLD );
        MPI_Reduce( t1,
                    tmax,
                    T_LAST+1,
                    MPI_DOUBLE,
                    MPI_MAX,
                    0,
                    MPI_COMM_WORLD );

        if( my_rank == 0 )
        {
            strcpy( t_recs[T_TOTAL], "total" );
            strcpy( t_recs[T_RANK], "rcomp" );
            strcpy( t_recs[T_RCOMM], "rcomm" );
            strcpy( t_recs[T_VERIFY], "verify");
            printf( " nprocs = %6d ", comm_size);
            printf( "     minimum     maximum     average\n" );
            for( i=0; i<=T_LAST; i++ )
            {
                printf( " timer %2d (%-8s):  %10.4f  %10.4f  %10.4f\n",
                        i+1, t_recs[i], tmin[i], tmax[i],
                        tsum[i]/((double) comm_size) );
            }
            printf( "\n" );
        }
    }
#endif

    MPI_Finalize();

    return 0;
         /**************************/
}        /*  E N D  P R O G R A M  */