int main( int argc, char **argv ) { int i, iteration, timer_on; double timecounter; FILE *fp; /* Initialize timers */ timer_on = 0; if ((fp = fopen("timer.flag", "r")) != NULL) { fclose(fp); timer_on = 1; } timer_clear( 0 ); if (timer_on) { timer_clear( 1 ); timer_clear( 2 ); timer_clear( 3 ); } if (timer_on) timer_start( 3 ); /* Initialize the verification arrays if a valid class */ for( i=0; i<TEST_ARRAY_SIZE; i++ ) switch( CLASS ) { case 'S': test_index_array[i] = S_test_index_array[i]; test_rank_array[i] = S_test_rank_array[i]; break; case 'A': test_index_array[i] = A_test_index_array[i]; test_rank_array[i] = A_test_rank_array[i]; break; case 'W': test_index_array[i] = W_test_index_array[i]; test_rank_array[i] = W_test_rank_array[i]; break; case 'B': test_index_array[i] = B_test_index_array[i]; test_rank_array[i] = B_test_rank_array[i]; break; case 'C': test_index_array[i] = C_test_index_array[i]; test_rank_array[i] = C_test_rank_array[i]; break; case 'D': test_index_array[i] = D_test_index_array[i]; test_rank_array[i] = D_test_rank_array[i]; break; }; /* Printout initial NPB info */ printf ( "\n\n NAS Parallel Benchmarks (NPB3.3-OMP) - IS Benchmark\n\n" ); printf( " Size: %ld (class %c)\n", (long)TOTAL_KEYS, CLASS ); printf( " Iterations: %d\n", MAX_ITERATIONS ); #ifdef _OPENMP printf( " Number of available threads: %d\n", omp_get_max_threads() ); #endif printf( "\n" ); if (timer_on) timer_start( 1 ); /* Generate random number sequence and subsequent keys on all procs */ create_seq( 314159265.00, /* Random number gen seed */ 1220703125.00 ); /* Random number gen mult */ alloc_key_buff(); if (timer_on) timer_stop( 1 ); /* Do one interation for free (i.e., untimed) to guarantee initialization of all data and code pages and respective tables */ rank( 1 ); /* Start verification counter */ passed_verification = 0; if( CLASS != 'S' ) printf( "\n iteration\n" ); /* Start timer */ timer_start( 0 ); /* This is the main iteration */ for( iteration=1; iteration<=MAX_ITERATIONS; iteration++ ) { if( CLASS != 'S' ) printf( " %d\n", iteration ); rank( iteration ); } /* End of timing, obtain maximum time of all processors */ timer_stop( 0 ); timecounter = timer_read( 0 ); /* This tests that keys are in sequence: sorting of last ranked key seq occurs here, but is an untimed operation */ if (timer_on) timer_start( 2 ); full_verify(); if (timer_on) timer_stop( 2 ); if (timer_on) timer_stop( 3 ); /* The final printout */ if( passed_verification != 5*MAX_ITERATIONS + 1 ) passed_verification = 0; c_print_results( "IS", CLASS, (int)(TOTAL_KEYS/64), 64, 0, MAX_ITERATIONS, timecounter, ((double) (MAX_ITERATIONS*TOTAL_KEYS)) /timecounter/1000000., "keys ranked", passed_verification, NPBVERSION, COMPILETIME, CC, CLINK, C_LIB, C_INC, CFLAGS, CLINKFLAGS ); /* Print additional timers */ if (timer_on) { double t_total, t_percent; t_total = timer_read( 3 ); printf("\nAdditional timers -\n"); printf(" Total execution: %8.3f\n", t_total); if (t_total == 0.0) t_total = 1.0; timecounter = timer_read(1); t_percent = timecounter/t_total * 100.; printf(" Initialization : %8.3f (%5.2f%%)\n", timecounter, t_percent); timecounter = timer_read(0); t_percent = timecounter/t_total * 100.; printf(" Benchmarking : %8.3f (%5.2f%%)\n", timecounter, t_percent); timecounter = timer_read(2); t_percent = timecounter/t_total * 100.; printf(" Sorting : %8.3f (%5.2f%%)\n", timecounter, t_percent); } return 0; /**************************/ } /* E N D P R O G R A M */
int main( int argc, char **argv ) { int i, iteration, itemp; double timecounter, maxtime; /* Initialize MPI */ MPI_Init( &argc, &argv ); MPI_Comm_rank( MPI_COMM_WORLD, &my_rank ); MPI_Comm_size( MPI_COMM_WORLD, &comm_size ); /* Initialize the verification arrays if a valid class */ for( i=0; i<TEST_ARRAY_SIZE; i++ ) switch( CLASS ) { case 'S': test_index_array[i] = S_test_index_array[i]; test_rank_array[i] = S_test_rank_array[i]; break; case 'A': test_index_array[i] = A_test_index_array[i]; test_rank_array[i] = A_test_rank_array[i]; break; case 'W': test_index_array[i] = W_test_index_array[i]; test_rank_array[i] = W_test_rank_array[i]; break; case 'B': test_index_array[i] = B_test_index_array[i]; test_rank_array[i] = B_test_rank_array[i]; break; case 'C': test_index_array[i] = C_test_index_array[i]; test_rank_array[i] = C_test_rank_array[i]; break; case 'D': test_index_array[i] = D_test_index_array[i]; test_rank_array[i] = D_test_rank_array[i]; break; }; /* Printout initial NPB info */ if( my_rank == 0 ) { FILE *fp; printf( "\n\n NAS Parallel Benchmarks 3.3 -- IS Benchmark\n\n" ); printf( " Size: %ld (class %c)\n", (long)TOTAL_KEYS*MIN_PROCS, CLASS ); printf( " Iterations: %d\n", MAX_ITERATIONS ); printf( " Number of processes: %d\n", comm_size ); fp = fopen("timer.flag", "r"); timeron = 0; if (fp) { timeron = 1; fclose(fp); } } /* Check that actual and compiled number of processors agree */ if( comm_size != NUM_PROCS ) { if( my_rank == 0 ) printf( "\n ERROR: compiled for %d processes\n" " Number of active processes: %d\n" " Exiting program!\n\n", NUM_PROCS, comm_size ); MPI_Finalize(); exit( 1 ); } /* Check to see whether total number of processes is within bounds. This could in principle be checked in setparams.c, but it is more convenient to do it here */ if( comm_size < MIN_PROCS || comm_size > MAX_PROCS) { if( my_rank == 0 ) printf( "\n ERROR: number of processes %d not within range %d-%d" "\n Exiting program!\n\n", comm_size, MIN_PROCS, MAX_PROCS); MPI_Finalize(); exit( 1 ); } MPI_Bcast(&timeron, 1, MPI_INT, 0, MPI_COMM_WORLD); #ifdef TIMING_ENABLED for( i=1; i<=T_LAST; i++ ) timer_clear( i ); #endif /* Generate random number sequence and subsequent keys on all procs */ create_seq( find_my_seed( my_rank, comm_size, 4*(long)TOTAL_KEYS*MIN_PROCS, 314159265.00, /* Random number gen seed */ 1220703125.00 ), /* Random number gen mult */ 1220703125.00 ); /* Random number gen mult */ /* Do one interation for free (i.e., untimed) to guarantee initialization of all data and code pages and respective tables */ rank( 1 ); /* Start verification counter */ passed_verification = 0; if( my_rank == 0 && CLASS != 'S' ) printf( "\n iteration\n" ); /* Initialize timer */ timer_clear( 0 ); /* Initialize separate communication, computation timing */ #ifdef TIMING_ENABLED for( i=1; i<=T_LAST; i++ ) timer_clear( i ); #endif /* Start timer */ timer_start( 0 ); /* This is the main iteration */ for( iteration=1; iteration<=MAX_ITERATIONS; iteration++ ) { if( my_rank == 0 && CLASS != 'S' ) printf( " %d\n", iteration ); rank( iteration ); } /* Stop timer, obtain time for processors */ timer_stop( 0 ); timecounter = timer_read( 0 ); /* End of timing, obtain maximum time of all processors */ MPI_Reduce( &timecounter, &maxtime, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD ); /* This tests that keys are in sequence: sorting of last ranked key seq occurs here, but is an untimed operation */ full_verify(); /* Obtain verification counter sum */ itemp = passed_verification; MPI_Reduce( &itemp, &passed_verification, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD ); /* The final printout */ if( my_rank == 0 ) { if( passed_verification != 5*MAX_ITERATIONS + comm_size ) passed_verification = 0; c_print_results( "IS", CLASS, (int)(TOTAL_KEYS), MIN_PROCS, 0, MAX_ITERATIONS, NUM_PROCS, comm_size, maxtime, ((double) (MAX_ITERATIONS)*TOTAL_KEYS*MIN_PROCS) /maxtime/1000000., "keys ranked", passed_verification, NPBVERSION, COMPILETIME, MPICC, CLINK, CMPI_LIB, CMPI_INC, CFLAGS, CLINKFLAGS ); } #ifdef TIMING_ENABLED if (timeron) { double t1[T_LAST+1], tmin[T_LAST+1], tsum[T_LAST+1], tmax[T_LAST+1]; char t_recs[T_LAST+1][9]; for( i=0; i<=T_LAST; i++ ) t1[i] = timer_read( i ); MPI_Reduce( t1, tmin, T_LAST+1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD ); MPI_Reduce( t1, tsum, T_LAST+1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD ); MPI_Reduce( t1, tmax, T_LAST+1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD ); if( my_rank == 0 ) { strcpy( t_recs[T_TOTAL], "total" ); strcpy( t_recs[T_RANK], "rcomp" ); strcpy( t_recs[T_RCOMM], "rcomm" ); strcpy( t_recs[T_VERIFY], "verify"); printf( " nprocs = %6d ", comm_size); printf( " minimum maximum average\n" ); for( i=0; i<=T_LAST; i++ ) { printf( " timer %2d (%-8s): %10.4f %10.4f %10.4f\n", i+1, t_recs[i], tmin[i], tmax[i], tsum[i]/((double) comm_size) ); } printf( "\n" ); } } #endif MPI_Finalize(); return 0; /**************************/ } /* E N D P R O G R A M */
int main(int argc, char** argv ) { int i, iteration, itemp; int nthreads = 1; double timecounter, maxtime; /* Initialize the verification arrays if a valid class */ for( i=0; i<TEST_ARRAY_SIZE; i++ ) switch( CLASS ) { case 'S': test_index_array[i] = S_test_index_array[i]; test_rank_array[i] = S_test_rank_array[i]; break; case 'A': test_index_array[i] = A_test_index_array[i]; test_rank_array[i] = A_test_rank_array[i]; break; case 'W': test_index_array[i] = W_test_index_array[i]; test_rank_array[i] = W_test_rank_array[i]; break; case 'B': test_index_array[i] = B_test_index_array[i]; test_rank_array[i] = B_test_rank_array[i]; break; case 'C': test_index_array[i] = C_test_index_array[i]; test_rank_array[i] = C_test_rank_array[i]; break; }; /* Printout initial NPB info */ printf( "\n\n NAS Parallel Benchmarks 2.3 OpenMP C version" " - IS Benchmark\n\n" ); printf( " Size: %d (class %c)\n", TOTAL_KEYS, CLASS ); printf( " Iterations: %d\n", MAX_ITERATIONS ); /* Initialize timer */ timer_clear( 0 ); /* Generate random number sequence and subsequent keys on all procs */ create_seq( 314159265.00, /* Random number gen seed */ 1220703125.00 ); /* Random number gen mult */ /* Do one interation for free (i.e., untimed) to guarantee initialization of all data and code pages and respective tables */ #pragma omp parallel rank( 1 ); /* Start verification counter */ passed_verification = 0; if( CLASS != 'S' ) printf( "\n iteration\n" ); /* Start timer */ timer_start( 0 ); /* This is the main iteration */ #pragma omp parallel private(iteration) for( iteration=1; iteration<=MAX_ITERATIONS; iteration++ ) { #pragma omp master if( CLASS != 'S' ) printf( " %d\n", iteration ); rank( iteration ); #if defined(_OPENMP) #pragma omp master nthreads = omp_get_num_threads(); #endif /* _OPENMP */ } /* End of timing, obtain maximum time of all processors */ timer_stop( 0 ); timecounter = timer_read( 0 ); /* This tests that keys are in sequence: sorting of last ranked key seq occurs here, but is an untimed operation */ full_verify(); /* The final printout */ if( passed_verification != 5*MAX_ITERATIONS + 1 ) passed_verification = 0; c_print_results( "IS", CLASS, TOTAL_KEYS, 0, 0, MAX_ITERATIONS, nthreads, timecounter, ((double) (MAX_ITERATIONS*TOTAL_KEYS)) /timecounter/1000000., "keys ranked", passed_verification, NPBVERSION, COMPILETIME, CC, CLINK, C_LIB, C_INC, CFLAGS, CLINKFLAGS, "randlc2"); return 0; /**************************/ } /* E N D P R O G R A M */
int main( int argc, char **argv ) { MPI_Init(&argc,&argv); INT_TYPE chunk; int ini, fim; int i, j, iteration, timer_on; double timecounter; FILE *fp; int myrank; MPI_Status st; MPI_Comm_rank(MPI_COMM_WORLD,&myrank); MPI_Comm_size(MPI_COMM_WORLD,&NUM_THREADS); if (myrank == 0) { /* Initialize timers */ timer_on = 0; if ((fp = fopen("timer.flag", "r")) != NULL) { fclose(fp); timer_on = 1; } timer_clear( 0 ); if (timer_on) { timer_clear( 1 ); timer_clear( 2 ); timer_clear( 3 ); } if (timer_on) timer_start( 3 ); /* Initialize the verification arrays if a valid class */ for( i=0; i<TEST_ARRAY_SIZE; i++ ) switch( CLASS ) { case 'S': test_index_array[i] = S_test_index_array[i]; test_rank_array[i] = S_test_rank_array[i]; break; case 'A': test_index_array[i] = A_test_index_array[i]; test_rank_array[i] = A_test_rank_array[i]; break; case 'W': test_index_array[i] = W_test_index_array[i]; test_rank_array[i] = W_test_rank_array[i]; break; case 'B': test_index_array[i] = B_test_index_array[i]; test_rank_array[i] = B_test_rank_array[i]; break; case 'C': test_index_array[i] = C_test_index_array[i]; test_rank_array[i] = C_test_rank_array[i]; break; case 'D': test_index_array[i] = D_test_index_array[i]; test_rank_array[i] = D_test_rank_array[i]; break; }; /* Printout initial NPB info */ printf ( "\n\n NAS Parallel Benchmarks (NPB3.3-SER) - IS Benchmark\n\n" ); printf( " Size: %ld (class %c)\n", (long)TOTAL_KEYS, CLASS ); printf( " Number of available threads: %d\n", NUM_THREADS ); printf( " Iterations: %d\n", MAX_ITERATIONS ); if (timer_on) timer_start( 1 ); } R23 = pow(2, -23); T23 = pow(2, 23); R46 = pow(2, -46); T46 = pow(2, 46); /* Generate random number sequence and subsequent keys on all procs */ create_seq(myrank); if (myrank == 0) { // sincronizar resultados for (i = 1; i < NUM_THREADS; i++) { chunk = (NUM_KEYS + NUM_THREADS - 1) / NUM_THREADS; ini = chunk * i; fim = ini + chunk; if ( fim > NUM_KEYS ) { fim = NUM_KEYS; } MPI_Recv( &aux_key_array[ini], (fim - ini), MPI_INT, i, 0, MPI_COMM_WORLD, &st ); for (j = ini; j < fim; j++) { key_array[j] = aux_key_array[j]; } } } else { chunk = (NUM_KEYS + NUM_THREADS - 1) / NUM_THREADS; ini = chunk * myrank; fim = ini + chunk; if ( fim > NUM_KEYS ) { fim = NUM_KEYS; } // enviar resultados MPI_Send( &key_array[ini], (fim - ini), MPI_INT, 0, 0, MPI_COMM_WORLD ); } if (myrank == 0) { if (timer_on) { timer_stop( 1 ); } /* Do one interation for free (i.e., untimed) to guarantee initialization of all data and code pages and respective tables */ rank( 1 ); /* Start verification counter */ passed_verification = 0; if( CLASS != 'S' ) printf( "\n iteration\n" ); /* Start timer */ timer_start( 0 ); /* This is the main iteration */ for( iteration=1; iteration<=MAX_ITERATIONS; iteration++ ) { if( CLASS != 'S' ) printf( " %d\n", iteration ); rank( iteration ); } /* End of timing, obtain maximum time of all processors */ timer_stop( 0 ); timecounter = timer_read( 0 ); /* This tests that keys are in sequence: sorting of last ranked key seq occurs here, but is an untimed operation */ if (timer_on) timer_start( 2 ); full_verify(); if (timer_on) timer_stop( 2 ); if (timer_on) timer_stop( 3 ); /* The final printout */ if( passed_verification != 5*MAX_ITERATIONS + 1 ) passed_verification = 0; c_print_results( "IS", CLASS, (int)(TOTAL_KEYS/64), 64, 0, MAX_ITERATIONS, timecounter, ((double) (MAX_ITERATIONS*TOTAL_KEYS)) /timecounter/1000000., "keys ranked", passed_verification, NPBVERSION, COMPILETIME, CC, CLINK, C_LIB, C_INC, CFLAGS, CLINKFLAGS ); /* Print additional timers */ if (timer_on) { double t_total, t_percent; t_total = timer_read( 3 ); printf("\nAdditional timers -\n"); printf(" Total execution: %8.3f\n", t_total); if (t_total == 0.0) t_total = 1.0; timecounter = timer_read(1); t_percent = timecounter/t_total * 100.; printf(" Initialization : %8.3f (%5.2f%%)\n", timecounter, t_percent); timecounter = timer_read(0); t_percent = timecounter/t_total * 100.; printf(" Benchmarking : %8.3f (%5.2f%%)\n", timecounter, t_percent); timecounter = timer_read(2); t_percent = timecounter/t_total * 100.; printf(" Sorting : %8.3f (%5.2f%%)\n", timecounter, t_percent); } } MPI_Finalize(); return 0; /**************************/ } /* E N D P R O G R A M */
int main( int argc, char **argv ) { int i, iteration; double timecounter; FILE *fp; cl_int ecode; if (argc == 1) { fprintf(stderr, "Usage: %s <kernel directory>\n", argv[0]); exit(-1); } /* Initialize timers */ timer_on = 0; if ((fp = fopen("timer.flag", "r")) != NULL) { fclose(fp); timer_on = 1; } timer_clear( 0 ); if (timer_on) { timer_clear( 1 ); timer_clear( 2 ); timer_clear( 3 ); } if (timer_on) timer_start( 3 ); /* Initialize the verification arrays if a valid class */ for( i=0; i<TEST_ARRAY_SIZE; i++ ) switch( CLASS ) { case 'S': test_index_array[i] = S_test_index_array[i]; test_rank_array[i] = S_test_rank_array[i]; break; case 'A': test_index_array[i] = A_test_index_array[i]; test_rank_array[i] = A_test_rank_array[i]; break; case 'W': test_index_array[i] = W_test_index_array[i]; test_rank_array[i] = W_test_rank_array[i]; break; case 'B': test_index_array[i] = B_test_index_array[i]; test_rank_array[i] = B_test_rank_array[i]; break; case 'C': test_index_array[i] = C_test_index_array[i]; test_rank_array[i] = C_test_rank_array[i]; break; case 'D': test_index_array[i] = D_test_index_array[i]; test_rank_array[i] = D_test_rank_array[i]; break; }; /* set up the OpenCL environment. */ setup_opencl(argc, argv); /* Printout initial NPB info */ printf( "\n\n NAS Parallel Benchmarks (NPB3.3-OCL) - IS Benchmark\n\n" ); printf( " Size: %ld (class %c)\n", (long)TOTAL_KEYS, CLASS ); printf( " Iterations: %d\n", MAX_ITERATIONS ); if (timer_on) timer_start( 1 ); /* Generate random number sequence and subsequent keys on all procs */ create_seq( 314159265.00, /* Random number gen seed */ 1220703125.00 ); /* Random number gen mult */ if (timer_on) timer_stop( 1 ); /* Do one interation for free (i.e., untimed) to guarantee initialization of all data and code pages and respective tables */ rank( 1 ); /* Start verification counter */ passed_verification = 0; DTIMER_START(T_BUFFER_WRITE); ecode = clEnqueueWriteBuffer(cmd_queue, m_passed_verification, CL_TRUE, 0, sizeof(cl_int), &passed_verification, 0, NULL, NULL); clu_CheckError(ecode, "clEnqueueWriteBuffer() for m_passed_verification"); DTIMER_STOP(T_BUFFER_WRITE); if( CLASS != 'S' ) printf( "\n iteration\n" ); /* Start timer */ timer_start( 0 ); /* This is the main iteration */ for( iteration=1; iteration<=MAX_ITERATIONS; iteration++ ) { if( CLASS != 'S' ) printf( " %d\n", iteration ); rank( iteration ); } DTIMER_START(T_BUFFER_READ); ecode = clEnqueueReadBuffer(cmd_queue, m_passed_verification, CL_TRUE, 0, sizeof(cl_int), &passed_verification, 0, NULL, NULL); clu_CheckError(ecode, "clEnqueueReadBuffer() for m_passed_verification"); DTIMER_STOP(T_BUFFER_READ); /* End of timing, obtain maximum time of all processors */ timer_stop( 0 ); timecounter = timer_read( 0 ); /* This tests that keys are in sequence: sorting of last ranked key seq occurs here, but is an untimed operation */ if (timer_on) timer_start( 2 ); full_verify(); if (timer_on) timer_stop( 2 ); if (timer_on) timer_stop( 3 ); /* The final printout */ if( passed_verification != 5*MAX_ITERATIONS + 1 ) passed_verification = 0; c_print_results( "IS", CLASS, (int)(TOTAL_KEYS/64), 64, 0, MAX_ITERATIONS, timecounter, ((double) (MAX_ITERATIONS*TOTAL_KEYS)) /timecounter/1000000., "keys ranked", passed_verification, NPBVERSION, COMPILETIME, CC, CLINK, C_LIB, C_INC, CFLAGS, CLINKFLAGS, "", clu_GetDeviceTypeName(device_type), device_name); /* Print additional timers */ if (timer_on) { double t_total, t_percent; t_total = timer_read( 3 ); printf("\nAdditional timers -\n"); printf(" Total execution: %8.3f\n", t_total); if (t_total == 0.0) t_total = 1.0; timecounter = timer_read(1); t_percent = timecounter/t_total * 100.; printf(" Initialization : %8.3f (%5.2f%%)\n", timecounter, t_percent); timecounter = timer_read(0); t_percent = timecounter/t_total * 100.; printf(" Benchmarking : %8.3f (%5.2f%%)\n", timecounter, t_percent); timecounter = timer_read(2); t_percent = timecounter/t_total * 100.; printf(" Sorting : %8.3f (%5.2f%%)\n", timecounter, t_percent); } release_opencl(); fflush(stdout); return 0; /**************************/ } /* E N D P R O G R A M */