void rarch_perf_log(void) { if (!runloop_ctl(RUNLOOP_CTL_IS_PERFCNT_ENABLE, NULL)) return; RARCH_LOG("[PERF]: Performance counters (RetroArch):\n"); log_counters(perf_counters_rarch, perf_ptr_rarch); }
int main( int argc, char *argv[] ) { int Events[] = { #ifdef CACHE_PROFILE PAPI_L2_TCM, PAPI_L3_TCM, PAPI_L2_TCA, PAPI_L3_TCA #else PAPI_FP_OPS #endif }; long long values[SIZE( Events )]; long long tic; if( argc != 4 ) { printf( "Usage: %s input_format input_file output_prefix\n", argv[0] ); return EXIT_FAILURE; } char *input_format = argv[1]; char *input_file = argv[2]; char *output_prefix = argv[3]; int status = 0; /** internal cells start and end index*/ int nintci, nintcf; /** external cells start and end index. * The external cells are only ghost cells. They are accessed only through internal cells*/ int nextci, nextcf; /** link cell-to-cell array. Stores topology information*/ int **lcc; /** red-black colouring of the cells*/ int *nboard; /** boundary coefficients for each volume cell */ double *bs, *be, *bn, *bw, *bl, *bh, *bp, *su; char pstats_filename[strlen( output_prefix ) + strlen( "pstats.dat" ) + 1]; strcpy( pstats_filename, output_prefix ); strcat( pstats_filename, "pstats.dat" ); FILE *pstats = fopen( pstats_filename, "w" ); if( pstats == NULL ) { printf( "Cannot open file for writing: %s\n", pstats_filename ); return EXIT_FAILURE; } /* Start counting events */ if( PAPI_start_counters( Events, SIZE( Events ) ) != PAPI_OK ) { handle_error( 1 ); } /** start measuring wall clock time */ tic = PAPI_get_real_usec(); /* initialization */ // read-in the input file if( !strcmp( "bin", input_format ) ) { status = read_binary( input_file, &nintci, &nintcf, &nextci, &nextcf, &lcc, &bs, &be, &bn, &bw, &bl, &bh, &bp, &su, &nboard ); } else if( !strcmp( "text", input_format ) ) { status = read_formatted( input_file, &nintci, &nintcf, &nextci, &nextcf, &lcc, &bs, &be, &bn, &bw, &bl, &bh, &bp, &su, &nboard ); } else { printf( "valid input_format values: text, bin\n" ); return EXIT_FAILURE; } if( status != 0 ) { printf( "failed to initialize data!\n" ); return EXIT_FAILURE; } /* Print profile data for phase INPUT */ log_counters( pstats, "INPUT", &tic, values ); // allocate arrays used in gccg int nomax = 3; /** the reference residual*/ double resref = 0.0; /** the ratio between the reference and the current residual*/ double ratio; /** array storing residuals */ double *resvec = ( double * ) calloc( sizeof( double ), ( nintcf + 1 ) ); /** the variation vector -> keeps the result in the end */ double *var = ( double * ) calloc( sizeof( double ), ( nextcf + 1 ) ); /** the computation vectors */ double *direc1 = ( double * ) calloc( sizeof( double ), ( nextcf + 1 ) ); double *direc2 = ( double * ) calloc( sizeof( double ), ( nextcf + 1 ) ); /** additional vectors */ double *cgup = ( double * ) calloc( sizeof( double ), ( nextcf + 1 ) ); double *oc = ( double * ) calloc( sizeof( double ), ( nintcf + 1 ) ); double *cnorm = ( double * ) calloc( sizeof( double ), ( nintcf + 1 ) ); double *adxor1 = ( double * ) calloc( sizeof( double ), ( nintcf + 1 ) ); double *adxor2 = ( double * ) calloc( sizeof( double ), ( nintcf + 1 ) ); double *dxor1 = ( double * ) calloc( sizeof( double ), ( nintcf + 1 ) ); double *dxor2 = ( double * ) calloc( sizeof( double ), ( nintcf + 1 ) ); // initialize the reference residual for( int nc = nintci; nc <= nintcf; nc++ ) { resvec[nc] = su[nc]; resref = resref + resvec[nc] * resvec[nc]; } resref = sqrt( resref ); if( resref < 1.0e-15 ) { printf( "i/o - error: residue sum less than 1.e-15 - %lf\n", resref ); return EXIT_FAILURE; } // initialize the arrays for( int nc = 0; nc <= 10; nc++ ) { oc[nc] = 0.0; cnorm[nc] = 1.0; } for( int nc = nintci; nc <= nintcf; nc++ ) { cgup[nc] = 0.0; var[nc] = 0.0; } for( int nc = nextci; nc <= nextcf; nc++ ) { var[nc] = 0.0; cgup[nc] = 0.0; direc1[nc] = 0.0; bs[nc] = 0.0; be[nc] = 0.0; bn[nc] = 0.0; bw[nc] = 0.0; bl[nc] = 0.0; bh[nc] = 0.0; } for( int nc = nintci; nc <= nintcf; nc++ ) { cgup[nc] = 1.0 / bp[nc]; } int if1 = 0; int if2 = 0; int iter = 1; int nor = 1; int nor1 = nor - 1; /* finished initalization */ /* start computation loop */ while( iter < 10000 ) { /* start phase 1 */ // update the old values of direc for( int nc = nintci; nc <= nintcf; nc++ ) { direc1[nc] = direc1[nc] + resvec[nc] * cgup[nc]; } // compute new guess (approximation) for direc for( int nc = nintci; nc <= nintcf; nc++ ) { direc2[nc] = bp[nc] * direc1[nc] - bs[nc] * direc1[lcc[0][nc]] - bw[nc] * direc1[lcc[3][nc]] - bl[nc] * direc1[lcc[4][nc]] - bn[nc] * direc1[lcc[2][nc]] - be[nc] * direc1[lcc[1][nc]] - bh[nc] * direc1[lcc[5][nc]]; } /* end phase 1 */ /* start phase 2 */ // execute normalization steps double oc1, oc2, occ; if( nor1 == 1 ) { oc1 = 0; occ = 0; for( int nc = nintci; nc <= nintcf; nc++ ) { occ = occ + adxor1[nc] * direc2[nc]; } oc1 = occ / cnorm[1]; for( int nc = nintci; nc <= nintcf; nc++ ) { direc2[nc] = direc2[nc] - oc1 * adxor1[nc]; direc1[nc] = direc1[nc] - oc1 * dxor1[nc]; } if1++; } else if( nor1 == 2 ) { oc1 = 0; occ = 0; for( int nc = nintci; nc <= nintcf; nc++ ) { occ = occ + adxor1[nc] * direc2[nc]; } oc1 = occ / cnorm[1]; oc2 = 0; occ = 0; for( int nc = nintci; nc <= nintcf; nc++ ) { occ = occ + adxor2[nc] * direc2[nc]; } oc2 = occ / cnorm[2]; for( int nc = nintci; nc <= nintcf; nc++ ) { direc2[nc] = direc2[nc] - oc1 * adxor1[nc] - oc2 * adxor2[nc]; direc1[nc] = direc1[nc] - oc1 * dxor1[nc] - oc2 * dxor2[nc]; } if2++; } cnorm[nor] = 0; double omega = 0; // compute the new residual for( int nc = nintci; nc <= nintcf; nc++ ) { cnorm[nor] = cnorm[nor] + direc2[nc] * direc2[nc]; omega = omega + resvec[nc] * direc2[nc]; } omega = omega / cnorm[nor]; double resnew = 0.0; for( int nc = nintci; nc <= nintcf; nc++ ) { var[nc] = var[nc] + omega * direc1[nc]; resvec[nc] = resvec[nc] - omega * direc2[nc]; resnew = resnew + resvec[nc] * resvec[nc]; } resnew = sqrt( resnew ); ratio = resnew / resref; // exit on no improvements of residual if( ratio <= 1.0e-10 ) { break; } iter++; // prepare additional arrays for the next iteration step if( nor == nomax ) { nor = 1; } else { if( nor == 1 ) { for( int nc = nintci; nc <= nintcf; nc++ ) { dxor1[nc] = direc1[nc]; adxor1[nc] = direc2[nc]; } } else if( nor == 2 ) { for( int nc = nintci; nc <= nintcf; nc++ ) { dxor2[nc] = direc1[nc]; adxor2[nc] = direc2[nc]; } } nor++; } nor1 = nor - 1; }/* end phase 2 */ /* finished computation loop */ /* Print profile data for phase CALC */ log_counters( pstats, "CALC", &tic, values ); /* write output file */ int nodeCnt; int **points, **elems; if( vol2mesh( nintci, nintcf, lcc, &nodeCnt, &points, &elems ) != 0 ) { printf( "error during conversion from volume to mesh\n" ); } write_vtk( output_prefix, "VAR.vtk", nintci, nintcf, nodeCnt, points, elems, var ); write_vtk( output_prefix, "CGUP.vtk", nintci, nintcf, nodeCnt, points, elems, cgup ); write_vtk( output_prefix, "SU.vtk", nintci, nintcf, nodeCnt, points, elems, su ); /* Print profile data for phase OUTPUT */ log_counters( pstats, "OUTPUT", &tic, values ); /* Stop counting events */ if( PAPI_stop_counters( values, SIZE( values ) ) != PAPI_OK ) { handle_error( 1 ); } fclose( pstats ); #if 0 /* Free all the dynamically allocated memory */ free( direc2 ); free( direc1 ); free( dxor2 ); free( dxor1 ); free( adxor2 ); free( adxor1 ); free( cnorm ); free( oc ); free( var ); free( cgup ); free( resvec ); free( su ); free( bp ); free( bh ); free( bl ); free( bw ); free( bn ); free( be ); free( bs ); #endif printf( "Simulation completed successfully!\n" ); return EXIT_SUCCESS; }
void retro_perf_log(void) { RARCH_LOG("[PERF]: Performance counters (libretro):\n"); log_counters(perf_counters_libretro, perf_ptr_libretro); }