/**
 * Writes the SU, VAR and CGUP vectors to three VTK files named
 * "<file_out_prefix>.SU.vtk", "<file_out_prefix>.VAR.vtk" and
 * "<file_out_prefix>.CGUP.vtk".
 *
 * The mesh is generated once with vol2mesh() and shared by all three files.
 * When a file name cannot be built or the mesh conversion reports an error,
 * a message is printed and no file is written.
 *
 * @param file_out_prefix prefix of every output file name
 * @param nintci          first internal cell index
 * @param nintcf          last internal cell index
 * @param lcc             link cell-to-cell array handed to vol2mesh()
 * @param var             values written to the VAR file
 * @param cgup            values written to the CGUP file
 * @param su              values written to the SU file
 */
void finalOutputVTK(char *file_out_prefix, int nintci, int nintcf, int **lcc,
                    double *var, double *cgup, double *su) {
    int i;

    // These variables are filled in by vol2mesh()
    int nodeCnt = 0;
    int **points = NULL;
    int **elems = NULL;

    char *su_name = concat(file_out_prefix, ".SU.vtk");
    char *var_name = concat(file_out_prefix, ".VAR.vtk");
    char *cgup_name = concat(file_out_prefix, ".CGUP.vtk");

    if (su_name == NULL || var_name == NULL || cgup_name == NULL) {
        printf("error: could not build the VTK output file names\n");
    } else if (vol2mesh(nintci, nintcf, lcc, &nodeCnt, &points, &elems) != 0) {
        // points/elems are not valid after a failed conversion: do not use or free them
        printf("error during conversion from volume to mesh\n");
    } else {
        // Create VTK files
        write_result_vtk(su_name, nintci, nintcf, nodeCnt, points, elems, su);
        write_result_vtk(var_name, nintci, nintcf, nodeCnt, points, elems, var);
        write_result_vtk(cgup_name, nintci, nintcf, nodeCnt, points, elems, cgup);

        // NOTE(review): the row counts 3 and 8 are taken over unchanged; they
        // presumably mirror the allocation done inside vol2mesh() - confirm.
        for (i = 0; i < 3; i++) {
            free(points[i]);
        }
        free(points);

        for (i = 0; i < 8; i++) {
            free(elems[i]);
        }
        free(elems);
    }

    free(su_name);
    free(var_name);
    free(cgup_name);
}
/**
 * Driver of the GCCG solver, instrumented with test_start()/test_measure().
 *
 * Command line: <format> <input_file> <output_file_prefix>
 * A <format> of "text" selects read_formatted(); every other value selects
 * read_formatted_bin().
 *
 * The program reads the input, iterates until the residual ratio drops to
 * 1.0e-10 or iter reaches 10000, writes the result with write_result() and
 * finally writes SU.vtk, VAR.vtk and CGUP.vtk into the working directory.
 *
 * @return EXIT_SUCCESS after a completed run, EXIT_FAILURE on bad arguments,
 *         unreadable input, failed allocation or a vanishing reference residual.
 */
int main(int argc, char *argv[]) {
    /* oc[] and cnorm[] are initialised for indices 0..10 whatever the mesh size is */
    enum { NORM_SLOTS = 11 };

    if (argc < 4) {
        printf("Usage: %s <format> <input_file> <output_file_prefix>\n", argv[0]);
        return EXIT_FAILURE;
    }

    char *format = argv[1];
    char *file_in = argv[2];
    char *file_out = argv[3];

    /** internal cells start and end index*/
    int nintci, nintcf;
    /** external cells start and end index. The external cells are only ghost
     *  cells. They are accessed only through internal cells*/
    int nextci, nextcf;
    /** link cell-to-cell array. Stores topology information*/
    int **lcc;
    /** red-black colouring of the cells*/
    int *nboard;
    /** boundary coefficients for each volume cell */
    double *bs, *be, *bn, *bw, *bl, *bh, *bp, *su;

    /* queried as in the original code; the result is not used in this function */
    const PAPI_hw_info_t *hw_info = PAPI_get_hardware_info();
    (void) hw_info;

    if (test_start() != 0) {
        exit(1);
    }

    /************************************************************/
    /* initialization */
    // read-in the input file
    int f_status;
    if (strcmp(format, "text") == 0) {
        f_status = read_formatted(file_in, &nintci, &nintcf, &nextci, &nextcf, &lcc,
                                  &bs, &be, &bn, &bw, &bl, &bh, &bp, &su, &nboard);
    } else {
        f_status = read_formatted_bin(file_in, &nintci, &nintcf, &nextci, &nextcf, &lcc,
                                      &bs, &be, &bn, &bw, &bl, &bh, &bp, &su, &nboard);
    }

    if (f_status != 0) {
        printf("failed to initialize data!\n");
        return EXIT_FAILURE;
    }

    // allocate arrays used in gccg
    const int nomax = 3;
    /** the reference residual*/
    double resref = 0.0;
    /** the ratio between the reference and the current residual*/
    double ratio = 0.0;

    const size_t n_int = (size_t) (nintcf + 1);
    const size_t n_ext = (size_t) (nextcf + 1);
    /* never smaller than NORM_SLOTS, so the fixed-range initialisation below stays in bounds */
    const size_t n_norm = n_int > (size_t) NORM_SLOTS ? n_int : (size_t) NORM_SLOTS;

    /** array storing residuals */
    double *resvec = calloc(n_int, sizeof(double));
    /** the variation vector -> keeps the result in the end */
    double *var = calloc(n_ext, sizeof(double));
    /** the computation vectors */
    double *direc1 = calloc(n_ext, sizeof(double));
    double *direc2 = calloc(n_ext, sizeof(double));
    /** additional vectors */
    double *cgup = calloc(n_ext, sizeof(double));
    double *oc = calloc(n_norm, sizeof(double));
    double *cnorm = calloc(n_norm, sizeof(double));
    double *adxor1 = calloc(n_int, sizeof(double));
    double *adxor2 = calloc(n_int, sizeof(double));
    double *dxor1 = calloc(n_int, sizeof(double));
    double *dxor2 = calloc(n_int, sizeof(double));

    if (resvec == NULL || var == NULL || direc1 == NULL || direc2 == NULL ||
        cgup == NULL || oc == NULL || cnorm == NULL || adxor1 == NULL ||
        adxor2 == NULL || dxor1 == NULL || dxor2 == NULL) {
        printf("failed to allocate the solver arrays!\n");
        return EXIT_FAILURE;
    }

    // initialize the reference residual
    for (int nc = nintci; nc <= nintcf; nc++) {
        resvec[nc] = su[nc];
        resref = resref + resvec[nc] * resvec[nc];
    }
    resref = sqrt(resref);

    if (resref < 1.0e-15) {
        printf("i/o - error: residue sum less than 1.e-15 - %lf\n", resref);
        return EXIT_FAILURE;
    }

    // initialize the arrays
    for (int nc = 0; nc < NORM_SLOTS; nc++) {
        oc[nc] = 0.0;
        cnorm[nc] = 1.0;
    }

    for (int nc = nintci; nc <= nintcf; nc++) {
        cgup[nc] = 0.0;
        var[nc] = 0.0;
    }

    for (int nc = nextci; nc <= nextcf; nc++) {
        var[nc] = 0.0;
        cgup[nc] = 0.0;
        direc1[nc] = 0.0;
        bs[nc] = 0.0;
        be[nc] = 0.0;
        bn[nc] = 0.0;
        bw[nc] = 0.0;
        bl[nc] = 0.0;
        bh[nc] = 0.0;
    }

    for (int nc = nintci; nc <= nintcf; nc++) {
        cgup[nc] = 1.0 / bp[nc];
    }

    int iter = 1;
    int nor = 1;
    int nor1 = nor - 1;
    /* finished initalization */

    if (test_measure("INPUT") != 0) {
        exit(1);
    }

    /***************************************************/
    /* start computation loop */
    while (iter < 10000) {
        /* start phase 1 */
        // update the old values of direc
        for (int nc = nintci; nc <= nintcf; nc++) {
            direc1[nc] = direc1[nc] + resvec[nc] * cgup[nc];
        }

        // compute new guess (approximation) for direc
        for (int nc = nintci; nc <= nintcf; nc++) {
            direc2[nc] = bp[nc] * direc1[nc] - bs[nc] * direc1[lcc[0][nc]]
                         - bw[nc] * direc1[lcc[3][nc]] - bl[nc] * direc1[lcc[4][nc]]
                         - bn[nc] * direc1[lcc[2][nc]] - be[nc] * direc1[lcc[1][nc]]
                         - bh[nc] * direc1[lcc[5][nc]];
        }
        /* end phase 1 */

        /* start phase 2 */
        // execute normalization steps
        double oc1, oc2, occ;
        if (nor1 == 1) {
            occ = 0;
            for (int nc = nintci; nc <= nintcf; nc++) {
                occ = occ + adxor1[nc] * direc2[nc];
            }
            oc1 = occ / cnorm[1];

            for (int nc = nintci; nc <= nintcf; nc++) {
                direc2[nc] = direc2[nc] - oc1 * adxor1[nc];
                direc1[nc] = direc1[nc] - oc1 * dxor1[nc];
            }
        } else if (nor1 == 2) {
            occ = 0;
            for (int nc = nintci; nc <= nintcf; nc++) {
                occ = occ + adxor1[nc] * direc2[nc];
            }
            oc1 = occ / cnorm[1];

            occ = 0;
            for (int nc = nintci; nc <= nintcf; nc++) {
                occ = occ + adxor2[nc] * direc2[nc];
            }
            oc2 = occ / cnorm[2];

            for (int nc = nintci; nc <= nintcf; nc++) {
                direc2[nc] = direc2[nc] - oc1 * adxor1[nc] - oc2 * adxor2[nc];
                direc1[nc] = direc1[nc] - oc1 * dxor1[nc] - oc2 * dxor2[nc];
            }
        }

        cnorm[nor] = 0;
        double omega = 0;

        // compute the new residual
        for (int nc = nintci; nc <= nintcf; nc++) {
            cnorm[nor] = cnorm[nor] + direc2[nc] * direc2[nc];
            omega = omega + resvec[nc] * direc2[nc];
        }
        omega = omega / cnorm[nor];

        double resnew = 0.0;
        for (int nc = nintci; nc <= nintcf; nc++) {
            var[nc] = var[nc] + omega * direc1[nc];
            resvec[nc] = resvec[nc] - omega * direc2[nc];
            resnew = resnew + resvec[nc] * resvec[nc];
        }
        resnew = sqrt(resnew);
        ratio = resnew / resref;

        // exit on no improvements of residual
        if (ratio <= 1.0e-10) {
            break;
        }

        iter++;

        // prepare additional arrays for the next iteration step
        if (nor == nomax) {
            nor = 1;
        } else {
            if (nor == 1) {
                for (int nc = nintci; nc <= nintcf; nc++) {
                    dxor1[nc] = direc1[nc];
                    adxor1[nc] = direc2[nc];
                }
            } else if (nor == 2) {
                for (int nc = nintci; nc <= nintcf; nc++) {
                    dxor2[nc] = direc1[nc];
                    adxor2[nc] = direc2[nc];
                }
            }
            nor++;
        }
        nor1 = nor - 1;
    } /* end phase 2 */
    /* finished computation loop */

    if (test_measure("CALC") != 0) {
        exit(1);
    }

    /**************************************************************/
    /* write output file */
    if (write_result(file_in, file_out, nintci, nintcf, var, iter, ratio) != 0) {
        printf("error when trying to write to file %s\n", file_out);
    }

    if (test_measure("OUTPUT") != 0) {
        exit(1);
    }

    int nodeCnt;
    int **points;
    int **elems;
    if (vol2mesh(nintci, nintcf, lcc, &nodeCnt, &points, &elems) != 0) {
        /* points/elems are not valid after a failed conversion: skip the VTK files */
        printf("error during conversion from volume to mesh\n");
    } else {
        write_result_vtk("SU.vtk", nintci, nintcf, nodeCnt, points, elems, su);
        write_result_vtk("VAR.vtk", nintci, nintcf, nodeCnt, points, elems, var);
        write_result_vtk("CGUP.vtk", nintci, nintcf, nodeCnt, points, elems, cgup);
    }

    /* Free all the dynamically allocated memory */
    free(direc2);
    free(direc1);
    free(dxor2);
    free(dxor1);
    free(adxor2);
    free(adxor1);
    free(cnorm);
    free(oc);
    free(var);
    free(cgup);
    free(resvec);
    free(su);
    free(bp);
    free(bh);
    free(bl);
    free(bw);
    free(bn);
    free(be);
    free(bs);

    printf("Simulation completed successfully!\n");
    return EXIT_SUCCESS;
}
int main( int argc, char *argv[] ) { int Events[] = { #ifdef CACHE_PROFILE PAPI_L2_TCM, PAPI_L3_TCM, PAPI_L2_TCA, PAPI_L3_TCA #else PAPI_FP_OPS #endif }; long long values[SIZE( Events )]; long long tic; if( argc != 4 ) { printf( "Usage: %s input_format input_file output_prefix\n", argv[0] ); return EXIT_FAILURE; } char *input_format = argv[1]; char *input_file = argv[2]; char *output_prefix = argv[3]; int status = 0; /** internal cells start and end index*/ int nintci, nintcf; /** external cells start and end index. * The external cells are only ghost cells. They are accessed only through internal cells*/ int nextci, nextcf; /** link cell-to-cell array. Stores topology information*/ int **lcc; /** red-black colouring of the cells*/ int *nboard; /** boundary coefficients for each volume cell */ double *bs, *be, *bn, *bw, *bl, *bh, *bp, *su; char pstats_filename[strlen( output_prefix ) + strlen( "pstats.dat" ) + 1]; strcpy( pstats_filename, output_prefix ); strcat( pstats_filename, "pstats.dat" ); FILE *pstats = fopen( pstats_filename, "w" ); if( pstats == NULL ) { printf( "Cannot open file for writing: %s\n", pstats_filename ); return EXIT_FAILURE; } /* Start counting events */ if( PAPI_start_counters( Events, SIZE( Events ) ) != PAPI_OK ) { handle_error( 1 ); } /** start measuring wall clock time */ tic = PAPI_get_real_usec(); /* initialization */ // read-in the input file if( !strcmp( "bin", input_format ) ) { status = read_binary( input_file, &nintci, &nintcf, &nextci, &nextcf, &lcc, &bs, &be, &bn, &bw, &bl, &bh, &bp, &su, &nboard ); } else if( !strcmp( "text", input_format ) ) { status = read_formatted( input_file, &nintci, &nintcf, &nextci, &nextcf, &lcc, &bs, &be, &bn, &bw, &bl, &bh, &bp, &su, &nboard ); } else { printf( "valid input_format values: text, bin\n" ); return EXIT_FAILURE; } if( status != 0 ) { printf( "failed to initialize data!\n" ); return EXIT_FAILURE; } /* Print profile data for phase INPUT */ log_counters( pstats, "INPUT", &tic, values ); // 
allocate arrays used in gccg int nomax = 3; /** the reference residual*/ double resref = 0.0; /** the ratio between the reference and the current residual*/ double ratio; /** array storing residuals */ double *resvec = ( double * ) calloc( sizeof( double ), ( nintcf + 1 ) ); /** the variation vector -> keeps the result in the end */ double *var = ( double * ) calloc( sizeof( double ), ( nextcf + 1 ) ); /** the computation vectors */ double *direc1 = ( double * ) calloc( sizeof( double ), ( nextcf + 1 ) ); double *direc2 = ( double * ) calloc( sizeof( double ), ( nextcf + 1 ) ); /** additional vectors */ double *cgup = ( double * ) calloc( sizeof( double ), ( nextcf + 1 ) ); double *oc = ( double * ) calloc( sizeof( double ), ( nintcf + 1 ) ); double *cnorm = ( double * ) calloc( sizeof( double ), ( nintcf + 1 ) ); double *adxor1 = ( double * ) calloc( sizeof( double ), ( nintcf + 1 ) ); double *adxor2 = ( double * ) calloc( sizeof( double ), ( nintcf + 1 ) ); double *dxor1 = ( double * ) calloc( sizeof( double ), ( nintcf + 1 ) ); double *dxor2 = ( double * ) calloc( sizeof( double ), ( nintcf + 1 ) ); // initialize the reference residual for( int nc = nintci; nc <= nintcf; nc++ ) { resvec[nc] = su[nc]; resref = resref + resvec[nc] * resvec[nc]; } resref = sqrt( resref ); if( resref < 1.0e-15 ) { printf( "i/o - error: residue sum less than 1.e-15 - %lf\n", resref ); return EXIT_FAILURE; } // initialize the arrays for( int nc = 0; nc <= 10; nc++ ) { oc[nc] = 0.0; cnorm[nc] = 1.0; } for( int nc = nintci; nc <= nintcf; nc++ ) { cgup[nc] = 0.0; var[nc] = 0.0; } for( int nc = nextci; nc <= nextcf; nc++ ) { var[nc] = 0.0; cgup[nc] = 0.0; direc1[nc] = 0.0; bs[nc] = 0.0; be[nc] = 0.0; bn[nc] = 0.0; bw[nc] = 0.0; bl[nc] = 0.0; bh[nc] = 0.0; } for( int nc = nintci; nc <= nintcf; nc++ ) { cgup[nc] = 1.0 / bp[nc]; } int if1 = 0; int if2 = 0; int iter = 1; int nor = 1; int nor1 = nor - 1; /* finished initalization */ /* start computation loop */ while( iter < 10000 ) { /* start 
phase 1 */ // update the old values of direc for( int nc = nintci; nc <= nintcf; nc++ ) { direc1[nc] = direc1[nc] + resvec[nc] * cgup[nc]; } // compute new guess (approximation) for direc for( int nc = nintci; nc <= nintcf; nc++ ) { direc2[nc] = bp[nc] * direc1[nc] - bs[nc] * direc1[lcc[0][nc]] - bw[nc] * direc1[lcc[3][nc]] - bl[nc] * direc1[lcc[4][nc]] - bn[nc] * direc1[lcc[2][nc]] - be[nc] * direc1[lcc[1][nc]] - bh[nc] * direc1[lcc[5][nc]]; } /* end phase 1 */ /* start phase 2 */ // execute normalization steps double oc1, oc2, occ; if( nor1 == 1 ) { oc1 = 0; occ = 0; for( int nc = nintci; nc <= nintcf; nc++ ) { occ = occ + adxor1[nc] * direc2[nc]; } oc1 = occ / cnorm[1]; for( int nc = nintci; nc <= nintcf; nc++ ) { direc2[nc] = direc2[nc] - oc1 * adxor1[nc]; direc1[nc] = direc1[nc] - oc1 * dxor1[nc]; } if1++; } else if( nor1 == 2 ) { oc1 = 0; occ = 0; for( int nc = nintci; nc <= nintcf; nc++ ) { occ = occ + adxor1[nc] * direc2[nc]; } oc1 = occ / cnorm[1]; oc2 = 0; occ = 0; for( int nc = nintci; nc <= nintcf; nc++ ) { occ = occ + adxor2[nc] * direc2[nc]; } oc2 = occ / cnorm[2]; for( int nc = nintci; nc <= nintcf; nc++ ) { direc2[nc] = direc2[nc] - oc1 * adxor1[nc] - oc2 * adxor2[nc]; direc1[nc] = direc1[nc] - oc1 * dxor1[nc] - oc2 * dxor2[nc]; } if2++; } cnorm[nor] = 0; double omega = 0; // compute the new residual for( int nc = nintci; nc <= nintcf; nc++ ) { cnorm[nor] = cnorm[nor] + direc2[nc] * direc2[nc]; omega = omega + resvec[nc] * direc2[nc]; } omega = omega / cnorm[nor]; double resnew = 0.0; for( int nc = nintci; nc <= nintcf; nc++ ) { var[nc] = var[nc] + omega * direc1[nc]; resvec[nc] = resvec[nc] - omega * direc2[nc]; resnew = resnew + resvec[nc] * resvec[nc]; } resnew = sqrt( resnew ); ratio = resnew / resref; // exit on no improvements of residual if( ratio <= 1.0e-10 ) { break; } iter++; // prepare additional arrays for the next iteration step if( nor == nomax ) { nor = 1; } else { if( nor == 1 ) { for( int nc = nintci; nc <= nintcf; nc++ ) { 
dxor1[nc] = direc1[nc]; adxor1[nc] = direc2[nc]; } } else if( nor == 2 ) { for( int nc = nintci; nc <= nintcf; nc++ ) { dxor2[nc] = direc1[nc]; adxor2[nc] = direc2[nc]; } } nor++; } nor1 = nor - 1; }/* end phase 2 */ /* finished computation loop */ /* Print profile data for phase CALC */ log_counters( pstats, "CALC", &tic, values ); /* write output file */ int nodeCnt; int **points, **elems; if( vol2mesh( nintci, nintcf, lcc, &nodeCnt, &points, &elems ) != 0 ) { printf( "error during conversion from volume to mesh\n" ); } write_vtk( output_prefix, "VAR.vtk", nintci, nintcf, nodeCnt, points, elems, var ); write_vtk( output_prefix, "CGUP.vtk", nintci, nintcf, nodeCnt, points, elems, cgup ); write_vtk( output_prefix, "SU.vtk", nintci, nintcf, nodeCnt, points, elems, su ); /* Print profile data for phase OUTPUT */ log_counters( pstats, "OUTPUT", &tic, values ); /* Stop counting events */ if( PAPI_stop_counters( values, SIZE( values ) ) != PAPI_OK ) { handle_error( 1 ); } fclose( pstats ); #if 0 /* Free all the dynamically allocated memory */ free( direc2 ); free( direc1 ); free( dxor2 ); free( dxor1 ); free( adxor2 ); free( adxor1 ); free( cnorm ); free( oc ); free( var ); free( cgup ); free( resvec ); free( su ); free( bp ); free( bh ); free( bl ); free( bw ); free( bn ); free( be ); free( bs ); #endif printf( "Simulation completed successfully!\n" ); return EXIT_SUCCESS; }
int main( int argc, char *argv[] ) { if ( argc < 4 ) { printf( "Usage: %s format_file input_file output_file\n", argv[0] ); return EXIT_FAILURE; } // For checking the library initialisation int retval; // EventSet for L2 & L3 cache misses and accesses int EventSet = PAPI_NULL; int EventSet1 = PAPI_NULL; // Data pointer for getting the cpu info const PAPI_hw_info_t * hwinfo = NULL; PAPI_mh_info_t mem_hrch; // Initialising the library retval = PAPI_library_init( PAPI_VER_CURRENT ); if ( retval != PAPI_VER_CURRENT ) { printf( "Initialisation of Papi failed \n" ); exit( 1 ); } if ( ( hwinfo = PAPI_get_hardware_info() ) == NULL ) { printf( "Unable to access hw info \n" ); return 1; } /* Accessing the cpus per node, threads per core, memory, frequency */ printf( "No. of cpus in one node : %d \n", hwinfo->ncpu ); printf( "Threads per core : %d \n", hwinfo->threads ); printf( "No. of cores per socket : %d \n", hwinfo->cores ); printf( "No. of sockets : %d \n", hwinfo->sockets ); printf( "Total CPUS in the entire system : %d \n", hwinfo->totalcpus ); /* Variables for reading counters of EventSet*/ long long eventValues[NUMEVENTS] = { 0 }; // long long eventFpValue[ NUM_FPEVENTS ] = {0}; char *format = argv[1]; char *file_in = argv[2]; char *file_out = argv[3]; char delim[] = "."; char *cp = (char *) malloc( sizeof(char) * 10 ); cp = strcpy( cp, file_in ); char *token = malloc( sizeof(char) * 10 ); token = strtok( cp, delim ); char * res_file = malloc( sizeof(char) * 30 ); res_file = strcpy( res_file, file_out ); res_file = strcat( res_file, token ); free( cp ); // free( token ); char *csv_file = malloc( sizeof(char) * 30 ); csv_file = strcpy( csv_file, res_file ); FILE *csv_fp = fopen( strcat( csv_file, OPTI ), "w" ); FILE *res_fp = fopen( strcat( res_file, "_psdats.dat" ), "w" ); int status = 0; free( res_file ); free( csv_file ); /** internal cells start and end index*/ int nintci, nintcf; /** external cells start and end index. The external cells are only ghost cells. 
* They are accessed only through internal cells*/ int nextci, nextcf; /** link cell-to-cell array. Stores topology information*/ int **lcc; /** red-black colouring of the cells*/ int *nboard; /** boundary coefficients for each volume cell */ double *bs, *be, *bn, *bw, *bl, *bh, *bp, *su; // Parameters for measuring the time long long startusec, endusec; /*the total number of points (after conversion to unstructured mesh topology)*/ int nodeCnt; /* the array containing the coordinate of the points * (after conversion to unstructured mesh topology) */ int **points; /* the array containing the mesh elements (after conversion to unstructured mesh topology) */ int **elems; // Creating the eventSets if ( PAPI_create_eventset( &EventSet ) != PAPI_OK ) { printf( "Problem in create eventset \n" ); exit( 1 ); } // Create the Flops eventSet /*if ( PAPI_create_eventset( &EventSet1 ) != PAPI_OK ) { printf( "Problem in creating the flops eventset \n" ); exit(1); }*/ int EventCode[NUMEVENTS] = { PAPI_L2_TCM, PAPI_L2_TCA, PAPI_L3_TCM, PAPI_L3_TCA }; // int EventFpCode[ NUM_FPEVENTS ] = { PAPI_FP_OPS }; // Adding events to the eventset if ( PAPI_add_events( EventSet, EventCode, NUMEVENTS ) != PAPI_OK ) { printf( "Problem in adding events \n" ); exit( 1 ); } /*if( PAPI_add_events( EventSet1, EventFpCode, 1 ) != PAPI_OK ){ printf( "Problem in adding the flops event \n" ); exit( 1 ); }*/ printf( "Success in adding events \n" ); // Start the eventset counters PAPI_start( EventSet ); // PAPI_start( EventSet1 ); startusec = PAPI_get_real_usec(); /* initialization */ // read-in the input file int f_status; if ( !strcmp( format, "bin" ) ) { f_status = read_bin_formatted( file_in, &nintci, &nintcf, &nextci, &nextcf, &lcc, &bs, &be, &bn, &bw, &bl, &bh, &bp, &su, &nboard ); } else if ( !strcmp( format, "txt" ) ) { f_status = read_formatted( file_in, &nintci, &nintcf, &nextci, &nextcf, &lcc, &bs, &be, &bn, &bw, &bl, &bh, &bp, &su, &nboard ); } if ( f_status != 0 ) { printf( "failed to 
initialize data! \n" ); return EXIT_FAILURE; } // allocate arrays used in gccg int nomax = 3; /** the reference residual*/ double resref = 0.0; /** the ratio between the reference and the current residual*/ double ratio; /** array storing residuals */ double* resvec = (double *) calloc( sizeof(double), ( nintcf + 1 ) ); /** the variation vector -> keeps the result in the end */ double* var = (double *) calloc( sizeof(double), ( nextcf + 1 ) ); /** the computation vectors */ double* direc1 = (double *) calloc( sizeof(double), ( nextcf + 1 ) ); double* direc2 = (double *) calloc( sizeof(double), ( nextcf + 1 ) ); /** additional vectors */ double* cgup = (double *) calloc( sizeof(double), ( nextcf + 1 ) ); double* oc = (double *) calloc( sizeof(double), ( nintcf + 1 ) ); double* cnorm = (double *) calloc( sizeof(double), ( nintcf + 1 ) ); double* adxor1 = (double *) calloc( sizeof(double), ( nintcf + 1 ) ); double* adxor2 = (double *) calloc( sizeof(double), ( nintcf + 1 ) ); double* dxor1 = (double *) calloc( sizeof(double), ( nintcf + 1 ) ); double* dxor2 = (double *) calloc( sizeof(double), ( nintcf + 1 ) ); // initialize the reference residual for ( int nc = nintci; nc <= nintcf; nc++ ) { resvec[nc] = su[nc]; resref = resref + resvec[nc] * resvec[nc]; } resref = sqrt( resref ); if ( resref < 1.0e-15 ) { printf( "i/o - error: residue sum less than 1.e-15 - %lf\n", resref ); return EXIT_FAILURE; } // initialize the arrays for ( int nc = 0; nc <= 10; nc++ ) { oc[nc] = 0.0; cnorm[nc] = 1.0; } for ( int nc = nintci; nc <= nintcf; nc++ ) { cgup[nc] = 0.0; var[nc] = 0.0; } for ( int nc = nextci; nc <= nextcf; nc++ ) { var[nc] = 0.0; cgup[nc] = 0.0; direc1[nc] = 0.0; bs[nc] = 0.0; be[nc] = 0.0; bn[nc] = 0.0; bw[nc] = 0.0; bl[nc] = 0.0; bh[nc] = 0.0; } for ( int nc = nintci; nc <= nintcf; nc++ ) cgup[nc] = 1.0 / bp[nc]; int if1 = 0; int if2 = 0; int iter = 1; int nor = 1; int nor1 = nor - 1; /* finished initalization */ endusec = PAPI_get_real_usec(); // Read the eventSet 
counters PAPI_read( EventSet, eventValues ); // PAPI_read( EventSet1, eventFpValue ); fprintf( res_fp, "Execution time in microseconds for the initialisation: %lld \n", endusec - startusec ); fprintf( res_fp, "Initialisation.... \n" ); fprintf( res_fp, "INPUT \t PAPI_L2_TCM \t %lld \n", eventValues[0] ); fprintf( res_fp, "INPUT \t PAPI_L2_TCA \t %lld \n", eventValues[1] ); fprintf( res_fp, "INPUT \t PAPI_L3_TCM \t %lld \n", eventValues[2] ); fprintf( res_fp, "INPUT \t PAPI_L3_TCA \t %lld \n", eventValues[3] ); // fprintf( res_fp, "INPUT \t PAPI_FP_OPS \t %lld \n", eventFpValue[0] ); // Cache miss rate calculations float L2_cache_miss_rate, L3_cache_miss_rate; L2_cache_miss_rate = ( (float) eventValues[0] / eventValues[1] ) * 100; L3_cache_miss_rate = ( (float) eventValues[2] / eventValues[3] ) * 100; fprintf( res_fp, "INPUT \t L2MissRate \t %f% \n", L2_cache_miss_rate ); fprintf( res_fp, "INPUT \t L3MissRate \t %f% \n", L3_cache_miss_rate ); fprintf( csv_fp, "Results for the INPUT phase \n" ); fprintf( csv_fp, "%s, %lld, %lld, %lld, %lld, %f, %f \n", OPTI, eventValues[0], eventValues[1], eventValues[2], eventValues[3], L2_cache_miss_rate, L3_cache_miss_rate ); // Resetting the event counters PAPI_reset( EventSet ); // PAPI_reset( EventSet1 ); fprintf( res_fp, "Starting with the computation part \n" ); startusec = PAPI_get_real_usec(); /* start computation loop */ while ( iter < 10000 ) { /* start phase 1 */ // update the old values of direc for ( int nc = nintci; nc <= nintcf; nc++ ) { direc1[nc] = direc1[nc] + resvec[nc] * cgup[nc]; } // compute new guess (approximation) for direc for ( int nc = nintci; nc <= nintcf; nc++ ) { direc2[nc] = bp[nc] * direc1[nc] - bs[nc] * direc1[lcc[0][nc]] - bw[nc] * direc1[lcc[3][nc]] - bl[nc] * direc1[lcc[4][nc]] - bn[nc] * direc1[lcc[2][nc]] - be[nc] * direc1[lcc[1][nc]] - bh[nc] * direc1[lcc[5][nc]]; } /* end phase 1 */ /* start phase 2 */ // execute normalization steps double oc1, oc2, occ; if ( nor1 == 1 ) { oc1 = 0; occ = 0; 
for ( int nc = nintci; nc <= nintcf; nc++ ) { occ = occ + adxor1[nc] * direc2[nc]; } oc1 = occ / cnorm[1]; for ( int nc = nintci; nc <= nintcf; nc++ ) { direc2[nc] = direc2[nc] - oc1 * adxor1[nc]; direc1[nc] = direc1[nc] - oc1 * dxor1[nc]; } if1++; } else if ( nor1 == 2 ) { oc1 = 0; occ = 0; for ( int nc = nintci; nc <= nintcf; nc++ ) occ = occ + adxor1[nc] * direc2[nc]; oc1 = occ / cnorm[1]; oc2 = 0; occ = 0; for ( int nc = nintci; nc <= nintcf; nc++ ) occ = occ + adxor2[nc] * direc2[nc]; oc2 = occ / cnorm[2]; for ( int nc = nintci; nc <= nintcf; nc++ ) { direc2[nc] = direc2[nc] - oc1 * adxor1[nc] - oc2 * adxor2[nc]; direc1[nc] = direc1[nc] - oc1 * dxor1[nc] - oc2 * dxor2[nc]; } if2++; } cnorm[nor] = 0; double omega = 0; // compute the new residual for ( int nc = nintci; nc <= nintcf; nc++ ) { cnorm[nor] = cnorm[nor] + direc2[nc] * direc2[nc]; omega = omega + resvec[nc] * direc2[nc]; } omega = omega / cnorm[nor]; double resnew = 0.0; for ( int nc = nintci; nc <= nintcf; nc++ ) { var[nc] = var[nc] + omega * direc1[nc]; resvec[nc] = resvec[nc] - omega * direc2[nc]; resnew = resnew + resvec[nc] * resvec[nc]; } resnew = sqrt( resnew ); ratio = resnew / resref; // exit on no improvements of residual if ( ratio <= 1.0e-10 ) break; iter++; // prepare additional arrays for the next iteration step if ( nor == nomax ) nor = 1; else { if ( nor == 1 ) { for ( int nc = nintci; nc <= nintcf; nc++ ) { dxor1[nc] = direc1[nc]; adxor1[nc] = direc2[nc]; } } else if ( nor == 2 ) { for ( int nc = nintci; nc <= nintcf; nc++ ) { dxor2[nc] = direc1[nc]; adxor2[nc] = direc2[nc]; } } nor++; } nor1 = nor - 1; }/* end phase 2 */ /* finished computation loop */ endusec = PAPI_get_real_usec(); // Read the eventSet counters PAPI_read( EventSet, eventValues ); // PAPI_read( EventSet1, eventFpValue ); fprintf( res_fp, "Execution time in microseconds for the computation : %lld \n", endusec - startusec ); fprintf( res_fp, "CALC \t PAPI_L2_TCM \t %lld \n", eventValues[0] ); fprintf( res_fp, "CALC \t 
PAPI_L2_TCA \t %lld \n", eventValues[1] ); fprintf( res_fp, "CALC \t PAPI_L3_TCM \t %lld \n", eventValues[2] ); fprintf( res_fp, "CALC \t PAPI_L3_TCA \t %lld \n", eventValues[3] ); // fprintf( res_fp, "CALC \t PAPI_FP_OPS \t %lld \n", eventFpValue[0] ); L2_cache_miss_rate = ( (float) eventValues[0] / eventValues[1] ) * 100; L3_cache_miss_rate = ( (float) eventValues[2] / eventValues[3] ) * 100; fprintf( res_fp, "CALC \t L2MissRate \t %f%\n", L2_cache_miss_rate ); fprintf( res_fp, "CALC \t L3MissRate \t %f%\n", L3_cache_miss_rate ); fprintf( csv_fp, "Results for the CALC phase \n" ); fprintf( csv_fp, "%s, %lld, %lld, %lld, %lld, %f, %f \n", OPTI, eventValues[0], eventValues[1], eventValues[2], eventValues[3], L2_cache_miss_rate, L3_cache_miss_rate ); // Resetting the event counters PAPI_reset( EventSet ); // PAPI_reset( EventSet1 ); char *vtk_file = malloc( sizeof(char) * 30 ); fprintf( res_fp, "Starting with the output vtk part \n" ); startusec = PAPI_get_real_usec(); /* write output file */ vol2mesh( nintci, nintcf, lcc, &nodeCnt, &points, &elems ); if( write_result( file_in, file_out, nintci, nintcf, var, iter, ratio ) != 0 ) { printf( "error when trying to write to file %s\n", file_out ); } if( write_result_vtk( strcat( strcpy( vtk_file, file_out ), "SU.vtk" ), nintci, nintcf, nodeCnt, points, elems, su ) != 0 ) { printf( "error when trying to write to vtk file %s\n", "SU.vtk" ); } if( write_result_vtk( strcat( strcpy( vtk_file, file_out ), "CGUP.vtk" ), nintci, nintcf, nodeCnt, points, elems, cgup ) != 0 ) { printf( "error when trying to write to vtk file %s\n", "CGUP.vtk" ); } if( write_result_vtk( strcat( strcpy( vtk_file, file_out ), "VAR.vtk" ), nintci, nintcf, nodeCnt, points, elems, var ) != 0 ) { printf( "error when trying to write to vtk file %s\n", "VAR.vtk" ); } free( vtk_file ); /* finished computation loop */ endusec = PAPI_get_real_usec(); // Read the eventSet counters PAPI_stop( EventSet, eventValues ); // PAPI_stop( EventSet1, eventFpValue ); 
fprintf( res_fp, "Execution time in microseconds for the output vtk part : %lld \n", endusec - startusec ); fprintf( res_fp, "OUTPUT \t PAPI_L2_TCM \t %lld \n", eventValues[0] ); fprintf( res_fp, "OUTPUT \t PAPI_L2_TCA \t %lld \n", eventValues[1] ); fprintf( res_fp, "OUTPUT \t PAPI_L3_TCM \t %lld \n", eventValues[2] ); fprintf( res_fp, "OUTPUT \t PAPI_L3_TCA \t %lld \n", eventValues[3] ); // fprintf( res_fp, "CALC \t PAPI_FP_OPS \t %lld \n", eventFpValue[0] ); L2_cache_miss_rate = ( (float) eventValues[0] / eventValues[1] ) * 100; L3_cache_miss_rate = ( (float) eventValues[2] / eventValues[3] ) * 100; fprintf( res_fp, "OUTPUT \t L2MissRate \t %f%\n", L2_cache_miss_rate ); fprintf( res_fp, "OUTPUT \t L3MissRate \t %f%\n", L3_cache_miss_rate ); fprintf( csv_fp, "Results for the OUTPUT phase \n" ); fprintf( csv_fp, "%s, %lld, %lld, %lld, %lld, %f, %f \n", OPTI, eventValues[0], eventValues[1], eventValues[2], eventValues[3], L2_cache_miss_rate, L3_cache_miss_rate ); /* Free all the dynamically allocated memory */ free( direc2 ); free( direc1 ); free( dxor2 ); free( dxor1 ); free( adxor2 ); free( adxor1 ); free( cnorm ); free( oc ); free( var ); free( cgup ); free( resvec ); free( su ); free( bp ); free( bh ); free( bl ); free( bw ); free( bn ); free( be ); free( bs ); printf( "Simulation completed successfully!\n" ); fclose( res_fp ); fclose( csv_fp ); return EXIT_SUCCESS; }
int main(int argc, char *argv[]) { if (argc < 4) { printf("Usage: %s data_type(text or bin) input_file output_file\n", argv[0]); return EXIT_FAILURE; } char *file_type = argv[1]; char *file_in = argv[2]; char *file_out = argv[3]; char *str1 = "SU.vtk"; char *str2 = "VAR.vtk"; char *str3 = "CGUP.vtk"; char *file_perf = "pstats.dat"; int status = 0; /** internal cells start and end index*/ int nintci, nintcf; /** external cells start and end index. The external cells are only ghost cells. They are accessed only through internal cells*/ int nextci, nextcf; /** link cell-to-cell array. Stores topology information*/ int **lcc; /** red-black colouring of the cells*/ int *nboard; /** boundary coefficients for each volume cell */ double *bs, *be, *bn, *bw, *bl, *bh, *bp, *su; /**parameter used for volmesh and reading binary input file */ int* nodeCnt; int*** points; int*** elems; /**Measured Performance and Papi parameters*/ long long *values_i = (long long *) calloc(sizeof(long long), 4); long long *values_c = (long long *) calloc(sizeof(long long), 4); long long *values_o = (long long *) calloc(sizeof(long long), 4); double *mflops = (double *) calloc(sizeof(double), 3); double *L1mira = (double *) calloc(sizeof(double), 3); double *Lmirate = (double *) calloc(sizeof(double), 3); double *util = (double *) calloc(sizeof(double), 3); long long *et = (long long *) calloc(sizeof(long long), 3); long long start_cycles, start_usec,end_cycles_1, end_usec_1, end_cycles_2, end_cycles_3, end_usec_2, end_usec_3; /**In cluster mpp_inter L1 and L2 events can not computed at the same time, so set into two groups*/ int Events[NUM_EVENTS]={PAPI_L2_TCM,PAPI_L2_TCA,PAPI_FP_INS,PAPI_TOT_CYC}; // int Events[NUM_EVENTS]={PAPI_L1_TCM,PAPI_L1_TCA,PAPI_FP_INS,PAPI_TOT_CYC}; /**start HW counters and execution time recorder*/ if ( PAPI_start_counters( Events, NUM_EVENTS ) != PAPI_OK ) printf("Fail to start PAPI counter\n"); start_cycles = PAPI_get_real_cyc(); // Gets the starting time in clock 
cycles start_usec = PAPI_get_real_usec(); // Gets the starting time in microseconds /* initialization */ // read-in the input file int f_status; if (strcmp(file_type,"text") == 0) { f_status = read_formatted(file_in, &nintci, &nintcf, &nextci, &nextcf, &lcc, &bs, &be, &bn, &bw, &bl, &bh, &bp, &su, &nboard); } else if (strcmp(file_type,"bin") == 0) { f_status = read_formatted_bin(file_in, &nintci, &nintcf, &nextci, &nextcf, &lcc, &bs, &be, &bn, &bw, &bl, &bh, &bp, &su,&nboard); } else { printf ("Input file format is nor correct\n"); return EXIT_FAILURE; } if (f_status != 0){ printf("failed to initialize data!\n"); return EXIT_FAILURE; } // allocate arrays used in gccg int nomax = 3; /** the reference residual*/ double resref = 0.0; /** the ratio between the reference and the current residual*/ double ratio; /** array storing residuals */ double* resvec = (double *) calloc(sizeof(double), (nintcf + 1)); /** the variation vector -> keeps the result in the end */ double* var = (double *) calloc(sizeof(double), (nextcf + 1)); /** the computation vectors */ double* direc1 = (double *) calloc(sizeof(double), (nextcf + 1)); double* direc2 = (double *) calloc(sizeof(double), (nextcf + 1)); /** additional vectors */ double* cgup = (double *) calloc(sizeof(double), (nextcf + 1)); double* oc = (double *) calloc(sizeof(double), (nintcf + 1)); double* cnorm = (double *) calloc(sizeof(double), (nintcf + 1)); double* adxor1 = (double *) calloc(sizeof(double), (nintcf + 1)); double* adxor2 = (double *) calloc(sizeof(double), (nintcf + 1)); double* dxor1 = (double *) calloc(sizeof(double), (nintcf + 1)); double* dxor2 = (double *) calloc(sizeof(double), (nintcf + 1)); /**store volume information*/ int nc=0; // initialize the reference residual for ( nc = nintci; nc <= nintcf; nc++) { resvec[nc] = su[nc]; resref = resref + resvec[nc] * resvec[nc]; } resref = sqrt(resref); if (resref < 1.0e-15){ printf("i/o - error: residue sum less than 1.e-15 - %lf\n", resref); return EXIT_FAILURE; 
} // initialize the arrays for (nc = 0; nc <= 10; nc++){ oc[nc] = 0.0; cnorm[nc] = 1.0; } for (nc = nintci; nc <= nintcf; nc++){ cgup[nc] = 0.0; var[nc] = 0.0; } for (nc = nextci; nc <= nextcf; nc++){ var[nc] = 0.0; cgup[nc] = 0.0; direc1[nc] = 0.0; bs[nc] = 0.0; be[nc] = 0.0; bn[nc] = 0.0; bw[nc] = 0.0; bl[nc] = 0.0; bh[nc] = 0.0; } for (nc = nintci; nc <= nintcf; nc++){ cgup[nc] = 1.0 / bp[nc]; } int if1 = 0; int if2 = 0; int iter = 1; int nor = 1; int nor1 = nor - 1; /* finished initalization */ /*read PAPI HW counters and caculate performance of input phase*/ if ( PAPI_read_counters( values_i, NUM_EVENTS ) != PAPI_OK ){ printf("fail to stop papi counter"); } Lmirate[0] = (double) values_i[0] / values_i[1]; end_usec_1 = PAPI_get_real_usec(); mflops[0] = (double) values_i[2] / (end_usec_1-start_usec); util[0] = mflops[0] / PEAKPER; /* start computation loop */ while (iter < 10000){ /* start phase 1 */ // update the old values of direc for (nc = nintci; nc <= nintcf; nc++){ direc1[nc] = direc1[nc] + resvec[nc] * cgup[nc]; } // compute new guess (approximation) for direc for (nc = nintci; nc <= nintcf; nc++){ direc2[nc] = bp[nc] * direc1[nc] - bs[nc] * direc1[lcc[0][nc]] - bw[nc] * direc1[lcc[3][nc]] - bl[nc] * direc1[lcc[4][nc]] - bn[nc] * direc1[lcc[2][nc]] - be[nc] * direc1[lcc[1][nc]] - bh[nc] * direc1[lcc[5][nc]]; } /* end phase 1 */ /* start phase 2 */ // execute normalization steps double oc1, oc2, occ; if (nor1 == 1){ oc1 = 0; occ = 0; for (nc = nintci; nc <= nintcf; nc++){ occ = occ + adxor1[nc] * direc2[nc]; } oc1 = occ / cnorm[1]; for (nc = nintci; nc <= nintcf; nc++){ direc2[nc] = direc2[nc] - oc1 * adxor1[nc]; direc1[nc] = direc1[nc] - oc1 * dxor1[nc]; } if1++; }else if (nor1 == 2){ oc1 = 0; occ = 0; for (nc = nintci; nc <= nintcf; nc++){ occ = occ + adxor1[nc] * direc2[nc]; } oc1 = occ / cnorm[1]; oc2 = 0; occ = 0; for (nc = nintci; nc <= nintcf; nc++){ occ = occ + adxor2[nc] * direc2[nc]; } oc2 = occ / cnorm[2]; for (nc = nintci; nc <= nintcf; nc++){ 
direc2[nc] = direc2[nc] - oc1 * adxor1[nc] - oc2 * adxor2[nc]; direc1[nc] = direc1[nc] - oc1 * dxor1[nc] - oc2 * dxor2[nc]; } if2++; } cnorm[nor] = 0; double omega = 0; // compute the new residual for (nc = nintci; nc <= nintcf; nc++){ cnorm[nor] = cnorm[nor] + direc2[nc] * direc2[nc]; omega = omega + resvec[nc] * direc2[nc]; } omega = omega / cnorm[nor]; double resnew = 0.0; for (nc = nintci; nc <= nintcf; nc++){ var[nc] = var[nc] + omega * direc1[nc]; resvec[nc] = resvec[nc] - omega * direc2[nc]; resnew = resnew + resvec[nc] * resvec[nc]; } resnew = sqrt(resnew); ratio = resnew / resref; // exit on no improvements of residual if (ratio <= 1.0e-10){ break; } iter++; // prepare additional arrays for the next iteration step if (nor == nomax){ nor = 1; }else{ if (nor == 1){ for (nc = nintci; nc <= nintcf; nc++){ dxor1[nc] = direc1[nc]; adxor1[nc] = direc2[nc]; } } else if (nor == 2){ for (nc = nintci; nc <= nintcf; nc++){ dxor2[nc] = direc1[nc]; adxor2[nc] = direc2[nc]; } } nor++; } nor1 = nor - 1; }/* end phase 2 */ /* finished computation loop */ /*read PAPI HW counters and caculate performance of computation phase*/ end_cycles_2 = PAPI_get_real_cyc(); // Gets the ending time in clock cycles end_usec_2 = PAPI_get_real_usec(); // Gets the ending time in microseconds if ( PAPI_read_counters( values_c, NUM_EVENTS ) != PAPI_OK ){ printf("fail to read papi counter"); } Lmirate[1] = (double) values_c[0]/values_c[1]; mflops[1] = (double) values_c[2] / ( end_usec_2-end_usec_1 ); util[1] = mflops[1] / PEAKPER; /* write output file */ if ( write_result(file_in, file_out, nintci, nintcf, var, iter, ratio) != 0 ) printf("error when trying to write to file %s\n", file_out); //transfer volume to mesh if (vol2mesh(nintci, nintcf, lcc, &nodeCnt, &points, &elems) != 0 ){ printf("error when trying to converge topology to volume"); } //write output to vtk file if (write_result_vtk(str1, nintci, nintcf, nodeCnt, points, elems, su) != 0){ printf("error when write SU to vtk file"); } if 
(write_result_vtk(str2, nintci, nintcf, nodeCnt, points, elems, var) != 0){ printf("error when write VAR to vtk file"); } if (write_result_vtk(str3, nintci, nintcf, nodeCnt, points, elems, cgup) != 0){ printf("error when write CGUP to vtk file"); } /*read PAPI HW counters and caculate performance of output phase*/ if ( PAPI_stop_counters( values_o, NUM_EVENTS ) != PAPI_OK ){ printf("fail to stop papi counter"); } Lmirate[2] = (double) values_o[0]/values_o[1]; end_cycles_3 = PAPI_get_real_cyc(); // Gets the ending time in clock cycles end_usec_3 = PAPI_get_real_usec(); // Gets the ending time in microseconds mflops[2] = (double) (values_o[2])/(end_usec_3-end_usec_2); util[2] = mflops[2] / PEAKPER; /** Write all measured performance to pstats.dat*/ et[0] = end_usec_1-start_usec; et[1] = end_usec_2-end_usec_1; et[2] = end_usec_3-end_usec_2; if (write_result_dat(file_perf, values_i,values_c, values_o,Lmirate, et, mflops, util) != 0 ){ printf("error when write measured performance to data file"); } /* Free all the dynamically allocated memory */ free(direc2); free(direc1); free(dxor2); free(dxor1); free(adxor2); free(adxor1); free(cnorm); free(oc); free(var); free(cgup); free(resvec); free(su); free(bp); free(bh); free(bl); free(bw); free(bn); free(be); free(bs); printf("Simulation completed successfully!\n"); return EXIT_SUCCESS; }