/*
 * gc_heap - run one garbage collection over the heap.
 *
 * arity: number of argument registers reg[1..arity] that are handed to the
 *        marker and, for the sliding collector, saved on the heap and
 *        restored afterwards.
 *
 * When compiled without GC the body is empty and the function just returns
 * TRUE.  With GC, and unless flags[GARBAGE_COLLECT] is NO_GC, it
 *   1. pads the heap top with dummy cells where a choice point's saved heap
 *      pointer equals hreg,
 *   2. for the sliding collector, pushes delayreg (if set), the argument
 *      registers and (under SLG_GC) hfreg on top of the heap,
 *   3. marks via mark_heap(),
 *   4. in fragmentation_only mode only logs the figures, frees the mark
 *      areas and leaves,
 *   5. unless GC_TEST is defined, gives up when the marked count exceeds
 *      mark_threshold times the heap in use,
 *   6. otherwise compacts with slide_heap() or copy_heap() and frees the
 *      mark areas.
 *
 * Always returns TRUE.
 *
 * NOTE(review): the GC_PROFILE_* / DECL_GC_PROFILE / INIT_GC_PROFILE macros
 * are defined elsewhere; presumably they read the timing locals declared
 * here by name - confirm before renaming anything.
 */
int gc_heap(int arity)
{
#ifdef GC
  CPtr p;
  unsigned long  begin_marktime, end_marktime, end_slidetime, end_copy_time;
  int  marked = 0, marked_dregs = 0, i;
  int  start_heap_size;
  DECL_GC_PROFILE;

  INIT_GC_PROFILE;
  if (flags[GARBAGE_COLLECT] != NO_GC) {
    num_gc++ ;
    GC_PROFILE_PRE_REPORT;
    /* both operands are 0/1 comparison results, so the bitwise | acts as a
       logical or here */
    slide = (flags[GARBAGE_COLLECT] == SLIDING_GC) | (flags[GARBAGE_COLLECT] == INDIRECTION_SLIDE_GC);

    /* a fragmentation measurement never compacts, so no sliding set-up */
    if (fragmentation_only)
      slide = FALSE;
    heap_early_reset = ls_early_reset = 0;

    GC_PROFILE_START_SUMMARY;

    begin_marktime = cpu_time();
    /* heap cells in use before any dummy/register cells are pushed below */
    start_heap_size = hreg+1-(CPtr)glstack.low;

    /* make sure the top choice point heap pointer
       that might not point into heap, does */
    if (hreg == cp_hreg(breg)) {
      *hreg = makeint(666) ;
      hreg++ ;
    }
#ifdef SLG_GC
    /* same for the freeze heap pointer */
    if (hfreg == hreg && hreg == cp_hreg(bfreg)) {
      *hreg = makeint(66600);
      hreg++;
    }
#endif

    /* copy the aregs to the top of the heap - only if sliding */
    /* just hope there is enough space */
    /* this happens best before the stack_boundaries are computed */
    if (slide) {
      /* delayreg travels as one extra pseudo-argument register; note that
         the local copy of arity is incremented for the rest of the call */
      if (delayreg != NULL) {
        arity++;
        reg[arity] = (Cell)delayreg;
      }
      for (i = 1; i <= arity; i++) {
        *hreg = reg[i];
        hreg++;
      }
    }

#ifdef SLG_GC
    /* in SLGWAM, copy hfreg to the heap */
    if (slide) {
      *hreg = (unsigned long) hfreg;
      hreg++;
    }
#endif

    marked = mark_heap(arity, &marked_dregs);

    end_marktime = cpu_time();

    if (fragmentation_only) {
      /* fragmentation is expressed as ratio not-marked/total heap in use
         this is internal fragmentation only.  we print marked and total,
         so that postprocessing can do what it wants with this info. */
      xsb_dbgmsg((LOG_GC, "marked_used_missed(%d,%d,%d,%d).",
                  marked,hreg+1-(CPtr)glstack.low,
                  heap_early_reset,ls_early_reset));

      /* This label is also the target of the early-quit path further down,
         which jumps INTO this block from outside it. */
    free_marks:
      /* get rid of the marking areas - if they exist */
      /* heap_marks is freed at (heap_marks-1); see the matching note at the
         regular clean-up below */
      if (heap_marks)  { free((heap_marks-1)); heap_marks = NULL; }
      if (tr_marks)    { free(tr_marks); tr_marks = NULL; }
      if (ls_marks)    { free(ls_marks); ls_marks = NULL; }
      if (cp_marks)    { free(cp_marks); cp_marks = NULL; }
      goto end;
    }

    GC_PROFILE_MARK_SUMMARY;

    /* An attempt to add some gc/expansion policy;
       ideally this should be user-controlled */
#if (! defined(GC_TEST))
    if (marked > ((hreg+1-(CPtr)glstack.low)*mark_threshold)) {
      GC_PROFILE_QUIT_MSG;
      /* undo the push of the argument registers done above.
         NOTE(review): under SLG_GC one further cell (hfreg) was pushed when
         sliding, and the dummy cells 666/66600 may have been pushed too;
         only `arity` cells are popped here - confirm this is intended. */
      if (slide)
        hreg -= arity;
      total_time_gc += (double)
        (end_marktime-begin_marktime)*1000/CLOCKS_PER_SEC;
      goto free_marks; /* clean-up temp areas and get out of here... */
    }
#endif

    total_collected += (start_heap_size - marked);

    if (slide) {
      GC_PROFILE_SLIDE_START_TIME;

      hreg = slide_heap(marked) ;

      /* after sliding, the heap is expected to hold exactly `marked` cells */
      if (hreg != (heap_bot+marked))
        xsb_dbgmsg((LOG_GC, "heap sliding gc - inconsistent hreg"));
#ifdef SLG_GC
      /* copy hfreg back from the heap */
      hreg--;
      hfreg = (unsigned long*) *hreg;
#endif

      /* copy the aregs from the top of the heap back */
      hreg -= arity;
      hbreg = cp_hreg(breg);

      p = hreg;

      for (i = 1; i <= arity; i++)
        reg[i] = *p++ ;
      /* the last restored slot is delayreg (pushed as reg[arity] above);
         arity-- brings the local arity back to the caller's value */
      if (delayreg != NULL)
        delayreg = (CPtr)reg[arity--];

      end_slidetime = cpu_time();

      total_time_gc += (double)
        (end_slidetime - begin_marktime)*1000/CLOCKS_PER_SEC;

      GC_PROFILE_SLIDE_FINAL_SUMMARY;
    }
    else
    { /* else we call the copying collector a la Cheney */
      CPtr begin_new_heap, end_new_heap;

      GC_PROFILE_COPY_START_TIME;

      /* temporary to-space sized for exactly the marked cells */
      begin_new_heap = (CPtr)malloc(marked*sizeof(Cell));
      if (begin_new_heap == NULL)
        xsb_exit("copying garbage collection could not allocate new heap");
      end_new_heap = begin_new_heap+marked;

      hreg = copy_heap(marked,begin_new_heap,end_new_heap,arity);

      free(begin_new_heap);
      adapt_hfreg_from_choicepoints(hreg);
      hbreg = cp_hreg(breg);

#ifdef SLG_GC
      hfreg = hreg;
#endif
      end_copy_time = cpu_time();

      total_time_gc += (double)
        (end_copy_time - begin_marktime)*1000/CLOCKS_PER_SEC;

      GC_PROFILE_COPY_FINAL_SUMMARY;
    }

    if (print_on_gc) print_all_stacks(arity);

    /* get rid of the marking areas - if they exist */
    if (heap_marks) {
      check_zero(heap_marks,(heap_top - heap_bot),"heap") ;
      free((heap_marks-1)) ; /* see its calloc */
      heap_marks = NULL ;
    }
    if (tr_marks) {
      check_zero(tr_marks,(tr_top - tr_bot + 1),"tr") ;
      free(tr_marks) ;
      tr_marks = NULL ;
    }
    if (ls_marks) {
      check_zero(ls_marks,(ls_bot - ls_top + 1),"ls") ;
      free(ls_marks) ;
      ls_marks = NULL ;
    }
    if (cp_marks) {
      check_zero(cp_marks,(cp_bot - cp_top + 1),"cp") ;
      free(cp_marks) ;
      cp_marks = NULL ;
    }
#ifdef SAFE_GC
    /* zero every cell from the new heap top up to heap_top */
    p = hreg;
    while (p < heap_top)
      *p++ = 0;
#endif

  } /* if (flags[GARBAGE_COLLECT]) */
#else
  /* for no-GC, there is no gc, but stack expansion can be done */
#endif

#ifdef GC
 end:
  GC_PROFILE_POST_REPORT;
#endif /* GC */

  return(TRUE);

} /* gc_heap */
int main ( void )

/******************************************************************************/
/*
  Purpose:

    MAIN is the main program for FFT_SERIAL.

  Discussion:

    The "complex" vector A is actually stored as a double vector B.

    The "complex" vector entry A[I] is stored as:

      B[I*2+0], the real part,
      B[I*2+1], the imaginary part.

    For N = 2, 4, ..., 2^20 the program first checks accuracy with one
    forward/backward transform pair on random data, then times NITS
    forward/backward pairs on zero data.  NITS is divided by 10 after
    every fourth doubling of N, but never drops below 1.

  Modified:

    23 March 2009

  Author:

    Original C version by Wesley Petersen.
    This C version by John Burkardt.

  Reference:

    Wesley Petersen, Peter Arbenz,
    Introduction to Parallel Computing - A practical guide with examples in C,
    Oxford University Press,
    ISBN: 0-19-851576-6,
    LC: QA76.58.P47.

  Return:

    0 on normal completion, 1 if a work array could not be allocated.
*/
{
  double ctime;
  double ctime1;
  double ctime2;
  double error;
  int first;
  double flops;
  double fnm1;
  int i;
  int icase;
  int it;
  int ln2;
  double mflops;
  int n;
  int nits = 10000;
  static double seed;
  double sgn;
  double *w;
  double *x;
  double *y;
  double *z;
  double z0;
  double z1;

  timestamp ( );
  printf ( "\n" );
  printf ( "FFT_SERIAL\n" );
  printf ( " C version\n" );
  printf ( "\n" );
  printf ( " Demonstrate an implementation of the Fast Fourier Transform\n" );
  printf ( " of a complex data vector.\n" );
/*
  Prepare for tests.
*/
  printf ( "\n" );
  printf ( " Accuracy check:\n" );
  printf ( "\n" );
  printf ( " FFT ( FFT ( X(1:N) ) ) == N * X(1:N)\n" );
  printf ( "\n" );
  printf ( " N NITS Error Time Time/Call MFLOPS\n" );
  printf ( "\n" );

  seed = 331.0;
  n = 1;
/*
  LN2 is the log base 2 of N.  Each increase of LN2 doubles N.
*/
  for ( ln2 = 1; ln2 <= 20; ln2++ )
  {
    n = 2 * n;
/*
  Allocate storage for the complex arrays W, X, Y, Z.

  We handle the complex arithmetic,
  and store a complex number as a pair of doubles, a complex vector as a doubly
  dimensioned array whose second dimension is 2.
*/
    w = ( double * ) malloc ( ( size_t ) n * sizeof ( double ) );
    x = ( double * ) malloc ( 2 * ( size_t ) n * sizeof ( double ) );
    y = ( double * ) malloc ( 2 * ( size_t ) n * sizeof ( double ) );
    z = ( double * ) malloc ( 2 * ( size_t ) n * sizeof ( double ) );
/*
  The largest case needs three 16 MB arrays; stop cleanly rather than
  dereference a null pointer when one of them is not available.
*/
    if ( w == NULL || x == NULL || y == NULL || z == NULL )
    {
      fprintf ( stderr, "\n" );
      fprintf ( stderr, "FFT_SERIAL - Fatal error!\n" );
      fprintf ( stderr, "  Unable to allocate work arrays for N = %d.\n", n );
      free ( w );
      free ( x );
      free ( y );
      free ( z );
      return 1;
    }

    first = 1;

    for ( icase = 0; icase < 2; icase++ )
    {
      if ( first )
      {
/*
  Accuracy pass: random data, with a copy kept in Z for comparison.
*/
        for ( i = 0; i < 2 * n; i = i + 2 )
        {
          z0 = ggl ( &seed );
          z1 = ggl ( &seed );
          x[i] = z0;
          z[i] = z0;
          x[i+1] = z1;
          z[i+1] = z1;
        }
      }
      else
      {
/*
  Timing pass: all-zero data.
*/
        for ( i = 0; i < 2 * n; i = i + 2 )
        {
          z0 = 0.0;              /* real part of array */
          z1 = 0.0;              /* imaginary part of array */
          x[i] = z0;
          z[i] = z0;             /* copy of initial real data */
          x[i+1] = z1;
          z[i+1] = z1;           /* copy of initial imag. data */
        }
      }
/*
  Initialize the sine and cosine tables.
*/
      cffti ( n, w );
/*
  Transform forward, back
*/
      if ( first )
      {
        sgn = + 1.0;
        cfft2 ( n, x, y, w, sgn );
        sgn = - 1.0;
        cfft2 ( n, y, x, w, sgn );
/*
  Results should be same as the initial data multiplied by N.
*/
        fnm1 = 1.0 / ( double ) n;
        error = 0.0;
        for ( i = 0; i < 2 * n; i = i + 2 )
        {
          error = error
          + pow ( z[i]   - fnm1 * x[i], 2 )
          + pow ( z[i+1] - fnm1 * x[i+1], 2 );
        }
        error = sqrt ( fnm1 * error );
        printf ( " %12d %8d %12e", n, nits, error );
        first = 0;
      }
      else
      {
        ctime1 = cpu_time ( );
        for ( it = 0; it < nits; it++ )
        {
          sgn = + 1.0;
          cfft2 ( n, x, y, w, sgn );
          sgn = - 1.0;
          cfft2 ( n, y, x, w, sgn );
        }
        ctime2 = cpu_time ( );
        ctime = ctime2 - ctime1;

        flops = 2.0 * ( double ) nits * ( 5.0 * ( double ) n * ( double ) ln2 );

        mflops = flops / 1.0E+06 / ctime;

        printf ( " %12e %12e %12f\n", ctime, ctime / ( double ) ( 2 * nits ), mflops );
      }
    }
    if ( ( ln2 % 4 ) == 0 )
    {
      nits = nits / 10;
    }
    if ( nits < 1 )
    {
      nits = 1;
    }
    free ( w );
    free ( x );
    free ( y );
    free ( z );
  }
  printf ( "\n" );
  printf ( "FFT_SERIAL:\n" );
  printf ( " Normal end of execution.\n" );
  printf ( "\n" );
  timestamp ( );

  return 0;
}
int main ( int argc, char *argv[] ) /******************************************************************************/ /* Purpose: MAIN is the main program for MD. Discussion: MD implements a simple molecular dynamics simulation. The velocity Verlet time integration scheme is used. The particles interact with a central pair potential. Usage: md nd np step_num print_step_num dt mass printinfo scale_factor scale_offset seed outFile trajectoryFile where * nd is the spatial dimension (2 or 3); * np is the number of particles (500, for instance); * step_num is the number of time steps (500, for instance); * print_step_num is the number of snapshot prints (10 for instance); * dt is size of timestep; * mass is particle mass; * printinfo is a string to append to each particle coord * scale_offset and scale_factor are used to scale particle positions for logging/rendering (FIXME) * seed sets the initial configuration Licensing: This code is distributed under the GNU LGPL license. Modified: 05 November 2010 Author: Original FORTRAN90 version by Bill Magro. C version by John Burkardt. Parameters: None */ { double ctime; double ctime1; double ctime2; double dt = 0.0001; int i; int id; double mass = 1.0 * .0001; int nd; int np; int seed = 123456789; int step; int step_num; int step_print; int step_print_index = 0; int step_print_num = 10; double *vel; timestamp ( ); printf ( "\n" ); printf ( "MD\n" ); printf ( " C version\n" ); printf ( " A molecular dynamics program.\n" ); /* Get the spatial dimension. */ if ( 1 < argc ) { nd = atoi ( argv[1] ); } else { printf ( "\n" ); printf ( " Enter ND, the spatial dimension (2 or 3).\n" ); scanf ( "%d", &nd ); } // // Get the number of points. // if ( 2 < argc ) { np = atoi ( argv[2] ); } else { printf ( "\n" ); printf ( " Enter NP, the number of points (500, for instance).\n" ); scanf ( "%d", &np ); } // // Get the number of time steps. 
// if ( 3 < argc ) { step_num = atoi ( argv[3] ); } else { printf ( "\n" ); printf ( " Enter ND, the number of time steps (500 or 1000, for instance).\n" ); scanf ( "%d", &step_num ); } /* Get any additional args (command-line only) md nd np step_num [ step__print_num dt mass printinfo scale_factor scale_offset randomseed outfile trjfile ] */ if ( 4 < argc ) { step_print_num = atoi ( argv[4] ); } if ( 5 < argc ) { dt = atof ( argv[5] ); } if ( 6 < argc ) { mass = atof ( argv[6] ); } if ( 7 < argc ) { printinfo = ( argv[7] ); } if ( 8 < argc ) { scale_factor = atof ( argv[8] ); } if ( 9 < argc ) { scale_offset = atof ( argv[9] ); } if ( 10 < argc ) { seed = atof ( argv[10] ); } if ( 11 < argc ) { outfile = argv[11]; } if ( 12 < argc ) { trjfile = argv[12]; } /* Report. */ printf ( "\n" ); printf ( " MD: Argument count: %d\n", argc ); printf ( " ND, the spatial dimension, is %d\n", nd ); printf ( " NP, the number of particles in the simulation, is %d\n", np ); printf ( " STEP_NUM, the number of time steps, is %d\n", step_num ); printf ( " STEP_PRINT_NUM, the number of snapshots to print, is %d\n", step_print_num ); printf ( " DT, the size of each time step, is %f\n", dt ); printf ( " MASS, the particle mass, is %f\n", mass ); printf ( " PRINTINFO, the pass-through info to c-ray, is %s\n", printinfo ); printf ( " SCALE_FACTOR, the particle position scaling factor, is %f\n", scale_factor ); printf ( " SCALE_OFFSET, the particle position scaling offset, is %f\n", scale_offset ); printf ( " SEED, the simulation randomization seed, is %d\n", seed ); ctime1 = cpu_time ( ); simulate (step_num, step_print_num, step_print, step_print_index, np, nd, mass, dt, seed, outfile, trjfile); ctime2 = cpu_time ( ); ctime = ctime2 - ctime1; printf ( "\n" ); printf ( " Elapsed cpu time for main computation:\n" ); printf ( " %f seconds.\n", ctime ); #ifdef NOTDEF char tarcmd[2000]; sprintf(tarcmd,"tar zcf %s md??.trj",trjfile); system(tarcmd); #endif /* Terminate. 
*/ printf ( "\n" ); printf ( "MD\n" ); printf ( " Normal end of execution.\n" ); printf ( "\n" ); timestamp ( ); return 0; }
/*
 * glstack_realloc - grow the single memory area that holds the Heap (at the
 * low end) and the Local Stack (at the high end).
 *
 * new_size: requested size of the area; it is multiplied by K to get bytes,
 *           and compared directly against glstack.size.
 * arity:    number of argument registers reg[1..arity] whose contents must
 *           be relocated along with the stacks.
 *
 * Returns 0 when nothing had to be done (new_size <= glstack.size) or when
 * the area was expanded, and 1 when realloc() failed (the old area is then
 * left untouched).
 *
 * After a successful realloc the Local Stack is moved to the new high end,
 * and every cell of the heap, local stack, trail, choice point stack and
 * argument registers is passed through the relocation macros, followed by
 * the machine registers.
 *
 * NOTE(review): stack_boundaries, reallocate_heap_or_ls_pointer and
 * realloc_ref are macros defined elsewhere; presumably they read and write
 * the locals declared here (cell_val, heap_offset, local_offset) and set
 * heap_bot/heap_top/ls_bot/ls_top - confirm before renaming or reordering.
 */
xsbBool glstack_realloc(int new_size, int arity)
{
  CPtr   new_heap_bot ;       /* bottom of new Global Stack area */
  CPtr   new_ls_bot ;         /* bottom of new Local Stack area */

  long   heap_offset ;        /* offsets between the old and new */
  long   local_offset ;       /* stack bottoms, measured in Cells */

  CPtr   *cell_ptr ;
  Cell   cell_val ;

  size_t new_size_in_bytes, new_size_in_cells ; /* what a mess ! */
  long   expandtime ;

  /* this routine only ever grows the area */
  if (new_size <= glstack.size) return 0;

  xsb_dbgmsg((LOG_REALLOC,
              "Reallocating the Heap and Local Stack data area"));
#ifdef DEBUG_VERBOSE
  if (LOG_REALLOC <= cur_log_level) {
    /* report the initial layout only on the first expansion */
    if (glstack.size == glstack.init_size) {
      xsb_dbgmsg((LOG_REALLOC,"\tBottom:\t\t%p\t\tInitial Size: %ldK",
                  glstack.low, glstack.size));
      xsb_dbgmsg((LOG_REALLOC,"\tTop:\t\t%p", glstack.high));
    }
  }
#endif

  /* start of the elapsed-time measurement reported at the end */
  expandtime = (long)(1000*cpu_time()) ;

  new_size_in_bytes = new_size*K ;
  new_size_in_cells = new_size_in_bytes/sizeof(Cell) ;
                /* and let's hope K stays divisible by sizeof(Cell) */

  stack_boundaries ;

  /* Expand the data area and push the Local Stack to the high end. */

  /* the result goes into a separate variable, so heap_bot still names the
     old area both on failure and for the offset computation below */
  new_heap_bot = (CPtr)realloc(heap_bot, new_size_in_bytes);
  if (new_heap_bot == NULL) {
    xsb_mesg("Not enough core to resize the Heap and Local Stack!");
    return 1; /* return an error output -- will be picked up later */
  }
  heap_offset = new_heap_bot - heap_bot ;
  new_ls_bot = new_heap_bot + new_size_in_cells - 1 ;
  local_offset = new_ls_bot - ls_bot ;
  /* realloc kept the local stack at its old distance from the area start
     (old address + heap_offset); shift it up so that it ends at new_ls_bot.
     memmove is used, so overlapping source and destination are handled. */
  memmove(ls_top + local_offset,             /* move to */
          ls_top + heap_offset,              /* move from */
          (ls_bot - ls_top + 1)*sizeof(Cell) );      /* number of bytes */

  /* Update the Heap links */
  /* walks downwards from just below the relocated heap top to the new
     heap bottom (the decrement happens inside the loop condition) */
  for (cell_ptr = (CPtr *)(heap_top + heap_offset);
       cell_ptr-- > (CPtr *)new_heap_bot;
      )
  { reallocate_heap_or_ls_pointer(cell_ptr) ; }

  /* Update the pointers in the Local Stack */
  for (cell_ptr = (CPtr *)(ls_top + local_offset);
       cell_ptr <= (CPtr *)new_ls_bot;
       cell_ptr++)
  { reallocate_heap_or_ls_pointer(cell_ptr) ; }

  /* Update the trailed variable pointers */
  /* each iteration handles a value cell and the address cell below it, and
     the pointer moves back three cells per entry in total (one inside the
     body, two in the loop step) */
  for (cell_ptr = (CPtr *)top_of_trail - 1;
       cell_ptr > (CPtr *)tcpstack.low;
       cell_ptr = cell_ptr - 2)
  { /* first the value */
    reallocate_heap_or_ls_pointer(cell_ptr);
    /* now the address */
    cell_ptr-- ;
    cell_val = (Cell)*cell_ptr ;
    realloc_ref(cell_ptr,(CPtr)cell_val) ;
  }

  /* Update the CP Stack pointers */
  for (cell_ptr = (CPtr *)top_of_cpstack;
       cell_ptr < (CPtr *)tcpstack.high;
       cell_ptr++)
  { reallocate_heap_or_ls_pointer(cell_ptr) ; }

  /* Update the argument registers */
  /* counts arity down to 0, covering reg[arity] .. reg[1] */
  while (arity) {
    cell_ptr = (CPtr *)(reg+arity) ;
    reallocate_heap_or_ls_pointer(cell_ptr) ;
    arity-- ;
  }

  /* Update the system variables */
  glstack.low = (byte *)new_heap_bot ;
  glstack.high = (byte *)(new_ls_bot + 1) ;
  glstack.size = new_size ;

  /* heap-side registers shift by heap_offset, local-stack-side registers
     by local_offset */
  hreg = (CPtr)hreg + heap_offset ;
  hbreg = (CPtr)hbreg + heap_offset ;
  hfreg = (CPtr)hfreg + heap_offset ;
  ereg = (CPtr)ereg + local_offset ;
  ebreg = (CPtr)ebreg + local_offset ;
  efreg = (CPtr)efreg + local_offset ;

  /* delayreg is only adjusted when it holds a list reference */
  if (islist(delayreg))
    delayreg = (CPtr)makelist(clref_val(delayreg) + heap_offset);

  expandtime = (long)(1000*cpu_time()) - expandtime;

  xsb_dbgmsg((LOG_REALLOC,"\tNew Bottom:\t%p\t\tNew Size: %ldK",
              glstack.low, glstack.size));
  xsb_dbgmsg((LOG_REALLOC,"\tNew Top:\t%p", glstack.high));
  xsb_dbgmsg((LOG_REALLOC,
              "Heap/Local Stack data area expansion - finished in %ld msecs\n",
              expandtime));

  return 0;
} /* glstack_realloc */
int main ( int argc, char *argv[] )

/******************************************************************************/
/*
  Purpose:

    MAIN is the main program for HEATED_PLATE.

  Discussion:

    This code solves the steady state heat equation on a rectangular region.

    The sequential version of this program needs approximately
    18/epsilon iterations to complete.

    The physical region, and the boundary conditions, are suggested
    by this diagram;

                   W = 0
             +------------------+
             |                  |
    W = 100  |                  | W = 100
             |                  |
             +------------------+
                   W = 100

    The region is covered with a grid of M by N nodes, and an M by N
    array W is used to record the temperature.  The correspondence between
    array indices and locations in the region is suggested by giving the
    indices of the four corners:

                  I = 0
          [0][0]-------------[0][N-1]
             |                  |
      J = 0  |                  |  J = N-1
             |                  |
        [M-1][0]-----------[M-1][N-1]
                  I = M-1

    The steady state solution to the discrete heat equation satisfies the
    following condition at an interior grid point:

      W[Central] = (1/4) * ( W[North] + W[South] + W[East] + W[West] )

    where "Central" is the index of the grid point, "North" is the index
    of its immediate neighbor to the "north", and so on.

    Given an approximate solution of the steady state heat equation, a
    "better" solution is given by replacing each interior point by the
    average of its 4 neighbors - in other words, by using the condition
    as an ASSIGNMENT statement:

      W[Central]  <=  (1/4) * ( W[North] + W[South] + W[East] + W[West] )

    If this process is repeated often enough, the difference between
    successive estimates of the solution will go to zero.

    This program carries out such an iteration, using a tolerance specified by
    the user, and writes the final estimate of the solution to a file that can
    be used for graphic processing.

  Parameters:

    Commandline argument 1, double EPSILON, the error tolerance.
    It must be a positive number, otherwise the iteration could never stop.

    Commandline argument 2, char *OUTPUT_FILE, the name of the file into which
    the steady state solution is written when the program has completed.
    At most 79 characters, up to the first white space, are used.

  Local parameters:

    Local, double DIFF, the norm of the change in the solution from one
    iteration to the next.

    Local, double MEAN, the average of the boundary values, used to initialize
    the values of the solution in the interior.

    Local, double U[M][N], the solution at the previous iteration.

    Local, double W[M][N], the solution computed at the latest iteration.

  Return:

    0 on normal completion (and after printing the usage line),
    1 if EPSILON or OUTPUT_FILE is unusable or the file cannot be written.
*/
{
# define M 500
# define N 500

  double ctime;
  double ctime1;
  double ctime2;
  double diff;
  char *end;
  double epsilon;
  FILE *fp;
  int i;
  int iterations;
  int iterations_print;
  int j;
  double mean;
  char output_file[80];
  int success;
/*
  U and W together take about 4 MB; static storage keeps them off the
  stack, where they could exceed the default stack limit.
*/
  static double u[M][N];
  static double w[M][N];

  printf ( "\n" );
  printf ( "HEATED_PLATE\n" );
  printf ( " A program to solve for the steady state temperature distribution\n" );
  printf ( " over a rectangular plate.\n" );
  printf ( "\n" );
  printf ( " Spatial grid of %d by %d points.\n", M, N );
/*
  Read EPSILON from the command line.
*/
  if ( argc < 2 )
  {
    printf ( " %s EPSILON, the error tolerance:\n",argv[0] );
    return 0;
  }
/*
  strtod reports whether anything was converted, which atof cannot.
  A zero, negative or NaN tolerance is rejected because the loop
  "while ( epsilon <= diff )" below would then never terminate.
*/
  epsilon = strtod ( argv[1], &end );

  if ( end == argv[1] || ! ( 0.0 < epsilon ) )
  {
    printf ( "\n" );
    printf ( "HEATED_PLATE\n" );
    printf ( " Error reading in the value of EPSILON.\n");
    return 1;
  }

  printf ( "\n" );
  printf ( " The iteration will be repeated until the change is <= %f\n", epsilon );
  diff = epsilon;
/*
  Read OUTPUT_FILE from the command line or the user.
  The field width keeps the name inside the 80 character buffer.
*/
  if ( argc < 3 )
  {
    printf ( "\n" );
    printf ( " Enter OUTPUT_FILE, the name of the output file:\n" );
    success = scanf ( "%79s", output_file );
  }
  else
  {
    success = sscanf ( argv[2], "%79s", output_file );
  }
  if ( success != 1 )
  {
    printf ( "\n" );
    printf ( "HEATED_PLATE\n" );
    printf ( " Error reading in the value of OUTPUT_FILE.\n");
    return 1;
  }

  printf ( "\n" );
  printf ( " The steady state solution will be written to %s.\n", output_file );
/*
  Set the boundary values, which don't change.
*/
  for ( i = 1; i < M - 1; i++ )
  {
    w[i][0] = 100.0;
  }
  for ( i = 1; i < M - 1; i++ )
  {
    w[i][N-1] = 100.0;
  }
  for ( j = 0; j < N; j++ )
  {
    w[M-1][j] = 100.0;
  }
  for ( j = 0; j < N; j++ )
  {
    w[0][j] = 0.0;
  }
/*
  Average the boundary values, to come up with a reasonable
  initial value for the interior.
*/
  mean = 0.0;
  for ( i = 1; i < M - 1; i++ )
  {
    mean = mean + w[i][0];
  }
  for ( i = 1; i < M - 1; i++ )
  {
    mean = mean + w[i][N-1];
  }
  for ( j = 0; j < N; j++ )
  {
    mean = mean + w[M-1][j];
  }
  for ( j = 0; j < N; j++ )
  {
    mean = mean + w[0][j];
  }
  mean = mean / ( double ) ( 2 * M + 2 * N - 4 );
/*
  Initialize the interior solution to the mean value.
*/
  for ( i = 1; i < M - 1; i++ )
  {
    for ( j = 1; j < N - 1; j++ )
    {
      w[i][j] = mean;
    }
  }
/*
  iterate until the new solution W differs from the old solution U
  by no more than EPSILON.
*/
  iterations = 0;
  iterations_print = 1;
  printf ( "\n" );
  printf ( " Iteration Change\n" );
  printf ( "\n" );
  ctime1 = cpu_time ( );

  while ( epsilon <= diff )
  {
/*
  Save the old solution in U.
*/
    for ( i = 0; i < M; i++ )
    {
      for ( j = 0; j < N; j++ )
      {
        u[i][j] = w[i][j];
      }
    }
/*
  Determine the new estimate of the solution at the interior points.
  The new solution W is the average of north, south, east and west neighbors.
*/
    diff = 0.0;
    for ( i = 1; i < M - 1; i++ )
    {
      for ( j = 1; j < N - 1; j++ )
      {
        w[i][j] = ( u[i-1][j] + u[i+1][j] + u[i][j-1] + u[i][j+1] ) / 4.0;

        if ( diff < fabs ( w[i][j] - u[i][j] ) )
        {
          diff = fabs ( w[i][j] - u[i][j] );
        }
      }
    }
/*
  Progress is reported at iterations 1, 2, 4, 8, ...
*/
    iterations++;
    if ( iterations == iterations_print )
    {
      printf ( " %8d %f\n", iterations, diff );
      iterations_print = 2 * iterations_print;
    }
  }
  ctime2 = cpu_time ( );
  ctime = ctime2 - ctime1;

  printf ( "\n" );
  printf ( " %8d %f\n", iterations, diff );
  printf ( "\n" );
  printf ( " Error tolerance achieved.\n" );
  printf ( " CPU time = %f\n", ctime );
/*
  Write the solution to the output file.
*/
  fp = fopen ( output_file, "w" );

  if ( fp == NULL )
  {
    printf ( "\n" );
    printf ( "HEATED_PLATE\n" );
    printf ( " Could not open the output file %s\n", output_file );
    return 1;
  }

  fprintf ( fp, "%d\n", M );
  fprintf ( fp, "%d\n", N );

  for ( i = 0; i < M; i++ )
  {
    for ( j = 0; j < N; j++)
    {
      fprintf ( fp, "%6.2f ", w[i][j] );
    }
    fputc ( '\n', fp);
  }
/*
  fclose flushes the buffered data, so a failure here means the file
  is incomplete.
*/
  if ( fclose ( fp ) != 0 )
  {
    printf ( "\n" );
    printf ( "HEATED_PLATE\n" );
    printf ( " Error writing the output file %s\n", output_file );
    return 1;
  }

  printf ( "\n" );
  printf (" Solution written to the output file %s\n", output_file );
/*
  All done!
*/
  printf ( "\n" );
  printf ( "HEATED_PLATE:\n" );
  printf ( " Normal end of execution.\n" );

  return 0;

# undef M
# undef N
}
int main(int argc, char **argv)
{
/******************************************************************************/
/*
  Purpose:

    MAIN is the main program for LINPACK_BENCH.

  Discussion:

    LINPACK_BENCH drives the double precision LINPACK benchmark program.

    A matrix A is generated, a right hand side B = A * (1,...,1) is formed,
    the system is factored (DGEFA) and solved (DGESL) under a timer, and the
    residual is computed against a freshly generated copy of A.

    If the first character of the first command line argument is '0',
    the program exits immediately without running the benchmark.

  Modified:

    25 July 2008

  Parameters:

    N is the problem size.

  Return:

    0 on normal completion (or when the benchmark is skipped),
    1 if memory could not be allocated or the matrix is singular.
*/
# define N 1000
# define LDA ( N + 1 )

  double *a = NULL;
  double a_max;
  double *b = NULL;
  double b_max;
  double cray = 0.056;
  double eps;
  int i;
  int info;
  int *ipvt = NULL;
  int j;
  int job;
  double ops;
  double *resid = NULL;
  double resid_max;
  double residn;
  double *rhs = NULL;
  int status = 1;
  double t1;
  double t2;
  double time[6];
  double total;
  double *x = NULL;

  int arg = argc > 1 ? argv[1][0] - '0' : 3;
  if (arg == 0) return 0;

  timestamp ( );
  printf ( "\n" );
  printf ( "LINPACK_BENCH\n" );
  printf ( " C version\n" );
  printf ( "\n" );
  printf ( " The LINPACK benchmark.\n" );
  printf ( " Language: C\n" );
  printf ( " Datatype: Double precision real\n" );
  printf ( " Matrix order N = %d\n", N );
  printf ( " Leading matrix dimension LDA = %d\n", LDA );
/*
  The operation count is formed in double precision so that it cannot
  overflow an int if N is ever raised.
*/
  ops = 2.0 * ( double ) N * ( double ) N * ( double ) N / 3.0
    + 2.0 * ( double ) N * ( double ) N;
/*
  Allocate space for arrays.
  Every pointer starts out NULL, so the common exit below can free them all
  no matter where a failure occurs.
*/
  a = r8mat_gen ( LDA, N );
  b = ( double * ) malloc ( N * sizeof ( double ) );
  ipvt = ( int * ) malloc ( N * sizeof ( int ) );
  resid = ( double * ) malloc ( N * sizeof ( double ) );
  rhs = ( double * ) malloc ( N * sizeof ( double ) );
  x = ( double * ) malloc ( N * sizeof ( double ) );

  if ( a == NULL || b == NULL || ipvt == NULL ||
       resid == NULL || rhs == NULL || x == NULL )
  {
    fprintf ( stderr, "\n" );
    fprintf ( stderr, "LINPACK_BENCH - Fatal error!\n" );
    fprintf ( stderr, "  Unable to allocate the work arrays.\n" );
    goto cleanup;
  }

  a_max = 0.0;
  for ( j = 0; j < N; j++ )
  {
    for ( i = 0; i < N; i++ )
    {
      a_max = r8_max ( a_max, a[i+j*LDA] );
    }
  }

  for ( i = 0; i < N; i++ )
  {
    x[i] = 1.0;
  }
/*
  B = A * X, so the exact solution of A * X = B is all ones.
*/
  for ( i = 0; i < N; i++ )
  {
    b[i] = 0.0;
    for ( j = 0; j < N; j++ )
    {
      b[i] = b[i] + a[i+j*LDA] * x[j];
    }
  }
  t1 = cpu_time ( );

  info = dgefa ( a, LDA, N, ipvt );

  if ( info != 0 )
  {
    printf ( "\n" );
    printf ( "LINPACK_BENCH - Fatal error!\n" );
    printf ( " The matrix A is apparently singular.\n" );
    printf ( " Abnormal end of execution.\n" );
    goto cleanup;
  }

  t2 = cpu_time ( );
  time[0] = t2 - t1;

  t1 = cpu_time ( );

  job = 0;
  dgesl ( a, LDA, N, ipvt, b, job );

  t2 = cpu_time ( );
  time[1] = t2 - t1;

  total = time[0] + time[1];

  free ( a );
/*
  Compute a residual to verify results.
  A was overwritten by the factorization, so it is generated again.
*/
  a = r8mat_gen ( LDA, N );

  if ( a == NULL )
  {
    fprintf ( stderr, "\n" );
    fprintf ( stderr, "LINPACK_BENCH - Fatal error!\n" );
    fprintf ( stderr, "  Unable to regenerate the matrix A.\n" );
    goto cleanup;
  }

  for ( i = 0; i < N; i++ )
  {
    x[i] = 1.0;
  }

  for ( i = 0; i < N; i++ )
  {
    rhs[i] = 0.0;
    for ( j = 0; j < N; j++ )
    {
      rhs[i] = rhs[i] + a[i+j*LDA] * x[j];
    }
  }
/*
  B now holds the computed solution; RESID = A * B - RHS.
*/
  for ( i = 0; i < N; i++ )
  {
    resid[i] = -rhs[i];
    for ( j = 0; j < N; j++ )
    {
      resid[i] = resid[i] + a[i+j*LDA] * b[j];
    }
  }

  resid_max = 0.0;
  for ( i = 0; i < N; i++ )
  {
    resid_max = r8_max ( resid_max, r8_abs ( resid[i] ) );
  }

  b_max = 0.0;
  for ( i = 0; i < N; i++ )
  {
    b_max = r8_max ( b_max, r8_abs ( b[i] ) );
  }

  eps = r8_epsilon ( );

  residn = resid_max / ( double ) N / a_max / b_max / eps;

  time[2] = total;
/*
  A timer that reports no elapsed time gives the sentinel rate -1.
*/
  if ( 0.0 < total )
  {
    time[3] = ops / ( 1.0E+06 * total );
  }
  else
  {
    time[3] = -1.0;
  }
  time[4] = 2.0 / time[3];
  time[5] = total / cray;

  printf ( "\n" );
  printf ( " Norm. Resid Resid MACHEP X[1] X[N]\n" );
  printf ( "\n" );
  printf ( " %14f %14f %14e %14f %14f\n", residn, resid_max, eps, b[0], b[N-1] );
  printf ( "\n" );
  printf ( " Factor Solve Total Unit Cray-Ratio\n" );
  printf ( "\n" );
  printf ( " %9f %9f %9f %9f %9f\n", time[0], time[1], time[2], time[4], time[5] );
  printf ( "\n" );
  printf ( "Unrolled Double Precision %9f Mflops\n", time[3]);
  printf ( "\n" );
/*
  Terminate.
*/
  printf ( "\n" );
  printf ( "LINPACK_BENCH\n" );
  printf ( " Normal end of execution.\n" );

  printf ( "\n" );
  timestamp ( );

  status = 0;
/*
  Common exit: release whatever was allocated.
*/
cleanup:
  free ( a );
  free ( b );
  free ( ipvt );
  free ( resid );
  free ( rhs );
  free ( x );

  return status;
# undef LDA
# undef N
}
int main ( int argc, char *argv[] ) /******************************************************************************/ /* Purpose: MAIN is the main program for MD. Discussion: MD implements a simple molecular dynamics simulation. The velocity Verlet time integration scheme is used. The particles interact with a central pair potential. Usage: md nd np step_num print_step_num dt mass printinfo scale_factor scale_offset seed outFile trajectoryFile where * nd is the spatial dimension (2 or 3); * np is the number of particles (500, for instance); * step_num is the number of time steps (500, for instance); * print_step_num is the number of snapshot prints (10 for instance); * dt is size of timestep; * mass is particle mass; * printinfo is a string to append to each particle coord * scale_offset and scale_factor are used to scale particle positions for logging/rendering (FIXME) * seed sets the initial configuration Licensing: This code is distributed under the GNU LGPL license. Modified: 05 November 2010 Author: Original FORTRAN90 version by Bill Magro. C version by John Burkardt. Parameters: None */ { double *acc; double *box; double ctime; double ctime1; double ctime2; double dt = 0.0001; double e0; double *force; int i; int id; double kinetic; double mass = 1.0 * .0001; int nd; int np; double *pos; double potential; int seed = 123456789; int step; int step_num; int step_print; int step_print_index; int step_print_num=10; double *vel; timestamp ( ); printf ( "\n" ); printf ( "MD\n" ); printf ( " C version\n" ); printf ( " A molecular dynamics program.\n" ); /* Get the spatial dimension. */ if ( 1 < argc ) { nd = atoi ( argv[1] ); } else { printf ( "\n" ); printf ( " Enter ND, the spatial dimension (2 or 3).\n" ); scanf ( "%d", &nd ); } // // Get the number of points. // if ( 2 < argc ) { np = atoi ( argv[2] ); } else { printf ( "\n" ); printf ( " Enter NP, the number of points (500, for instance).\n" ); scanf ( "%d", &np ); } // // Get the number of time steps. 
// if ( 3 < argc ) { step_num = atoi ( argv[3] ); } else { printf ( "\n" ); printf ( " Enter ND, the number of time steps (500 or 1000, for instance).\n" ); scanf ( "%d", &step_num ); } /* Get any additional args (command-line only) md nd np step_num [ step__print_num dt mass printinfo scale_factor scale_offset randomseed outfile trjfile ] */ if ( 4 < argc ) { step_print_num = atoi ( argv[4] ); } if ( 5 < argc ) { dt = atof ( argv[5] ); } if ( 6 < argc ) { mass = atof ( argv[6] ); } if ( 7 < argc ) { printinfo = ( argv[7] ); } if ( 8 < argc ) { scale_factor = atof ( argv[8] ); } if ( 9 < argc ) { scale_offset = atof ( argv[9] ); } if ( 10 < argc ) { seed = atof ( argv[10] ); } if ( 11 < argc ) { outfile = argv[11]; } if ( 12 < argc ) { trjfile = argv[12]; } /* Report. */ printf ( "\n" ); printf ( " MD: Argument count: %d\n", argc ); printf ( " ND, the spatial dimension, is %d\n", nd ); printf ( " NP, the number of particles in the simulation, is %d\n", np ); printf ( " STEP_NUM, the number of time steps, is %d\n", step_num ); printf ( " STEP_PRINT_NUM, the number of snapshots to print, is %d\n", step_print_num ); printf ( " DT, the size of each time step, is %f\n", dt ); printf ( " MASS, the particle mass, is %f\n", mass ); printf ( " PRINTINFO, the pass-through info to c-ray, is %s\n", printinfo ); printf ( " SCALE_FACTOR, the particle position scaling factor, is %f\n", scale_factor ); printf ( " SCALE_OFFSET, the particle position scaling offset, is %f\n", scale_offset ); printf ( " SEED, the simulation randomization seed, is %d\n", seed ); /* Allocate memory. */ acc = ( double * ) malloc ( nd * np * sizeof ( double ) ); box = ( double * ) malloc ( nd * sizeof ( double ) ); force = ( double * ) malloc ( nd * np * sizeof ( double ) ); pos = ( double * ) malloc ( nd * np * sizeof ( double ) ); vel = ( double * ) malloc ( nd * np * sizeof ( double ) ); /* Set the dimensions of the box. 
*/ for ( i = 0; i < nd; i++ ) { box[i] = 10.0; } printf ( "\n" ); printf ( " Initializing positions, velocities, and accelerations.\n" ); /* Set initial positions, velocities, and accelerations. */ initialize ( np, nd, box, &seed, pos, vel, acc ); /* Compute the forces and energies. */ printf ( "\n" ); printf ( " Computing initial forces and energies.\n" ); compute ( np, nd, pos, vel, mass, force, &potential, &kinetic ); e0 = potential + kinetic; /* This is the main time stepping loop: Compute forces and energies, Update positions, velocities, accelerations. */ printf ( "\n" ); printf ( " At each step, we report the potential and kinetic energies.\n" ); printf ( " The sum of these energies should be a constant.\n" ); printf ( " As an accuracy check, we also print the relative error\n" ); printf ( " in the total energy.\n" ); printf ( "\n" ); printf ( " Step Potential Kinetic (P+K-E0)/E0\n" ); printf ( " Energy P Energy K Relative Energy Error\n" ); printf ( "\n" ); FILE *ofile = fopen(outfile,"w"); fprintf (ofile, " Step Potential Kinetic RelativeErr\n" ); step_print = 0; step_print_index = 0; step = 0; printf ( " %8d %14f %14f %14e\n", step, potential, kinetic, ( potential + kinetic - e0 ) / e0 ); fprintf ( ofile, " %8d %14f %14f %14e\n", step, potential, kinetic, ( potential + kinetic - e0 ) / e0 ); step_print_index = step_print_index + 1; step_print = ( step_print_index * step_num ) / step_print_num; ctime1 = cpu_time ( ); for ( step = 1; step <= step_num; step++ ) { compute ( np, nd, pos, vel, mass, force, &potential, &kinetic ); if ( step == step_print ) { printf ( " %8d %14f %14f %14e\n", step, potential, kinetic, ( potential + kinetic - e0 ) / e0 ); fprintf ( ofile, " %8d %14f %14f %14e\n", step, potential, kinetic, ( potential + kinetic - e0 ) / e0 ); step_print_index = step_print_index + 1; step_print = ( step_print_index * step_num ) / step_print_num; snap ( np, nd, pos, vel, force, acc, mass, dt ); } update ( np, nd, pos, vel, force, acc, mass, dt ); } 
ctime2 = cpu_time ( ); ctime = ctime2 - ctime1; printf ( "\n" ); printf ( " Elapsed cpu time for main computation:\n" ); printf ( " %f seconds.\n", ctime ); free ( acc ); free ( box ); free ( force ); free ( pos ); free ( vel ); char tarcmd[2000]; sprintf(tarcmd,"tar zcf %s md??.trj",trjfile); system(tarcmd); /* Terminate. */ printf ( "\n" ); printf ( "MD\n" ); printf ( " Normal end of execution.\n" ); printf ( "\n" ); timestamp ( ); fclose(ofile); return 0; }
void test01 ( void )

/******************************************************************************/
/*
  Purpose:

    TEST01 uses POWER_METHOD on the Fibonacci2 matrix.

  Discussion:

    This matrix, despite having a single dominant eigenvalue, will generally
    converge only very slowly under the power method.  This has to do with
    the fact that the matrix has only 3 eigenvectors.

    The estimated eigenvalue is compared with the golden ratio PHI, and the
    estimated eigenvector X is compared with the vector X2 whose entries
    are successive powers of PHI, scaled to unit L2 norm.

    The routine prints its results and returns nothing.  If the work vector
    X2 cannot be allocated, a message is written to stderr and the program
    exits with status 1.

  Licensing:

    This code is distributed under the GNU LGPL license.

  Modified:

    20 July 2008

  Author:

    John Burkardt
*/
{
  double *a;
  double cos_x1x2;
  double ctime;
  double ctime1;
  double ctime2;
  int i;
  int it_max;
  int it_num;
  double lambda;
  int n = 50;
  double norm;
  double phi;
  int seed;
  double sin_sq;
  double sin_x1x2;
  double tol;
  double *x;
  double *x2;

  a = fibonacci2 ( n );

  seed = 123456789;
  x = r8vec_uniform_01 ( n, &seed );

  it_max = 300;
  tol = 0.000001;

  phi = ( 1.0 + sqrt ( 5.0 ) ) / 2.0;

  printf ( "\n" );
  printf ( "TEST01\n" );
  printf ( " Use the power method on the Fibonacci2 matrix.\n" );
  printf ( "\n" );
  printf ( " Matrix order N = %d\n", n );
  printf ( " Maximum iterations = %d\n", it_max );
  printf ( " Error tolerance = %e\n", tol );

  ctime1 = cpu_time ( );

  power_method ( n, a, x, it_max, tol, &lambda, &it_num );

  ctime2 = cpu_time ( );
  ctime = ctime2 - ctime1;

  printf ( "\n" );
  printf ( " Number of iterations = %d\n", it_num );
  printf ( " CPU time = %f\n", ctime );
  printf ( " Estimated eigenvalue = %24.16f\n", lambda );
  printf ( " Correct value = %24.16f\n", phi );
  printf ( " Error = %e\n", r8_abs ( lambda - phi ) );
/*
  X2 is the exact eigenvector.
*/
  x2 = ( double * ) malloc ( n * sizeof ( double ) );

  if ( x2 == NULL )
  {
    fprintf ( stderr, "\n" );
    fprintf ( stderr, "TEST01 - Fatal error!\n" );
    fprintf ( stderr, " Unable to allocate the reference eigenvector.\n" );
    exit ( 1 );
  }

  x2[0] = 1.0;
  for ( i = 1; i < n; i++ )
  {
    x2[i] = phi * x2[i-1];
  }
  norm = r8vec_norm_l2 ( n, x2 );
  for ( i = 0; i < n; i++ )
  {
    x2[i] = x2[i] / norm;
  }
/*
  The sine of the angle between X and X2 is a measure of error.

  The dot product is used directly as the cosine, which presumes X is
  returned with unit norm by POWER_METHOD (NOTE(review): confirm).
  Rounding can push |cos| slightly above 1, which would make the product
  below negative and the square root a NaN, so it is clamped at zero.
*/
  cos_x1x2 = r8vec_dot ( n, x, x2 );
  sin_sq = ( 1.0 - cos_x1x2 ) * ( 1.0 + cos_x1x2 );
  if ( sin_sq < 0.0 )
  {
    sin_sq = 0.0;
  }
  sin_x1x2 = sqrt ( sin_sq );

  printf ( "\n" );
  printf ( " Sine of angle between true and estimated vectors = %e\n", sin_x1x2 );

  free ( a );
  free ( x );
  free ( x2 );

  return;
}
void get_statistics(CTXTdecl) { int type; type = (int)ptoc_int(CTXTc 3); switch (type) { // runtime [since start of Prolog,since previous statistics] // CPU time used while executing, excluding time spent // garbage collecting, stack shifting, or in system calls. case RUNTIME: { double tot_cpu, incr_cpu; tot_cpu = cpu_time(); incr_cpu = tot_cpu - last_cpu; last_cpu = tot_cpu; ctop_float(CTXTc 4, tot_cpu); ctop_float(CTXTc 5, incr_cpu); break; } case WALLTIME: { double tot_wall,this_wall,incr_wall; this_wall = real_time(); tot_wall = this_wall - realtime_count_gl; if (!last_wall) last_wall = realtime_count_gl; incr_wall = this_wall - last_wall; last_wall = this_wall; ctop_float(CTXTc 4, tot_wall); ctop_float(CTXTc 5, incr_wall); break; } case SHARED_TABLESPACE: { #ifdef MULTI_THREAD get_memory_statistics(CTXTc type); #else xsb_abort("statistics/2 with parameter shared_tables not supported in this configuration\n"); #endif break; } case IDG_COUNTS: { ctop_int(CTXTc 4,current_call_node_count_gl); ctop_int(CTXTc 5,current_call_edge_count_gl); break; } case TABLE_OPS: { UInteger ttl_ops = ans_chk_ins + NumSubOps_AnswerCheckInsert, ttl_ins = ans_inserts + NumSubOps_AnswerInsert; ctop_int(CTXTc 4,NumSubOps_CallCheckInsert); ctop_int(CTXTc 5,NumSubOps_ProducerCall); ctop_int(CTXTc 6,var_subg_chk_ins_gl); ctop_int(CTXTc 7,var_subg_inserts_gl); ctop_int(CTXTc 8,ttl_ops); ctop_int(CTXTc 9,ttl_ins); } default: { get_memory_statistics(CTXTc type); break; } } }
int main(void) { register unsigned int i; float flops; unsigned long temps ; init_matf(Af,2.0); init_matf(Bf,3.0); init_matf(Cf,0.0); init_matd(Ad,7.0); init_vect(V1,2.3); //aff_matf(Af); //aff_matd(Ad); //aff_matf(Cf); printf("Calculs sur %d matrices\n", ITER); printf("Dimension des matrices : %d\n", N); printf("Nombre de threads : %d\n", THREADS); printf("Dépassement du cache : %s\n", depasseCache()); printf("// MULTIPLICATIONS //\n"); /* Affichage du temps et des MFLOPS pour différents types d'opérations */ printf("Multiplication | Lignes de la matrice de sortie\n"); top1(); for(i=0; i< ITER; i++) multLigneF_OMP(Af,Bf,Cf); top2(); temps = cpu_time(); printf("time = %ld.%03ldms\n", temps/1000, temps%1000); flops = (float)(2*(float)CUBE(N)) / (float)(temps * (1e-6)) *ITER; printf("MFLOPS : %f\n",flops/1e6); printf("Multiplication | Colonnes de la matrice de sortie\n"); top1(); for(i=0; i< ITER; i++) muxColonneF_OMP(Af,Af,Cf); top2(); temps = cpu_time(); printf("time = %ld.%03ldms\n", temps/1000, temps%1000); flops = (float)(2*(float)CUBE(N)) / (float)(temps * (1e-6)) *ITER; printf("MFLOPS : %f\n",flops/1e6); printf("Multiplication | Par blocs de %d valeurs de la matrice de sortie\n",BLOC); top1(); for(i=0; i< ITER; i++) multBlocF_OMP(Af,Bf,Cf); top2(); temps = cpu_time(); printf("time = %ld.%03ldms\n", temps/1000, temps%1000); flops = (float)(2*(float)CUBE(N)) / (float)(temps * (1e-6)) *ITER; printf("MFLOPS : %f\n",flops/1e6); printf("// SOMME //\n"); top1(); for(i=0; i< ITER; i++) sommeF(Af,Bf,Cf); top2(); temps = cpu_time(); printf("time = %ld.%03ldms\n", temps/1000, temps%1000); flops = (float)((N)*(N)) / (float)(temps * (1e-6)) *ITER; printf("MFLOPS : %f\n",flops/1e6); printf("// MULT MATxVECT //\n"); top1(); for(i=0; i< ITER; i++) multVect(Af,V1,V2); top2(); temps = cpu_time(); printf("time = %ld.%03ldms\n", temps/1000, temps%1000); flops = (float)((N)*(N)) / (float)(temps * (1e-6)) *ITER; printf("MFLOPS : %f\n",flops/1e6); printf("// GAXPY //\n"); top1(); 
for(i=0; i< ITER; i++) gaxpy(V1,V2,Af,V3); top2(); temps = cpu_time(); printf("time = %ld.%03ldms\n", temps/1000, temps%1000); flops = (float)((N*N)+N) / (float)(temps * (1e-6)) *ITER; printf("MFLOPS : %f\n",flops/1e6); return 0; }
/*
 * Refresh the timing counters and hand the request to
 * get_memory_statistics_1().
 *
 * In builds without MULTI_THREAD the global cputime_count_gl is first
 * updated to cpu_time() minus time_start_gl.  The wall-clock value passed
 * on is real_time() minus realtime_count_gl.
 */
void get_memory_statistics(CTXTdeclc int type) {
  double wall_elapsed;

#ifndef MULTI_THREAD
  cputime_count_gl = (cpu_time() - time_start_gl);
#endif

  wall_elapsed = real_time()-realtime_count_gl;
  get_memory_statistics_1(CTXTc wall_elapsed,type);
}
/* Store in the global time_count the difference between the current
   cpu_time() reading and the global time_start. */
void perproc_stat(void) { time_count = cpu_time() - time_start; }
double test03 ( int n )

/******************************************************************************/
/*
  Purpose:

    TEST03 tests I4MAT_FLOYD.

  Discussion:

    The matrix size is input by the user.

    The matrix A has the property that A(I,J) = 1 if I is divisible by J.
    Every other entry is set to I4_HUGE ( ) / 2 before I4MAT_FLOYD is
    called.

    Only the call to I4MAT_FLOYD is timed; setting up and freeing the
    matrix are excluded.

    The size of the matrix is computed in size_t arithmetic and checked,
    so a large N produces an error message instead of an overflowed
    allocation request.  If the size does not fit or the allocation
    fails, a message is written to stderr and the program exits with
    status 1.

  Example:

    N = 6

    1 0 0 0 0 0
    1 1 0 0 0 0
    1 0 1 0 0 0
    1 1 0 1 0 0
    1 0 0 0 1 0
    1 1 1 0 0 1

  Licensing:

    This code is distributed under the GNU LGPL license.

  Modified:

    20 July 2011

  Author:

    John Burkardt

  Parameters:

    Input, int N, the size of the matrix.

    Output, double TEST03, the CPU time required by I4MAT_FLOYD.
*/
{
  int *a;
  size_t count;
  int huge;
  size_t i;
  size_t j;
  size_t order;
  double time1;
  double time2;
  double wtime;
/*
  A nonpositive N means an empty matrix.
*/
  order = ( 0 < n ) ? ( size_t ) n : 0;
/*
  Refuse a size for which ORDER * ORDER * sizeof ( int ) would wrap.
*/
  if ( order != 0 && ( ( size_t ) -1 ) / order / sizeof ( int ) < order )
  {
    fprintf ( stderr, "\n" );
    fprintf ( stderr, "TEST03 - Fatal error!\n" );
    fprintf ( stderr, " The matrix size N = %d is too large.\n", n );
    exit ( 1 );
  }

  count = order * order;
/*
  Always request at least one entry, so that a NULL result can only
  mean that the allocation failed.
*/
  a = ( int * ) malloc ( ( count != 0 ? count : 1 ) * sizeof ( int ) );

  if ( a == NULL )
  {
    fprintf ( stderr, "\n" );
    fprintf ( stderr, "TEST03 - Fatal error!\n" );
    fprintf ( stderr, " Unable to allocate the matrix for N = %d.\n", n );
    exit ( 1 );
  }

  huge = i4_huge ( ) / 2;

  for ( j = 0; j < order; j++ )
  {
    for ( i = 0; i < order; i++ )
    {
      if ( ( i + 1 ) % ( j + 1 ) == 0 )
      {
        a[i+j*order] = 1;
      }
      else
      {
        a[i+j*order] = huge;
      }
    }
  }

  time1 = cpu_time ( );

  i4mat_floyd ( n, a );

  time2 = cpu_time ( );

  wtime = time2 - time1;

  free ( a );

  return wtime;
}
// Randomized test of `sort_from`, driven by the seed `start_rnd`.
//
// A fresh `sort_glb` is set up, a random number of elements is created, and
// then, for a random number of levels `bb`, randomly chosen elements get
// `sort_from(glb1, bb)` applied. At the end the resulting order of the
// elements and of the sorsets is checked.
//
// Parameters:
//   test_consec     - test number; only printed in the progress line.
//   start_rnd       - seed given to the `tak_mak` random generator; printed
//                     on failure so the run can be reproduced.
//   spd_avg         - receives the measured ops-per-cpu_time-unit figure
//                     when SPEED_TEST is set and the figure is finite.
//   DEBUG_SRT_FROM  - when true, sortedness is also checked before and after
//                     every single operation; the first violation prints
//                     diagnostics and stops the test.
//   SPEED_TEST      - when true, the `sort_from` calls are recorded during
//                     the random phase and executed later in one timed batch.
//
// Returns true unless a DEBUG_SRT_FROM check stopped the test. A failed
// final check does not return false: it reaches `abort_func(0)` instead.
bool test_nw_srt_from(long test_consec, long start_rnd, avg_stat& spd_avg, bool DEBUG_SRT_FROM = false, bool SPEED_TEST = false) {
	std::ostream& os = std::cout;
	os << "TEST " << test_consec << "(seed=" << start_rnd << ")";
	os.flush();

	sort_glb glb1;
	//glb1.init_head_ss();
	glb1.stab_mutual_init();

	//os << "START_RND=" << start_rnd << std::endl;
	tak_mak rnd_gen(start_rnd);

	k_row<elem_sor> all_elem;
	row<op_sor> all_ops;

	if(SPEED_TEST){
		all_ops.set_cap(10000);
	}

	long num_elem = rnd_gen.gen_rand_int32_ie(1, 1000);
	all_elem.set_cap(num_elem);
	// Note the `<=`: num_elem + 1 elements are created, all with id "e=".
	for(long aa = 0; aa <= num_elem; aa++){
		elem_sor& ele = all_elem.inc_sz();
		ele.es_id = "e=";
	}

	row<elem_sor*> tmp_rr1;
	row<elem_sor*> tmp_rr2;

	// Once set, every enclosing loop condition below fails and the loops end.
	bool stop_all = false;

	// Holds the printed pre-operation state for the DEBUG_SRT_FROM reports.
	std::stringstream ss_tmp;

	long num_lvs = rnd_gen.gen_rand_int32_ie(1, 10);
	// bb is the level passed to sort_from; it starts at 1.
	for(long bb = 1; ((bb < num_lvs) && ! stop_all); bb++){
		glb1.sg_curr_stab_consec++;
		DBG_CK(glb1.sg_curr_stab_consec >= bb);
		DBG_CK(glb1.sg_curr_stab_consec >= glb1.sg_dbg_last_id);

		std::string bb_ss = long_to_str(bb);

		long num_reps_lv = rnd_gen.gen_rand_int32_ie(1, 10);
		for(long dd = 0; ((dd < num_reps_lv) && ! stop_all); dd++){
			DBG_CK(glb1.sg_curr_stab_consec >= glb1.sg_dbg_last_id);

			long num_elem_rep = rnd_gen.gen_rand_int32_ie(1, num_elem);
			for(long cc = 0; ((cc < num_elem_rep) && ! stop_all); cc++){
				DBG_CK(glb1.sg_curr_stab_consec >= glb1.sg_dbg_last_id);

				bool all_csecs = false;
				long elem_idx = rnd_gen.gen_rand_int32_ie(1, num_elem);
				SORTER_CK(all_elem.is_valid_idx(elem_idx));
				elem_sor& ele = all_elem[elem_idx];

				if(DEBUG_SRT_FROM){
					// Reset the scratch stream, then capture the state before
					// the operation so it can be shown if a check fails.
					ss_tmp.clear();
					ss_tmp.seekg(0, std::ios::beg);
					ss_tmp.str("");
					ss_tmp.flush();

					glb1.sort_to_row_and_all_consec<elem_sor>(tmp_rr1, all_csecs);
					tmp_rr1.print_row_data(ss_tmp, true, "\n");
					if(! tmp_rr1.is_sorted(cmp_elem_sr)){
						//tmp_rr1.print_row_data(os, true, "\n");
						std::string out_ss = ss_tmp.str();
						os << out_ss;
						os << "FAILED starting" << std::endl << ele << std::endl;
						os << "START_RND=" << start_rnd << std::endl;
						stop_all = true;
						break;
					}
				}

				std::string dbg_id_start = ele.es_id;

				if(SPEED_TEST){
					// Only record the operation; it runs in the timed batch
					// after the loops.
					op_sor& the_op = all_ops.inc_sz();
					the_op.op_elem = &ele;
					the_op.op_id = bb;
				} else {
					DBG_CK(glb1.sg_curr_stab_consec >= bb);
					DBG_CK(glb1.sg_curr_stab_consec >= glb1.sg_dbg_last_id);
					ele.es_srt_bdr.sort_from(glb1, bb);
					DBG_CK(glb1.sg_curr_stab_consec >= glb1.sg_dbg_last_id);
				}

				// Done in both modes, even when the sort_from call is deferred.
				ele.es_id += "b" + bb_ss;
				ele.add(bb);

				std::string dbg_id_end = ele.es_id;

				if(DEBUG_SRT_FROM){
					glb1.sort_to_row_and_all_consec<elem_sor>(tmp_rr2, all_csecs);
					if(! tmp_rr2.is_sorted(cmp_elem_sr)){
						os << "STARTING" << std::endl;
						std::string out_ss = ss_tmp.str();
						os << out_ss;
						//tmp_rr1.print_row_data(os, true, "\n");
						os << std::endl << std::endl << "ENDING" << std::endl;
						tmp_rr2.print_row_data(os, true, "\n");
						os << "FAILED ending" << std::endl << ele << std::endl;
						os << "curr_id=" << bb << std::endl;
						os << "dbg_id_start=" << dbg_id_start << std::endl;
						os << "dbg_id_end=" << dbg_id_end << std::endl;
						os << "START_RND=" << start_rnd << std::endl;
						stop_all = true;
						ck_sorted_elems(tmp_rr2, cmp_elem_sr);
						break;
					}
				}
			}
		}
	}

	os << "::";
	os.flush();

	//DBG_CK(glb1.sg_curr_stab_consec >= glb1.sg_dbg_last_id);

	long num_srt_from = 0;
	double run_speed = 0;

	if(SPEED_TEST){
		// Run the recorded operations in their recorded order and time them.
		double start_tm = cpu_time();
		for(long aa = 0; aa < all_ops.size(); aa++){
			op_sor& the_op = all_ops[aa];
			SORTER_CK(the_op.op_elem != NULL_PT);
			SORTER_CK(the_op.op_id > 0);
			the_op.op_elem->es_srt_bdr.sort_from(glb1, the_op.op_id);
			num_srt_from++;
		}
		double finish_tm = cpu_time();

		// A zero time difference makes this non-finite; such a sample is
		// not added to the average.
		run_speed = num_srt_from / (finish_tm - start_tm);
		if(finite(run_speed)){
			spd_avg.add_val(run_speed);
		}
	}

	//DBG_CK(glb1.sg_curr_stab_consec >= glb1.sg_dbg_last_id);

	if(! stop_all){
		row<elem_sor*> s_rr;
		bool all_in_consec = false;
		glb1.sort_to_row_and_all_consec<elem_sor>(s_rr, all_in_consec);

		os << " #elem=" << num_elem << " #ops=" << num_srt_from << " speed=" << run_speed << " finite=" << finite(run_speed);

		//os << "SIZE=" << s_rr.size() << bj_eol;
		//s_rr.print_row_data(os, true, "\n");
		//os << "NUM_ELEM=" << num_elem << std::endl;
		//os << "START_RND=" << start_rnd << std::endl;
		//os << std::endl;

		bool finish_ok = true;

		// Final check 1: the elements must come out sorted.
		if(! s_rr.is_sorted(cmp_elem_sr)){
			s_rr.print_row_data(os, true, "\n");
			os << " START_RND=" << start_rnd << std::endl;
			SORTER_CK(ck_sorted_elems(s_rr, cmp_elem_sr));
			finish_ok = false;
		}

		// Final check 2: the sorsets must come out sorted.
		glb1.sort_to_tmp_srss();
		if(! glb1.sg_tmp_srss.is_sorted(cmp_sorsets)){
			s_rr.print_row_data(os, true, "\n");
			os << " START_RND=" << start_rnd << std::endl;
			ck_sorted_sorsets(glb1.sg_tmp_srss, cmp_sorsets);
			SORTER_CK(false);
			finish_ok = false;
		}

		//os << "FINISHED OK=" << start_rnd << std::endl;
		if(finish_ok){
			os << " finished ok";
		} else {
			os << " FAILED !!!!!!!!!!!!!!!!!!!!!!!!!!!!";
			os.flush();
			abort_func(0);
		}
		//os << std::endl;

		//glb1.release_all();
		//glb1.init_head_ss();
		glb1.stab_mutual_init();
	}

	//os << "START_RND=" << start_rnd << std::endl;
	return ! stop_all;
}
// -------------------------------------------------------------------------- // // void Test_Hash::testHashCustomRateInput() { // Setup a configuration and a process. // ----------------------------------------------------------------------- // Setup a valid configuration. std::vector<std::vector<double> > coords(2, std::vector<double>(3, 0.0)); // One cell with two atoms. coords[0][0] = 0.0; coords[0][1] = 0.0; coords[0][2] = 0.0; coords[1][0] = 0.5; coords[1][1] = 0.3; coords[1][2] = 0.1; // Setup elements. std::vector<std::vector<std::string> > elements(2); elements[0] = std::vector<std::string>(1,"A"); elements[1] = std::vector<std::string>(1,"B"); // Setup the mapping from element to integer. std::map<std::string, int> possible_types; possible_types["*"] = 0; possible_types["A"] = 1; possible_types["B"] = 2; possible_types["C"] = 3; possible_types["D"] = 4; possible_types["E"] = 5; possible_types["F"] = 6; // Construct the configuration. Configuration config(coords, elements, possible_types); // Setup a non periodic cooresponding lattice map. const std::vector<int> repetitions(3, 1); const std::vector<bool> periodicity(3, false); const int basis = 2; std::vector<int> basis_sites; basis_sites.push_back(1); basis_sites.push_back(0); LatticeMap lattice_map(basis, repetitions, periodicity); config.initMatchLists(lattice_map, 13); // Construct a process that should match the second index. // Setup the two configurations. std::vector<std::vector<std::string> > elements1; elements1.push_back(std::vector<std::string>(1,"B")); elements1.push_back(std::vector<std::string>(1,"A")); std::vector<std::vector<std::string> > elements2; elements2.push_back(std::vector<std::string>(1,"C")); elements2.push_back(std::vector<std::string>(1,"A")); // Setup coordinates. std::vector<std::vector<double> > process_coords(2,std::vector<double>(3,0.0)); process_coords[1][0] = -0.5; process_coords[1][1] = -0.5; process_coords[1][2] = -0.5; // The configurations. 
const Configuration config1(process_coords, elements1, possible_types); const Configuration config2(process_coords, elements2, possible_types); // Construct the process with a random rate. seedRandom(19, true); const double rate = 13.7*randomDouble01(); const CustomRateProcess process1(config1, config2, rate, basis_sites, 12.0, std::vector<int>(0), std::vector<Coordinate>(0), 917); const CustomRateProcess process2(config1, config2, rate, basis_sites, 12.0, std::vector<int>(0), std::vector<Coordinate>(0), 916); int index; // ----------------------------------------------------------------------- { // Get the hash. index = 1; const unsigned long int hash1 = hashCustomRateInput(index, process1, config); const unsigned long int ref1 = 18009609292013583759u; CPPUNIT_ASSERT_EQUAL(hash1, ref1); // Get the hash. index = 0; const unsigned long int hash0 = hashCustomRateInput(index, process1, config); const unsigned long int ref0 = 4224368175550234772u; CPPUNIT_ASSERT_EQUAL(hash0, ref0); } { // Check against another process that differs in the process number. index = 1; const unsigned long int hash1 = hashCustomRateInput(index, process2, config); const unsigned long int ref1 = 4824710481459367137u; CPPUNIT_ASSERT_EQUAL(hash1, ref1); index = 0; const unsigned long int hash0 = hashCustomRateInput(index, process2, config); const unsigned long int ref0 = 17780468236463825071u; CPPUNIT_ASSERT_EQUAL(hash0, ref0); } // Check performance. if (false) { double t1 = cpu_time(); unsigned long int hash_loop; for (int i = 0; i < 10000000; ++i) { hash_loop = hashCustomRateInput(index, process2, config); } double t2 = cpu_time(); // Printout to avoid optimization. printf("hash0 %lx\n %e", hash_loop, (t2-t1)/10000000); } }
int main(int argc,char *argv[]) { mpf_t pi,qi,ci; mpz_t pstack,qstack,gstack; long d=100,out=0,threads=1,depth,psize,qsize; double begin, mid0, mid3, mid4, end; double wbegin, wmid0, wmid3, wmid4, wend; prog_name = argv[0]; if (argc==1) { fprintf(stderr,"\nSyntax: %s <digits> <option> <threads>\n",prog_name); fprintf(stderr," <digits> digits of pi to output\n"); fprintf(stderr," <option> 0 - just run (default)\n"); fprintf(stderr," 1 - output digits\n"); fprintf(stderr," <threads> number of threads (default 1)\n"); exit(1); } if (argc>1) d = strtoul(argv[1],0,0); if (argc>2) out = atoi(argv[2]); if (argc>3) threads = atoi(argv[3]); terms = d/DIGITS_PER_ITER; depth = 0; while ((1L<<depth)<terms) depth++; depth++; fprintf(stderr,"#terms=%ld, depth=%ld, threads=%ld cores=%d\n", terms, depth, threads, get_nprocs()); begin = cpu_time(); wbegin = wall_clock(); mpz_init(pstack); mpz_init(qstack); mpz_init(gstack); /* begin binary splitting process */ if (terms<=0) { mpz_set_ui(pstack,1); mpz_set_ui(qstack,0); mpz_set_ui(gstack,1); } else { #ifdef _OPENMP #pragma omp parallel num_threads(threads) #pragma omp single nowait { bs(0,terms,1,pstack,qstack,gstack); } #else bs(0,terms,1,pstack,qstack,gstack); #endif } mid0 = cpu_time(); wmid0 = wall_clock(); fprintf(stderr,"bs cputime = %6.2f wallclock = %6.2f factor = %6.1f\n", mid0-begin,wmid0-wbegin,(mid0-begin)/(wmid0-wbegin)); fflush(stderr); mpz_clear(gstack); /* prepare to convert integers to floats */ mpf_set_default_prec((long)(d*BITS_PER_DIGIT+16)); /* p*(C/D)*sqrt(C) pi = ----------------- (q+A*p) */ psize = mpz_sizeinbase(pstack,10); qsize = mpz_sizeinbase(qstack,10); mpz_addmul_ui(qstack,pstack,A); mpz_mul_ui(pstack,pstack,C/D); mpf_init(pi); mpf_set_z(pi,pstack); mpz_clear(pstack); mpf_init(qi); mpf_set_z(qi,qstack); mpz_clear(qstack); /* final step */ mid3 = cpu_time(); wmid3 = wall_clock(); #ifdef _OPENMP #pragma omp parallel num_threads(threads) #pragma omp single nowait { #pragma omp task shared(qi,pi) { 
mpf_div(qi,pi,qi); mpf_clear(pi); } #pragma omp task shared(ci) { mpf_init(ci); mpf_sqrt_ui(ci,C); } #pragma omp taskwait } #else mpf_div(qi, pi, qi); mpf_clear(pi); mpf_init(ci); mpf_sqrt_ui(ci, C); #endif mid4 = cpu_time(); wmid4 = wall_clock(); fprintf(stderr,"div/sqrt cputime = %6.2f wallclock = %6.2f factor = %6.1f\n", mid4-mid3,wmid4-wmid3,(mid4-mid3)/(wmid4-wmid3)); mpf_mul(qi,qi,ci); mpf_clear(ci); end = cpu_time(); wend = wall_clock(); fprintf(stderr,"mul cputime = %6.2f wallclock = %6.2f factor = %6.1f\n", end-mid4,wend-wmid4,(end-mid4)/(wend-wmid4)); fprintf(stderr,"total cputime = %6.2f wallclock = %6.2f factor = %6.1f\n", end-begin,wend-wbegin,(end-begin)/(wend-wbegin)); fflush(stderr); fprintf(stderr," P size=%ld digits (%f)\n" " Q size=%ld digits (%f)\n", psize, (double)psize/d, qsize, (double)qsize/d); /* output Pi and timing statistics */ if (out&1) { fprintf(stdout,"pi(0,%ld)=\n", terms); mpf_out_str(stdout,10,d,qi); fprintf(stdout,"\n"); } /* free float resources */ mpf_clear(qi); exit (0); }
int main ( int argc, char *argv[] )

/******************************************************************************/
/*
  Purpose:

    MAIN is the main program for QUAD2D_SERIAL.

  Discussion:

    The integral of f(x,y) over the square [A,B]x[A,B] is estimated by
    evaluating F at the midpoints of an NX by NY grid of cells and
    averaging.  The estimate is compared with pi^2 / 6, and the time
    reported is the difference of two CPU_TIME readings taken around the
    summation loop.

    The command line arguments are not used.

  Licensing:

    This code is distributed under the GNU LGPL license.

  Modified:

    25 October 2011

  Author:

    John Burkardt
*/
{
  double a = 0.0;
  double b = 1.0;
  double elapsed;
  double error;
  double exact;
  int ix;
  int iy;
  int n;
  int nx = 32768;
  int ny = 32768;
  double pi = 3.141592653589793;
  double t_start;
  double total;
  double x;
  double y;

  n = nx * ny;
  exact = pi * pi / 6.0;

  timestamp ( );

  printf ( "\n" );
  printf ( "QUAD2D:\n" );
  printf ( " C version\n" );
  printf ( " Estimate the integral of f(x,y) over [0,1]x[0,1].\n" );
  printf ( " f(x,y) = 1 / ( 1 - x * y ).\n" );
  printf ( "\n" );
  printf ( " A = %f\n", a );
  printf ( " B = %f\n", b );
  printf ( " NX = %d\n", nx );
  printf ( " NY = %d\n", ny );
  printf ( " N = %d\n", n );
  printf ( " Exact = %24.16f\n", exact );

  t_start = cpu_time ( );
/*
  Sum F over the cell midpoints, one column of cells at a time.
*/
  total = 0.0;
  for ( ix = 1; ix <= nx; ix++ )
  {
    x = ( ( 2 * nx - 2 * ix + 1 ) * a + ( 2 * ix - 1 ) * b ) / ( 2 * nx );
    for ( iy = 1; iy <= ny; iy++ )
    {
      y = ( ( 2 * ny - 2 * iy + 1 ) * a + ( 2 * iy - 1 ) * b ) / ( 2 * ny );
      total += f ( x, y );
    }
  }

  elapsed = cpu_time ( ) - t_start;
/*
  Scale the sum by the area of one cell.
*/
  total = ( b - a ) * ( b - a ) * total / ( double ) ( nx ) / ( double ) ( ny );
  error = fabs ( total - exact );

  printf ( "\n" );
  printf ( " Estimate = %24.16f\n", total );
  printf ( " Error = %e\n", error );
  printf ( " Time = %f\n", elapsed );
/*
  Terminate.
*/
  printf ( "\n" );
  printf ( "QUAD2D_SERIAL:\n" );
  printf ( " Normal end of execution.\n" );
  printf ( "\n" );
  timestamp ( );

  return 0;
}