// looks more complicated , is faster. GLU_complex par_polar_box( const uint32_t thread ) { register const GLU_real u = (GLU_real)( 2. * par_rng_dbl( thread ) - 1. ) ; register const GLU_real v = (GLU_real)( 2. * par_rng_dbl( thread ) - 1. ) ; const GLU_real s = u * u + v * v ; return s < 1. ? sqrt( -log( s ) / s ) * ( u + I * v ) : par_polar_box( thread ) ; }
// overrelaxation algorithm static void overrelax( GLU_complex U[ NCNC ] , const GLU_complex staple[ NCNC ] , const uint32_t thread ) { GLU_complex s0 GLUalign , s1 GLUalign ; double scale GLUalign ; size_t i ; #ifdef NSTOCH for( i = 0 ; i < NSTOCH ; i++ ) { const size_t stoch = (size_t)( par_rng_dbl( thread ) * NSU2SUBGROUPS ) ; only_subgroup( &s0 , &s1 , &scale , U , staple , stoch ) ; microcanonical( &s0 , &s1 ) ; #ifdef CHROMA_RELAX su2_rotate( U , s0 , s1 , stoch ) ; #else su2_rotate( U , s0 , s1 , stoch ) ; su2_rotate( U , s0 , s1 , stoch ) ; #endif } #else for( i = 0 ; i < NSU2SUBGROUPS ; i++ ) { #ifdef SOR // stochastic-OR? if( par_rng_dbl( thread ) < 0.5 ) continue ; #endif only_subgroup( &s0 , &s1 , &scale , U , staple , i ) ; microcanonical( &s0 , &s1 ) ; #ifdef CHROMA_RELAX su2_rotate( U , s0 , s1 , i ) ; #else su2_rotate( U , s0 , s1 , i ) ; su2_rotate( U , s0 , s1 , i ) ; #endif } #endif return ; }
// seed the rng int initialise_par_rng( const char *rng_file ) { if( RNG_inited == GLU_FALSE ) { // tell us our rng #if (defined KISS_RNG) fprintf( stdout , "[PAR_RNG] KISS_RNG \n" ) ; #elif (defined MWC_4096_RNG) fprintf( stdout , "[PAR_RNG] MWC_4096\n" ) ; #elif (defined MWC_1038_RNG) fprintf( stdout , "[PAR_RNG] MWC_1038\n" ) ; #elif (defined XOR_1024_RNG) fprintf( stdout , "[PAR_RNG] XOR_1024\n" ) ; #else fprintf( stdout , "[PAR_RNG] WELL_512\n" ) ; #endif if( rng_file == NULL ) { // pull from the entropy pool? uint32_t *Seeds = malloc( Latt.Nthreads * sizeof( uint32_t ) ) ; size_t i ; if( Latt.Seed[0] == 0 ) { FILE *urandom = fopen( "/dev/urandom" , "r" ) ; if( urandom == NULL ) { fprintf( stderr , "[RNG] /dev/urandom not opened!! ... Exiting \n" ) ; return GLU_FAILURE ; } // read them from urandom if( fread( Seeds , sizeof( Latt.Seed ) , Latt.Nthreads , urandom ) != Latt.Nthreads ) { fprintf( stderr , "[RNG] Entropy pool Seed not read properly ! " "... Exiting \n" ) ; return GLU_FAILURE ; } fclose( urandom ) ; } else { // for( i = 0 ; i < Latt.Nthreads ; i++ ) { Seeds[ i ] = Latt.Seed[0] + i ; } // } fprintf( stdout , "[PAR_RNG] Entropy read \n" ) ; for( i = 0 ; i < Latt.Nthreads ; i++ ) { fprintf( stdout , "[PAR_RNG] Seed_%zu %u \n" , i , Seeds[i] ) ; } // do the seeding #if (defined KISS_RNG) GLU_set_par_KISS_table( Seeds ) ; #elif (defined MWC_4096_RNG) GLU_set_par_MWC_4096_table( Seeds ) ; #elif (defined MWC_1038_RNG) GLU_set_par_MWC_1038_table( Seeds ) ; #elif (defined XOR_1024_RNG) GLU_set_par_XOR_1024_table( Seeds ) ; #else GLU_set_par_WELL_512_table( Seeds ) ; #endif // warm up the rng #pragma omp parallel for private(i) for( i = 0 ; i < Latt.Nthreads ; i++ ) { size_t j ; const uint32_t thread = get_GLU_thread( ) ; for( j = 0 ; j < 10000 ; j++ ) { par_rng_dbl( thread ) ; } } fprintf( stdout , "[PAR_RNG] warmed up\n" ) ; // free the seeds free( Seeds ) ; } else { return read_par_rng_state( rng_file ) ; } RNG_inited = GLU_TRUE ; } return GLU_SUCCESS ; }
// accessor for ints
//
// Maps a uniform double from par_rng_dbl (assumed in [0,1) - TODO confirm)
// onto the full uint32_t range by scaling with UINT32_MAX.
uint32_t
par_rng_int( const uint32_t thread )
{
  const double draw = par_rng_dbl( thread ) ;
  return (uint32_t)( UINT32_MAX * draw ) ;
}
// Gaussian distributed doubles ocassionally called GLU_real par_polar( const uint32_t thread ) { const GLU_real u = par_rng_dbl( thread ) , v = par_rng_dbl( thread ) ; return sqrt( -2. * log( u ) ) * cos( TWOPI * v ) ; }