/**
 * Applies one explicit diffusion update to every diffusing lattice component.
 *
 * For each component whose diffusion coefficient is non-zero, a Laplacian-based
 * increment is accumulated in a zero-initialised scratch array and added to the
 * lattice only after all cells have been visited, so the lattice is not
 * modified while the increments are being computed.
 *
 * Interior cells use my_laplacian(). Cells on the outermost rows/columns use
 * my_laplacian_periodic() when the boundary condition is PeriodicBoundary and
 * otherwise receive no increment at all.
 *
 * @param stepSize      Base::HalfStep scales the increment by tau / 2,
 *                      Base::WholeStep by tau; any other value gives a zero
 *                      increment. Also forwarded to advanceTime().
 * @param advanceInTime Not read anywhere in this function.
 *                      NOTE(review): advanceTime() is called unconditionally -
 *                      confirm whether it was meant to be guarded by this flag.
 */
void FiniteIntervalIntegrator<T_model>::executeDiffusion(
    typename Base::DiffusionStepWidth stepSize, bool advanceInTime)
{
    advanceTime(stepSize);

    for (uint comp = 0; comp < Components::number_of_Variables; ++comp) {
        // Components that do not diffuse are left untouched.
        if (Base::diffusion_[comp] == 0.0)
            continue;

        // Diffusion coefficient per lattice-cell area, times the time increment.
        const double weight =
            (stepSize == Base::HalfStep)
                ? Base::diffusion_[comp] / Base::scaleX() / Base::scaleY() * Base::tau / 2.
            : (stepSize == Base::WholeStep)
                ? Base::diffusion_[comp] / Base::scaleX() / Base::scaleY() * Base::tau
                : 0;

        const bool wrapEdges = (Base::boundaryCondition_ == PeriodicBoundary);

        // Scratch array holding the increment for every cell of this component.
        blitz::Array<double, 2> increment(Base::lattice[comp].shape());
        increment = 0;

        for (int ix = 0; ix < Base::latticeSizeX(); ++ix) {
            for (int iy = 0; iy < Base::latticeSizeY(); ++iy) {
                const bool interior = ix != 0
                    && ix != (Base::latticeSizeX() - 1)
                    && iy != 0
                    && iy != (Base::latticeSizeY() - 1);

                if (interior) {
                    // An earlier variant scattered a hand-written 9-point
                    // stencil (-20 / 4 / 1 weights) into the neighbours here;
                    // the stencil now lives behind my_laplacian().
                    increment(ix, iy) +=
                        my_laplacian(Base::lattice[comp], ix, iy) * weight;
                } else if (wrapEdges) {
                    increment(ix, iy) +=
                        my_laplacian_periodic(Base::lattice[comp], ix, iy) * weight;
                }
                // Border cells under any other boundary condition keep a zero increment.
            }
        }

        Base::lattice[comp] += increment;
    }
}
/**
 * Ratio of the sum of the binary intersection of m and n to the sum of m.
 *
 * Computes fabs( sum(getBinaryIntersect(m, n)) / sum(m) ). Despite the name,
 * the value is a plain ratio; it is not multiplied by 100.
 *
 * @param m  Matrix whose sum forms the denominator.
 * @param n  Matrix intersected with m; must have the same dimensions as m.
 * @return   Absolute value of the ratio described above.
 *           NOTE(review): when m.getSum() is 0 the floating-point division is
 *           by zero, so the result is not a finite number - callers that can
 *           pass an all-zero m should check for that.
 * @throws   int (value 1) when the dimensions of m and n differ.
 */
double getPercentBinarySubset(Matrix<int> m, Matrix<int> n){
    if (m.getXdim() != n.getXdim() || m.getYdim() != n.getYdim()) {
        // Mismatched shapes cannot be intersected; nothing after the throw runs.
        throw 1;
    }

    // Build the intersection directly instead of allocating an empty matrix
    // and overwriting it.
    Matrix<int> intersection(getBinaryIntersect(m, n));

    return fabs(double(intersection.getSum()) / double(m.getSum()));
}
/* //////////////////////////////////////////////////////////////////////////// -- Testing claswp */ int main( int argc, char** argv) { /* Initialize */ magma_queue_t queue; magma_device_t device[ MagmaMaxGPUs ]; int num = 0; magma_err_t err; magma_init(); err = magma_get_devices( device, MagmaMaxGPUs, &num ); if ( err != 0 || num < 1 ) { fprintf( stderr, "magma_get_devices failed: %d\n", err ); exit(-1); } err = magma_queue_create( device[0], &queue ); if ( err != 0 ) { fprintf( stderr, "magma_queue_create failed: %d\n", err ); exit(-1); } magmaFloatComplex *h_A1, *h_A2, *h_A3, *h_AT; magmaFloatComplex_ptr d_A1; real_Double_t gpu_time, cpu_time1, cpu_time2; /* Matrix size */ int M=0, N=0, n2, lda, ldat; int size[7] = {1000,2000,3000,4000,5000,6000,7000}; int i, j; int ione = 1; int ISEED[4] = {0,0,0,1}; int *ipiv; int k1, k2, r, c, incx; if (argc != 1){ for(i = 1; i<argc; i++){ if (strcmp("-N", argv[i])==0) N = atoi(argv[++i]); if (strcmp("-M", argv[i])==0) M = atoi(argv[++i]); } if (M>0 && N>0) printf(" testing_claswp -M %d -N %d\n\n", M, N); else { printf("\nUsage: \n"); printf(" testing_claswp -M %d -N %d\n\n", 1024, 1024); exit(1); } } else { printf("\nUsage: \n"); printf(" testing_claswp -M %d -N %d\n\n", 1024, 1024); M = N = size[6]; } lda = M; n2 = M*N; /* Allocate host memory for the matrix */ TESTING_MALLOC_CPU( h_A1, magmaFloatComplex, n2 ); TESTING_MALLOC_CPU( h_A2, magmaFloatComplex, n2 ); TESTING_MALLOC_CPU( h_A3, magmaFloatComplex, n2 ); TESTING_MALLOC_CPU( h_AT, magmaFloatComplex, n2 ); TESTING_MALLOC_DEV( d_A1, magmaFloatComplex, n2 ); ipiv = (int*)malloc(M * sizeof(int)); if (ipiv == 0) { fprintf (stderr, "!!!! 
host memory allocation error (ipiv)\n"); } printf("\n\n"); printf(" M N CPU_BLAS (sec) CPU_LAPACK (sec) GPU (sec) \n"); printf("=============================================================================\n"); for(i=0; i<7; i++) { if(argc == 1){ M = N = size[i]; } lda = M; ldat = N; n2 = M*N; /* Initialize the matrix */ lapackf77_clarnv( &ione, ISEED, &n2, h_A1 ); lapackf77_clacpy( MagmaUpperLowerStr, &M, &N, h_A1, &lda, h_A2, &lda ); for(r=0;r<M;r++){ for(c=0;c<N;c++){ h_AT[c+r*ldat] = h_A1[r+c*lda]; } } magma_csetmatrix( N, M, h_AT, 0, ldat, d_A1, 0, ldat, queue); for(j=0; j<M; j++) { ipiv[j] = (int)((rand()*1.*M) / (RAND_MAX * 1.)) + 1; } /* * BLAS swap */ /* Column Major */ cpu_time1 = magma_wtime(); for ( j=0; j<M; j++) { if ( j != (ipiv[j]-1)) { blasf77_cswap( &N, h_A1+j, &lda, h_A1+(ipiv[j]-1), &lda); } } cpu_time1 = magma_wtime() - cpu_time1; /* * LAPACK laswp */ cpu_time2 = magma_wtime(); k1 = 1; k2 = M; incx = 1; lapackf77_claswp(&N, h_A2, &lda, &k1, &k2, ipiv, &incx); cpu_time2 = magma_wtime() - cpu_time2; /* * GPU swap */ /* Col swap on transpose matrix*/ gpu_time = magma_wtime(); magma_cpermute_long2(N, d_A1, 0, ldat, ipiv, M, 0, queue); gpu_time = magma_wtime() - gpu_time; /* Check Result */ magma_cgetmatrix( N, M, d_A1, 0, ldat, h_AT, 0, ldat, queue); for(r=0;r<N;r++){ for(c=0;c<M;c++){ h_A3[c+r*lda] = h_AT[r+c*ldat]; } } int check_bl, check_bg, check_lg; check_bl = diffMatrix( h_A1, h_A2, M, N, lda ); check_bg = diffMatrix( h_A1, h_A3, M, N, lda ); check_lg = diffMatrix( h_A2, h_A3, M, N, lda ); printf("%5d %5d %6.2f %6.2f %6.2f %s %s %s\n", M, N, cpu_time1, cpu_time2, gpu_time, (check_bl == 0) ? "SUCCESS" : "FAILED", (check_bg == 0) ? "SUCCESS" : "FAILED", (check_lg == 0) ? 
"SUCCESS" : "FAILED"); if(check_lg !=0){ printf("lapack swap results:\n"); magma_cprint(M, N, h_A1, lda); printf("gpu swap transpose matrix result:\n"); magma_cprint(M, N, h_A3, lda); } if (argc != 1) break; } /* clean up */ TESTING_FREE_CPU( ipiv ); TESTING_FREE_CPU( h_A1 ); TESTING_FREE_CPU( h_A2 ); TESTING_FREE_CPU( h_A3 ); TESTING_FREE_CPU( h_AT ); TESTING_FREE_DEV( d_A1 ); magma_queue_destroy( queue ); magma_finalize(); }