void test_vmul_drv() { int n = 216 * 216 * 216; int repeat = 20; std::vector<double_complex> v1(n); std::vector<double> v2(n); mdarray<double_complex, 1> f1(n); mdarray<double, 1> f2(n); for (int i = 0; i < n; i++) { v1[i] = 1.0 / double_complex(i + 1, i + 1); v2[i] = 2.0; f1[i] = 1.0 / double_complex(i + 1, i + 1); f2[i] = 2.0; } std::cout << "vector size: " << n << std::endl; double t = kernel1(repeat, n, v1, v2); std::cout << "kernel1 time: " << t << " speed: " << double(n * (2 * sizeof(double_complex) + sizeof(double)) * repeat) / t / (1 << 30) << " GBs" << std::endl; t = kernel2(repeat, n, f1, f2); std::cout << "kernel2 time: " << t << " speed: " << double(n * (2 * sizeof(double_complex) + sizeof(double)) * repeat) / t / (1 << 30) << " GBs" << std::endl; t = kernel3(repeat, n, f1, v2); std::cout << "kernel3 time: " << t << " speed: " << double(n * (2 * sizeof(double_complex) + sizeof(double)) * repeat) / t / (1 << 30) << " GBs" << std::endl; }
/*
 * Computes the surface-tension force (fs) of every particle in sys, according
 * to equation (19). The color field (cf) and its gradient (gradcf) of every
 * particle are recomputed along the way.
 *
 * sys     : particle system; reads parts, nbParts and h, and for each particle
 *           pos, mass, ro and sigma; writes cf, gradcf and fs.
 * kernel  : weight for a separation vector and the length h. A pair whose
 *           weight is exactly zero is skipped.
 * kernel2 : returns a pointer to three floats that are used as the gradient
 *           term. The buffer is only read here and never released.
 *           NOTE(review): who owns that buffer is not visible here -- confirm.
 * kernel3 : scalar term accumulated for the Laplacian of the color field
 *           (presumably the kernel Laplacian -- confirm).
 *
 * ASSUMPTION: kernel(r,h)=kernel(-r,h). Each unordered pair is evaluated once
 * and the result is mirrored onto the second particle, with the sign of the
 * gradient term flipped.
 */
void setSurfaceT(System *sys, float(*kernel)(float*, float), float*(*kernel2)(float*, float), float(*kernel3)(float*, float)){
    Particle *first, *end, *pi, *pj;
    float radius, minNorm=0.001;
    int k;

    first = sys->parts;
    end = first + sys->nbParts;
    radius = sys->h;

    /* Pass 1: clear all accumulators. */
    for (pi = first; pi < end; pi++) {
        pi->cf = 0;
        for (k = 0; k < 3; k++) {
            pi->fs[k] = 0;
            pi->gradcf[k] = 0;
        }
    }

    /* Pass 2: visit every pair (pi, pj) with pj at or after pi, self pair included. */
    for (pi = first; pi < end; pi++) {
        for (pj = pi; pj < end; pj++) {
            float sep[3], weight, lap, ratioI, ratioJ, *grad;

            for (k = 0; k < 3; k++)
                sep[k] = pi->pos[k] - pj->pos[k];

            weight = kernel(sep, radius);
            if (weight == 0)
                continue;

            grad = kernel2(sep, radius);
            lap = kernel3(sep, radius);
            ratioJ = pj->mass / pj->ro;
            ratioI = pi->mass / pi->ro;

            /* Contribution of pj to pi; fs[0] temporarily holds the Laplacian sum. */
            pi->cf += ratioJ * weight;
            for (k = 0; k < 3; k++)
                pi->gradcf[k] += ratioJ * grad[k];
            pi->fs[0] += ratioJ * lap;

            /* The self pair must not be counted twice. */
            if (pj == pi)
                continue;

            /* Mirrored contribution of pi to pj: same weight, opposite gradient. */
            pj->cf += ratioI * weight;
            for (k = 0; k < 3; k++)
                pj->gradcf[k] -= ratioI * grad[k];
            pj->fs[0] += ratioI * lap;
        }
    }

    /* Pass 3: turn the accumulated Laplacian and gradient into the force. */
    for (pi = first; pi < end; pi++) {
        float scale = pi->fs[0];
        float norm = 0;

        pi->fs[0] = 0;

        for (k = 0; k < 3; k++)
            norm += pi->gradcf[k] * pi->gradcf[k];
        norm = sqrt(norm);

        /* Gradient too small to give a direction: the force stays zero. */
        if (!(norm > minNorm))
            continue;

        scale *= -pi->sigma / norm;
        for (k = 0; k < 3; k++)
            pi->fs[k] = scale * pi->gradcf[k];
    }
}