int JacobiCL2(QVector< QVector<float> > A, int n, QCLContext *context, float *x, float eps) { if(!context) { context = new QCLContext(); if(!context->create(QCLDevice::GPU)) { qFatal("Could not create OpenCL context"); } } float *A2 = new float[n*(n+1)]; for(int i = 0; i < n; i++) { memcpy(&A2[i*(n+1)], A[i].data(), (n+1)*sizeof(float)); } QCLProgram program; program = context->buildProgramFromSourceFile(QLatin1String("cl/jacobi.cl")); QCLKernel jacobi_pre2 = program.createKernel("jacobi_pre2"); QCLKernel jacobi2 = program.createKernel("jacobi2"); jacobi_pre2.setGlobalWorkSize(n, n); jacobi2.setGlobalWorkSize(n, 1); QCLBuffer buffA = context->createBufferDevice(n*(n+1)*sizeof(float), QCLMemoryObject::ReadWrite); QCLBuffer buffA2 = context->createBufferDevice(n*(n+1)*sizeof(float), QCLMemoryObject::ReadWrite); buffA.write(A2, n*(n+1)*sizeof(float)); QCLVector<float> xcl = context->createVector<float>(n, QCLMemoryObject::ReadWrite); float *x2 = new float[n]; memset(x, 0, n*sizeof(float)); memset(x2, 0, n*sizeof(float)); xcl.write(x, n); QTime t; t.start(); int it = 0; float norm = 1; while(norm > eps) { memcpy(x2, x, n*sizeof(float)); jacobi_pre2(buffA, buffA2, xcl, n).waitForFinished(); jacobi2(buffA, buffA2, xcl, n).waitForFinished(); xcl.read(x, n); norm = normMax(x, x2, n); qDebug() << "JCL2:"<< it++ << norm; } return t.elapsed(); }
int main(int argc, char* argv[]) { bool verb; int j, k, n, n2, i3, n3, iter, niter; sf_complex **a=NULL, *e=NULL; float s2; sf_file mat=NULL, val=NULL; sf_init(argc,argv); mat = sf_input("in"); val = sf_output("out"); if (SF_COMPLEX != sf_gettype(mat)) sf_error("Need complex input"); if (!sf_histint(mat,"n1",&n)) sf_error("No n1= in input"); if (!sf_histint(mat,"n2",&n2) || n2 != n) sf_error("Need n1=n2 in input"); n3 = sf_leftsize(mat,2); sf_putint(val,"n2",1); if (!sf_getint("niter",&niter)) niter=10; if (!sf_getbool("verb",&verb)) verb=false; a = sf_complexalloc2(n,n); e = sf_complexalloc(n); jacobi2_init(n,verb); for (i3=0; i3 < n3; i3++) { sf_complexread(a[0],n*n,mat); for (iter=0; iter < niter; iter++) { s2 = 0.; for (j=0; j < n; j++) { for (k=0; k < n; k++) { s2 += jacobi2(a,n,j,k); } } sf_warning("iter=%d s2=%g",iter+1,s2); } for (j=0; j < n; j++) { e[j]=a[j][j]; } sf_complexwrite(e,n, val); } exit(0); }
void ceig(int niter      /* number of iterations */,
          float tol      /* tolerance */,
          int m          /* effective matrix size */,
          sf_complex** a /* [n][n] matrix */,
          sf_complex *e  /* [n] eigenvalues */)
/*< find eigenvalues >*/
{
    /* Relies on names declared outside this function: verb, b, work,
       rwork, n and n2. */
    int sweep, row, col, pos, info;
    float total, first=1.;

    if (niter <= 0) {
        /* Direct path: pack the leading m-by-m part of a into b, row after
           row, and hand it to LAPACK. */
        pos = 0;
        for (row=0; row < m; row++) {
            for (col=0; col < m; col++) {
                b[pos] = a[row][col];
                pos++;
            }
        }
#ifdef SF_HAS_LAPACK
        /* "N","N": no left/right eigenvectors requested; eigenvalues are
           returned in e */
        cgeev_( "N", "N", &m, b, &m, e, work, &n2, work, &n2, work, &n2, rwork, &info );
        if (info) sf_error("cgeev_ failed");
#else
        sf_error("No LAPACK");
#endif
    } else {
        /* Iterative path: at most niter sweeps of jacobi2 over every
           (row, col) index pair. */
        for (sweep=0; sweep < niter; sweep++) {
            total = 0.;
            for (row=0; row < m; row++) {
                for (col=0; col < m; col++) {
                    total += jacobi2(a,m,row,col);
                }
            }

            if (verb) sf_warning("iter=%d s2=%g",sweep+1,total);

            if (sweep == 0) {
                /* the first sweep sets the reference for the stop test */
                first = total;
            } else if (total <= first*tol) {
                /* stop once the sweep total has dropped to tol times the
                   first sweep's total */
                break;
            }
        }

        /* the eigenvalues are read off the diagonal of a */
        for (row=0; row < m; row++) {
            e[row] = a[row][row];
        }
    }

    /* Zero the entries from index m up to n-2.
       NOTE(review): e[n-1] is left untouched when m < n, although e is
       documented as [n] -- confirm the n-1 bound is intended. */
    for (row=m; row < n-1; row++) {
        e[row] = sf_cmplx(0.,0.);
    }
}