Exemplo n.º 1
0
/**
 * Runs a Jacobi iteration on an OpenCL device and reports how long the
 * iteration loop took.
 *
 * The first n rows of A, n+1 floats each, are packed row by row into one flat
 * device buffer (presumably the augmented matrix [A|b] -- confirm against
 * cl/jacobi.cl). Starting from x = 0, the kernels "jacobi_pre2" and "jacobi2"
 * are run back to back until normMax(x, previous x, n) is no longer greater
 * than eps.
 *
 * NOTE(review): there is no iteration cap; the loop only ends when the norm
 * test fails, so a non-converging system will spin forever.
 *
 * @param A       matrix rows; must hold at least n rows of at least n+1 entries
 *                (violations abort through qFatal instead of reading out of bounds).
 * @param n       problem size; n <= 0 returns 0 without touching x or OpenCL.
 * @param context OpenCL context to use. When null, a GPU context is created for
 *                this call and released again before returning.
 * @param x       caller-owned array of n floats; zeroed on entry and holding the
 *                last iterate read back from the device on return.
 * @param eps     threshold compared against the value returned by normMax.
 * @return        value of QTime::elapsed() for the iteration loop (milliseconds).
 */
int JacobiCL2(QVector< QVector<float> > A, int n, QCLContext *context, float *x, float eps)
{
    // Owns a context created by this call. Declared first so it is destroyed
    // last, i.e. after every program/kernel/buffer that was created from it.
    struct ContextGuard {
        QCLContext *owned;
        ContextGuard() : owned(0) {}
        ~ContextGuard() { delete owned; }
    } guard;

    if(n <= 0) {
        return 0;
    }

    // Validate the shape up front: the packing loop below copies n+1 floats
    // from each of the first n rows.
    const int cols = n + 1;
    if(A.size() < n) {
        qFatal("JacobiCL2: matrix has fewer than n rows");
    }
    for(int i = 0; i < n; i++) {
        if(A.at(i).size() < cols) {
            qFatal("JacobiCL2: matrix row is shorter than n+1 entries");
        }
    }

    if(!context) {
        context = new QCLContext();
        guard.owned = context;

        if(!context->create(QCLDevice::GPU)) {
            qFatal("Could not create OpenCL context");
        }
    }

    // Flat row-major copy of the matrix; QVector frees it on every exit path.
    QVector<float> flat(n * cols);
    float *flatData = flat.data();
    for(int i = 0; i < n; i++) {
        // at()/constData() avoid the detach check of the non-const accessors.
        memcpy(flatData + i * cols, A.at(i).constData(), cols * sizeof(float));
    }

    QCLProgram program;

    program = context->buildProgramFromSourceFile(QLatin1String("cl/jacobi.cl"));

    QCLKernel jacobi_pre2 = program.createKernel("jacobi_pre2");
    QCLKernel jacobi2 = program.createKernel("jacobi2");

    jacobi_pre2.setGlobalWorkSize(n, n);
    jacobi2.setGlobalWorkSize(n, 1);

    // Computed in size_t so the product cannot overflow int before widening.
    const size_t matrixBytes = static_cast<size_t>(n) * cols * sizeof(float);

    QCLBuffer buffA = context->createBufferDevice(matrixBytes, QCLMemoryObject::ReadWrite);
    QCLBuffer buffA2 = context->createBufferDevice(matrixBytes, QCLMemoryObject::ReadWrite);

    buffA.write(flatData, matrixBytes);

    QCLVector<float> xcl = context->createVector<float>(n, QCLMemoryObject::ReadWrite);

    // Previous iterate; QVector value-initialises its elements to zero.
    QVector<float> previous(n);
    float *x2 = previous.data();

    memset(x, 0, n*sizeof(float));

    xcl.write(x, n);

    QTime t;
    t.start();

    int it = 0;
    float norm = 1;
    while(norm > eps)
    {
        // Keep the last host-side iterate so the change can be measured.
        memcpy(x2, x, n*sizeof(float));

        jacobi_pre2(buffA, buffA2, xcl, n).waitForFinished();
        jacobi2(buffA, buffA2, xcl, n).waitForFinished();

        xcl.read(x, n);
        norm = normMax(x, x2, n);

        qDebug() << "JCL2:"<< it++ << norm;
    }

    return t.elapsed();
}
Exemplo n.º 2
0
/* Reads square complex matrices from "in", applies a fixed number of jacobi2
 * sweeps to each one, and writes the resulting diagonal to "out".
 *
 * Command-line parameters:
 *   niter - number of sweeps per matrix (default 10)
 *   verb  - flag forwarded to jacobi2_init (default false)
 */
int main(int argc, char* argv[])
{
    bool verbose;
    int size, size2, nmat, imat, nsweep, sweep, row, col;
    float total;
    sf_complex **matrix=NULL, *diag=NULL;
    sf_file in=NULL, out=NULL;

    sf_init(argc,argv);
    in = sf_input("in");
    out = sf_output("out");

    /* input must be complex and square in its first two axes */
    if (sf_gettype(in) != SF_COMPLEX) sf_error("Need complex input");
    if (!sf_histint(in,"n1",&size)) sf_error("No n1= in input");
    if (!(sf_histint(in,"n2",&size2) && size2 == size)) sf_error("Need n1=n2 in input");

    /* number of matrices stacked beyond the second axis */
    nmat = sf_leftsize(in,2);

    /* each matrix yields a single vector of length n1 */
    sf_putint(out,"n2",1);

    if (!sf_getint("niter",&nsweep)) nsweep=10;
    if (!sf_getbool("verb",&verbose)) verbose=false;

    matrix = sf_complexalloc2(size,size);
    diag = sf_complexalloc(size);
    jacobi2_init(size,verbose);

    imat = 0;
    while (imat < nmat) {
        sf_complexread(matrix[0],size*size,in);

        sweep = 0;
        while (sweep < nsweep) {
            /* accumulate what jacobi2 returns over every (row,col) pair */
            total = 0.;
            for (row=0; row < size; row++) {
                for (col=0; col < size; col++) {
                    total += jacobi2(matrix,size,row,col);
                }
            }
            sweep++;
            sf_warning("iter=%d s2=%g",sweep,total);
        }

        /* the output vector is the diagonal of the processed matrix */
        for (row=0; row < size; row++) {
            diag[row] = matrix[row][row];
        }

        sf_complexwrite(diag,size,out);
        imat++;
    }

    exit(0);
}
Exemplo n.º 3
0
Arquivo: ceig.c Projeto: 717524640/src
void ceig(int niter      /* number of iterations */, 
	  float tol      /* tolerance */, 
	  int m          /* effective matrix size */,
	  sf_complex** a /* [n][n] matrix */, 
	  sf_complex *e  /* [n] eigenvalues */)
/*< find eigenvalues >*/
{
    /* Relies on names defined outside this block: b, work, rwork, n2, n, verb. */
    int sweep, row, col, status;
    float resid, first=1.;

    if (niter <= 0) {
        /* Direct path: flatten the leading m x m part of a into b, then call LAPACK. */
        for (row=0; row < m; row++) {
            for (col=0; col < m; col++) {
                b[col+row*m] = a[row][col];
            }
        }
#ifdef SF_HAS_LAPACK
        cgeev_( "N", "N", &m, b, &m, e, work, &n2, work, &n2, work, &n2, rwork, &status );
        if (status) sf_error("cgeev_ failed");
#else
        sf_error("No LAPACK");
#endif
    } else {
        /* Iterative path: up to niter sweeps of jacobi2 over every (row,col) pair. */
        sweep = 0;
        while (sweep < niter) {
            resid = 0.;
            for (row=0; row < m; row++) {
                for (col=0; col < m; col++) {
                    resid += jacobi2(a,m,row,col);
                }
            }
            if (verb) sf_warning("iter=%d s2=%g",sweep+1,resid);

            if (0 == sweep) {
                /* the first sweep's sum is the reference for the relative test */
                first = resid;
            } else if (resid <= first*tol) {
                break;
            }
            sweep++;
        }

        /* take the diagonal of the processed matrix as the result */
        for (row=0; row < m; row++) {
            e[row] = a[row][row];
        }
    }

    /* Zero the entries past the effective size.
       NOTE(review): the bound is n-1, so e[n-1] is not written here even though
       e is documented as [n] -- confirm this is intended. */
    row = m;
    while (row < n-1) {
        e[row] = sf_cmplx(0.,0.);
        row++;
    }
}