Exemplo n.º 1
0
/*
 * Finish the run: release the C data handle, report timing and, when
 * CHECK_OUTPUT is defined, verify the computed product.
 *
 * Calls starpu_data_unpartition() and starpu_data_unregister() on C_handle,
 * stores the current time in `end`, and passes the elapsed time since
 * `start` (in microseconds, as a double) to display_stats().
 *
 * With CHECK_OUTPUT, C is overwritten by C - A*B and the sum of absolute
 * values of its xdim*ydim entries is compared against xdim*ydim*0.001.
 */
void terminate(void)
{

	fprintf(stderr, "unpartition !!\n");
	starpu_data_unpartition(C_handle, 0);

	starpu_data_unregister(C_handle);

	gettimeofday(&end, NULL);

	/*
	 * Convert each difference to double before scaling: multiplying the
	 * seconds by 1000000 in integer arithmetic can overflow when the
	 * underlying integer type is only 32 bits wide.
	 */
	double timing = (double)(end.tv_sec - start.tv_sec) * 1000000.0
			+ (double)(end.tv_usec - start.tv_usec);

	display_stats(timing);

#ifdef CHECK_OUTPUT
	/* check results */
	/* compute C = C - AB */

	SGEMM("N", "N", ydim, xdim, zdim, -1.0f, A, ydim, B, zdim, 1.0f, C, ydim);

	/* make sure C = 0: the accumulated absolute error must stay small */
	float err;
	err = SASUM(xdim*ydim, C, 1);

	if (err < xdim*ydim*0.001) {
		fprintf(stderr, "Results are OK\n");
	}
	else {
		fprintf(stderr, "There were errors ... err = %f\n", err);
	}
#endif /* CHECK_OUTPUT */
}
Exemplo n.º 2
0
int
slacon_(int *n, float *v, float *x, int *isgn, float *est, int *kase)

{
/*
    Purpose
    =======

    SLACON is the single-precision (float) counterpart of DLACON: it
    estimates the 1-norm of a square matrix A.
    Reverse communication is used for evaluating matrix-vector products:
    the routine returns to the caller whenever a product is needed and
    resumes where it left off on the next call.

    Arguments
    =========

    N      (input) INT
           The order of the matrix.  N >= 1.

    V      (workspace) REAL array, dimension (N)
           On the final return, V = A*W,  where  EST = norm(V)/norm(W)
           (W is not returned).

    X      (input/output) REAL array, dimension (N)
           On an intermediate return, X should be overwritten by
                 A * X,   if KASE=1,
                 A' * X,  if KASE=2,
           and SLACON must be re-called with all the other parameters
           unchanged.

    ISGN   (workspace) INT array, dimension (N)

    EST    (output) REAL
           An estimate for norm(A).

    KASE   (input/output) INT
           On the initial call to SLACON, KASE should be 0.
           On an intermediate return, KASE will be 1 or 2, indicating
           whether X should be overwritten by A * X  or A' * X.
           On the final return from SLACON, KASE will again be 0.

    Return value
    ============

    Always 0.

    Notes
    =====

    The resume point (jump) and the iteration state (iter, j, jlast,
    estold, altsgn, i) live in static local variables, so the state is
    shared by every caller: only one estimation can be in progress at a
    time and the routine is not reentrant.
    =====================================================================
*/

    /* Table of constant values */
    int c__1 = 1;
    float      zero = 0.0;
    float      one = 1.0;
    
    /* Local variables (static: they must survive between calls) */
    static int iter;
    static int jump, jlast;
    static float altsgn, estold;
    static int i, j;
    float temp;
    /* External vector kernels; presumably the BLAS routines of the same
       names (index of max |x|, sum of |x|, vector copy) -- confirm. */
#ifdef _CRAY
    extern int ISAMAX(int *, float *, int *);
    extern float SASUM(int *, float *, int *);
    extern int SCOPY(int *, float *, int *, float *, int *);
#else
    extern int isamax_(int *, float *, int *);
    extern float sasum_(int *, float *, int *);
    extern int scopy_(int *, float *, int *, float *, int *);
#endif
#define d_sign(a, b) (b >= 0 ? fabs(a) : -fabs(a))    /* Copy sign */
#define i_dnnt(a) \
	( a>=0 ? floor(a+.5) : -floor(.5-a) ) /* Round to nearest integer */

    /* Initial call: start from the uniform vector x = (1/n, ..., 1/n)
       and ask the caller for A*x. */
    if ( *kase == 0 ) {
	for (i = 0; i < *n; ++i) {
	    x[i] = 1. / (float) (*n);
	}
	*kase = 1;
	jump = 1;
	return 0;
    }

    /* Re-entry: resume at the point recorded in `jump` by the previous
       call.  There is no default case, so any other value of `jump`
       falls through to L20. */
    switch (jump) {
	case 1:  goto L20;
	case 2:  goto L40;
	case 3:  goto L70;
	case 4:  goto L110;
	case 5:  goto L140;
    }

    /*     ................ ENTRY   (JUMP = 1)
	   FIRST ITERATION.  X HAS BEEN OVERWRITTEN BY A*X. */
  L20:
    if (*n == 1) {
	/* 1x1 matrix: the estimate is |x[0]| and no iteration is needed */
	v[0] = x[0];
	*est = fabs(v[0]);
	/*        ... QUIT */
	goto L150;
    }
#ifdef _CRAY
    *est = SASUM(n, x, &c__1);
#else
    *est = sasum_(n, x, &c__1);
#endif

    /* Replace x by its sign vector (+1/-1) and remember it in isgn;
       the caller is then asked for A'*x. */
    for (i = 0; i < *n; ++i) {
	x[i] = d_sign(one, x[i]);
	isgn[i] = i_dnnt(x[i]);
    }
    *kase = 2;
    jump = 2;
    return 0;

    /*     ................ ENTRY   (JUMP = 2)
	   FIRST ITERATION.  X HAS BEEN OVERWRITTEN BY TRANSPOSE(A)*X. */
L40:
#ifdef _CRAY
    j = ISAMAX(n, &x[0], &c__1);
#else
    j = isamax_(n, &x[0], &c__1);
#endif
    /* Shift to a 0-based index (presumably the routine returns a 1-based,
       Fortran-style index -- confirm). */
    --j;
    iter = 2;

    /*     MAIN LOOP - ITERATIONS 2,3,...,ITMAX. */
L50:
    /* x := unit vector e_j; ask the caller for A*x */
    for (i = 0; i < *n; ++i) x[i] = zero;
    x[j] = one;
    *kase = 1;
    jump = 3;
    return 0;

    /*     ................ ENTRY   (JUMP = 3)
	   X HAS BEEN OVERWRITTEN BY A*X. */
L70:
#ifdef _CRAY
    SCOPY(n, x, &c__1, v, &c__1);
#else
    scopy_(n, x, &c__1, v, &c__1);
#endif
    /* Keep the previous estimate so that lack of progress can be detected */
    estold = *est;
#ifdef _CRAY
    *est = SASUM(n, v, &c__1);
#else
    *est = sasum_(n, v, &c__1);
#endif

    /* Compare the sign pattern of x with the one stored in isgn;
       any difference means the iteration has not converged yet. */
    for (i = 0; i < *n; ++i)
	if (i_dnnt(d_sign(one, x[i])) != isgn[i])
	    goto L90;

    /*     REPEATED SIGN VECTOR DETECTED, HENCE ALGORITHM HAS CONVERGED. */
    goto L120;

L90:
    /*     TEST FOR CYCLING. */
    if (*est <= estold) goto L120;

    /* Store the new sign vector and ask the caller for A'*x */
    for (i = 0; i < *n; ++i) {
	x[i] = d_sign(one, x[i]);
	isgn[i] = i_dnnt(x[i]);
    }
    *kase = 2;
    jump = 4;
    return 0;

    /*     ................ ENTRY   (JUMP = 4)
	   X HAS BEEN OVERWRITTEN BY TRANSPOSE(A)*X. */
L110:
    jlast = j;
#ifdef _CRAY
    j = ISAMAX(n, &x[0], &c__1);
#else
    j = isamax_(n, &x[0], &c__1);
#endif
    --j;
    /* Iterate again from e_j unless x[jlast] already equals the largest
       magnitude, or the iteration cap (iter reaching 5) has been hit. */
    if (x[jlast] != fabs(x[j]) && iter < 5) {
	++iter;
	goto L50;
    }

    /*     ITERATION COMPLETE.  FINAL STAGE. */
L120:
    /* Build the alternating-sign vector
       x[k] = (-1)^k * (1 + k/(n-1)),  k = 0..n-1,
       and ask the caller for A*x one last time. */
    altsgn = 1.;
    for (i = 1; i <= *n; ++i) {
	x[i-1] = altsgn * ((float)(i - 1) / (float)(*n - 1) + 1.);
	altsgn = -altsgn;
    }
    *kase = 1;
    jump = 5;
    return 0;
    
    /*     ................ ENTRY   (JUMP = 5)
	   X HAS BEEN OVERWRITTEN BY A*X. */
L140:
    /* Alternative estimate 2*sum(|x|)/(3n); it replaces est (and v)
       only when it is larger. */
#ifdef _CRAY
    temp = SASUM(n, x, &c__1) / (float)(*n * 3) * 2.;
#else
    temp = sasum_(n, x, &c__1) / (float)(*n * 3) * 2.;
#endif
    if (temp > *est) {
#ifdef _CRAY
	SCOPY(n, &x[0], &c__1, &v[0], &c__1);
#else
	scopy_(n, &x[0], &c__1, &v[0], &c__1);
#endif
	*est = temp;
    }

L150:
    /* Final return: kase = 0 tells the caller the estimate is complete */
    *kase = 0;
    return 0;

} /* slacon_ */
int
dlacon_(int *n, double *v, double *x, int *isgn, double *est, int *kase)

{
/*
    Purpose   
    =======   

    DLACON estimates the 1-norm of a square matrix A.   
    Reverse communication is used for evaluating matrix-vector products. 
  

    Arguments   
    =========   

    N      (input) INT
           The order of the matrix.  N >= 1.   

    V      (workspace) DOUBLE PRECISION array, dimension (N)   
           On the final return, V = A*W,  where  EST = norm(V)/norm(W)   
           (W is not returned).   

    X      (input/output) DOUBLE PRECISION array, dimension (N)   
           On an intermediate return, X should be overwritten by   
                 A * X,   if KASE=1,   
                 A' * X,  if KASE=2,
           and DLACON must be re-called with all the other parameters   
           unchanged.   

    ISGN   (workspace) INT array, dimension (N)

    EST    (output) DOUBLE PRECISION   
           An estimate (a lower bound) for norm(A).   

    KASE   (input/output) INT
           On the initial call to DLACON, KASE should be 0.   
           On an intermediate return, KASE will be 1 or 2, indicating   
           whether X should be overwritten by A * X  or A' * X.   
           On the final return from DLACON, KASE will again be 0.   

    Return value
    ============

    Always 0.

    Notes
    =====

    The resume point (jump) and the iteration state (iter, j, jlast,
    estold, altsgn, i) live in static local variables, so the state is
    shared by every caller: only one estimation can be in progress at a
    time and the routine is not reentrant.

    Further Details   
    ======= =======   

    Contributed by Nick Higham, University of Manchester.   
    Originally named CONEST, dated March 16, 1988.   

    Reference: N.J. Higham, "FORTRAN codes for estimating the one-norm of 
    a real or complex matrix, with applications to condition estimation", 
    ACM Trans. Math. Soft., vol. 14, no. 4, pp. 381-396, December 1988.   
    ===================================================================== 
*/

    /* Table of constant values */
    int c__1 = 1;
    double      zero = 0.0;
    double      one = 1.0;
    
    /* Local variables (static: they must survive between calls) */
    static int iter;
    static int jump, jlast;
    static double altsgn, estold;
    static int i, j;
    double temp;
    /* External vector kernels; presumably the BLAS routines of the same
       names (index of max |x|, sum of |x|, vector copy) -- confirm.
       NOTE(review): under _CRAY the S-prefixed names are declared with
       double arguments; presumably single precision is 64-bit on that
       platform -- confirm. */
#ifdef _CRAY
    extern int ISAMAX(int *, double *, int *);
    extern double SASUM(int *, double *, int *);
    extern int SCOPY(int *, double *, int *, double *, int *);
#else
    extern int idamax_(int *, double *, int *);
    extern double dasum_(int *, double *, int *);
    extern int dcopy_(int *, double *, int *, double *, int *);
#endif
#define d_sign(a, b) (b >= 0 ? fabs(a) : -fabs(a))    /* Copy sign */
#define i_dnnt(a) \
	( a>=0 ? floor(a+.5) : -floor(.5-a) ) /* Round to nearest integer */

    /* Initial call: start from the uniform vector x = (1/n, ..., 1/n)
       and ask the caller for A*x. */
    if ( *kase == 0 ) {
	for (i = 0; i < *n; ++i) {
	    x[i] = 1. / (double) (*n);
	}
	*kase = 1;
	jump = 1;
	return 0;
    }

    /* Re-entry: resume at the point recorded in `jump` by the previous
       call.  There is no default case, so any other value of `jump`
       falls through to L20. */
    switch (jump) {
	case 1:  goto L20;
	case 2:  goto L40;
	case 3:  goto L70;
	case 4:  goto L110;
	case 5:  goto L140;
    }

    /*     ................ ENTRY   (JUMP = 1)   
	   FIRST ITERATION.  X HAS BEEN OVERWRITTEN BY A*X. */
  L20:
    if (*n == 1) {
	/* 1x1 matrix: the estimate is |x[0]| and no iteration is needed */
	v[0] = x[0];
	*est = fabs(v[0]);
	/*        ... QUIT */
	goto L150;
    }
#ifdef _CRAY
    *est = SASUM(n, x, &c__1);
#else
    *est = dasum_(n, x, &c__1);
#endif

    /* Replace x by its sign vector (+1/-1) and remember it in isgn;
       the caller is then asked for A'*x. */
    for (i = 0; i < *n; ++i) {
	x[i] = d_sign(one, x[i]);
	isgn[i] = i_dnnt(x[i]);
    }
    *kase = 2;
    jump = 2;
    return 0;

    /*     ................ ENTRY   (JUMP = 2)   
	   FIRST ITERATION.  X HAS BEEN OVERWRITTEN BY TRANSPOSE(A)*X. */
L40:
#ifdef _CRAY
    j = ISAMAX(n, &x[0], &c__1);
#else
    j = idamax_(n, &x[0], &c__1);
#endif
    /* Shift to a 0-based index (presumably the routine returns a 1-based,
       Fortran-style index -- confirm). */
    --j;
    iter = 2;

    /*     MAIN LOOP - ITERATIONS 2,3,...,ITMAX. */
L50:
    /* x := unit vector e_j; ask the caller for A*x */
    for (i = 0; i < *n; ++i) x[i] = zero;
    x[j] = one;
    *kase = 1;
    jump = 3;
    return 0;

    /*     ................ ENTRY   (JUMP = 3)   
	   X HAS BEEN OVERWRITTEN BY A*X. */
L70:
#ifdef _CRAY
    SCOPY(n, x, &c__1, v, &c__1);
#else
    dcopy_(n, x, &c__1, v, &c__1);
#endif
    /* Keep the previous estimate so that lack of progress can be detected */
    estold = *est;
#ifdef _CRAY
    *est = SASUM(n, v, &c__1);
#else
    *est = dasum_(n, v, &c__1);
#endif

    /* Compare the sign pattern of x with the one stored in isgn;
       any difference means the iteration has not converged yet. */
    for (i = 0; i < *n; ++i)
	if (i_dnnt(d_sign(one, x[i])) != isgn[i])
	    goto L90;

    /*     REPEATED SIGN VECTOR DETECTED, HENCE ALGORITHM HAS CONVERGED. */
    goto L120;

L90:
    /*     TEST FOR CYCLING. */
    if (*est <= estold) goto L120;

    /* Store the new sign vector and ask the caller for A'*x */
    for (i = 0; i < *n; ++i) {
	x[i] = d_sign(one, x[i]);
	isgn[i] = i_dnnt(x[i]);
    }
    *kase = 2;
    jump = 4;
    return 0;

    /*     ................ ENTRY   (JUMP = 4)   
	   X HAS BEEN OVERWRITTEN BY TRANSPOSE(A)*X. */
L110:
    jlast = j;
#ifdef _CRAY
    j = ISAMAX(n, &x[0], &c__1);
#else
    j = idamax_(n, &x[0], &c__1);
#endif
    --j;
    /* Iterate again from e_j unless x[jlast] already equals the largest
       magnitude, or the iteration cap (iter reaching 5) has been hit. */
    if (x[jlast] != fabs(x[j]) && iter < 5) {
	++iter;
	goto L50;
    }

    /*     ITERATION COMPLETE.  FINAL STAGE. */
L120:
    /* Build the alternating-sign vector
       x[k] = (-1)^k * (1 + k/(n-1)),  k = 0..n-1,
       and ask the caller for A*x one last time. */
    altsgn = 1.;
    for (i = 1; i <= *n; ++i) {
	x[i-1] = altsgn * ((double)(i - 1) / (double)(*n - 1) + 1.);
	altsgn = -altsgn;
    }
    *kase = 1;
    jump = 5;
    return 0;
    
    /*     ................ ENTRY   (JUMP = 5)   
	   X HAS BEEN OVERWRITTEN BY A*X. */
L140:
    /* Alternative estimate 2*sum(|x|)/(3n); it replaces est (and v)
       only when it is larger. */
#ifdef _CRAY
    temp = SASUM(n, x, &c__1) / (double)(*n * 3) * 2.;
#else
    temp = dasum_(n, x, &c__1) / (double)(*n * 3) * 2.;
#endif
    if (temp > *est) {
#ifdef _CRAY
	SCOPY(n, &x[0], &c__1, &v[0], &c__1);
#else
	dcopy_(n, &x[0], &c__1, &v[0], &c__1);
#endif
	*est = temp;
    }

L150:
    /* Final return: kase = 0 tells the caller the estimate is complete */
    *kase = 0;
    return 0;

} /* dlacon_ */
Exemplo n.º 4
0
/*
 * Test driver for the SPE BLAS level-1 kernels.
 *
 * Command line: pputest <numblocks> <blocksize> [type] [num spes]
 *   argv[1]  numblocks  (required)
 *   argv[2]  blocksize  (default 4; a block holds blocksize*blocksize floats)
 *   argv[3]  type       (default 1) selects the kernel to run:
 *              1 SDOT, 2 SDOT with the sdotv shader, 3 SNRM2,
 *              4 SNRM2 with the snrm2v shader, 5 SSCAL, 7 SASUM,
 *              8 SASUM with the sasumv shader,
 *              9 ISAMAX (the call itself is currently disabled),
 *              102 SDOT2 with the sdotv shader
 *   argv[4]  numspes    (default 1) passed to init()
 *
 * Builds the input matrices, runs the selected kernel, writes a 0 to every
 * SPE's inbound mailbox, waits for the SPE threads to complete, and prints
 * the elapsed time and derived throughput figures.
 *
 * Returns -1 when no argument is given or the type is not recognised.
 * NOTE(review): the normal path has always returned 1 rather than 0; this is
 * kept so existing callers observe the same exit status -- confirm intent.
 */
int main( int argc, char *argv[] )
{
	unsigned int numspes = 1, i;
	unsigned int numblocks = 8, blocksize = 4;
	unsigned int type = 1;
	int status = 1;

	// At least one argument is required
	if ( argc < 2 )
	{
		printf( "Usage pputest <numblocks> <blocksize^2> <type> <num spes>\n" );
		printf( "type: 1: sdot, 2: sdotv, 3: snrm2, 4: snrm2v\n" );
		return -1;
	}

	// Positional arguments; each missing one keeps its default
	numblocks = atoi( argv[1] );

	if ( argc > 2 )
	{
		blocksize = atoi( argv[2] );
	}

	if ( argc > 3 )
	{
		type = atoi( argv[3] );
	}

	if ( argc > 4 )
	{
		numspes = atoi( argv[4] );
	}

	init( numspes );

	paddingx = 0;
	paddingy = 1;

	// size and elapsed start at zero so that a kernel which is disabled or
	// unknown can never cause an uninitialised read in the report below
	unsigned int size = 0, numelements = numblocks*blocksize*blocksize;

	printf( "Testing BLAS()\n" );
	printf( "Num SPEs:\t%u\n", speThreads );
	printf( "------------------\n" );
	printf( "Vector size: \t\t%u\n", blocksize*blocksize*numblocks-paddingx );
	printf( "Num blocks:\t\t%u\n", numblocks );
	printf( "Num elements pr block: \t%u\n", blocksize*blocksize );
	printf( "1 block in bytes: \t%u\n", blocksize*blocksize*4 );
	printf( "Num elements:\t\t%u\n", numblocks*blocksize*blocksize );
	printf( "Total size in MB: \t%f\n", (double)(numblocks*blocksize*blocksize*4)/(1024*1024) );
	printf( "Total size in MB: \t%f\n", (double)(numblocks*blocksize*blocksize*4)/(1024*1024)*2 );
	printf( "------------------\n" );

	// Operands: two vectors of numblocks blocks and one scalar block
	PyArrayObject pyobj1;
	PyArrayObject pyobj2;
	PyArrayObject pyscalar1;
	sMakeMatrix( numblocks, 1, blocksize, 1.0f, &pyobj1 );
	sMakeMatrix( numblocks, 1, blocksize, 1.0f, &pyobj2 );
	sMakeMatrix( 1, 1, 4, 2.334f, &pyscalar1 );

	unsigned int *shader = NULL;

	double elapsed = 0.0;

	// size is the number of bytes of operand data (4 bytes per element,
	// doubled for the two-operand kernels)
	switch( type )
	{
	case 1:
		shader = blas_1_sdot;
		printf( "Calling SDOT();\n" );
		size = (numblocks*blocksize*blocksize*4) * 2;
		elapsed = SDOT( &pyobj1, &pyobj2, blas_1_sdot_size, shader );
		break;

	case 2:
		shader = blas_1_sdotv;
		printf( "Calling SDOTvvvv();\n" );
		size = (numblocks*blocksize*blocksize*4) * 2;
		elapsed = SDOT( &pyobj1, &pyobj2, blas_1_sdotv_size, shader );
		break;

	case 3:
		shader = snrm2;
		printf( "Calling SNRM2();\n" );
		size = (numblocks*blocksize*blocksize*4);
		elapsed = SNRM2( &pyobj1, snrm2_size, shader );
		break;

	case 4:
		shader = snrm2v;
		printf( "Calling SNRM2vvvvv();\n" );
		size = (numblocks*blocksize*blocksize*4);
		elapsed = SNRM2( &pyobj1, snrm2v_size, shader );
		break;

	case 5:
		shader = blas_1_sscal;
		printf( "Calling SSCAL();\n" );
		size = (numblocks*blocksize*blocksize*4);
		elapsed = SSCAL( &pyobj1, &pyscalar1, blas_1_sscal_size, shader );
		break;

	case 7:
		shader = blas_1_sasum;
		printf( "Calling SASUM();\n" );
		size = (numblocks*blocksize*blocksize*4);
		elapsed = SASUM( &pyobj1, blas_1_sasum_size, shader );
		break;

	case 8:
		shader = blas_1_sasumv;
		printf( "Calling SASUMv();\n" );
		size = (numblocks*blocksize*blocksize*4);
		elapsed = SASUM( &pyobj1, blas_1_sasumv_size, shader );
		break;

	case 9:
		shader = blas_1_isamaxv;
		printf( "Calling ISAMAXv();\n" );
		size = (numblocks*blocksize*blocksize*4);
		// The kernel call is disabled, so elapsed stays 0 for this type:
		//ISAMAX( &pyobj1,blas_1_isamaxv_size, shader );
		break;

	case 102:
		shader = blas_1_sdotv;
		printf( "Calling SDOT2vvvv();\n" );
		size = (numblocks*blocksize*blocksize*4) * 2;
		elapsed = SDOT2( &pyobj1, &pyobj2, blas_1_sdotv_size, shader );
		break;

	default:
		// Nothing was run; still fall through to the SPE shutdown below
		// so the threads started by init() are not left waiting
		fprintf( stderr, "Unknown type: %u\n", type );
		status = -1;
		break;
	}

	// Write a 0 into every SPE's inbound mailbox
	// (presumably the "stop" command for the SPE program -- confirm)
	unsigned int state = 0;
	for ( i = 0 ; i < speThreads ; i++ )
	{
		spe_in_mbox_write ( speData[i].spe_ctx, &state, 1, SPE_MBOX_ALL_BLOCKING );
	}

	// Wait for all the SPE threads to complete.
	for ( i = 0 ; i < speThreads ; i++ )
	{
		CompleteSPEThreads( &speData[i] );
	}

	double GB = (double)size / (1024*1024*1024);
	printf( "Time: %f\n", elapsed );
	printf( "Size: %u\n", size );
	printf( "GigaBytes: %f\n", GB );

	// Rates are only meaningful when a kernel actually ran and took time
	if ( elapsed > 0.0 )
	{
		printf( "GB/s: %f\n", GB/elapsed );
		printf( "GFlops: %f\n", ( numelements/elapsed ) / ( 1024*1024*1024 ) );
	}
	else
	{
		printf( "GB/s: n/a\n" );
		printf( "GFlops: n/a\n" );
	}

	return status;
}