Ejemplo n.º 1
0
static bool flush_ringbuf_client(void)
{
	char buf[1024];

	/* 書くデータなし */
	if (fresh_queue.next == fresh_queue.tail) return (FALSE);

	/* まだ書くべき時でない */
	if (fresh_queue.time[fresh_queue.next] > get_current_time() - epoch_time) return (FALSE);

	/* 時間情報(区切り)が得られるまで書く */
	while (get_nextbuf(buf))
	{
		char id;
		int x, y, len, col;
		int i;
		unsigned char tmp1, tmp2, tmp3, tmp4;
		char *mesg;

		sscanf(buf, "%c%c%c%c%c", &id, &tmp1, &tmp2, &tmp3, &tmp4);
		x = tmp1-1; y = tmp2-1; len = tmp3; col = tmp4;
		if (id == 's')
		{
			col = tmp3;
			mesg = &buf[4];
		}
		else mesg = &buf[5];
#ifndef WINDOWS
		win2unix(col, mesg);
#endif

		switch (id)
		{
		case 't': /* 通常 */
#ifdef SJIS
			euc2sjis(mesg);
#endif
			update_term_size(x, y, len);
			(void)((*angband_term[0]->text_hook)(x, y, len, (byte)col, mesg));
			strncpy(&Term->scr->c[y][x], mesg, len);
			for (i = x; i < x+len; i++)
			{
				Term->scr->a[y][i] = col;
			}
			break;

		case 'n': /* 繰り返し */
			for (i = 1; i < len; i++)
			{
				mesg[i] = mesg[0];
			}
			mesg[i] = '\0';
			update_term_size(x, y, len);
			(void)((*angband_term[0]->text_hook)(x, y, len, (byte)col, mesg));
			strncpy(&Term->scr->c[y][x], mesg, len);
			for (i = x; i < x+len; i++)
			{
				Term->scr->a[y][i] = col;
			}
			break;

		case 's': /* 一文字 */
			update_term_size(x, y, 1);
			(void)((*angband_term[0]->text_hook)(x, y, 1, (byte)col, mesg));
			strncpy(&Term->scr->c[y][x], mesg, 1);
			Term->scr->a[y][x] = col;
			break;

		case 'w':
			update_term_size(x, y, len);
			(void)((*angband_term[0]->wipe_hook)(x, y, len));
			break;

		case 'x':
			if (x == TERM_XTRA_CLEAR) Term_clear();
			(void)((*angband_term[0]->xtra_hook)(x, 0));
			break;

		case 'c':
			update_term_size(x, y, 1);
			(void)((*angband_term[0]->curs_hook)(x, y));
			break;
		case 'C':
			update_term_size(x, y, 1);
			(void)((*angband_term[0]->bigcurs_hook)(x, y));
			break;
		}
	}

	fresh_queue.next++;
	if (fresh_queue.next == FRESH_QUEUE_SIZE) fresh_queue.next = 0;
	return (TRUE);
}
Ejemplo n.º 2
0
/* ////////////////////////////////////////////////////////////////////////////
   -- Testing chegvd
*/
int main( int argc, char** argv) 
{
    TESTING_CUDA_INIT();

    cuFloatComplex *h_A, *h_R, *h_B, *h_S, *h_work;
    float *rwork, *w1, *w2;
    magma_int_t *iwork;
    float gpu_time, cpu_time;

    magma_timestr_t start, end;

    /* Matrix size */
    magma_int_t N=0, n2;
    magma_int_t size[4] = {1024,2048,4100,6001};

    magma_int_t i, itype, info;
    magma_int_t ione = 1, izero = 0;
    magma_int_t five = 5;

    cuFloatComplex c_zero    = MAGMA_C_ZERO;
    cuFloatComplex c_one     = MAGMA_C_ONE;
    cuFloatComplex c_neg_one = MAGMA_C_NEG_ONE;

    float d_one     =  1.;
    float d_neg_one = -1.;
    float d_ten     = 10.;
    magma_int_t ISEED[4] = {0,0,0,1};

    const char *uplo = MagmaLowerStr;
    const char *jobz = MagmaVectorsStr;
    itype = 1;

    magma_int_t checkres;
    float result[4];

    int flagN = 0;

    if (argc != 1){
        for(i = 1; i<argc; i++){
            if (strcmp("-N", argv[i])==0){
                N = atoi(argv[++i]);
                if (N>0){
                   printf("  testing_chegvd -N %d\n\n", (int) N);
                   flagN=1;
                }
                else {
                   printf("\nUsage: \n");
                   printf("  testing_chegvd -N %d\n\n", (int) N);
                   exit(1);
                }
            }
            if (strcmp("-itype", argv[i])==0){
                itype = atoi(argv[++i]);
                if (itype>0 && itype <= 3){
                   printf("  testing_chegvd -itype %d\n\n", (int) itype);
                }
                else {
                   printf("\nUsage: \n");
                   printf("  testing_chegvd -itype %d\n\n", (int) itype);
                   exit(1);
                }
            }
            if (strcmp("-L", argv[i])==0){
              uplo = MagmaLowerStr;
              printf("  testing_chegvd -L");
            }
            if (strcmp("-U", argv[i])==0){
              uplo = MagmaUpperStr;
              printf("  testing_chegvd -U");              
            }
          
        }
      
    } else {
        printf("\nUsage: \n");
        printf("  testing_chegvd -L/U -N %d -itype %d\n\n", 1024, 1);
    }

    if(!flagN)
        N = size[3];

    checkres  = getenv("MAGMA_TESTINGS_CHECK") != NULL;
    n2  = N * N;

    /* Allocate host memory for the matrix */
    TESTING_MALLOC(   h_A, cuFloatComplex, n2);
    TESTING_MALLOC(   h_B, cuFloatComplex, n2);
    TESTING_MALLOC(    w1, float         ,  N);
    TESTING_MALLOC(    w2, float         ,  N);
    TESTING_HOSTALLOC(h_R, cuFloatComplex, n2);
    TESTING_HOSTALLOC(h_S, cuFloatComplex, n2);

    magma_int_t nb = magma_get_chetrd_nb(N);
    magma_int_t lwork = 2*N*nb + N*N;
    magma_int_t lrwork = 1 + 5*N +2*N*N;
    magma_int_t liwork = 3 + 5*N;

    TESTING_HOSTALLOC(h_work, cuFloatComplex,  lwork);
    TESTING_MALLOC(    rwork,          float, lrwork);
    TESTING_MALLOC(    iwork,     magma_int_t, liwork);
    
    printf("  N     CPU Time(s)    GPU Time(s) \n");
    printf("===================================\n");
    for(i=0; i<4; i++){
        if (!flagN){
            N = size[i];
            n2 = N*N;
        }

        /* Initialize the matrix */
        lapackf77_clarnv( &ione, ISEED, &n2, h_A );
        //lapackf77_clatms( &N, &N, "U", ISEED, "P", w1, &five, &d_ten,
        //                 &d_one, &N, &N, uplo, h_B, &N, h_work, &info);
        //lapackf77_claset( "A", &N, &N, &c_zero, &c_one, h_B, &N);
        lapackf77_clarnv( &ione, ISEED, &n2, h_B );
        /* increase the diagonal */
        {
          magma_int_t i, j;
          for(i=0; i<N; i++) {
            MAGMA_C_SET2REAL( h_B[i*N+i], MAGMA_C_REAL(h_B[i*N+i]) + 1.*N );
            MAGMA_C_SET2REAL( h_A[i*N+i], MAGMA_C_REAL(h_A[i*N+i]) );
          }
        }
        lapackf77_clacpy( MagmaUpperLowerStr, &N, &N, h_A, &N, h_R, &N );
        lapackf77_clacpy( MagmaUpperLowerStr, &N, &N, h_B, &N, h_S, &N );

        magma_chegvd(itype, jobz[0], uplo[0],
                     N, h_R, N, h_S, N, w1,
                     h_work, lwork, 
                     rwork, lrwork, 
                     iwork, liwork, 
                     &info);
        
        lapackf77_clacpy( MagmaUpperLowerStr, &N, &N, h_A, &N, h_R, &N );
        lapackf77_clacpy( MagmaUpperLowerStr, &N, &N, h_B, &N, h_S, &N );


        /* ====================================================================
           Performs operation using MAGMA
           =================================================================== */
        start = get_current_time();
        magma_chegvd(itype, jobz[0], uplo[0],
                     N, h_R, N, h_S, N, w1,
                     h_work, lwork,
                     rwork, lrwork,
                     iwork, liwork,
                     &info);
        end = get_current_time();

        gpu_time = GetTimerValue(start,end)/1000.;

        if ( checkres ) {
          /* =====================================================================
             Check the results following the LAPACK's [zc]hegvd routine.
             A x = lambda B x is solved
             and the following 3 tests computed:
             (1)    | A Z - B Z D | / ( |A||Z| N )  (itype = 1)
                    | A B Z - Z D | / ( |A||Z| N )  (itype = 2)
                    | B A Z - Z D | / ( |A||Z| N )  (itype = 3)
             (2)    | I - V V' B | / ( N )           (itype = 1,2)
                    | B - V V' | / ( |B| N )         (itype = 3)
             (3)    | S(with V) - S(w/o V) | / | S |
             =================================================================== */
          float temp1, temp2;
          cuFloatComplex *tau;

          if (itype == 1 || itype == 2){
            lapackf77_claset( "A", &N, &N, &c_zero, &c_one, h_S, &N);
            blasf77_cgemm("N", "C", &N, &N, &N, &c_one, h_R, &N, h_R, &N, &c_zero, h_work, &N);
            blasf77_chemm("R", uplo, &N, &N, &c_neg_one, h_B, &N, h_work, &N, &c_one, h_S, &N);
            result[1]= lapackf77_clange("1", &N, &N, h_S, &N, rwork) / N;
          }
          else if (itype == 3){
            lapackf77_clacpy( MagmaUpperLowerStr, &N, &N, h_B, &N, h_S, &N);
            blasf77_cherk(uplo, "N", &N, &N, &d_neg_one, h_R, &N, &d_one, h_S, &N); 
            result[1]= lapackf77_clanhe("1",uplo, &N, h_S, &N, rwork) / N / lapackf77_clanhe("1",uplo, &N, h_B, &N, rwork);
          }

          result[0] = 1.;
          result[0] /= lapackf77_clanhe("1",uplo, &N, h_A, &N, rwork);
          result[0] /= lapackf77_clange("1",&N , &N, h_R, &N, rwork);

          if (itype == 1){
            blasf77_chemm("L", uplo, &N, &N, &c_one, h_A, &N, h_R, &N, &c_zero, h_work, &N);
            for(int i=0; i<N; ++i)
              blasf77_csscal(&N, &w1[i], &h_R[i*N], &ione);
            blasf77_chemm("L", uplo, &N, &N, &c_neg_one, h_B, &N, h_R, &N, &c_one, h_work, &N);
            result[0] *= lapackf77_clange("1", &N, &N, h_work, &N, rwork)/N;
          }
          else if (itype == 2){
            blasf77_chemm("L", uplo, &N, &N, &c_one, h_B, &N, h_R, &N, &c_zero, h_work, &N);
            for(int i=0; i<N; ++i)
              blasf77_csscal(&N, &w1[i], &h_R[i*N], &ione);
            blasf77_chemm("L", uplo, &N, &N, &c_one, h_A, &N, h_work, &N, &c_neg_one, h_R, &N);
            result[0] *= lapackf77_clange("1", &N, &N, h_R, &N, rwork)/N;
          }
          else if (itype == 3){
            blasf77_chemm("L", uplo, &N, &N, &c_one, h_A, &N, h_R, &N, &c_zero, h_work, &N);
            for(int i=0; i<N; ++i)
              blasf77_csscal(&N, &w1[i], &h_R[i*N], &ione);
            blasf77_chemm("L", uplo, &N, &N, &c_one, h_B, &N, h_work, &N, &c_neg_one, h_R, &N);
            result[0] *= lapackf77_clange("1", &N, &N, h_R, &N, rwork)/N;
          }

/*          lapackf77_chet21(&ione, uplo, &N, &izero,
                           h_A, &N,
                           w1, w1,
                           h_R, &N,
                           h_R, &N,
                           tau, h_work, rwork, &result[0]);
*/          
          lapackf77_clacpy( MagmaUpperLowerStr, &N, &N, h_A, &N, h_R, &N );
          lapackf77_clacpy( MagmaUpperLowerStr, &N, &N, h_B, &N, h_S, &N );
 
          magma_chegvd(itype, 'N', uplo[0],
                       N, h_R, N, h_S, N, w2,
                       h_work, lwork,
                       rwork, lrwork,
                       iwork, liwork,
                       &info);

          temp1 = temp2 = 0;
          for(int j=0; j<N; j++){
            temp1 = max(temp1, absv(w1[j]));
            temp1 = max(temp1, absv(w2[j]));
            temp2 = max(temp2, absv(w1[j]-w2[j]));
          }
          result[2] = temp2 / temp1;
        }

        /* =====================================================================
           Performs operation using LAPACK
           =================================================================== */
        start = get_current_time();
        lapackf77_chegvd(&itype, jobz, uplo,
                         &N, h_A, &N, h_B, &N, w2,
                         h_work, &lwork,
                         rwork, &lrwork,
                         iwork, &liwork,
                         &info);
        end = get_current_time();
        if (info < 0)
          printf("Argument %d of chegvd had an illegal value.\n", (int) -info);

        cpu_time = GetTimerValue(start,end)/1000.;


        /* =====================================================================
           Print execution time
           =================================================================== */
        printf("%5d     %6.2f         %6.2f\n",
               (int) N, cpu_time, gpu_time);
        if ( checkres ){
          printf("Testing the eigenvalues and eigenvectors for correctness:\n");
          if(itype==1)
             printf("(1)    | A Z - B Z D | / (|A| |Z| N) = %e\n", result[0]);
          else if(itype==2)
             printf("(1)    | A B Z - Z D | / (|A| |Z| N) = %e\n", result[0]);
          else if(itype==3)
             printf("(1)    | B A Z - Z D | / (|A| |Z| N) = %e\n", result[0]);
          if(itype==1 || itype ==2)
             printf("(2)    | I -   Z Z' B | /  N         = %e\n", result[1]);
          else
             printf("(2)    | B -  Z Z' | / (|B| N)       = %e\n", result[1]);
          printf("(3)    | D(w/ Z)-D(w/o Z)|/ |D|      = %e\n\n", result[2]);
        }

        if (flagN)
            break;
    }
 
    /* Memory clean up */
    TESTING_FREE(       h_A);
    TESTING_FREE(       h_B);
    TESTING_FREE(        w1);
    TESTING_FREE(        w2);
    TESTING_FREE(     rwork);
    TESTING_FREE(     iwork);
    TESTING_HOSTFREE(h_work);
    TESTING_HOSTFREE(   h_R);
    TESTING_HOSTFREE(   h_S);

    /* Shutdown */
    TESTING_CUDA_FINALIZE();
}
Ejemplo n.º 3
0
/* ////////////////////////////////////////////////////////////////////////////
   -- Testing zpotrf_mc

   Command line:
     -N <n>   run a single problem of order n; when omitted (or 0) every
              size in the built-in table is run
     -C <c>   number of CPU cores handed to magma_init (default 4)

   With no arguments a usage line is printed and the whole table is run.
   For each size the factor computed by magma_zpotrf_mc is compared with the
   one computed by lapackf77_zpotrf.  Returns 1 on a malformed command line
   or when the host buffers cannot be allocated, 0 otherwise.
*/
int main( magma_int_t argc, char** argv) 
{
    cuDoubleComplex *h_A, *h_A2;
    float cpu_perf2;

    magma_timestr_t start, end;

    /* Matrix size */
    magma_int_t N=0, n2, lda;
    magma_int_t size[10] = {1024,2048,3072,4032,5184,6048,7200,8064,8928,10080};
    
    magma_int_t i, j, info[1];

    magma_int_t ione     = 1;
    magma_int_t ISEED[4] = {0,0,0,1};

    magma_int_t num_cores = 4;
    int num_gpus = 0;

    /* loop == 1 means "run every size in the table"; any other value stops
       after the first size.  It starts as argc, so a bare invocation runs
       the whole table. */
    magma_int_t loop = argc;
    
    if (argc != 1){
      magma_int_t bad_args = 0;

      for(i = 1; i<argc && !bad_args; i++){
        if (strcmp("-N", argv[i])==0){
          /* The value must exist: argv[argc] is a null pointer */
          if (i+1 < argc)
            N = atoi(argv[++i]);
          else
            bad_args = 1;
        }
        else if (strcmp("-C", argv[i])==0){
          if (i+1 < argc)
            num_cores = atoi(argv[++i]);
          else
            bad_args = 1;
        }
      }

      /* A negative order cannot be factored; treat it like a missing value */
      if (bad_args || N < 0) {
        printf("\nUsage: \n");
        printf("  testing_zpotrf_mc -N %d -C %d \n\n", 1024, 4);
        return 1;
      }

      if (N==0) {
        /* No size requested: run the whole table */
        N = size[9];
        loop = 1;
      } else {
        /* size[0] is the one size that runs; size[9] sizes the buffers */
        size[0] = size[9] = N;
      }
    } else {
      printf("\nUsage: \n");
      printf("  testing_zpotrf_mc -N %d -C %d \n\n", 1024, 4);
      N = size[9];
    }

    lda = N;
    n2 = size[9] * size[9];

    /* Allocate host memory for the matrix */
    h_A = (cuDoubleComplex*)malloc(n2 * sizeof(h_A[0]));
    if (h_A == NULL) {
        fprintf (stderr, "!!!! host memory allocation error (A)\n");
        return 1;
    }

    /* Allocate host memory for the copy factored by the multi-core routine */
    h_A2 = (cuDoubleComplex*)malloc(n2 * sizeof(h_A2[0]));
    if (h_A2 == NULL) {
        fprintf (stderr, "!!!! host memory allocation error (A2)\n");
        free(h_A);
        return 1;
    }

    /* Initialize MAGMA hardware context, setting how many CPU cores 
       and how many GPUs to be used in the consequent computations  */
    magma_context *context;
    context = magma_init(NULL, NULL, 0, num_cores, num_gpus, argc, argv);

    
    printf("\n\n");
    printf("  N    Multicore GFlop/s    ||R||_F / ||A||_F\n");
    printf("=============================================\n");
    for(i=0; i<10; i++)
      {
        N = lda = size[i];
        n2 = N*N;

        lapackf77_zlarnv( &ione, ISEED, &n2, h_A );

        /* Shift the diagonal by 2000 and make it real */
        for(j=0; j<N; j++) 
          MAGMA_Z_SET2REAL( h_A[j*lda+j], ( MAGMA_Z_GET_X(h_A[j*lda+j]) + 2000. ) );

        /* Both factorizations start from the same matrix */
        for(j=0; j<n2; j++)
          h_A2[j] = h_A[j];

        /* =====================================================================
           Performs operation using LAPACK 
           =================================================================== */
        lapackf77_zpotrf("U", &N, h_A, &lda, info);

        if (info[0] < 0)  
          printf("Argument %d of zpotrf had an illegal value.\n", (int) -info[0]);     

        /* =====================================================================
           Performs operation using multi-core 
           =================================================================== */
        start = get_current_time();
        magma_zpotrf_mc(context, "U", &N, h_A2, &lda, info);
        end = get_current_time();

        if (info[0] < 0)  
          printf("Argument %d of magma_zpotrf_mc had an illegal value.\n", (int) -info[0]);     

        cpu_perf2 = FLOPS( (double)N ) / (1000000.*GetTimerValue(start,end));

        /* =====================================================================
           Check the result compared to LAPACK
           =================================================================== */
        double work[1], matnorm = 1.;
        cuDoubleComplex mone = MAGMA_Z_NEG_ONE;

        matnorm = lapackf77_zlange("f", &N, &N, h_A, &N, work);
        /* h_A2 := h_A2 - h_A, so the norm printed below is that of the difference */
        blasf77_zaxpy(&n2, &mone, h_A, &ione, h_A2, &ione);
        printf("%5d     %6.2f                %e\n", 
               (int) size[i], cpu_perf2,  
               lapackf77_zlange("f", &N, &N, h_A2, &N, work) / matnorm);

        if (loop != 1)
          break;
      }
    
    /* Memory clean up */
    free(h_A);
    free(h_A2);

    /* Shut down the MAGMA context */
    magma_finalize(context);

    return 0;
}