static bool flush_ringbuf_client(void) { char buf[1024]; /* 書くデータなし */ if (fresh_queue.next == fresh_queue.tail) return (FALSE); /* まだ書くべき時でない */ if (fresh_queue.time[fresh_queue.next] > get_current_time() - epoch_time) return (FALSE); /* 時間情報(区切り)が得られるまで書く */ while (get_nextbuf(buf)) { char id; int x, y, len, col; int i; unsigned char tmp1, tmp2, tmp3, tmp4; char *mesg; sscanf(buf, "%c%c%c%c%c", &id, &tmp1, &tmp2, &tmp3, &tmp4); x = tmp1-1; y = tmp2-1; len = tmp3; col = tmp4; if (id == 's') { col = tmp3; mesg = &buf[4]; } else mesg = &buf[5]; #ifndef WINDOWS win2unix(col, mesg); #endif switch (id) { case 't': /* 通常 */ #ifdef SJIS euc2sjis(mesg); #endif update_term_size(x, y, len); (void)((*angband_term[0]->text_hook)(x, y, len, (byte)col, mesg)); strncpy(&Term->scr->c[y][x], mesg, len); for (i = x; i < x+len; i++) { Term->scr->a[y][i] = col; } break; case 'n': /* 繰り返し */ for (i = 1; i < len; i++) { mesg[i] = mesg[0]; } mesg[i] = '\0'; update_term_size(x, y, len); (void)((*angband_term[0]->text_hook)(x, y, len, (byte)col, mesg)); strncpy(&Term->scr->c[y][x], mesg, len); for (i = x; i < x+len; i++) { Term->scr->a[y][i] = col; } break; case 's': /* 一文字 */ update_term_size(x, y, 1); (void)((*angband_term[0]->text_hook)(x, y, 1, (byte)col, mesg)); strncpy(&Term->scr->c[y][x], mesg, 1); Term->scr->a[y][x] = col; break; case 'w': update_term_size(x, y, len); (void)((*angband_term[0]->wipe_hook)(x, y, len)); break; case 'x': if (x == TERM_XTRA_CLEAR) Term_clear(); (void)((*angband_term[0]->xtra_hook)(x, 0)); break; case 'c': update_term_size(x, y, 1); (void)((*angband_term[0]->curs_hook)(x, y)); break; case 'C': update_term_size(x, y, 1); (void)((*angband_term[0]->bigcurs_hook)(x, y)); break; } } fresh_queue.next++; if (fresh_queue.next == FRESH_QUEUE_SIZE) fresh_queue.next = 0; return (TRUE); }
/* //////////////////////////////////////////////////////////////////////////// -- Testing chegvd */ int main( int argc, char** argv) { TESTING_CUDA_INIT(); cuFloatComplex *h_A, *h_R, *h_B, *h_S, *h_work; float *rwork, *w1, *w2; magma_int_t *iwork; float gpu_time, cpu_time; magma_timestr_t start, end; /* Matrix size */ magma_int_t N=0, n2; magma_int_t size[4] = {1024,2048,4100,6001}; magma_int_t i, itype, info; magma_int_t ione = 1, izero = 0; magma_int_t five = 5; cuFloatComplex c_zero = MAGMA_C_ZERO; cuFloatComplex c_one = MAGMA_C_ONE; cuFloatComplex c_neg_one = MAGMA_C_NEG_ONE; float d_one = 1.; float d_neg_one = -1.; float d_ten = 10.; magma_int_t ISEED[4] = {0,0,0,1}; const char *uplo = MagmaLowerStr; const char *jobz = MagmaVectorsStr; itype = 1; magma_int_t checkres; float result[4]; int flagN = 0; if (argc != 1){ for(i = 1; i<argc; i++){ if (strcmp("-N", argv[i])==0){ N = atoi(argv[++i]); if (N>0){ printf(" testing_chegvd -N %d\n\n", (int) N); flagN=1; } else { printf("\nUsage: \n"); printf(" testing_chegvd -N %d\n\n", (int) N); exit(1); } } if (strcmp("-itype", argv[i])==0){ itype = atoi(argv[++i]); if (itype>0 && itype <= 3){ printf(" testing_chegvd -itype %d\n\n", (int) itype); } else { printf("\nUsage: \n"); printf(" testing_chegvd -itype %d\n\n", (int) itype); exit(1); } } if (strcmp("-L", argv[i])==0){ uplo = MagmaLowerStr; printf(" testing_chegvd -L"); } if (strcmp("-U", argv[i])==0){ uplo = MagmaUpperStr; printf(" testing_chegvd -U"); } } } else { printf("\nUsage: \n"); printf(" testing_chegvd -L/U -N %d -itype %d\n\n", 1024, 1); } if(!flagN) N = size[3]; checkres = getenv("MAGMA_TESTINGS_CHECK") != NULL; n2 = N * N; /* Allocate host memory for the matrix */ TESTING_MALLOC( h_A, cuFloatComplex, n2); TESTING_MALLOC( h_B, cuFloatComplex, n2); TESTING_MALLOC( w1, float , N); TESTING_MALLOC( w2, float , N); TESTING_HOSTALLOC(h_R, cuFloatComplex, n2); TESTING_HOSTALLOC(h_S, cuFloatComplex, n2); magma_int_t nb = magma_get_chetrd_nb(N); magma_int_t lwork = 2*N*nb + N*N; magma_int_t lrwork = 1 + 5*N +2*N*N; magma_int_t liwork = 3 + 5*N; TESTING_HOSTALLOC(h_work, cuFloatComplex, lwork); TESTING_MALLOC( rwork, float, lrwork); TESTING_MALLOC( iwork, magma_int_t, liwork); printf(" N CPU Time(s) GPU Time(s) \n"); printf("===================================\n"); for(i=0; i<4; i++){ if (!flagN){ N = size[i]; n2 = N*N; } /* Initialize the matrix */ lapackf77_clarnv( &ione, ISEED, &n2, h_A ); //lapackf77_clatms( &N, &N, "U", ISEED, "P", w1, &five, &d_ten, // &d_one, &N, &N, uplo, h_B, &N, h_work, &info); //lapackf77_claset( "A", &N, &N, &c_zero, &c_one, h_B, &N); lapackf77_clarnv( &ione, ISEED, &n2, h_B ); /* increase the diagonal */ { magma_int_t i, j; for(i=0; i<N; i++) { MAGMA_C_SET2REAL( h_B[i*N+i], MAGMA_C_REAL(h_B[i*N+i]) + 1.*N ); MAGMA_C_SET2REAL( h_A[i*N+i], MAGMA_C_REAL(h_A[i*N+i]) ); } } lapackf77_clacpy( MagmaUpperLowerStr, &N, &N, h_A, &N, h_R, &N ); lapackf77_clacpy( MagmaUpperLowerStr, &N, &N, h_B, &N, h_S, &N ); magma_chegvd(itype, jobz[0], uplo[0], N, h_R, N, h_S, N, w1, h_work, lwork, rwork, lrwork, iwork, liwork, &info); lapackf77_clacpy( MagmaUpperLowerStr, &N, &N, h_A, &N, h_R, &N ); lapackf77_clacpy( MagmaUpperLowerStr, &N, &N, h_B, &N, h_S, &N ); /* ==================================================================== Performs operation using MAGMA =================================================================== */ start = get_current_time(); magma_chegvd(itype, jobz[0], uplo[0], N, h_R, N, h_S, N, w1, h_work, lwork, rwork, lrwork, iwork, liwork, &info); end = get_current_time(); gpu_time = GetTimerValue(start,end)/1000.; if ( checkres ) { /* ===================================================================== Check the results following the LAPACK's [zc]hegvd routine. A x = lambda B x is solved and the following 3 tests computed: (1) | A Z - B Z D | / ( |A||Z| N ) (itype = 1) | A B Z - Z D | / ( |A||Z| N ) (itype = 2) | B A Z - Z D | / ( |A||Z| N ) (itype = 3) (2) | I - V V' B | / ( N ) (itype = 1,2) | B - V V' | / ( |B| N ) (itype = 3) (3) | S(with V) - S(w/o V) | / | S | =================================================================== */ float temp1, temp2; cuFloatComplex *tau; if (itype == 1 || itype == 2){ lapackf77_claset( "A", &N, &N, &c_zero, &c_one, h_S, &N); blasf77_cgemm("N", "C", &N, &N, &N, &c_one, h_R, &N, h_R, &N, &c_zero, h_work, &N); blasf77_chemm("R", uplo, &N, &N, &c_neg_one, h_B, &N, h_work, &N, &c_one, h_S, &N); result[1]= lapackf77_clange("1", &N, &N, h_S, &N, rwork) / N; } else if (itype == 3){ lapackf77_clacpy( MagmaUpperLowerStr, &N, &N, h_B, &N, h_S, &N); blasf77_cherk(uplo, "N", &N, &N, &d_neg_one, h_R, &N, &d_one, h_S, &N); result[1]= lapackf77_clanhe("1",uplo, &N, h_S, &N, rwork) / N / lapackf77_clanhe("1",uplo, &N, h_B, &N, rwork); } result[0] = 1.; result[0] /= lapackf77_clanhe("1",uplo, &N, h_A, &N, rwork); result[0] /= lapackf77_clange("1",&N , &N, h_R, &N, rwork); if (itype == 1){ blasf77_chemm("L", uplo, &N, &N, &c_one, h_A, &N, h_R, &N, &c_zero, h_work, &N); for(int i=0; i<N; ++i) blasf77_csscal(&N, &w1[i], &h_R[i*N], &ione); blasf77_chemm("L", uplo, &N, &N, &c_neg_one, h_B, &N, h_R, &N, &c_one, h_work, &N); result[0] *= lapackf77_clange("1", &N, &N, h_work, &N, rwork)/N; } else if (itype == 2){ blasf77_chemm("L", uplo, &N, &N, &c_one, h_B, &N, h_R, &N, &c_zero, h_work, &N); for(int i=0; i<N; ++i) blasf77_csscal(&N, &w1[i], &h_R[i*N], &ione); blasf77_chemm("L", uplo, &N, &N, &c_one, h_A, &N, h_work, &N, &c_neg_one, h_R, &N); result[0] *= lapackf77_clange("1", &N, &N, h_R, &N, rwork)/N; } else if (itype == 3){ blasf77_chemm("L", uplo, &N, &N, &c_one, h_A, &N, h_R, &N, &c_zero, h_work, &N); for(int i=0; i<N; ++i) blasf77_csscal(&N, &w1[i], &h_R[i*N], &ione); blasf77_chemm("L", uplo, &N, &N, &c_one, h_B, &N, h_work, &N, &c_neg_one, h_R, &N); result[0] *= lapackf77_clange("1", &N, &N, h_R, &N, rwork)/N; } /* lapackf77_chet21(&ione, uplo, &N, &izero, h_A, &N, w1, w1, h_R, &N, h_R, &N, tau, h_work, rwork, &result[0]); */ lapackf77_clacpy( MagmaUpperLowerStr, &N, &N, h_A, &N, h_R, &N ); lapackf77_clacpy( MagmaUpperLowerStr, &N, &N, h_B, &N, h_S, &N ); magma_chegvd(itype, 'N', uplo[0], N, h_R, N, h_S, N, w2, h_work, lwork, rwork, lrwork, iwork, liwork, &info); temp1 = temp2 = 0; for(int j=0; j<N; j++){ temp1 = max(temp1, absv(w1[j])); temp1 = max(temp1, absv(w2[j])); temp2 = max(temp2, absv(w1[j]-w2[j])); } result[2] = temp2 / temp1; } /* ===================================================================== Performs operation using LAPACK =================================================================== */ start = get_current_time(); lapackf77_chegvd(&itype, jobz, uplo, &N, h_A, &N, h_B, &N, w2, h_work, &lwork, rwork, &lrwork, iwork, &liwork, &info); end = get_current_time(); if (info < 0) printf("Argument %d of chegvd had an illegal value.\n", (int) -info); cpu_time = GetTimerValue(start,end)/1000.; /* ===================================================================== Print execution time =================================================================== */ printf("%5d %6.2f %6.2f\n", (int) N, cpu_time, gpu_time); if ( checkres ){ printf("Testing the eigenvalues and eigenvectors for correctness:\n"); if(itype==1) printf("(1) | A Z - B Z D | / (|A| |Z| N) = %e\n", result[0]); else if(itype==2) printf("(1) | A B Z - Z D | / (|A| |Z| N) = %e\n", result[0]); else if(itype==3) printf("(1) | B A Z - Z D | / (|A| |Z| N) = %e\n", result[0]); if(itype==1 || itype ==2) printf("(2) | I - Z Z' B | / N = %e\n", result[1]); else printf("(2) | B - Z Z' | / (|B| N) = %e\n", result[1]); printf("(3) | D(w/ Z)-D(w/o Z)|/ |D| = %e\n\n", result[2]); } if (flagN) break; } /* Memory clean up */ TESTING_FREE( h_A); TESTING_FREE( h_B); TESTING_FREE( w1); TESTING_FREE( w2); TESTING_FREE( rwork); TESTING_FREE( iwork); TESTING_HOSTFREE(h_work); TESTING_HOSTFREE( h_R); TESTING_HOSTFREE( h_S); /* Shutdown */ TESTING_CUDA_FINALIZE(); }
/* //////////////////////////////////////////////////////////////////////////// -- Testing zpotrf_mc */ int main( magma_int_t argc, char** argv) { cuDoubleComplex *h_A, *h_R, *h_work, *h_A2; cuDoubleComplex *d_A; float gpu_perf, cpu_perf, cpu_perf2; magma_timestr_t start, end; /* Matrix size */ magma_int_t N=0, n2, lda; magma_int_t size[10] = {1024,2048,3072,4032,5184,6048,7200,8064,8928,10080}; magma_int_t i, j, info[1]; magma_int_t ione = 1; magma_int_t ISEED[4] = {0,0,0,1}; magma_int_t num_cores = 4; int num_gpus = 0; magma_int_t loop = argc; if (argc != 1){ for(i = 1; i<argc; i++){ if (strcmp("-N", argv[i])==0) N = atoi(argv[++i]); else if (strcmp("-C", argv[i])==0) num_cores = atoi(argv[++i]); } if (N==0) { N = size[9]; loop = 1; } else { size[0] = size[9] = N; } } else { printf("\nUsage: \n"); printf(" testing_zpotrf_mc -N %d -B 128 \n\n", 1024); N = size[9]; } lda = N; n2 = size[9] * size[9]; /* Allocate host memory for the matrix */ h_A = (cuDoubleComplex*)malloc(n2 * sizeof(h_A[0])); if (h_A == 0) { fprintf (stderr, "!!!! host memory allocation error (A)\n"); } /* Allocate host memory for the matrix */ h_A2 = (cuDoubleComplex*)malloc(n2 * sizeof(h_A2[0])); if (h_A2 == 0) { fprintf (stderr, "!!!! host memory allocation error (A2)\n"); } /* Initialize MAGMA hardware context, seeting how many CPU cores and how many GPUs to be used in the consequent computations */ magma_context *context; context = magma_init(NULL, NULL, 0, num_cores, num_gpus, argc, argv); printf("\n\n"); printf(" N Multicore GFlop/s ||R||_F / ||A||_F\n"); printf("=============================================\n"); for(i=0; i<10; i++) { N = lda = size[i]; n2 = N*N; lapackf77_zlarnv( &ione, ISEED, &n2, h_A ); for(j=0; j<N; j++) MAGMA_Z_SET2REAL( h_A[j*lda+j], ( MAGMA_Z_GET_X(h_A[j*lda+j]) + 2000. ) ); for(j=0; j<n2; j++) h_A2[j] = h_A[j]; /* ===================================================================== Performs operation using LAPACK =================================================================== */ //lapackf77_zpotrf("L", &N, h_A, &lda, info); lapackf77_zpotrf("U", &N, h_A, &lda, info); if (info[0] < 0) printf("Argument %d of zpotrf had an illegal value.\n", -info[0]); /* ===================================================================== Performs operation using multi-core =================================================================== */ start = get_current_time(); //magma_zpotrf_mc(context, "L", &N, h_A2, &lda, info); magma_zpotrf_mc(context, "U", &N, h_A2, &lda, info); end = get_current_time(); if (info[0] < 0) printf("Argument %d of magma_zpotrf_mc had an illegal value.\n", -info[0]); cpu_perf2 = FLOPS( (double)N ) / (1000000.*GetTimerValue(start,end)); /* ===================================================================== Check the result compared to LAPACK =================================================================== */ double work[1], matnorm = 1.; cuDoubleComplex mone = MAGMA_Z_NEG_ONE; int one = 1; matnorm = lapackf77_zlange("f", &N, &N, h_A, &N, work); blasf77_zaxpy(&n2, &mone, h_A, &one, h_A2, &one); printf("%5d %6.2f %e\n", size[i], cpu_perf2, lapackf77_zlange("f", &N, &N, h_A2, &N, work) / matnorm); if (loop != 1) break; } /* Memory clean up */ free(h_A); free(h_A2); /* Shut down the MAGMA context */ magma_finalize(context); }