C4Err TestHashKAT( int algor, char *name, uint8_t *msg, size_t msgsize, int passes, uint8_t * expected, size_t expectedLen) { uint8_t hashBuf [256]; uint8_t hashState [kHASH_ContextAllocSize]; size_t hashStateLen = 0; C4Err err = kC4Err_NoErr; int i; HASH_ContextRef hash = kInvalidHASH_ContextRef; err = HASH_Init(algor, &hash); err = HASH_Export(hash, hashState, sizeof(hashState), &hashStateLen); HASH_Free(hash); hash = NULL; err = HASH_Import(hashState, hashStateLen, &hash); CKERR; ZERO(hashState, sizeof(hashState)); /* calculate the hash.. */ for (i = 0; i < passes; i++) { err = HASH_Update( hash, msg, msgsize); CKERR; err = HASH_Export(hash, hashState, sizeof(hashState), &hashStateLen); HASH_Free(hash); hash = NULL; err = HASH_Import(hashState, hashStateLen, &hash); CKERR; ZERO(hashState, sizeof(hashState)); } err = HASH_Final (hash, hashBuf); CKERR; err = ( compareResults( expected, hashBuf, expectedLen , kResultFormat_Byte, "Message Digest")); CKERR; /* quick HASH API */ if(passes == 1) { err = HASH_DO(algor, msg, msgsize, sizeof(hashBuf), hashBuf); CKERR; err = ( compareResults( expected, hashBuf, expectedLen , kResultFormat_Byte, "Quick HASH")); CKERR; } done: if( HASH_ContextRefIsValid(hash)) HASH_Free(hash); return err; }
/*
 * testZbase32 -- known-answer tests for the z-base-32 encoder/decoder.
 *
 * Each vector gives a bit count, the raw input bytes, and the expected
 * z-base-32 string.  The test encodes, compares against the expected string,
 * then decodes and compares against the original bytes.
 *
 * Returns kS4Err_NoErr on success, or the first comparison error.
 */
S4Err testZbase32()
{
    S4Err err = kS4Err_NoErr;

    katvector kat_vector_array[] = {
        { 1,   (uint8_t*)"\x00", "y" },
        { 1,   (uint8_t*)"\x80", "o" },
        { 2,   (uint8_t*)"\x40", "e" },
        { 2,   (uint8_t*)"\xC0", "a" },
        { 10,  (uint8_t*)"\x00\x00", "yy" },
        { 10,  (uint8_t*)"\x80\x80", "on" },
        { 20,  (uint8_t*)"\x8B\x88\x80", "tqre" },
        { 24,  (uint8_t*)"\xF0\xBF\xC7", "6n9hq" },
        { 24,  (uint8_t*)"\xD4\x7A\x04", "4t7ye" },
        { 30,  (uint8_t*)"\xF5\x57\xBD\x0C", "6im54d" },   // the spec says "6im5sd" but I think it is wrong
        { 64,  (uint8_t*)"\x28\x6F\x20\x29\x28\x20\x6F\x29", "fbz1ykjerbz11" },
        { 128, (uint8_t*)"\x00\x01\x02\x03\x05\x06\x07\x08\x0A\x0B\x0C\x0D\x0F\x10\x11\x12",
               "yyyoryafyadoonombogo6rytne" },
        { 160, (uint8_t*)"\xA9\x99\x3E\x36\x47\x06\x81\x6A\xBA\x3E\x25\x71\x78\x50\xC2\x6C"
                         "\x9C\xD0\xD8\x9D",
               "igcuhp18y4ysiqt6riazowgnp1qpbsr7" }
    };

    size_t i;   /* FIX: was int -- compared against a sizeof expression */
    for (i = 0; i < sizeof(kat_vector_array)/ sizeof(katvector) ; i++)
    {
        katvector* kat = &kat_vector_array[i];
        uint8_t encoded[64] = {0};
        uint8_t decoded[64] = {0};
        int len, len2;

        /* FIX: was 'char* binString[128]' -- an array of 128 *pointers* used
         * as a character buffer; bin_to_chars() would scribble over pointer
         * storage and %s would read it as text.  A plain char buffer is what
         * is actually required. */
        char binString[128] = {0};

        bin_to_chars((uint8_t*)kat->base2, kat->bits, 24, binString);
        OPTESTLogVerbose("\t\t%4lu %2d %-30s %-32s\n",
                         kat->bits, INT_CEIL(kat->bits, 8), binString, kat->zbase32);

        len = zbase32_encode((uint8_t*)encoded, (uint8_t*)kat->base2, kat->bits);

        /* check against expected encoding */
        err = compareResults( kat->zbase32, encoded, len , kResultFormat_Byte, "Encoded"); CKERR;

        len2 = zbase32_decode((uint8_t*) decoded, (uint8_t*)encoded, kat->bits);
        (void)len2;   /* NOTE(review): decode length is not verified -- TODO confirm intent */

        err = compareResults(decoded, kat->base2, len/8 , kResultFormat_Byte, "Decoded"); CKERR;
    }

done:
    return err;
}
/* Driver: runs SYRK on the accelerator via HMPP codelets and on the CPU,
 * times both runs, and validates the GPU output against the CPU reference. */
int main(int argc, char** argv)
{
    double t_start, t_end;

    /* Array declaration (stack-allocated; sizes fixed by N/M macros). */
    DATA_TYPE A[N][M];
    DATA_TYPE C[N][N];
    DATA_TYPE C_outputFromGpu[N][N];

    /* Initialize input and both output arrays identically. */
    init_arrays(A, C, C_outputFromGpu);

#pragma hmpp syrk allocate
#pragma hmpp syrk advancedload, args[a,c]

    t_start = rtclock();

    /* GPU run: data was preloaded above, launch is asynchronous. */
#pragma hmpp syrk callsite, args[a,c].advancedload=true, asynchronous
    runSyrk(A, C_outputFromGpu);
#pragma hmpp syrk synchronize

    t_end = rtclock();
    fprintf(stderr, "GPU Runtime: %0.6lfs\n", t_end - t_start);

#pragma hmpp syrk delegatedstore, args[c]
#pragma hmpp syrk release

    t_start = rtclock();
    /* CPU reference run of the same codelet. */
    runSyrk(A, C);
    t_end = rtclock();
    fprintf(stderr, "CPU Runtime: %0.6lfs\n", t_end - t_start);

    compareResults(C, C_outputFromGpu);

    return 0;
}
/* Driver: SYRK benchmark -- GPU result computed by syrkGPU() into D,
 * CPU reference computed (and timed) by syrk() into C, then compared. */
int main()
{
    double t_start, t_end;
    DATA_TYPE* A;   /* input matrix */
    DATA_TYPE* C;   /* CPU result */
    DATA_TYPE* D;   /* GPU result */

    /* NOTE(review): malloc return values are not checked for NULL. */
    A = (DATA_TYPE*)malloc(N*M*sizeof(DATA_TYPE));
    C = (DATA_TYPE*)malloc(N*M*sizeof(DATA_TYPE));
    D = (DATA_TYPE*)malloc(N*M*sizeof(DATA_TYPE));

    fprintf(stdout, "<< Symmetric rank-k operations >>\n");

    init_arrays(A, C, D);

    /* GPU run (timed internally by syrkGPU, if at all). */
    syrkGPU(A, D);

    /* Timed CPU reference run. */
    t_start = rtclock();
    syrk(A, C);
    t_end = rtclock();
    fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start);

    compareResults(C, D);

    free(A);
    free(C);
    free(D);
    return 0;
}
/* Driver: 3 chained matrix multiplications (E=A.B; F=C.D; G=E.F) with a
 * CPU/GPU work-partitioned OpenCL implementation.  Optional argv[1] sets the
 * percentage of work offloaded to the CPU (cpu_offset). */
int main(int argc, char* argv[])
//int main(void)
{
    double t_start, t_end;
    DATA_TYPE* A;
    DATA_TYPE* B;
    DATA_TYPE* C;
    DATA_TYPE* D;
    DATA_TYPE* E;   /* E = A.B */
    DATA_TYPE* F;   /* F = C.D */
    DATA_TYPE* G;   /* CPU result, G = E.F */
    DATA_TYPE* G_outputFromGpu;

    if(argc==2){
        printf("arg 1 = %s\narg 2 = %s\n", argv[0], argv[1]);
        cpu_offset = atoi(argv[1]);   /* percentage of work done on the CPU */
    }

    /* NOTE(review): malloc return values are not checked for NULL. */
    A = (DATA_TYPE*)malloc(NI*NK*sizeof(DATA_TYPE));
    B = (DATA_TYPE*)malloc(NK*NJ*sizeof(DATA_TYPE));
    C = (DATA_TYPE*)malloc(NJ*NM*sizeof(DATA_TYPE));
    D = (DATA_TYPE*)malloc(NM*NL*sizeof(DATA_TYPE));
    E = (DATA_TYPE*)malloc(NI*NJ*sizeof(DATA_TYPE));
    F = (DATA_TYPE*)malloc(NJ*NL*sizeof(DATA_TYPE));
    G = (DATA_TYPE*)malloc(NI*NL*sizeof(DATA_TYPE));
    G_outputFromGpu = (DATA_TYPE*)malloc(NI*NL*sizeof(DATA_TYPE));

    int i;   /* NOTE(review): unused */

    init_array(A, B, C, D);
    read_cl_file();
    cl_initialization_fusion();
    //cl_initialization();
    cl_mem_init(A, B, C, D, E, F, G);
    cl_load_prog();

    cl_launch_kernel();

    /* Blocking read of the final product G back from device memory. */
    errcode = clEnqueueReadBuffer(clCommandQue[0], g_mem_obj, CL_TRUE, 0, sizeof(DATA_TYPE) * NI * NL, G_outputFromGpu, 0, NULL, NULL);
    if(errcode != CL_SUCCESS) printf("Error in reading GPU mem\n");

    /* Timed CPU reference run. */
    t_start = rtclock();
    mm3_cpu(A, B, C, D, E, F, G);
    t_end = rtclock();
    fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start);

    compareResults(G, G_outputFromGpu);
    cl_clean_up();

    free(A);
    free(B);
    free(C);
    free(D);
    free(E);
    free(F);
    free(G);
    free(G_outputFromGpu);

    return 0;
}
/*
 * TestPK -- smoke test for the ECC public-key path.
 *
 * Generates a 384-bit ECC key, exports the public and private halves,
 * encrypts a known plaintext with the public key, decrypts it again and
 * compares against the original.
 *
 * PRNG - initialized libtomcrypt PRNG state (yarrow).
 * Returns CRYPT_OK on success; the DO() macro handles per-call failure.
 */
int TestPK(prng_state * PRNG)
{
    int err = CRYPT_OK;
    int i;
    ecc_key eccKey;
    uint8_t PT[PTsize];
    uint8_t CT[256];
    uint8_t DT[PTsize];
    unsigned long z,w;
    uint8_t PrivKey[256];
    uint8_t PubKey[256];

    printf("\nTesting PK\n");

    /* fill PT with a known byte pattern */
    for(i = 0; i< PTsize; i++) PT[i]= i;

    DO( ecc_make_key(PRNG, find_prng ("yarrow"), 384/8, &eccKey));

    z = sizeof(PubKey);
    DO( ecc_export(PubKey, &z, PK_PUBLIC, &eccKey));
    printf("\tPub Key (%ld bytes)\n", z);
    dumpHex(PubKey, z, 8);

    z = sizeof(PrivKey);
    DO( ecc_export(PrivKey, &z, PK_PRIVATE, &eccKey));
    printf("\n\tPriv Key (%ld bytes)\n", z);
    dumpHex(PrivKey, z, 8);

    /* FIX: was 'z = 384' while CT is only 256 bytes -- the outlen handed to
     * ecc_encrypt_key must be the true capacity of CT or the ciphertext can
     * overflow the buffer. */
    z = sizeof(CT);
    DO( ecc_encrypt_key(PT, PTsize, CT, &z, PRNG, find_prng("yarrow"), find_hash("sha256"), &eccKey));
    printf("\n\tEncrypted message (%ld bytes)\n", z);
    dumpHex(CT, z, 0);

    /* FIX: w was uninitialized; ecc_decrypt_key reads *outlen on entry as the
     * capacity of the output buffer DT. */
    w = sizeof(DT);
    DO( ecc_decrypt_key(CT, z, DT, &w, &eccKey));

    /* check against known answer */
    DO(compareResults( DT, PT, PTsize , kResultFormat_Byte, "ECC Decrypt"));
    printf("\n\tDecrypted OK\n");
    dumpHex(DT, w, 0);

    ecc_free(&eccKey);

    return err;
}
/* Driver: 3 matrix multiplications (E=A.B; F=C.D; G=E.F) as three HMPP
 * codelets in one group; GPU results are validated against a CPU run. */
int main(int argc, char** argv)
{
    double t_start, t_end;

    /* Array declaration (stack-allocated; sizes fixed by NI..NM macros). */
    DATA_TYPE A[NI][NK];
    DATA_TYPE B[NK][NJ];
    DATA_TYPE C[NJ][NM];
    DATA_TYPE D[NM][NL];
    DATA_TYPE E[NI][NJ];
    DATA_TYPE E_gpu[NI][NJ];
    DATA_TYPE F[NJ][NL];
    DATA_TYPE F_gpu[NJ][NL];
    DATA_TYPE G[NI][NL];
    DATA_TYPE G_outputFromGpu[NI][NL];

    /* Initialize the input arrays. */
    iNIt_array(A, B, C, D);

#pragma hmpp <group1> allocate
#pragma hmpp <group1> loopa advancedload, args[a;b;e]
#pragma hmpp <group1> loopb advancedload, args[f;c;d]
#pragma hmpp <group1> loopc advancedload, args[g]

    t_start = rtclock();

    /* GPU run: the three codelets execute in sequence; intermediates
     * E_gpu/F_gpu stay on the device between them. */
#pragma hmpp <group1> loopa callsite, args[a;b;e].advancedload=true, asynchronous
    threeMMloopa(A, B, E_gpu);
#pragma hmpp <group1> loopa synchronize
#pragma hmpp <group1> loopb callsite, args[f;c;d].advancedload=true, asynchronous
    threeMMloopb(C, D, F_gpu);
#pragma hmpp <group1> loopb synchronize
#pragma hmpp <group1> loopc callsite, args[g;e;f].advancedload=true, asynchronous
    threeMMloopc(E_gpu, F_gpu, G_outputFromGpu);
#pragma hmpp <group1> loopc synchronize

    t_end = rtclock();
    fprintf(stderr, "GPU Runtime: %0.6lfs\n", t_end - t_start);

#pragma hmpp <group1> loopa delegatedstore, args[a;b]
#pragma hmpp <group1> loopb delegatedstore, args[c;d]
#pragma hmpp <group1> loopc delegatedstore, args[g;e;f]
#pragma hmpp <group1> release

    /* Timed CPU reference run of the same three stages. */
    t_start = rtclock();
    threeMMloopa(A, B, E);
    threeMMloopb(C, D, F);
    threeMMloopc(E, F, G);
    t_end = rtclock();
    fprintf(stderr, "CPU Runtime: %0.6lfs\n", t_end - t_start);

    compareResults(G, G_outputFromGpu);

    return 0;
}
/* Driver: covariance computation as three HMPP codelets (mean, center data,
 * symmetric matrix); GPU result validated against a CPU run. */
int main(int argc, char** argv)
{
    int m = M;   /* NOTE(review): m and n are unused */
    int n = N;

    double t_start, t_end;

    /* Array declaration (M+1/N+1 sizing follows the 1-based polybench style). */
    DATA_TYPE float_n = 321414134.01;
    DATA_TYPE data[M + 1][N + 1];
    DATA_TYPE data_Gpu[M + 1][N + 1];
    DATA_TYPE symmat[M + 1][M + 1];
    DATA_TYPE symmat_outputFromGpu[M + 1][M + 1];
    DATA_TYPE mean[M + 1];
    DATA_TYPE mean_Gpu[M + 1];

    /* Initialize both data copies identically. */
    init_arrays(data, data_Gpu);

#pragma hmpp <group1> allocate
#pragma hmpp <group1> loopa advancedload, args[pmean;pdata;pfloat_n]
#pragma hmpp <group1> loopc advancedload, args[psymmat]

    t_start = rtclock();

    /* GPU run: mean, then mean-subtraction, then covariance matrix. */
#pragma hmpp <group1> loopa callsite, args[pmean;pdata;pfloat_n].advancedload=true, asynchronous
    covarLoopa(mean_Gpu, data_Gpu, float_n);
#pragma hmpp <group1> loopa synchronize
#pragma hmpp <group1> loopb callsite, args[pdata;pmean].advancedload=true, asynchronous
    covarLoopb(data_Gpu, mean_Gpu);
#pragma hmpp <group1> loopb synchronize
#pragma hmpp <group1> loopc callsite, args[psymmat;pdata].advancedload=true, asynchronous
    covarLoopc(symmat_outputFromGpu, data_Gpu);
#pragma hmpp <group1> loopc synchronize

    t_end = rtclock();
    fprintf(stderr, "GPU Runtime: %0.6lfs\n", t_end - t_start);

#pragma hmpp <group1> loopb delegatedstore, args[pmean]
#pragma hmpp <group1> loopc delegatedstore, args[psymmat;pdata]
#pragma hmpp <group1> release

    /* Timed CPU reference run of the same three stages. */
    t_start = rtclock();
    covarLoopa(mean, data, float_n);
    covarLoopb(data, mean);
    covarLoopc(symmat, data);
    t_end = rtclock();
    fprintf(stderr, "CPU Runtime: %0.6lfs\n", t_end - t_start);

    compareResults(symmat, symmat_outputFromGpu);

    return 0;
}
/* Layout-engine regression driver: for each entry in testInputs, opens the
 * font, lays out the test text, and compares glyphs/indices/positions against
 * the stored testResults.  Returns the number of failing tests (0 = all pass). */
int main(int argc, char *argv[])
{
    le_int32 failures = 0;

    for (le_int32 test = 0; test < testCount; test += 1) {
        LEErrorCode fontStatus = LE_NO_ERROR;

        printf("Test %d, font = %s... ", test, testInputs[test].fontName);

        PortableFontInstance fontInstance(testInputs[test].fontName, 12, fontStatus);

        if (LE_FAILURE(fontStatus)) {
            /* missing font is not counted as a failure -- just skipped */
            printf("could not open font.\n");
            continue;
        }

        LEErrorCode success = LE_NO_ERROR;
        LayoutEngine *engine = LayoutEngine::layoutEngineFactory(&fontInstance, testInputs[test].scriptCode, -1, success);
        le_int32 textLength = testInputs[test].textLength;
        le_bool result;
        TestResult actual;

        if (LE_FAILURE(success)) {
            // would be nice to print the script name here, but
            // don't want to maintain a table, and don't want to
            // require ICU just for the script name...
            printf("could not create a LayoutEngine.\n");
            continue;
        }

        /* Lay out the full text and pull back the three parallel result arrays. */
        actual.glyphCount = engine->layoutChars(testInputs[test].text, 0, textLength, textLength, testInputs[test].rightToLeft, 0, 0, success);

        actual.glyphs    = new LEGlyphID[actual.glyphCount];
        actual.indices   = new le_int32[actual.glyphCount];
        /* positions are (x,y) pairs plus one trailing advance pair */
        actual.positions = new float[actual.glyphCount * 2 + 2];

        engine->getGlyphs(actual.glyphs, success);
        engine->getCharIndices(actual.indices, success);
        engine->getGlyphPositions(actual.positions, success);

        result = compareResults(test, &testResults[test], &actual);

        if (result) {
            printf("passed.\n");
        } else {
            failures += 1;
            printf("failed.\n");
        }

        delete[] actual.positions;
        delete[] actual.indices;
        delete[] actual.glyphs;
        delete engine;
    }

    return failures;
}
/* Driver: SYR2K on OpenCL (polybench harness); the GPU output C is read back
 * and, under RUN_ON_CPU, validated against a timed CPU reference run. */
int main(int argc, char *argv[])
{
    /* Retrieve problem size. */
    int ni = NI;
    int nj = NJ;

    /* Variable declaration/allocation. */
    DATA_TYPE alpha;
    DATA_TYPE beta;
    POLYBENCH_2D_ARRAY_DECL(A,DATA_TYPE,NI,NJ,ni,nj);
    POLYBENCH_2D_ARRAY_DECL(B,DATA_TYPE,NI,NJ,ni,nj);
    POLYBENCH_2D_ARRAY_DECL(C,DATA_TYPE,NI,NI,ni,ni);
    POLYBENCH_2D_ARRAY_DECL(C_outputFromGpu,DATA_TYPE,NI,NI,ni,ni);

    init_arrays(ni, nj, &alpha, &beta, POLYBENCH_ARRAY(A), POLYBENCH_ARRAY(B), POLYBENCH_ARRAY(C));
    read_cl_file();
    cl_initialization();
    cl_mem_init(POLYBENCH_ARRAY(A), POLYBENCH_ARRAY(B), POLYBENCH_ARRAY(C));
    cl_load_prog();

    cl_launch_kernel(ni, nj, alpha, beta);

    /* Blocking read of the result matrix from device memory. */
    errcode = clEnqueueReadBuffer(clCommandQue, c_mem_obj, CL_TRUE, 0, NI*NJ*sizeof(DATA_TYPE), POLYBENCH_ARRAY(C_outputFromGpu), 0, NULL, NULL);
    if(errcode != CL_SUCCESS) printf("Error in reading GPU mem\n");

#ifdef RUN_ON_CPU

    /* Start timer. */
    polybench_start_instruments;

    syr2kCpu(ni, nj, alpha, beta, POLYBENCH_ARRAY(A), POLYBENCH_ARRAY(B), POLYBENCH_ARRAY(C));

    /* Stop and print timer. */
    printf("CPU Time in seconds:\n");
    polybench_stop_instruments;
    polybench_print_instruments;

    compareResults(ni, POLYBENCH_ARRAY(C), POLYBENCH_ARRAY(C_outputFromGpu));

#else //prevent dead code elimination

    polybench_prevent_dce(print_array(ni, POLYBENCH_ARRAY(C_outputFromGpu)));

#endif //RUN_ON_CPU

    cl_clean_up();

    POLYBENCH_FREE_ARRAY(A);
    POLYBENCH_FREE_ARRAY(B);
    POLYBENCH_FREE_ARRAY(C);
    POLYBENCH_FREE_ARRAY(C_outputFromGpu);

    return 0;
}
/* Driver: correlation matrix with a CPU/GPU work-partitioned OpenCL
 * implementation.  Optional argv[1] sets cpu_offset (percentage of work on
 * the CPU).  GPU output is validated against a full CPU run. */
int main(int argc, char* argv[])
//int main(void)
{
    double t_start, t_end;

    DATA_TYPE* data;
    DATA_TYPE* mean;
    DATA_TYPE* stddev;
    DATA_TYPE* symmat;              /* CPU result */
    DATA_TYPE* symmat_outputFromGpu;

    if(argc==2){
        printf("arg 1 = %s\narg 2 = %s\n", argv[0], argv[1]);
        cpu_offset = atoi(argv[1]);   /* percentage of work done on the CPU */
    }

    /* (M+1)x(N+1) sizing follows the 1-based polybench indexing convention.
     * NOTE(review): malloc return values are not checked for NULL. */
    data = (DATA_TYPE*)malloc((M + 1)*(N + 1)*sizeof(DATA_TYPE));
    mean = (DATA_TYPE*)malloc((M + 1)*sizeof(DATA_TYPE));
    stddev = (DATA_TYPE*)malloc((M + 1)*sizeof(DATA_TYPE));
    symmat = (DATA_TYPE*)malloc((M + 1)*(N + 1)*sizeof(DATA_TYPE));
    symmat_outputFromGpu = (DATA_TYPE*)malloc((M + 1)*(N + 1)*sizeof(DATA_TYPE));

    init_arrays(data);
    read_cl_file();
    cl_initialization_fusion();
    //cl_initialization();
    cl_mem_init(data, mean, stddev, symmat);
    cl_load_prog();

    /* Timed partitioned (CPU+GPU) kernel launch. */
    double start = rtclock();
    cl_launch_kernel();
    double end = rtclock();
    fprintf(stdout, "CAUTION:CPU offset %d %% GPU Runtime: %0.6lf s\n",cpu_offset, (end - start));
    //fprintf(stdout, "CAUTION:CPU offset %d %% GPU Runtime: %0.6lf s\n",cpu_offset, 1000*(end - start));

    /* Blocking read of the result matrix from device memory. */
    errcode = clEnqueueReadBuffer(clCommandQue[0], symmat_mem_obj, CL_TRUE, 0, (M+1) * (N+1) * sizeof(DATA_TYPE), symmat_outputFromGpu, 0, NULL, NULL);
    if(errcode != CL_SUCCESS) printf("Error in reading GPU mem\n");

    /* Timed CPU reference run. */
    t_start = rtclock();
    correlation(data, mean, stddev, symmat);
    t_end = rtclock();
    fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start);

    compareResults(symmat, symmat_outputFromGpu);
    cl_clean_up();

    free(data);
    free(mean);
    free(stddev);
    free(symmat);
    free(symmat_outputFromGpu);

    return 0;
}
/* Driver: 2-D FDTD on OpenCL (polybench harness); the hz field is read back
 * and, under RUN_ON_CPU, validated against a timed CPU reference run. */
int main(int argc, char *argv[])
{
    int tmax = TMAX;   /* number of time steps */
    int nx = NX;
    int ny = NY;

    POLYBENCH_1D_ARRAY_DECL(_fict_,DATA_TYPE,TMAX,TMAX);
    POLYBENCH_2D_ARRAY_DECL(ex,DATA_TYPE,NX,NY,nx,ny);
    POLYBENCH_2D_ARRAY_DECL(ey,DATA_TYPE,NX,NY,nx,ny);
    POLYBENCH_2D_ARRAY_DECL(hz,DATA_TYPE,NX,NY,nx,ny);
    POLYBENCH_2D_ARRAY_DECL(hz_outputFromGpu,DATA_TYPE,NX,NY,nx,ny);

    init_arrays(tmax, nx, ny, POLYBENCH_ARRAY(_fict_), POLYBENCH_ARRAY(ex), POLYBENCH_ARRAY(ey), POLYBENCH_ARRAY(hz));
    read_cl_file();
    cl_initialization();
    cl_mem_init(POLYBENCH_ARRAY(_fict_), POLYBENCH_ARRAY(ex), POLYBENCH_ARRAY(ey), POLYBENCH_ARRAY(hz));
    cl_load_prog();

    cl_launch_kernel(tmax, nx, ny);

    /* Blocking read of the hz field from device memory. */
    errcode = clEnqueueReadBuffer(clCommandQue, hz_mem_obj, CL_TRUE, 0, NX * NY * sizeof(DATA_TYPE), POLYBENCH_ARRAY(hz_outputFromGpu), 0, NULL, NULL);
    if(errcode != CL_SUCCESS) printf("Error in reading GPU mem\n");

#ifdef RUN_ON_CPU

    /* Start timer. */
    polybench_start_instruments;

    runFdtd(tmax, nx, ny, POLYBENCH_ARRAY(_fict_), POLYBENCH_ARRAY(ex), POLYBENCH_ARRAY(ey), POLYBENCH_ARRAY(hz));

    /* Stop and print timer. */
    printf("CPU Time in seconds:\n");
    polybench_stop_instruments;
    polybench_print_instruments;

    compareResults(nx, ny, POLYBENCH_ARRAY(hz), POLYBENCH_ARRAY(hz_outputFromGpu));

#else //prevent dead code elimination

    polybench_prevent_dce(print_array(nx, ny, POLYBENCH_ARRAY(hz_outputFromGpu)));

#endif //RUN_ON_CPU

    POLYBENCH_FREE_ARRAY(_fict_);
    POLYBENCH_FREE_ARRAY(ex);
    POLYBENCH_FREE_ARRAY(ey);
    POLYBENCH_FREE_ARRAY(hz);
    POLYBENCH_FREE_ARRAY(hz_outputFromGpu);

    cl_clean_up();

    return 0;
}
/* Driver: initialize the global arrays, run SYRK on the GPU, then run and
 * time the CPU reference implementation and compare the two results. */
int main()
{
    double cpuStart, cpuEnd;

    /* Set up inputs and kick off the device computation. */
    init_arrays();
    syrkGPU();

    /* Timed CPU reference run. */
    cpuStart = rtclock();
    syrk();
    cpuEnd = rtclock();
    fprintf(stdout, "CPU Runtime: %0.6lfs\n", cpuEnd - cpuStart);

    /* Validate GPU output against the CPU reference. */
    compareResults();
    return 0;
}
static unsigned int processDocuments( const strus::PatternMatcherInstanceInterface* ptinst, const KeyTokenMap& keytokenmap, const std::vector<TreeNode*> treear, const std::vector<strus::utils::Document>& docs, std::map<std::string,double>& stats, const char* outputpath) { unsigned int totalNofmatches = 0; std::vector<strus::utils::Document>::const_iterator di = docs.begin(), de = docs.end(); std::size_t didx = 0; for (; di != de; ++di,++didx) { #ifdef STRUS_LOWLEVEL_DEBUG std::cout << "document " << di->tostring() << std::endl; #endif std::vector<strus::analyzer::PatternMatcherResult> results = eliminateDuplicates( sortResults( processDocument( ptinst, *di, stats))); if (outputpath) { std::ostringstream out; out << "number of matches " << results.size() << std::endl; strus::utils::printResults( out, std::vector<strus::SegmenterPosition>(), results); std::string outputfile( outputpath); outputfile.push_back( strus::dirSeparator()); outputfile.append( "res.txt"); strus::writeFile( outputfile, out.str()); } std::vector<strus::analyzer::PatternMatcherResult> expectedResults = eliminateDuplicates( sortResults( processDocumentAlt( keytokenmap, treear, *di))); if (outputpath) { std::ostringstream out; out << "number of matches " << expectedResults.size() << std::endl; strus::utils::printResults( out, std::vector<strus::SegmenterPosition>(), expectedResults); std::string outputfile( outputpath); outputfile.push_back( strus::dirSeparator()); outputfile.append( "exp.txt"); strus::writeFile( outputfile, out.str()); } if (!compareResults( results, expectedResults)) { throw std::runtime_error(std::string( "results differ to expected for document ") + di->id); } totalNofmatches += results.size(); if (g_errorBuffer->hasError()) { throw std::runtime_error("error matching rule"); } } return totalNofmatches; }
/* Driver: ATAX (y = A^T (A x)) on OpenCL (polybench harness); y is read back
 * and, under RUN_ON_CPU, validated against a timed CPU reference run. */
int main(void)
{
    int nx = NX;
    int ny = NY;

    POLYBENCH_2D_ARRAY_DECL(A,DATA_TYPE,NX,NY,nx,ny);
    POLYBENCH_1D_ARRAY_DECL(x,DATA_TYPE,NY,ny);
    POLYBENCH_1D_ARRAY_DECL(y,DATA_TYPE,NY,ny);
    POLYBENCH_1D_ARRAY_DECL(y_outputFromGpu,DATA_TYPE,NY,ny);
    POLYBENCH_1D_ARRAY_DECL(tmp,DATA_TYPE,NX,nx);   /* tmp = A x */

    init_array(nx, ny, POLYBENCH_ARRAY(x), POLYBENCH_ARRAY(A));
    read_cl_file();
    cl_initialization();
    cl_mem_init(POLYBENCH_ARRAY(A), POLYBENCH_ARRAY(x), POLYBENCH_ARRAY(y), POLYBENCH_ARRAY(tmp));
    cl_load_prog();

    cl_launch_kernel(nx, ny);

    /* Blocking read of y from device memory. */
    errcode = clEnqueueReadBuffer(clCommandQue, y_mem_obj, CL_TRUE, 0, NY*sizeof(DATA_TYPE), POLYBENCH_ARRAY(y_outputFromGpu), 0, NULL, NULL);
    if(errcode != CL_SUCCESS) printf("Error in reading GPU mem\n");

#ifdef RUN_ON_CPU

    /* Start timer. */
    polybench_start_instruments;

    atax_cpu(nx, ny, POLYBENCH_ARRAY(A), POLYBENCH_ARRAY(x), POLYBENCH_ARRAY(y), POLYBENCH_ARRAY(tmp));

    /* Stop and print timer. */
    printf("CPU Time in seconds:\n");
    polybench_stop_instruments;
    polybench_print_instruments;

    compareResults(ny, POLYBENCH_ARRAY(y), POLYBENCH_ARRAY(y_outputFromGpu));

#else

    print_array(ny, POLYBENCH_ARRAY(y_outputFromGpu));

#endif //RUN_ON_CPU

    cl_clean_up();

    POLYBENCH_FREE_ARRAY(A);
    POLYBENCH_FREE_ARRAY(x);
    POLYBENCH_FREE_ARRAY(y);
    POLYBENCH_FREE_ARRAY(y_outputFromGpu);
    POLYBENCH_FREE_ARRAY(tmp);

    return 0;
}
/* Driver: 3 matrix multiplications (E=A.B; F=C.D; G=E.F) -- OpenMP offload
 * version (mm3_OMP) timed against the CPU version (mm3_cpu), results compared. */
int main(int argc, char** argv)
{
    double t_start, t_end;

    DATA_TYPE* A;
    DATA_TYPE* B;
    DATA_TYPE* C;
    DATA_TYPE* D;
    DATA_TYPE* E;   /* E = A.B */
    DATA_TYPE* F;   /* F = C.D */
    DATA_TYPE* G;   /* CPU result, G = E.F */
    DATA_TYPE* G_outputFromGpu;

    /* NOTE(review): malloc return values are not checked for NULL. */
    A = (DATA_TYPE*)malloc(NI*NK*sizeof(DATA_TYPE));
    B = (DATA_TYPE*)malloc(NK*NJ*sizeof(DATA_TYPE));
    C = (DATA_TYPE*)malloc(NJ*NM*sizeof(DATA_TYPE));
    D = (DATA_TYPE*)malloc(NM*NL*sizeof(DATA_TYPE));
    E = (DATA_TYPE*)malloc(NI*NJ*sizeof(DATA_TYPE));
    F = (DATA_TYPE*)malloc(NJ*NL*sizeof(DATA_TYPE));
    G = (DATA_TYPE*)malloc(NI*NL*sizeof(DATA_TYPE));
    G_outputFromGpu = (DATA_TYPE*)malloc(NI*NL*sizeof(DATA_TYPE));

    fprintf(stdout, "<< Linear Algebra: 3 Matrix Multiplications (E=A.B; F=C.D; G=E.F) >>\n");

    init_array(A, B, C, D);

    /* Timed offloaded run (shares E/F scratch buffers with the CPU run below,
     * which is safe because the runs are sequential). */
    t_start = rtclock();
    mm3_OMP(A, B, C, D, E, F, G_outputFromGpu);
    t_end = rtclock();

    fprintf(stdout, "GPU Runtime: %0.6lfs\n", t_end - t_start);

    /* Timed CPU reference run. */
    t_start = rtclock();
    mm3_cpu(A, B, C, D, E, F, G);
    t_end = rtclock();

    fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start);

    compareResults(G, G_outputFromGpu);

    free(A);
    free(B);
    free(C);
    free(D);
    free(E);
    free(F);
    free(G);
    free(G_outputFromGpu);

    return 0;
}
/* Driver: 2 matrix multiplications (C=A.B; E=C.D) as two HMPP codelets in one
 * group; the GPU result E is validated against a CPU run. */
int main(int argc, char** argv)
{
    double t_start, t_end;

    /* Array declaration (stack-allocated; sizes fixed by NI..NL macros). */
    DATA_TYPE A[NI][NK];
    DATA_TYPE B[NK][NJ];
    DATA_TYPE C[NI][NJ];
    DATA_TYPE C_gpu[NI][NJ];
    DATA_TYPE D[NJ][NL];
    DATA_TYPE E[NI][NL];
    DATA_TYPE E_outputFromGpu[NI][NL];

    /* Initialize array. */
    init_array(A, B, C, C_gpu, D, E, E_outputFromGpu);

#pragma hmpp <group1> allocate
#pragma hmpp <group1> loopa advancedload, args[a;b;c]
#pragma hmpp <group1> loopb advancedload, args[d;e]

    t_start = rtclock();

    /* GPU run: intermediate C_gpu stays resident between the two codelets. */
#pragma hmpp <group1> loopa callsite, args[a;b;c].advancedload=true, asynchronous
    twoMMloopa(A, B, C_gpu);
#pragma hmpp <group1> loopa synchronize
#pragma hmpp <group1> loopb callsite, args[c;d;e].advancedload=true, asynchronous
    twoMMloopb(C_gpu, D, E_outputFromGpu);
#pragma hmpp <group1> loopb synchronize

    t_end = rtclock();
    fprintf(stderr, "GPU Runtime: %0.6lfs\n", t_end - t_start);

#pragma hmpp <group1> loopa delegatedstore, args[a;b]
#pragma hmpp <group1> loopb delegatedstore, args[c;d;e]
#pragma hmpp <group1> release

    /* Timed CPU reference run of the same two stages. */
    t_start = rtclock();
    twoMMloopa(A, B, C);
    twoMMloopb(C, D, E);
    t_end = rtclock();
    fprintf(stderr, "CPU Runtime: %0.6lfs\n", t_end - t_start);

    compareResults(E, E_outputFromGpu);

    return 0;
}
/* Driver: 3-D convolution on OpenCL (polybench harness); B is read back and,
 * under RUN_ON_CPU, validated against a timed CPU reference run. */
int main(int argc, char *argv[])
{
    int ni = NI;
    int nj = NJ;
    int nk = NK;

    POLYBENCH_3D_ARRAY_DECL(A,DATA_TYPE,NI,NJ,NK,ni,nj,nk);
    POLYBENCH_3D_ARRAY_DECL(B,DATA_TYPE,NI,NJ,NK,ni,nj,nk);
    POLYBENCH_3D_ARRAY_DECL(B_outputFromGpu,DATA_TYPE,NI,NJ,NK,ni,nj,nk);

    init(ni, nj, nk, POLYBENCH_ARRAY(A));
    read_cl_file();
    cl_initialization();
    cl_mem_init(POLYBENCH_ARRAY(A), POLYBENCH_ARRAY(B));
    cl_load_prog();

    cl_launch_kernel(ni, nj, nk);

    /* Blocking read of the output volume from device memory. */
    errcode = clEnqueueReadBuffer(clCommandQue, b_mem_obj, CL_TRUE, 0, NI * NJ * NK * sizeof(DATA_TYPE), POLYBENCH_ARRAY(B_outputFromGpu), 0, NULL, NULL);
    if(errcode != CL_SUCCESS) printf("Error in reading GPU mem\n");

#ifdef RUN_ON_CPU

    /* Start timer. */
    polybench_start_instruments;

    conv3D(ni, nj, nk, POLYBENCH_ARRAY(A), POLYBENCH_ARRAY(B));

    /* Stop and print timer. */
    printf("CPU Time in seconds:\n");
    polybench_stop_instruments;
    polybench_print_instruments;

    compareResults(ni, nj, nk, POLYBENCH_ARRAY(B), POLYBENCH_ARRAY(B_outputFromGpu));

#else //prevent dead code elimination

    polybench_prevent_dce(print_array(ni, nj, nk, POLYBENCH_ARRAY(B_outputFromGpu)));

#endif //RUN_ON_CPU

    cl_clean_up();

    POLYBENCH_FREE_ARRAY(A);
    POLYBENCH_FREE_ARRAY(B);
    POLYBENCH_FREE_ARRAY(B_outputFromGpu);

    return 0;
}
/*
 * RunStorageCipherKAT -- known-answer test for the Threefish "storage" cipher.
 *
 * Encrypts kat->PT with the vector's key/tweak, compares against kat->STOR,
 * then decrypts in place and compares against the original plaintext.
 * The DO() macro jumps to done: on failure, so the key is always wiped.
 *
 * NOTE(review): the malloc result is not checked for NULL before ZERO/use.
 */
int RunStorageCipherKAT( storage_katvector *kat)
{
    int err = CRYPT_OK;
    char* name = NULL;
    uint64_t *out = NULL;
    symmetric_key KEY;

    out = malloc(kat->STORlen);
    ZERO(out, kat->STORlen);

    name = cipher_name(kat->algor);
    printf("\t%-7s %d ", name, kat->keysize);

    DO( threefish_setup_key(kat->key, kat->keysize, kat->tweek, &KEY));

    DO( threefish_word_encrypt(kat->PT, out, &KEY))
    /* check against know-answer */
    DO( compareResults( kat->STOR, out, kat->STORlen , kResultFormat_Long, "Word Encrypt"));

    /* decrypt in place: 'out' is both input and output here */
    DO( threefish_word_decrypt(out, out, &KEY))
    /* check against orginal plain-text */
    DO( compareResults( kat->PT, out, kat->STORlen , kResultFormat_Long, "Word Decrypt"));

done:
    threefish_done(&KEY);
    free(out);
    printf("\n");
    return err;
}
int main(void) { double t_start, t_end; DATA_TYPE* A; DATA_TYPE* r; DATA_TYPE* s; DATA_TYPE* p; DATA_TYPE* q; DATA_TYPE* s_outputFromGpu; DATA_TYPE* q_outputFromGpu; A = (DATA_TYPE*)malloc(NX*NY*sizeof(DATA_TYPE)); r = (DATA_TYPE*)malloc(NX*sizeof(DATA_TYPE)); s = (DATA_TYPE*)malloc(NY*sizeof(DATA_TYPE)); p = (DATA_TYPE*)malloc(NY*sizeof(DATA_TYPE)); q = (DATA_TYPE*)malloc(NX*sizeof(DATA_TYPE)); s_outputFromGpu = (DATA_TYPE*)malloc(NY*sizeof(DATA_TYPE)); q_outputFromGpu = (DATA_TYPE*)malloc(NX*sizeof(DATA_TYPE)); init_array(A, p, r); read_cl_file(); cl_initialization(); cl_mem_init(A, r, s, p, q); cl_load_prog(); cl_launch_kernel(); errcode = clEnqueueReadBuffer(clCommandQue, s_mem_obj, CL_TRUE, 0, NY*sizeof(DATA_TYPE), s_outputFromGpu, 0, NULL, NULL); errcode = clEnqueueReadBuffer(clCommandQue, q_mem_obj, CL_TRUE, 0, NX*sizeof(DATA_TYPE), q_outputFromGpu, 0, NULL, NULL); if(errcode != CL_SUCCESS) printf("Error in reading GPU mem\n"); t_start = rtclock(); bicg_cpu(A, r, s, p, q); t_end = rtclock(); fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start); compareResults(s, s_outputFromGpu, q, q_outputFromGpu); cl_clean_up(); free(A); free(r); free(s); free(p); free(q); free(s_outputFromGpu); free(q_outputFromGpu); return 0; }
/* Driver: correlation matrix as a single HMPP codelet; the GPU symmat result
 * is validated against a CPU run of the same routine. */
int main(int argc, char** argv)
{
    int m = M;   /* NOTE(review): m and n are unused */
    int n = N;

    double t_start, t_end;

    /* Array declaration (M+1/N+1 sizing follows the 1-based polybench style). */
    DATA_TYPE float_n = 321414134.01;
    DATA_TYPE eps = 0.005;   /* threshold below which stddev is clamped */
    DATA_TYPE data[M + 1][N + 1];
    DATA_TYPE data_Gpu[M + 1][N + 1];
    DATA_TYPE mean[M + 1];
    DATA_TYPE mean_Gpu[M + 1];
    DATA_TYPE stddev[M + 1];
    DATA_TYPE stddev_Gpu[M + 1];
    DATA_TYPE symmat[M + 1][M + 1];
    DATA_TYPE symmat_outputFromGpu[M + 1][M + 1];

    /* Initialize both data copies identically. */
    init_arrays(data, data_Gpu);

#pragma hmpp corr allocate
#pragma hmpp corr advancedload, args[pdata;psymmat;pstddev;pmean;pfloat_n;peps]

    t_start = rtclock();

#pragma hmpp corr callsite, args[pdata;psymmat;pstddev;pmean;pfloat_n;peps].advancedload=true, asynchronous
    runCorr(data_Gpu, symmat_outputFromGpu, stddev_Gpu, mean_Gpu, float_n, eps);
#pragma hmpp corr synchronize

    t_end = rtclock();
    fprintf(stderr, "GPU Runtime: %0.6lfs\n", t_end - t_start);

#pragma hmpp corr delegatedstore, args[pdata;psymmat;pstddev;pmean]
#pragma hmpp corr release

    /* Timed CPU reference run. */
    t_start = rtclock();
    runCorr(data, symmat, stddev, mean, float_n, eps);
    t_end = rtclock();
    fprintf(stderr, "CPU Runtime: %0.6lfs\n", t_end - t_start);

    compareResults(symmat, symmat_outputFromGpu);

    return 0;
}
/* Driver: BiCG sub-kernel -- OpenMP offload version (bicg_OMP) timed against
 * the CPU version (bicg_cpu); both output vectors are compared. */
int main(int argc, char** argv)
{
    double t_start, t_end;

    DATA_TYPE* A;
    DATA_TYPE* r;
    DATA_TYPE* s;   /* CPU result */
    DATA_TYPE* p;
    DATA_TYPE* q;   /* CPU result */
    DATA_TYPE* s_GPU;
    DATA_TYPE* q_GPU;

    /* NOTE(review): malloc return values are not checked for NULL. */
    A = (DATA_TYPE*)malloc(NX*NY*sizeof(DATA_TYPE));
    r = (DATA_TYPE*)malloc(NX*sizeof(DATA_TYPE));
    s = (DATA_TYPE*)malloc(NY*sizeof(DATA_TYPE));
    p = (DATA_TYPE*)malloc(NY*sizeof(DATA_TYPE));
    q = (DATA_TYPE*)malloc(NX*sizeof(DATA_TYPE));
    s_GPU = (DATA_TYPE*)malloc(NY*sizeof(DATA_TYPE));
    q_GPU = (DATA_TYPE*)malloc(NX*sizeof(DATA_TYPE));

    fprintf(stdout, "<< BiCG Sub Kernel of BiCGStab Linear Solver >>\n");

    init_array(A, p, r);

    /* Timed offloaded run. */
    t_start = rtclock();
    bicg_OMP(A, r, s_GPU, p, q_GPU);
    t_end = rtclock();

    fprintf(stdout, "GPU Runtime: %0.6lfs\n", t_end - t_start);

    /* Timed CPU reference run. */
    t_start = rtclock();
    bicg_cpu(A, r, s, p, q);
    t_end = rtclock();

    fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start);

    compareResults(s, s_GPU, q, q_GPU);

    free(A);
    free(r);
    free(s);
    free(p);
    free(q);
    free(s_GPU);
    free(q_GPU);

    return 0;
}
/**
 * Evaluates both operand subexpressions in the given context, applies this
 * node's relational comparison to the two results, and returns the outcome
 * as a recycled boolean result in *aResult.
 *
 * @param aContext evaluation context supplying the recycler
 * @param aResult  out: boolean txAExprResult (set to nullptr on entry)
 * @return NS_OK, or the first failure from either operand's evaluate()
 */
nsresult RelationalExpr::evaluate(txIEvalContext* aContext, txAExprResult** aResult)
{
    *aResult = nullptr;

    /* Left operand first; bail out before touching the right on failure. */
    RefPtr<txAExprResult> lResult;
    nsresult rv = mLeftExpr->evaluate(aContext, getter_AddRefs(lResult));
    NS_ENSURE_SUCCESS(rv, rv);

    RefPtr<txAExprResult> rResult;
    rv = mRightExpr->evaluate(aContext, getter_AddRefs(rResult));
    NS_ENSURE_SUCCESS(rv, rv);

    /* compareResults applies this node's operator; wrap as a shared bool. */
    aContext->recycler()->getBoolResult(
        compareResults(aContext, lResult, rResult), aResult);

    return NS_OK;
}
/* Driver: MVT (two matrix-vector products updating x1 and x2) as an HMPP
 * codelet; GPU outputs validated against a CPU run of the same routine. */
int main()
{
    double t_start, t_end;

    DATA_TYPE a[N][N];
    DATA_TYPE x1[N];
    DATA_TYPE x1_outputFromGpu[N];
    DATA_TYPE x2[N];
    DATA_TYPE x2_outputFromGpu[N];
    DATA_TYPE y1[N];
    DATA_TYPE y2[N];

    //initialize the arrays for running on the CPU and GPU
    init_array(a, x1, x1_outputFromGpu, x2, x2_outputFromGpu, y1, y2);

#pragma hmpp mvt allocate
#pragma hmpp mvt advancedload, args[a,x1,x2,y1,y2]

    t_start = rtclock();

    //run the algorithm on the GPU
#pragma hmpp mvt callsite, args[x1,x2].advancedload=true, asynchronous
    runMvt(a, x1_outputFromGpu, x2_outputFromGpu, y1, y2);   // parameters are initialized in decls.h and are initialized with init_array()
#pragma hmpp mvt synchronize

    t_end = rtclock();
    fprintf(stderr, "GPU Runtime: %0.6lf\n", t_end - t_start);

#pragma hmpp mvt delegatedstore, args[x1,x2]
#pragma hmpp mvt release

    t_start = rtclock();

    //run the algorithm on the CPU
    runMvt(a, x1, x2, y1, y2);

    t_end = rtclock();
    fprintf(stderr, "CPU Runtime: %0.6lf\n", t_end - t_start);

    compareResults(x1, x1_outputFromGpu, x2, x2_outputFromGpu);

    return 0;
}
/*
 * compare2Results -- length-aware wrapper around compareResults().
 *
 * Fails immediately (with a logged diagnostic) when the two lengths differ;
 * otherwise defers to compareResults() for the byte-wise comparison.
 *
 * expected/expectedLen     - known-answer buffer and its length
 * calculated/calculatedLen - computed buffer and its length
 * format                   - dump format used by compareResults on mismatch
 * comment                  - label included in log output
 *
 * Returns kC4Err_NoErr on match, kC4Err_SelfTestFailed (or the error from
 * compareResults) otherwise.
 */
int compare2Results( const void* expected, size_t expectedLen,
                     const void* calculated, size_t calculatedLen,
                     DumpFormatType format, char* comment )
{
    C4Err err = kC4Err_NoErr;

    if(calculatedLen != expectedLen)
    {
        OPTESTLogError( "\n\t\tFAILED %s \n",comment );
        /* FIX: size_t arguments were passed for %d, which is undefined
         * behavior on LP64 targets; cast to int for the diagnostic. */
        OPTESTLogError( "\t\texpected %d bytes , calculated %d bytes \n",
                        (int)expectedLen, (int)calculatedLen);
        err = kC4Err_SelfTestFailed;
    }
    else
        err = compareResults(expected,calculated , expectedLen, format, comment );

    return err;
}
/* Driver: DOITGEN on OpenCL.  Separate input copies (A_2/C4_2/sum_2) are used
 * for the CPU reference so the GPU path cannot perturb them; the per-r kernel
 * pair is launched NR times and the summed result is compared. */
int main(void)
{
    double t_start, t_end;
    int i;   /* NOTE(review): unused */

    DATA_TYPE* A, *A_2;     /* GPU / CPU input tensors */
    DATA_TYPE* C4, *C4_2;   /* GPU / CPU coefficient matrices */
    DATA_TYPE* sum, *sum_2; /* GPU / CPU results */

    /* NOTE(review): malloc return values are not checked for NULL. */
    A = (DATA_TYPE*)malloc(NR * NQ * NP * sizeof(DATA_TYPE));
    C4 = (DATA_TYPE*)malloc(NP * NP * sizeof(DATA_TYPE));
    sum = (DATA_TYPE*)malloc(NR * NQ * NP * sizeof(DATA_TYPE));
    A_2 = (DATA_TYPE*)malloc(NR * NQ * NP * sizeof(DATA_TYPE));
    C4_2 = (DATA_TYPE*)malloc(NP * NP * sizeof(DATA_TYPE));
    sum_2 = (DATA_TYPE*)malloc(NR * NQ * NP * sizeof(DATA_TYPE));

    /* init_array is deterministic, so both copies hold identical data. */
    init_array(A, C4);
    init_array(A_2, C4_2);

    read_cl_file();
    cl_initialization();
    cl_mem_init(A, C4, sum);
    cl_load_prog();

    /* Timed GPU run: one kernel pair per r-slice. */
    t_start = rtclock();
    int r;
    for (r = 0; r < NR; r++)
    {
        cl_launch_kernel1(r);
        cl_launch_kernel2(r);
    }
    t_end = rtclock();

    /* Blocking read of the full result tensor from device memory. */
    errcode = clEnqueueReadBuffer(clCommandQue, c_mem_obj, CL_TRUE, 0, NR * NQ * NP * sizeof(DATA_TYPE), sum, 0, NULL, NULL);
    if(errcode != CL_SUCCESS) printf("Error in reading GPU mem\n");

    fprintf(stdout, "GPU Runtime: %0.6lfs\n", t_end - t_start);

    /* Timed CPU reference run on the untouched copies. */
    t_start = rtclock();
    doitgen(sum_2, A_2, C4_2);
    t_end = rtclock();
    fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start);

    compareResults(sum, sum_2);
    cl_clean_up();

    return 0;
}
/* Driver: GESUMMV (y = alpha*A*x + beta*B*x) on OpenCL; y is read back and
 * validated against a timed CPU reference run. */
int main(void)
{
    double t_start, t_end;

    DATA_TYPE* A;
    DATA_TYPE* B;
    DATA_TYPE* x;
    DATA_TYPE* y;   /* CPU result */
    DATA_TYPE* y_outputFromGpu;
    DATA_TYPE* tmp; /* scratch vector used by the kernel */

    /* NOTE(review): malloc return values are not checked for NULL. */
    A = (DATA_TYPE*)malloc(N*N*sizeof(DATA_TYPE));
    B = (DATA_TYPE*)malloc(N*N*sizeof(DATA_TYPE));
    x = (DATA_TYPE*)malloc(N*sizeof(DATA_TYPE));
    y = (DATA_TYPE*)malloc(N*sizeof(DATA_TYPE));
    y_outputFromGpu = (DATA_TYPE*)malloc(N*sizeof(DATA_TYPE));
    tmp = (DATA_TYPE*)malloc(N*sizeof(DATA_TYPE));

    init(A, x);

    read_cl_file();
    cl_initialization();
    cl_mem_init(A, B, x, y, tmp);
    cl_load_prog();

    cl_launch_kernel();

    /* Blocking read of y from device memory. */
    errcode = clEnqueueReadBuffer(clCommandQue, y_mem_obj, CL_TRUE, 0, N*sizeof(DATA_TYPE), y_outputFromGpu, 0, NULL, NULL);
    if(errcode != CL_SUCCESS) printf("Error in reading GPU mem\n");

    /* Timed CPU reference run. */
    t_start = rtclock();
    gesummv(A, B, x, y, tmp);
    t_end = rtclock();
    fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start);

    compareResults(y, y_outputFromGpu);
    cl_clean_up();

    free(A);
    free(B);
    free(x);
    free(y);
    free(y_outputFromGpu);
    free(tmp);

    return 0;
}
/* Driver: GEMVER-style kernel as an HMPP codelet.  The GPU run writes its w
 * result into wi while the CPU run writes w; the two are compared at the end. */
int main(int argc, char** argv)
{
    double t_start, t_end;

    /* Array declaration. */
    DATA_TYPE A[N][N];
    DATA_TYPE x[N];
    DATA_TYPE u1[N];
    DATA_TYPE u2[N];
    DATA_TYPE v2[N];
    DATA_TYPE v1[N];
    DATA_TYPE w[N];   /* CPU result */
    DATA_TYPE wi[N];  /* GPU result */
    DATA_TYPE y[N];
    DATA_TYPE z[N];

    /* Initialize array. */
    init(A, x, u1, u2, v2, v1, w, wi, y, z);

#pragma hmpp conv allocate
#pragma hmpp conv advancedload, args[A;x;u1;u2;v2;v1;w;y;z]

    t_start = rtclock();

    /* GPU run: note the output argument here is wi, not w. */
#pragma hmpp conv callsite, args[A;x;u1;u2;v2;v1;w;y;z].advancedload=true, asynchronous
    loop(A, x, u1, u2, v2, v1, wi, y, z);
#pragma hmpp conv synchronize

    t_end = rtclock();//);
    fprintf(stderr, "GPU Runtime: %0.6lfs\n", t_end - t_start);

#pragma hmpp conv delegatedstore, args[w]
#pragma hmpp conv release

    /* Timed CPU reference run. */
    t_start = rtclock();
    loop(A, x, u1, u2, v2, v1, w, y, z);
    t_end = rtclock();
    fprintf(stderr, "CPU Runtime: %0.6lfs\n", t_end - t_start);

    compareResults(w, wi);

    return 0;
}
/* Driver: BiCG sub-kernel as an HMPP codelet; the two GPU output vectors
 * (q and s) are validated against a CPU run of the same routine. */
int main(int argc, char** argv)
{
    double t_start, t_end;

    /* Array declaration. */
    DATA_TYPE A[NX][NY];
    DATA_TYPE p[NY];
    DATA_TYPE q[NX];
    DATA_TYPE q_outputFromGpu[NX];
    DATA_TYPE r[NX];
    DATA_TYPE s[NY];
    DATA_TYPE s_outputFromGpu[NY];

    /* Initialize array. */
    init_array(A, p, r);

#pragma hmpp bicg allocate
#pragma hmpp bicg advancedload, args[a;p;q;r;s]

    t_start = rtclock();

#pragma hmpp bicg callsite, args[a;p;q;r;s].advancedload=true, asynchronous
    runBicg(A, p, q_outputFromGpu, r, s_outputFromGpu);
#pragma hmpp bicg synchronize

    t_end = rtclock();
    fprintf(stderr, "GPU Runtime: %0.6lfs\n", t_end - t_start);

#pragma hmpp bicg delegatedstore, args[a;p;q;r;s]
#pragma hmpp bicg release

    /* Timed CPU reference run. */
    t_start = rtclock();
    runBicg(A, p, q, r, s);
    t_end = rtclock();
    fprintf(stderr, "CPU Runtime: %0.6lfs\n", t_end - t_start);

    compareResults(s, s_outputFromGpu, q, q_outputFromGpu);

    return 0;
}
/* Compares classified set results (myResult file) against reference results
 * (learning file) for sets [StartSet, FinishSet); logs every mismatching set
 * to compare_log.txt together with its stitch matrix, and returns the mean
 * per-set mark (0 if the log file cannot be created or the range is empty). */
float checkMultithreaded (string const & myResult, string const & learning)
{
    ofstream cmpLog("compare_log.txt", ofstream::trunc);
    if (!cmpLog.is_open()) {
        cerr << "Cannot create compare_log file.\n";
        return 0;
    }

    /* set number -> odds vector, parsed from each results file */
    map<int, vector<int> > myOdds = readOutput(myResult);
    map<int, vector<int> > trueOdds = readOutput(learning);

    int currentSetN = StartSet;
    int n_wrong = 0;
    float mark = 0;   /* running sum of per-set marks */
    Stitcher stitcher;

    while (currentSetN < FinishSet) {
        vector<int> v1 = getNextSet(currentSetN, myOdds);
        vector<int> v2 = getNextSet(currentSetN, trueOdds);

        float tmpMark = compareResults(v1, v2);
        /* NOTE(review): exact float equality -- relies on compareResults
         * returning a literal 1.0 for perfect matches; confirm. */
        if (tmpMark != 1.0) {
            /* log the mismatching set with both odds vectors and its stitch matrix */
            cmpLog << currentSetN << "\nmy odds:";
            printIntVector(cmpLog, v1);
            cmpLog << "true odds:";
            printIntVector(cmpLog, v2);
            int tmp;
            printMat_(cmpLog, stitcher.calcSetStitch2(currentSetN, &tmp));
            //Stitcher::printStitchMatrix(cmpLog, stitcher.calcSetStitch2(currentSetN));
            n_wrong += 1;
        }
        mark += tmpMark;
        currentSetN += 1;
    }
    cmpLog << n_wrong << " classified wrong.";
    cmpLog.close();

    /* average over the sets actually processed; guard against empty range */
    return (currentSetN-StartSet>0) ? mark/(currentSetN-StartSet) : mark;
}