int main(){ // generate an appropriate test input printf("Lets find some good floats to run our tests against!\n"); fertile_valley fv = FarmEscapers(100000, 500, 150000); printf("Gee golly, %p is a good place to sow wild floats. \n", &fv); // execute one recurrence per loop printf("Running loop with one iteration going on per loop.\n"); time_start(); SinglyExecuteRecurrence(&fv); time_end(); // execute two recurrences per loop printf("Running loop with two iterations going on per loop\n"); time_start(); DoubleExecuteRecurrence(&fv); time_end(); // execute three recurrences per loop printf("Running loop with three iterations going on per loop\n"); time_start(); TriplyExecuteRecurrence(&fv); time_end(); // execute three recurrences per loop printf("Running loop with four iteration going on per loop\n"); time_start(); TriplyExecuteRecurrence(&fv); time_end(); return 0; }
void bw(uint64_t size) { uint8_t *H, *D; int i; printf("%"PRIu64" ", size); cudaInit(); printf("0 "); // reg func H = (uint8_t*)malloc(sizeof(uint8_t)*size); time_begin(); cudaMalloc((void**)&D, sizeof(uint8_t)*size); printf("%u ", time_end()); for(i=0; i<size; i++) { H[i]=i%255; } time_begin(); cudaMemcpy(D, H, size*sizeof(uint8_t), cudaMemcpyHostToDevice); printf("%u ", time_end()); printf("0 "); // exec kernel for(i=0; i<size; i++) { H[i]=0; } time_begin(); cudaMemcpy(H, D, size*sizeof(uint8_t), cudaMemcpyDeviceToHost); printf("%u ", time_end()); for(i=0; i<size; i++) { if(H[i]!=i%255) printf("error %d\n", i); } free(H); time_begin(); cudaFree(D); printf("%u ", time_end()); cudaFini(); printf("\n"); }
void testCombinaisonEstNombreArmstrong_Executer371() { puts("Debut testCombinaisonEstNombreArmstrong_Executer371"); CachePuissance10 *cachePuissance10; cachePuissance10 = InitCachePuissance10(19); CachePuissanceDigit *cachePuissanceDigit; cachePuissanceDigit = InitCachePuissanceDigit(1, 19); char combinaison[3] = {1, 7, 3}; char combinaison2[19] = {3,2,8,9,5,8,2,9,8,4,4,4,3,1,8,7,0,3,2}; char combinaison3[2] = {3,2}; TacheCombinaisonEstNombreArmstrong *tache1, *tache2, *tache3; tache1 = TacheCombinaisonEstNombreArmstrong_Init(3, 300, 1000, cachePuissanceDigit, cachePuissance10); tache2 = TacheCombinaisonEstNombreArmstrong_Init(19, 1, 4000000000000000000LL, cachePuissanceDigit, cachePuissance10); tache3 = TacheCombinaisonEstNombreArmstrong_Init(2, 31, 34, cachePuissanceDigit, cachePuissance10); long long test_time; // Test1 TacheCombinaisonEstNombreArmstrong_SetCombinaison(tache1, combinaison); test_time = time_start(); TacheCombinaisonEstNombreArmstrong_Executer(tache1); printf("T: 371 : %lld\n", time_end(test_time)); assert(tache1 -> resultat == 371); // Test2 TacheCombinaisonEstNombreArmstrong_SetCombinaison(tache2, combinaison2); test_time = time_start(); TacheCombinaisonEstNombreArmstrong_Executer(tache2); //printf("D: tache2 -> result %lld\n", tache2 -> resultat); printf("T: 3289582984443187032 : %lld\n", time_end(test_time)); assert(tache2 -> resultat == 3289582984443187032LLU); // Test3 TacheCombinaisonEstNombreArmstrong_SetCombinaison(tache3, combinaison3); test_time = time_start(); TacheCombinaisonEstNombreArmstrong_Executer(tache3); printf("T: 23 : %lld\n", time_end(test_time)); assert(tache3 -> resultat == -1); TacheCombinaisonEstNombreArmstrong_Detruire(tache1); TacheCombinaisonEstNombreArmstrong_Detruire(tache2); TacheCombinaisonEstNombreArmstrong_Detruire(tache3); }
//int main(int argc, char *argv[]) int main() { time_start(); { mthread_t thread1, thread2; void *retval1, *retval2; int err; printf("le main lance 2 threads...\n"); err = mthread_create(&thread1, threadfunc, "thread1"); assert(!err); err = mthread_create(&thread2, threadfunc, "thread2"); assert(!err); printf("le main a lancé les threads %p et %p\n", thread1, thread2); printf("le main attend les threads\n"); err = mthread_join(thread2, &retval2); assert(!err); err = mthread_join(thread1, &retval1); assert(!err); printf("les threads ont terminé en renvoyant '%s' and '%s'\n", (char *) retval1, (char *) retval2); } time_end() ; return 0; }
cudaError_t cudaSetupArgument( const void *arg, size_t size, size_t offset) { pfunc(); /* cudaKernelPara: uint32_t uint32_t uint32_t ============================================================================= | number of arg | arg1 size | arg1 data | arg2 size | arg2 data | ..... ============================================================================= */ // set data size memcpy(&cudaKernelPara[cudaParaSize], &size, sizeof(uint32_t)); ptrace("size= %u\n", *(uint32_t*)&cudaKernelPara[cudaParaSize]); cudaParaSize += sizeof(uint32_t); // set data memcpy(&cudaKernelPara[cudaParaSize], arg, size); ptrace("value= %llx\n", *(unsigned long long*)&cudaKernelPara[cudaParaSize]); cudaParaSize += size; (*((uint32_t*)cudaKernelPara))++; time_end(t_SetArg); return cudaSuccess; }
/*
 * Records the launch configuration for the next kernel call in
 * cudaKernelConf and resets the argument staging buffer.
 *
 * cudaKernelConf layout: [0..2] grid x/y/z, [3..5] block x/y/z,
 * [6] shared memory size, [7] stream, where a NULL stream is stored as
 * (uint64_t)-1.
 *
 * Always returns cudaSuccess.
 */
cudaError_t cudaConfigureCall(
        dim3 gridDim,
        dim3 blockDim,
        size_t sharedMem,
        cudaStream_t stream)
{
    pfunc();
    time_begin();

    ptrace("gridDim= %d %d %d\n", gridDim.x, gridDim.y, gridDim.z);
    ptrace("blockDim= %d %d %d\n", blockDim.x, blockDim.y, blockDim.z);
    ptrace("sharedMem= %lu\n", sharedMem);
    ptrace("stream= %p\n", (void*)stream);
    /* ptrace("size= %lu\n", sizeof(cudaStream_t)); */

    /* grid extents */
    cudaKernelConf[0] = gridDim.x;
    cudaKernelConf[1] = gridDim.y;
    cudaKernelConf[2] = gridDim.z;

    /* block extents */
    cudaKernelConf[3] = blockDim.x;
    cudaKernelConf[4] = blockDim.y;
    cudaKernelConf[5] = blockDim.z;

    cudaKernelConf[6] = sharedMem;

    if (stream == NULL) {
        cudaKernelConf[7] = (uint64_t)-1;
    } else {
        cudaKernelConf[7] = (uint64_t)stream;
    }

    /* clear the staging buffer; the first 32 bits are left for the argument count */
    memset(cudaKernelPara, 0, cudaKernelParaMaxSize);
    cudaParaSize = sizeof(uint32_t);

    time_end(t_ConfigCall);
    return cudaSuccess;
}
void testPuissance10Clone() { puts("L: Debut du test testPuissance10Exposant5"); CachePuissance10 *cachePuissance10Exposant5, *cloneCache; cachePuissance10Exposant5 = InitCachePuissance10(5); long long test_time; test_time= time_start(); cloneCache = CloneCachePuissance10(cachePuissance10Exposant5); // Tests assert(cloneCache -> cache != cachePuissance10Exposant5 -> cache); assert(GetPuissance10(cloneCache,0) == 1); assert(GetPuissance10(cloneCache,1) == 10); assert(GetPuissance10(cloneCache,5) == 100000); DetruireCachePuissance10(cloneCache); DetruireCachePuissance10(cachePuissance10Exposant5); printf("T: %lld\n", time_end(test_time)); puts("L: Fin du test testPuissance10Exposant5"); puts(""); }
void testPuissanceDigitClone() { puts("L: Debut du test testPuissanceDigitClone"); CachePuissanceDigit *cachePuissanceDigitExposant5, *cloneCache; cachePuissanceDigitExposant5 = InitCachePuissanceDigit(0, 20); long long test_time; test_time= time_start(); cloneCache = CloneCachePuissanceDigit(cachePuissanceDigitExposant5); // Tests assert(cloneCache -> cache != cachePuissanceDigitExposant5 -> cache); assert(GetPuissanceDigit(cachePuissanceDigitExposant5, 4, 3) == 64); assert(GetPuissanceDigit(cachePuissanceDigitExposant5, 0, 3) == 0); assert(GetPuissanceDigit(cachePuissanceDigitExposant5, 0, 5) == 0); DetruireCachePuissanceDigit(cloneCache); DetruireCachePuissanceDigit(cachePuissanceDigitExposant5); printf("T: %lld\n", time_end(test_time)); puts("L: Fin du test testPuissanceDigitClone"); puts(""); }
/*
 * Returns the time per call of f(arg), in seconds according to the
 * original header comment: the value from time_end() divided by the
 * number of calls in the final batch.
 *
 * After one warm-up call, batches are timed starting at 10 calls. A batch
 * that takes longer than BENCH_INTERVAL ends the measurement; otherwise
 * the batch size grows tenfold (under a tenth of the interval) or doubles.
 */
static double
time_function(void (*f)(void *arg), void *arg)
{
  unsigned ncalls = 10;
  double elapsed;

  /* Warm up */
  f(arg);

  while (1)
    {
      unsigned remaining;

      time_start();
      for (remaining = ncalls; remaining > 0; remaining--)
        f(arg);
      elapsed = time_end();

      if (elapsed > BENCH_INTERVAL)
        break;

      /* too short to trust: enlarge the batch and measure again */
      ncalls *= (elapsed < BENCH_INTERVAL / 10) ? 10 : 2;
    }

  return elapsed / ncalls;
}
/*------------------------------------------------------------------*/ void AzRgforest::optimize_resetTarget() { clock_t b_time; time_begin(&b_time); int t_num = ens->size(); AzBytArr s("Calling optimizer with "); s.cn(t_num); s.c(" trees and "); s.cn(l_num); s.c(" leaves"); AzTimeLog::print(s, out); opt->update(data, ens, &v_p); resetTarget(); int tx; for (tx = 0; tx < t_num; ++tx) { ens->tree_u(tx)->removeSplitAssessment(); /* since weights changed */ } isOpt = true; time_end(b_time, &opt_time); }
/*------------------------------------------------------------------*/ bool AzRgforest::growForest() { clock_t b_time; time_begin(&b_time); /*--- find the best split ---*/ AzTrTsplit best_split; searchBestSplit(&best_split); if (shouldExit(&best_split)) { /* exit if no more split */ return true; /* exit */ } /*--- split the node ---*/ double w_inc; int leaf_nx[2] = {-1,-1}; const AzRgfTree *tree = splitNode(&best_split, &w_inc, leaf_nx); if (lmax_timer.reachedMax(l_num, "AzRgforest: #leaf", out)) { return true; /* #leaf reached max; exit */ } /*--- update target ---*/ updateTarget(tree, leaf_nx, w_inc); time_end(b_time, &search_time); return false; /* don't exit */ }
cudaError_t cudaMemcpy( void* dst, const void* src, size_t count, enum cudaMemcpyKind kind) { VirtioQCArg arg; pfunc(); time_begin(); memset(&arg, 0, sizeof(VirtioQCArg)); ptrace("dst= %p , src= %p ,size= %lu\n", (void*)dst, (void*)src, count); if( kind == cudaMemcpyHostToDevice) { ptr( arg.pA, dst, 0); ptr( arg.pB, src, count); arg.flag = 1; } else if( kind == cudaMemcpyDeviceToHost ) { ptr( arg.pA, dst, count); ptr( arg.pB, src, 0); arg.flag = 2; } else if( kind == cudaMemcpyDeviceToDevice ) { ptr( arg.pA, dst, 0); ptr( arg.pB, src, count); arg.flag = 3; } else { error("Not impletment cudaMemcpyKind %d\n", kind); return cudaErrorInvalidValue; } send_cmd_to_device( VIRTQC_cudaMemcpy, &arg); if(kind==1){ time_end(t_MemcpyH2D); }else if(kind==2){ time_end(t_MemcpyD2H); } return (cudaError_t)arg.cmd; }
void testCombinaisonEstNombreArmstrong_Clone() { puts("Debut testCombinaisonEstNombreArmstrong_Clone"); char combinaison[3] = {1, 2, 3}; char combinaison2[3] = {1, 2, 4}; TacheCombinaisonEstNombreArmstrong *tache1, *tache2; CachePuissance10 * cachePuissance10; cachePuissance10 = InitCachePuissance10(5); CachePuissanceDigit *cachePuissanceDigit; cachePuissanceDigit = InitCachePuissanceDigit(1, 5); long long test_time_init; test_time_init = time_start(); tache1 = TacheCombinaisonEstNombreArmstrong_Init(3, 200, 400, cachePuissanceDigit, cachePuissance10); TacheCombinaisonEstNombreArmstrong_SetCombinaison(tache1, combinaison); printf("T: Init : %lld\n", time_end(test_time_init)); long long test_time_clone; test_time_clone= time_start(); tache2 = TacheCombinaisonEstNombreArmstrong_Clone(tache1); TacheCombinaisonEstNombreArmstrong_SetCombinaison(tache2, combinaison2); printf("T: Clone : %lld\n", time_end(test_time_clone)); assert((tache1 -> combinaison)[1] == 2); assert((tache1 -> combinaison)[2] == 3); assert((tache1 -> combinaison)[1] == 2); assert((tache2 -> combinaison)[2] == 4); TacheCombinaisonEstNombreArmstrong_Detruire(tache1); TacheCombinaisonEstNombreArmstrong_Detruire(tache2); puts("Fin testCombinaisonEstNombreArmstrong_Clone"); puts(""); }
int cc_sweep_phase(char *buffer, int bufsize, struct cc **tokens) { struct cc **pp = tokens; int i, n; #ifdef STATS int nn, ii; #endif #ifdef STATS if (verbose >= 0) time_begin(); if (verbose > 0) printf("Sweep:"); #endif cc_sweep0(buffer, bufsize, tt.tt_token_min - 1); #ifdef STATS ntoken_stat = 0; nn = 0; ii = 0; #endif for (i = tt.tt_token_min; i <= tt.tt_token_max; i++) { #ifdef STATS if (verbose > 0) { if (ii > 7) { printf("\n "); ii = 0; } ii++; printf(" (%d", i); (void) fflush(stdout); } #endif n = cc_sweep(buffer, bufsize, pp, i); pp += n; #ifdef STATS if (verbose > 0) { if (--n > 0) { printf(" %d", n); nn += n; } putchar(')'); } #endif } qinsertq(&cc_q1b, &cc_q1a); #ifdef STATS if (verbose > 0) printf("\n %d tokens, %d candidates\n", ntoken_stat, nn); if (verbose >= 0) time_end(); #endif return pp - tokens; }
/*
 * Sends VIRTQC_cudaUnregisterFatBinary (with no argument block), frees
 * the handle storage and then closes the device.
 *
 * fatCubinHandle - handle to release; it is dereferenced for tracing and
 *                  passed to free()
 */
void __cudaUnregisterFatBinary(void **fatCubinHandle)
{
    pfunc();
    time_begin();

    ptrace("fatCubinHandle= %p, value= %p\n",
           fatCubinHandle, *fatCubinHandle);

    send_cmd_to_device( VIRTQC_cudaUnregisterFatBinary, NULL);
    free(fatCubinHandle);

    /* timing stops before the device is closed */
    time_end(t_UnregFatbin);

    close_device();
}
/*
 * Sends VIRTQC_cudaRegisterFunction for one kernel.
 *
 * The command carries the fat binary (pointer and fatSize) in pA, the
 * device function name including its terminator in pB, and the host
 * function address truncated to 32 bits in `flag`.
 *
 * deviceFun, thread_limit, tid, bid, bDim, gDim and wSize are only traced;
 * the pointer arguments among them may be NULL.
 */
void __cudaRegisterFunction(
        void   **fatCubinHandle,
        const char *hostFun,
        char    *deviceFun,
        const char *deviceName,
        int     thread_limit,
        uint3   *tid,
        uint3   *bid,
        dim3    *bDim,
        dim3    *gDim,
        int     *wSize
)
{
    VirtioQCArg req;
    computeFatBinaryFormat_t binary;

    pfunc();
    time_begin();

    ptrace("fatCubinHandle= %p, value= %p\n", fatCubinHandle, *fatCubinHandle);
    ptrace("hostFun= %s (%p)\n", hostFun, hostFun);
    ptrace("deviceFun= %s (%p)\n", deviceFun, deviceFun);
    ptrace("deviceName= %s\n", deviceName);
    ptrace("thread_limit= %d\n", thread_limit);

    if (tid) {
        ptrace("tid= %u %u %u\n", tid->x, tid->y, tid->z);
    } else {
        ptrace("tid is NULL\n");
    }

    if (bid) {
        ptrace("bid= %u %u %u\n", bid->x, bid->y, bid->z);
    } else {
        ptrace("bid is NULL\n");
    }

    if (bDim) {
        ptrace("bDim= %u %u %u\n", bDim->x, bDim->y, bDim->z);
    } else {
        ptrace("bDim is NULL\n");
    }

    if (gDim) {
        ptrace("gDim= %u %u %u\n", gDim->x, gDim->y, gDim->z);
    } else {
        ptrace("gDim is NULL\n");
    }

    if (wSize) {
        ptrace("wSize= %d\n", *wSize);
    } else {
        ptrace("wSize is NULL\n");
    }

    memset(&req, 0, sizeof(req));
    binary = (computeFatBinaryFormat_t)(*fatCubinHandle);

    ptr( req.pA , binary, binary->fatSize);
    ptr( req.pB , deviceName , strlen(deviceName)+1 );
    req.flag = (uint32_t)(uint64_t)hostFun;

    ptrace("pA= %p, pASize= %u, pB= %p, pBSize= %u\n",
           (void*)req.pA, req.pASize, (void*)req.pB, req.pBSize);

    send_cmd_to_device( VIRTQC_cudaRegisterFunction, &req);

    time_end(t_RegFunc);
}
cudaError_t cudaDeviceReset(void) { VirtioQCArg arg; pfunc(); time_begin(); memset(&arg, 0, sizeof(VirtioQCArg)); send_cmd_to_device( VIRTQC_cudaDeviceReset, &arg); time_end(t_DevReset); return (cudaError_t)arg.cmd; }
cudaError_t cudaGetLastError(void) { VirtioQCArg arg; pfunc(); time_begin(); memset(&arg, 0, sizeof(VirtioQCArg)); send_cmd_to_device( VIRTQC_cudaGetLastError, &arg); time_end(t_GetLastError); return (cudaError_t)arg.cmd; }
cudaError_t cudaSetDevice(int device) { VirtioQCArg arg; pfunc(); time_begin(); memset(&arg, 0, sizeof(VirtioQCArg)); ptr( arg.pA, device, 0); send_cmd_to_device( VIRTQC_cudaSetDevice, &arg); time_end(t_SetDev); return (cudaError_t)arg.cmd; }
cudaError_t cudaGetDeviceCount(int *count) { VirtioQCArg arg; pfunc(); time_begin(); memset(&arg, 0, sizeof(VirtioQCArg)); send_cmd_to_device( VIRTQC_cudaGetDeviceCount, &arg); *count = (int)arg.pA; time_end(t_GetDevCount); return (cudaError_t)arg.cmd; }
/*
 * Sends VIRTQC_cudaEventDestroy with the event handle in pA and returns
 * the status the device left in the command block.
 *
 * event - event handle to forward
 */
cudaError_t cudaEventDestroy(cudaEvent_t event)
{
    VirtioQCArg req;
    cudaError_t status;

    pfunc();
    time_begin();

    memset(&req, 0, sizeof(req));
    ptr( req.pA, event, 0);

    send_cmd_to_device( VIRTQC_cudaEventDestroy, &req);

    time_end(t_EventDestroy);

    status = (cudaError_t)req.cmd;
    return status;
}
cudaError_t cudaRuntimeGetVersion(int *runtimeVersion) { VirtioQCArg arg; pfunc(); time_begin(); memset(&arg, 0, sizeof(VirtioQCArg)); send_cmd_to_device( VIRTQC_cudaRuntimeGetVersion, &arg); *runtimeVersion = (uint64_t)arg.pA; time_end(t_RuntimeGetVersion); return (cudaError_t)arg.cmd; }
cudaError_t cudaDriverGetVersion(int *driverVersion) { VirtioQCArg arg; pfunc(); time_begin(); memset(&arg, 0, sizeof(VirtioQCArg)); send_cmd_to_device( VIRTQC_cudaDriverGetVersion, &arg); *driverVersion = (int)arg.pA; time_end(t_DriverGetVersion); return (cudaError_t)arg.cmd; }
/*
 * Walks the first `ntoken` entries of `tokens`, calling cc_compress() at
 * the current position and advancing by the count it returns.
 *
 * With STATS defined, per-call and total counters are printed when
 * verbose > 0 and the phase is bracketed by time_begin()/time_end() when
 * verbose >= 0.
 */
void
cc_compress_phase1(struct cc **output, struct cc **tokens, int ntoken, int flag)
{
	struct cc **cursor;
	struct cc **limit = tokens + ntoken;
#ifdef STATS
	int column = 0;
	int total_tokens = 0, total_ccount = 0, total_ncover = 0;

	if (verbose >= 0)
		time_begin();
	if (verbose > 0)
		printf("Compress:");
#endif

	for (cursor = tokens; cursor < limit; ) {
#ifdef STATS
		if (verbose > 0) {
			/* per-call counters start from zero */
			ntoken_stat = 0;
			ccount_stat = 0;
			ncover_stat = 0;
			/* wrap the progress line after three entries */
			if (column > 2) {
				printf("\n ");
				column = 0;
			}
			column++;
			printf(" (%d", (*cursor)->length);
			(void) fflush(stdout);
		}
#endif
		cursor += cc_compress(output, cursor, flag);
#ifdef STATS
		if (verbose > 0) {
			printf(" %dt %du %dc)", ntoken_stat, ccount_stat,
			       ncover_stat);
			total_tokens += ntoken_stat;
			total_ccount += ccount_stat;
			total_ncover += ncover_stat;
		}
#endif
	}

#ifdef STATS
	if (verbose > 0)
		printf("\n total: (%dt %du %dc)\n",
		       total_tokens, total_ccount, total_ncover);
	if (verbose >= 0)
		time_end();
#endif
}
/*
 * Sends VIRTQC_cudaEventCreate and stores the handle the device left in
 * pA into *event.
 *
 * event - receives the new handle; written unconditionally, so it must
 *         not be NULL
 *
 * Returns the status the device left in the command block.
 */
cudaError_t cudaEventCreate(cudaEvent_t *event)
{
    VirtioQCArg req;
    cudaError_t status;

    pfunc();
    time_begin();

    memset(&req, 0, sizeof(req));
    send_cmd_to_device( VIRTQC_cudaEventCreate, &req);

    *event = (void*)req.pA;

    time_end(t_EventCreate);

    status = (cudaError_t)req.cmd;
    return status;
}
cudaError_t cudaGetDeviceProperties(struct cudaDeviceProp *prop, int device) { VirtioQCArg arg; pfunc(); time_begin(); memset(&arg, 0, sizeof(VirtioQCArg)); ptr( arg.pA, prop, sizeof(struct cudaDeviceProp)); ptr( arg.pB, device, 0); send_cmd_to_device( VIRTQC_cudaGetDeviceProperties, &arg); time_end(t_GetDevProp); return (cudaError_t)arg.cmd; }
cudaError_t cudaFree(void* devPtr) { VirtioQCArg arg; pfunc(); time_begin(); memset(&arg, 0, sizeof(VirtioQCArg)); ptr( arg.pA, devPtr, 0); send_cmd_to_device( VIRTQC_cudaFree, &arg); ptrace("devPtr= %p\n", (void*)arg.pA); time_end(t_Free); return (cudaError_t)arg.cmd; }
// Fonctions int main(int argc, char* argv[]) { unsigned int n = 10, i; struct indices indice; // Arg Filter if (argc > 1) n = atoi(argv[1]); if (argc > 2) affiche_tableau = 1; if (argc > 3) affiche_arg = 1; if (n<1) { printf("Veuillez entrer un entier positif.\n"); return 0; } // on crée le tableau tableau = malloc (n*sizeof(unsigned long int)); printf("\tn = %u\n", n); if (affiche_tableau) printf("tab :"); // on l'initialise for ( i=0 ; i<n ; i++) { tableau[i] = i; if (affiche_tableau) printf(" %lu", tableau[i]); } if (affiche_tableau) printf("\n"); indice.a = 0; indice.b = n-1; time_start(); { tab_sum((void*) &indice); if (affiche_arg) printf("\n"); printf("\tLa somme des éléments du tableau est égale à : %lu\n", tableau[0]); } time_end(); return EXIT_SUCCESS; }
cudaError_t cudaMalloc(void** devPtr, size_t size) { VirtioQCArg arg; pfunc(); time_begin(); memset(&arg, 0, sizeof(VirtioQCArg)); ptr( arg.pA, 0, 0); arg.flag = size; send_cmd_to_device( VIRTQC_cudaMalloc, &arg); *devPtr = (void*)arg.pA; ptrace("devPtr= %p\n", (void*)arg.pA); time_end(t_Malloc); return (cudaError_t)arg.cmd; }
/*
 * Sends VIRTQC_cudaEventElapsedTime with the two event handles in pA and
 * pB, then copies sizeof(float) bytes from the command's `flag` field
 * into *ms.
 *
 * ms    - receives the result; written unconditionally, so it must not
 *         be NULL
 * start - first event handle
 * end   - second event handle
 *
 * Returns the status the device left in the command block.
 */
cudaError_t cudaEventElapsedTime(float *ms, cudaEvent_t start, cudaEvent_t end)
{
    VirtioQCArg req;
    cudaError_t status;

    pfunc();
    time_begin();

    memset(&req, 0, sizeof(req));
    ptr( req.pA, start, 0);
    ptr( req.pB, end, 0);

    send_cmd_to_device( VIRTQC_cudaEventElapsedTime, &req);

    /* the float result travels as raw bytes inside the flag field */
    memcpy(ms, &req.flag, sizeof(float));

    time_end(t_EventElapsedTime);

    status = (cudaError_t)req.cmd;
    return status;
}