/*
 * Mount devPath on mountedPoint, trying "vfat", then "ntfs", then "ext4".
 *
 * devPath      - device to mount; must not be NULL.
 * mountedPoint - directory to mount on; created with mode 0755 first
 *                (the mkdir result is deliberately ignored, the directory
 *                may already exist).
 *
 * Returns 0 as soon as one mount() call succeeds, -1 if an argument is NULL
 * or if every filesystem type fails.
 */
int ensure_dev_mounted(const char * devPath,const char * mountedPoint){
    static const char * const fsTypes[] = { "vfat", "ntfs", "ext4" };
    const unsigned int fsTypeCount = sizeof(fsTypes) / sizeof(fsTypes[0]);
    unsigned int i;
    int ret;
    int mountErrno = 0;

    if(devPath == NULL || mountedPoint == NULL){
        return -1;
    }

    mkdir(mountedPoint, 0755); /* in case it doesn't already exist */

    for(i = 0; i < fsTypeCount; i++){
        startTiming();
        ret = mount(devPath, mountedPoint, fsTypes[i],
                    MS_NOATIME | MS_NODEV | MS_NODIRATIME, "");
        /* Capture errno right away: endTimming() runs before the failure is
         * reported and could overwrite it. */
        mountErrno = errno;
        endTimming();

        if(ret == 0){
            LOGD("mount %s with fs '%s' success\n", devPath, fsTypes[i]);
            return 0;
        }
    }

    /* Reports the error of the last attempt (ext4), as before. */
    LOGD("failed to mount %s (%s)\n", devPath, strerror(mountErrno));
    return -1;
}
/*
 * Reads the whole TEST_INPUT file into memory, runs one matching pass over it
 * and prints the elapsed time and transfer rate.
 *
 * machine        - the state machine to run; reinterpreted as a
 *                  TableStateMachine when isTableMachine is non-zero.
 * isTableMachine - selects matchTableMachine() (non-zero) or match() (zero).
 *
 * Any I/O or allocation failure prints a message to stderr and exits with
 * status 1.
 */
void runTest(StateMachine *machine, int isTableMachine) {
	FILE *file;
	long fileSize;
	int buffSize, len;
	char *buff;
	double rate;
	Timer t;
	MachineStats stats;
	int is_heavy, last_idx_in_root;
	double uncommonRate;

	stats.totalFailures = 0;
	stats.totalGotos = 0;

	file = fopen(TEST_INPUT, "rb");
	if (!file) {
		fprintf(stderr, "Error opening file for reading\n");
		exit(1);
	}

	/* Determine the file size. ftell() returns -1 on failure, which must not
	 * be used as an allocation or read length. */
	if (fseek(file, 0L, SEEK_END) != 0) {
		fprintf(stderr, "Error determining size of input file\n");
		exit(1);
	}
	fileSize = ftell(file);
	if (fileSize < 0 || fseek(file, 0L, SEEK_SET) != 0) {
		fprintf(stderr, "Error determining size of input file\n");
		exit(1);
	}
	buffSize = (int)fileSize;

	buff = (char*)malloc(sizeof(char) * buffSize);
	if (buff == NULL) {
		fprintf(stderr, "Error allocating memory for buffer\n");
		exit(1);
	}

	len = fread(buff, sizeof(char), buffSize, file);
	if (len != buffSize) {
		fprintf(stderr, "Error reading data from file\n");
		exit(1);
	}

	/* Only the matching call itself is timed. */
	t.micros = 0;
	if (isTableMachine) {
		startTiming(&t);
		matchTableMachine((TableStateMachine*)machine, NULL, FALSE, buff, buffSize, 1, NULL, NULL, NULL, NULL, &is_heavy, &last_idx_in_root, &uncommonRate);
		endTiming(&t);
	} else {
		startTiming(&t);
		match(machine, buff, buffSize, 0, &stats, 0, 0);
		endTiming(&t);
	}

	rate = GET_TRANSFER_RATE(buffSize, &t);

	/* The same size and rate are printed in both the "No H" and "w/ H"
	 * columns. */
	printf("Time(micros)\tData(No H)\tData(w/ H)\tRate(No H) Mb/s\tRate (w/ H) Mb/s\n");
	printf("%8ld\t%9d\t%9d\t%5.4f\t%5.4f\n", t.micros, buffSize, buffSize, rate, rate);

	free(buff);
	fclose(file);
}
void render() { // Render frame if (prepared) { startTiming(); if (animating) { if (animStart > 0.0f) { animStart -= 0.15f * (1.0f / frameTimer); } if ((animate) & (animStart <= 0.0f)) { timer += 0.5f * (1.0f / frameTimer); if (timer > 1.0) { timer -= 1.0f; } } updateUniformBuffers(); } draw(); endTiming(); } }
// Runs the Halide CPU bilateral filter: one untimed warm-up call, then
// params.iterations calls between startTiming() and stopTiming().
// Returns the result of outputResults(), or false when the build has Halide
// disabled.
bool Bilateral::runHalideCPU(Image input, Image output, const Params& params)
{
#if !ENABLE_HALIDE
  reportStatus("Halide not enabled during build.");
  return false;
#else
  // Wrap both images in Halide buffers
  buffer_t srcBuffer = createHalideBuffer(input);
  buffer_t dstBuffer = createHalideBuffer(output);

  reportStatus("Running Halide CPU filter");

  // Untimed warm-up run
  halide_bilateral_cpu(&srcBuffer, &dstBuffer);

  // Timed runs
  startTiming();
  for (int run = 0; run < params.iterations; ++run)
  {
    halide_bilateral_cpu(&srcBuffer, &dstBuffer);
  }
  stopTiming();

  halide_release(NULL);

  return outputResults(input, output, params);
#endif
}
/*======================================================================== initReadYUV DEFINITION ======================================================================*/ void initReadYUV(int id, int xSize, int ySize) { int fsize; if((ptfile[id] = fopen(path[id], "rb")) == NULL ) { fprintf(stderr,"ERROR: Task read cannot open yuv_file '%s'\n", path[id]); system("PAUSE"); return; } #ifdef VERBOSE printf("Opened file '%s'\n", PATH); #endif // Obtain file size: fseek (ptfile[id] , 0 , SEEK_END); fsize = ftell (ptfile[id]); rewind (ptfile[id]); if(fsize < NB_FRAME*(xSize*ySize + xSize*ySize/2)) { fprintf(stderr,"ERROR: Task read yuv_file incorrect size"); //system("PAUSE"); //return; } #ifdef VERBOSE printf("Correct size for yuv_file '%s'\n", PATH); #endif // Set initial clock startTiming(0); }
// Restarts the timer: clears base_time, calls startTiming() and marks the
// timer as running.
// Returns the value getTime() reported before anything was reset.
double RFWTimer::start()
{
    // Must be sampled first, before base_time is cleared below.
    const double timeBeforeReset = getTime();

    base_time = 0.0;
    startTiming();
    running = true;

    return timeBeforeReset;
}
// Runs the Halide GPU Sobel filter: one untimed warm-up call followed by a
// device sync, then params.iterations calls plus a final device sync between
// startTiming() and stopTiming(). The output buffer is copied back to the
// host before the results are reported.
// Returns the result of outputResults(), or false when the build has Halide
// disabled.
bool Sobel::runHalideGPU(Image input, Image output, const Params& params)
{
#if !ENABLE_HALIDE
  reportStatus("Halide not enabled during build.");
  return false;
#else
  // Wrap both images in Halide buffers
  buffer_t srcBuffer = createHalideBuffer(input);
  buffer_t dstBuffer = createHalideBuffer(output);

  reportStatus("Running Halide GPU filter");

  // Untimed warm-up run; the input is flagged host-dirty beforehand
  srcBuffer.host_dirty = true;
  halide_sobel_gpu(&srcBuffer, &dstBuffer);
  halide_dev_sync(NULL);

  // Timed runs; the sync is inside the timed region
  startTiming();
  for (int run = 0; run < params.iterations; ++run)
  {
    halide_sobel_gpu(&srcBuffer, &dstBuffer);
  }
  halide_dev_sync(NULL);
  stopTiming();

  halide_copy_to_host(NULL, &dstBuffer);
  halide_release(NULL);

  return outputResults(input, output, params);
#endif
}
void run() { while (!_done) { if (_process) { startTiming(_viewer, otherThreadTimeName); //------------------------------------------------------------ // Your processing goes here. // Do nothing for the specified number of milliseconds, just so we can // see it in the stats. osg::Timer_t startTick = osg::Timer::instance()->tick(); while (osg::Timer::instance()->delta_m(startTick, osg::Timer::instance()->tick()) < _timeToRun) { OpenThreads::Thread::YieldCurrentThread(); } //------------------------------------------------------------ endTiming(_viewer, otherThreadTimeName); _process = false; } else { OpenThreads::Thread::microSleep(50); } } }
bool Sobel::runOpenCL(Image input, Image output, const Params& params) { if (!initCL(params, sobel_kernel, "-cl-fast-relaxed-math")) { return false; } cl_int err; cl_kernel kernel; cl_mem d_input, d_output; cl_image_format format = {CL_RGBA, CL_UNORM_INT8}; kernel = clCreateKernel(m_program, "sobel", &err); CHECK_ERROR_OCL(err, "creating kernel", return false); d_input = clCreateImage2D( m_context, CL_MEM_READ_ONLY, &format, input.width, input.height, 0, NULL, &err); CHECK_ERROR_OCL(err, "creating input image", return false); d_output = clCreateImage2D( m_context, CL_MEM_WRITE_ONLY, &format, input.width, input.height, 0, NULL, &err); CHECK_ERROR_OCL(err, "creating output image", return false); size_t origin[3] = {0, 0, 0}; size_t region[3] = {input.width, input.height, 1}; err = clEnqueueWriteImage( m_queue, d_input, CL_TRUE, origin, region, 0, 0, input.data, 0, NULL, NULL); CHECK_ERROR_OCL(err, "writing image data", return false); err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &d_input); err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &d_output); CHECK_ERROR_OCL(err, "setting kernel arguments", return false); reportStatus("Running OpenCL kernel"); const size_t global[2] = {output.width, output.height}; const size_t *local = NULL; if (params.wgsize[0] && params.wgsize[1]) { local = params.wgsize; } // Timed runs for (int i = 0; i < params.iterations + 1; i++) { err = clEnqueueNDRangeKernel( m_queue, kernel, 2, NULL, global, local, 0, NULL, NULL); CHECK_ERROR_OCL(err, "enqueuing kernel", return false); // Start timing after warm-up run if (i == 0) { err = clFinish(m_queue); CHECK_ERROR_OCL(err, "running kernel", return false); startTiming(); } }
// Resumes a stopped timer.
// If the timer is already running nothing changes and getTime() is returned.
// Otherwise the timer is marked running, startTiming() is called and
// base_time is returned.
double RFWTimer::resume()
{
    if (running)
    {
        return getTime();
    }

    running = true;
    startTiming();
    return base_time;
}
void FunctionBot::runArm() { int state; int pnulvl; float lim; state=getState(); pnulvl=pnuState(state); if(state==5||state==6) { lim=MAX_POS_0/300.0f; } else lim=MAX_POS_1/300.0f; if(pnulvl>lpnulvl) { if(!inTransit) { updatePNU(pnulvl); inTransit=true; startTiming(1); } else if(timeExpired()) { inTransit=false; lpnulvl=pnulvl; if(desiredAngle()<lim) { updateElbow(desiredAngle()); } else updateElbow(lim); } else { if(desiredAngle()<MAX_POS_0/300.0f) updateElbow(desiredAngle()); else updateElbow(MAX_POS_0/300.0f); } } else { if(currentAngle()>lim) { updateElbow(lim); } else { updatePNU(pnulvl); lpnulvl=pnulvl; if(desiredAngle()<lim) updateElbow(desiredAngle()); else updateElbow(lim); } } updateClaw(); }
/* minorCheneyCopyGC
 *
 * Performs a minor collection of the nursery for GC state s.
 *
 * The bytes between s->heap.nursery and s->frontier are counted as allocated.
 * If nothing was allocated the function returns immediately.  If
 * s->canMinor is false, the nursery bytes are simply added to the old
 * generation size and nothing is copied.  Otherwise objects reachable from
 * the globals and from inter-generational pointers are forwarded into the
 * region starting at heap.start + heap.oldGenSize, and the old generation
 * grows by the number of bytes copied.
 *
 * Timing is accumulated into cumulativeStatistics.ru_gcMinor only when
 * detailedGCTime (s) holds.
 */
void minorCheneyCopyGC (GC_state s) {
  size_t bytesAllocated;
  size_t bytesCopied;
  struct rusage ru_start;

  if (DEBUG_GENERATIONAL)
    fprintf (stderr, "minorGC nursery = "FMTPTR" frontier = "FMTPTR"\n",
             (uintptr_t)s->heap.nursery, (uintptr_t)s->frontier);
  assert (invariantForGC (s));
  /* Everything between the nursery start and the frontier is new data. */
  bytesAllocated = s->frontier - s->heap.nursery;
  if (bytesAllocated == 0)
    return;
  s->cumulativeStatistics.bytesAllocated += bytesAllocated;
  if (not s->canMinor) {
    /* No copying: account the nursery contents to the old generation. */
    s->heap.oldGenSize += bytesAllocated;
    bytesCopied = 0;
  } else {
    if (detailedGCTime (s))
      startTiming (&ru_start);
    s->cumulativeStatistics.numMinorGCs++;
    s->forwardState.amInMinorGC = TRUE;
    if (DEBUG_GENERATIONAL or s->controls.messages) {
      fprintf (stderr, "[GC: Starting minor Cheney-copy;]\n");
      fprintf (stderr, "[GC:\tfrom nursery at "FMTPTR" of size %s bytes.]\n",
               (uintptr_t)(s->heap.nursery),
               uintmaxToCommaString(bytesAllocated));
    }
    /* The to-space is the region directly after the current old generation;
     * its limit allows for at most bytesAllocated bytes of survivors. */
    s->forwardState.toStart = s->heap.start + s->heap.oldGenSize;
    assert (isFrontierAligned (s, s->forwardState.toStart));
    s->forwardState.toLimit = s->forwardState.toStart + bytesAllocated;
    assert (invariantForGC (s));
    s->forwardState.back = s->forwardState.toStart;
    /* Forward all globals.  Would like to avoid doing this once all
     * the globals have been assigned.
     */
    foreachGlobalObjptr (s, forwardObjptrIfInNursery);
    forwardInterGenerationalObjptrs (s);
    /* Scan the objects copied so far; forwardState.back is passed by
     * address. */
    foreachObjptrInRange (s, s->forwardState.toStart, &s->forwardState.back,
                          forwardObjptrIfInNursery, TRUE);
    updateWeaksForCheneyCopy (s);
    /* back - toStart is the amount of data that survived. */
    bytesCopied = s->forwardState.back - s->forwardState.toStart;
    s->cumulativeStatistics.bytesCopiedMinor += bytesCopied;
    s->heap.oldGenSize += bytesCopied;
    s->lastMajorStatistics.numMinorGCs++;
    if (detailedGCTime (s))
      stopTiming (&ru_start, &s->cumulativeStatistics.ru_gcMinor);
    if (DEBUG_GENERATIONAL or s->controls.messages)
      fprintf (stderr, "[GC: Finished minor Cheney-copy; copied %s bytes.]\n",
               uintmaxToCommaString(bytesCopied));
  }
}
/* majorCheneyCopyGC
 *
 * Performs a major copying collection for GC state s: objects reachable from
 * the globals are forwarded from s->heap into s->secondaryHeap, the weak
 * pointers are updated, and the two heaps are then swapped.
 *
 * The secondary heap must already exist and be at least as large as the
 * current old generation (asserted).
 *
 * Timing is accumulated into cumulativeStatistics.ru_gcCopying only when
 * detailedGCTime (s) holds.
 */
void majorCheneyCopyGC (GC_state s) {
  size_t bytesCopied;
  struct rusage ru_start;
  pointer toStart;

  assert (s->secondaryHeap.size >= s->heap.oldGenSize);
  if (detailedGCTime (s))
    startTiming (&ru_start);
  s->cumulativeStatistics.numCopyingGCs++;
  s->forwardState.amInMinorGC = FALSE;
  if (DEBUG or s->controls.messages) {
    fprintf (stderr, "[GC: Starting major Cheney-copy;]\n");
    fprintf (stderr, "[GC:\tfrom heap at "FMTPTR" of size %s bytes,]\n",
             (uintptr_t)(s->heap.start),
             uintmaxToCommaString(s->heap.size));
    fprintf (stderr, "[GC:\tto heap at "FMTPTR" of size %s bytes.]\n",
             (uintptr_t)(s->secondaryHeap.start),
             uintmaxToCommaString(s->secondaryHeap.size));
  }
  /* The to-space is the whole secondary heap. */
  s->forwardState.toStart = s->secondaryHeap.start;
  s->forwardState.toLimit = s->secondaryHeap.start + s->secondaryHeap.size;
  assert (s->secondaryHeap.start != (pointer)NULL);
  /* The next assert ensures there is enough space for the copy to
   * succeed.  It does not assert
   *   (s->secondaryHeap.size >= s->heap.size)
   * because that is too strong.
   */
  assert (s->secondaryHeap.size >= s->heap.oldGenSize);
  /* Copying starts at the aligned frontier of the secondary heap. */
  toStart = alignFrontier (s, s->secondaryHeap.start);
  s->forwardState.back = toStart;
  foreachGlobalObjptr (s, forwardObjptr);
  foreachObjptrInRange (s, toStart, &s->forwardState.back, forwardObjptr, TRUE);
  updateWeaksForCheneyCopy (s);
  /* The new old-generation size is measured from the unaligned heap start,
   * so it includes any alignment padding before toStart. */
  s->secondaryHeap.oldGenSize = s->forwardState.back - s->secondaryHeap.start;
  bytesCopied = s->secondaryHeap.oldGenSize;
  s->cumulativeStatistics.bytesCopied += bytesCopied;
  swapHeapsForCheneyCopy (s);
  s->lastMajorStatistics.kind = GC_COPYING;
  if (detailedGCTime (s))
    stopTiming (&ru_start, &s->cumulativeStatistics.ru_gcCopying);
  if (DEBUG or s->controls.messages)
    fprintf (stderr, "[GC: Finished major Cheney-copy; copied %s bytes.]\n",
             uintmaxToCommaString(bytesCopied));
}
/*========================================================================

   readYUV DEFINITION

   Reads the next frame from ptfile[id] into the y, u and v buffers
   (xSize*ySize bytes of y, then xSize*ySize/4 bytes each of u and v).

   id     - index into the ptfile[] table.
   xSize  - frame width in pixels.
   ySize  - frame height in pixels.
   y,u,v  - destination buffers; must be large enough for the sizes above.

   The file is rewound once NB_FRAME frames have been consumed. For id 1,
   every FPS frames the elapsed time of timer 0 is printed and the timer is
   restarted.

   ======================================================================*/
void readYUV(int id, int xSize, int ySize, unsigned char *y, unsigned char *u, unsigned char *v) {
    /* Bytes per frame: full-size y plane plus two quarter-size planes. */
    const int frameSize = xSize*ySize + xSize*ySize/2;

    if( ftell(ptfile[id])/frameSize >= NB_FRAME){
        rewind(ptfile[id]);
    }

    if(id == 1 && ftell(ptfile[id])%(FPS*frameSize) == 0){
        unsigned int time = 0;
        time = stopTiming(0);
        /* time is unsigned, so it is printed with %u. */
        printf("\nMain: %d frames in %u us - %f fps\n", FPS ,time, ((float)FPS)/(float)time*1000000);
        startTiming(0);
    }

    fread(y, sizeof(char), xSize * ySize, ptfile[id]);
    fread(u, sizeof(char), xSize * ySize / 4, ptfile[id]);
    fread(v, sizeof(char), xSize * ySize / 4, ptfile[id]);
}
/// Will just sleep for the given number of milliseconds in the same thread /// as the caller, recording the time taken in the viewer's stats. void doSomethingAndTimeIt(osgViewer::Viewer& viewer, const std::string& name, double milliseconds) { startTiming(viewer, name); //------------------------------------------------------------ // Your processing goes here. // Do nothing for the specified number of milliseconds, just so we can // see it in the stats. osg::Timer_t startTick = osg::Timer::instance()->tick(); while (osg::Timer::instance()->delta_m(startTick, osg::Timer::instance()->tick()) < milliseconds) { OpenThreads::Thread::YieldCurrentThread(); } //------------------------------------------------------------ endTiming(viewer, name); }
/*
 * Benchmarks hash_ring_find_node() on a freshly built ring.
 *
 * hash_fn     - hash function for the ring (MD5 or SHA1 are named in the
 *               banner; anything else is shown as "unknown").
 * numReplicas - replicas per node.
 * numNodes    - number of nodes added to the ring.
 * numKeys     - number of random keys looked up per round.
 * keySize     - size of each key in bytes.
 *
 * Runs 100 rounds of numKeys lookups and prints total/avg/min/max and
 * ops/sec. The figures assume endTiming() reports nanoseconds, as the
 * original divisors (1e9 for seconds, 1000 for microseconds) imply.
 */
void runBench(HASH_FUNCTION hash_fn, int numReplicas, int numNodes, int numKeys, int keySize)
{
    /* Never pass NULL to "%s", even for a hash function not listed here. */
    const char *hash = "unknown";
    if(hash_fn == HASH_FUNCTION_MD5) hash = "MD5";
    else if(hash_fn == HASH_FUNCTION_SHA1) hash = "SHA1";

    printf("----------------------------------------------------\n");
    printf("bench (%s): replicas = %d, nodes = %d, keys: %d, ring size: %d\n", hash, numReplicas, numNodes, numKeys, numReplicas * numNodes);
    printf("----------------------------------------------------\n");

    hash_ring_t *ring = hash_ring_create(numReplicas, hash_fn);

    addNodes(ring, numNodes);

    uint8_t *keys = (uint8_t*)malloc((size_t)keySize * (size_t)numKeys);
    if(keys == NULL) {
        fprintf(stderr, "runBench: failed to allocate key buffer\n");
        hash_ring_free(ring);
        return;
    }
    generateKeys(keys, numKeys, keySize);

    printf("running...\r");

    uint64_t min = 0;
    uint64_t max = 0;
    uint64_t total = 0;
    int times = 100;

    int x, y;
    for(y = 0; y < times; y++) {
        startTiming();
        for(x = 0; x < numKeys; x++) {
            /* Do the lookup outside assert(): with NDEBUG the assert
             * expression is not evaluated and nothing would be measured. */
            int found = hash_ring_find_node(ring, keys + (keySize * x), keySize) != NULL;
            assert(found);
            (void)found;
        }
        uint64_t result = endTiming();
        if(result > max) max = result;
        if(min == 0 || result < min) min = result;
        total += result;
    }

    /* Use floating point throughout: the integer divisions used before
     * truncated the per-lookup time (skewing ops/sec) and could divide by
     * zero. */
    const double totalTime = (double)total;
    const double lookups = (double)numKeys * (double)times;
    const double perLookup = totalTime / lookups;

    printf("stats: total = %.5fs, avg/lookup: %.5fus, min: %.5fus, max: %.5fus, ops/sec: %.0f\n",
        totalTime / 1000000000,
        perLookup / 1000,
        (double)min / numKeys / 1000,
        (double)max / numKeys / 1000,
        1000000000 / perLookup);

    free(keys);
    hash_ring_free(ring);
}
void render() { if (prepared) { startTiming(); if (animating) { // Update rotation state.rotation.y += 0.05f * frameTimer; if (state.rotation.y > 360.0f) { state.rotation.y -= 360.0f; } updateUniformBuffers(); } draw(); endTiming(); } }
//单击 ”定时“ 处理 void ToolGlobal::pbn_clock_clicked() { if (! clockState) //如果 clockState 是 关闭 状态 { pbn_clock_state->move(gbx_clock->x()+73, pbn_clock_state->y()); //设置 按钮位置 pbn_clock->move(gbx_clock->pos()); //........... pbn_clock_state ->setText(tr("OFF")); //设置 按钮文字 pbn_clock ->setText(tr("关闭")); //........... pbn_clock_state ->setToolTip(tr("关闭")); //设置 按钮提示 pbn_clock ->setToolTip(tr("关闭")); //........... clockState = true; //设置为 “开启” 状态 // timing = new Timing(parentWidget()); //创建 “定时器设置” 窗口 并显示 timing = new Timing(gbx_clock->x()-42, 113, 225, 135, parentWidget()); //创建 “定时器设置” 窗口 并显示 timing ->show(); //......................... connect(timing, SIGNAL(timing_pbnOk_click()), this, SLOT(startTiming())); //关联 定时器 “确定” 按钮 connect(timing, SIGNAL(timing_pbnCancle_click()), this, SLOT(stopTiming())); //关联 定时器 “取消” 按钮 } else //如果 clockState 是 开启 状态 { if (timing) //如果存在 “定时器设置” 窗口 { timing ->close(); //关闭 “定时器设置” 窗口 } if (lcdNumber) //如果存在 “LCD显示器” { delete lcdNumber; //销毁 “LCD显示器” lcdNumber = 0; //指针赋0值 timer_default ->stop(); //终止 “1秒倒计时" timer_target ->stop(); //终止 "自定义倒计时” } pbn_clock_state->move(gbx_clock->x()+13, pbn_clock_state->y()); //设置 按钮位置 pbn_clock->move(gbx_clock->x()+46, pbn_clock->y()); //........... pbn_clock_state ->setText(tr("ON")); //设置 按钮文字 pbn_clock ->setText(tr("定时")); //........... pbn_clock_state ->setToolTip(tr("开启魔音")); //设置 按钮提示 pbn_clock ->setToolTip(tr("开启魔音")); //........... clockState = false; //设置为 “关闭” 状态 } }
void EmuSampler::setMode(EmuMode mod, FlowID fid)
  /* Stop the current statistics and start those of the requested mode.
   * Counts one mode switch per call; an unrecognised mode trips I(0). */
{
  //printf("Thread: %u, set to mode: %u\n", fid, mod);
  nSwitches->inc();
  stop();

  if (mod == EmuRabbit) {
    startRabbit(fid);
  } else if (mod == EmuWarmup) {
    startWarmup(fid);
  } else if (mod == EmuDetail) {
    startDetail(fid);
  } else if (mod == EmuTiming) {
    startTiming(fid);
  } else {
    I(0);
  }
}
InitAndQuit(bool shadersOn) { if(SDL_Init(SDL_INIT_VIDEO | SDL_INIT_AUDIO | SDL_INIT_TIMER) != 0) { std::ostringstream error; error << "Failed on SDL_Init: " << SDL_GetError() << "\n"; throw std::runtime_error(error.str()); } graphics::setupGraphics(shadersOn); SDL_WM_SetCaption("Sousaphone Hero", "Sousaphone Hero"); if(Mix_OpenAudio(AUDIO_RATE, AUDIO_FORMAT, AUDIO_CHANNELS, AUDIO_BUFFERS) != 0) { SDL_Quit(); std::ostringstream error; error << "Failed on Mix_OpenAudio: " << Mix_GetError() << "\n"; throw std::runtime_error(error.str()); } std::srand(time(NULL)); Mix_ChannelFinished(doChangedNotes); // set callback function startTiming(); }
/*
 * csort SIZE [TYPE [SEED]]
 *
 * Builds a vector of SIZE random doubles, sorts it between
 * startTiming()/stopTiming(), and prints the result when SIZE < 20.
 * TYPE 'A' (the default) draws from the bounds 0.0 and 1.0, any other TYPE
 * from 1.0 and 20.0. SEED defaults to 42.
 * Exits with status 1 and a usage message when SIZE is missing.
 */
int
main (int argc, char **argv)
{
    int count;
    double *data;
    double low, high;
    char kind;
    long seed;

    if (argc < 2) {
        fprintf (stderr, "Usage: csort SIZE [ TYPE [SEED]] \n");
        exit (1);
    }

    kind = (argc > 2) ? argv[2][0] : 'A';
    count = atoi (argv[1]);

    seed = (argc > 3) ? atol (argv[3]) : 42;
    srand (seed);

    /* Pick the value bounds according to the requested type. */
    if (kind == 'A') {
        low = 0.0;
        high = 1.0;
    } else {
        low = 1.0;
        high = 20.0;
    }
    data = randomVector (count, low, high);

    startTiming ();
    sort (data, count);
    stopTiming ();

    if (count < 20)
        printArr (data, count);

    return 0;
}
void SamplerGPUSim::queue(uint32_t insn, uint64_t pc, uint64_t addr, uint32_t fid, char op) /* main qemu/gpu/tracer/... entry point {{{1 */ { if(!execute(fid,icount)) return; // QEMU can still send a few additional instructions (emul should stop soon) I(mode!=EmuInit); I(insn!=0); I(icount!=0); if (doPower){ uint64_t ti = 0; bool callpwr = callPowerModel(ti, fid); if (callpwr){ I(ti > 0); //printf("totalnInst:%ld, nPassedInst:%ld, interval:%ld\n", totalnInst, nPassedInst, interval); bool dummy = false; //std::cout<<"mode "<<mode<<" Timeinterval "<<ti<<" last time "<<lastTime<<"\n"; int simt = 0; if (ti > 0){ setMode(EmuTiming, fid); simt = BootLoader::pwrmodel.calcStats(ti, !(mode == EmuTiming), static_cast<float>(freq), dummy, dummy, dummy, dummy); endSimSiged = (simt==90)?1:0; BootLoader::pwrmodel.sescThermWrapper->sesctherm.updateMetrics(); } } }// doPower if (nInstMax < totalnInst || endSimSiged) { markDone(); return; } if (nInstSkip>totalnInst) { I(mode==EmuRabbit); return; } I(nInstSkip<=totalnInst); if (mode == EmuRabbit) { stop(); startTiming(fid); } #if 0 static std::set<AddrType> seenPC; static Time_t seenPC_last = 0; static Time_t seenPC_active = 0; static Time_t seenPC_total = 0; seenPC_total++; if (seenPC.find(pc^insn) == seenPC.end()) { seenPC.insert(pc^insn); seenPC_last = seenPC_total; } if ((seenPC_last+1000) > seenPC_total) seenPC_active++; /* if ((seenPC_total & 1048575) == 1) MSG("%5.3f",(100.0*seenPC_active)/(1.0+seenPC_total)); */ #endif emul->queueInstruction(insn,pc,addr, (op&0x80) /* thumb */, fid); }
/* performGC
 *
 * Runs one garbage collection for GC state s.
 *
 *   oldGenBytesRequested  - bytes the caller needs free in the old generation.
 *   nurseryBytesRequested - bytes the caller needs free in the nursery.
 *   forceMajor            - when true a major GC is always performed.
 *   mayResize             - passed through to majorGC.
 *
 * A minor GC is always run first.  A major GC follows when forceMajor is set
 * or when the total request (including room to grow the current stack, if the
 * mutator stack invariant does not hold) exceeds the space left after the old
 * generation.  Afterwards the heap state is re-established, the stack is grown
 * if needed, and a GC signal is flagged as pending when one is handled.
 *
 * The whole collection is bracketed by enterGC / leaveGC.  Time is recorded
 * into cumulativeStatistics.ru_gc and maxPauseTime only when needGCTime (s)
 * holds.
 */
void performGC (GC_state s,
                size_t oldGenBytesRequested,
                size_t nurseryBytesRequested,
                bool forceMajor,
                bool mayResize) {
  uintmax_t gcTime;
  bool stackTopOk;
  size_t stackBytesRequested;
  struct rusage ru_start;
  size_t totalBytesRequested;

  enterGC (s);
  s->cumulativeStatistics.numGCs++;
  if (DEBUG or s->controls.messages) {
    /* The nursery runs from heap.nursery to the end of the heap. */
    size_t nurserySize = s->heap.size - ((size_t)(s->heap.nursery - s->heap.start));
    size_t nurseryUsed = (size_t)(s->frontier - s->heap.nursery);
    fprintf (stderr,
             "[GC: Starting gc #%s; requesting %s nursery bytes and %s old-gen bytes,]\n",
             uintmaxToCommaString(s->cumulativeStatistics.numGCs),
             uintmaxToCommaString(nurseryBytesRequested),
             uintmaxToCommaString(oldGenBytesRequested));
    fprintf (stderr,
             "[GC:\theap at "FMTPTR" of size %s bytes (+ %s bytes card/cross map),]\n",
             (uintptr_t)(s->heap.start),
             uintmaxToCommaString(s->heap.size),
             uintmaxToCommaString(s->heap.withMapsSize - s->heap.size));
    fprintf (stderr,
             "[GC:\twith old-gen of size %s bytes (%.1f%% of heap),]\n",
             uintmaxToCommaString(s->heap.oldGenSize),
             100.0 * ((double)(s->heap.oldGenSize) / (double)(s->heap.size)));
    fprintf (stderr,
             "[GC:\tand nursery of size %s bytes (%.1f%% of heap),]\n",
             uintmaxToCommaString(nurserySize),
             100.0 * ((double)(nurserySize) / (double)(s->heap.size)));
    fprintf (stderr,
             "[GC:\tand nursery using %s bytes (%.1f%% of heap, %.1f%% of nursery).]\n",
             uintmaxToCommaString(nurseryUsed),
             100.0 * ((double)(nurseryUsed) / (double)(s->heap.size)),
             100.0 * ((double)(nurseryUsed) / (double)(nurserySize)));
  }
  assert (invariantForGC (s));
  if (needGCTime (s))
    startTiming (&ru_start);
  minorGC (s);
  /* If the mutator stack invariant does not hold, reserve room for a grown
   * copy of the current stack (it is grown further below). */
  stackTopOk = invariantForMutatorStack (s);
  stackBytesRequested =
    stackTopOk
    ? 0
    : sizeofStackWithHeader (s, sizeofStackGrowReserved (s, getStackCurrent (s)));
  totalBytesRequested =
    oldGenBytesRequested
    + nurseryBytesRequested
    + stackBytesRequested;
  /* A major GC is needed when forced, or when the request does not fit in
   * what is left of the heap after the old generation. */
  if (forceMajor
      or totalBytesRequested > s->heap.size - s->heap.oldGenSize)
    majorGC (s, totalBytesRequested, mayResize);
  setGCStateCurrentHeap (s, oldGenBytesRequested + stackBytesRequested,
                         nurseryBytesRequested);
  assert (hasHeapBytesFree (s, oldGenBytesRequested + stackBytesRequested,
                            nurseryBytesRequested));
  unless (stackTopOk)
    growStackCurrent (s);
  setGCStateCurrentThreadAndStack (s);
  if (needGCTime (s)) {
    gcTime = stopTiming (&ru_start, &s->cumulativeStatistics.ru_gc);
    s->cumulativeStatistics.maxPauseTime =
      max (s->cumulativeStatistics.maxPauseTime, gcTime);
  } else
    gcTime = 0;  /* Assign gcTime to quell gcc warning. */
  if (DEBUG or s->controls.messages) {
    size_t nurserySize = s->heap.size - (size_t)(s->heap.nursery - s->heap.start);
    fprintf (stderr,
             "[GC: Finished gc #%s; time %s ms,]\n",
             uintmaxToCommaString(s->cumulativeStatistics.numGCs),
             uintmaxToCommaString(gcTime));
    fprintf (stderr,
             "[GC:\theap at "FMTPTR" of size %s bytes (+ %s bytes card/cross map),]\n",
             (uintptr_t)(s->heap.start),
             uintmaxToCommaString(s->heap.size),
             uintmaxToCommaString(s->heap.withMapsSize - s->heap.size));
    fprintf (stderr,
             "[GC:\twith old-gen of size %s bytes (%.1f%% of heap),]\n",
             uintmaxToCommaString(s->heap.oldGenSize),
             100.0 * ((double)(s->heap.oldGenSize) / (double)(s->heap.size)));
    fprintf (stderr,
             "[GC:\tand nursery of size %s bytes (%.1f%% of heap).]\n",
             uintmaxToCommaString(nurserySize),
             100.0 * ((double)(nurserySize) / (double)(s->heap.size)));
  }
  /* Send a GC signal. */
  if (s->signalsInfo.gcSignalHandled
      and s->signalHandlerThread != BOGUS_OBJPTR) {
    if (DEBUG_SIGNALS)
      fprintf (stderr, "GC Signal pending.\n");
    s->signalsInfo.gcSignalPending = TRUE;
    /* Do not raise the general pending flag while already inside the
     * signal handler. */
    unless (s->signalsInfo.amInSignalHandler)
      s->signalsInfo.signalIsPending = TRUE;
  }
  if (DEBUG)
    displayGCState (s, stderr);
  assert (hasHeapBytesFree (s, oldGenBytesRequested, nurseryBytesRequested));
  assert (invariantForGC (s));
  leaveGC (s);
}
/*
 * Scans a packet dump file with a pool of scanner threads and prints
 * throughput figures.
 *
 * path           - dump file to read.
 * repeat         - passed to packetreader_init().
 * machine        - state machine handed to every scanner.
 * isTableMachine - passed to scanner_init().
 * verbose        - passed to scanner_init().
 * timing         - when non-zero, the run is timed and per-thread and
 *                  combined throughput are printed.
 * threads        - number of scanner threads (and packet queues).
 *
 * With HYBRID_SCANNER the extra parameters (tableMachine, packets_to_steal,
 * dedicated_use_compressed, work_group_size, max_wgs, thresholds, drop) are
 * forwarded to scanner_init() / multicore_manager_init() /
 * multicore_manager_set_thresholds().
 *
 * All packets are read into the per-thread queues before the scanners are
 * started. Allocation or PAPI initialisation failures exit with status 1.
 */
#ifdef HYBRID_SCANNER
void inspectDumpFile(const char *path, int repeat, StateMachine *machine, TableStateMachine *tableMachine, int isTableMachine, int verbose, int timing, int threads,
		int packets_to_steal, int dedicated_use_compressed, int work_group_size, int max_wgs, double *thresholds, int drop) {
#else
void inspectDumpFile(const char *path, int repeat, StateMachine *machine, int isTableMachine, int verbose, int timing, int threads) {
#endif
	double /*rate,*/ combinedRate, threadRate;//, rateWithHeaders;
	Timer t;
	long size;//, sizeWithHeaders;
	int i, cpuid;
#ifdef GLOBAL_TIMING
	GlobalTimerResult gtimer_result;
	int j;
#ifdef PRINT_GLOBAL_TIMER_EVENTS
	GlobalTimerEvent **events;
#endif
#endif

	ScannerData *scanners;
	PacketReaderData reader;
	LinkedList *packet_queues;
	MulticoreManager manager;

#ifdef COUNT_FAIL_PERCENT
	long totalFailures, totalGotos;
#endif

#ifdef PAPI
	if (PAPI_library_init(PAPI_VER_CURRENT) != PAPI_VER_CURRENT) {
		fprintf(stderr, "Cannot init PAPI\n");
		exit(1);
	}
	if (PAPI_thread_init((unsigned long (*)(void))pthread_self) != PAPI_OK) {
		fprintf(stderr, "Cannot init PAPI for threads\n");
		exit(1);
	}
#endif

	packet_queues = (LinkedList*)malloc(sizeof(LinkedList) * threads);
	scanners = (ScannerData*)malloc(sizeof(ScannerData) * threads);
	if (packet_queues == NULL || scanners == NULL) {
		fprintf(stderr, "Error allocating memory for scanners\n");
		exit(1);
	}

	for (i = 0; i < threads; i++) {
		list_init(&packet_queues[i]);
	}

	packetreader_init(&reader, path, repeat, packet_queues, threads);

	for (i = 0; i < threads; i++) {
#ifdef HYBRID_SCANNER
		scanner_init(&(scanners[i]), i, &manager, machine, tableMachine, isTableMachine, &packet_queues[i], verbose, drop);
#else
		scanner_init(&(scanners[i]), i, &manager, machine, isTableMachine, &packet_queues[i], verbose);
#endif
	}

#ifdef HYBRID_SCANNER
	multicore_manager_init(&manager, scanners, threads, work_group_size, max_wgs, packets_to_steal, dedicated_use_compressed);
	multicore_manager_set_thresholds(&manager, thresholds);
#else
	multicore_manager_init(&manager, scanners, threads, 1, threads, 0, 0);
#endif

	/* The reader runs to completion before any scanner starts. */
	packetreader_start(&reader);
	packetreader_join(&reader);

#ifdef HYBRID_SCANNER
	multicore_manager_start(&manager);
#endif

#ifdef GLOBAL_TIMING
#ifdef PRINT_GLOBAL_TIMER_EVENTS
	events = NULL;
#endif
	global_timer_start(&(manager.gtimer));
#endif

	if (timing) {
		startTiming(&t);
	}

	for (i = 0; i < threads; i++) {
		// If CPUs are ordered [core0,core0,...,core0,core1,core1,...,core1,...]
		//cpuid = i;
		// If CPUs are ordered [core0,core1,...,coreN,core0,core1,...,coreN,...]
		cpuid = (i % 2 == 0) ? i / 2 : (threads + i) / 2;
		scanner_start_with_affinity(&(scanners[i]), cpuid);

		// If you use the next line, comment out the pthread_attr_destroy call in scanner_join!!!
		//scanner_start(&(scanners[i]));
	}

	for (i = 0; i < threads; i++) {
		scanner_join(&(scanners[i]));
	}

//	scanner_start(&(scanners[0]));
//	scanner_start(&(scanners[1]));
//	scanner_join(&(scanners[0]));
//	scanner_join(&(scanners[1]));

	if (timing) {
		endTiming(&t);
	}

#ifdef GLOBAL_TIMING
	global_timer_end(&(manager.gtimer));
#endif

#ifdef HYBRID_SCANNER
	multicore_manager_stop(&manager);
	multicore_manager_join(&manager);
#endif

#ifdef GLOBAL_TIMING
	global_timer_join(&(manager.gtimer));
	/* Pass the address of the local result struct. */
	global_timer_get_results(&(manager.gtimer), &gtimer_result);
#endif

	if (timing) {
		//endTiming(&t);

		size = reader.size;
		//sizeWithHeaders = reader.sizeWithHeaders;
		//rate = GET_TRANSFER_RATE(size, &t);
		//rateWithHeaders = GET_TRANSFER_RATE(sizeWithHeaders, &t);
//		printf("Time(micros)\tData(No H)\tData(w/ H)\tRate(No H) Mb/s\tRate (w/ H) Mb/s\n");
		//printf("%8ld\t%9ld\t%9ld\t%5.4f\t%5.4f\n", t.micros, size, sizeWithHeaders, rate, rateWithHeaders);
		printf("TOTAL_BYTES\tTotal data scanned: %ld bytes\n", size);
		//printf("TOTAL_TIME\tTotal time: %ld ms\n", t.micros);
		//printf("TOTAL_THRPT\tTotal throughput: %5.4f Mbps\n", rate);

		combinedRate = 0;
		printf("Alert mode timer: %ld us\n", manager.alert_mode_timer.micros);
		for (i = 0; i < threads; i++) {
			/* The alert-mode branch is disabled by the constant 0. */
			if (0 && manager.alert_mode_used) {
				threadRate = GET_TRANSFER_RATE(scanners[i].bytes_scanned_since_alert, &(manager.alert_mode_timer));
			} else {
				threadRate = GET_SCANNER_TRANSFER_RATE(&(scanners[i]));
			}
			combinedRate += threadRate;
			printf("T_%2d_THRPT\t%5.4f\tMbps\t%lu\tB\t%lu\tB\t%ld\tus\n", i, threadRate, scanners[i].bytes_scanned, scanners[i].bytes_scanned_since_alert, scanners[i].timer.micros);
		}
		printf("COMB_THRPT\t%5.4f\tMbps\n", combinedRate);

#ifdef GLOBAL_TIMING
		//printf("\nGlobal timing:\n");
		/*
		printf("Time\t");
		for (j = 0; j < manager.gtimer.intervals; j++) {
			printf("%6ld\t", gtimer_result.times[j]);
		}
		printf("\n");
		*/
		for (i = 0; i < manager.gtimer.num_scanners; i++) {
			printf("T_%2d_GTIME\t", i);
			for (j = 0; j < manager.gtimer.intervals; j++) {
				printf("%5.3f\t", gtimer_result.results[gtimer_result.intervals * i + j]);
			}
			printf("\n");
		}
#ifdef PRINT_GLOBAL_TIMER_EVENTS
		j = global_timer_get_events(&(manager.gtimer), &events);
		if (j > 0) {
			printf("\nEvents:\n");
			for (i = 0; i < j; i++) {
				printf("Event %d: %s [Time: %d, Source: %s]\n", i, events[i]->text, events[i]->interval, events[i]->source);
			}
		}
#endif
#endif
	}

#ifdef COUNT_FAIL_PERCENT
	totalFailures = totalGotos = 0;
	for (i = 0; i < threads; i++) {
		totalFailures += scanners[i].stats.totalFailures;
		totalGotos += scanners[i].stats.totalGotos;
	}
	printf("Fail percent: %f\n", ((double)totalFailures) / (totalFailures + totalGotos));
	printf("Total failures: %ld, Total gotos: %ld\n", totalFailures, totalGotos);
#endif

	multicore_manager_destroy(&manager);
#ifdef GLOBAL_TIMING
	global_timer_destroy(&(manager.gtimer));
	global_timer_destroy_result(&gtimer_result);
#endif

	free(scanners);
	for (i = 0; i < threads; i++) {
		//printf("Status of input-queue of thread %d: in=%d, out=%d\n", i, packet_queues[i].in, packet_queues[i].out);
		list_destroy(&(packet_queues[i]), 1);
	}
	free(packet_queues);
}
// ---------------------------------------------------------------------------- // gpuNUFFT_gpu: NUFFT // // Inverse gpuNUFFT implementation - interpolation from uniform grid data onto // nonuniform k-space data based on optimized // gpuNUFFT kernel with minimal oversampling // ratio (see Beatty et al.) // // Basic steps: - apodization correction // - zero padding with osf // - FFT // - convolution and resampling // // parameters: // * data : output kspace data // * data_count : number of samples on trajectory // * n_coils : number of channels or coils // * crds : coordinates on trajectory, passed as SoA // * imdata : input image data // * imdata_count : number of image data points // * grid_width : size of grid // * kernel : precomputed convolution kernel as lookup table // * kernel_count : number of kernel lookup table entries // * sectors : mapping of data indices according to each sector // * sector_count : number of sectors // * sector_centers: coordinates (x,y,z) of sector centers // * sector_width : width of sector // * im_width : dimension of image // * osr : oversampling ratio // * gpuNUFFT_out : enum indicating how far gpuNUFFT has to be processed // void gpuNUFFT::GpuNUFFTOperator::performForwardGpuNUFFT(gpuNUFFT::Array<DType2> imgData,gpuNUFFT::Array<CufftType>& kspaceData, GpuNUFFTOutput gpuNUFFTOut) { if (DEBUG) { std::cout << "performing forward gpuNUFFT!!!" 
<< std::endl; std::cout << "dataCount: " << kspaceData.count() << " chnCount: " << kspaceData.dim.channels << std::endl; std::cout << "imgCount: " << imgData.count() << " gridWidth: " << this->getGridWidth() << std::endl; } showMemoryInfo(); if (debugTiming) startTiming(); int data_count = (int)this->kSpaceTraj.count(); int n_coils = (int)kspaceData.dim.channels; IndType imdata_count = this->imgDims.count(); int sector_count = (int)this->gridSectorDims.count(); //cuda mem allocation DType2 *imdata_d; CufftType *data_d; if (DEBUG) printf("allocate and copy imdata of size %d...\n",imdata_count); allocateDeviceMem<DType2>(&imdata_d,imdata_count); if (DEBUG) printf("allocate and copy data of size %d...\n",data_count); allocateDeviceMem<CufftType>(&data_d,data_count); initDeviceMemory(n_coils); if (debugTiming) printf("Memory allocation: %.2f ms\n",stopTiming()); int err; //iterate over coils and compute result for (int coil_it = 0; coil_it < n_coils; coil_it++) { int data_coil_offset = coil_it * data_count; int im_coil_offset = coil_it * (int)imdata_count; if (this->applySensData()) // perform automatically "repeating" of input image in case // of existing sensitivity data copyToDevice(imgData.data,imdata_d,imdata_count); else copyToDevice(imgData.data + im_coil_offset,imdata_d,imdata_count); //reset temp arrays cudaMemset(gdata_d,0, sizeof(CufftType)*gi_host->grid_width_dim); cudaMemset(data_d,0, sizeof(CufftType)*data_count); if (DEBUG && (cudaThreadSynchronize() != cudaSuccess)) printf("error at thread synchronization 1: %s\n",cudaGetErrorString(cudaGetLastError())); if (this->applySensData()) { copyToDevice(this->sens.data + im_coil_offset, sens_d,imdata_count); performSensMul(imdata_d,sens_d,gi_host,false); } // apodization Correction if (n_coils > 1 && deapo_d != NULL) performForwardDeapodization(imdata_d,deapo_d,gi_host); else performForwardDeapodization(imdata_d,gi_host); if (DEBUG && (cudaThreadSynchronize() != cudaSuccess)) printf("error at thread 
synchronization 2: %s\n",cudaGetErrorString(cudaGetLastError())); // resize by oversampling factor and zero pad performPadding(imdata_d,gdata_d,gi_host); if (debugTiming) startTiming(); if (DEBUG && (cudaThreadSynchronize() != cudaSuccess)) printf("error at thread synchronization 3: %s\n",cudaGetErrorString(cudaGetLastError())); // shift image to get correct zero frequency position performFFTShift(gdata_d,INVERSE,getGridDims(),gi_host); if (DEBUG && (cudaThreadSynchronize() != cudaSuccess)) printf("error at thread synchronization 4: %s\n",cudaGetErrorString(cudaGetLastError())); // eventually free imdata_d // Forward FFT to kspace domain if (err=pt2CufftExec(fft_plan, gdata_d, gdata_d, CUFFT_FORWARD) != CUFFT_SUCCESS) { fprintf(stderr,"cufft has failed with err %i \n",err); showMemoryInfo(true,stderr); } if (DEBUG && (cudaThreadSynchronize() != cudaSuccess)) printf("error at thread synchronization 5: %s\n",cudaGetErrorString(cudaGetLastError())); performFFTShift(gdata_d,FORWARD,getGridDims(),gi_host); if (DEBUG && (cudaThreadSynchronize() != cudaSuccess)) printf("error at thread synchronization 6: %s\n",cudaGetErrorString(cudaGetLastError())); if (debugTiming) printf("FFT (incl. 
shift): %.2f ms\n",stopTiming()); if (debugTiming) startTiming(); // convolution and resampling to non-standard trajectory forwardConvolution(data_d,crds_d,gdata_d,NULL,sectors_d,sector_centers_d,gi_host); if (DEBUG && (cudaThreadSynchronize() != cudaSuccess)) printf("error at thread synchronization 7: %s\n",cudaGetErrorString(cudaGetLastError())); if (debugTiming) printf("Forward Convolution: %.2f ms\n",stopTiming()); performFFTScaling(data_d,gi_host->data_count,gi_host); if (DEBUG && (cudaThreadSynchronize() != cudaSuccess)) printf("error: at thread synchronization 8: %s\n",cudaGetErrorString(cudaGetLastError())); //write result in correct order back into output array writeOrderedGPU(data_sorted_d,data_indices_d,data_d,(int)this->kSpaceTraj.count()); copyFromDevice(data_sorted_d,kspaceData.data + data_coil_offset,data_count); }//iterate over coils freeTotalDeviceMemory(data_d,imdata_d,NULL); freeDeviceMemory(n_coils); if ((cudaThreadSynchronize() != cudaSuccess)) fprintf(stderr,"error in performForwardGpuNUFFT function: %s\n",cudaGetErrorString(cudaGetLastError())); free(gi_host); }
// ---------------------------------------------------------------------------- // performGpuNUFFTAdj: NUFFT^H // // GpuNUFFT implementation - interpolation from nonuniform k-space data onto // oversampled grid based on optimized gpuNUFFT kernel // with minimal oversampling ratio (see Beatty et al.) // // Basic steps: - density compensation // - convolution with interpolation function // - iFFT // - cropping due to oversampling ratio // - apodization correction // // parameters: // * data : input kspace data // * data_count : number of samples on trajectory // * n_coils : number of channels or coils // * crds : coordinate array on trajectory // * imdata : output image data // * imdata_count : number of image data points // * grid_width : size of grid // * kernel : precomputed convolution kernel as lookup table // * kernel_count : number of kernel lookup table entries // * sectors : mapping of start and end points of each sector // * sector_count : number of sectors // * sector_centers: coordinates (x,y,z) of sector centers // * sector_width : width of sector // * im_width : dimension of image // * osr : oversampling ratio // * do_comp : true, if density compensation has to be done // * density_comp : densiy compensation array // * gpuNUFFT_out : enum indicating how far gpuNUFFT has to be processed // void gpuNUFFT::GpuNUFFTOperator::performGpuNUFFTAdj(gpuNUFFT::Array<DType2> kspaceData, gpuNUFFT::Array<CufftType>& imgData, GpuNUFFTOutput gpuNUFFTOut) { if (DEBUG) { std::cout << "performing gpuNUFFT adjoint!!!" 
<< std::endl; std::cout << "dataCount: " << kspaceData.count() << " chnCount: " << kspaceData.dim.channels << std::endl; std::cout << "imgCount: " << imgData.count() << " gridWidth: " << this->getGridWidth() << std::endl; std::cout << "apply density comp: " << this->applyDensComp() << std::endl; std::cout << "apply sens data: " << this->applySensData() << std::endl; } if (debugTiming) startTiming(); showMemoryInfo(); int data_count = (int)this->kSpaceTraj.count(); int n_coils = (int)kspaceData.dim.channels; IndType imdata_count = this->imgDims.count(); int sector_count = (int)this->gridSectorDims.count(); // select data ordered and leave it on gpu DType2* data_d; if (DEBUG) printf("allocate data of size %d...\n",data_count); allocateDeviceMem<DType2>(&data_d,data_count); CufftType *imdata_d, *imdata_sum_d = NULL; if (DEBUG) printf("allocate and copy imdata of size %d...\n",imdata_count); allocateDeviceMem<CufftType>(&imdata_d,imdata_count); if (this->applySensData()) { if (DEBUG) printf("allocate and copy temp imdata of size %d...\n",imdata_count); allocateDeviceMem<CufftType>(&imdata_sum_d,imdata_count); cudaMemset(imdata_sum_d,0,imdata_count*sizeof(CufftType)); } initDeviceMemory(n_coils); int err; if (debugTiming) printf("Memory allocation: %.2f ms\n",stopTiming()); //iterate over coils and compute result for (int coil_it = 0; coil_it < n_coils; coil_it++) { int data_coil_offset = coil_it * data_count; int im_coil_offset = coil_it * (int)imdata_count;//gi_host->width_dim; cudaMemset(gdata_d,0, sizeof(CufftType)*gi_host->grid_width_dim); //copy coil data to device and select ordered copyToDevice(kspaceData.data + data_coil_offset,data_d,data_count); selectOrderedGPU(data_d,data_indices_d,data_sorted_d,data_count); if (this->applyDensComp()) performDensityCompensation(data_sorted_d,density_comp_d,gi_host); if (DEBUG && (cudaThreadSynchronize() != cudaSuccess)) printf("error at adj thread synchronization 1: %s\n",cudaGetErrorString(cudaGetLastError())); if 
(debugTiming) startTiming(); adjConvolution(data_sorted_d,crds_d,gdata_d,NULL,sectors_d,sector_centers_d,gi_host); if (debugTiming) printf("Adjoint convolution: %.2f ms\n",stopTiming()); if (DEBUG && (cudaThreadSynchronize() != cudaSuccess)) fprintf(stderr,"error at adj thread synchronization 2: %s\n",cudaGetErrorString(cudaGetLastError())); if (gpuNUFFTOut == CONVOLUTION) { if (DEBUG) printf("stopping output after CONVOLUTION step\n"); //get output copyFromDevice<CufftType>(gdata_d,imgData.data,gi_host->grid_width_dim); if (DEBUG) printf("test value at point zero: %f\n",(imgData.data)[0].x); free(gi_host); freeTotalDeviceMemory(data_d,imdata_d,imdata_sum_d,NULL); freeDeviceMemory(n_coils); return; } if ((cudaThreadSynchronize() != cudaSuccess)) fprintf(stderr,"error at adj thread synchronization 3: %s\n",cudaGetErrorString(cudaGetLastError())); if (debugTiming) startTiming(); performFFTShift(gdata_d,INVERSE,getGridDims(),gi_host); //Inverse FFT if (err=pt2CufftExec(fft_plan, gdata_d, gdata_d, CUFFT_INVERSE) != CUFFT_SUCCESS) { fprintf(stderr,"cufft has failed at adj with err %i \n",err); showMemoryInfo(true,stderr); } if (DEBUG && (cudaThreadSynchronize() != cudaSuccess)) fprintf(stderr,"error at adj thread synchronization 4: %s\n",cudaGetErrorString(cudaGetLastError())); if (gpuNUFFTOut == FFT) { if (DEBUG) printf("stopping output after FFT step\n"); //get output copyFromDevice<CufftType>(gdata_d,imgData.data,gi_host->grid_width_dim); free(gi_host); freeTotalDeviceMemory(data_d,imdata_d,imdata_sum_d,NULL); freeDeviceMemory(n_coils); printf("last cuda error: %s\n", cudaGetErrorString(cudaGetLastError())); return; } if (DEBUG && (cudaThreadSynchronize() != cudaSuccess)) printf("error at adj thread synchronization 5: %s\n",cudaGetErrorString(cudaGetLastError())); performFFTShift(gdata_d,INVERSE,getGridDims(),gi_host); if (debugTiming) printf("iFFT (incl. 
shift) : %.2f ms\n",stopTiming()); if (DEBUG && (cudaThreadSynchronize() != cudaSuccess)) printf("error at adj thread synchronization 6: %s\n",cudaGetErrorString(cudaGetLastError())); performCrop(gdata_d,imdata_d,gi_host); if (DEBUG && (cudaThreadSynchronize() != cudaSuccess)) printf("error at adj thread synchronization 7: %s\n",cudaGetErrorString(cudaGetLastError())); //check if precomputed deapo function can be used if (n_coils > 1 && deapo_d != NULL) performDeapodization(imdata_d,deapo_d,gi_host); else performDeapodization(imdata_d,gi_host); if (DEBUG && (cudaThreadSynchronize() != cudaSuccess)) printf("error at adj thread synchronization 8: %s\n",cudaGetErrorString(cudaGetLastError())); performFFTScaling(imdata_d,gi_host->im_width_dim,gi_host); if (DEBUG && (cudaThreadSynchronize() != cudaSuccess)) printf("error: at adj thread synchronization 9: %s\n",cudaGetErrorString(cudaGetLastError())); if (this->applySensData()) { copyToDevice(this->sens.data + im_coil_offset, sens_d,imdata_count); performSensMul(imdata_d,sens_d,gi_host,true); performSensSum(imdata_d,imdata_sum_d,gi_host); } else { // get result per coil // no summation is performed in absence of sensitity data copyFromDevice<CufftType>(imdata_d,imgData.data+im_coil_offset,imdata_count); } if (DEBUG && (cudaThreadSynchronize() != cudaSuccess)) printf("error: at adj thread synchronization 10: %s\n",cudaGetErrorString(cudaGetLastError())); }//iterate over coils if (this->applySensData()) { // get result of combined coils copyFromDevice<CufftType>(imdata_sum_d,imgData.data,imdata_count); } if (DEBUG && (cudaThreadSynchronize() != cudaSuccess)) printf("error: at adj thread synchronization 11: %s\n",cudaGetErrorString(cudaGetLastError())); freeTotalDeviceMemory(data_d,imdata_d,imdata_sum_d,NULL); freeDeviceMemory(n_coils); if ((cudaThreadSynchronize() != cudaSuccess)) fprintf(stderr,"error in gpuNUFFT_gpu_adj function: %s\n",cudaGetErrorString(cudaGetLastError())); free(gi_host); }
/* * Translate a method into native code. * * Registers are allocated per basic block, using an LRU algorithm. * Contents of registers are spilled at the end of basic block, * depending on the edges in the CFG leaving the basic block: * * - If there is an edge from the basic block to an exception handler, * local variables are spilled on the stack * * - If there is only one non-exception edge, and the target basic * block is following the current block immediately, no spills are done * * - Otherwise, the local variables and the operand stack are spilled * onto the stack */ jboolean translate(Method* xmeth, errorInfo* einfo) { #if defined(KAFFE_VMDEBUG) int i; #endif /* defined(KAFFE_VMDEBUG) */ jint low; jint high; jvalue tmpl; int idx; SlotInfo* tmp; SlotInfo* tmp2; SlotInfo* mtable; bytecode* base; uint32 len; callInfo cinfo; fieldInfo finfo; Hjava_lang_Class* crinfo; codeinfo* mycodeInfo; nativeCodeInfo ncode; int64 tms = 0; int64 tme; static bool reinvoke = false; jboolean success = true; lockClass(xmeth->class); if (METHOD_TRANSLATED(xmeth)) { goto done3; } /* If this code block is native, then just set it up and return */ if (methodIsNative(xmeth)) { void *func = native(xmeth, einfo); if (func != NULL) { engine_create_wrapper(xmeth, func); KAFFEJIT_TO_NATIVE(xmeth); } else { success = false; } goto done3; } /* Scan the code and determine the basic blocks */ success = analyzeMethod(xmeth, &mycodeInfo, einfo); if (success == false) { /* It may happen that we already have translated it * by implicit recursion in the verifier. */ if (METHOD_TRANSLATED(xmeth)) success = true; goto done3; } #if defined(KAFFE_FEEDBACK) if( kaffe_feedback_file ) lockMutex(kaffe_feedback_file); #endif /* Only one in the translator at once. Must check the translation * hasn't been done by someone else once we get it. 
*/ enterTranslator(); startTiming(&fulljit, "JIT translation"); if (Kaffe_JavaVMArgs.enableVerboseJIT) { tms = currentTime(); } DBG(MOREJIT, dprintf("callinfo = %p\n", &cinfo); );
int main(int argc, char** argv) { cout << endl<< endl << "********************** program start************************ " << endl << endl; int nx = 256, ny = 256, nz = 256, nn = nx*ny*nz; FLOAT dx = Lx/(FLOAT)nx, dy = Ly/(FLOAT)ny, dz = Lz/(FLOAT)nz; FLOAT dt = 0.1*dx*dx/KAPPA; int step = 1000; double elaps=0.0; double getElapsedTime(); int thread_num=1; #ifdef _OPENMP #pragma omp parallel { thread_num = omp_get_num_threads(); if(omp_get_thread_num()==0)cout<<"\nUsed Number of Threads : "<< thread_num <<endl<<endl; } #endif // To avoid chaching effects for small message sizes // int fact = 1; for(;fact*nn*sizeof(FLOAT)<100e6;++fact); cout << "fact = " << fact << endl; // FLOAT* f = (FLOAT *)scalable_aligned_malloc(sizeof(FLOAT) * nn, SIMDALIGN); //FLOAT* fn = (FLOAT *)scalable_aligned_malloc(sizeof(FLOAT) * nn, SIMDALIGN); FLOAT* f = (FLOAT *)_mm_malloc(sizeof(FLOAT) * nn, SIMDALIGN); FLOAT* fn = (FLOAT *)_mm_malloc(sizeof(FLOAT) * nn, SIMDALIGN); initArray(f ,nx, ny, nz); initArray(fn,nx, ny, nz); long data = 0; FLOAT flops=0.0; startTiming(); for(int n = 0;n<step;++n){ // flops += diffusion_simd(nx, ny, nz, nn, dx, dy, dz, dt, f, fn); // flops += diffusion_peel(nx, ny, nz, nn, dx, dy, dz, dt, f, fn); flops += diffusion_tiled(nx, ny, nz, nn, dx, dy, dz, dt, f, fn); data+=nn*2*sizeof(FLOAT); swap(&f, &fn); elaps += dt; } endTiming(); cout<<"Buffer Size: " <<sizeof(FLOAT)*nn/(1000.0*1000.0) <<" [MB] Total Data: "<<data/(1000.0*1000.0*1000.0)<<" [GB]"<<endl; cout<<"Bandwidth: " <<data/(1000.0*1000.0*1000.0*getElapsedTime())<<"[GB/s]"<<endl; cout<<"FLOPS : " <<flops/(1000.0*1000.0*1000.0*getElapsedTime())<<"[GFLOPS]"<<endl; cout<<"Elapsed Time: " <<getElapsedTime()<<endl<<endl; error_func(nx, ny, nz, dx, f, elaps); // scalable_aligned_free(f ); // scalable_aligned_free(fn); _mm_free(f ); _mm_free(fn); return 0; }