/*
 * Mount devPath on mountedPoint, probing the supported filesystems in order.
 *
 * The mount point directory is created first; the mkdir() result is ignored
 * on purpose because the directory may already exist. Each filesystem type in
 * the probe table ("vfat", "ntfs", "ext4") is then tried with the same mount
 * flags, every attempt being bracketed by startTiming()/endTimming().
 *
 * Parameters:
 *   devPath      - device to mount; must not be NULL.
 *   mountedPoint - directory to mount on; must not be NULL.
 *
 * Returns 0 as soon as one attempt succeeds, -1 when an argument is NULL or
 * every attempt fails (the reason of the last failed attempt is logged).
 */
int ensure_dev_mounted(const char * devPath,const char * mountedPoint){
    static const char * const fsTypes[] = { "vfat", "ntfs", "ext4" };
    const unsigned int fsCount = sizeof(fsTypes) / sizeof(fsTypes[0]);
    int lastErrno = 0;
    unsigned int i;

    if(devPath == NULL || mountedPoint == NULL){
        return -1;
    }
    mkdir(mountedPoint, 0755);  //in case it doesn't already exist

    for(i = 0; i < fsCount; i++){
        int ret;

        startTiming();
        ret = mount(devPath, mountedPoint, fsTypes[i],
            MS_NOATIME | MS_NODEV | MS_NODIRATIME, "");
        /* Save errno immediately: endTimming() and LOGD may overwrite it. */
        lastErrno = errno;
        endTimming();
        if(ret == 0){
            LOGD("mount %s with fs '%s' success\n", devPath, fsTypes[i]);
            return 0;
        }
    }

    LOGD("failed to mount %s (%s)\n", devPath, strerror(lastErrno));
    return -1;
}
Exemplo n.º 2
0
/*
 * Runs one timed scan of the TEST_INPUT file with the given state machine
 * and prints the elapsed time and transfer rate.
 *
 * The whole file is loaded into memory first, so only the matching call is
 * timed.
 *
 * Parameters:
 *   machine        - machine to scan with; it is cast to TableStateMachine*
 *                    when isTableMachine is non-zero.
 *   isTableMachine - non-zero selects matchTableMachine(), zero selects match().
 *
 * Any I/O or allocation failure prints a message to stderr and terminates
 * the process with exit(1).
 */
void runTest(StateMachine *machine, int isTableMachine) {
	FILE *file;
	long fileSize;
	int buffSize, len;
	char *buff;
	double rate;
	Timer t;
	MachineStats stats;
	int is_heavy, last_idx_in_root;
	double uncommonRate;

	stats.totalFailures = 0;
	stats.totalGotos = 0;

	file = fopen(TEST_INPUT, "rb");
	if (!file) {
		fprintf(stderr, "Error opening file for reading\n");
		exit(1);
	}

	/* Determine the file size; each step can fail (e.g. non-seekable input). */
	if (fseek(file, 0L, SEEK_END) != 0) {
		fprintf(stderr, "Error determining size of input file\n");
		exit(1);
	}
	fileSize = ftell(file);
	if (fileSize < 0 || fseek(file, 0L, SEEK_SET) != 0) {
		fprintf(stderr, "Error determining size of input file\n");
		exit(1);
	}

	/* The matchers take an int length, so reject sizes that do not fit. */
	buffSize = (int)fileSize;
	if ((long)buffSize != fileSize) {
		fprintf(stderr, "Input file is too large to benchmark\n");
		exit(1);
	}

	/* malloc(0) may return NULL; request at least one byte so an empty
	 * file is not reported as an allocation failure. */
	buff = (char*)malloc(buffSize > 0 ? (size_t)buffSize : 1);
	if (buff == NULL) {
		fprintf(stderr, "Error allocating memory for buffer\n");
		exit(1);
	}
	len = (int)fread(buff, sizeof(char), (size_t)buffSize, file);
	if (len != buffSize) {
		fprintf(stderr, "Error reading data from file\n");
		exit(1);
	}

	t.micros = 0;
	if (isTableMachine) {
		startTiming(&t);
		matchTableMachine((TableStateMachine*)machine, NULL, FALSE, buff, buffSize, 1, NULL, NULL, NULL, NULL, &is_heavy, &last_idx_in_root, &uncommonRate);
		endTiming(&t);
	} else {
		startTiming(&t);
		match(machine, buff, buffSize, 0, &stats, 0, 0);
		endTiming(&t);
	}
	rate = GET_TRANSFER_RATE(buffSize, &t);

	/* The input has no packet headers, so both column pairs show the same values. */
	printf("Time(micros)\tData(No H)\tData(w/ H)\tRate(No H) Mb/s\tRate (w/ H) Mb/s\n");
	printf("%8ld\t%9d\t%9d\t%5.4f\t%5.4f\n", t.micros, buffSize, buffSize, rate, rate);

	free(buff);

	fclose(file);
}
Exemplo n.º 3
0
	/**
	 * Renders one frame and, while animating, advances the animation state.
	 *
	 * Does nothing until `prepared` is set. The whole update + draw sequence
	 * is bracketed by startTiming()/endTiming().
	 */
	void render()
	{
		if (!prepared)
		{
			return;
		}

		startTiming();
		if (animating)
		{
			// Count down the start delay first.
			if (animStart > 0.0f)
			{
				animStart -= 0.15f * (1.0f / frameTimer);
			}
			// Advance the animation timer only when animation is enabled and
			// the start delay has elapsed (logical AND, not a bit mask).
			if (animate && (animStart <= 0.0f))
			{
				timer += 0.5f * (1.0f / frameTimer);
				// Wrap the timer so it stays within one unit.
				if (timer > 1.0)
				{
					timer -= 1.0f;
				}
			}
			updateUniformBuffers();
		}
		draw();
		endTiming();
	}
Exemplo n.º 4
0
  /**
   * Runs the Halide CPU version of the filter and reports its results.
   *
   * When the build has Halide disabled, only a status message is reported
   * and false is returned. Otherwise one untimed warm-up pass is followed by
   * params.iterations timed passes.
   *
   * @param input   image wrapped as the Halide input buffer
   * @param output  image wrapped as the Halide output buffer
   * @param params  run parameters; `iterations` sets the number of timed passes
   * @return whatever outputResults() returns, or false if Halide is disabled
   */
  bool Bilateral::runHalideCPU(Image input, Image output, const Params& params)
  {
#if !ENABLE_HALIDE
    reportStatus("Halide not enabled during build.");
    return false;
#else
    // Wrap both images as Halide buffers
    buffer_t srcBuffer = createHalideBuffer(input);
    buffer_t dstBuffer = createHalideBuffer(output);

    reportStatus("Running Halide CPU filter");

    // One untimed pass to warm up
    halide_bilateral_cpu(&srcBuffer, &dstBuffer);

    // Timed passes
    startTiming();
    int pass = 0;
    while (pass < params.iterations)
    {
      halide_bilateral_cpu(&srcBuffer, &dstBuffer);
      ++pass;
    }
    stopTiming();

    halide_release(NULL);

    return outputResults(input, output, params);
#endif
  }
Exemplo n.º 5
0
/*========================================================================

   initReadYUV DEFINITION

   Opens the YUV file path[id] for binary reading into ptfile[id], checks
   that it holds at least NB_FRAME frames of xSize*ySize luma plus half as
   many chroma bytes, and starts timer 0.

   id    - index into the path[] / ptfile[] tables
   xSize - frame width in pixels
   ySize - frame height in pixels

   If the file cannot be opened, an error is printed and the function
   returns without starting the timer. A file that is too small is only
   reported; execution continues.

   ======================================================================*/
void initReadYUV(int id, int xSize, int ySize) {
    long fsize;

    ptfile[id] = fopen(path[id], "rb");
    if(ptfile[id] == NULL)
    {
        fprintf(stderr,"ERROR: Task read cannot open yuv_file '%s'\n", path[id]);
        system("PAUSE");
        return;
    }

#ifdef VERBOSE
    // Report the file that was actually opened.
    printf("Opened file '%s'\n", path[id]);
#endif

    // Obtain file size:
    fseek (ptfile[id] , 0 , SEEK_END);
    fsize = ftell (ptfile[id]);
    rewind (ptfile[id]);
    if(fsize < NB_FRAME*(xSize*ySize + xSize*ySize/2))
    {
        // Deliberately non-fatal: report and carry on.
        fprintf(stderr,"ERROR: Task read yuv_file incorrect size\n");
    }
#ifdef VERBOSE
    else
    {
        printf("Correct size for yuv_file '%s'\n", path[id]);
    }
#endif

    // Set initial clock
    startTiming(0);
}
Exemplo n.º 6
0
/**
 * Restarts the timer: resets base_time to 0.0, begins a new timing interval
 * and marks the timer as running.
 *
 * @return the value getTime() reported just before the reset.
 */
double RFWTimer::start() {
	// Sample the current reading first; the reset below would change it.
	const double readingBeforeReset = getTime();

	base_time = 0.0;
	startTiming();
	running = true;

	return readingBeforeReset;
}
Exemplo n.º 7
0
/**
 * Runs the Halide GPU version of the filter and reports its results.
 *
 * When the build has Halide disabled, only a status message is reported and
 * false is returned. Otherwise one untimed warm-up pass is followed by
 * params.iterations timed passes; the device is synchronised before the
 * timer is stopped, and the output buffer is then copied back to the host.
 *
 * @param input   image wrapped as the Halide input buffer
 * @param output  image wrapped as the Halide output buffer
 * @param params  run parameters; `iterations` sets the number of timed passes
 * @return whatever outputResults() returns, or false if Halide is disabled
 */
bool Sobel::runHalideGPU(Image input, Image output, const Params& params)
{
#if !ENABLE_HALIDE
    reportStatus("Halide not enabled during build.");
    return false;
#else
    // Wrap both images as Halide buffers
    buffer_t srcBuffer = createHalideBuffer(input);
    buffer_t dstBuffer = createHalideBuffer(output);

    reportStatus("Running Halide GPU filter");

    // Untimed warm-up pass; the input's host side is flagged dirty first
    // (presumably so the data gets uploaded to the device -- confirm).
    srcBuffer.host_dirty = true;
    halide_sobel_gpu(&srcBuffer, &dstBuffer);
    halide_dev_sync(NULL);

    // Timed passes, synchronised before the timer stops
    startTiming();
    int pass = 0;
    while (pass < params.iterations)
    {
        halide_sobel_gpu(&srcBuffer, &dstBuffer);
        ++pass;
    }
    halide_dev_sync(NULL);
    stopTiming();

    halide_copy_to_host(NULL, &dstBuffer);
    halide_release(NULL);

    return outputResults(input, output, params);
#endif
}
Exemplo n.º 8
0
    void run()
    {
        while (!_done)
        {
            if (_process)
            {
                startTiming(_viewer, otherThreadTimeName);

                //------------------------------------------------------------
                // Your processing goes here.

                // Do nothing for the specified number of  milliseconds, just so we can 
                // see it in the stats.
                osg::Timer_t startTick = osg::Timer::instance()->tick();
                while (osg::Timer::instance()->delta_m(startTick, osg::Timer::instance()->tick()) < _timeToRun)
                {
                    OpenThreads::Thread::YieldCurrentThread();
                }
                //------------------------------------------------------------

                endTiming(_viewer, otherThreadTimeName);

                _process = false;
            }
            else
            {
                OpenThreads::Thread::microSleep(50);
            }
        }
    }
Exemplo n.º 9
0
// Runs the "sobel" OpenCL kernel on `input`.
//
// Visible steps: initialise OpenCL via initCL(), create the kernel and two
// 2D RGBA/UNORM_INT8 images sized like the input, upload the input pixels,
// bind both images as kernel arguments, then enqueue the kernel
// params.iterations + 1 times (the first enqueue is an untimed warm-up).
//
// Returns false when initCL() fails; CHECK_ERROR_OCL is given `return false`
// as its failure action (presumably executed when err signals an error --
// confirm against the macro definition).
//
// NOTE(review): this snippet is truncated -- the end of the function
// (stopping the timer, reading back the result, releasing resources) is not
// visible here.
// NOTE(review): the early-return paths visible here do not release `kernel`,
// `d_input` or `d_output`.
bool Sobel::runOpenCL(Image input, Image output, const Params& params)
{
    if (!initCL(params, sobel_kernel, "-cl-fast-relaxed-math"))
    {
        return false;
    }

    cl_int err;
    cl_kernel kernel;
    cl_mem d_input, d_output;
    cl_image_format format = {CL_RGBA, CL_UNORM_INT8};

    kernel = clCreateKernel(m_program, "sobel", &err);
    CHECK_ERROR_OCL(err, "creating kernel", return false);

    // Both device images use the input's dimensions.
    d_input = clCreateImage2D(
                  m_context, CL_MEM_READ_ONLY, &format,
                  input.width, input.height, 0, NULL, &err);
    CHECK_ERROR_OCL(err, "creating input image", return false);

    d_output = clCreateImage2D(
                   m_context, CL_MEM_WRITE_ONLY, &format,
                   input.width, input.height, 0, NULL, &err);
    CHECK_ERROR_OCL(err, "creating output image", return false);

    // Upload the whole input image; CL_TRUE requests a blocking write.
    size_t origin[3] = {0, 0, 0};
    size_t region[3] = {input.width, input.height, 1};
    err = clEnqueueWriteImage(
              m_queue, d_input, CL_TRUE,
              origin, region, 0, 0, input.data, 0, NULL, NULL);
    CHECK_ERROR_OCL(err, "writing image data", return false);

    // The two status codes are OR-ed together so a single check covers both.
    err  = clSetKernelArg(kernel, 0, sizeof(cl_mem), &d_input);
    err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &d_output);
    CHECK_ERROR_OCL(err, "setting kernel arguments", return false);

    reportStatus("Running OpenCL kernel");

    // One work-item per output pixel. The local size stays NULL unless both
    // requested work-group dimensions are non-zero.
    const size_t global[2] = {output.width, output.height};
    const size_t *local = NULL;
    if (params.wgsize[0] && params.wgsize[1])
    {
        local = params.wgsize;
    }

    // Timed runs
    // (iterations + 1 enqueues: iteration 0 is the warm-up)
    for (int i = 0; i < params.iterations + 1; i++)
    {
        err = clEnqueueNDRangeKernel(
                  m_queue, kernel, 2, NULL, global, local, 0, NULL, NULL);
        CHECK_ERROR_OCL(err, "enqueuing kernel", return false);

        // Start timing after warm-up run
        // (clFinish waits for the warm-up to complete before the timer starts)
        if (i == 0)
        {
            err = clFinish(m_queue);
            CHECK_ERROR_OCL(err, "running kernel", return false);
            startTiming();
        }
    }
Exemplo n.º 10
0
/**
 * Resumes a stopped timer.
 *
 * If the timer is already running nothing changes and the current getTime()
 * reading is returned. Otherwise the timer is marked running, a new timing
 * interval is started and base_time is returned.
 */
double RFWTimer::resume() {
	if (!running) {
		running = true;
		startTiming();
		return base_time;
	}
	return getTime();
}
Exemplo n.º 11
0
	void FunctionBot::runArm()
	{
		int state;
		int pnulvl;
		float lim;
		state=getState();
		pnulvl=pnuState(state);
		if(state==5||state==6)
		{
			lim=MAX_POS_0/300.0f;
		}
		else
			lim=MAX_POS_1/300.0f;
		if(pnulvl>lpnulvl)
		{
			if(!inTransit)
			{
				updatePNU(pnulvl);
				inTransit=true;
				startTiming(1);
			}
			else if(timeExpired())
			{
				inTransit=false;
				lpnulvl=pnulvl;
				if(desiredAngle()<lim)
				{
					updateElbow(desiredAngle());
				}
				else
					updateElbow(lim);
			}
			else
			{
				if(desiredAngle()<MAX_POS_0/300.0f)
					updateElbow(desiredAngle());
				else
					updateElbow(MAX_POS_0/300.0f);
			}
		}
		else
		{
			if(currentAngle()>lim)
			{
				updateElbow(lim);
			}
			else
			{
				updatePNU(pnulvl);
				lpnulvl=pnulvl;
				if(desiredAngle()<lim)
					updateElbow(desiredAngle());
				else
					updateElbow(lim);
			}
		}
		updateClaw();
	}
Exemplo n.º 12
0
/* Minor collection: Cheney-copy the live nursery objects to the end of the
 * old generation.
 *
 * Returns immediately when nothing has been allocated in the nursery. When
 * s->canMinor is false no copying is done: the allocated nursery bytes are
 * simply accounted to the old generation. Otherwise the objects reachable
 * from the globals and from inter-generational pointers are forwarded into
 * the region starting at heap.start + heap.oldGenSize, that region is scanned
 * to forward what it references, weak pointers are updated, and the old
 * generation grows by the number of bytes copied.
 *
 * Timing is accumulated in cumulativeStatistics.ru_gcMinor only when
 * detailedGCTime (s) holds.
 */
void minorCheneyCopyGC (GC_state s) {
  size_t bytesAllocated;
  size_t bytesCopied;
  struct rusage ru_start;

  if (DEBUG_GENERATIONAL)
    fprintf (stderr, "minorGC  nursery = "FMTPTR"  frontier = "FMTPTR"\n",
             (uintptr_t)s->heap.nursery, (uintptr_t)s->frontier);
  assert (invariantForGC (s));
  /* Everything between the nursery start and the frontier was allocated
   * since the last collection.
   */
  bytesAllocated = s->frontier - s->heap.nursery;
  if (bytesAllocated == 0)
    return;
  s->cumulativeStatistics.bytesAllocated += bytesAllocated;
  if (not s->canMinor) {
    /* No copy: the nursery contents are counted as old generation as-is. */
    s->heap.oldGenSize += bytesAllocated;
    bytesCopied = 0;
  } else {
    if (detailedGCTime (s))
      startTiming (&ru_start);
    s->cumulativeStatistics.numMinorGCs++;
    s->forwardState.amInMinorGC = TRUE;
    if (DEBUG_GENERATIONAL or s->controls.messages) {
      fprintf (stderr, 
               "[GC: Starting minor Cheney-copy;]\n");
      fprintf (stderr,
               "[GC:\tfrom nursery at "FMTPTR" of size %s bytes.]\n",
               (uintptr_t)(s->heap.nursery),
               uintmaxToCommaString(bytesAllocated));
    }
    /* To-space is the area directly after the current old generation; its
     * limit is set bytesAllocated bytes further on.
     */
    s->forwardState.toStart = s->heap.start + s->heap.oldGenSize;
    assert (isFrontierAligned (s, s->forwardState.toStart));
    s->forwardState.toLimit = s->forwardState.toStart + bytesAllocated;
    assert (invariantForGC (s));
    s->forwardState.back = s->forwardState.toStart;
    /* Forward all globals.  Would like to avoid doing this once all
     * the globals have been assigned.
     */
    foreachGlobalObjptr (s, forwardObjptrIfInNursery);
    forwardInterGenerationalObjptrs (s);
    /* Scan the copied objects; forwardState.back is passed by address so the
     * scan can follow it as more objects get copied.
     */
    foreachObjptrInRange (s, s->forwardState.toStart, &s->forwardState.back, 
                          forwardObjptrIfInNursery, TRUE);
    updateWeaksForCheneyCopy (s);
    bytesCopied = s->forwardState.back - s->forwardState.toStart;
    s->cumulativeStatistics.bytesCopiedMinor += bytesCopied;
    s->heap.oldGenSize += bytesCopied;
    s->lastMajorStatistics.numMinorGCs++;
    if (detailedGCTime (s))
      stopTiming (&ru_start, &s->cumulativeStatistics.ru_gcMinor);
    if (DEBUG_GENERATIONAL or s->controls.messages)
      fprintf (stderr, 
               "[GC: Finished minor Cheney-copy; copied %s bytes.]\n",
               uintmaxToCommaString(bytesCopied));
  }
}
Exemplo n.º 13
0
/* Major collection: Cheney-copy the heap's live objects into the secondary
 * heap, then swap the two heaps.
 *
 * Requires (asserted) that the secondary heap exists and is at least as large
 * as the current old generation. Objects reachable from the globals are
 * forwarded into the secondary heap, the copied region is scanned to forward
 * what it references, weak pointers are updated, and the secondary heap's
 * oldGenSize is set to the number of bytes copied before the heaps are
 * swapped.
 *
 * Timing is accumulated in cumulativeStatistics.ru_gcCopying only when
 * detailedGCTime (s) holds.
 */
void majorCheneyCopyGC (GC_state s) {
  size_t bytesCopied;
  struct rusage ru_start;
  pointer toStart;

  assert (s->secondaryHeap.size >= s->heap.oldGenSize);
  if (detailedGCTime (s))
    startTiming (&ru_start);
  s->cumulativeStatistics.numCopyingGCs++;
  s->forwardState.amInMinorGC = FALSE;
  if (DEBUG or s->controls.messages) {
    fprintf (stderr, 
             "[GC: Starting major Cheney-copy;]\n");
    fprintf (stderr,
             "[GC:\tfrom heap at "FMTPTR" of size %s bytes,]\n",
             (uintptr_t)(s->heap.start), 
             uintmaxToCommaString(s->heap.size));
    fprintf (stderr, 
             "[GC:\tto heap at "FMTPTR" of size %s bytes.]\n",
             (uintptr_t)(s->secondaryHeap.start), 
             uintmaxToCommaString(s->secondaryHeap.size));
  }
  /* To-space is the entire secondary heap. */
  s->forwardState.toStart = s->secondaryHeap.start;
  s->forwardState.toLimit = s->secondaryHeap.start + s->secondaryHeap.size;
  assert (s->secondaryHeap.start != (pointer)NULL);
  /* The next assert ensures there is enough space for the copy to
   * succeed.  It does not assert 
   *   (s->secondaryHeap.size >= s->heap.size) 
   * because that is too strong.
   */
  assert (s->secondaryHeap.size >= s->heap.oldGenSize);
  /* Copying starts at the first aligned frontier position in to-space. */
  toStart = alignFrontier (s, s->secondaryHeap.start);
  s->forwardState.back = toStart;
  foreachGlobalObjptr (s, forwardObjptr);
  foreachObjptrInRange (s, toStart, &s->forwardState.back, forwardObjptr, TRUE);
  updateWeaksForCheneyCopy (s);
  /* The size is measured from secondaryHeap.start, not from toStart. */
  s->secondaryHeap.oldGenSize = s->forwardState.back - s->secondaryHeap.start;
  bytesCopied = s->secondaryHeap.oldGenSize;
  s->cumulativeStatistics.bytesCopied += bytesCopied;
  swapHeapsForCheneyCopy (s);
  s->lastMajorStatistics.kind = GC_COPYING;
  if (detailedGCTime (s))
    stopTiming (&ru_start, &s->cumulativeStatistics.ru_gcCopying);
  if (DEBUG or s->controls.messages)
    fprintf (stderr, 
             "[GC: Finished major Cheney-copy; copied %s bytes.]\n",
             uintmaxToCommaString(bytesCopied));
}
Exemplo n.º 14
0
/*========================================================================

   readYUV DEFINITION

   Reads the next frame from ptfile[id] into the y, u and v buffers:
   xSize*ySize bytes into y and xSize*ySize/4 bytes into each of u and v.

   The file is rewound once NB_FRAME frames have been consumed. For id 1,
   every time the file position is a multiple of FPS frames, the time
   measured by timer 0 is printed together with the resulting frame rate
   and the timer is restarted.

   A short read is reported on stderr; the buffers then keep whatever they
   held beyond the bytes actually read.

   ======================================================================*/
void readYUV(int id, int xSize, int ySize, unsigned char *y, unsigned char *u, unsigned char *v) {
    const int lumaSize = xSize * ySize;
    const int chromaSize = lumaSize / 4;
    const int frameSize = lumaSize + lumaSize/2;
    size_t bytesRead;

    if( ftell(ptfile[id])/frameSize >=NB_FRAME){
        rewind(ptfile[id]);
    }

    if(id == 1 && ftell(ptfile[id])%(FPS*frameSize) == 0){
        // Named elapsedUs so the local does not shadow the library's time().
        unsigned int elapsedUs = stopTiming(0);
        printf("\nMain: %d frames in %u us - %f fps\n", FPS ,elapsedUs, ((float)FPS)/(float)elapsedUs*1000000);
        startTiming(0);
    }

    // Always attempt all three planes so the file position advances exactly
    // as before, then check the total.
    bytesRead  = fread(y, sizeof(char), lumaSize, ptfile[id]);
    bytesRead += fread(u, sizeof(char), chromaSize, ptfile[id]);
    bytesRead += fread(v, sizeof(char), chromaSize, ptfile[id]);
    if(bytesRead != (size_t)lumaSize + 2 * (size_t)chromaSize){
        fprintf(stderr,"ERROR: Task read yuv_file short read (%lu of %lu bytes)\n",
                (unsigned long)bytesRead,
                (unsigned long)((size_t)lumaSize + 2 * (size_t)chromaSize));
    }
}
Exemplo n.º 15
0
/// Keeps the calling thread busy (yielding in a loop) for the given number of
/// milliseconds, and records the interval in the viewer's stats under `name`
/// via startTiming()/endTiming().
void doSomethingAndTimeIt(osgViewer::Viewer& viewer, const std::string& name, double milliseconds)
{
    startTiming(viewer, name);

    //------------------------------------------------------------
    // Your processing goes here.

    // Placeholder workload: yield until the requested time has passed, just
    // so the interval shows up in the stats.
    osg::Timer* const clock = osg::Timer::instance();
    const osg::Timer_t begin = clock->tick();
    while (clock->delta_m(begin, clock->tick()) < milliseconds)
    {
        OpenThreads::Thread::YieldCurrentThread();
    }
    //------------------------------------------------------------

    endTiming(viewer, name);
}
Exemplo n.º 16
0
/*
 * Benchmarks key lookups on a freshly built hash ring and prints statistics.
 *
 * A ring with numReplicas replicas per node is created and filled with
 * numNodes nodes; numKeys keys of keySize bytes are generated; the complete
 * key set is then looked up 100 times, each pass being timed with
 * startTiming()/endTiming(). The printed statistics divide the measured
 * values by 1000 and 1000000000 for the "us" and "s" columns.
 *
 * Parameters:
 *   hash_fn     - hash function used by the ring
 *   numReplicas - replicas per node
 *   numNodes    - number of nodes added to the ring
 *   numKeys     - number of keys to look up per pass; must be positive
 *   keySize     - size of each key in bytes; must be positive
 *
 * On invalid sizes or allocation failure a message is printed to stderr and
 * the function returns without running the benchmark.
 */
void runBench(HASH_FUNCTION hash_fn, int numReplicas, int numNodes, int numKeys, int keySize) {
    /* Never hand a NULL pointer to "%s" if an unexpected hash_fn comes in. */
    const char *hash = "unknown";
    if(hash_fn == HASH_FUNCTION_MD5) hash = "MD5";
    else if(hash_fn == HASH_FUNCTION_SHA1) hash = "SHA1";

    if(numKeys <= 0 || keySize <= 0) {
        fprintf(stderr, "bench: numKeys and keySize must be positive\n");
        return;
    }

    printf("----------------------------------------------------\n");
    printf("bench (%s): replicas = %d, nodes = %d, keys: %d, ring size: %d\n", hash, numReplicas, numNodes, numKeys, numReplicas * numNodes);
    printf("----------------------------------------------------\n");
    hash_ring_t *ring = hash_ring_create(numReplicas, hash_fn);
    if(ring == NULL) {
        fprintf(stderr, "bench: failed to create hash ring\n");
        return;
    }

    addNodes(ring, numNodes);

    /* Size computed in size_t so large key sets do not overflow int. */
    uint8_t *keys = (uint8_t*)malloc((size_t)keySize * (size_t)numKeys);
    if(keys == NULL) {
        fprintf(stderr, "bench: failed to allocate keys\n");
        hash_ring_free(ring);
        return;
    }
    generateKeys(keys, numKeys, keySize);

    printf("running...\r");

    uint64_t min = 0;
    uint64_t max = 0;
    uint64_t total = 0;
    int times = 100;

    int x, y;
    for(y = 0; y < times; y++) {
        startTiming();
        for(x = 0; x < numKeys; x++) {
            /* The lookup stays outside assert() so that NDEBUG builds still
             * execute (and therefore time) it. */
            const int found = hash_ring_find_node(ring, keys + ((size_t)keySize * x), keySize) != NULL;
            assert(found);
            (void)found;
        }
        uint64_t result = endTiming();
        if(result > max) max = result;
        if(min == 0 || result < min) min = result;
        total += result;
    }

    /* All statistics are computed in floating point: integer division would
     * truncate, and would divide by zero for very fast runs. */
    const double lookups = (double)numKeys * times;
    printf("stats: total = %.5fs, avg/lookup: %.5fus, min: %.5fus, max: %.5fus, ops/sec: %.0f\n", 
        (double)total / 1000000000,
        (double)total / lookups / 1000,
        (double)min / numKeys / 1000,
        (double)max / numKeys / 1000,
        total > 0 ? 1000000000 / ((double)total / lookups) : 0.0);

    free(keys);
    hash_ring_free(ring);
}
Exemplo n.º 17
0
    /**
     * Renders one frame; while animating, first advances the Y rotation and
     * refreshes the uniform buffers.
     *
     * Does nothing until `prepared` is set. The update + draw sequence is
     * bracketed by startTiming()/endTiming().
     */
    void render()
    {
        if (!prepared)
        {
            return;
        }

        startTiming();
        if (animating)
        {
            // Advance the rotation and wrap it once it passes 360.
            state.rotation.y += 0.05f * frameTimer;
            if (state.rotation.y > 360.0f)
            {
                state.rotation.y -= 360.0f;
            }
            updateUniformBuffers();
        }
        draw();
        endTiming();
    }
Exemplo n.º 18
0
// Handles a click on the "timer" button: toggles the timer feature on or off
// and updates the two toggle buttons (position, caption, tooltip) to match.
void ToolGlobal::pbn_clock_clicked()
{
    if (clockState)
    {
        // Currently ON -> switch OFF.

        // Close the timer settings window if there is one.
        if (timing)
        {
            timing ->close();
        }

        // Destroy the LCD display if present and stop both countdown timers.
        if (lcdNumber)
        {
            delete lcdNumber;
            lcdNumber = 0;
            timer_default ->stop();     // the 1-second countdown
            timer_target ->stop();      // the user-defined countdown
        }

        // Button positions
        pbn_clock_state->move(gbx_clock->x()+13, pbn_clock_state->y());
        pbn_clock->move(gbx_clock->x()+46, pbn_clock->y());

        // Button captions
        pbn_clock_state ->setText(tr("ON"));
        pbn_clock ->setText(tr("定时"));

        // Button tooltips
        pbn_clock_state ->setToolTip(tr("开启魔音"));
        pbn_clock ->setToolTip(tr("开启魔音"));

        clockState = false;
        return;
    }

    // Currently OFF -> switch ON.

    // Button positions
    pbn_clock_state->move(gbx_clock->x()+73, pbn_clock_state->y());
    pbn_clock->move(gbx_clock->pos());

    // Button captions
    pbn_clock_state ->setText(tr("OFF"));
    pbn_clock ->setText(tr("关闭"));

    // Button tooltips
    pbn_clock_state ->setToolTip(tr("关闭"));
    pbn_clock ->setToolTip(tr("关闭"));

    clockState = true;

    // Create and show the timer settings window, then wire its OK / Cancel
    // buttons to startTiming() / stopTiming().
    timing = new Timing(gbx_clock->x()-42, 113, 225, 135, parentWidget());
    timing ->show();
    connect(timing, SIGNAL(timing_pbnOk_click()), this, SLOT(startTiming()));
    connect(timing, SIGNAL(timing_pbnCancle_click()), this, SLOT(stopTiming()));
}
Exemplo n.º 19
0
/**
 * Stops the statistics of the current mode and starts those of `mod` for
 * flow `fid`.
 *
 * Every call counts as one mode switch (nSwitches). A mode other than
 * Rabbit, Warmup, Detail or Timing trips the I(0) assertion.
 */
void EmuSampler::setMode(EmuMode mod, FlowID fid)
{
  nSwitches->inc();
  stop();

  if (mod == EmuRabbit) {
    startRabbit(fid);
  } else if (mod == EmuWarmup) {
    startWarmup(fid);
  } else if (mod == EmuDetail) {
    startDetail(fid);
  } else if (mod == EmuTiming) {
    startTiming(fid);
  } else {
    I(0);
  }
}
Exemplo n.º 20
0
         /**
          * Initialises SDL (video, audio, timer), the graphics layer, the
          * window caption and the audio mixer, seeds the C random number
          * generator, registers the channel-finished callback and starts
          * timing.
          *
          * @param shadersOn forwarded to graphics::setupGraphics()
          * @throws std::runtime_error if SDL_Init or Mix_OpenAudio fails;
          *         in the latter case SDL is shut down again first.
          */
         InitAndQuit(bool shadersOn)
         {
            if(SDL_Init(SDL_INIT_VIDEO | SDL_INIT_AUDIO | SDL_INIT_TIMER) != 0)
            {
               std::ostringstream error;
               error << "Failed on SDL_Init: " << SDL_GetError() << "\n";
               throw std::runtime_error(error.str());
            }
            graphics::setupGraphics(shadersOn);
            SDL_WM_SetCaption("Sousaphone Hero", "Sousaphone Hero");
            if(Mix_OpenAudio(AUDIO_RATE, AUDIO_FORMAT, AUDIO_CHANNELS, AUDIO_BUFFERS) != 0)
            {
               // Build the message before shutting SDL down: the error text
               // may no longer be available once SDL_Quit() has run.
               std::ostringstream error;
               error << "Failed on Mix_OpenAudio: " << Mix_GetError() << "\n";
               SDL_Quit();
               throw std::runtime_error(error.str());
            }

            // Explicit conversion: srand takes an unsigned seed.
            std::srand(static_cast<unsigned int>(time(NULL)));

            Mix_ChannelFinished(doChangedNotes); // set callback function

            startTiming();
         }
Exemplo n.º 21
0
/*
 * csort SIZE [ TYPE [SEED]]
 *
 * Fills a vector of SIZE random doubles, sorts it between
 * startTiming()/stopTiming(), and prints the result when SIZE < 20.
 *
 *   SIZE - number of elements; must be a non-negative decimal integer
 *   TYPE - first character selects the value range: 'A' (the default)
 *          requests randomVector(N, 0.0, 1.0), anything else
 *          randomVector(N, 1.0, 20.0)
 *   SEED - seed passed to srand (default 42)
 *
 * Exits with status 1 on a missing or malformed SIZE.
 */
int main (int argc, char **argv) {
  int N;
  long requested;
  char *end;
  double * values;
  char type;

  if (argc < 2) {
    fprintf (stderr, "Usage: csort SIZE [ TYPE [SEED]] \n");
    exit (1);
  }
  type = argc > 2 ? argv[2][0] : 'A';

  /* strtol instead of atoi so that garbage, trailing junk, negative and
   * out-of-range sizes are rejected instead of silently misread. */
  requested = strtol (argv[1], &end, 10);
  if (end == argv[1] || *end != '\0'
      || requested < 0 || (long)(int)requested != requested) {
    fprintf (stderr, "csort: invalid SIZE '%s'\n", argv[1]);
    fprintf (stderr, "Usage: csort SIZE [ TYPE [SEED]] \n");
    exit (1);
  }
  N = (int)requested;
  srand (argc > 3 ? atol (argv[3]) : 42);

  if (type == 'A')
    values = randomVector(N, 0.0, 1.0);
  else 
    values = randomVector(N, 1.0, 20.0);

  /* Only the sort itself is timed. */
  startTiming();
  sort (values, N);
  stopTiming();

  if (N < 20)
    printArr(values, N);
  return 0;
}
Exemplo n.º 22
0
// Queues one instruction for flow `fid` into the emulator.
//
// Visible flow:
//   1. Returns immediately if execute(fid, icount) reports false.
//   2. With doPower set, asks callPowerModel() whether the power model is due;
//      if so (and the returned interval ti is positive) switches to EmuTiming,
//      runs pwrmodel.calcStats() and updates the thermal metrics. A calcStats
//      result of 90 sets endSimSiged.
//   3. Marks the sampler done once totalnInst exceeds nInstMax or endSimSiged
//      is set.
//   4. Returns without queueing while nInstSkip > totalnInst.
//   5. If still in EmuRabbit mode, stops it and starts timing mode.
//   6. Forwards the instruction to emul->queueInstruction(); bit 0x80 of `op`
//      is passed as the thumb flag.
//
// NOTE(review): `icount` is not a parameter or local of this function; it is
// presumably a member -- confirm.
void SamplerGPUSim::queue(uint32_t insn, uint64_t pc, uint64_t addr, uint32_t fid, char op)
  /* main qemu/gpu/tracer/... entry point {{{1 */
{
  if(!execute(fid,icount))
    return; // QEMU can still send a few additional instructions (emul should stop soon)
  
  I(mode!=EmuInit);

  I(insn!=0);
  I(icount!=0);

  if (doPower){

    // ti is passed to callPowerModel() and expected to be > 0 afterwards
    // (presumably an output argument -- confirm).
    uint64_t ti = 0;
    bool callpwr = callPowerModel(ti, fid);

    if (callpwr){ 

      I(ti > 0);
      //printf("totalnInst:%ld, nPassedInst:%ld, interval:%ld\n", totalnInst, nPassedInst, interval);


      // Passed four times to calcStats(); its value is not read here afterwards.
      bool dummy = false;

      //std::cout<<"mode "<<mode<<" Timeinterval "<<ti<<" last time "<<lastTime<<"\n";  

      int simt = 0;
      if (ti > 0){
        // NOTE(review): the mode is set to EmuTiming right before calcStats()
        // is given !(mode == EmuTiming) -- presumably always false at this
        // point; confirm this is intended.
        setMode(EmuTiming, fid);
        simt =  BootLoader::pwrmodel.calcStats(ti, 
            !(mode == EmuTiming), static_cast<float>(freq), dummy, dummy, dummy, dummy); 

        // A calcStats() result of 90 requests the end of the simulation.
        endSimSiged = (simt==90)?1:0;
        BootLoader::pwrmodel.sescThermWrapper->sesctherm.updateMetrics();  
      }
    }
  }// doPower
  
  if (nInstMax < totalnInst || endSimSiged) {
    markDone();
    return;
  }
  if (nInstSkip>totalnInst) {
    I(mode==EmuRabbit);
    return;
  }
  I(nInstSkip<=totalnInst);
  // Skip phase is over: leave Rabbit mode and start timing mode.
  if (mode == EmuRabbit) {
    stop();
    startTiming(fid);
  }

  // Disabled instrumentation (compiled out).
#if 0
  static std::set<AddrType> seenPC;
  static Time_t seenPC_last   = 0;
  static Time_t seenPC_active = 0;
  static Time_t seenPC_total  = 0;
  seenPC_total++;
  if (seenPC.find(pc^insn) == seenPC.end()) {
    seenPC.insert(pc^insn);
    seenPC_last = seenPC_total;
  }
  if ((seenPC_last+1000) > seenPC_total)
    seenPC_active++;
/*
  if ((seenPC_total & 1048575) == 1)
    MSG("%5.3f",(100.0*seenPC_active)/(1.0+seenPC_total)); */
#endif

  emul->queueInstruction(insn,pc,addr, (op&0x80) /* thumb */, fid);
}
Exemplo n.º 23
0
/* Run one garbage collection.
 *
 * Sequence visible here: enter GC, always run a minor GC, then run a major GC
 * when forceMajor is set or when the total request (old-gen + nursery + any
 * stack growth) exceeds the heap space left after the old generation. Then
 * the current-heap state is set up for the requested sizes, the current stack
 * is grown if the mutator-stack invariant did not hold, and the thread/stack
 * state is refreshed. Finally a GC signal is flagged as pending when one is
 * handled and a signal handler thread exists.
 *
 * Parameters:
 *   s                     - the GC state
 *   oldGenBytesRequested  - bytes that must be free in the old generation
 *                           afterwards (asserted via hasHeapBytesFree)
 *   nurseryBytesRequested - bytes that must be free in the nursery afterwards
 *   forceMajor            - run a major GC regardless of free space
 *   mayResize             - forwarded to majorGC
 *
 * When needGCTime (s) holds, the elapsed time is added to
 * cumulativeStatistics.ru_gc and maxPauseTime is updated.
 */
void performGC (GC_state s, 
                size_t oldGenBytesRequested,
                size_t nurseryBytesRequested, 
                bool forceMajor,
                bool mayResize) {
  uintmax_t gcTime;
  bool stackTopOk;
  size_t stackBytesRequested;
  struct rusage ru_start;
  size_t totalBytesRequested;

  enterGC (s);
  s->cumulativeStatistics.numGCs++;
  if (DEBUG or s->controls.messages) {
    /* The nursery is the part of the heap from heap.nursery to the heap end. */
    size_t nurserySize = s->heap.size - ((size_t)(s->heap.nursery - s->heap.start));
    size_t nurseryUsed = (size_t)(s->frontier - s->heap.nursery);
    fprintf (stderr, 
             "[GC: Starting gc #%s; requesting %s nursery bytes and %s old-gen bytes,]\n",
             uintmaxToCommaString(s->cumulativeStatistics.numGCs),
             uintmaxToCommaString(nurseryBytesRequested),
             uintmaxToCommaString(oldGenBytesRequested));
    fprintf (stderr, 
             "[GC:\theap at "FMTPTR" of size %s bytes (+ %s bytes card/cross map),]\n",
             (uintptr_t)(s->heap.start),
             uintmaxToCommaString(s->heap.size),
             uintmaxToCommaString(s->heap.withMapsSize - s->heap.size));
    fprintf (stderr, 
             "[GC:\twith old-gen of size %s bytes (%.1f%% of heap),]\n",
             uintmaxToCommaString(s->heap.oldGenSize),
             100.0 * ((double)(s->heap.oldGenSize) / (double)(s->heap.size)));
    fprintf (stderr,
             "[GC:\tand nursery of size %s bytes (%.1f%% of heap),]\n",
             uintmaxToCommaString(nurserySize),
             100.0 * ((double)(nurserySize) / (double)(s->heap.size)));
    fprintf (stderr,
             "[GC:\tand nursery using %s bytes (%.1f%% of heap, %.1f%% of nursery).]\n",
             uintmaxToCommaString(nurseryUsed),
             100.0 * ((double)(nurseryUsed) / (double)(s->heap.size)),
             100.0 * ((double)(nurseryUsed) / (double)(nurserySize)));
  }
  assert (invariantForGC (s));
  if (needGCTime (s))
    startTiming (&ru_start);
  /* A minor GC always runs first. */
  minorGC (s);
  /* If the mutator-stack invariant does not hold, reserve room for a grown
   * stack (including its header) in the old-generation request.
   */
  stackTopOk = invariantForMutatorStack (s);
  stackBytesRequested = 
    stackTopOk 
    ? 0 
    : sizeofStackWithHeader (s, sizeofStackGrowReserved (s, getStackCurrent (s)));
  totalBytesRequested = 
    oldGenBytesRequested 
    + nurseryBytesRequested
    + stackBytesRequested;
  /* Major GC only when forced or when the request does not fit in the space
   * left after the old generation.
   */
  if (forceMajor 
      or totalBytesRequested > s->heap.size - s->heap.oldGenSize)
    majorGC (s, totalBytesRequested, mayResize);
  setGCStateCurrentHeap (s, oldGenBytesRequested + stackBytesRequested, 
                         nurseryBytesRequested);
  assert (hasHeapBytesFree (s, oldGenBytesRequested + stackBytesRequested,
                            nurseryBytesRequested));
  unless (stackTopOk)
    growStackCurrent (s);
  setGCStateCurrentThreadAndStack (s);
  if (needGCTime (s)) {
    gcTime = stopTiming (&ru_start, &s->cumulativeStatistics.ru_gc);
    s->cumulativeStatistics.maxPauseTime = 
      max (s->cumulativeStatistics.maxPauseTime, gcTime);
  } else
    gcTime = 0;  /* Assign gcTime to quell gcc warning. */
  if (DEBUG or s->controls.messages) {
    size_t nurserySize = s->heap.size - (size_t)(s->heap.nursery - s->heap.start);
    fprintf (stderr, 
             "[GC: Finished gc #%s; time %s ms,]\n",
             uintmaxToCommaString(s->cumulativeStatistics.numGCs),
             uintmaxToCommaString(gcTime));
    fprintf (stderr, 
             "[GC:\theap at "FMTPTR" of size %s bytes (+ %s bytes card/cross map),]\n",
             (uintptr_t)(s->heap.start),
             uintmaxToCommaString(s->heap.size),
             uintmaxToCommaString(s->heap.withMapsSize - s->heap.size));
    fprintf (stderr, 
             "[GC:\twith old-gen of size %s bytes (%.1f%% of heap),]\n",
             uintmaxToCommaString(s->heap.oldGenSize),
             100.0 * ((double)(s->heap.oldGenSize) / (double)(s->heap.size)));
    fprintf (stderr,
             "[GC:\tand nursery of size %s bytes (%.1f%% of heap).]\n",
             uintmaxToCommaString(nurserySize),
             100.0 * ((double)(nurserySize) / (double)(s->heap.size)));
  }
  /* Send a GC signal. */
  /* The signal is only flagged as pending here, not delivered; the general
   * signalIsPending flag is left untouched while inside a signal handler.
   */
  if (s->signalsInfo.gcSignalHandled
      and s->signalHandlerThread != BOGUS_OBJPTR) {
    if (DEBUG_SIGNALS)
      fprintf (stderr, "GC Signal pending.\n");
    s->signalsInfo.gcSignalPending = TRUE;
    unless (s->signalsInfo.amInSignalHandler) 
      s->signalsInfo.signalIsPending = TRUE;
  }
  if (DEBUG) 
    displayGCState (s, stderr);
  /* The stack bytes are no longer part of the request checked here. */
  assert (hasHeapBytesFree (s, oldGenBytesRequested, nurseryBytesRequested));
  assert (invariantForGC (s));
  leaveGC (s);
}
Exemplo n.º 24
0
void inspectDumpFile(const char *path, int repeat, StateMachine *machine, TableStateMachine *tableMachine, int isTableMachine,
		int verbose, int timing, int threads, int packets_to_steal, int dedicated_use_compressed,
		int work_group_size, int max_wgs, double *thresholds, int drop) {
#else
void inspectDumpFile(const char *path, int repeat, StateMachine *machine, int isTableMachine, int verbose, int timing, int threads) {
#endif
	double /*rate,*/ combinedRate, threadRate;//, rateWithHeaders;
	Timer t;
	long size;//, sizeWithHeaders;
	int i, cpuid;
#ifdef GLOBAL_TIMING
	GlobalTimerResult gtimer_result;
	int j;
#ifdef PRINT_GLOBAL_TIMER_EVENTS
	GlobalTimerEvent **events;
#endif
#endif
	ScannerData *scanners;
	PacketReaderData reader;
	LinkedList *packet_queues;

	MulticoreManager manager;

#ifdef COUNT_FAIL_PERCENT
	long totalFailures, totalGotos;
#endif

#ifdef PAPI
	if (PAPI_library_init(PAPI_VER_CURRENT) != PAPI_VER_CURRENT) {
		fprintf(stderr, "Cannot init PAPI\n");
		exit(1);
	}

	if (PAPI_thread_init((unsigned long (*)(void))pthread_self) != PAPI_OK) {
		fprintf(stderr, "Cannot init PAPI for threads\n");
		exit(1);
	}
#endif

	packet_queues = (LinkedList*)malloc(sizeof(LinkedList) * threads);
	scanners = (ScannerData*)malloc(sizeof(ScannerData) * threads);

	for (i = 0; i < threads; i++) {
		list_init(&packet_queues[i]);
	}

	packetreader_init(&reader, path, repeat, packet_queues, threads);
	for (i = 0; i < threads; i++) {
#ifdef HYBRID_SCANNER
		scanner_init(&(scanners[i]), i, &manager, machine, tableMachine, isTableMachine, &packet_queues[i], verbose, drop);
#else
		scanner_init(&(scanners[i]), i, &manager, machine, isTableMachine, &packet_queues[i], verbose);
#endif
	}

#ifdef HYBRID_SCANNER
	multicore_manager_init(&manager, scanners, threads, work_group_size, max_wgs, packets_to_steal, dedicated_use_compressed);
	multicore_manager_set_thresholds(&manager, thresholds);
#else
	multicore_manager_init(&manager, scanners, threads, 1, threads, 0, 0);
#endif
	packetreader_start(&reader);

	packetreader_join(&reader);

#ifdef HYBRID_SCANNER
	multicore_manager_start(&manager);
#endif
#ifdef GLOBAL_TIMING
#ifdef PRINT_GLOBAL_TIMER_EVENTS
	events = NULL;
#endif
	global_timer_start(&(manager.gtimer));
#endif
	if (timing) {
		startTiming(&t);
	}

	for (i = 0; i < threads; i++) {
		// If CPUs are ordered [core0,core0,...,core0,core1,core1,...,core1,...]
		//cpuid = i;
		// If CPUs are ordered [core0,core1,...,coreN,core0,core1,...,coreN,...]
		cpuid = (i % 2 == 0) ? i / 2 : (threads + i) / 2;
		scanner_start_with_affinity(&(scanners[i]), cpuid);

		// If you use the next line, comment out the pthread_attr_destroy call in scanner_join!!!
		//scanner_start(&(scanners[i]));
	}

	for (i = 0; i < threads; i++) {
		scanner_join(&(scanners[i]));
	}

//	scanner_start(&(scanners[0]));
//	scanner_start(&(scanners[1]));
//	scanner_join(&(scanners[0]));
//	scanner_join(&(scanners[1]));

	if (timing) {
		endTiming(&t);
	}

#ifdef GLOBAL_TIMING
	global_timer_end(&(manager.gtimer));
#endif

#ifdef HYBRID_SCANNER
	multicore_manager_stop(&manager);

	multicore_manager_join(&manager);
#endif

#ifdef GLOBAL_TIMING
	global_timer_join(&(manager.gtimer));
	global_timer_get_results(&(manager.gtimer), &gtimer_result);
#endif

	if (timing) {
		//endTiming(&t);
		size = reader.size;
		//sizeWithHeaders = reader.sizeWithHeaders;
		//rate = GET_TRANSFER_RATE(size, &t);
		//rateWithHeaders = GET_TRANSFER_RATE(sizeWithHeaders, &t);

//		printf("Time(micros)\tData(No H)\tData(w/ H)\tRate(No H) Mb/s\tRate (w/ H) Mb/s\n");
		//printf("%8ld\t%9ld\t%9ld\t%5.4f\t%5.4f\n", t.micros, size, sizeWithHeaders, rate, rateWithHeaders);

		printf("TOTAL_BYTES\tTotal data scanned: %ld bytes\n", size);
		//printf("TOTAL_TIME\tTotal time: %ld ms\n", t.micros);
		//printf("TOTAL_THRPT\tTotal throughput: %5.4f Mbps\n", rate);

		combinedRate = 0;
		printf("Alert mode timer: %ld us\n", manager.alert_mode_timer.micros);
		for (i = 0; i < threads; i++) {
			if (0 && manager.alert_mode_used) {
				threadRate = GET_TRANSFER_RATE(scanners[i].bytes_scanned_since_alert, &(manager.alert_mode_timer));
			} else {
				threadRate = GET_SCANNER_TRANSFER_RATE(&(scanners[i]));
			}
			combinedRate += threadRate;
			printf("T_%2d_THRPT\t%5.4f\tMbps\t%lu\tB\t%lu\tB\t%ld\tus\n", i, threadRate, scanners[i].bytes_scanned, scanners[i].bytes_scanned_since_alert, scanners[i].timer.micros);
		}
		printf("COMB_THRPT\t%5.4f\tMbps\n", combinedRate);

#ifdef GLOBAL_TIMING
		//printf("\nGlobal timing:\n");
/*
		printf("Time\t");
		for (j = 0; j < manager.gtimer.intervals; j++) {
			printf("%6ld\t", gtimer_result.times[j]);
		}
		printf("\n");
*/
		for (i = 0; i < manager.gtimer.num_scanners; i++) {
			printf("T_%2d_GTIME\t", i);
			for (j = 0; j < manager.gtimer.intervals; j++) {
				printf("%5.3f\t", gtimer_result.results[gtimer_result.intervals * i + j]);
			}
			printf("\n");
		}

#ifdef PRINT_GLOBAL_TIMER_EVENTS
		j = global_timer_get_events(&(manager.gtimer), &events);
		if (j > 0) {
			printf("\nEvents:\n");
			for (i = 0; i < j; i++) {
				printf("Event %d: %s [Time: %d, Source: %s]\n", i, events[i]->text, events[i]->interval, events[i]->source);
			}
		}
#endif
#endif
	}
#ifdef COUNT_FAIL_PERCENT
	totalFailures = totalGotos = 0;
        for (i = 0; i < threads; i++) {
                totalFailures += scanners[i].stats.totalFailures;
		totalGotos += scanners[i].stats.totalGotos;
        }

        printf("Fail percent: %f\n", ((double)totalFailures) / (totalFailures + totalGotos));
        printf("Total failures: %ld, Total gotos: %ld\n", totalFailures, totalGotos);
#endif

    multicore_manager_destroy(&manager);
#ifdef GLOBAL_TIMING
    global_timer_destroy(&(manager.gtimer));
    global_timer_destroy_result(&gtimer_result);
#endif

    free(scanners);
    for (i = 0; i < threads; i++) {
    	//printf("Status of input-queue of thread %d: in=%d, out=%d\n", i, packet_queues[i].in, packet_queues[i].out);
    	list_destroy(&(packet_queues[i]), 1);
    }
    free(packet_queues);
}
Exemplo n.º 25
0
// ----------------------------------------------------------------------------
// performForwardGpuNUFFT: NUFFT
//
// Forward gpuNUFFT implementation - interpolation from uniform grid data onto 
//                                   nonuniform k-space data based on optimized 
//                                   gpuNUFFT kernel with minimal oversampling
//                                   ratio (see Beatty et al.)
//
// Steps performed for every coil:
//              - optional multiplication with the coil sensitivity
//              - apodization correction
//              - zero padding with osf
//              - FFT (enclosed by FFT shifts)
//              - convolution and resampling
//              - scaling and reordering of the result
//
// parameters:
//  * imgData      : input image data (host memory). Read at offset
//                   coil * imgDims.count() for each coil; when sensitivity
//                   data is applied the same (first) image is reused for
//                   every coil.
//  * kspaceData   : output k-space data (host memory). Its dim.channels
//                   defines the number of coils; the result of each coil is
//                   written at offset coil * kSpaceTraj.count().
//  * gpuNUFFTOut  : not evaluated by this function
//
// A failing cuFFT call or CUDA synchronization is only reported on
// stderr/stdout; processing continues and no error is returned.
//
// NOTE(review): gi_host is released with free() at the end of this function;
// presumably it is (re)created by initDeviceMemory() - confirm.
//  
void gpuNUFFT::GpuNUFFTOperator::performForwardGpuNUFFT(gpuNUFFT::Array<DType2> imgData,gpuNUFFT::Array<CufftType>& kspaceData, GpuNUFFTOutput gpuNUFFTOut)
{
  if (DEBUG)
  {
    std::cout << "performing forward gpuNUFFT!!!" << std::endl;
    std::cout << "dataCount: " << kspaceData.count() << " chnCount: " << kspaceData.dim.channels << std::endl;
    std::cout << "imgCount: " << imgData.count() << " gridWidth: " << this->getGridWidth() << std::endl;
  }
  showMemoryInfo();
  
  if (debugTiming)
    startTiming();

  int			data_count          = (int)this->kSpaceTraj.count();
  int			n_coils             = (int)kspaceData.dim.channels;
  IndType	imdata_count        = this->imgDims.count();

  //cuda mem allocation
  DType2 *imdata_d;
  CufftType *data_d;

  // imdata_count is an IndType, so it is cast to match the %d specifier
  if (DEBUG)
    printf("allocate and copy imdata of size %d...\n",(int)imdata_count);
  allocateDeviceMem<DType2>(&imdata_d,imdata_count);

  if (DEBUG)
    printf("allocate and copy data of size %d...\n",data_count);
  allocateDeviceMem<CufftType>(&data_d,data_count);

  initDeviceMemory(n_coils);
    
  if (debugTiming)
    printf("Memory allocation: %.2f ms\n",stopTiming());

  int err;

  //iterate over coils and compute result
  for (int coil_it = 0; coil_it < n_coils; coil_it++)
  {
    int data_coil_offset = coil_it * data_count;
    int im_coil_offset = coil_it * (int)imdata_count;

    if (this->applySensData())
      // perform automatically "repeating" of input image in case
      // of existing sensitivity data
      copyToDevice(imgData.data,imdata_d,imdata_count);
    else
      copyToDevice(imgData.data + im_coil_offset,imdata_d,imdata_count);

    //reset temp arrays
    cudaMemset(gdata_d,0, sizeof(CufftType)*gi_host->grid_width_dim);
    cudaMemset(data_d,0, sizeof(CufftType)*data_count);

    if (DEBUG && (cudaThreadSynchronize() != cudaSuccess))
      printf("error at thread synchronization 1: %s\n",cudaGetErrorString(cudaGetLastError()));
    
    if (this->applySensData())
    {
      copyToDevice(this->sens.data + im_coil_offset, sens_d,imdata_count);
      performSensMul(imdata_d,sens_d,gi_host,false);
    }

    // apodization Correction
    // (a precomputed deapodization function is only used for multi-coil data)
    if (n_coils > 1 && deapo_d != NULL)
      performForwardDeapodization(imdata_d,deapo_d,gi_host);
    else
      performForwardDeapodization(imdata_d,gi_host);

    if (DEBUG && (cudaThreadSynchronize() != cudaSuccess))
      printf("error at thread synchronization 2: %s\n",cudaGetErrorString(cudaGetLastError()));
    // resize by oversampling factor and zero pad
    performPadding(imdata_d,gdata_d,gi_host);

    if (debugTiming)
      startTiming();

    if (DEBUG && (cudaThreadSynchronize() != cudaSuccess))
      printf("error at thread synchronization 3: %s\n",cudaGetErrorString(cudaGetLastError()));
    // shift image to get correct zero frequency position
    performFFTShift(gdata_d,INVERSE,getGridDims(),gi_host);

    if (DEBUG && (cudaThreadSynchronize() != cudaSuccess))
      printf("error at thread synchronization 4: %s\n",cudaGetErrorString(cudaGetLastError()));
    // eventually free imdata_d
    // Forward FFT to kspace domain
    // The assignment has to be parenthesized: '!=' binds tighter than '=',
    // otherwise err would hold the result of the comparison (0/1) instead of
    // the cuFFT status code.
    if ((err = (int)pt2CufftExec(fft_plan, gdata_d, gdata_d, CUFFT_FORWARD)) != CUFFT_SUCCESS)
    {
      fprintf(stderr,"cufft has failed with err %i \n",err);
      showMemoryInfo(true,stderr);
    }

    if (DEBUG && (cudaThreadSynchronize() != cudaSuccess))
      printf("error at thread synchronization 5: %s\n",cudaGetErrorString(cudaGetLastError()));
    performFFTShift(gdata_d,FORWARD,getGridDims(),gi_host);

    if (DEBUG && (cudaThreadSynchronize() != cudaSuccess))
      printf("error at thread synchronization 6: %s\n",cudaGetErrorString(cudaGetLastError()));

    if (debugTiming)
      printf("FFT (incl. shift): %.2f ms\n",stopTiming());

    if (debugTiming)
      startTiming();

    // convolution and resampling to non-standard trajectory
    forwardConvolution(data_d,crds_d,gdata_d,NULL,sectors_d,sector_centers_d,gi_host);
    if (DEBUG && (cudaThreadSynchronize() != cudaSuccess))
      printf("error at thread synchronization 7: %s\n",cudaGetErrorString(cudaGetLastError()));
    
    if (debugTiming)
      printf("Forward Convolution: %.2f ms\n",stopTiming());

    performFFTScaling(data_d,gi_host->data_count,gi_host);
    if (DEBUG && (cudaThreadSynchronize() != cudaSuccess))
      printf("error: at thread synchronization 8: %s\n",cudaGetErrorString(cudaGetLastError()));
    
    //write result in correct order back into output array
    writeOrderedGPU(data_sorted_d,data_indices_d,data_d,(int)this->kSpaceTraj.count());
    copyFromDevice(data_sorted_d,kspaceData.data + data_coil_offset,data_count);
  }//iterate over coils
  
  freeTotalDeviceMemory(data_d,imdata_d,NULL);
  freeDeviceMemory(n_coils);

  if ((cudaThreadSynchronize() != cudaSuccess))
    fprintf(stderr,"error in performForwardGpuNUFFT function: %s\n",cudaGetErrorString(cudaGetLastError()));
  free(gi_host);
}
Exemplo n.º 26
0
// ----------------------------------------------------------------------------
// performGpuNUFFTAdj: NUFFT^H
//
// GpuNUFFT implementation - interpolation from nonuniform k-space data onto 
//                           oversampled grid based on optimized gpuNUFFT kernel
//                           with minimal oversampling ratio (see Beatty et al.)
//
// Steps performed for every coil:
//              - reordering of the k-space samples
//              - optional density compensation
//              - convolution with interpolation function
//              - iFFT (enclosed by FFT shifts)
//              - cropping due to oversampling ratio
//              - apodization correction and scaling
//              - optional multiplication with the coil sensitivity and
//                summation over the coils
//
// parameters:
//  * kspaceData   : input k-space data (host memory). Its dim.channels
//                   defines the number of coils; the data of each coil is
//                   read at offset coil * kSpaceTraj.count().
//  * imgData      : output image data (host memory). Without sensitivity
//                   data one image per coil is written at offset
//                   coil * imgDims.count(); with sensitivity data a single
//                   image summed over all coils is written.
//  * gpuNUFFTOut  : enum indicating how far gpuNUFFT has to be processed.
//                   CONVOLUTION and FFT stop inside the first coil iteration
//                   and copy the intermediate grid (grid_width_dim elements)
//                   to imgData; any other value runs the full chain.
//
// A failing cuFFT call or CUDA synchronization is only reported on
// stderr/stdout; processing continues and no error is returned.
//
// NOTE(review): gi_host is released with free() on every exit path;
// presumably it is (re)created by initDeviceMemory() - confirm.
//  
void gpuNUFFT::GpuNUFFTOperator::performGpuNUFFTAdj(gpuNUFFT::Array<DType2> kspaceData, gpuNUFFT::Array<CufftType>& imgData, GpuNUFFTOutput gpuNUFFTOut)
{
  if (DEBUG)
  {
    std::cout << "performing gpuNUFFT adjoint!!!" << std::endl;
    std::cout << "dataCount: " << kspaceData.count() << " chnCount: " << kspaceData.dim.channels << std::endl;
    std::cout << "imgCount: " << imgData.count() << " gridWidth: " << this->getGridWidth() << std::endl;
    std::cout << "apply density comp: " << this->applyDensComp() << std::endl;
    std::cout << "apply sens data: " << this->applySensData() << std::endl;
  }
  if (debugTiming)
    startTiming();

  showMemoryInfo();

  int			data_count          = (int)this->kSpaceTraj.count();
  int			n_coils             = (int)kspaceData.dim.channels;
  IndType imdata_count        = this->imgDims.count();

  // select data ordered and leave it on gpu
  DType2* data_d;

  if (DEBUG)
    printf("allocate data of size %d...\n",data_count);
  allocateDeviceMem<DType2>(&data_d,data_count);

  CufftType *imdata_d, *imdata_sum_d = NULL;

  // imdata_count is an IndType, so it is cast to match the %d specifier
  if (DEBUG)
    printf("allocate and copy imdata of size %d...\n",(int)imdata_count);
  allocateDeviceMem<CufftType>(&imdata_d,imdata_count);

  // the accumulation buffer is only needed when coils are combined
  if (this->applySensData())
  {
    if (DEBUG)
      printf("allocate and copy temp imdata of size %d...\n",(int)imdata_count);
    allocateDeviceMem<CufftType>(&imdata_sum_d,imdata_count);
    cudaMemset(imdata_sum_d,0,imdata_count*sizeof(CufftType));
  }

  initDeviceMemory(n_coils);
  int err;

  if (debugTiming)
    printf("Memory allocation: %.2f ms\n",stopTiming());

  //iterate over coils and compute result
  for (int coil_it = 0; coil_it < n_coils; coil_it++)
  {
    int data_coil_offset = coil_it * data_count;
    int im_coil_offset = coil_it * (int)imdata_count;//gi_host->width_dim;

    cudaMemset(gdata_d,0, sizeof(CufftType)*gi_host->grid_width_dim);
    //copy coil data to device and select ordered
    copyToDevice(kspaceData.data + data_coil_offset,data_d,data_count);
    selectOrderedGPU(data_d,data_indices_d,data_sorted_d,data_count);

    if (this->applyDensComp())
      performDensityCompensation(data_sorted_d,density_comp_d,gi_host);

    if (DEBUG && (cudaThreadSynchronize() != cudaSuccess))
      printf("error at adj thread synchronization 1: %s\n",cudaGetErrorString(cudaGetLastError()));
    
    if (debugTiming)
      startTiming();

    adjConvolution(data_sorted_d,crds_d,gdata_d,NULL,sectors_d,sector_centers_d,gi_host);

    if (debugTiming)
      printf("Adjoint convolution: %.2f ms\n",stopTiming());

    if (DEBUG && (cudaThreadSynchronize() != cudaSuccess))
      fprintf(stderr,"error at adj  thread synchronization 2: %s\n",cudaGetErrorString(cudaGetLastError()));
    if (gpuNUFFTOut == CONVOLUTION)
    {
      if (DEBUG)
        printf("stopping output after CONVOLUTION step\n");
      //get output
      copyFromDevice<CufftType>(gdata_d,imgData.data,gi_host->grid_width_dim);
      if (DEBUG)
        printf("test value at point zero: %f\n",(imgData.data)[0].x);

      // release gi_host last, in the same order as the regular exit path
      freeTotalDeviceMemory(data_d,imdata_d,imdata_sum_d,NULL);
      freeDeviceMemory(n_coils);
      free(gi_host);
      return;
    }
    if ((cudaThreadSynchronize() != cudaSuccess))
      fprintf(stderr,"error at adj thread synchronization 3: %s\n",cudaGetErrorString(cudaGetLastError()));
    
    if (debugTiming)
      startTiming();

    performFFTShift(gdata_d,INVERSE,getGridDims(),gi_host);

    //Inverse FFT
    // The assignment has to be parenthesized: '!=' binds tighter than '=',
    // otherwise err would hold the result of the comparison (0/1) instead of
    // the cuFFT status code.
    if ((err = (int)pt2CufftExec(fft_plan, gdata_d, gdata_d, CUFFT_INVERSE)) != CUFFT_SUCCESS)
    {
      fprintf(stderr,"cufft has failed at adj with err %i \n",err);
      showMemoryInfo(true,stderr);
    }
    if (DEBUG && (cudaThreadSynchronize() != cudaSuccess))
      fprintf(stderr,"error at adj thread synchronization 4: %s\n",cudaGetErrorString(cudaGetLastError()));

    if (gpuNUFFTOut == FFT)
    {
      if (DEBUG)
        printf("stopping output after FFT step\n");
      //get output
      copyFromDevice<CufftType>(gdata_d,imgData.data,gi_host->grid_width_dim);

      // release gi_host last, in the same order as the regular exit path
      freeTotalDeviceMemory(data_d,imdata_d,imdata_sum_d,NULL);
      freeDeviceMemory(n_coils);
      
      printf("last cuda error: %s\n", cudaGetErrorString(cudaGetLastError()));
      free(gi_host);
      return;
    }
    if (DEBUG && (cudaThreadSynchronize() != cudaSuccess))
      printf("error at adj thread synchronization 5: %s\n",cudaGetErrorString(cudaGetLastError()));
    performFFTShift(gdata_d,INVERSE,getGridDims(),gi_host);
    
    if (debugTiming)
      printf("iFFT (incl. shift) : %.2f ms\n",stopTiming());

    if (DEBUG && (cudaThreadSynchronize() != cudaSuccess))
      printf("error at adj thread synchronization 6: %s\n",cudaGetErrorString(cudaGetLastError()));
    performCrop(gdata_d,imdata_d,gi_host);

    if (DEBUG && (cudaThreadSynchronize() != cudaSuccess))
      printf("error at adj thread synchronization 7: %s\n",cudaGetErrorString(cudaGetLastError()));
    //check if precomputed deapo function can be used
    if (n_coils > 1 && deapo_d != NULL)
      performDeapodization(imdata_d,deapo_d,gi_host);
    else
      performDeapodization(imdata_d,gi_host);
    if (DEBUG && (cudaThreadSynchronize() != cudaSuccess))
      printf("error at adj thread synchronization 8: %s\n",cudaGetErrorString(cudaGetLastError()));

    performFFTScaling(imdata_d,gi_host->im_width_dim,gi_host);
    if (DEBUG && (cudaThreadSynchronize() != cudaSuccess))
      printf("error: at adj  thread synchronization 9: %s\n",cudaGetErrorString(cudaGetLastError()));

    if (this->applySensData())
    {
      // weight the coil image with its sensitivity and accumulate it
      copyToDevice(this->sens.data + im_coil_offset, sens_d,imdata_count);
      performSensMul(imdata_d,sens_d,gi_host,true);
      performSensSum(imdata_d,imdata_sum_d,gi_host);
    }
    else
    {
      // get result per coil
      // no summation is performed in absence of sensitivity data
      copyFromDevice<CufftType>(imdata_d,imgData.data+im_coil_offset,imdata_count);
    }

    if (DEBUG && (cudaThreadSynchronize() != cudaSuccess))
      printf("error: at adj  thread synchronization 10: %s\n",cudaGetErrorString(cudaGetLastError()));
  }//iterate over coils
  
  if (this->applySensData())
  {
    // get result of combined coils
    copyFromDevice<CufftType>(imdata_sum_d,imgData.data,imdata_count);
  }

  if (DEBUG && (cudaThreadSynchronize() != cudaSuccess))
    printf("error: at adj  thread synchronization 11: %s\n",cudaGetErrorString(cudaGetLastError()));
  
  freeTotalDeviceMemory(data_d,imdata_d,imdata_sum_d,NULL);
  freeDeviceMemory(n_coils);
  if ((cudaThreadSynchronize() != cudaSuccess))
    fprintf(stderr,"error in gpuNUFFT_gpu_adj function: %s\n",cudaGetErrorString(cudaGetLastError()));
  free(gi_host);
}
Exemplo n.º 27
0
/*
 * Translate a method into native code.
 *
 * Registers are allocated per basic block, using an LRU algorithm.
 * Contents of registers are spilled at the end of basic block,
 * depending on the edges in the CFG leaving the basic block:
 *
 * - If there is an edge from the basic block to an exception handler,
 *   local variables are spilled on the stack
 *
 * - If there is only one non-exception edge, and the target basic
 *   block is following the current block immediately, no spills are done
 *
 * - Otherwise, the local variables and the operand stack are spilled
 *   onto the stack
 */
jboolean
translate(Method* xmeth, errorInfo* einfo)
{
#if defined(KAFFE_VMDEBUG)
	int i;
#endif /* defined(KAFFE_VMDEBUG) */

	jint low;
	jint high;
	jvalue tmpl;
	int idx;
	SlotInfo* tmp;
	SlotInfo* tmp2;
	SlotInfo* mtable;

	bytecode* base;
	uint32 len;
	callInfo cinfo;
	fieldInfo finfo;
	Hjava_lang_Class* crinfo;
	codeinfo* mycodeInfo;

	nativeCodeInfo ncode;

	int64 tms = 0;
	int64 tme;

	static bool reinvoke = false;

	jboolean success = true;

	lockClass(xmeth->class);

	if (METHOD_TRANSLATED(xmeth)) {
		goto done3;
	}

	/* If this code block is native, then just set it up and return */
	if (methodIsNative(xmeth)) {
		void *func = native(xmeth, einfo);
		if (func != NULL) {
			engine_create_wrapper(xmeth, func);
			KAFFEJIT_TO_NATIVE(xmeth);
		} else {
			success = false;
		}
		goto done3;
	}

	/* Scan the code and determine the basic blocks */
	success = analyzeMethod(xmeth, &mycodeInfo, einfo);
	if (success == false) {
		/* It may happen that we already have translated it
		 * by implicit recursion in the verifier.
		 */
		if (METHOD_TRANSLATED(xmeth))
		  success = true;
		goto done3;
	}

#if defined(KAFFE_FEEDBACK)
	if( kaffe_feedback_file )
		lockMutex(kaffe_feedback_file);
#endif
	/* Only one in the translator at once. Must check the translation
	 * hasn't been done by someone else once we get it.
	 */
	enterTranslator();

	startTiming(&fulljit, "JIT translation");

	if (Kaffe_JavaVMArgs.enableVerboseJIT) {
		tms = currentTime();
	}

DBG(MOREJIT,
    dprintf("callinfo = %p\n", &cinfo);
    );
Exemplo n.º 28
0
int main(int argc, char** argv)
{
	cout << endl<< endl << "********************** program start************************ " << endl << endl;

	int    nx = 256,   ny = 256,   nz = 256, nn = nx*ny*nz;
    FLOAT  dx = Lx/(FLOAT)nx,  dy = Ly/(FLOAT)ny,   dz = Lz/(FLOAT)nz;
	FLOAT dt = 0.1*dx*dx/KAPPA;

	int step = 1000;
	double elaps=0.0;
	double getElapsedTime();

	int thread_num=1;

#ifdef _OPENMP
#pragma omp parallel
   {
	 thread_num = omp_get_num_threads();
     if(omp_get_thread_num()==0)cout<<"\nUsed Number of Threads : "<< thread_num <<endl<<endl;
   }
#endif


    // To avoid chaching effects for small message sizes //
    int fact = 1; for(;fact*nn*sizeof(FLOAT)<100e6;++fact);
	cout << "fact = " << fact << endl;

  //  FLOAT* f  = (FLOAT *)scalable_aligned_malloc(sizeof(FLOAT) * nn, SIMDALIGN);
  //FLOAT* fn = (FLOAT *)scalable_aligned_malloc(sizeof(FLOAT) * nn, SIMDALIGN);

	FLOAT* f  = (FLOAT *)_mm_malloc(sizeof(FLOAT) * nn, SIMDALIGN);
	FLOAT* fn = (FLOAT *)_mm_malloc(sizeof(FLOAT) * nn, SIMDALIGN);


    initArray(f ,nx, ny, nz);
    initArray(fn,nx, ny, nz);


    long data = 0;
	FLOAT flops=0.0;

    startTiming();
    for(int n = 0;n<step;++n){

//       flops += diffusion_simd(nx, ny, nz, nn, dx, dy, dz, dt, f, fn);
//       flops += diffusion_peel(nx, ny, nz, nn, dx, dy, dz, dt, f, fn);
       flops += diffusion_tiled(nx, ny, nz, nn, dx, dy, dz, dt, f, fn);

       data+=nn*2*sizeof(FLOAT);
	   swap(&f, &fn);
	   elaps += dt;

    }
    endTiming();


    cout<<"Buffer Size: " <<sizeof(FLOAT)*nn/(1000.0*1000.0) <<" [MB]  Total Data: "<<data/(1000.0*1000.0*1000.0)<<" [GB]"<<endl;
    cout<<"Bandwidth: " <<data/(1000.0*1000.0*1000.0*getElapsedTime())<<"[GB/s]"<<endl;
    cout<<"FLOPS    : " <<flops/(1000.0*1000.0*1000.0*getElapsedTime())<<"[GFLOPS]"<<endl;
    cout<<"Elapsed Time: " <<getElapsedTime()<<endl<<endl;

	error_func(nx, ny, nz, dx, f, elaps);

//    scalable_aligned_free(f );
//    scalable_aligned_free(fn);

		_mm_free(f );
		_mm_free(fn);

	return 0;
}