コード例 #1
0
ファイル: TaskTimerTest.c プロジェクト: ZECTBynmo/MrsWatson
static int _testTaskTimerCallStopTwice(void) {
  TaskTimer t = newTaskTimer(1);

  startTimingTask(t, 0);
  _testSleep();
  stopTiming(t);
  stopTiming(t);
  assertIntEquals(t->currentTask, -1);
  // Recorded time should be at most 1ms off
  assertDoubleEquals(t->totalTaskTimes[0], SLEEP_DURATION_MS, MAX_TIMER_TOLERANCE_MS);

  freeTaskTimer(t);
  return 0;
}
コード例 #2
0
ファイル: Sobel.cpp プロジェクト: jrprice/improsa
bool Sobel::runHalideGPU(Image input, Image output, const Params& params)
{
#if ENABLE_HALIDE
    // Create halide buffers
    buffer_t inputBuffer = createHalideBuffer(input);
    buffer_t outputBuffer = createHalideBuffer(output);

    reportStatus("Running Halide GPU filter");

    // Warm-up run
    inputBuffer.host_dirty = true;
    halide_sobel_gpu(&inputBuffer, &outputBuffer);
    halide_dev_sync(NULL);

    // Timed runs
    startTiming();
    for (int i = 0; i < params.iterations; i++)
    {
        halide_sobel_gpu(&inputBuffer, &outputBuffer);
    }
    halide_dev_sync(NULL);
    stopTiming();

    halide_copy_to_host(NULL, &outputBuffer);
    halide_release(NULL);

    return outputResults(input, output, params);
#else
    reportStatus("Halide not enabled during build.");
    return false;
#endif
}
コード例 #3
0
ファイル: Bilateral.cpp プロジェクト: jrprice/improsa
  bool Bilateral::runHalideCPU(Image input, Image output, const Params& params)
  {
#if ENABLE_HALIDE
    // Create halide buffers
    buffer_t inputBuffer = createHalideBuffer(input);
    buffer_t outputBuffer = createHalideBuffer(output);

    reportStatus("Running Halide CPU filter");

    // Warm-up run
    halide_bilateral_cpu(&inputBuffer, &outputBuffer);

    // Timed runs
    startTiming();
    for (int i = 0; i < params.iterations; i++)
    {
      halide_bilateral_cpu(&inputBuffer, &outputBuffer);
    }
    stopTiming();

    halide_release(NULL);

    return outputResults(input, output, params);
#else
    reportStatus("Halide not enabled during build.");
    return false;
#endif
  }
コード例 #4
0
ファイル: cheney-copy.c プロジェクト: kayceesrk/mlton-zmq
void minorCheneyCopyGC (GC_state s) {
  size_t bytesAllocated;
  size_t bytesCopied;
  struct rusage ru_start;

  if (DEBUG_GENERATIONAL)
    fprintf (stderr, "minorGC  nursery = "FMTPTR"  frontier = "FMTPTR"\n",
             (uintptr_t)s->heap.nursery, (uintptr_t)s->frontier);
  assert (invariantForGC (s));
  bytesAllocated = s->frontier - s->heap.nursery;
  if (bytesAllocated == 0)
    return;
  s->cumulativeStatistics.bytesAllocated += bytesAllocated;
  if (not s->canMinor) {
    s->heap.oldGenSize += bytesAllocated;
    bytesCopied = 0;
  } else {
    if (detailedGCTime (s))
      startTiming (&ru_start);
    s->cumulativeStatistics.numMinorGCs++;
    s->forwardState.amInMinorGC = TRUE;
    if (DEBUG_GENERATIONAL or s->controls.messages) {
      fprintf (stderr, 
               "[GC: Starting minor Cheney-copy;]\n");
      fprintf (stderr,
               "[GC:\tfrom nursery at "FMTPTR" of size %s bytes.]\n",
               (uintptr_t)(s->heap.nursery),
               uintmaxToCommaString(bytesAllocated));
    }
    s->forwardState.toStart = s->heap.start + s->heap.oldGenSize;
    assert (isFrontierAligned (s, s->forwardState.toStart));
    s->forwardState.toLimit = s->forwardState.toStart + bytesAllocated;
    assert (invariantForGC (s));
    s->forwardState.back = s->forwardState.toStart;
    /* Forward all globals.  Would like to avoid doing this once all
     * the globals have been assigned.
     */
    foreachGlobalObjptr (s, forwardObjptrIfInNursery);
    forwardInterGenerationalObjptrs (s);
    foreachObjptrInRange (s, s->forwardState.toStart, &s->forwardState.back, 
                          forwardObjptrIfInNursery, TRUE);
    updateWeaksForCheneyCopy (s);
    bytesCopied = s->forwardState.back - s->forwardState.toStart;
    s->cumulativeStatistics.bytesCopiedMinor += bytesCopied;
    s->heap.oldGenSize += bytesCopied;
    s->lastMajorStatistics.numMinorGCs++;
    if (detailedGCTime (s))
      stopTiming (&ru_start, &s->cumulativeStatistics.ru_gcMinor);
    if (DEBUG_GENERATIONAL or s->controls.messages)
      fprintf (stderr, 
               "[GC: Finished minor Cheney-copy; copied %s bytes.]\n",
               uintmaxToCommaString(bytesCopied));
  }
}
コード例 #5
0
ファイル: TaskTimer.c プロジェクト: ZECTBynmo/MrsWatson
void startTimingTask(TaskTimer taskTimer, const int taskId) {
  if(taskId == taskTimer->currentTask) {
    return;
  }
  stopTiming(taskTimer);
#if WINDOWS
  QueryPerformanceCounter(&(taskTimer->startTime));
#elif UNIX
  gettimeofday(taskTimer->startTime, NULL);
#endif
  taskTimer->currentTask = taskId;
}
コード例 #6
0
ファイル: TaskTimerTest.c プロジェクト: ZECTBynmo/MrsWatson
static int _testTaskTimerDuration(void) {
  TaskTimer t = newTaskTimer(1);

  assertIntEquals(t->currentTask, -1);
  startTimingTask(t, 0);
  _testSleep();
  stopTiming(t);
  assertIntEquals(t->currentTask, -1);
  assertDoubleEquals(t->totalTaskTimes[0], SLEEP_DURATION_MS, MAX_TIMER_TOLERANCE_MS);

  freeTaskTimer(t);
  return 0;
}
コード例 #7
0
ファイル: cheney-copy.c プロジェクト: kayceesrk/mlton-zmq
void majorCheneyCopyGC (GC_state s) {
  size_t bytesCopied;
  struct rusage ru_start;
  pointer toStart;

  assert (s->secondaryHeap.size >= s->heap.oldGenSize);
  if (detailedGCTime (s))
    startTiming (&ru_start);
  s->cumulativeStatistics.numCopyingGCs++;
  s->forwardState.amInMinorGC = FALSE;
  if (DEBUG or s->controls.messages) {
    fprintf (stderr, 
             "[GC: Starting major Cheney-copy;]\n");
    fprintf (stderr,
             "[GC:\tfrom heap at "FMTPTR" of size %s bytes,]\n",
             (uintptr_t)(s->heap.start), 
             uintmaxToCommaString(s->heap.size));
    fprintf (stderr, 
             "[GC:\tto heap at "FMTPTR" of size %s bytes.]\n",
             (uintptr_t)(s->secondaryHeap.start), 
             uintmaxToCommaString(s->secondaryHeap.size));
  }
  s->forwardState.toStart = s->secondaryHeap.start;
  s->forwardState.toLimit = s->secondaryHeap.start + s->secondaryHeap.size;
  assert (s->secondaryHeap.start != (pointer)NULL);
  /* The next assert ensures there is enough space for the copy to
   * succeed.  It does not assert 
   *   (s->secondaryHeap.size >= s->heap.size) 
   * because that is too strong.
   */
  assert (s->secondaryHeap.size >= s->heap.oldGenSize);
  toStart = alignFrontier (s, s->secondaryHeap.start);
  s->forwardState.back = toStart;
  foreachGlobalObjptr (s, forwardObjptr);
  foreachObjptrInRange (s, toStart, &s->forwardState.back, forwardObjptr, TRUE);
  updateWeaksForCheneyCopy (s);
  s->secondaryHeap.oldGenSize = s->forwardState.back - s->secondaryHeap.start;
  bytesCopied = s->secondaryHeap.oldGenSize;
  s->cumulativeStatistics.bytesCopied += bytesCopied;
  swapHeapsForCheneyCopy (s);
  s->lastMajorStatistics.kind = GC_COPYING;
  if (detailedGCTime (s))
    stopTiming (&ru_start, &s->cumulativeStatistics.ru_gcCopying);
  if (DEBUG or s->controls.messages)
    fprintf (stderr, 
             "[GC: Finished major Cheney-copy; copied %s bytes.]\n",
             uintmaxToCommaString(bytesCopied));
}
コード例 #8
0
ファイル: TaskTimerTest.c プロジェクト: ZECTBynmo/MrsWatson
static int _testTaskTimerDurationMultipleTimes(void) {
  TaskTimer t = newTaskTimer(1);
  int i;

  for(i = 0; i < 5; i++) {
    assertIntEquals(t->currentTask, -1);
    startTimingTask(t, 0);
    _testSleep();
    stopTiming(t);
    assertIntEquals(t->currentTask, -1);
  }
  assertDoubleEquals(t->totalTaskTimes[0], 5.0 * SLEEP_DURATION_MS, MAX_TIMER_TOLERANCE_MS * 5.0);

  freeTaskTimer(t);
  return 0;
}
コード例 #9
0
ファイル: yuvRead.c プロジェクト: blaunay/preesm
/*========================================================================

   readYUV DEFINITION

   ======================================================================*/
void readYUV(int id, int xSize, int ySize, unsigned char *y, unsigned char *u, unsigned char *v) {

    if( ftell(ptfile[id])/(xSize*ySize + xSize*ySize/2) >=NB_FRAME){
        rewind(ptfile[id]);
    }
	
	if(id == 1 && ftell(ptfile[id])%(FPS*(xSize*ySize + xSize*ySize/2)) == 0){
			unsigned int time = 0;
            time = stopTiming(0);
            printf("\nMain: %d frames in %d us - %f fps\n", FPS ,time, ((float)FPS)/(float)time*1000000);
            startTiming(0);
    }

    fread(y, sizeof(char), xSize * ySize, ptfile[id]);
    fread(u, sizeof(char), xSize * ySize / 4, ptfile[id]);
    fread(v, sizeof(char), xSize * ySize / 4, ptfile[id]);
}
コード例 #10
0
ファイル: toolglobal.cpp プロジェクト: caoyanjie/Sprite
//单击 ”定时“ 处理
void ToolGlobal::pbn_clock_clicked()
{
    if (! clockState)       //如果 clockState 是 关闭 状态
    {
        pbn_clock_state->move(gbx_clock->x()+73, pbn_clock_state->y());                 //设置 按钮位置
        pbn_clock->move(gbx_clock->pos());                                              //...........
        pbn_clock_state ->setText(tr("OFF"));                                           //设置 按钮文字
        pbn_clock ->setText(tr("关闭"));                                                 //...........
        pbn_clock_state ->setToolTip(tr("关闭"));                                        //设置 按钮提示
        pbn_clock ->setToolTip(tr("关闭"));                                              //...........
        clockState = true;                                                              //设置为 “开启” 状态
//        timing = new Timing(parentWidget());                                          //创建 “定时器设置” 窗口 并显示
        timing = new Timing(gbx_clock->x()-42, 113, 225, 135, parentWidget());          //创建 “定时器设置” 窗口 并显示
        timing ->show();                                                                //.........................
        connect(timing, SIGNAL(timing_pbnOk_click()), this, SLOT(startTiming()));       //关联 定时器 “确定” 按钮
        connect(timing, SIGNAL(timing_pbnCancle_click()), this, SLOT(stopTiming()));    //关联 定时器 “取消” 按钮
    }
    else                                                //如果 clockState 是 开启 状态
    {
        if (timing)                                     //如果存在 “定时器设置” 窗口
        {
            timing ->close();                           //关闭 “定时器设置” 窗口
        }
        if (lcdNumber)                                  //如果存在 “LCD显示器”
        {
            delete lcdNumber;                           //销毁 “LCD显示器”
            lcdNumber = 0;                              //指针赋0值
            timer_default ->stop();                     //终止 “1秒倒计时"
            timer_target ->stop();                      //终止 "自定义倒计时”
        }
        pbn_clock_state->move(gbx_clock->x()+13, pbn_clock_state->y());     //设置 按钮位置
        pbn_clock->move(gbx_clock->x()+46, pbn_clock->y());                 //...........
        pbn_clock_state ->setText(tr("ON"));                                //设置 按钮文字
        pbn_clock ->setText(tr("定时"));                                     //...........
        pbn_clock_state ->setToolTip(tr("开启魔音"));                         //设置 按钮提示
        pbn_clock ->setToolTip(tr("开启魔音"));                               //...........
        clockState = false;                                                 //设置为 “关闭” 状态
    }
}
コード例 #11
0
ファイル: treemodel.cpp プロジェクト: jhj/aqp-qt5
bool TreeModel::dropMimeData(const QMimeData *mimeData,
        Qt::DropAction action, int row, int column,
        const QModelIndex &parent)
{
    if (action == Qt::IgnoreAction)
        return true;
    if (action != Qt::MoveAction || column > 0 ||
        !mimeData || !mimeData->hasFormat(MimeType))
        return false;
    if (TaskItem *item = itemForIndex(parent)) {
        emit stopTiming();
        QByteArray xmlData = qUncompress(mimeData->data(MimeType));
        QXmlStreamReader reader(xmlData);
        if (row == -1)
            row = parent.isValid() ? parent.row()
                                   : rootItem->childCount();
        beginInsertRows(parent, row, row);
        readTasks(&reader, item);
        endInsertRows();
        return true;
    }
    return false;
}
コード例 #12
0
ファイル: csort.c プロジェクト: adityarout/CS61B
int main (int argc, char **argv) {
  int N;
  double * values;
  char type = argc > 2 ? argv[2][0] : 'A';
  if (argc < 2) {
    fprintf (stderr, "Usage: csort SIZE [ TYPE [SEED]] \n");
    exit (1);
  }
  N = atoi (argv[1]);
  srand (argc > 3 ? atol (argv[3]) : 42);

  if (type == 'A')
    values = randomVector(N, 0.0, 1.0);
  else 
    values = randomVector(N, 1.0, 20.0);

  startTiming();
  sort (values, N);
  stopTiming();

  if (N < 20)
    printArr(values, N);
  return 0;
}
コード例 #13
0
// ----------------------------------------------------------------------------
// gpuNUFFT_gpu: NUFFT
//
// Inverse gpuNUFFT implementation - interpolation from uniform grid data onto 
//                                   nonuniform k-space data based on optimized 
//                                   gpuNUFFT kernel with minimal oversampling
//                                   ratio (see Beatty et al.)
//
// Basic steps: - apodization correction
//              - zero padding with osf
//              - FFT
//							- convolution and resampling
//
// parameters:
//	* data		     : output kspace data 
//  * data_count   : number of samples on trajectory
//  * n_coils      : number of channels or coils
//  * crds         : coordinates on trajectory, passed as SoA
//  * imdata       : input image data
//  * imdata_count : number of image data points
//  * grid_width   : size of grid 
//  * kernel       : precomputed convolution kernel as lookup table
//  * kernel_count : number of kernel lookup table entries
//  * sectors      : mapping of data indices according to each sector
//  * sector_count : number of sectors
//  * sector_centers: coordinates (x,y,z) of sector centers
//  * sector_width : width of sector 
//  * im_width     : dimension of image
//  * osr          : oversampling ratio
//  * gpuNUFFT_out : enum indicating how far gpuNUFFT has to be processed
//  
void gpuNUFFT::GpuNUFFTOperator::performForwardGpuNUFFT(gpuNUFFT::Array<DType2> imgData,gpuNUFFT::Array<CufftType>& kspaceData, GpuNUFFTOutput gpuNUFFTOut)
{
  if (DEBUG)
  {
    std::cout << "performing forward gpuNUFFT!!!" << std::endl;
    std::cout << "dataCount: " << kspaceData.count() << " chnCount: " << kspaceData.dim.channels << std::endl;
    std::cout << "imgCount: " << imgData.count() << " gridWidth: " << this->getGridWidth() << std::endl;
  }
  showMemoryInfo();
  
  if (debugTiming)
    startTiming();

  int			data_count          = (int)this->kSpaceTraj.count();
  int			n_coils             = (int)kspaceData.dim.channels;
  IndType	imdata_count        = this->imgDims.count();
  int			sector_count        = (int)this->gridSectorDims.count();

  //cuda mem allocation
  DType2 *imdata_d;
  CufftType *data_d;

  if (DEBUG)
    printf("allocate and copy imdata of size %d...\n",imdata_count);
  allocateDeviceMem<DType2>(&imdata_d,imdata_count);

  if (DEBUG)
    printf("allocate and copy data of size %d...\n",data_count);
  allocateDeviceMem<CufftType>(&data_d,data_count);

  initDeviceMemory(n_coils);
    
  if (debugTiming)
    printf("Memory allocation: %.2f ms\n",stopTiming());

  int err;

  //iterate over coils and compute result
  for (int coil_it = 0; coil_it < n_coils; coil_it++)
  {
    int data_coil_offset = coil_it * data_count;
    int im_coil_offset = coil_it * (int)imdata_count;

    if (this->applySensData())
      // perform automatically "repeating" of input image in case
      // of existing sensitivity data
      copyToDevice(imgData.data,imdata_d,imdata_count);
    else
      copyToDevice(imgData.data + im_coil_offset,imdata_d,imdata_count);

    //reset temp arrays
    cudaMemset(gdata_d,0, sizeof(CufftType)*gi_host->grid_width_dim);
    cudaMemset(data_d,0, sizeof(CufftType)*data_count);

    if (DEBUG && (cudaThreadSynchronize() != cudaSuccess))
      printf("error at thread synchronization 1: %s\n",cudaGetErrorString(cudaGetLastError()));
    
    if (this->applySensData())
    {
      copyToDevice(this->sens.data + im_coil_offset, sens_d,imdata_count);
      performSensMul(imdata_d,sens_d,gi_host,false);
    }

    // apodization Correction
    if (n_coils > 1 && deapo_d != NULL)
      performForwardDeapodization(imdata_d,deapo_d,gi_host);
    else
      performForwardDeapodization(imdata_d,gi_host);

    if (DEBUG && (cudaThreadSynchronize() != cudaSuccess))
      printf("error at thread synchronization 2: %s\n",cudaGetErrorString(cudaGetLastError()));
    // resize by oversampling factor and zero pad
    performPadding(imdata_d,gdata_d,gi_host);

    if (debugTiming)
      startTiming();

    if (DEBUG && (cudaThreadSynchronize() != cudaSuccess))
      printf("error at thread synchronization 3: %s\n",cudaGetErrorString(cudaGetLastError()));
    // shift image to get correct zero frequency position
    performFFTShift(gdata_d,INVERSE,getGridDims(),gi_host);

    if (DEBUG && (cudaThreadSynchronize() != cudaSuccess))
      printf("error at thread synchronization 4: %s\n",cudaGetErrorString(cudaGetLastError()));
    // eventually free imdata_d
    // Forward FFT to kspace domain
    if (err=pt2CufftExec(fft_plan, gdata_d, gdata_d, CUFFT_FORWARD) != CUFFT_SUCCESS)
    {
      fprintf(stderr,"cufft has failed with err %i \n",err);
      showMemoryInfo(true,stderr);
    }

    if (DEBUG && (cudaThreadSynchronize() != cudaSuccess))
      printf("error at thread synchronization 5: %s\n",cudaGetErrorString(cudaGetLastError()));
    performFFTShift(gdata_d,FORWARD,getGridDims(),gi_host);

    if (DEBUG && (cudaThreadSynchronize() != cudaSuccess))
      printf("error at thread synchronization 6: %s\n",cudaGetErrorString(cudaGetLastError()));

    if (debugTiming)
      printf("FFT (incl. shift): %.2f ms\n",stopTiming());

    if (debugTiming)
      startTiming();

    // convolution and resampling to non-standard trajectory
    forwardConvolution(data_d,crds_d,gdata_d,NULL,sectors_d,sector_centers_d,gi_host);
    if (DEBUG && (cudaThreadSynchronize() != cudaSuccess))
      printf("error at thread synchronization 7: %s\n",cudaGetErrorString(cudaGetLastError()));
    
    if (debugTiming)
      printf("Forward Convolution: %.2f ms\n",stopTiming());

    performFFTScaling(data_d,gi_host->data_count,gi_host);
    if (DEBUG && (cudaThreadSynchronize() != cudaSuccess))
      printf("error: at thread synchronization 8: %s\n",cudaGetErrorString(cudaGetLastError()));
    
    //write result in correct order back into output array
    writeOrderedGPU(data_sorted_d,data_indices_d,data_d,(int)this->kSpaceTraj.count());
    copyFromDevice(data_sorted_d,kspaceData.data + data_coil_offset,data_count);
  }//iterate over coils
  
  freeTotalDeviceMemory(data_d,imdata_d,NULL);
  freeDeviceMemory(n_coils);

  if ((cudaThreadSynchronize() != cudaSuccess))
    fprintf(stderr,"error in performForwardGpuNUFFT function: %s\n",cudaGetErrorString(cudaGetLastError()));
  free(gi_host);
}
コード例 #14
0
// ----------------------------------------------------------------------------
// performGpuNUFFTAdj: NUFFT^H
//
// GpuNUFFT implementation - interpolation from nonuniform k-space data onto 
//                           oversampled grid based on optimized gpuNUFFT kernel
//                           with minimal oversampling ratio (see Beatty et al.)
//
// Basic steps: - density compensation
//              - convolution with interpolation function
//              - iFFT
//              - cropping due to oversampling ratio
//              - apodization correction
//
// parameters:
//	* data		     : input kspace data 
//  * data_count   : number of samples on trajectory
//  * n_coils      : number of channels or coils
//  * crds         : coordinate array on trajectory
//  * imdata       : output image data
//  * imdata_count : number of image data points
//  * grid_width   : size of grid 
//  * kernel       : precomputed convolution kernel as lookup table
//  * kernel_count : number of kernel lookup table entries
//  * sectors      : mapping of start and end points of each sector
//  * sector_count : number of sectors
//  * sector_centers: coordinates (x,y,z) of sector centers
//  * sector_width : width of sector 
//  * im_width     : dimension of image
//  * osr          : oversampling ratio
//  * do_comp      : true, if density compensation has to be done
//  * density_comp : densiy compensation array
//  * gpuNUFFT_out : enum indicating how far gpuNUFFT has to be processed
//  
void gpuNUFFT::GpuNUFFTOperator::performGpuNUFFTAdj(gpuNUFFT::Array<DType2> kspaceData, gpuNUFFT::Array<CufftType>& imgData, GpuNUFFTOutput gpuNUFFTOut)
{
  if (DEBUG)
  {
    std::cout << "performing gpuNUFFT adjoint!!!" << std::endl;
    std::cout << "dataCount: " << kspaceData.count() << " chnCount: " << kspaceData.dim.channels << std::endl;
    std::cout << "imgCount: " << imgData.count() << " gridWidth: " << this->getGridWidth() << std::endl;
    std::cout << "apply density comp: " << this->applyDensComp() << std::endl;
    std::cout << "apply sens data: " << this->applySensData() << std::endl;
  }
  if (debugTiming)
    startTiming();

  showMemoryInfo();

  int			data_count          = (int)this->kSpaceTraj.count();
  int			n_coils             = (int)kspaceData.dim.channels;
  IndType imdata_count        = this->imgDims.count();
  int			sector_count        = (int)this->gridSectorDims.count();

  // select data ordered and leave it on gpu
  DType2* data_d;

  if (DEBUG)
    printf("allocate data of size %d...\n",data_count);
  allocateDeviceMem<DType2>(&data_d,data_count);

  CufftType *imdata_d, *imdata_sum_d = NULL;

  if (DEBUG)
    printf("allocate and copy imdata of size %d...\n",imdata_count);
  allocateDeviceMem<CufftType>(&imdata_d,imdata_count);

  if (this->applySensData())
  {
    if (DEBUG)
      printf("allocate and copy temp imdata of size %d...\n",imdata_count);
    allocateDeviceMem<CufftType>(&imdata_sum_d,imdata_count);
    cudaMemset(imdata_sum_d,0,imdata_count*sizeof(CufftType));
  }

  initDeviceMemory(n_coils);
  int err;

  if (debugTiming)
    printf("Memory allocation: %.2f ms\n",stopTiming());

  //iterate over coils and compute result
  for (int coil_it = 0; coil_it < n_coils; coil_it++)
  {
    int data_coil_offset = coil_it * data_count;
    int im_coil_offset = coil_it * (int)imdata_count;//gi_host->width_dim;

    cudaMemset(gdata_d,0, sizeof(CufftType)*gi_host->grid_width_dim);
    //copy coil data to device and select ordered
    copyToDevice(kspaceData.data + data_coil_offset,data_d,data_count);
    selectOrderedGPU(data_d,data_indices_d,data_sorted_d,data_count);

    if (this->applyDensComp())
      performDensityCompensation(data_sorted_d,density_comp_d,gi_host);

    if (DEBUG && (cudaThreadSynchronize() != cudaSuccess))
      printf("error at adj thread synchronization 1: %s\n",cudaGetErrorString(cudaGetLastError()));
    
    if (debugTiming)
      startTiming();

    adjConvolution(data_sorted_d,crds_d,gdata_d,NULL,sectors_d,sector_centers_d,gi_host);

    if (debugTiming)
      printf("Adjoint convolution: %.2f ms\n",stopTiming());

    if (DEBUG && (cudaThreadSynchronize() != cudaSuccess))
      fprintf(stderr,"error at adj  thread synchronization 2: %s\n",cudaGetErrorString(cudaGetLastError()));
    if (gpuNUFFTOut == CONVOLUTION)
    {
      if (DEBUG)
        printf("stopping output after CONVOLUTION step\n");
      //get output
      copyFromDevice<CufftType>(gdata_d,imgData.data,gi_host->grid_width_dim);
      if (DEBUG)
        printf("test value at point zero: %f\n",(imgData.data)[0].x);

      free(gi_host);
      freeTotalDeviceMemory(data_d,imdata_d,imdata_sum_d,NULL);
      freeDeviceMemory(n_coils);
      return;
    }
    if ((cudaThreadSynchronize() != cudaSuccess))
      fprintf(stderr,"error at adj thread synchronization 3: %s\n",cudaGetErrorString(cudaGetLastError()));
    
    if (debugTiming)
      startTiming();

    performFFTShift(gdata_d,INVERSE,getGridDims(),gi_host);

    //Inverse FFT
    if (err=pt2CufftExec(fft_plan, gdata_d, gdata_d, CUFFT_INVERSE) != CUFFT_SUCCESS)
    {
      fprintf(stderr,"cufft has failed at adj with err %i \n",err);
      showMemoryInfo(true,stderr);
    }
    if (DEBUG && (cudaThreadSynchronize() != cudaSuccess))
      fprintf(stderr,"error at adj thread synchronization 4: %s\n",cudaGetErrorString(cudaGetLastError()));

    if (gpuNUFFTOut == FFT)
    {
      if (DEBUG)
        printf("stopping output after FFT step\n");
      //get output
      copyFromDevice<CufftType>(gdata_d,imgData.data,gi_host->grid_width_dim);

      free(gi_host);
      
      freeTotalDeviceMemory(data_d,imdata_d,imdata_sum_d,NULL);
      freeDeviceMemory(n_coils);
      
      printf("last cuda error: %s\n", cudaGetErrorString(cudaGetLastError()));
      return;
    }
    if (DEBUG && (cudaThreadSynchronize() != cudaSuccess))
      printf("error at adj thread synchronization 5: %s\n",cudaGetErrorString(cudaGetLastError()));
    performFFTShift(gdata_d,INVERSE,getGridDims(),gi_host);
    
    if (debugTiming)
      printf("iFFT (incl. shift) : %.2f ms\n",stopTiming());

    if (DEBUG && (cudaThreadSynchronize() != cudaSuccess))
      printf("error at adj thread synchronization 6: %s\n",cudaGetErrorString(cudaGetLastError()));
    performCrop(gdata_d,imdata_d,gi_host);

    if (DEBUG && (cudaThreadSynchronize() != cudaSuccess))
      printf("error at adj thread synchronization 7: %s\n",cudaGetErrorString(cudaGetLastError()));
    //check if precomputed deapo function can be used
    if (n_coils > 1 && deapo_d != NULL)
      performDeapodization(imdata_d,deapo_d,gi_host);
    else
      performDeapodization(imdata_d,gi_host);
    if (DEBUG && (cudaThreadSynchronize() != cudaSuccess))
      printf("error at adj thread synchronization 8: %s\n",cudaGetErrorString(cudaGetLastError()));

    performFFTScaling(imdata_d,gi_host->im_width_dim,gi_host);
    if (DEBUG && (cudaThreadSynchronize() != cudaSuccess))
      printf("error: at adj  thread synchronization 9: %s\n",cudaGetErrorString(cudaGetLastError()));

    if (this->applySensData())
    {
      copyToDevice(this->sens.data + im_coil_offset, sens_d,imdata_count);
      performSensMul(imdata_d,sens_d,gi_host,true);
      performSensSum(imdata_d,imdata_sum_d,gi_host);
    }
    else
    {
      // get result per coil
      // no summation is performed in absence of sensitity data
      copyFromDevice<CufftType>(imdata_d,imgData.data+im_coil_offset,imdata_count);
    }

    if (DEBUG && (cudaThreadSynchronize() != cudaSuccess))
      printf("error: at adj  thread synchronization 10: %s\n",cudaGetErrorString(cudaGetLastError()));
  }//iterate over coils
  
  if (this->applySensData())
  {
    // get result of combined coils
    copyFromDevice<CufftType>(imdata_sum_d,imgData.data,imdata_count);
  }

  if (DEBUG && (cudaThreadSynchronize() != cudaSuccess))
    printf("error: at adj  thread synchronization 11: %s\n",cudaGetErrorString(cudaGetLastError()));
  
  freeTotalDeviceMemory(data_d,imdata_d,imdata_sum_d,NULL);
  freeDeviceMemory(n_coils);
  if ((cudaThreadSynchronize() != cudaSuccess))
    fprintf(stderr,"error in gpuNUFFT_gpu_adj function: %s\n",cudaGetErrorString(cudaGetLastError()));
  free(gi_host);
}
コード例 #15
0
void performGC (GC_state s, 
                size_t oldGenBytesRequested,
                size_t nurseryBytesRequested, 
                bool forceMajor,
                bool mayResize) {
  uintmax_t gcTime;
  bool stackTopOk;
  size_t stackBytesRequested;
  struct rusage ru_start;
  size_t totalBytesRequested;

  enterGC (s);
  s->cumulativeStatistics.numGCs++;
  if (DEBUG or s->controls.messages) {
    size_t nurserySize = s->heap.size - ((size_t)(s->heap.nursery - s->heap.start));
    size_t nurseryUsed = (size_t)(s->frontier - s->heap.nursery);
    fprintf (stderr, 
             "[GC: Starting gc #%s; requesting %s nursery bytes and %s old-gen bytes,]\n",
             uintmaxToCommaString(s->cumulativeStatistics.numGCs),
             uintmaxToCommaString(nurseryBytesRequested),
             uintmaxToCommaString(oldGenBytesRequested));
    fprintf (stderr, 
             "[GC:\theap at "FMTPTR" of size %s bytes (+ %s bytes card/cross map),]\n",
             (uintptr_t)(s->heap.start),
             uintmaxToCommaString(s->heap.size),
             uintmaxToCommaString(s->heap.withMapsSize - s->heap.size));
    fprintf (stderr, 
             "[GC:\twith old-gen of size %s bytes (%.1f%% of heap),]\n",
             uintmaxToCommaString(s->heap.oldGenSize),
             100.0 * ((double)(s->heap.oldGenSize) / (double)(s->heap.size)));
    fprintf (stderr,
             "[GC:\tand nursery of size %s bytes (%.1f%% of heap),]\n",
             uintmaxToCommaString(nurserySize),
             100.0 * ((double)(nurserySize) / (double)(s->heap.size)));
    fprintf (stderr,
             "[GC:\tand nursery using %s bytes (%.1f%% of heap, %.1f%% of nursery).]\n",
             uintmaxToCommaString(nurseryUsed),
             100.0 * ((double)(nurseryUsed) / (double)(s->heap.size)),
             100.0 * ((double)(nurseryUsed) / (double)(nurserySize)));
  }
  assert (invariantForGC (s));
  if (needGCTime (s))
    startTiming (&ru_start);
  minorGC (s);
  stackTopOk = invariantForMutatorStack (s);
  stackBytesRequested = 
    stackTopOk 
    ? 0 
    : sizeofStackWithHeader (s, sizeofStackGrowReserved (s, getStackCurrent (s)));
  totalBytesRequested = 
    oldGenBytesRequested 
    + nurseryBytesRequested
    + stackBytesRequested;
  if (forceMajor 
      or totalBytesRequested > s->heap.size - s->heap.oldGenSize)
    majorGC (s, totalBytesRequested, mayResize);
  setGCStateCurrentHeap (s, oldGenBytesRequested + stackBytesRequested, 
                         nurseryBytesRequested);
  assert (hasHeapBytesFree (s, oldGenBytesRequested + stackBytesRequested,
                            nurseryBytesRequested));
  unless (stackTopOk)
    growStackCurrent (s);
  setGCStateCurrentThreadAndStack (s);
  if (needGCTime (s)) {
    gcTime = stopTiming (&ru_start, &s->cumulativeStatistics.ru_gc);
    s->cumulativeStatistics.maxPauseTime = 
      max (s->cumulativeStatistics.maxPauseTime, gcTime);
  } else
    gcTime = 0;  /* Assign gcTime to quell gcc warning. */
  if (DEBUG or s->controls.messages) {
    size_t nurserySize = s->heap.size - (size_t)(s->heap.nursery - s->heap.start);
    fprintf (stderr, 
             "[GC: Finished gc #%s; time %s ms,]\n",
             uintmaxToCommaString(s->cumulativeStatistics.numGCs),
             uintmaxToCommaString(gcTime));
    fprintf (stderr, 
             "[GC:\theap at "FMTPTR" of size %s bytes (+ %s bytes card/cross map),]\n",
             (uintptr_t)(s->heap.start),
             uintmaxToCommaString(s->heap.size),
             uintmaxToCommaString(s->heap.withMapsSize - s->heap.size));
    fprintf (stderr, 
             "[GC:\twith old-gen of size %s bytes (%.1f%% of heap),]\n",
             uintmaxToCommaString(s->heap.oldGenSize),
             100.0 * ((double)(s->heap.oldGenSize) / (double)(s->heap.size)));
    fprintf (stderr,
             "[GC:\tand nursery of size %s bytes (%.1f%% of heap).]\n",
             uintmaxToCommaString(nurserySize),
             100.0 * ((double)(nurserySize) / (double)(s->heap.size)));
  }
  /* Send a GC signal. */
  if (s->signalsInfo.gcSignalHandled
      and s->signalHandlerThread != BOGUS_OBJPTR) {
    if (DEBUG_SIGNALS)
      fprintf (stderr, "GC Signal pending.\n");
    s->signalsInfo.gcSignalPending = TRUE;
    unless (s->signalsInfo.amInSignalHandler) 
      s->signalsInfo.signalIsPending = TRUE;
  }
  if (DEBUG) 
    displayGCState (s, stderr);
  assert (hasHeapBytesFree (s, oldGenBytesRequested, nurseryBytesRequested));
  assert (invariantForGC (s));
  leaveGC (s);
}