static int _testTaskTimerCallStopTwice(void) { TaskTimer t = newTaskTimer(1); startTimingTask(t, 0); _testSleep(); stopTiming(t); stopTiming(t); assertIntEquals(t->currentTask, -1); // Recorded time should be at most 1ms off assertDoubleEquals(t->totalTaskTimes[0], SLEEP_DURATION_MS, MAX_TIMER_TOLERANCE_MS); freeTaskTimer(t); return 0; }
// Benchmarks the Halide GPU Sobel pipeline.
//
// Performs one untimed warm-up call, then params.iterations timed calls
// bracketed by startTiming()/stopTiming(), copies the output buffer back to
// the host and forwards to outputResults(). When the build has no Halide
// support (ENABLE_HALIDE is false) a status message is reported and false is
// returned.
bool Sobel::runHalideGPU(Image input, Image output, const Params& params)
{
#if ENABLE_HALIDE
  // Halide views of the source and destination images
  buffer_t src = createHalideBuffer(input);
  buffer_t dst = createHalideBuffer(output);

  reportStatus("Running Halide GPU filter");

  // Untimed warm-up pass; the input is flagged host-dirty beforehand
  src.host_dirty = true;
  halide_sobel_gpu(&src, &dst);
  halide_dev_sync(NULL);

  // Timed passes
  startTiming();
  int pass = 0;
  while (pass < params.iterations)
  {
    halide_sobel_gpu(&src, &dst);
    ++pass;
  }
  halide_dev_sync(NULL);
  stopTiming();

  // Fetch the result and release Halide resources
  halide_copy_to_host(NULL, &dst);
  halide_release(NULL);

  return outputResults(input, output, params);
#else
  reportStatus("Halide not enabled during build.");
  return false;
#endif
}
// Benchmarks the Halide CPU bilateral pipeline.
//
// Performs one untimed warm-up call, then params.iterations timed calls
// bracketed by startTiming()/stopTiming(), and forwards to outputResults().
// When the build has no Halide support (ENABLE_HALIDE is false) a status
// message is reported and false is returned.
bool Bilateral::runHalideCPU(Image input, Image output, const Params& params)
{
#if ENABLE_HALIDE
  // Halide views of the source and destination images
  buffer_t src = createHalideBuffer(input);
  buffer_t dst = createHalideBuffer(output);

  reportStatus("Running Halide CPU filter");

  // Untimed warm-up pass
  halide_bilateral_cpu(&src, &dst);

  // Timed passes
  startTiming();
  int pass = 0;
  while (pass < params.iterations)
  {
    halide_bilateral_cpu(&src, &dst);
    ++pass;
  }
  stopTiming();

  halide_release(NULL);

  return outputResults(input, output, params);
#else
  reportStatus("Halide not enabled during build.");
  return false;
#endif
}
/*
 * minorCheneyCopyGC
 *
 * Minor (nursery) collection. The number of bytes between s->heap.nursery
 * and s->frontier is added to the cumulative allocation counter; if nothing
 * was allocated the function returns immediately.
 *
 * When s->canMinor is false no copying happens: the nursery bytes are simply
 * accounted to the old generation (oldGenSize grows by bytesAllocated).
 *
 * Otherwise objects referenced from the globals and from inter-generational
 * pointers are forwarded (only if they live in the nursery) into the region
 * starting at s->heap.start + s->heap.oldGenSize, and the old generation
 * grows by the number of bytes copied.
 */
void minorCheneyCopyGC (GC_state s) {
  size_t bytesAllocated;
  size_t bytesCopied;
  struct rusage ru_start;

  if (DEBUG_GENERATIONAL)
    fprintf (stderr, "minorGC nursery = "FMTPTR" frontier = "FMTPTR"\n",
             (uintptr_t)s->heap.nursery, (uintptr_t)s->frontier);
  assert (invariantForGC (s));
  /* Bytes allocated in the nursery since it was last reset. */
  bytesAllocated = s->frontier - s->heap.nursery;
  if (bytesAllocated == 0)
    return;
  s->cumulativeStatistics.bytesAllocated += bytesAllocated;
  if (not s->canMinor) {
    /* No copy: count the whole nursery contents as old generation. */
    s->heap.oldGenSize += bytesAllocated;
    bytesCopied = 0;
  } else {
    if (detailedGCTime (s))
      startTiming (&ru_start);
    s->cumulativeStatistics.numMinorGCs++;
    s->forwardState.amInMinorGC = TRUE;
    if (DEBUG_GENERATIONAL or s->controls.messages) {
      fprintf (stderr, "[GC: Starting minor Cheney-copy;]\n");
      fprintf (stderr, "[GC:\tfrom nursery at "FMTPTR" of size %s bytes.]\n",
               (uintptr_t)(s->heap.nursery),
               uintmaxToCommaString(bytesAllocated));
    }
    /* Destination region: directly after the current old generation,
     * sized to hold at most everything that was allocated. */
    s->forwardState.toStart = s->heap.start + s->heap.oldGenSize;
    assert (isFrontierAligned (s, s->forwardState.toStart));
    s->forwardState.toLimit = s->forwardState.toStart + bytesAllocated;
    assert (invariantForGC (s));
    s->forwardState.back = s->forwardState.toStart;
    /* Forward all globals.  Would like to avoid doing this once all
     * the globals have been assigned.
     */
    foreachGlobalObjptr (s, forwardObjptrIfInNursery);
    forwardInterGenerationalObjptrs (s);
    /* Walk the destination region from toStart; forwardState.back is passed
     * by address so the walk can follow it as it moves. */
    foreachObjptrInRange (s, s->forwardState.toStart, &s->forwardState.back,
                          forwardObjptrIfInNursery, TRUE);
    updateWeaksForCheneyCopy (s);
    /* Everything between toStart and back was copied by this collection. */
    bytesCopied = s->forwardState.back - s->forwardState.toStart;
    s->cumulativeStatistics.bytesCopiedMinor += bytesCopied;
    s->heap.oldGenSize += bytesCopied;
    s->lastMajorStatistics.numMinorGCs++;
    if (detailedGCTime (s))
      stopTiming (&ru_start, &s->cumulativeStatistics.ru_gcMinor);
    if (DEBUG_GENERATIONAL or s->controls.messages)
      fprintf (stderr, "[GC: Finished minor Cheney-copy; copied %s bytes.]\n",
               uintmaxToCommaString(bytesCopied));
  }
}
/*
 * Switches the timer to a new task.
 *
 * If taskId is already the timer's current task nothing happens. Otherwise
 * stopTiming() is called on the timer, a fresh start timestamp is captured
 * with the platform clock (QueryPerformanceCounter on WINDOWS, gettimeofday
 * on UNIX) and taskId becomes the current task.
 */
void startTimingTask(TaskTimer taskTimer, const int taskId)
{
    if (taskTimer->currentTask != taskId) {
        stopTiming(taskTimer);

#if WINDOWS
        QueryPerformanceCounter(&(taskTimer->startTime));
#elif UNIX
        gettimeofday(taskTimer->startTime, NULL);
#endif

        taskTimer->currentTask = taskId;
    }
}
/*
 * Times a single sleep on task 0 of a one-task timer. The timer must report
 * no current task (-1) both before starting and after stopping, and the
 * accumulated time for task 0 must match the sleep duration within
 * MAX_TIMER_TOLERANCE_MS.
 *
 * Returns 0 when the end of the test is reached.
 */
static int _testTaskTimerDuration(void)
{
    TaskTimer timer = newTaskTimer(1);

    /* A fresh timer has no task running. */
    assertIntEquals(timer->currentTask, -1);

    startTimingTask(timer, 0);
    _testSleep();
    stopTiming(timer);

    assertIntEquals(timer->currentTask, -1);
    assertDoubleEquals(timer->totalTaskTimes[0], SLEEP_DURATION_MS, MAX_TIMER_TOLERANCE_MS);

    freeTaskTimer(timer);
    return 0;
}
/*
 * majorCheneyCopyGC
 *
 * Major copying collection from s->heap into s->secondaryHeap.
 *
 * Objects referenced from the globals are forwarded (forwardObjptr) into the
 * secondary heap starting at its aligned start address. The distance from
 * the secondary heap's start to forwardState.back becomes the secondary
 * heap's oldGenSize and is added to the cumulative bytesCopied statistic.
 * swapHeapsForCheneyCopy is then called and the last major collection kind
 * is recorded as GC_COPYING.
 *
 * Requires (asserted): the secondary heap exists and is at least as large as
 * the current old generation.
 */
void majorCheneyCopyGC (GC_state s) {
  size_t bytesCopied;
  struct rusage ru_start;
  pointer toStart;

  assert (s->secondaryHeap.size >= s->heap.oldGenSize);
  if (detailedGCTime (s))
    startTiming (&ru_start);
  s->cumulativeStatistics.numCopyingGCs++;
  s->forwardState.amInMinorGC = FALSE;
  if (DEBUG or s->controls.messages) {
    fprintf (stderr, "[GC: Starting major Cheney-copy;]\n");
    fprintf (stderr, "[GC:\tfrom heap at "FMTPTR" of size %s bytes,]\n",
             (uintptr_t)(s->heap.start),
             uintmaxToCommaString(s->heap.size));
    fprintf (stderr, "[GC:\tto heap at "FMTPTR" of size %s bytes.]\n",
             (uintptr_t)(s->secondaryHeap.start),
             uintmaxToCommaString(s->secondaryHeap.size));
  }
  /* The destination region is the entire secondary heap. */
  s->forwardState.toStart = s->secondaryHeap.start;
  s->forwardState.toLimit = s->secondaryHeap.start + s->secondaryHeap.size;
  assert (s->secondaryHeap.start != (pointer)NULL);
  /* The next assert ensures there is enough space for the copy to
   * succeed.  It does not assert
   *   (s->secondaryHeap.size >= s->heap.size)
   * because that is too strong.
   */
  assert (s->secondaryHeap.size >= s->heap.oldGenSize);
  /* Copying begins at the aligned start of the secondary heap. */
  toStart = alignFrontier (s, s->secondaryHeap.start);
  s->forwardState.back = toStart;
  foreachGlobalObjptr (s, forwardObjptr);
  /* Walk the destination region from toStart; forwardState.back is passed
   * by address so the walk can follow it as it moves. */
  foreachObjptrInRange (s, toStart, &s->forwardState.back, forwardObjptr, TRUE);
  updateWeaksForCheneyCopy (s);
  /* Measured from the unaligned heap start, so any alignment padding before
   * toStart is included in the size. */
  s->secondaryHeap.oldGenSize = s->forwardState.back - s->secondaryHeap.start;
  bytesCopied = s->secondaryHeap.oldGenSize;
  s->cumulativeStatistics.bytesCopied += bytesCopied;
  swapHeapsForCheneyCopy (s);
  s->lastMajorStatistics.kind = GC_COPYING;
  if (detailedGCTime (s))
    stopTiming (&ru_start, &s->cumulativeStatistics.ru_gcCopying);
  if (DEBUG or s->controls.messages)
    fprintf (stderr, "[GC: Finished major Cheney-copy; copied %s bytes.]\n",
             uintmaxToCommaString(bytesCopied));
}
/*
 * Runs five start/sleep/stop rounds on task 0 of a one-task timer. Before
 * and after every round the timer must report no current task (-1). At the
 * end the accumulated time for task 0 must equal five sleep durations within
 * five times MAX_TIMER_TOLERANCE_MS.
 *
 * Returns 0 when the end of the test is reached.
 */
static int _testTaskTimerDurationMultipleTimes(void)
{
    TaskTimer timer = newTaskTimer(1);
    int round = 0;

    while (round < 5) {
        assertIntEquals(timer->currentTask, -1);

        startTimingTask(timer, 0);
        _testSleep();
        stopTiming(timer);

        assertIntEquals(timer->currentTask, -1);
        round++;
    }

    /* Both the expected total and the tolerance scale with the round count. */
    assertDoubleEquals(timer->totalTaskTimes[0], 5.0 * SLEEP_DURATION_MS, MAX_TIMER_TOLERANCE_MS * 5.0);

    freeTaskTimer(timer);
    return 0;
}
/*========================================================================

   readYUV DEFINITION

   ======================================================================*/
/**
 * Reads one frame from the stream ptfile[id] into three planes.
 *
 * A frame is xSize*ySize bytes for the y plane followed by xSize*ySize/4
 * bytes each for the u and v planes. If the current file position is at or
 * past NB_FRAME frames, the stream is rewound before reading.
 *
 * For id == 1 only: whenever the file position is a multiple of FPS frames,
 * the elapsed time returned by stopTiming(0) is printed together with the
 * resulting frame rate, and timer 0 is restarted.
 *
 * @param id     index into ptfile[] selecting the input stream
 * @param xSize  frame width in pixels
 * @param ySize  frame height in pixels
 * @param y      destination for xSize*ySize bytes
 * @param u      destination for xSize*ySize/4 bytes
 * @param v      destination for xSize*ySize/4 bytes
 *
 * Short reads are reported on stderr; the function still returns normally,
 * as before. A non-positive frame size is rejected up front because it
 * would otherwise be used as a divisor.
 */
void readYUV(int id, int xSize, int ySize, unsigned char *y, unsigned char *u, unsigned char *v)
{
    int lumaSize = xSize * ySize;
    int chromaSize = lumaSize / 4;
    int frameSize = lumaSize + lumaSize / 2;
    size_t gotY;
    size_t gotU;
    size_t gotV;

    if (frameSize <= 0) {
        fprintf(stderr, "readYUV: invalid frame size %dx%d\n", xSize, ySize);
        return;
    }

    /* Loop the input once NB_FRAME frames have been consumed. */
    if (ftell(ptfile[id]) / frameSize >= NB_FRAME) {
        rewind(ptfile[id]);
    }

    /* Report throughput each time a multiple of FPS frames has been read. */
    if (id == 1 && ftell(ptfile[id]) % (FPS * frameSize) == 0) {
        unsigned int elapsedUs = stopTiming(0);

        /* %u matches the unsigned elapsed time (was %d). */
        printf("\nMain: %d frames in %u us - %f fps\n", FPS, elapsedUs,
               ((float)FPS) / (float)elapsedUs * 1000000);
        startTiming(0);
    }

    /* All three reads are always attempted, exactly as before. */
    gotY = fread(y, sizeof(char), lumaSize, ptfile[id]);
    gotU = fread(u, sizeof(char), chromaSize, ptfile[id]);
    gotV = fread(v, sizeof(char), chromaSize, ptfile[id]);

    if (gotY != (size_t)lumaSize || gotU != (size_t)chromaSize || gotV != (size_t)chromaSize) {
        fprintf(stderr, "readYUV: short read on stream %d\n", id);
    }
}
//单击 ”定时“ 处理 void ToolGlobal::pbn_clock_clicked() { if (! clockState) //如果 clockState 是 关闭 状态 { pbn_clock_state->move(gbx_clock->x()+73, pbn_clock_state->y()); //设置 按钮位置 pbn_clock->move(gbx_clock->pos()); //........... pbn_clock_state ->setText(tr("OFF")); //设置 按钮文字 pbn_clock ->setText(tr("关闭")); //........... pbn_clock_state ->setToolTip(tr("关闭")); //设置 按钮提示 pbn_clock ->setToolTip(tr("关闭")); //........... clockState = true; //设置为 “开启” 状态 // timing = new Timing(parentWidget()); //创建 “定时器设置” 窗口 并显示 timing = new Timing(gbx_clock->x()-42, 113, 225, 135, parentWidget()); //创建 “定时器设置” 窗口 并显示 timing ->show(); //......................... connect(timing, SIGNAL(timing_pbnOk_click()), this, SLOT(startTiming())); //关联 定时器 “确定” 按钮 connect(timing, SIGNAL(timing_pbnCancle_click()), this, SLOT(stopTiming())); //关联 定时器 “取消” 按钮 } else //如果 clockState 是 开启 状态 { if (timing) //如果存在 “定时器设置” 窗口 { timing ->close(); //关闭 “定时器设置” 窗口 } if (lcdNumber) //如果存在 “LCD显示器” { delete lcdNumber; //销毁 “LCD显示器” lcdNumber = 0; //指针赋0值 timer_default ->stop(); //终止 “1秒倒计时" timer_target ->stop(); //终止 "自定义倒计时” } pbn_clock_state->move(gbx_clock->x()+13, pbn_clock_state->y()); //设置 按钮位置 pbn_clock->move(gbx_clock->x()+46, pbn_clock->y()); //........... pbn_clock_state ->setText(tr("ON")); //设置 按钮文字 pbn_clock ->setText(tr("定时")); //........... pbn_clock_state ->setToolTip(tr("开启魔音")); //设置 按钮提示 pbn_clock ->setToolTip(tr("开启魔音")); //........... clockState = false; //设置为 “关闭” 状态 } }
bool TreeModel::dropMimeData(const QMimeData *mimeData, Qt::DropAction action, int row, int column, const QModelIndex &parent) { if (action == Qt::IgnoreAction) return true; if (action != Qt::MoveAction || column > 0 || !mimeData || !mimeData->hasFormat(MimeType)) return false; if (TaskItem *item = itemForIndex(parent)) { emit stopTiming(); QByteArray xmlData = qUncompress(mimeData->data(MimeType)); QXmlStreamReader reader(xmlData); if (row == -1) row = parent.isValid() ? parent.row() : rootItem->childCount(); beginInsertRows(parent, row, row); readTasks(&reader, item); endInsertRows(); return true; } return false; }
/*
 * csort driver: csort SIZE [ TYPE [SEED]]
 *
 * Builds a random vector of SIZE doubles and sorts it between
 * startTiming() and stopTiming(). TYPE 'A' (the default) draws values with
 * bounds 0.0 and 1.0; any other TYPE uses bounds 1.0 and 20.0. SEED
 * defaults to 42. Vectors shorter than 20 elements are printed after
 * sorting. Exits with status 1 and a usage message when SIZE is missing.
 */
int main (int argc, char **argv)
{
    int N;
    double *values;
    char type;
    long seed = 42;
    double lowerBound = 0.0;
    double upperBound = 1.0;

    if (argc < 2) {
        fprintf (stderr, "Usage: csort SIZE [ TYPE [SEED]] \n");
        exit (1);
    }

    type = 'A';
    if (argc > 2)
        type = argv[2][0];

    N = atoi (argv[1]);

    if (argc > 3)
        seed = atol (argv[3]);
    srand (seed);

    if (type != 'A') {
        lowerBound = 1.0;
        upperBound = 20.0;
    }
    values = randomVector (N, lowerBound, upperBound);

    startTiming ();
    sort (values, N);
    stopTiming ();

    if (N < 20)
        printArr (values, N);

    return 0;
}
// ---------------------------------------------------------------------------- // gpuNUFFT_gpu: NUFFT // // Inverse gpuNUFFT implementation - interpolation from uniform grid data onto // nonuniform k-space data based on optimized // gpuNUFFT kernel with minimal oversampling // ratio (see Beatty et al.) // // Basic steps: - apodization correction // - zero padding with osf // - FFT // - convolution and resampling // // parameters: // * data : output kspace data // * data_count : number of samples on trajectory // * n_coils : number of channels or coils // * crds : coordinates on trajectory, passed as SoA // * imdata : input image data // * imdata_count : number of image data points // * grid_width : size of grid // * kernel : precomputed convolution kernel as lookup table // * kernel_count : number of kernel lookup table entries // * sectors : mapping of data indices according to each sector // * sector_count : number of sectors // * sector_centers: coordinates (x,y,z) of sector centers // * sector_width : width of sector // * im_width : dimension of image // * osr : oversampling ratio // * gpuNUFFT_out : enum indicating how far gpuNUFFT has to be processed // void gpuNUFFT::GpuNUFFTOperator::performForwardGpuNUFFT(gpuNUFFT::Array<DType2> imgData,gpuNUFFT::Array<CufftType>& kspaceData, GpuNUFFTOutput gpuNUFFTOut) { if (DEBUG) { std::cout << "performing forward gpuNUFFT!!!" 
<< std::endl; std::cout << "dataCount: " << kspaceData.count() << " chnCount: " << kspaceData.dim.channels << std::endl; std::cout << "imgCount: " << imgData.count() << " gridWidth: " << this->getGridWidth() << std::endl; } showMemoryInfo(); if (debugTiming) startTiming(); int data_count = (int)this->kSpaceTraj.count(); int n_coils = (int)kspaceData.dim.channels; IndType imdata_count = this->imgDims.count(); int sector_count = (int)this->gridSectorDims.count(); //cuda mem allocation DType2 *imdata_d; CufftType *data_d; if (DEBUG) printf("allocate and copy imdata of size %d...\n",imdata_count); allocateDeviceMem<DType2>(&imdata_d,imdata_count); if (DEBUG) printf("allocate and copy data of size %d...\n",data_count); allocateDeviceMem<CufftType>(&data_d,data_count); initDeviceMemory(n_coils); if (debugTiming) printf("Memory allocation: %.2f ms\n",stopTiming()); int err; //iterate over coils and compute result for (int coil_it = 0; coil_it < n_coils; coil_it++) { int data_coil_offset = coil_it * data_count; int im_coil_offset = coil_it * (int)imdata_count; if (this->applySensData()) // perform automatically "repeating" of input image in case // of existing sensitivity data copyToDevice(imgData.data,imdata_d,imdata_count); else copyToDevice(imgData.data + im_coil_offset,imdata_d,imdata_count); //reset temp arrays cudaMemset(gdata_d,0, sizeof(CufftType)*gi_host->grid_width_dim); cudaMemset(data_d,0, sizeof(CufftType)*data_count); if (DEBUG && (cudaThreadSynchronize() != cudaSuccess)) printf("error at thread synchronization 1: %s\n",cudaGetErrorString(cudaGetLastError())); if (this->applySensData()) { copyToDevice(this->sens.data + im_coil_offset, sens_d,imdata_count); performSensMul(imdata_d,sens_d,gi_host,false); } // apodization Correction if (n_coils > 1 && deapo_d != NULL) performForwardDeapodization(imdata_d,deapo_d,gi_host); else performForwardDeapodization(imdata_d,gi_host); if (DEBUG && (cudaThreadSynchronize() != cudaSuccess)) printf("error at thread 
synchronization 2: %s\n",cudaGetErrorString(cudaGetLastError())); // resize by oversampling factor and zero pad performPadding(imdata_d,gdata_d,gi_host); if (debugTiming) startTiming(); if (DEBUG && (cudaThreadSynchronize() != cudaSuccess)) printf("error at thread synchronization 3: %s\n",cudaGetErrorString(cudaGetLastError())); // shift image to get correct zero frequency position performFFTShift(gdata_d,INVERSE,getGridDims(),gi_host); if (DEBUG && (cudaThreadSynchronize() != cudaSuccess)) printf("error at thread synchronization 4: %s\n",cudaGetErrorString(cudaGetLastError())); // eventually free imdata_d // Forward FFT to kspace domain if (err=pt2CufftExec(fft_plan, gdata_d, gdata_d, CUFFT_FORWARD) != CUFFT_SUCCESS) { fprintf(stderr,"cufft has failed with err %i \n",err); showMemoryInfo(true,stderr); } if (DEBUG && (cudaThreadSynchronize() != cudaSuccess)) printf("error at thread synchronization 5: %s\n",cudaGetErrorString(cudaGetLastError())); performFFTShift(gdata_d,FORWARD,getGridDims(),gi_host); if (DEBUG && (cudaThreadSynchronize() != cudaSuccess)) printf("error at thread synchronization 6: %s\n",cudaGetErrorString(cudaGetLastError())); if (debugTiming) printf("FFT (incl. 
shift): %.2f ms\n",stopTiming()); if (debugTiming) startTiming(); // convolution and resampling to non-standard trajectory forwardConvolution(data_d,crds_d,gdata_d,NULL,sectors_d,sector_centers_d,gi_host); if (DEBUG && (cudaThreadSynchronize() != cudaSuccess)) printf("error at thread synchronization 7: %s\n",cudaGetErrorString(cudaGetLastError())); if (debugTiming) printf("Forward Convolution: %.2f ms\n",stopTiming()); performFFTScaling(data_d,gi_host->data_count,gi_host); if (DEBUG && (cudaThreadSynchronize() != cudaSuccess)) printf("error: at thread synchronization 8: %s\n",cudaGetErrorString(cudaGetLastError())); //write result in correct order back into output array writeOrderedGPU(data_sorted_d,data_indices_d,data_d,(int)this->kSpaceTraj.count()); copyFromDevice(data_sorted_d,kspaceData.data + data_coil_offset,data_count); }//iterate over coils freeTotalDeviceMemory(data_d,imdata_d,NULL); freeDeviceMemory(n_coils); if ((cudaThreadSynchronize() != cudaSuccess)) fprintf(stderr,"error in performForwardGpuNUFFT function: %s\n",cudaGetErrorString(cudaGetLastError())); free(gi_host); }
// ---------------------------------------------------------------------------- // performGpuNUFFTAdj: NUFFT^H // // GpuNUFFT implementation - interpolation from nonuniform k-space data onto // oversampled grid based on optimized gpuNUFFT kernel // with minimal oversampling ratio (see Beatty et al.) // // Basic steps: - density compensation // - convolution with interpolation function // - iFFT // - cropping due to oversampling ratio // - apodization correction // // parameters: // * data : input kspace data // * data_count : number of samples on trajectory // * n_coils : number of channels or coils // * crds : coordinate array on trajectory // * imdata : output image data // * imdata_count : number of image data points // * grid_width : size of grid // * kernel : precomputed convolution kernel as lookup table // * kernel_count : number of kernel lookup table entries // * sectors : mapping of start and end points of each sector // * sector_count : number of sectors // * sector_centers: coordinates (x,y,z) of sector centers // * sector_width : width of sector // * im_width : dimension of image // * osr : oversampling ratio // * do_comp : true, if density compensation has to be done // * density_comp : densiy compensation array // * gpuNUFFT_out : enum indicating how far gpuNUFFT has to be processed // void gpuNUFFT::GpuNUFFTOperator::performGpuNUFFTAdj(gpuNUFFT::Array<DType2> kspaceData, gpuNUFFT::Array<CufftType>& imgData, GpuNUFFTOutput gpuNUFFTOut) { if (DEBUG) { std::cout << "performing gpuNUFFT adjoint!!!" 
<< std::endl; std::cout << "dataCount: " << kspaceData.count() << " chnCount: " << kspaceData.dim.channels << std::endl; std::cout << "imgCount: " << imgData.count() << " gridWidth: " << this->getGridWidth() << std::endl; std::cout << "apply density comp: " << this->applyDensComp() << std::endl; std::cout << "apply sens data: " << this->applySensData() << std::endl; } if (debugTiming) startTiming(); showMemoryInfo(); int data_count = (int)this->kSpaceTraj.count(); int n_coils = (int)kspaceData.dim.channels; IndType imdata_count = this->imgDims.count(); int sector_count = (int)this->gridSectorDims.count(); // select data ordered and leave it on gpu DType2* data_d; if (DEBUG) printf("allocate data of size %d...\n",data_count); allocateDeviceMem<DType2>(&data_d,data_count); CufftType *imdata_d, *imdata_sum_d = NULL; if (DEBUG) printf("allocate and copy imdata of size %d...\n",imdata_count); allocateDeviceMem<CufftType>(&imdata_d,imdata_count); if (this->applySensData()) { if (DEBUG) printf("allocate and copy temp imdata of size %d...\n",imdata_count); allocateDeviceMem<CufftType>(&imdata_sum_d,imdata_count); cudaMemset(imdata_sum_d,0,imdata_count*sizeof(CufftType)); } initDeviceMemory(n_coils); int err; if (debugTiming) printf("Memory allocation: %.2f ms\n",stopTiming()); //iterate over coils and compute result for (int coil_it = 0; coil_it < n_coils; coil_it++) { int data_coil_offset = coil_it * data_count; int im_coil_offset = coil_it * (int)imdata_count;//gi_host->width_dim; cudaMemset(gdata_d,0, sizeof(CufftType)*gi_host->grid_width_dim); //copy coil data to device and select ordered copyToDevice(kspaceData.data + data_coil_offset,data_d,data_count); selectOrderedGPU(data_d,data_indices_d,data_sorted_d,data_count); if (this->applyDensComp()) performDensityCompensation(data_sorted_d,density_comp_d,gi_host); if (DEBUG && (cudaThreadSynchronize() != cudaSuccess)) printf("error at adj thread synchronization 1: %s\n",cudaGetErrorString(cudaGetLastError())); if 
(debugTiming) startTiming(); adjConvolution(data_sorted_d,crds_d,gdata_d,NULL,sectors_d,sector_centers_d,gi_host); if (debugTiming) printf("Adjoint convolution: %.2f ms\n",stopTiming()); if (DEBUG && (cudaThreadSynchronize() != cudaSuccess)) fprintf(stderr,"error at adj thread synchronization 2: %s\n",cudaGetErrorString(cudaGetLastError())); if (gpuNUFFTOut == CONVOLUTION) { if (DEBUG) printf("stopping output after CONVOLUTION step\n"); //get output copyFromDevice<CufftType>(gdata_d,imgData.data,gi_host->grid_width_dim); if (DEBUG) printf("test value at point zero: %f\n",(imgData.data)[0].x); free(gi_host); freeTotalDeviceMemory(data_d,imdata_d,imdata_sum_d,NULL); freeDeviceMemory(n_coils); return; } if ((cudaThreadSynchronize() != cudaSuccess)) fprintf(stderr,"error at adj thread synchronization 3: %s\n",cudaGetErrorString(cudaGetLastError())); if (debugTiming) startTiming(); performFFTShift(gdata_d,INVERSE,getGridDims(),gi_host); //Inverse FFT if (err=pt2CufftExec(fft_plan, gdata_d, gdata_d, CUFFT_INVERSE) != CUFFT_SUCCESS) { fprintf(stderr,"cufft has failed at adj with err %i \n",err); showMemoryInfo(true,stderr); } if (DEBUG && (cudaThreadSynchronize() != cudaSuccess)) fprintf(stderr,"error at adj thread synchronization 4: %s\n",cudaGetErrorString(cudaGetLastError())); if (gpuNUFFTOut == FFT) { if (DEBUG) printf("stopping output after FFT step\n"); //get output copyFromDevice<CufftType>(gdata_d,imgData.data,gi_host->grid_width_dim); free(gi_host); freeTotalDeviceMemory(data_d,imdata_d,imdata_sum_d,NULL); freeDeviceMemory(n_coils); printf("last cuda error: %s\n", cudaGetErrorString(cudaGetLastError())); return; } if (DEBUG && (cudaThreadSynchronize() != cudaSuccess)) printf("error at adj thread synchronization 5: %s\n",cudaGetErrorString(cudaGetLastError())); performFFTShift(gdata_d,INVERSE,getGridDims(),gi_host); if (debugTiming) printf("iFFT (incl. 
shift) : %.2f ms\n",stopTiming()); if (DEBUG && (cudaThreadSynchronize() != cudaSuccess)) printf("error at adj thread synchronization 6: %s\n",cudaGetErrorString(cudaGetLastError())); performCrop(gdata_d,imdata_d,gi_host); if (DEBUG && (cudaThreadSynchronize() != cudaSuccess)) printf("error at adj thread synchronization 7: %s\n",cudaGetErrorString(cudaGetLastError())); //check if precomputed deapo function can be used if (n_coils > 1 && deapo_d != NULL) performDeapodization(imdata_d,deapo_d,gi_host); else performDeapodization(imdata_d,gi_host); if (DEBUG && (cudaThreadSynchronize() != cudaSuccess)) printf("error at adj thread synchronization 8: %s\n",cudaGetErrorString(cudaGetLastError())); performFFTScaling(imdata_d,gi_host->im_width_dim,gi_host); if (DEBUG && (cudaThreadSynchronize() != cudaSuccess)) printf("error: at adj thread synchronization 9: %s\n",cudaGetErrorString(cudaGetLastError())); if (this->applySensData()) { copyToDevice(this->sens.data + im_coil_offset, sens_d,imdata_count); performSensMul(imdata_d,sens_d,gi_host,true); performSensSum(imdata_d,imdata_sum_d,gi_host); } else { // get result per coil // no summation is performed in absence of sensitity data copyFromDevice<CufftType>(imdata_d,imgData.data+im_coil_offset,imdata_count); } if (DEBUG && (cudaThreadSynchronize() != cudaSuccess)) printf("error: at adj thread synchronization 10: %s\n",cudaGetErrorString(cudaGetLastError())); }//iterate over coils if (this->applySensData()) { // get result of combined coils copyFromDevice<CufftType>(imdata_sum_d,imgData.data,imdata_count); } if (DEBUG && (cudaThreadSynchronize() != cudaSuccess)) printf("error: at adj thread synchronization 11: %s\n",cudaGetErrorString(cudaGetLastError())); freeTotalDeviceMemory(data_d,imdata_d,imdata_sum_d,NULL); freeDeviceMemory(n_coils); if ((cudaThreadSynchronize() != cudaSuccess)) fprintf(stderr,"error in gpuNUFFT_gpu_adj function: %s\n",cudaGetErrorString(cudaGetLastError())); free(gi_host); }
/*
 * performGC
 *
 * Top-level driver for one garbage collection, bracketed by enterGC/leaveGC.
 *
 * Always runs minorGC first. A major collection (majorGC) follows when
 * forceMajor is set, or when the total request (old-gen + nursery + any
 * stack growth) exceeds the heap space not occupied by the old generation.
 * Afterwards the current heap, stack and thread state are re-established,
 * pause-time statistics are updated when needGCTime(s) holds, and a GC
 * signal is marked pending if one is handled.
 *
 * Parameters:
 *   s                      - GC state
 *   oldGenBytesRequested   - bytes the caller needs in the old generation
 *   nurseryBytesRequested  - bytes the caller needs in the nursery
 *   forceMajor             - run a major collection regardless of free space
 *   mayResize              - forwarded unchanged to majorGC
 */
void performGC (GC_state s,
                size_t oldGenBytesRequested,
                size_t nurseryBytesRequested,
                bool forceMajor,
                bool mayResize) {
  uintmax_t gcTime;
  bool stackTopOk;
  size_t stackBytesRequested;
  struct rusage ru_start;
  size_t totalBytesRequested;

  enterGC (s);
  s->cumulativeStatistics.numGCs++;
  if (DEBUG or s->controls.messages) {
    /* Nursery size is everything in the heap from the nursery start onward. */
    size_t nurserySize = s->heap.size - ((size_t)(s->heap.nursery - s->heap.start));
    size_t nurseryUsed = (size_t)(s->frontier - s->heap.nursery);
    fprintf (stderr,
             "[GC: Starting gc #%s; requesting %s nursery bytes and %s old-gen bytes,]\n",
             uintmaxToCommaString(s->cumulativeStatistics.numGCs),
             uintmaxToCommaString(nurseryBytesRequested),
             uintmaxToCommaString(oldGenBytesRequested));
    fprintf (stderr,
             "[GC:\theap at "FMTPTR" of size %s bytes (+ %s bytes card/cross map),]\n",
             (uintptr_t)(s->heap.start),
             uintmaxToCommaString(s->heap.size),
             uintmaxToCommaString(s->heap.withMapsSize - s->heap.size));
    fprintf (stderr,
             "[GC:\twith old-gen of size %s bytes (%.1f%% of heap),]\n",
             uintmaxToCommaString(s->heap.oldGenSize),
             100.0 * ((double)(s->heap.oldGenSize) / (double)(s->heap.size)));
    fprintf (stderr,
             "[GC:\tand nursery of size %s bytes (%.1f%% of heap),]\n",
             uintmaxToCommaString(nurserySize),
             100.0 * ((double)(nurserySize) / (double)(s->heap.size)));
    fprintf (stderr,
             "[GC:\tand nursery using %s bytes (%.1f%% of heap, %.1f%% of nursery).]\n",
             uintmaxToCommaString(nurseryUsed),
             100.0 * ((double)(nurseryUsed) / (double)(s->heap.size)),
             100.0 * ((double)(nurseryUsed) / (double)(nurserySize)));
  }
  assert (invariantForGC (s));
  if (needGCTime (s))
    startTiming (&ru_start);
  minorGC (s);
  /* If the mutator stack invariant does not hold, the space for a grown
   * stack (with header) is added to the request. */
  stackTopOk = invariantForMutatorStack (s);
  stackBytesRequested =
    stackTopOk
    ? 0
    : sizeofStackWithHeader (s, sizeofStackGrowReserved (s, getStackCurrent (s)));
  totalBytesRequested =
    oldGenBytesRequested
    + nurseryBytesRequested
    + stackBytesRequested;
  /* Major collection when forced, or when the request does not fit in the
   * heap space left over after the old generation. */
  if (forceMajor
      or totalBytesRequested > s->heap.size - s->heap.oldGenSize)
    majorGC (s, totalBytesRequested, mayResize);
  setGCStateCurrentHeap (s, oldGenBytesRequested + stackBytesRequested,
                         nurseryBytesRequested);
  assert (hasHeapBytesFree (s, oldGenBytesRequested + stackBytesRequested,
                            nurseryBytesRequested));
  unless (stackTopOk)
    growStackCurrent (s);
  setGCStateCurrentThreadAndStack (s);
  if (needGCTime (s)) {
    /* stopTiming's return value is used as this collection's pause time. */
    gcTime = stopTiming (&ru_start, &s->cumulativeStatistics.ru_gc);
    s->cumulativeStatistics.maxPauseTime =
      max (s->cumulativeStatistics.maxPauseTime, gcTime);
  } else
    gcTime = 0;  /* Assign gcTime to quell gcc warning. */
  if (DEBUG or s->controls.messages) {
    size_t nurserySize = s->heap.size - (size_t)(s->heap.nursery - s->heap.start);
    fprintf (stderr,
             "[GC: Finished gc #%s; time %s ms,]\n",
             uintmaxToCommaString(s->cumulativeStatistics.numGCs),
             uintmaxToCommaString(gcTime));
    fprintf (stderr,
             "[GC:\theap at "FMTPTR" of size %s bytes (+ %s bytes card/cross map),]\n",
             (uintptr_t)(s->heap.start),
             uintmaxToCommaString(s->heap.size),
             uintmaxToCommaString(s->heap.withMapsSize - s->heap.size));
    fprintf (stderr,
             "[GC:\twith old-gen of size %s bytes (%.1f%% of heap),]\n",
             uintmaxToCommaString(s->heap.oldGenSize),
             100.0 * ((double)(s->heap.oldGenSize) / (double)(s->heap.size)));
    fprintf (stderr,
             "[GC:\tand nursery of size %s bytes (%.1f%% of heap).]\n",
             uintmaxToCommaString(nurserySize),
             100.0 * ((double)(nurserySize) / (double)(s->heap.size)));
  }
  /* Send a GC signal. */
  if (s->signalsInfo.gcSignalHandled
      and s->signalHandlerThread != BOGUS_OBJPTR) {
    if (DEBUG_SIGNALS)
      fprintf (stderr, "GC Signal pending.\n");
    s->signalsInfo.gcSignalPending = TRUE;
    /* The general pending flag is only raised when not already inside the
     * signal handler. */
    unless (s->signalsInfo.amInSignalHandler)
      s->signalsInfo.signalIsPending = TRUE;
  }
  if (DEBUG)
    displayGCState (s, stderr);
  assert (hasHeapBytesFree (s, oldGenBytesRequested, nurseryBytesRequested));
  assert (invariantForGC (s));
  leaveGC (s);
}