Пример #1
0
int main(int argc, char ** argv)
{
  long lower, upper;
  int WGS;

  if (argc != 4) {
    printf("not 2 arguments\n");
    return 1;
  }
  sscanf(argv[1], "%ld", &lower);
  sscanf(argv[2], "%ld", &upper);
  sscanf(argv[3], "%d", &WGS);

  long results_size = (upper*(upper-1))/2;

  long* results = (long *) malloc(sizeof(long)*WGS);
  int i;
  for(i = 0; i < WGS; i ++) results[i] = 0L;

  printf("%ld\n", results_size);
  FILE *fp;
  char *KernelSource;
  cl_kernel kernel;
  fp = fopen("totient_kernel.cl", "r");
  if (!fp) {
    fprintf(stderr, "Failed to load kernel.\n");
    exit(1);
  }
  KernelSource = (char*)malloc(MAX_SOURCE_SIZE);
  fread( KernelSource, 1, MAX_SOURCE_SIZE, fp);
  fclose( fp );
  
  size_t local[1];
  size_t global[1];
  local[0] = WGS;
  global[0] = results_size;
  
  initGPU();

  // Fill in here:
  kernel = setupKernel( KernelSource, "totient", 2,
                                  LongArr, WGS, results,
                                  IntConst, WGS);

  // Fill in here:
  runKernel( kernel, 1, global, local);

  long tot = 0;
  int l;
  for(l = 0; l < WGS; l ++) tot += results[l];
  
  printf("C: Sum of Totients between [%ld..%ld] is %ld\n",
    lower, upper, tot);
  return 0;
}
Пример #2
0
DispersalFunctions::DispersalFunctions(Config *settings0, Random* rand0,
    bool cuda) :
  sb_old(0), hb_old(0), settings(settings0), _rand(rand0), _cuda(cuda)
{
  if (_cuda)
    {
      hb = settings->getHB(0);
      sb = settings->getSB(0);
      sb_old = sb;
      hb_old = hb;
      initGPU(sb->getF(), sb->getMaxX(), sb->getMaxY(), hb->getF(),
          hb->getMaxX(), hb->getMaxY());
    }
}
Пример #3
0
void initMMU(MMU * pmmu)
{
	bzero((void*)pmmu,sizeof(MMU));
	uint8_t bios[BIOS_SIZE] = __BIOS__ ;
	memcpy(pmmu->bios, bios, BIOS_SIZE);
	pmmu->bios_enabled = 1;
	pmmu->rom_bank = 1;
	pmmu->ram_bank_enable = 1;
	pmmu->mbc1_mode = MBC1_16_8_MODE;
	initGPU(&(pmmu->gpu));
	#ifdef USE_ADDRESS_LUT
	fillAddressLUT(pmmu);
	#endif
}
Пример #4
0
// -----------------------------------------------------------------------------
SceneManager::SceneManager(const glm::ivec2 screenSize,
                           SceneContainer *container)
    : m_world(container->getWorld()), m_camera(container->getCamera()),
      #ifndef WINDOWS
      m_textManager(TextManager(FONT_PATH + FONT_FILE, FONT_HEIGHT, screenSize)),
      #endif
      m_drawer(screenSize),
      m_shadowManager(screenSize),
      m_BACKGROUND_COLOR(container->getBackgroundColor()),
      m_positionMutex(SDL_CreateMutex()) {
  m_lightMask = (2 << (m_world->getLightsNumber() - 1)) - 1;
  setupProjection(screenSize);
  initGPU(container);
  glClearColor(m_BACKGROUND_COLOR.x, m_BACKGROUND_COLOR.y, m_BACKGROUND_COLOR.z,
               m_BACKGROUND_COLOR.w);
  checkOpenGLError("GLInitializer: glClearColor");
}
Пример #5
0
void  init_compression(queue * fifo,int maxit,int numb,int bsize)
{
    maxiterations=maxit;
    numblocks=numb;
    blocksize=bsize;

    printf("Initializing the GPU\n");
    initGPU();
    //create consumer threades
    pthread_create (&congpu, NULL, gpu_consumer, fifo);
    pthread_create (&concpu, NULL, cpu_consumer, fifo);
    pthread_create (&consend, NULL, cpu_sender, fifo);



    return;
}
Пример #6
0
int computeFitness(int * c_position, int * c_velocity, int * p_angle, int * p_velocity, int * fitness, int n)
{
	if (!initiated) {
		initGPU(n);
		initiated = 1;
	}

	#pragma mark Writing memory
	// Allocate memory on the device to hold our data and store the results into
	buffer_size = sizeof(int) * n;

	err = clEnqueueWriteBuffer(cmd_queue, mem_c_position, CL_TRUE, 0, buffer_size, (void *) c_position, 0, NULL, NULL);
	err |= clEnqueueWriteBuffer(cmd_queue, mem_c_velocity, CL_TRUE, 0, buffer_size, (void *) c_velocity, 0, NULL, NULL);
	err |= clEnqueueWriteBuffer(cmd_queue, mem_p_angle, CL_TRUE, 0, buffer_size, (void *) p_angle, 0, NULL, NULL);
	err |= clEnqueueWriteBuffer(cmd_queue, mem_p_velocity, CL_TRUE, 0, buffer_size, (void *) p_velocity, 0, NULL, NULL);
	assert(err == CL_SUCCESS);

	// Get all of the stuff written and allocated
	clFinish(cmd_queue);

	#pragma mark Kernel Arguments
	// Now setup the arguments to our kernel
	err  = clSetKernelArg(kernel[0], 0, sizeof(cl_mem), (void *) &mem_c_position);
	err |= clSetKernelArg(kernel[0], 1, sizeof(cl_mem), (void *) &mem_c_velocity);
	err |= clSetKernelArg(kernel[0], 2, sizeof(cl_mem), (void *) &mem_p_angle);
	err |= clSetKernelArg(kernel[0], 3, sizeof(cl_mem), (void *) &mem_p_velocity);
	err |= clSetKernelArg(kernel[0], 4, sizeof(cl_mem), (void *) &mem_fitness);
	assert(err == CL_SUCCESS);

	#pragma mark Execution and Reading memory
	// Run the calculation by enqueuing it and forcing the 
	// command queue to complete the task
	size_t global_work_size = n;
	err = clEnqueueNDRangeKernel(cmd_queue, kernel[0], 1, NULL, &global_work_size, NULL, 0, NULL, NULL);
	assert(err == CL_SUCCESS);
	clFinish(cmd_queue);

	// Once finished read back the results from the answer 
	// array into the results array
	err = clEnqueueReadBuffer(cmd_queue, mem_fitness, CL_TRUE, 0, buffer_size, fitness, 0, NULL, NULL);
	assert(err == CL_SUCCESS);
	clFinish(cmd_queue);

	return CL_SUCCESS;
}
Пример #7
0
void gpu_init(H264Context *h)
{
  GPUH264Context * const g = &h->gpu;
  MpegEncContext * const s = &h->s;
  int pic_width = 16*s->mb_width, pic_height = 16*s->mb_height;
  int i;
  tp_3d.texTarget		= GL_TEXTURE_3D;
  tp_3d.texInternalFormat       = GL_LUMINANCE8;
  tp_3d.texFormat	        = GL_LUMINANCE;

  if(g->init) {
      printf("gpu_init called twice (Why?)\n");
      return;
    }
  //screenWidth = 1920, screenHeight = 1088;
  screenWidth = pic_width, screenHeight = pic_height;
  printf("Initializing GPU Context\n");
  initGPU(screenWidth, screenHeight);
  initGPGPU(screenWidth, screenHeight);
  glEnable(tp_3d.texTarget);

  //RUDD TEMP DPB is fixed at 64 for now
  //Nearest Power of 2?
  g->dpb_tex = createTexture(screenWidth, screenHeight, 16, tp_3d); 
  g->dpb_free = ~0x0;

  //RUDD TEST for comparison
  g->lum_residual = av_mallocz(s->linesize   * s->mb_height * 16 * sizeof(short));
  g->cr_residual  = av_mallocz(s->uvlinesize * s->mb_height * 8  * sizeof(short));
  g->cb_residual  = av_mallocz(s->uvlinesize * s->mb_height * 8  * sizeof(short));

  //RUDD TODO size?
  g->block_buffer = av_mallocz(9000*sizeof(H264mb));
  g->init = 1;

  setup_shaders(g);
}
Пример #8
0
int main(int argc, char* argv[])
{
    atexit(AtExit);
    if (argc < 2) {
        printf("Usage:\n");

#ifdef MODULE_SUPPORT
        printf("%s <in.ncch> [-d|-noscreen|-codepatch <code>|-modules <num> <in.ncch>|-overdrivlist <num> <services>|-sdmc <path>|-sysdata <path>|-sdwrite|-slotone|-configsave|-gdbport <port>]\n", argv[0]);
#else
        printf("%s <in.ncch> [-d|-noscreen|-codepatch <code>|-sdmc <path>|-sysdata <path>|-sdwrite|-slotone|-configsave|-gdbport <port>]\n", argv[0]);
#endif

        return 1;
    }

    //disasm = (argc > 2) && (strcmp(argv[2], "-d") == 0);
    //noscreen =    (argc > 2) && (strcmp(argv[2], "-noscreen") == 0);

    for (int i = 2; i < argc; i++) {
        if ((strcmp(argv[i], "-d") == 0))disasm = true;
        else if ((strcmp(argv[i], "-noscreen") == 0))noscreen = true;
        else if ((strcmp(argv[i], "-codepatch") == 0)) {
            i++;
            codepath = malloc(strlen(argv[i]));
            strcpy(codepath, argv[i]);
        }
        else if ((strcmp(argv[i], "-sdmc") == 0))
        {
            i++;
            strcpy(config_sdmc_path, argv[i]);
            config_has_sdmc = true;
        }
        else if ((strcmp(argv[i], "-sysdata") == 0))
        {
            i++;
            strcpy(config_sysdataoutpath, argv[i]);
            config_usesys = true;
        }
        else if ((strcmp(argv[i], "-sdwrite") == 0))config_slotone = true;
        else if ((strcmp(argv[i], "-slotone") == 0))config_sdmcwriteable = true;
        else if ((strcmp(argv[i], "-configsave") == 0))config_nand_cfg_save = true;

#ifdef GDB_STUB
        if ((strcmp(argv[i], "-gdbport") == 0))
        {
            i++;
            global_gdb_port = atoi(argv[i]);
            if (global_gdb_port < 1 || global_gdb_port > 65535) {
                DEBUG("ARM9 GDB stub port must be in the range 1 to 65535\n");
                exit(-1);
            }
            gdb_ctrl_iface.stall = stall_cpu;
            gdb_ctrl_iface.unstall = unstall_cpu;
            gdb_ctrl_iface.read_reg = read_cpu_reg;
            gdb_ctrl_iface.set_reg = set_cpu_reg;
            gdb_ctrl_iface.install_post_ex_fn = install_post_exec_fn;
            gdb_ctrl_iface.remove_post_ex_fn = remove_post_exec_fn;

            gdb_base_memory_iface.prefetch16 = gdb_prefetch16;
            gdb_base_memory_iface.prefetch32 = gdb_prefetch32;
            gdb_base_memory_iface.read32 = gdb_read32;
            gdb_base_memory_iface.write32 = gdb_write32;
            gdb_base_memory_iface.read16 = gdb_read16;
            gdb_base_memory_iface.write16 = gdb_write16;
            gdb_base_memory_iface.read8 = gdb_read8;
            gdb_base_memory_iface.write8 = gdb_write8;

            


        }
#endif
        if (i >= argc)break;


#ifdef MODULE_SUPPORT
        if ((strcmp(argv[i], "-modules") == 0)) {
            i++;
            modulenum = atoi(argv[i]);
            modulenames = malloc(sizeof(char*)*modulenum);
            i++;
            for (int j = 0; j < modulenum; j++) {
                *(modulenames + j) = malloc(strlen(argv[i]));
                strcpy(*(modulenames + j), argv[i]);
                i++;
            }
        }
        if (i >= argc)break;
        if ((strcmp(argv[i], "-overdrivlist") == 0)) {
            i++;
            overdrivnum = atoi(argv[i]);
            overdrivnames = malloc(sizeof(char*)*modulenum);
            i++;
            for (int j = 0; j < modulenum; j++) {
                *(overdrivnames + j) = malloc(strlen(argv[i]));
                strcpy(*(overdrivnames + j), argv[i]);
                i++;
            }
        }
        if (i >= argc)break;
#endif

    }

#ifdef MODULE_SUPPORT
    curprocesshandlelist = malloc(sizeof(u32)*(modulenum + 1));
    ModuleSupport_MemInit(modulenum);
#endif

    signal(SIGINT, AtSig);

    if (!noscreen)
        screen_Init();
    hid_spvr_init();
    hid_user_init();
    initDSP();
    mcu_GPU_init();
    initGPU();
    srv_InitGlobal();


    arm11_Init();

#ifdef MODULE_SUPPORT
    int i;

    for (i = 0; i<modulenum; i++) {
        u32 handzwei = handle_New(HANDLE_TYPE_PROCESS, 0);
        curprocesshandle = handzwei;
        *(curprocesshandlelist + i) = handzwei;

        ModuleSupport_SwapProcessMem(i);

        u32 hand = handle_New(HANDLE_TYPE_THREAD, 0);
        threads_New(hand);

        // Load file.
        FILE* fd = fopen(*(modulenames + i), "rb");
        if (fd == NULL) {
            perror("Error opening file");
            return 1;
        }

        if (loader_LoadFile(fd) != 0) {
            fclose(fd);
            return 1;
        }
    }

    u32 handzwei = handle_New(HANDLE_TYPE_PROCESS, 0);
    *(curprocesshandlelist + modulenum) = handzwei;
    ModuleSupport_SwapProcessMem(modulenum);
#else
    u32 handzwei = handle_New(HANDLE_TYPE_PROCESS, 0);
    curprocesshandle = handzwei;
#endif

    FILE* fd = fopen(argv[1], "rb");
    if (fd == NULL) {
        perror("Error opening file");
        return 1;
    }

    u32 hand = handle_New(HANDLE_TYPE_THREAD, 0);
    threads_New(hand);

    // Load file.
    if (loader_LoadFile(fd) != 0) {
        fclose(fd);
        return 1;
    }

#ifdef GDB_STUB
    if (global_gdb_port)
    {
        gdb_stub = createStub_gdb(global_gdb_port,
            &gdb_memio,
            &gdb_base_memory_iface);
        if (gdb_stub == NULL) {
            DEBUG("Failed to create ARM9 gdbstub on port %d\n",
                global_gdb_port);
            exit(-1);
        }
        activateStub_gdb(gdb_stub, &gdb_ctrl_iface);
    }
#endif
    // Execute.
    while (running) {
        if (!noscreen)
            screen_HandleEvent();

#ifdef MODULE_SUPPORT
        int k;
        for (k = 0; k <= modulenum; k++) {
            ModuleSupport_SwapProcess(k);
            DEBUG("Process %X\n",k);
        }
#else
        threads_Execute();
#endif

        if (!noscreen)
            screen_RenderGPU();

        FPS_Lock();
        //mem_Dbugdump();
    }


    fclose(fd);
    return 0;
}
Пример #9
0
void* CDFAlign::ThreadFunc_cuAlign(void* p)
{
	CDFAlign *pThis=(CDFAlign *)p;
	APARA &para=pThis->m_para;
	pThis->m_bRun=true;

	char str[512];
	int i,j;
	bool bSuccess=false;

	//open stack file
	MRC stack;
	if(stack.open(pThis->m_fnStack,"rb")<=0)
	{
		sprintf(str,"Error: Failed to open stack %s .",pThis->m_fnStack);
		Message(str);
		pThis->m_bRun=false;
		return (void *)0;
	}

	//get image size
	int nx=stack.getNx();
	int ny=stack.getNy();
	int nz=stack.getNz();
	sprintf(str,"\nInput Stack: Nx(%d) Ny(%d) Nz(%d)\n\n",nx,ny,nz);
	pThis->TextOutput(str);

	
	int bin=para.bin;
	if(bin<=0) return (void *)0;

	int offsetx=para.crop_offsetx;
	int offsety=para.crop_offsety;
	DIM nsamUnbin=verifyCropSize(nx,ny, offsetx, offsety, para.crop_nsam,bin);
	if(nsamUnbin.x<=0 || nsamUnbin.y<=0 )
	{
		Message("Error: Wrong image Size.");
		pThis->m_bRun=false;
		return (void *)0;
	}

	DIM nsam=nsamUnbin/bin;
	
	int nsamb=nsam.width()+2;
	if(bin==1) sprintf(str,"Crop Image: Offset(%d %d) Dim(%d %d)\n\n",
							offsetx,offsety,nsamUnbin.x,nsamUnbin.y);
	else sprintf(str,"Crop Image: Offset(%d %d) RawDim(%d %d) BinnedDim(%d %d)\n\n",
							offsetx,offsety,nsamUnbin.x,nsamUnbin.y,nsam.x,nsam.y);
	pThis->TextOutput(str);
	pThis->m_nsam=nsam;
	//pThis->m_nsamRaw=nx/bin;

	//allocate memeory	
	size_t size=nsam.width()*nsam.height();
	size_t sizeb=nsamb*nsam.height();
	int sizebUnbin=(nsamUnbin.width()+2)*nsamUnbin.height();
	if(para.nStart<0) para.nStart=0;
	if(para.nEnd>=nz) para.nEnd=nz-1;
	int nframe=para.nEnd-para.nStart+1;
	pThis->UpdateDisplay();

	//host memory
	float *bufmrc=new float[nx*ny];
	float *bufmrcfft=new float[sizebUnbin];
	float *htmp=new float[sizeb];
	float *hbuf=new float[sizeb*nframe];  //host memory for entir stack
	float *hdisp=new float[sizeb];
	float *hsumRaw=new float[sizeb];
	float *hsumCorr=new float[sizeb];
	float *hFSCRaw0=new float[sizeb];  //even number
	float *hFSCRaw1=new float[sizeb];  //odd
	float *hFSCCorr0=new float[sizeb];  //even number
	float *hFSCCorr1=new float[sizeb];   //odd
	size_t refsize=0;
	if(para.bDark) 
	{
		if(pThis->m_pDark!=0) delete [] pThis->m_pDark;
		pThis->m_pDark=ReadRef(pThis->m_fnDark,nx,ny);
		if(pThis->m_pDark==0) 
		{
			Message("Failed to get dark reference.");	
			pThis->m_bRun=false;
			return (void *)0;
		}
		refsize+=nx*ny;
	}
	if(para.bGain) 
	{
		if(pThis->m_pGain!=0) delete [] pThis->m_pGain;
		pThis->m_pGain=ReadRef(pThis->m_fnGain,nx,ny);
		if(pThis->m_pGain==0) 
		{
			Message("Failed to get gain reference.");	
			pThis->m_bRun=false;
			return (void *)0;
		}
		refsize+=nx*ny;
	}
	
	
	sprintf(str,"Allocate host memory: %f Gb\n",(nx*ny+sizeb*(nframe+8)+sizebUnbin+refsize)/256.0/1024.0/1024.0);
	pThis->TextOutput(str);
	if(hbuf==0)
	{
		if(bufmrc!=NULL) delete [] bufmrc;
		Message("Failed to allocate host memeory.");
		pThis->m_bRun=false;
		return (void *)0;
	}


	//device memory
	bool success=initGPU(para.GPUNum);
	if(!success)
	{
		sprintf(str,"Failed to initialize GPU #%d.",para.GPUNum);
		Message(str);
		delete [] bufmrc;
		delete [] hbuf;
		pThis->m_bRun=false;
		return (void *)0;
	}
	

	float *dsum=0;
	float *dsumcorr=0;
	float *dfft=0;
	float *dtmp=0;
	GPUMemAlloc((void **)&dsum,sizeof(float)*sizeb);	
	GPUMemAlloc((void **)&dsumcorr,sizeof(float)*sizeb);	
	GPUMemAlloc((void **)&dtmp,sizeof(float)*sizeb);
	cufftHandle fft_plan,ifft_plan;
	
	//prepare fft for unbinned image
	fft_plan=GPUFFTPlan(nsamUnbin);
	GPUSync();
	GPUMemAlloc((void **)&dfft,sizeof(float)*sizebUnbin);


	//make a list 
	int sizec=(nsam.width()/2+1)*nsam.height();
	MASK *hPosList=new MASK[sizec];
	MASK *dPosList=0;
	MkPosList(hPosList,nsam,para.bfactor);
	GPUMemAlloc((void **)&dPosList,sizeof(MASK)*sizec);
	GPUMemH2D((void **)dPosList,(void **)hPosList,sizeof(MASK)*sizec);

	size_t theFree, theTotal;
	GPUMemCheck(theFree,theTotal);
	sprintf(str,"GPU memory:  free:%.0fMb    total:%.0fMb\n", theFree/1024.0/1024.0, theTotal/1024.0/1024.0);
	pThis->TextOutput(str);

	//Read stack
	pThis->TextOutput("\nRead stack:\n");
	
	float sx=0;
	float sy=0;
	float shiftx,shifty,cc;
	float avgcc=0.0;
	bool bFSCEven=true;
	
	//prepare frame sum numbers
	//Only the frame inside sum range is used for stack and sum output 
	int nStartSum=para.nStartSum-para.nStart;
	int nEndSum=para.nEndSum-para.nStart;	
	if(nStartSum<0) nStartSum=0;
	if(para.nEndSum>para.nEnd) nEndSum=para.nEnd-para.nStart+1;
	if(nEndSum<=nStartSum || nEndSum>=nframe) nEndSum=nframe-1;

	//1. calculate sum
	GPUMemZero((void **)&dsum,sizeof(float)*sizeb);
	GPUSync();
	GPUMemZero((void **)&dsumcorr,sizeof(float)*sizeb);
	GPUSync();
	for(j=para.nStart;j<=para.nEnd;j++)
	{
		//read from file and crop
		if(stack.read2DIm_32bit(bufmrc,j)!=stack.getImSize())
		{
			sprintf(str,"Error when reading #%03d\n",j);
			pThis->TextOutput(str);
		}

		if(para.flp) {	//flip image along y axis
			FlipYAxis(bufmrc, nx, ny);
		}

		//apply gain and dark reference
		if(!ApplyRef(bufmrc, para.bDark, pThis->m_pDark, para.bGain, pThis->m_pGain, nx*ny))
		{
			sprintf(str,"Error when applying dark and/or gain to #%03d\n",j);
			pThis->TextOutput(str);
		}
		
		
		crop2fft(bufmrc,DIM(nx,ny),bufmrcfft,offsetx,offsety,nsamUnbin,bin);
		
		//copy to GPU
		GPUMemH2D((void *)dfft,(void *)bufmrcfft,sizeof(float)*sizebUnbin);
		//do fft
		GPUFFT2d(dfft,fft_plan);
		GPUSync();

		//do binning
		if(bin>1)
		{
			GPUMemBinD2D(dtmp, dfft, nsam, nsamUnbin);
			GPUMemD2D(dfft, dtmp, sizeof(float)*sizeb);
		}

		//Sum
		if( (j-para.nStart)>=nStartSum && (j-para.nStart)<=nEndSum )
		{
			if(bFSCEven) GPUAdd(dsum,dfft,sizeb);
			else GPUAdd(dsumcorr,dfft,sizeb);
			bFSCEven=!bFSCEven;
		}
		//copy ffted image to host
		GPUMemD2H((void *)(hbuf+(j-para.nStart)*sizeb),(void *)dfft,sizeof(float)*sizeb);
		GPUSync();
		
		sprintf(str,"......Read and sum frame #%03d   mean:%f\n",j,(hbuf+(j-para.nStart)*sizeb)[0]/nsam.x/nsam.y);
		pThis->TextOutput(str);
		
	}
	GPUMemD2H((void *)hFSCRaw0,(void *)dsum,sizeof(float)*sizeb);
	GPUMemD2H((void *)hFSCRaw1,(void *)dsumcorr,sizeof(float)*sizeb);
	GPUAdd(dsum,dsumcorr,sizeb);
	GPUSync();
	

	//free memory for unbined image
	delete [] bufmrcfft;
	bufmrcfft=0;
	GPUMemFree((void **)&dfft);
	GPUFFTDestroy(fft_plan);
	fft_plan=0;	
	//finish GPU memory allocate
	GPUMemAlloc((void **)&dfft,sizeof(float)*sizeb);
	GPUMemZero((void **)&dsumcorr,sizeof(float)*sizeb);
	GPUSync();
	ifft_plan=GPUIFFTPlan(nsam);
	GPUSync();
	//copy sum image to host for save and display
	if(para.bDispFFTRaw || para.bSaveRawSum)
	{
		GPUIFFT2d(dsum,ifft_plan);
		GPUSync();
		GPUMultiplyNum(dsum,1.0/size,sizeb);
		GPUMemD2H((void *)hsumRaw,(void *)dsum,sizeof(float)*sizeb);
		fft2buf(bufmrc,hsumRaw,nsam);
	}

	//save
	MRC mrcraw;
	if(para.bSaveRawSum)
	{
		//write to file
		mrcraw.open(pThis->m_fnRawsum,"wb");
		mrcraw.createMRC(bufmrc,nsam.width(),nsam.height(),1);
		//stats
		sprintf(str,"Mean=%f   Min=%f   Max=%f\n",
					mrcraw.m_header.dmean,mrcraw.m_header.dmin,mrcraw.m_header.dmax);
		pThis->TextOutput(str);
		mrcraw.close();
		sprintf(str,"Save Uncorrected Sum to: %s\n",pThis->m_fnRawsum);
		pThis->TextOutput(str);
	}
	
	
	//save un-corrected stack
	MRC stackRaw;
	if(para.bSaveStackRaw)
	{
		pThis->TextOutput("\nWrite uncorrected stack:\n");
		stackRaw.open(pThis->m_fnStackRaw,"wb");
		stackRaw.m_header.nx=nsam.x;
		stackRaw.m_header.ny=nsam.y;
		stackRaw.m_header.nz=nEndSum-nStartSum+1;
		stackRaw.updateHeader();

		for(j=nStartSum;j<=nEndSum;j++)
		{
			//copy to GPU
			GPUMemH2D((void *)dfft,(void *)(hbuf+j*sizeb),sizeof(float)*sizeb);
			//ifft
			GPUIFFT2d(dfft,ifft_plan);
			GPUSync();
			GPUMultiplyNum(dfft,1.0/size,sizeb);
			GPUSync();
			GPUMemD2H((void *)htmp,(void *)dfft,sizeof(float)*sizeb);
			fft2buf(bufmrc,htmp,nsam);
			stackRaw.write2DIm(bufmrc,j-nStartSum);
			
			sprintf(str,"......Write frame #%03d\n",j+para.nStart);
			pThis->TextOutput(str);
		}
		MinMaxMean(bufmrc,nsam.x*nsam.y, stackRaw.m_header.dmin, stackRaw.m_header.dmax, stackRaw.m_header.dmean);
		stackRaw.updateHeader();
		sprintf(str,"Mean=%f   Min=%f   Max=%f\n",
					stackRaw.m_header.dmean,stackRaw.m_header.dmin,stackRaw.m_header.dmax);
		pThis->TextOutput(str);
		stackRaw.close();
		sprintf(str,"Save Uncorrected Stack to: %s\n",pThis->m_fnStackRaw);
		pThis->TextOutput(str);
	}

	// added by Wen Jiang
	// generate running average frames for CC
	float *hbuf_orig=hbuf;
	float *hbuf_ra=0;
	if(para.nrw>1) {
		sprintf(str,"\nStart generating running average of %d frames\n",para.nrw);
		pThis->TextOutput(str);

		hbuf_ra = new float[sizeb*nframe];  //host memory for entire stack
		memcpy(hbuf_ra, hbuf, sizeof(float)*sizeb*nframe);	// hbuf stores FFT of all frames
		for(int fi = 0; fi<nframe; fi++) {
			sprintf(str,"......Generating runing average of frame %d\n",fi);
			pThis->TextOutput(str);
			for(int ri=0; ri<para.nrw; ri++) {
				int fi2 = fi - para.nrw/2 + ri;
				if(fi2<0 || fi2==fi || fi2>=nframe) continue;
				for(size_t pi=0; pi<sizeb; pi++)
					hbuf_ra[sizeb*fi+pi]+=hbuf[sizeb*fi2+pi];
			}
		}
		hbuf = hbuf_ra;
		sprintf(str,"Running average of %d frames are generated\n\n",para.nrw);
		pThis->TextOutput(str);
	}

	//2. frame to frame shift
	pThis->TextOutput("\nCalculate relative drift between frames\n");
	Matrix<complex<double> > A;
	vector<complex<int> > compList;
	int ncomp=OD_SetEquation_All(A,compList, nframe, para.FrameDistOffset);
	Vector<complex<double> > b=Vector<complex<double> >(ncomp);
	int box=para.CCPeakSearchDim;
	float *hboxmap=new float[box*box*ncomp];
	int par0,par1;
	for(j=0;j<ncomp;j++)
	{
		par0=compList[j].real();
		par1=compList[j].imag();
		//copy to GPU
		GPUMemH2D((void *)dsum,(void *)(hbuf+par0*sizeb),sizeof(float)*sizeb);
		GPUMemH2D((void *)dfft,(void *)(hbuf+par1*sizeb),sizeof(float)*sizeb);
		//shift and cc
		sx=0;
		sy=0;
		GPUShiftCC(dfft, dsum, dPosList,sx, sy, nsam);
		GPUSync();
		//do ifft
		GPUIFFT2d(dfft,ifft_plan);
		GPUSync();
		//find shift
		cc=FindShift(dfft,nsam, hboxmap+j*box*box, box, shiftx, shifty, para.NoisePeakSize-1);
		b[j]=complex<double>(shiftx,shifty);
		avgcc+=cc;
		sprintf(str,"......%03d Frame #%03d VS #%03d xy-shift: %8.4f %8.4f      CC:%f\n",j,par0+para.nStart,par1+para.nStart,shiftx,shifty,cc);
		pThis->TextOutput(str);
	}

	// added by Wen Jiang
	// restore original stack buffer and delete running averages
	hbuf=hbuf_orig;
	if(hbuf_ra) delete[] hbuf_ra;

	//3. sovle overdetermined equation
	Vector<complex<double> > shift=lsSolver(A,b);
	Vector<double> ki=abs(A*shift-b);
	sprintf(str,"\n......ki: First round \n");
	pThis->TextOutput(str);
	for(j=0;j<ki.size();j++)
	{
		par0=compList[j].real();
		par1=compList[j].imag();
		sprintf(str,"......ki #%03d of Frame #%03d VS #%03d: %8.4lf \n",j+para.nStart,par0+para.nStart,par1+para.nStart,ki[j]);
		pThis->TextOutput(str);
	}
	sprintf(str,"................................Average ki: %8.4lf \n\n",sum(ki)/ki.size());
	pThis->TextOutput(str);
	//display CCMap
	if(para.bDispCCMap)
	{
		pThis->CCMapOutput(hboxmap,(void *)&ki);
	}
	//3.1 re-sovle overdetermined equation after removing large ki elments
	double kiThresh=para.kiThresh;
	vector<int> goodlist=OD_Threshold(A, b, ki, kiThresh);
	shift=lsSolver(A,b);
	ki=abs(A*shift-b);
	sprintf(str,"......ki: Second round \n");
	pThis->TextOutput(str);
	for(j=0;j<ki.size();j++)
	{
		par0=compList[goodlist[j] ].real();
		par1=compList[goodlist[j] ].imag();
		sprintf(str,"......ki #%03d of Frame #%03d VS #%03d: %8.4f \n",j+para.nStart,par0+para.nStart,par1+para.nStart,ki[j]);
		pThis->TextOutput(str);
	}
	sprintf(str,"................................Average ki: %8.4lf \n\n",sum(ki)/ki.size());
	pThis->TextOutput(str);

	//output final shift
	//calculate average shift
	double avgshift=0.0;
	for(j=0;j<shift.size();j++) avgshift+=abs(shift[j]);
	avgshift/=shift.size();
	sprintf(str,"Final shift (Average %8.4lf pixels/frame):\n",avgshift);
	//output
	pThis->TextOutput(str);
	vector<complex<double> > shiftlist;
	complex<double> totalshift=0;
	sprintf(str,"......Shift of Frame #%03d : %8.4f %8.4f\n",para.nStart,totalshift.real(),totalshift.imag());
	pThis->TextOutput(str);
	shiftlist.push_back(totalshift);
	for(j=0;j<shift.size();j++)
	{
		totalshift=totalshift+shift[j];
		sprintf(str,"......Shift of Frame #%03d : %8.4f %8.4f\n",j+para.nStart+1,totalshift.real(),totalshift.imag());
		pThis->TextOutput(str);
		shiftlist.push_back(totalshift);
	}
	pThis->PlotOutput(shiftlist);

	//save CCMap image
	if(para.bSaveCCmap) 
	{
		buf2mrc(pThis->m_fnCCmap,hboxmap,box,box,ncomp);
		sprintf(str,"Save CC map to: %s\n",pThis->m_fnCCmap);
		pThis->TextOutput(str);
	}
		

	MRC stackCorr;
	if(para.bSaveStackCorr)
	{
		stackCorr.open(pThis->m_fnStackCorr,"wb");
		stackCorr.m_header.nx=nsam.x;
		stackCorr.m_header.ny=nsam.y;
		stackCorr.m_header.nz=nEndSum-nStartSum+1;
		stackCorr.updateHeader();
	}

	//3. correct xy-shift

	
	//reset memory
	GPUMemZero((void **)&dsum,sizeof(float)*sizeb);
	GPUSync();
	GPUMemZero((void **)&dsumcorr,sizeof(float)*sizeb);
	GPUSync();
	//calculate middle frame shift
	complex<double> midshift=0.0;
	int RefFrame=0;
	if(para.bAlignToMid==1) {
        if(para.nStart > 0 || para.nEnd > 0) {
                RefFrame = para.nStart + (para.nEnd - para.nStart) / 2;
        }
        else {
                RefFrame=nz/2+1;
        }
	}
	if(para.bAlignToMid<=0) RefFrame=abs(para.bAlignToMid);
	if(para.bAlignToMid!=0) 
	{
		if(RefFrame<para.nStart) RefFrame=para.nStart;
		if(RefFrame>para.nEnd) RefFrame=para.nEnd;
		if(para.nStartSum>para.nEnd) para.nStartSum=para.nEnd;
		for(j=0;j<RefFrame-para.nStart;j++) midshift+=shift[j];
	}
	sprintf(str,"\nSum Frame #%03d - #%03d (Reference Frame #%03d):\n",nStartSum+para.nStart,nEndSum+para.nStart,RefFrame);
	pThis->TextOutput(str);

	//Add(copy) first frame to GPU
	totalshift=0;
	for(j=1;j<nStartSum+1;j++)
	{
		totalshift+=shift[j-1];
	}
	GPUMemH2D((void *)dsumcorr,(void *)(hbuf+nStartSum*sizeb),sizeof(float)*sizeb);
	if(para.bAlignToMid) GPUShift(dsumcorr,dPosList,-totalshift.real()+midshift.real(),-totalshift.imag()+midshift.imag(), nsam);
	GPUSync();
	bFSCEven=false;
	sprintf(str,"......Add Frame #%03d with xy shift: %8.4lf %8.4lf\n",nStartSum+para.nStart,-totalshift.real()+midshift.real(),-totalshift.imag()+midshift.imag());
	pThis->TextOutput(str);
	//Save stack
	if(para.bSaveStackCorr)
	{
		GPUMemD2D((void *)dfft,(void *)dsumcorr,sizeof(float)*sizeb);
		GPUIFFT2d(dfft,ifft_plan);
		GPUSync();
		GPUMultiplyNum(dfft,1.0/size,sizeb);
		GPUSync();
		GPUMemD2H((void *)htmp,(void *)dfft,sizeof(float)*sizeb);
		fft2buf(bufmrc,htmp,nsam);
		stackCorr.write2DIm(bufmrc,0);
	}
	//*******
	//sum other frame
	for(j=nStartSum+1;j<=nEndSum;j++)
	{
		totalshift+=shift[j-1];
		
		//copy to GPU
		GPUMemH2D((void *)dfft,(void *)(hbuf+j*sizeb),sizeof(float)*sizeb);
		//shift
		GPUShift(dfft,dPosList,-totalshift.real()+midshift.real(),-totalshift.imag()+midshift.imag(), nsam);
		GPUSync();
		//Sum
		if(bFSCEven) GPUAdd(dsumcorr,dfft,sizeb);
		else GPUAdd(dsum,dfft,sizeb);
		bFSCEven=!bFSCEven;

		sprintf(str,"......Add Frame #%03d with xy shift: %8.4lf %8.4lf\n",j+para.nStart,-totalshift.real()+midshift.real(),-totalshift.imag()+midshift.imag());
		pThis->TextOutput(str);

		//save stack
		if(para.bSaveStackCorr)
		{
			GPUIFFT2d(dfft,ifft_plan);
			GPUSync();
			GPUMultiplyNum(dfft,1.0/size,sizeb);
			GPUSync();
			GPUMemD2H((void *)htmp,(void *)dfft,sizeof(float)*sizeb);
			fft2buf(bufmrc,htmp,nsam);
			stackCorr.write2DIm(bufmrc,j-nStartSum);
		}
	}
	if(para.bSaveStackCorr) 
	{
		MinMaxMean(bufmrc,nsam.x*nsam.y,stackCorr.m_header.dmin, stackCorr.m_header.dmax, stackCorr.m_header.dmean);
	}
	//final sum
	GPUMemD2H((void *)hFSCCorr0,(void *)dsumcorr,sizeof(float)*sizeb);
	GPUMemD2H((void *)hFSCCorr1,(void *)dsum,sizeof(float)*sizeb);
	GPUAdd(dsumcorr,dsum,sizeb);
	GPUSync();

	//copy binned sum to display
	if(para.bDispSumCorr)
	{
		DIM dispdim(DISPDIM,DISPDIM);
		if(nsam.x<nsam.y) dispdim.x=int(DISPDIM*float(nsam.x)/float(nsam.y)+0.5)/2*2;
		if(nsam.x>nsam.y) dispdim.y=int(DISPDIM*float(nsam.y)/float(nsam.x)+0.5)/2*2;
		
		pThis->m_dispdim=dispdim;
		GPUMemBinD2H(hdisp, dsumcorr, dispdim, nsam);
		pThis->ImageOutput(hdisp);
	}

	//copy sum image to host
	float *tsum=dsumcorr;
	GPUIFFT2d(tsum,ifft_plan);
	GPUMultiplyNum(tsum,1.0/size,sizeb);
	GPUMemD2H((void *)hsumCorr,(void *)tsum,sizeof(float)*sizeb);
	fft2buf(bufmrc,hsumCorr,nsam);
	
	//save
	MRC mrc;
	mrc.open(pThis->m_fnAlignsum,"wb");
	mrc.createMRC(bufmrc,nsam.width(),nsam.height(),1);
	//stats
	sprintf(str,"Mean=%f   Min=%f   Max=%f\n",mrc.m_header.dmean,mrc.m_header.dmin,mrc.m_header.dmax);
	pThis->TextOutput(str);
	mrc.close();
	sprintf(str,"Save Corrected Sum to: %s\n",pThis->m_fnAlignsum);
	pThis->TextOutput(str);
	
	//close save Corrected stack
	if(para.bSaveStackCorr) 
	{
		stackCorr.updateHeader();
		pThis->TextOutput("\nWrite corrected stack:\n");
		sprintf(str,"Mean=%f   Min=%f   Max=%f\n",
					stackCorr.m_header.dmean,stackCorr.m_header.dmin,stackCorr.m_header.dmax);
		pThis->TextOutput(str);
		stackCorr.close();
		sprintf(str,"Save Corrected Stack to: %s\n",pThis->m_fnStackCorr);
		pThis->TextOutput(str);
	}

	if(para.bLogFSC)
	{
		complex<double> avgshift=0.0;
		for(i=0;i<shift.size();i++)
		{
			avgshift+=shift[i]/abs(shift[i]);
		}
		pThis->PlotFSC((cuComplex *)hFSCRaw0, (cuComplex *)hFSCRaw1, (cuComplex *)hFSCCorr0, 
					(cuComplex *)hFSCCorr1,hPosList,nsam,avgshift);
	}
	
	
	//free GPU FFT plan, new plan will be created for rectangular image
	//GPUFFTDestroy(fft_plan);
	GPUFFTDestroy(ifft_plan);


	///////////////////////////
	DIM nsamsub=nsam.MinSquare();
	//prepare new fft
	if(para.bDispFFTRaw || para.bDispFFTCorr) fft_plan=GPUFFTPlan(nsamsub);
	//Make Raw fft modulus for display
	if(para.bDispFFTRaw)
	{
		GPUMemH2D((void *)dsum,(void *)hsumRaw,sizeof(float)*sizeb);
		GPURectFFTLogModulus(dfft, dsum, dtmp, dsumcorr, nsam, para.fftscale,fft_plan);
		//copy to host, make pwr image
		GPUMemD2H(htmp,dfft,sizeof(float)*(nsamsub.width()/2+1)*nsamsub.height());
		FFTModulusToDispBuf(htmp,hdisp, nsamsub);
		//copy back to device
		GPUMemH2D(dtmp,hdisp,sizeof(float)*(nsamsub.width()+2)*nsamsub.height());
		//do binning
		GPUBinFFT(dfft, DISPDIM, dtmp, nsamsub, fft_plan);
		GPUMemD2H((void *)hdisp,(void *)dfft,sizeof(float)*(DISPDIM+2)*DISPDIM);
		//display
		pThis->FFTOutputRaw(hdisp); 
	}
	//Make Corrected fft modulus for display
	if(para.bDispFFTCorr)
	{
		GPUMemH2D((void *)dsum,(void *)hsumCorr,sizeof(float)*sizeb);
		GPURectFFTLogModulus(dfft, dsum, dtmp, dsumcorr, nsam, para.fftscale,fft_plan);
		//copy to host, make pwr image
		GPUMemD2H(htmp,dfft,sizeof(float)*(nsamsub.width()/2+1)*nsamsub.height());
		FFTModulusToDispBuf(htmp,hdisp, nsamsub);
		//copy back to device
		GPUMemH2D(dtmp,hdisp,sizeof(float)*(nsamsub.width()+2)*nsamsub.height());
		//do binning
		GPUBinFFT(dfft, DISPDIM, dtmp, nsamsub, fft_plan);
		GPUMemD2H((void *)hdisp,(void *)dfft,sizeof(float)*(DISPDIM+2)*DISPDIM);
		//display
		pThis->FFTOutputCorr(hdisp);   
	}	
	//destory fft
	if(para.bDispFFTRaw || para.bDispFFTCorr) GPUFFTDestroy(fft_plan);
	/////////////////////////////////////////

	
	
	

	delete [] bufmrc;
	delete [] hbuf;
	delete [] hPosList;
	GPUMemFree((void **)&dPosList);
	GPUMemFree((void **)&dsum);
	GPUMemFree((void **)&dsumcorr);
	GPUMemFree((void **)&dfft);
	GPUMemFree((void **)&dtmp);


	delete [] htmp;
	delete [] hboxmap;
	delete [] hdisp;
	delete [] hsumRaw;
	delete [] hsumCorr;
	delete [] hFSCRaw0;
	delete [] hFSCRaw1;
	delete [] hFSCCorr0;
	delete [] hFSCCorr1;

	ResetGPU();
	pThis->Done();
	
	sprintf(str,"Done.\n");
	pThis->TextOutput(str);

	return (void *)0;
}
Пример #10
0
void Host::initialize() {
    initCPU();
    initGPU();
    initOS();
}
Пример #11
0
int main(int argc, char** argv)
{
    const int n = NN;
    const int m = NM;
    const int iter_max = 500;

    const double tol = 1.0e-6;
    double error     = 1.0;
    
    memset(A, 0, n * m * sizeof(double));
    memset(Anew, 0, n * m * sizeof(double));
        
    for (int j = 0; j < n; j++)
    {
        A[j][0]    = 1.0;
        Anew[j][0] = 1.0;
    }
    
#ifdef _OPENACC
  initGPU(argc, argv);
#endif
    
    printf("Jacobi relaxation Calculation: %d x %d mesh\n", n, m);
    
    StartTimer();
    int iter = 0;

#pragma acc data copy(A[0:NN][0:NM]), create(Anew[0:NN][0:NM])    
    while ( error > tol && iter < iter_max )
    {
        error = 0.0;

#pragma acc kernels present(A,Anew) 
    {
#pragma acc loop gang(512)        
		for( int j = 1; j < n-1; j++)
        {
            for( int i = 1; i < m-1; i++ )
            {
                Anew[j][i] = 0.25 * ( A[j][i+1] + A[j][i-1]
			+ A[j-1][i] + A[j+1][i]);
                error = fmax( error, fabs(Anew[j][i] - A[j][i]));
            }
        }

#pragma acc loop gang(512)        
        for( int j = 1; j < n-1; j++)
        {
            for( int i = 1; i < m-1; i++ )
            {
                A[j][i] = Anew[j][i];    
            }
        }
	}

        if(iter % 100 == 0) printf("%5d, %0.6f\n", iter, error);
        
        iter++;
    }

    double runtime = GetTimer();
/*
    int passed = checkSolution(error, &A[0][0], NN, NM, iter_max);
    if (passed == 0) {
        printf("Error check passed.\n");
    } else if (passed > 0) {
        printf("Error check failed.\n");
    }
*/ 
    printf(" total : %f s\n", runtime / 1000);
    return 0;
}