Exemplo n.º 1
0
// Backproject a single 2D image into the 3D volume: y <--- y + backproj(x).
//
// All array arguments are read through the 1-based accessor macros that are
// defined elsewhere in this file (dm(k), cord(k,i), ptrs(i), x(ix,iy), y(j)).
//
//   volsize  volume dimensions; only volsize[0] (nx) and volsize[1] (ny) are read
//   nrays    number of rays to process
//   nnz      not used by this routine
//   dm       dm(1..6) are the projection coefficients; dm(7) and dm(8) are the
//            in-plane shifts (sx, sy) added to the projected coordinates
//   origin   center coordinates (xcent, ycent, zcent)
//   ri       radius; the routine requires nx > 2*ri
//   ptrs     ptrs(i) .. ptrs(i+1)-1 is the range of y indices belonging to ray i
//   cord     cord(1,i), cord(2,i), cord(3,i) are the z, y, x coordinates of the
//            first point of ray i
//   x        2D input image, read through x(ix, iy)
//   y        output values, accumulated through y(j)
//
// Returns 0 on success. Returns -1 (after printing a message to stderr and
// without touching y) when nx <= 2*ri; previously this path also returned 0,
// which made the failure invisible to callers.
int bckpj3(Vec3i volsize, int nrays, int   nnz, float *dm, 
           Vec3i  origin, int    ri, int *ptrs, int *cord, 
           float      *x, float *y)
{
    int       i, j, iqx, iqy, xc, yc, zc;
    float     xb, yb, dx, dy, dxdy;

    int xcent = origin[0];
    int ycent = origin[1];
    int zcent = origin[2];

    // NOTE: keep the names nx / ny; the 2D accessor macro may refer to them.
    int nx = volsize[0];
    int ny = volsize[1];

    // Phi: the shift parameters are passed in as the last two entries of dm
    float sx = dm(7);
    float sy = dm(8);

    if (nx <= 2*ri) {
        fprintf(stderr, "bckpj3: nx must be greater than 2*ri\n");
        return -1;
    }

    for (i = 1; i <= nrays; i++) {
        zc = cord(1,i) - zcent;
        yc = cord(2,i) - ycent;
        xc = cord(3,i) - xcent;

        // Image-plane position of the first point of the ray.
        xb = zc*dm(1)+yc*dm(2)+xc*dm(3) + xcent + sx;
        yb = zc*dm(4)+yc*dm(5)+xc*dm(6) + ycent + sy;

        for (j = ptrs(i); j < ptrs(i+1); j++) {
            iqx = ifix(xb);
            iqy = ifix(yb);

            dx   = xb - iqx;
            dy   = yb - iqy;
            dxdy = dx*dy;

            /*
               Bilinear interpolation

                 y(j) += (1-dx)*(1-dy)*x(iqx  , iqy  )
                       + (1-dx)*   dy *x(iqx  , iqy+1)
                       +    dx *(1-dy)*x(iqx+1, iqy  )
                       +    dx *   dy *x(iqx+1, iqy+1)

               regrouped by the factors 1, dx, dy and dx*dy so that each
               group can be skipped on its own when one of its pixels
               falls outside the image.
            */

            // Phi: index checking, needed now that shifts are being used
            if (iqx >= 1 && iqx <= nx && iqy >= 1 && iqy <= ny) {
                // iqx >= 1 and iqy >= 1 already hold here, so only the
                // upper bounds of the +1 neighbours need testing.
                const bool has_x_next = (iqx + 1 <= nx);
                const bool has_y_next = (iqy + 1 <= ny);

                y(j) += x(iqx,iqy);
                if (has_x_next) {
                    y(j) += dx*(-x(iqx,iqy)+x(iqx+1,iqy));
                }
                if (has_y_next) {
                    y(j) += dy*(-x(iqx,iqy)+x(iqx,iqy+1));
                }
                if (has_x_next && has_y_next) {
                    y(j) += dxdy*( x(iqx,iqy) - x(iqx,iqy+1) -x(iqx+1,iqy) + x(iqx+1,iqy+1) );
                }
            }

            // Advance to the next sample along the ray.
            xb += dm(1);
            yb += dm(4);
        } // end for j
    } // end for i

    return 0;
}
 /* Returns ptrs() advanced by n positions; n is not range-checked here. */
 pointer at(std::size_t n)const
 {
   return ptrs()+n;
 }
Exemplo n.º 3
0
// Forward projection of a 3D volume onto a single 2D image: y <--- proj(x).
int fwdpj3(Vec3i volsize, int nrays, int   nnz, float *dm, 
           Vec3i  origin, int    ri, int *ptrs, int *cord, 
           float      *x, float  *y)
{
    /*
        input  :  volsize  the size (nx,ny,nz) of the volume
                  nrays    number of rays within the compact spherical
                           representation
                  nnz      number of voxels within the sphere
                           (not referenced in this routine)
                  dm       an array of size 9 storing the transformation
                           associated with the projection direction;
                           entries 7 and 8 carry the shifts (sx, sy)
                  origin   coordinates of the center of the volume
                  ri       radius of the sphere
                  ptrs     the beginning address of each ray
                  cord     the coordinates of the first point in each ray
                  x        3d input volume
                  y        2d output image (accumulated into)

        returns:  0.  If nx <= 2*ri a message is written to stderr and the
                  process is terminated with exit(1).
    */

    // Phi: the shift parameters are passed in as the last two entries of dm
    const float sx = dm(7);
    const float sy = dm(8);

    const int xcent = origin[0];
    const int ycent = origin[1];
    const int zcent = origin[2];

    // NOTE: keep the names nx / ny; the 2D accessor macro may refer to them.
    int nx = volsize[0];
    int ny = volsize[1];

    // Per-sample increments along a ray.
    const float dm1 = dm(1);
    const float dm4 = dm(4);

    if (nx <= 2*ri) {
        fprintf(stderr, " nx must be greater than 2*ri\n");
        exit(1);
    }

    for (int ray = 1; ray <= nrays; ray++) {
        const int zc = cord(1,ray) - zcent;
        const int yc = cord(2,ray) - ycent;
        const int xc = cord(3,ray) - xcent;

        // Image-plane position of the first voxel of this ray.
        float xb = zc* dm(1) +yc* dm(2) +xc* dm(3) + xcent + sx;
        float yb = zc* dm(4) +yc* dm(5) +xc* dm(6) + ycent + sy;

        for (int k = ptrs(ray); k < ptrs(ray+1); k++) {
            const int iqx = ifix(xb);
            const int iqy = ifix(yb);

            const float voxel = x(k);

            // Fractional offsets; the y weights are pre-multiplied by the
            // voxel value so that each update below is a single product.
            const float wx      = xb - iqx;
            const float wy_vox  = (yb - iqy) * voxel;
            const float wy1_vox = voxel - wy_vox;
            const float wx1     = 1.0 - wx;

            // Which of the pixel columns / rows lie inside the image.
            const bool col0_ok = (iqx >= 1 && iqx <= nx);
            const bool col1_ok = (iqx >= 0 && iqx + 1 <= nx);
            const bool row0_ok = (iqy >= 1 && iqy <= ny);
            const bool row1_ok = (iqy >= 0 && iqy + 1 <= ny);

            if (col0_ok && row0_ok) {
                y(iqx  ,iqy)   += wx1*wy1_vox;
            }
            if (col1_ok && row0_ok) {
                y(iqx+1,iqy)   += wx*wy1_vox;
            }
            if (col1_ok && row1_ok) {
                y(iqx+1,iqy+1) += wx*wy_vox;
            }
            if (col0_ok && row1_ok) {
                y(iqx  ,iqy+1) += wx1*wy_vox;
            }

            xb += dm1;
            yb += dm4;
        }
    }

    return 0;
}
 /* Returns ptrs(), i.e. the position of the first slot. */
 pointer begin()const
 {
   return ptrs();
 }
 /* Returns ptrs() advanced by size_ positions (one past the last slot). */
 pointer end()const
 {
   return ptrs()+size_;
 }
Exemplo n.º 6
0
void selection_properties_t::notify_save_inline_edit(const char * value)
{
	static_api_ptr_t<metadb_io_v2> tagger_api;
	if (strcmp(value, "<mixed values>"))
	{
		pfc::list_t<pfc::string8> values;
		const char *ptr = value, *start = ptr;
		while (*ptr)
		{
			start = ptr;
			while (*ptr != ';' && *ptr) ptr++;
			values.add_item(pfc::string8(start, ptr - start));
			while (*ptr == ' ' || *ptr == ';') ptr++;
		}

		t_size j, value_count = values.get_count();

		metadb_handle_list ptrs(m_edit_handles);
		pfc::list_t<file_info_impl> infos;
		pfc::list_t<bool> mask;
		pfc::list_t<const file_info *> infos_ptr;
		t_size i, count = ptrs.get_count();
		mask.set_count(count);
		infos.set_count(count);
		//infos.set_count(count);
		for (i = 0; i < count; i++)
		{
			assert(ptrs[i].is_valid());
			mask[i] = !ptrs[i]->get_info(infos[i]);
			infos_ptr.add_item(&infos[i]);
			if (!mask[i])
			{
				pfc::string8 old_value;
				g_print_field(m_edit_field, infos[i], old_value);
				if (!(mask[i] = !((strcmp(old_value, value)))))
				{
					infos[i].meta_remove_field(m_edit_field);
					for (j = 0; j < value_count; j++)
						infos[i].meta_add(m_edit_field, values[j]);
				}
			}
		}
		infos_ptr.remove_mask(mask.get_ptr());
		ptrs.remove_mask(mask.get_ptr());

		{
			service_ptr_t<file_info_filter_impl>  filter = new service_impl_t<file_info_filter_impl>(ptrs, infos_ptr);
			tagger_api->update_info_async(ptrs, filter, GetAncestor(get_wnd(), GA_ROOT), metadb_io_v2::op_flag_no_errors | metadb_io_v2::op_flag_background | metadb_io_v2::op_flag_delay_ui, NULL);
		}
	}

	/*if (m_edit_index < m_fields.get_count())
	{
	(m_edit_column ? m_fields[m_edit_index].m_name : m_fields[m_edit_index].m_name_friendly) = value;
	pfc::list_t<t_list_view:: t_item_insert> items;
	items.set_count(1);
	items[0].m_subitems.add_item(m_fields[m_edit_index].m_name_friendly);
	items[0].m_subitems.add_item(m_fields[m_edit_index].m_name);
	replace_items(m_edit_index, items);
	}*/
	m_edit_column = pfc_infinite;
	m_edit_index = pfc_infinite;
	m_edit_field.reset();
	m_edit_handles.remove_all();
}
Exemplo n.º 7
0
 /* Returns the address of element n of ptrs(); n is not range-checked here. */
 value_type* at(std::size_t n)const
 {
   return &ptrs()[n];
 }
Exemplo n.º 8
0
 /* Returns the address of element size_ of ptrs() (one past the last element). */
 value_type* end()const
 {
   return &ptrs()[size_];
 }
Exemplo n.º 9
0
 /* Returns the address of element 0 of ptrs(). */
 value_type* begin()const
 {
   return &ptrs()[0];
 }
Exemplo n.º 10
0
// Entry point of the GPU navigation test harness.
//
// Sequence, as implemented below:
//   1. Build a TestCase from the command line and derive the number of inputs.
//   2. Compile "prog.cl", or load "prog.ptx" when "-b" is given; after a
//      compile the binary is written back to "prog.ptx".
//   3. Create the kernels "trace", "relocate", "test", "check" and "checkgeom".
//   4. Allocate pinned and device-only buffers.
//   5. Run "test", then "check"; read a device handle out of the result buffer
//      and use it to relocate the host-side geometry before uploading it.
//   6. Optionally run "checkgeom" (CHECK == 2 or 4).
//   7. Run "trace", read back results, print diagnostics and write the output
//      via testCase.outputData( "imggpu.txt" ).
//
// Returns EXIT_SUCCESS.  Unless NOCATCH is defined, a std::runtime_error is
// caught, its message is printed to stderr and EXIT_FAILURE is returned.
int main( int argc, char *argv[] )
{
#ifndef NOCATCH
	try
	{
#endif
		// Initialize geometry and input/output buffers
		TestCase testCase( argc, argv );
#ifndef PHYSICS
		int numInput = testCase.xres * testCase.yres;
#else
		int numInput = testCase.getSize();
#endif
		
		const int numOutput = numInput;
		
		
		// Passed to the "checkgeom" kernel and to check_navigation below.
		int number_of_increments = 5;
		

		const char *BINARY_FILE_NAME = "prog.ptx";
		const char *SOURCE_FILE_NAME = "prog.cl";
		
		// Command line handling
		// Only "-b" is recognised; every other argument is ignored here
		// (the rejection of unknown options is commented out).
		bool isBinary = false;
		
		for ( int i=1; i<argc; ++i )
		{
			const std::string arg( argv[i] );
			if ( arg == "-b" )
				isBinary = true;
			/*else
				throw std::runtime_error( "Invalid option "+arg );*/
		}
		
		// Load or compile program
		std::cerr << (isBinary ? "Loading" : "Compiling") << "...";
		//EDIT: Removing all of his clock functions, can use the VS Profiler if necessary.
		//const my_clock_t tc0 = my_clock();
		
		CL::SingleFileSingleGPUSetup gpuSetup(
			isBinary ? BINARY_FILE_NAME : SOURCE_FILE_NAME,
			isBinary, COMPILER_FLAGS );
		
		//const my_clock_t tc1 = my_clock();
		std::cout<<"SETUP complete"<<std::endl;

		//std::cerr << "done in " << tdiff(tc0,tc1) << " seconds\n\n";
		
		#ifdef VERBOSE
		std::cerr << " ---- Build log\n" << gpuSetup.getBuildLog() << "\n";
		#endif
		
		// Save "binary" (PTX bytecode) for reuse
		if ( !isBinary )
		{
			std::ofstream f( BINARY_FILE_NAME );
			gpuSetup.writeBinary( f );
		}
		
		//EDIT: Changing code to get rid of kernel getptr
		//1) Define new kernel relocate 
		//2) Use function getPtrs to get the vector ptrs and use it to define size (ptrs.size()) and offset (sizeof(int)) on host.
		//3) Allocate some mem on GPU to store ptrs and use it in the relocate kernel.
		//4) Move the enqueue write here so that the WorldVolumePointer is defined.
		//5) As soon as the kernel is done, free the memory used for ptrs.
		// Import handles to OpenCL kernels (functions)


		CL::Kernel kernelTrace( gpuSetup, "trace" );
			// Kernel trace is the main kernel which does the navigation.
		CL::Kernel kernelRelocate( gpuSetup, "relocate" );
			// The kernel version of relocate. It is possible to run relocate without using the kernel straight on the host.
			// NOTE(review): kernelRelocate is only used inside a commented-out section further down.
		CL::Kernel kernelTest( gpuSetup, "test");
			// Kernel to check for inconsistencies. 
		
		//EDIT: New kernel check
		CL::Kernel kernelCheck ( gpuSetup, "check");
			
		
		//EDIT2
		CL::Kernel kernelCheckGeometry(gpuSetup, "checkgeom");
			// Kernel to confirm geometry relocation happened as it should.	

		// Reserve GPU & host buffers
		//EDIT Getting size of ptrs
		int size = testCase.geom->ptrs_size();
		//REMOVE:
		std::cout<<"Size of ptrs is = "<<size<<std::endl;

		int size_of_logical_checks = 1000;
		
		const int auxBufSz = sizeof(cl_mem);
		
		// Page-locked buffers for fast DMA-IO
		CL::PinnedBufferPair gpuInput( gpuSetup, numInput*sizeof(StubParticle), CL_MEM_READ_WRITE, CL_MAP_WRITE );
		CL::PinnedBufferPair gpuOutput( gpuSetup, numOutput*sizeof(G4double), CL_MEM_WRITE_ONLY, CL_MAP_READ );
		// NOTE(review): gpuAux is allocated but only referenced in commented-out code below.
		CL::PinnedBufferPair gpuAux( gpuSetup, auxBufSz, CL_MEM_READ_WRITE, CL_MAP_READ | CL_MAP_WRITE );
		//EDIT
		// NOTE(review): this buffer is sized with sizeof(int), but the memcpy that
		// fills it further down copies size*2*sizeof(GEOMTYPE) bytes.  If
		// sizeof(GEOMTYPE) > sizeof(int) that copy writes past the end of the
		// buffer -- confirm the element type and use one size expression for both.
		CL::PinnedBufferPair ptrs( gpuSetup, size*2*sizeof(int), CL_MEM_READ_WRITE, CL_MAP_WRITE );
		// Shared scratch/result buffer: written by the test, check, checkgeom and
		// trace kernels in turn and read back on the host after each of them.
		CL::PinnedBufferPair result( gpuSetup,size_of_logical_checks*sizeof(cl_mem), CL_MEM_WRITE_ONLY, CL_MAP_READ );
			
		#if (GLOBAL_MODE ==1)
		// sizeof( size_t ) ?
		CL::Buffer Numbers_Of_Solid( gpuSetup, CL_MEM_READ_WRITE, numInput*sizeof(int));
		
		CL::Buffer Sum_Of_Solid( gpuSetup, CL_MEM_READ_WRITE, numInput*sizeof(int));
		
		CL::Buffer Solids( gpuSetup, CL_MEM_READ_WRITE, numInput*sizeof(SolidInfo));
		
		CL::Buffer Result_For_Current_Solid( gpuSetup, CL_MEM_READ_WRITE, numInput*sizeof(ResultInfo));
		
		CL::Buffer Compacter_Result( gpuSetup, CL_MEM_READ_WRITE, numInput*sizeof(FinalResult));

        #endif
		CL::Buffer nullVNode( gpuSetup, CL_MEM_READ_WRITE, 2 *sizeof(G4SmartVoxelNode ));
		CL::Buffer noStepArray ( gpuSetup, CL_MEM_READ_WRITE, numInput*sizeof ( bool ));
		CL::Buffer LocationArray( gpuSetup, CL_MEM_READ_WRITE, numInput * sizeof( PointInformation));
		std::cout<<"Pinned Buffers allocation complete"<<std::endl;
		// GPU only buffers
		//EDIT
		
		CL::Buffer gpuGeom( gpuSetup, CL_MEM_READ_WRITE, testCase.geom->size());
		//EDIT2:
		//gpuSetup.enqueueWriteBuffer( gpuGeom, testCase.geom->getBuffer() );
		
		


		std::cout<<"Device Buffers allocation complete"<<std::endl;

		// NOTE(review): copies size*2*sizeof(GEOMTYPE) bytes into a buffer that was
		// allocated as size*2*sizeof(int) bytes (see the allocation of `ptrs` above).
		std::memcpy( ptrs.getHostPtr(), &(testCase.geom->ptrs[0]), size*2*sizeof(GEOMTYPE) );
		//EDIT 2:
	   //check_navigation(ptrs.getHostPtr(), size);
		ptrs.transferToDevice();
		gpuSetup.finish();

		//EDIT
		// Run the "test" kernel; it writes into `result`.
		kernelTest.setArg(0, result.getDeviceBuffer());
		if( GLOBAL_MODE  == 1)
		   kernelTest.setArg(1, noStepArray);
		

		gpuSetup.enqueueKernel( kernelTest, 8, 8);
		gpuSetup.finish();
		result.transferFromDevice();
		gpuSetup.finish();
		// NOTE(review): `final` is only referenced in the commented-out code below.
		FinalResult * final;
		
		int * a = ( int *)(ptrs.getHostPtr());
		std::cout<<"Printing input: \n";
		// NOTE(review): prints 8 ints regardless of `size`; presumably the ptrs
		// buffer always holds at least 8 ints -- verify.
		for (int i=0; i<8; i++)
			std::cout<< a[i]  << " ";
		//EDIT : Printing the output array
		std::cout<<"Printing output: \n";
		for (int i=0; i<8; i++)
			std::cout<< (( int *)(result.getHostPtr()))[i] << " ";
		//EDIT: Changed kernel Test to fix Prefix Sum
		//std::cout<< " Values that were returned: ";
		//std::cout<<" For thread 1: ";
		//final = (FinalResult *)result.getHostPtr();
		//final += sizeof( FinalResult);
		//std::cout<<": Min. Step value = "<< final->step<< " and safety returned = "<< final->safety << std::endl;


		// MODIFY: have to loop through this and add as a check

		/*
		for( int i =0 ; i < 4; i++)
		{	
			std::cout<<" For thread "<< i;
			final = (FinalResult **)result.getHostPtr();
			std::cout<<": Min. Step value = "<< final[i]->step<< " and safety returned = "<< final[i]->safety << std::endl;
			
		}
		*/
		/*std::cout<<"\nOriginal values: ";
		for( int i =0 ; i<32; i++)
		{
		std::cout << ((int *)ptrs.getHostPtr())[i]<<" ";
		}
		*/

		//NOTE: kernelCheck is badly named. It was originally the replacement for kernel getPtr and was used to return the geometry start location on the GPU.
		// It can also be used to check the sizes on CPU and GPU are consistent

		kernelCheck.setArg( 0 , gpuGeom);
		kernelCheck.setArg(	1, result.getDeviceBuffer());
		gpuSetup.enqueueKernel(kernelCheck, 1, 1);
		gpuSetup.finish();
		//EDIT
		result.transferFromDevice();
		gpuSetup.finish();

		std::cout<< "On the CPU, size of GEOMTYPE = " << sizeof( GEOMTYPE )<<"\n";
		//REMOVE:
		// NOTE(review): the first bytes of `result` are printed here as an int
		// "size", and the same location is read as a cl_mem handle a few lines
		// below.  Which of the two the "check" kernel actually writes cannot be
		// determined from this file -- confirm against the kernel source.
		std::cout<<"Size of GEOMTYPE on GPU -> "<<*((int  *)result.getHostPtr())<<std::endl;
		//MODIFY:
			// Assert that these are equal here.

		//EDIT2:
		//int answer = *(int*)(result.getHostPtr());
		//std::cout<<"Result before is "<< answer<< std::endl;
		
		std::cout<< "About to run relocate, no problem so far\n";

		//EDIT2
		/*
		gpuSetup.enqueueWriteBuffer( gpuGeom, testCase.geom->getBuffer() );
		gpuSetup.finish();
		kernelRelocate.setArg( 0, ptrs.getDeviceBuffer());
		kernelRelocate.setArg (1, gpuGeom);
		kernelRelocate.setArg(2, sizeof(int), &size);
		


		//MODIFY
		gpuSetup.enqueueKernel(kernelRelocate,size*2,2);
		gpuSetup.finish();
		*/
		//OLD
		// Fetch address of gpuGeom in device memory space (kludge)
		//kernelGetPtr.setArg( 0, gpuGeom );
		//kernelGetPtr.setArg( 1, gpuAux.getDeviceBuffer() );
		//gpuSetup.enqueueTask( kernelGetPtr );
		//gpuAux.transferFromDevice( CL_TRUE, 0, sizeof(cl_mem) );
		//cl_mem gpuhandle = *(cl_mem*)gpuAux.getHostPtr();
		
		//EDIT2
		// The value left at the start of `result` by the "check" kernel is taken
		// as the handle passed to relocate().
		cl_mem gpuhandle = *(cl_mem*)result.getHostPtr();
	
		
		
		
		//OLD:EDIT2
		// Relocation is done on the host; the relocated geometry buffer is
		// uploaded to the device immediately afterwards.
		testCase.geom->relocate( gpuhandle );

		//EDIT2
		gpuSetup.enqueueWriteBuffer( gpuGeom, testCase.geom->getBuffer() );

		//EDIT2: Kernel which returns the checks
		if(CHECK == 2 || CHECK == 4)
		{
			kernelCheckGeometry.setArg( 0 , gpuGeom);
			kernelCheckGeometry.setArg( 1 , result.getDeviceBuffer());
			kernelCheckGeometry.setArg( 2, sizeof(int), &number_of_increments);
			gpuSetup.enqueueKernel(kernelCheckGeometry, 1, 1);
			gpuSetup.finish();
	
			result.transferFromDevice();
			gpuSetup.finish();

			check_navigation( result.getHostPtr(), testCase.geom->VolumeStore, number_of_increments);
						// print for Geometry test;
		}

		

		// MODIFY: He also notes how this is not the optimal way of doing this. Memory can be saved here.
		std::memcpy( gpuInput.getHostPtr(), &(testCase.input[0]), gpuInput.size() );
		//REMOVE
		std::cout<<"About to run trace\n";
		
		// This part was written for test purpose to see if error was caused due to shared memory

	
		// This kernel is getting to have WAY too many arguments
		gpuInput.transferToDevice();
		gpuSetup.finish();
		// Set GPU kernel arguments
		kernelTrace.setArg( 0, gpuInput.getDeviceBuffer() );
		kernelTrace.setArg( 1, gpuOutput.getDeviceBuffer() );
		kernelTrace.setArg( 2, gpuGeom );
		kernelTrace.setArg( 3, sizeof(G4double), &(testCase.phys_step) );
		kernelTrace.setArg( 4, sizeof(cl_int), &numInput );	
		// NOTE(review): argument 5 is set here whenever CHECK is defined and then
		// set again below when CHECK is 1 or 4; the second call is redundant.
		// The `if( CHECK == ... )` tests in this function also need CHECK to be
		// defined in order to compile.
#ifdef CHECK
		kernelTrace.setArg( 5, result.getDeviceBuffer());
#endif
		// Two uses -: One for debugging and one for checking. Remove at some point.
		if( CHECK == 1 || CHECK == 4)
			kernelTrace.setArg( 5, result.getDeviceBuffer());
#if( GLOBAL_MODE ==1)
		//kernelTrace.setArg( 6, Numbers_Of_Solid );
		//kernelTrace.setArg( 7, Sum_Of_Solid );
		kernelTrace.setArg( 6, Solids );
		kernelTrace.setArg( 7, Result_For_Current_Solid );
		kernelTrace.setArg( 8, Compacter_Result );		
		kernelTrace.setArg( 9, nullVNode);
#endif

/*
NOTE: The current way of setting kernel trace's arguments is bad and is bound to cause problems in future
Change the implementation to either removes one check or perhaps just replace the existing check with something
more useful.
*/
		
		std::cout<<"Arguments set and value of Physical step sent on CPU is = "<< (testCase.phys_step)<<std::endl;
		// Write input to GPU memory
		// const my_clock_t t1 = my_clock();
		//OLD
		/*gpuSetup.enqueueWriteBuffer( gpuGeom, testCase.geom->getBuffer() );*/
		
		
		std::cout<< "Write complete, transfer done, finish\n";
		

		// Actual execution
		//const my_clock_t t2 = my_clock();
		//EDIT
		//gpuSetup.enqueueKernel( kernelTrace, numInput, blockSize );
		// NOTE(review): both size arguments are BlockSize.  Presumably they are the
		// global and local work sizes, in which case only BlockSize work items run
		// regardless of numInput -- verify against enqueueKernel's signature.
		gpuSetup.enqueueKernel( kernelTrace, BlockSize, BlockSize );

		
		gpuSetup.finish();
		std::cout<<"Kernel trace done\n";
		

		//EDIT2:
		result.transferFromDevice();
		gpuSetup.finish();
		
		if( CHECK==1 || CHECK==4)
			check_navigation( result.getHostPtr(), testCase.geom->VolumeStore, 0);
					// Run distance check

		// Transfer results back to host memory
		//const my_clock_t t3 = my_clock();
		gpuOutput.transferFromDevice();
		gpuSetup.finish();


		//for (int i=0; i<10; i++)
			//std::cout<< (( int *)(result.getHostPtr()))[i] << " ";
		
			std::cout<< "\n";
		
		// Diagnostic dump: the first 32 values of `result`, read as G4double.
		for (int i=0; i<32; i++)
			std::cout<< (( G4double *)(result.getHostPtr()))[i] << " ";

		std::cout<<"From CPU -> the first particles position and direction are -" <<" X-: "<<testCase.input[0].pos.x<<" Y-: "<<testCase.input[0].pos.y<<" Z-: "<<testCase.input[0].pos.z;
		std::cout<<" \n and directions are -:  X-: "<<testCase.input[0].dir.x<<" Y-: "<<testCase.input[0].dir.y<<" Z-: "<<testCase.input[0].dir.z;
		//const my_clock_t t4 = my_clock();
		
		// Print time summary
		//std::cerr << "Elapsed: " << tdiffms(t1,t4) << " ms"
			//<< "\n  Transfer: " << tdiffms( t1, t2 )+tdiffms(t3,t4)
			//<< "\n\tto GPU:\t" << tdiffms( t1, t2 )
			//<< "\n\tfrom GPU:\t" << tdiffms( t3, t4 )
			//<< "\n  Calculation: " << tdiffms( t2, t3 ) << "\n\n";

		//MODIFY
		// Output results (also a stupid copy)
		std::memcpy( &(testCase.output[0]), gpuOutput.getHostPtr(), gpuOutput.size() );
		testCase.outputData( "imggpu.txt" );

		return EXIT_SUCCESS;
		
#ifndef NOCATCH
	}
	catch ( const std::runtime_error &e )
	{
		std::cerr << e.what() << std::endl;
		return EXIT_FAILURE;
	}
#endif
}