void import( xpattern<value_type> const& pat )
        {
            std::vector<size_type> v_dims;

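            // Layout note: the device buffers use a fixed stride per tilt --
            // max_dim*max_dim entries in 'ar', max_dim entries in 'diag' and
            // 'I_exp' -- while the true per-tilt dimension is collected in
            // v_dims and uploaded to data.dim after the loop.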
            for ( size_type index = 0; index != config.tilt_size; ++index )
            {
                v_dims.push_back( pat.diag[index].size() );
                //ar
                size_type const ar_offset = index * config.max_dim * config.max_dim;
                size_type const ar_size = pat.ar[index].size() * sizeof( size_type );
                cuda_assert( cudaMemcpy( reinterpret_cast<void*>(data.ar + ar_offset), reinterpret_cast<const void*>(pat.ar[index].data()), ar_size, cudaMemcpyHostToDevice ) );
                //diag
                size_type const diag_offset = index * config.max_dim;
                size_type const diag_size = pat.diag[index].size() * sizeof( value_type );
                cuda_assert( cudaMemcpy( reinterpret_cast<void*>(data.diag + diag_offset), reinterpret_cast<const void*>(pat.diag[index].data()), diag_size, cudaMemcpyHostToDevice ) );
                //intensity
                size_type const I_exp_offset = index * config.max_dim;
                size_type const I_exp_size = pat.intensity[index].size() * sizeof( value_type );
                cuda_assert( cudaMemcpy( reinterpret_cast<void*>(data.I_exp + I_exp_offset), reinterpret_cast<const void*>(pat.intensity[index].data()), I_exp_size, cudaMemcpyHostToDevice ) );
            }

            cuda_assert( cudaMemcpy( reinterpret_cast<void*>(data.thickness_array), reinterpret_cast<const void*>(pat.thickness_array.data()), sizeof(value_type)*pat.tilt_size, cudaMemcpyHostToDevice ) );

            cuda_assert( cudaMemcpy( reinterpret_cast<void*>(data.dim), reinterpret_cast<const void*>(v_dims.data()), sizeof(size_type) * v_dims.size(), cudaMemcpyHostToDevice ) );
        }
        cuda_pattern( xpattern<value_type> const& pat, int device_id = 0 ) : config{ make_cuda_xpattern_config( pat, device_id ) }, data{ config }
        {
            int current_id;
            cuda_assert( cudaGetDevice(&current_id) );
            if ( current_id != config.device_id ) cuda_assert( cudaSetDevice( config.device_id ) );

            import( pat );
        }
 hardware_concurrency( size_type the_device_id = 0 )
 {
     int num_devices = 0;
     cuda_assert( cudaGetDeviceCount( &num_devices ) );
     assert( static_cast<size_type>(num_devices) > the_device_id );
     cudaDeviceProp properties;
     cuda_assert( cudaGetDeviceProperties( &properties, the_device_id ) );
     multiprocessor_number = properties.multiProcessorCount;
 }//ctor
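Every snippet in this listing leans on a cuda_assert helper whose definition never appears here. A minimal sketch, assuming a runtime-API flavour that checks a cudaError_t and aborts with a readable message (the driver-API examples below evidently use a CUresult variant written without a trailing semicolon):

#include <cuda_runtime.h>
#include <cstdio>
#include <cstdlib>

// Hypothetical definition -- the real cuda_assert used in these examples
// is not shown. It checks a CUDA runtime call and aborts on failure.
#define cuda_assert(call)                                                \
    do {                                                                 \
        cudaError_t err_ = (call);                                       \
        if ( err_ != cudaSuccess ) {                                     \
            std::fprintf( stderr, "CUDA error at %s:%d -- %s\n",         \
                          __FILE__, __LINE__, cudaGetErrorString(err_) );\
            std::abort();                                                \
        }                                                                \
    } while ( 0 )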
Example #4
	void const_copy_to(const char *name, void *host, size_t size)
	{
		CUdeviceptr mem;
		size_t bytes;

		cuda_push_context();
		cuda_assert(cuModuleGetGlobal(&mem, &bytes, cuModule, name))
		//assert(bytes == size);
		cuda_assert(cuMemcpyHtoD(mem, host, size))
		cuda_pop_context();
	}
        cuda_pattern( pattern<value_type> const& pat, int device_id, std::string const& ug_path ) : config{ make_cuda_pattern_config( pat, device_id ) }, data{ config }
        {
            int current_id;
            cuda_assert( cudaGetDevice(&current_id) );
            if ( current_id != config.device_id ) cuda_assert( cudaSetDevice( config.device_id ) );

            import( pat );

            sim_ug.load( ug_path );
            update_ug();
        }
        value_type square_residual( value_type* ug, value_type thickness )
        {
            int current_id;
            cuda_assert( cudaGetDevice(&current_id) );
            if ( current_id != config.device_id ) cuda_assert( cudaSetDevice( config.device_id ) );

            update_I_diff(ug, thickness);

            value_type residual;
            cublasHandle_t handle;
            cublas_assert( cublasCreate_v2(&handle) );
            cublas_assert( cublasDdot_v2( handle, static_cast<int>(config.max_dim*config.tilt_size), data.I_diff, 1, data.I_diff, 1, &residual ) );
            cublas_assert( cublasDestroy_v2(handle) );
            return residual;
        }
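Note that square_residual creates and destroys a cuBLAS handle on every call, and the cublasDdot_v2 call implies value_type is double. A hedged variant, assuming the class instead owns a cublasHandle_t member 'handle_' created once in the constructor and destroyed with it:

        // Sketch only: hoists handle creation out of the hot path. Assumes
        // 'handle_' was created with cublasCreate_v2 at construction time.
        value_type square_residual_cached( value_type* ug, value_type thickness )
        {
            update_I_diff( ug, thickness );

            value_type residual;
            cublas_assert( cublasDdot_v2( handle_, static_cast<int>(config.max_dim*config.tilt_size),
                                          data.I_diff, 1, data.I_diff, 1, &residual ) );
            return residual;
        }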
 void update_ug()
 {
     matrix<double> sim_ug_( sim_ug.row(), 2 );
     std::copy( sim_ug.col_begin(1), sim_ug.col_end(1), sim_ug_.col_begin(0) );
     std::fill( sim_ug_.col_begin(1), sim_ug_.col_end(1), 0.0 ); // zero the imaginary column of the staging matrix before upload
     cuda_assert( cudaMemcpy( reinterpret_cast<void*>(data.ug), reinterpret_cast<const void*>(sim_ug_.data()), config.ug_size*sizeof(value_type)*2, cudaMemcpyHostToDevice ) );
 }
Example #8
	void mem_copy_to(device_memory& mem)
	{
		cuda_push_context();
		if(mem.device_pointer)
			cuda_assert(cuMemcpyHtoD(cuda_device_ptr(mem.device_pointer), (void*)mem.data_pointer, mem.memory_size()))
		cuda_pop_context();
	}
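cuda_device_ptr above is not defined in this excerpt; assuming device_ptr is an integer handle wide enough to hold a device address, a plausible helper is a plain cast:

	// Assumed helper: device_ptr is treated as an opaque integer handle,
	// so recovering the driver API's CUdeviceptr is just a cast.
	CUdeviceptr cuda_device_ptr(device_ptr mem)
	{
		return (CUdeviceptr)mem;
	}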
Example #9
	void mem_alloc(device_memory& mem, MemoryType type)
	{
		cuda_push_context();
		CUdeviceptr device_pointer;
		cuda_assert(cuMemAlloc(&device_pointer, mem.memory_size()))
		mem.device_pointer = (device_ptr)device_pointer;
		cuda_pop_context();
	}
Example #10
	void mem_zero(device_memory& mem)
	{
		memset((void*)mem.data_pointer, 0, mem.memory_size());

		cuda_push_context();
		cuda_assert(cuMemsetD8(cuda_device_ptr(mem.device_pointer), 0, mem.memory_size()))
		cuda_pop_context();
	}
Example #11
	void mem_free(device_memory& mem)
	{
		if(mem.device_pointer) {
			cuda_push_context();
			cuda_assert(cuMemFree(cuda_device_ptr(mem.device_pointer)))
			cuda_pop_context();

			mem.device_pointer = 0;
		}
	}
Example #12
        fastest_device() : the_fastest_device_id(0)
        {
            int num_devices = 0;
            cuda_assert( cudaGetDeviceCount( &num_devices ) );
            assert( num_devices > 0 );

            int max_multiprocessors = 0;
            for ( int device = 0; device != num_devices; ++device )
            {
                cudaDeviceProp properties;
                cuda_assert( cudaGetDeviceProperties( &properties, device ) );

                if ( max_multiprocessors < properties.multiProcessorCount )
                {
                    max_multiprocessors = properties.multiProcessorCount;
                    the_fastest_device_id = device;
                }
            }
        }//ctor
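A hedged usage sketch, assuming the class exposes the the_fastest_device_id member it computes above:

            // Hypothetical usage: bind the device with the most
            // multiprocessors before any allocations are made.
            fastest_device selector;
            cuda_assert( cudaSetDevice( static_cast<int>(selector.the_fastest_device_id) ) );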
Example #13
	void mem_copy_from(device_memory& mem, int y, int w, int h, int elem)
	{
		size_t offset = elem*y*w;
		size_t size = elem*w*h;

		cuda_push_context();
		cuda_assert(cuMemcpyDtoH((uchar*)mem.data_pointer + offset,
			(CUdeviceptr)((uchar*)mem.device_pointer + offset), size))
		cuda_pop_context();
	}
 void update_ug( value_type* ug )
 {
     cuda_assert( cudaMemcpy( reinterpret_cast<void*>(data.ug), reinterpret_cast<const void*>(ug), config.ug_size*sizeof(value_type)*2, cudaMemcpyHostToDevice ) );
 }
        cuda_xpattern_data( cuda_xpattern_config const& cpc )
        {
            device_id = cpc.device_id;

            int current_id;
            cuda_assert( cudaGetDevice(&current_id) );
            if ( current_id != device_id ) cuda_assert( cudaSetDevice( device_id ) );

            size_type const ug_size = sizeof(value_type) * cpc.ug_size * 2;
            cuda_assert( cudaMalloc( reinterpret_cast<void**>(&ug), ug_size ) );
            cuda_assert( cudaMemset( reinterpret_cast<void*>(ug), 0, ug_size ) );

            size_type const ar_size = sizeof(size_type) * cpc.tilt_size * cpc.max_dim * cpc.max_dim;
            cuda_assert( cudaMalloc( reinterpret_cast<void**>(&ar), ar_size ) );
            cuda_assert( cudaMemset( reinterpret_cast<void*>(ar), 0, ar_size ) );

            size_type const diag_size = sizeof(value_type) * cpc.tilt_size * cpc.max_dim;
            cuda_assert( cudaMalloc( reinterpret_cast<void**>(&diag), diag_size ) );
            cuda_assert( cudaMemset( reinterpret_cast<void*>(diag), 0, diag_size ) );

            size_type const dim_size = sizeof(size_type) * cpc.tilt_size;
            cuda_assert( cudaMalloc( reinterpret_cast<void**>(&dim), dim_size ) );
            cuda_assert( cudaMemset( reinterpret_cast<void*>(dim), 0, dim_size ) );

            size_type const I_exp_size = sizeof(value_type) * cpc.tilt_size * cpc.max_dim;
            cuda_assert( cudaMalloc( reinterpret_cast<void**>(&I_exp), I_exp_size ) );
            cuda_assert( cudaMemset( reinterpret_cast<void*>(I_exp), 0, I_exp_size ) );

            size_type const I_diff_size = sizeof(value_type) * cpc.tilt_size * cpc.max_dim;
            cuda_assert( cudaMalloc( reinterpret_cast<void**>(&I_diff), I_diff_size ) );
            cuda_assert( cudaMemset( reinterpret_cast<void*>(I_diff), 0, I_diff_size ) );

            size_type const I_zigmoid_size = sizeof(value_type) * cpc.tilt_size * cpc.max_dim;
            cuda_assert( cudaMalloc( reinterpret_cast<void**>(&I_zigmoid), I_zigmoid_size ) );
            cuda_assert( cudaMemset( reinterpret_cast<void*>(I_zigmoid), 0, I_zigmoid_size ) );

            size_type const thickness_array_size = sizeof(value_type) * cpc.tilt_size;
            cuda_assert( cudaMalloc( reinterpret_cast<void**>(&thickness_array), thickness_array_size ) );
            cuda_assert( cudaMemset( reinterpret_cast<void*>(thickness_array), 0, thickness_array_size ) );

            size_type const cache_size = sizeof(complex_type) * cpc.tilt_size * cpc.max_dim * cpc.max_dim * 6;
            cuda_assert( cudaMalloc( reinterpret_cast<void**>(&cache), cache_size ) );
            cuda_assert( cudaMemset( reinterpret_cast<void*>(cache), 0, cache_size ) );
        }
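Every buffer in this constructor (and in the cuda_pattern_data constructor below) follows the same three-line allocate-then-zero pattern. A small helper, hypothetical and not part of the original, would collapse each pair:

        // Hypothetical helper for the repeated cudaMalloc + cudaMemset pair,
        // e.g. ug = device_alloc_zeroed<value_type>( sizeof(value_type) * cpc.ug_size * 2 );
        template <typename T>
        T* device_alloc_zeroed( size_type bytes )
        {
            T* ptr = nullptr;
            cuda_assert( cudaMalloc( reinterpret_cast<void**>(&ptr), bytes ) );
            cuda_assert( cudaMemset( reinterpret_cast<void*>(ptr), 0, bytes ) );
            return ptr;
        }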
Example #16
	void cuda_pop_context()
	{
		cuda_assert(cuCtxSetCurrent(NULL));
	}
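cuda_push_context and cuda_pop_context bracket every driver-API call in these examples, binding the device's context to the calling thread and unbinding it afterwards. An RAII guard in the same spirit, offered only as an assumption about how one might wrap the pair:

	// Hypothetical scope guard equivalent to the push/pop pair; assumes a
	// driver-API flavour of cuda_assert that checks a CUresult.
	struct cuda_context_scope {
		explicit cuda_context_scope(CUcontext ctx) { cuda_assert(cuCtxSetCurrent(ctx)); }
		~cuda_context_scope() { cuda_assert(cuCtxSetCurrent(NULL)); }
	};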
Example #17
		return true;
	}

#define cuda_error(stmt) cuda_error_(stmt, #stmt)

	void cuda_error_message(const string& message)
	{
		if(error_msg == "")
			error_msg = message;
		fprintf(stderr, "%s\n", message.c_str());
		cuda_error_documentation();
	}

	void cuda_push_context()
	{
		cuda_assert(cuCtxSetCurrent(cuContext))
	}

	void cuda_pop_context()
	{
		cuda_assert(cuCtxSetCurrent(NULL));
	}

	CUDADevice(DeviceInfo& info, Stats &stats, bool background_) : Device(stats)
	{
		first_error = true;
		background = background_;

		cuDevId = info.num;
		cuDevice = 0;
		cuContext = 0;
        ~cuda_xpattern_data()
        {
            int current_id;
            cuda_assert( cudaGetDevice(&current_id) );
            if ( current_id != device_id ) cuda_assert( cudaSetDevice( device_id ) );

            if ( ar ) cuda_assert( cudaFree(ar) );
            if ( dim ) cuda_assert( cudaFree(dim) );
            if ( I_diff ) cuda_assert( cudaFree(I_diff) );
            if ( I_exp ) cuda_assert( cudaFree(I_exp) );
            if ( I_zigmoid ) cuda_assert( cudaFree(I_zigmoid) );
            if ( diag ) cuda_assert( cudaFree(diag) );
            if ( ug ) cuda_assert( cudaFree(ug) );
            if ( thickness_array ) cuda_assert( cudaFree( thickness_array ) );
            if ( cache ) cuda_assert( cudaFree(cache) );

            ar = 0;
            dim = 0;
            I_diff = 0;
            I_exp = 0;
            I_zigmoid = 0;
            diag = 0;
            ug = 0;
            thickness_array = 0;
            cache = 0;
        }
 void update_kt_factor( value_type* host_kt_factor )
 {
     cuda_assert( cudaMemcpy( reinterpret_cast<void*>(data.kt_factor), reinterpret_cast<const void*>(host_kt_factor), config.tilt_size*sizeof(value_type)*3, cudaMemcpyHostToDevice ) );
 }
        ~cuda_pattern_data()
        {
            int current_id;
            cuda_assert( cudaGetDevice(&current_id) );
            if ( current_id != device_id ) cuda_assert( cudaSetDevice( device_id ) );

            if ( ar ) cuda_assert( cudaFree(ar) );
            if ( dim ) cuda_assert( cudaFree(dim) );
            if ( I_diff ) cuda_assert( cudaFree(I_diff) );
            if ( I_exp ) cuda_assert( cudaFree(I_exp) );
            if ( I_zigmoid ) cuda_assert( cudaFree(I_zigmoid) );
            if ( diag ) cuda_assert( cudaFree(diag) );
            if ( ug ) cuda_assert( cudaFree(ug) );
            if ( cache ) cuda_assert( cudaFree(cache) );
            if ( beams ) cuda_assert( cudaFree(beams) );
            if ( kt_factor ) cuda_assert( cudaFree(kt_factor) );
            if ( gvec ) cuda_assert( cudaFree(gvec) );
            if ( tilt ) cuda_assert( cudaFree(tilt) );

            ar = 0;
            dim = 0;
            I_diff = 0;
            I_exp = 0;
            I_zigmoid = 0;
            diag = 0;
            ug = 0;
            cache = 0;
            beams = 0;
            kt_factor = 0;
            gvec = 0;
            tilt = 0;
        }
        cuda_pattern_data( cuda_pattern_config const& cpc )
        {
            device_id = cpc.device_id;

            int current_id;
            cuda_assert( cudaGetDevice(&current_id) );
            if ( current_id != device_id ) cuda_assert( cudaSetDevice( device_id ) );

            size_type const kt_factor_size = sizeof(value_type) * cpc.tilt_size * 3;
            cuda_assert( cudaMalloc( reinterpret_cast<void**>(&kt_factor), kt_factor_size ) );
            cuda_assert( cudaMemset( reinterpret_cast<void*>(kt_factor), 0, kt_factor_size ) );

            size_type const beams_size = sizeof(value_type) * cpc.tilt_size * 10;
            cuda_assert( cudaMalloc( reinterpret_cast<void**>(&beams), beams_size ) );
            cuda_assert( cudaMemset( reinterpret_cast<void*>(beams), 0, beams_size ) );

            size_type const ug_size = sizeof(value_type) * cpc.ug_size * 2;
            cuda_assert( cudaMalloc( reinterpret_cast<void**>(&ug), ug_size ) );
            cuda_assert( cudaMemset( reinterpret_cast<void*>(ug), 0, ug_size ) );

            size_type const ar_size = sizeof(size_type) * cpc.tilt_size * cpc.max_dim * cpc.max_dim;
            cuda_assert( cudaMalloc( reinterpret_cast<void**>(&ar), ar_size ) );
            cuda_assert( cudaMemset( reinterpret_cast<void*>(ar), 0, ar_size ) );

            size_type const diag_size = sizeof(value_type) * cpc.tilt_size * cpc.max_dim;
            cuda_assert( cudaMalloc( reinterpret_cast<void**>(&diag), diag_size ) );
            cuda_assert( cudaMemset( reinterpret_cast<void*>(diag), 0, diag_size ) );

            size_type const dim_size = sizeof(size_type) * cpc.tilt_size;
            cuda_assert( cudaMalloc( reinterpret_cast<void**>(&dim), dim_size ) );
            cuda_assert( cudaMemset( reinterpret_cast<void*>(dim), 0, dim_size ) );

            size_type const I_exp_size = sizeof(value_type) * cpc.tilt_size * cpc.max_dim;
            cuda_assert( cudaMalloc( reinterpret_cast<void**>(&I_exp), I_exp_size ) );
            cuda_assert( cudaMemset( reinterpret_cast<void*>(I_exp), 0, I_exp_size ) );

            size_type const I_diff_size = sizeof(value_type) * cpc.tilt_size * cpc.max_dim;
            cuda_assert( cudaMalloc( reinterpret_cast<void**>(&I_diff), I_diff_size ) );
            cuda_assert( cudaMemset( reinterpret_cast<void*>(I_diff), 0, I_diff_size ) );

            size_type const I_zigmoid_size = sizeof(value_type) * cpc.tilt_size * cpc.max_dim;
            cuda_assert( cudaMalloc( reinterpret_cast<void**>(&I_zigmoid), I_zigmoid_size ) );
            cuda_assert( cudaMemset( reinterpret_cast<void*>(I_zigmoid), 0, I_zigmoid_size ) );

            size_type const cache_size = sizeof(complex_type) * cpc.tilt_size * cpc.max_dim * cpc.max_dim * 6;
            cuda_assert( cudaMalloc( reinterpret_cast<void**>(&cache), cache_size ) );
            cuda_assert( cudaMemset( reinterpret_cast<void*>(cache), 0, cache_size ) );

            size_type const gvec_size = sizeof(value_type) * cpc.ug_size * 2;
            cuda_assert( cudaMalloc( reinterpret_cast<void**>(&gvec), gvec_size ) );
            cuda_assert( cudaMemset( reinterpret_cast<void*>(gvec), 0, gvec_size ) );

            size_type const tilt_size = sizeof(value_type) * cpc.tilt_size * 2;
            cuda_assert( cudaMalloc( reinterpret_cast<void**>(&tilt), tilt_size ) );
            cuda_assert( cudaMemset( reinterpret_cast<void*>(tilt), 0, tilt_size ) );
        }