C++ (Cpp) CudaMallocFromHostArrayの例

プログラミング言語: C++ (Cpp)

メソッド/関数: CudaMallocFromHostArray

hotexamples.comのコード掲載数: 3

C++ (Cpp) CudaMallocFromHostArray - 3件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたC++ (Cpp)のCudaMallocFromHostArrayの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

コード例 #1

ファイルを表示

// Smoke test for d_ReconstructGridding: loads gridding weights and the real and
// imaginary parts of Fourier-space data from MRC files, uploads them to the
// device and runs the reconstruction. The test makes no assertion on the
// reconstructed volume; it only checks that the call runs.
TEST(Reconstruction, Fourier)
{
	cudaDeviceReset();

	//Case 1:
	/*{
		int3 dimsvolume = {16, 16, 16};
		int3 dimsimage = {16, 16, 2249};

		tfloat* d_inputproj = (tfloat*)CudaMallocFromBinaryFile("Data\\Reconstruction\\Input_ARTProj_2.bin");
		tfloat3* h_inputangles = (tfloat3*)MallocFromBinaryFile("Data\\Reconstruction\\Input_ARTAngles_2.bin");
		tfloat* desired_output = (tfloat*)MallocFromBinaryFile("Data\\Reconstruction\\Output_ART_2.bin");

		tfloat* d_volume;
		cudaMalloc((void**)&d_volume, Elements(dimsvolume) * sizeof(tfloat));
		

		tfloat* h_output = (tfloat*)MallocFromDeviceArray(d_volume, Elements(dimsvolume) * sizeof(tfloat));
		tfloat outputmax = (tfloat)-999999;
		for (int i = 0; i < Elements(dimsvolume); i++)
			outputmax = max(outputmax, h_output[i]);
		//for (int i = 0; i < Elements(dimsvolume); i++)
			//h_output[i] /= outputmax;
	
		double MeanAbsolute = GetMeanAbsoluteError((tfloat*)desired_output, (tfloat*)h_output, Elements(dimsvolume));
		//ASSERT_LE(MeanRelative, 1e-5);

		cudaFree(d_volume);
		cudaFree(d_inputproj);
		free(desired_output);
		free(h_output);
	}*/

	//Case 2:
	{
		int3 dimsori = toInt3(128, 128, 128);
		int3 dimspadded = toInt3(259, 259, 259);

		// Element count of the padded volume's FFT layout; computed once instead of
		// being re-evaluated in every loop condition.
		const size_t elementsfft = ElementsFFT(dimspadded);

		// NOTE(review): ReadMRC receives the address of h_weights, so it may replace
		// the pointer with its own allocation. Ownership of h_weights after these
		// calls cannot be determined from this file, so it is deliberately not freed
		// here - confirm against ReadMRC's implementation.
		tfloat* h_weights = (tfloat*)malloc(elementsfft * sizeof(tfloat));
		ReadMRC("d_weights.mrc", (void**)&h_weights);
		tfloat* d_weights = (tfloat*)CudaMallocFromHostArray(h_weights, elementsfft * sizeof(tfloat));

		// Interleave the separately stored real and imaginary parts into one complex
		// array, reusing h_weights as the staging buffer for each file.
		tcomplex* h_data = (tcomplex*)malloc(elementsfft * sizeof(tcomplex));
		ReadMRC("d_dataRe.mrc", (void**)&h_weights);
		for (size_t i = 0; i < elementsfft; i++)
			h_data[i].x = h_weights[i];
		ReadMRC("d_dataIm.mrc", (void**)&h_weights);
		for (size_t i = 0; i < elementsfft; i++)
			h_data[i].y = h_weights[i];
		tcomplex* d_data = (tcomplex*)CudaMallocFromHostArray(h_data, elementsfft * sizeof(tcomplex));

		// The host copy is no longer needed once it has been uploaded.
		free(h_data);

		tfloat* d_reconstructed = CudaMallocValueFilled(Elements(dimsori), (tfloat)0);

		d_ReconstructGridding(d_data, d_weights, d_reconstructed, dimsori, dimspadded);

		// Release device buffers explicitly rather than relying on the trailing
		// cudaDeviceReset() to reclaim them.
		cudaFree(d_reconstructed);
		cudaFree(d_data);
		cudaFree(d_weights);
	}

	cudaDeviceReset();
}

コード例 #2

ファイルを表示

// Exercises d_Warp2D on a stack of frames: for frame n, the two middle rows of a
// 4x4 control grid are set to (n, 0) while the outer rows stay at zero, and the
// warped frames are written to "d_warped.mrc". The test makes no assertion on
// the output values.
TEST(Transformation, Warp2D)
{
	cudaDeviceReset();

	//Case 1:
	{
		int2 dimsimage = toInt2(1024, 1024);
		int nframes = 40;
		tfloat* d_input = (tfloat*)CudaMallocFromBinaryFile("Data\\Transformation\\Input_Warp2D.bin");
		tfloat* d_output = CudaMallocValueFilled(Elements2(dimsimage) * nframes, (tfloat)0);

		int2 dimsgrid = toInt2(4, 4);
		// Allocated as 2 scalars per grid node and viewed as tfloat2; all nodes start at zero.
		tfloat2* h_grid = (tfloat2*)MallocValueFilled(Elements2(dimsgrid) * 2, (tfloat)0);

		// Signed index to match nframes and avoid a signed/unsigned comparison.
		for (int n = 0; n < nframes; n++)
		{
			// Set every node in grid rows 1 and 2 to (n, 0).
			for (int row = 1; row <= 2; row++)
				for (int col = 0; col < dimsgrid.x; col++)
					h_grid[row * dimsgrid.x + col] = tfloat2(n * 1.0, 0.0);

			tfloat2* d_grid = (tfloat2*)CudaMallocFromHostArray(h_grid, Elements2(dimsgrid) * sizeof(tfloat2));

			// Each frame's result is written at its own offset in the output stack.
			d_Warp2D(d_input, dimsimage, d_grid, dimsgrid, d_output + Elements2(dimsimage) * n);

			cudaFree(d_grid);
		}

		d_WriteMRC(d_output, toInt3(dimsimage.x, dimsimage.y, nframes), "d_warped.mrc");

		// Release buffers explicitly instead of relying on cudaDeviceReset().
		cudaFree(d_input);
		cudaFree(d_output);
		// NOTE(review): assumes MallocValueFilled returns malloc'ed memory, as the
		// other Malloc* helper results in this file are released with free() - confirm.
		free(h_grid);
	}

	cudaDeviceReset();
}

コード例 #3

ファイルを表示

// Verifies d_Bin for a 2D and a 3D input. The input holds the repeating pattern
// 0 .. 2^bincount - 1 along the fastest index, and every output element is
// asserted to equal (2^bincount - 1) / 2, the mean of that pattern.
TEST(Transformation, Bin)
{
	// 2D case: one 2048x2048 image with bincount = 5.
	for(int i = 11; i < 12; i++)
	{
		cudaDeviceReset();

		srand(i);
		int size = (1<<i);
		int batch = 1;
		int bincount = 5;

		const size_t elementsin = (size_t)size * size * batch;
		// bincount * 2 because the reduction applies along both image dimensions.
		const size_t elementsout = (size_t)size * size / (1<<(bincount * 2));

		tfloat* h_input = (tfloat*)malloc(elementsin * sizeof(tfloat));
		for(int b = 0; b < batch; b++)
		{
			for(int j = 0; j < size * size; j++)
				h_input[(size_t)b * size * size + j] = (tfloat)(j % (1<<bincount));
		}
		tfloat* d_input = (tfloat*)CudaMallocFromHostArray(h_input, elementsin * sizeof(tfloat));

		tfloat* d_result;
		cudaMalloc((void**)&d_result, elementsout * sizeof(tfloat));

		int3 dims;
		dims.x = size;
		dims.y = size;
		// Previously left uninitialized, so d_Bin received an indeterminate z.
		// NOTE(review): assumes z == 1 is how d_Bin expects 2D input - confirm.
		dims.z = 1;
		d_Bin(d_input, d_result, dims, bincount, batch);

		tfloat* h_result = (tfloat*)MallocFromDeviceArray(d_result, elementsout * sizeof(tfloat));

		ASSERT_ARRAY_EQ(h_result, (tfloat)((1<<bincount) - 1) / (tfloat)2, elementsout);

		cudaFree(d_input);
		cudaFree(d_result);
		free(h_input);
		free(h_result);

		cudaDeviceReset();
	}

	// 3D case: one 512x512x512 volume with bincount = 2.
	for(int i = 9; i < 10; i++)
	{
		cudaDeviceReset();

		srand(i);
		size_t size = (1<<i);
		size_t batch = 1;
		size_t bincount = 2;

		const size_t elementsvolume = size * size * size;
		// bincount * 3 because the reduction applies along all three dimensions.
		const size_t elementsout = elementsvolume / (1<<(bincount * 3)) * batch;

		// Host input is allocated with cudaMallocHost, so it is released with cudaFreeHost below.
		tfloat* h_input;
		cudaMallocHost((void**)&h_input, elementsvolume * batch * sizeof(tfloat), 0);
		for(size_t b = 0; b < batch; b++)
		{
			for(size_t j = 0; j < elementsvolume; j++)
				h_input[b * elementsvolume + j] = (tfloat)(j % (1<<bincount));
		}
		tfloat* d_input = (tfloat*)CudaMallocFromHostArray(h_input, elementsvolume * batch * sizeof(tfloat));

		tfloat* d_result;
		cudaMalloc((void**)&d_result, elementsout * sizeof(tfloat));

		int3 dims;
		dims.x = (int)size;
		dims.y = (int)size;
		dims.z = (int)size;
		d_Bin(d_input, d_result, dims, bincount, batch);

		tfloat* h_result = (tfloat*)MallocFromDeviceArray(d_result, elementsout * sizeof(tfloat));

		// Check the whole binned volume; the previous count was the 2D formula and
		// covered only a fraction of the output.
		ASSERT_ARRAY_EQ(h_result, (tfloat)((1<<bincount) - 1) / (tfloat)2, elementsout);

		cudaFreeHost(h_input);
		free(h_result);
		cudaFree(d_input);
		cudaFree(d_result);

		cudaDeviceReset();
	}
}