示例#1
0
int test_long_vector_add()
{
	// Blog: http://blog.csdn.net/fengbingchun/article/details/75570546
	const int length{ 100000000 };
	std::unique_ptr<float[]> A(new float[length]);
	std::unique_ptr<float[]> B(new float[length]);
	std::unique_ptr<float[]> C1(new float[length]);
	std::unique_ptr<float[]> C2(new float[length]);

	generator_random_number(A.get(), length, -1000.f, 1000.f);
	generator_random_number(B.get(), length, -1000.f, 1000.f);

	float elapsed_time1{ 0.f }, elapsed_time2{ 0.f }; // milliseconds
	int ret = long_vector_add_cpu(A.get(), B.get(), C1.get(), length, &elapsed_time1);
	if (ret != 0) PRINT_ERROR_INFO(long_vector_add_cpu);

	ret = long_vector_add_gpu(A.get(), B.get(), C2.get(), length, &elapsed_time2);
	if (ret != 0) PRINT_ERROR_INFO(matrix_mul_gpu);

	int count_error{ 0 };
	for (int i = 0; i < length; ++i) {
		if (count_error > 100) return -1;
		if (fabs(C1[i] - C2[i]) > EPS_) {
			fprintf(stderr, "Result verification failed at element %d, C1: %f, C2: %f\n",
				i, C1[i], C2[i]);
			++count_error;
		}
	}

	fprintf(stderr, "cpu run time: %f ms, gpu run time: %f ms\n", elapsed_time1, elapsed_time2);

	return 0;
}
示例#2
0
int test_vector_add()
{
	// Blog: http://blog.csdn.net/fengbingchun/article/details/74120057
	// Vector addition: C = A + B, implements element by element vector addition
	const int numElements{ 50000 };
	std::vector<float> A(numElements), B(numElements), C1(numElements), C2(numElements);

	// Initialize vector
	for (int i = 0; i < numElements; ++i) {
		A[i] = rand() / (float)RAND_MAX;
		B[i] = rand() / (float)RAND_MAX;
	}

	float elapsed_time1{ 0.f }, elapsed_time2{ 0.f }; // milliseconds
	int ret = vector_add_cpu(A.data(), B.data(), C1.data(), numElements, &elapsed_time1);
	if (ret != 0) PRINT_ERROR_INFO(vectorAdd_cpu);

	ret = vector_add_gpu(A.data(), B.data(), C2.data(), numElements, &elapsed_time2);
	if (ret != 0) PRINT_ERROR_INFO(vectorAdd_gpu);

	for (int i = 0; i < numElements; ++i) {
		if (fabs(C1[i] - C2[i]) > EPS_) {
			fprintf(stderr, "Result verification failed at element %d!\n", i);
			return -1;
		}
	}

	fprintf(stderr, "cpu run time: %f ms, gpu run time: %f ms\n", elapsed_time1, elapsed_time2);

	return 0;
}
示例#3
0
int test_dot_product()
{
	// Blog: http://blog.csdn.net/fengbingchun/article/details/75669764
	//       http://blog.csdn.net/fengbingchun/article/details/76571955
	const int length{ 1024 * 1024 * 33 };
	std::unique_ptr<float[]> A(new float[length]);
	std::unique_ptr<float[]> B(new float[length]);

	generator_random_number(A.get(), length, -10.f, 10.f);
	generator_random_number(B.get(), length, -10.f, 10.f);

	float elapsed_time1{ 0.f }, elapsed_time2{ 0.f }; // milliseconds
	float value1{ 0.f }, value2{ 0.f };

	int ret = dot_product_cpu(A.get(), B.get(), &value1, length, &elapsed_time1);
	if (ret != 0) PRINT_ERROR_INFO(long_vector_add_cpu);

	ret = dot_product_gpu(A.get(), B.get(), &value2, length, &elapsed_time2);
	if (ret != 0) PRINT_ERROR_INFO(matrix_mul_gpu);

	if (fabs(value1 - value2) > EPS_) {
		fprintf(stderr, "Result verification failed value1: %f, value2: %f\n", value1, value2);
	}

	fprintf(stderr, "test dot product: cpu run time: %f ms, gpu run time: %f ms\n", elapsed_time1, elapsed_time2);

	return 0;
}
示例#4
0
int test_streams()
{
	// Blog: http://blog.csdn.net/fengbingchun/article/details/76532198
	const int length{ 1024 * 1024 * 20};
	std::unique_ptr<int[]> A(new int[length]);
	std::unique_ptr<int[]> B(new int[length]);
	std::unique_ptr<int[]> C1(new int[length]);
	std::unique_ptr<int[]> C2(new int[length]);

	generator_random_number<int>(A.get(), length, -100, 100);
	generator_random_number<int>(B.get(), length, -100, 100);
	std::for_each(C1.get(), C1.get() + length, [](int& n) {n = 0; });
	std::for_each(C2.get(), C2.get() + length, [](int& n) {n = 0; });

	float elapsed_time1{ 0.f }, elapsed_time2{ 0.f }; // milliseconds

	int ret = streams_cpu(A.get(), B.get(), C1.get(), length, &elapsed_time1);
	if (ret != 0) PRINT_ERROR_INFO(streams_cpu);

	ret = streams_gpu(A.get(), B.get(), C2.get(), length, &elapsed_time2);
	if (ret != 0) PRINT_ERROR_INFO(streams_gpu);

	for (int i = 0; i < length; ++i) {
		if (C1[i] != C2[i]) {
			fprintf(stderr, "their values are different at: %d, val1: %d, val2: %d\n",
				i, C1[i], C2[i]);
			return -1;
		}
	}

	fprintf(stderr, "test streams' usage: cpu run time: %f ms, gpu run time: %f ms\n", elapsed_time1, elapsed_time2);

	return 0;
}
示例#5
0
int test_matrix_mul()
{
	// Blog: http://blog.csdn.net/fengbingchun/article/details/76618165
	// Matrix multiplication: C = A * B
	// 矩阵A、B的宽、高应是32的整数倍
	const int rowsA{ 352 }, colsA{ 672 }, rowsB = colsA, colsB{ 384 };
	std::unique_ptr<float[]> A(new float[colsA*rowsA]);
	std::unique_ptr<float[]> B(new float[colsB*rowsB]);
	std::unique_ptr<float[]> C1(new float[rowsA*colsB]);
	std::unique_ptr<float[]> C2(new float[rowsA*colsB]);

	generator_random_number(A.get(), colsA*rowsA, -1.f, 1.f);
	generator_random_number(B.get(), colsB*rowsB, -1.f, 1.f);

	float elapsed_time1{ 0.f }, elapsed_time2{ 0.f }; // milliseconds
	int ret = matrix_mul_cpu(A.get(), B.get(), C1.get(), colsA, rowsA, colsB, rowsB, &elapsed_time1);
	if (ret != 0) PRINT_ERROR_INFO(matrix_mul_cpu);

	ret = matrix_mul_gpu(A.get(), B.get(), C2.get(), colsA, rowsA, colsB, rowsB, &elapsed_time2);
	if (ret != 0) PRINT_ERROR_INFO(matrix_mul_gpu);

	int count{ 0 };
	for (int i = 0; i < rowsA*colsB; ++i) {
		if (count > 100) return -1;
		if (fabs(C1[i] - C2[i]) > EPS_) {
			fprintf(stderr, "Result verification failed at element %d, C1: %f, C2: %f\n",
				i, C1[i], C2[i]);
			++count;
		}
	}

	fprintf(stderr, "test matrix mul: cpu run time: %f ms, gpu run time: %f ms\n", elapsed_time1, elapsed_time2);

	return 0;
}
示例#6
0
int test_layer_prior_vbox()
{
	// Blog: http://blog.csdn.net/fengbingchun/article/details/77850422
	std::vector<float> vec1{423.f, 245.f, 1333.f, 1444.f, 123.f, 23.f, 32.f, 66.f};
	std::vector<float> vec2(vec1[6]);
	std::vector<float> vec3(4);
	int length = int(vec1[0] * vec1[1] * vec1[6] * 4 * 2);

	std::unique_ptr<float[]> data1(new float[length]), data2(new float[length]);
	std::for_each(data1.get(), data1.get() + length, [](float& n) {n = 0.f; });
	std::for_each(data2.get(), data2.get() + length, [](float& n) {n = 0.f; });
	generator_random_number(vec2.data(), vec2.size(), 10.f, 100.f);
	generator_random_number(vec3.data(), vec3.size(), 1.f, 10.f);

	float elapsed_time1{ 0.f }, elapsed_time2{ 0.f }; // milliseconds
	int ret = layer_prior_vbox_cpu(data1.get(), length, vec1, vec2, vec3, &elapsed_time1);
	if (ret != 0) PRINT_ERROR_INFO(layer_prior_vbox_cpu);

	ret = layer_prior_vbox_gpu(data2.get(), length, vec1, vec2, vec3, &elapsed_time2);
	if (ret != 0) PRINT_ERROR_INFO(layer_prior_vbox_gpu);

	compare_result(data1.get(), data2.get(), length);

	fprintf(stderr, "test layer prior vbox: cpu run time: %f ms, gpu run time: %f ms\n", elapsed_time1, elapsed_time2);

	return 0;
}
示例#7
0
void remove_watch_directory(GNode *directory, gboolean remove_watch)
{
	CHECK_INOTIFY_ENABLED();
	if (FILE(directory)->watch == -1)
		PRINT_ERROR_INFO("Directory doesn't have a watch associated");
	else if (remove_watch && inotify_rm_watch(inotify_descriptor, FILE(directory)->watch) == -1)
		PRINT_ERRNO_INFO();
	else if (g_hash_table_remove(watches_table, &(FILE(directory)->watch)) == FALSE)
		PRINT_ERROR_INFO("Element removal failed in watches_table");
}
示例#8
0
int test_ray_tracking()
{
	// Blog: http://blog.csdn.net/fengbingchun/article/details/76135606
	const int spheres{ 20 };
	std::unique_ptr<float[]> A(new float[spheres * 3]);
	std::unique_ptr<float[]> B(new float[spheres * 3]);
	std::unique_ptr<float[]> C(new float[spheres]);

	generator_random_number(A.get(), spheres * 3, 0.f, 1.f);
	generator_random_number(B.get(), spheres * 3, -400.f, 400.f);
	generator_random_number(C.get(), spheres, 20.f, 120.f);

	float elapsed_time1{ 0.f }, elapsed_time2{ 0.f }; // milliseconds

	const int width{ 512 }, height = width;
	cv::Mat mat1(height, width, CV_8UC4), mat2(height, width, CV_8UC4);

	int ret = ray_tracking_cpu(A.get(), B.get(), C.get(), spheres, mat1.data, width, height, &elapsed_time1);
	if (ret != 0) PRINT_ERROR_INFO(ray_tracking_cpu);

	ret = ray_tracking_gpu(A.get(), B.get(), C.get(), spheres, mat2.data, width, height, &elapsed_time2);
	if (ret != 0) PRINT_ERROR_INFO(ray_tracking_gpu);

	for (int y = 0; y < height; ++y) {
		for (int x = 0; x < width; ++x) {
			cv::Vec4b val1 = mat1.at<cv::Vec4b>(y, x);
			cv::Vec4b val2 = mat2.at<cv::Vec4b>(y, x);

			for (int i = 0; i < 4; ++i) {
				if (val1[i] != val2[i]) {
					fprintf(stderr, "their values are different at (%d, %d), i: %d, val1: %d, val2: %d\n",
						x, y, i, val1[i], val2[i]);
					//return -1;
				}
			}
		}
	}

#ifdef __linux__
	const std::string save_image_name{ "test_data/ray_tracking.jpg" };
#else
	const std::string save_image_name{ "E:/GitCode/CUDA_Test/ray_tracking.jpg" };
#endif
	cv::imwrite(save_image_name, mat2);

	fprintf(stderr, "ray tracking: cpu run time: %f ms, gpu run time: %f ms\n", elapsed_time1, elapsed_time2);

	return 0;
}
示例#9
0
int test_get_device_info()
{
	// Blog: http://blog.csdn.net/fengbingchun/article/details/76902556
	int ret = get_device_info();
	if (ret != 0) PRINT_ERROR_INFO(get_device_info);

	return 0;
}
示例#10
0
int test_layer_reverse()
{
	// Blog: http://blog.csdn.net/fengbingchun/article/details/77160872
#ifdef __linux__
	std::string image_name{ "test_data/images/lena.png" };
#else
	std::string image_name{ "E:/GitCode/CUDA_Test/test_data/images/lena.png" };
#endif
	cv::Mat matSrc = cv::imread(image_name);
	CHECK(matSrc.data);

	cv::cvtColor(matSrc, matSrc, CV_BGR2GRAY);
	const int width{ 1511 }, height{ 1473 };
	const auto length = width * height;
	cv::resize(matSrc, matSrc, cv::Size(width, height));
	cv::Mat matTmp1;
	matSrc.convertTo(matTmp1, CV_32FC1);

	float elapsed_time1{ 0.f }, elapsed_time2{ 0.f }; // milliseconds
	const std::vector<int> vec{ 5, 7};
	std::unique_ptr<float[]> dst1(new float[length]), dst2(new float[length]);
	std::for_each(dst1.get(), dst1.get() + length, [](float& n) {n = 0.f; });
	std::for_each(dst2.get(), dst2.get() + length, [](float& n) {n = 0.f; });

	int ret = layer_reverse_cpu((float*)matTmp1.data, dst1.get(), length, vec, &elapsed_time1);
	if (ret != 0) PRINT_ERROR_INFO(image_reverse_cpu);

	ret = layer_reverse_gpu((float*)matTmp1.data, dst2.get(), length, vec, &elapsed_time2);
	if (ret != 0) PRINT_ERROR_INFO(image_reverse_gpu);

	compare_result(dst1.get(), dst2.get(), length);

	cv::Mat matTmp2(height, width, CV_32FC1, dst2.get()), matDst;
	matTmp2.convertTo(matDst, CV_8UC1);

#ifdef __linux__
	save_image(matSrc, matDst, 400, 200, "test_data/images/image_reverse.png");
#else
	save_image(matSrc, matDst, 400, 200, "E:/GitCode/CUDA_Test/test_data/images/image_reverse.png");
#endif

	fprintf(stderr, "test layer reverse: cpu run time: %f ms, gpu run time: %f ms\n", elapsed_time1, elapsed_time2);

	return 0;
}
示例#11
0
int test_julia()
{
	// Blog: http://blog.csdn.net/fengbingchun/article/details/76020154
	const int width{ 512 }, height = width;
	const float scale{ 1.5f };
	cv::Mat mat1(height, width, CV_8UC4), mat2(height, width, CV_8UC4);

	float elapsed_time1{ 0.f }, elapsed_time2{ 0.f }; // milliseconds

	int ret = julia_cpu(mat1.data, width, height, scale, &elapsed_time1);
	if (ret != 0) PRINT_ERROR_INFO(julia_cpu);

	ret = julia_gpu(mat2.data, width, height, scale, &elapsed_time2);
	if (ret != 0) PRINT_ERROR_INFO(julia_gpu);

	for (int y = 0; y < height; ++y) {
		for (int x = 0; x < width; ++x) {
			cv::Vec4b val1 = mat1.at<cv::Vec4b>(y, x);
			cv::Vec4b val2 = mat2.at<cv::Vec4b>(y, x);

			for (int i = 0; i < 4; ++i) {
				if (val1[i] != val2[i]) {
					fprintf(stderr, "their values are different at (%d, %d), i: %d, val1: %d, val2: %d\n",
						x, y, i, val1[i], val2[i]);
					//return -1;
				}
			}
		}
	}

#ifdef __linux__
	const std::string save_image_name{ "test_data/julia.jpg" };
#else
	const std::string save_image_name{ "E:/GitCode/CUDA_Test/julia.jpg" };
#endif
	cv::imwrite(save_image_name, mat2);

	fprintf(stderr, "cpu run time: %f ms, gpu run time: %f ms\n", elapsed_time1, elapsed_time2);

	return 0;
}
示例#12
0
int test_calculate_histogram()
{
	// Blog: http://blog.csdn.net/fengbingchun/article/details/76417598
	const int length{ 10 * 1024 * 1024 }; // 100MB
	std::unique_ptr<unsigned char[]> data(new unsigned char[length]);
	generator_random_number<unsigned char>(data.get(), length, 0, 255);

	const int hist_size{ 256 };
	std::unique_ptr<unsigned int[]> hist1(new unsigned int[hist_size]), hist2(new unsigned int[hist_size]);
	std::for_each(hist1.get(), hist1.get() + hist_size, [](unsigned int& n) {n = 0; });
	std::for_each(hist2.get(), hist2.get() + hist_size, [](unsigned int& n) {n = 0; });

	float elapsed_time1{ 0.f }, elapsed_time2{ 0.f }; // milliseconds
	unsigned int value1{ 0 }, value2{ 0 };

	int ret = calculate_histogram_cpu(data.get(), length, hist1.get(), value1, &elapsed_time1);
	if (ret != 0) PRINT_ERROR_INFO(calculate_histogram_cpu);

	ret = calculate_histogram_gpu(data.get(), length, hist2.get(), value2, &elapsed_time2);
	if (ret != 0) PRINT_ERROR_INFO(calculate_histogram_gpu);

	if (value1 != value2) {
		fprintf(stderr, "their values are different: val1: %d, val2: %d\n", value1, value2);
		return -1;
	}
	for (int i = 0; i < hist_size; ++i) {
		if (hist1[i] != hist2[i]) {
			fprintf(stderr, "their values are different at: %d, val1: %d, val2: %d\n",
				i, hist1[i], hist2[i]);
			return -1;
		}
	}

	fprintf(stderr, "test calculate histogram: cpu run time: %f ms, gpu run time: %f ms\n", elapsed_time1, elapsed_time2);

	return 0;
}
示例#13
0
int test_heat_conduction()
{
	// Blog: http://blog.csdn.net/fengbingchun/article/details/76228257
	const int width{ 1024 }, height = width;
	cv::Mat mat1(height, width, CV_8UC4), mat2(height, width, CV_8UC4);

	const float speed{ 0.25f }, max_temp{ 1.f }, min_temp{0.0001f};
	float elapsed_time1{ 0.f }, elapsed_time2{ 0.f }; // milliseconds

	// intialize the constant data
	std::unique_ptr<float[]> temp(new float[width * height]);
	for (int i = 0; i < width*height; ++i) {
		temp[i] = 0;
		int x = i % width;
		int y = i / height;
		if ((x>300) && (x<600) && (y>310) && (y<601))
			temp[i] = max_temp;
	}

	temp[width * 100 + 100] = (max_temp + min_temp) / 2;
	temp[width * 700 + 100] = min_temp;
	temp[width * 300 + 300] = min_temp;
	temp[width * 200 + 700] = min_temp;

	for (int y = 800; y < 900; ++y) {
		for (int x = 400; x < 500; ++x) {
			temp[x + y * width] = min_temp;
		}
	}

	int ret = heat_conduction_cpu(mat1.data, width, height, temp.get(), speed, &elapsed_time1);
	if (ret != 0) PRINT_ERROR_INFO(heat_conduction_cpu);

	ret = heat_conduction_gpu(mat2.data, width, height, temp.get(), speed, &elapsed_time2);
	if (ret != 0) PRINT_ERROR_INFO(heat_conduction_gpu);

	for (int y = 0; y < height; ++y) {
		for (int x = 0; x < width; ++x) {
			cv::Vec4b val1 = mat1.at<cv::Vec4b>(y, x);
			cv::Vec4b val2 = mat2.at<cv::Vec4b>(y, x);

			for (int i = 0; i < 4; ++i) {
				if (val1[i] != val2[i]) {
					fprintf(stderr, "their values are different at (%d, %d), i: %d, val1: %d, val2: %d\n",
						x, y, i, val1[i], val2[i]);
					//return -1;
				}
			}
		}
	}

#ifdef __linux__
	std::string save_image_name{ "test_data/heat_conduction.jpg" };
#else
	std::string save_image_name{ "E:/GitCode/CUDA_Test/heat_conduction.jpg" };
#endif
	cv::resize(mat2, mat2, cv::Size(width / 2, height / 2), 0.f, 0.f, 2);
	cv::imwrite(save_image_name, mat2);

	fprintf(stderr, "test heat conduction: cpu run time: %f ms, gpu run time: %f ms\n", elapsed_time1, elapsed_time2);

	return 0;
}
示例#14
0
int test_layer_channel_normalize()
{
	// Blog: http://blog.csdn.net/fengbingchun/article/details/76976024
#ifdef __linux__
	std::string image_name{ "test_data/images/lena.png" };
#else
	std::string image_name{ "E:/GitCode/CUDA_Test/test_data/images/lena.png" };
#endif
	cv::Mat matSrc = cv::imread(image_name);
	if (!matSrc.data) {
		fprintf(stderr, "read image fail: %s\n", image_name.c_str());
		return -1;
	}

	const int width{ 511 }, height{ 473 }, channels{ 3 };
	cv::resize(matSrc, matSrc, cv::Size(width, height));
	matSrc.convertTo(matSrc, CV_32FC3);
	std::vector<cv::Mat> matSplit;
	cv::split(matSrc, matSplit);
	CHECK(matSplit.size() == channels);
	std::unique_ptr<float[]> data(new float[matSplit[0].cols * matSplit[0].rows * channels]);
	size_t length{ matSplit[0].cols * matSplit[0].rows * sizeof(float) };
	for (int i = 0; i < channels; ++i) {
		memcpy(data.get() + matSplit[0].cols * matSplit[0].rows * i, matSplit[i].data, length);
	}

	float elapsed_time1{ 0.f }, elapsed_time2{ 0.f }; // milliseconds
	std::unique_ptr<float[]> dst1(new float[matSplit[0].cols * matSplit[0].rows * channels]);
	std::unique_ptr<float[]> dst2(new float[matSplit[0].cols * matSplit[0].rows * channels]);

	int ret = layer_channel_normalize_cpu(data.get(), dst1.get(), width, height, channels, &elapsed_time1);
	if (ret != 0) PRINT_ERROR_INFO(image_normalize_cpu);

	ret = layer_channel_normalize_gpu(data.get(), dst2.get(), width, height, channels, &elapsed_time2);
	if (ret != 0) PRINT_ERROR_INFO(image_normalize_gpu);

	int count{ 0 }, num{ width * height * channels };
	for (int i = 0; i < num; ++i) {
		if (fabs(dst1[i] - dst2[i]) > 0.01/*EPS_*/) {
			fprintf(stderr, "index: %d, val1: %f, val2: %f\n", i, dst1[i], dst2[i]);
			++count;
		}
		if (count > 100) return -1;
	}

	std::vector<cv::Mat> merge(channels);
	for (int i = 0; i < channels; ++i) {
		merge[i] = cv::Mat(height, width, CV_32FC1, dst2.get() + i * width * height);
	}
	cv::Mat dst3;
	cv::merge(merge, dst3);
	dst3.convertTo(dst3, CV_8UC3, 255.f);
#ifdef __linux__
	cv::imwrite("test_data/images/image_normalize.png", dst3);
#else
	cv::imwrite("E:/GitCode/CUDA_Test/test_data/images/image_normalize.png", dst3);
#endif
	//cv::resize(matSrc, matSrc, cv::Size(width, height));
#ifdef __linux__
	//cv::imwrite("test_data/images/image_src.png", matSrc);
#else
	//cv::imwrite("E:/GitCode/CUDA_Test/test_data/images/image_src.png", matSrc);
#endif

	fprintf(stderr, "test layer channel normalize: cpu run time: %f ms, gpu run time: %f ms\n", elapsed_time1, elapsed_time2);

	return 0;
}