Example #1
0
File: main.c Project: barotto/vktut
int main(int argc, char **argv)
{
	VkResult res;
	int retval = EXIT_FAILURE;
	VkInstance vk;
	struct tut1_physical_device devs[MAX_DEVICES];
	uint32_t dev_count = MAX_DEVICES;

	/* Fire up Vulkan */
	res = tut1_init(&vk);
	if (res)
	{
		printf("Could not initialize Vulkan: %s\n", tut1_VkResult_string(res));
		goto exit_bad_init;
	}

	printf("Vulkan is in the house.\n");

	/* Take a look at what devices there are */
	res = tut1_enumerate_devices(vk, devs, &dev_count);
	if (res < 0)
	{
		printf("Could not enumerate devices: %s\n", tut1_VkResult_string(res));
		goto exit_bad_enumerate;
	}
	else if (res == VK_INCOMPLETE)
	{
		print_surprise("", "you've got", "devices", "dream of");
		printf("I have information on only %"PRIu32" of them:\n", dev_count);
	}
	else
		printf("I detected the following %"PRIu32" device%s:\n", dev_count, dev_count == 1?"":"s");

	/*
	 * Print out some of the information taken when enumerating physical devices.  This is by no means an
	 * exhaustive printout, but to give you the idea.
	 */
	for (uint32_t i = 0; i < dev_count; ++i)
	{
		struct tut1_physical_device *dev = &devs[i];
		VkPhysicalDeviceProperties *pr = &dev->properties;

		printf("  - %s: %s (id: 0x%04X) from vendor 0x%04X [driver version: 0x%04X, API version: 0x%04X]\n",
				tut1_VkPhysicalDeviceType_string(pr->deviceType), pr->deviceName,
				pr->deviceID, pr->vendorID, pr->driverVersion, pr->apiVersion);
		if (dev->queue_families_incomplete)
		{
			print_surprise("    ", "your device", "queue families", "imagine");
			printf("    I have information on only %"PRIu32" of them:\n", dev->queue_family_count);
		}
		else
			printf("    The device supports the following %"PRIu32" queue famil%s:\n", dev->queue_family_count, dev->queue_family_count == 1?"y":"ies");

		for (uint32_t j = 0; j < dev->queue_family_count; ++j)
		{
			VkQueueFamilyProperties *qf = &dev->queue_families[j];

			printf("    * %"PRIu32" queue%s with the following capabilit%s:\n", qf->queueCount, qf->queueCount == 1?"":"s",
					qf->queueFlags && (qf->queueFlags & (qf->queueFlags - 1)) == 0?"y":"ies");
			if (qf->queueFlags == 0)
				printf("          None\n");
			if ((qf->queueFlags & VK_QUEUE_GRAPHICS_BIT))
				printf("          Graphics\n");
			if ((qf->queueFlags & VK_QUEUE_COMPUTE_BIT))
				printf("          Compute\n");
			if ((qf->queueFlags & VK_QUEUE_TRANSFER_BIT))
				printf("          Transfer\n");
			if ((qf->queueFlags & VK_QUEUE_SPARSE_BINDING_BIT))
				printf("          Sparse binding\n");
		}

		printf("    The device supports memories of the following types:\n");
		for (uint32_t j = 0; j < dev->memories.memoryTypeCount; ++j)
		{
			printf("    *");
			if (dev->memories.memoryTypes[j].propertyFlags == 0)
				printf(" <no properties>");
			if ((dev->memories.memoryTypes[j].propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT))
				printf(" device-local");
			if ((dev->memories.memoryTypes[j].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT))
				printf(" host-visible");
			if ((dev->memories.memoryTypes[j].propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
				printf(" host-coherent");
			if ((dev->memories.memoryTypes[j].propertyFlags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT))
				printf(" host-cached");
			if ((dev->memories.memoryTypes[j].propertyFlags & VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT))
				printf(" lazy");
			printf(": Available in Heap of size %"PRIu64"MB\n", dev->memories.memoryHeaps[dev->memories.memoryTypes[j].heapIndex].size / (1024 * 1024));
		}
	}

	/* Congratulations, you can now duplicate the `vulkaninfo` program. */

	retval = 0;

	/* Cleanup after yourself */

exit_bad_enumerate:
	tut1_exit(vk);

exit_bad_init:
	return retval;
}
Example #2
0
File: main.c Project: ShabbyX/vktut
/*
 * Entry point of the multi-threaded compute test.
 *
 * Usage: <program> shader_file [thread_count(8) [busy_threads(0) [buffer_size(1MB)]]]
 *
 *   argv[1]  path of the compute shader handed to tut3_load_shader()
 *   argv[2]  total number of worker threads, split among the devices (default 8)
 *   argv[3]  non-zero to make the threads spend CPU time as well (default 0)
 *   argv[4]  total buffer size in bytes, split among the devices (default 1MB)
 *
 * The program initializes Vulkan, sets up every enumerated device, loads the shader and builds the pipelines
 * on each of them, then runs the test on all devices and reports the outcome.
 *
 * Returns 0 only if every stage succeeded and every device's test reported success; EXIT_FAILURE otherwise.
 */
int main(int argc, char **argv)
{
	tut1_error res;
	int retval = EXIT_FAILURE;
	VkInstance vk;
	struct tut1_physical_device phy_devs[MAX_DEVICES];
	struct tut2_device devs[MAX_DEVICES];
	uint32_t dev_count = MAX_DEVICES;
	/*
	 * Initialized with 0 rather than NULL: a shader module is a non-dispatchable handle, which is an integer
	 * (not a pointer) on some platforms, and 0 is a valid "no handle" initializer for both representations.
	 */
	VkShaderModule shaders[MAX_DEVICES] = {0};
	struct tut3_pipelines pipelines[MAX_DEVICES];
	struct tut4_data test_data[MAX_DEVICES];
	int success = 0;

	/*
	 * How many entries of devs, pipelines and test_data have been handed to their setup function so far.  The
	 * arrays live uninitialized on the stack, so the cleanup code must only visit the entries that a setup
	 * function has actually been called on (including the one that failed, if any), never the rest.
	 */
	uint32_t devs_touched = 0;
	uint32_t pipelines_touched = 0;
	uint32_t tests_touched = 0;

	/* How many threads to do the work on */
	size_t thread_count = 8;
	/* Whether the threads should take some CPU time as well */
	bool busy_threads = false;
	/* Default to 1MB of buffer data to work on */
	size_t buffer_size = 1024 * 1024 / sizeof(float);

	/* Parse the command line; the shader file is mandatory, everything else is optional */
	bool bad_args = false;
	if (argc < 2)
		bad_args = true;
	if (argc > 2 && sscanf(argv[2], "%zu", &thread_count) != 1)
		bad_args = true;
	if (argc > 3)
	{
		int temp;
		if (sscanf(argv[3], "%d", &temp) != 1)
			bad_args = true;
		else
			busy_threads = temp;
	}
	if (argc > 4)
	{
		if (sscanf(argv[4], "%zu", &buffer_size) != 1)
			bad_args = true;
		else
			buffer_size /= sizeof(float);	/* given in bytes, used as a number of floats */
	}

	if (bad_args)
	{
		printf("Usage: %s shader_file [thread_count(8) [busy_threads(0) [buffer_size(1MB)]]]\n\n", argv[0]);
		return EXIT_FAILURE;
	}

	/* Fire up Vulkan */
	res = tut1_init(&vk);
	if (!tut1_error_is_success(&res))
	{
		tut1_error_printf(&res, "Could not initialize Vulkan\n");
		goto exit_bad_init;
	}

	/* Enumerate devices */
	res = tut1_enumerate_devices(vk, phy_devs, &dev_count);
	if (tut1_error_is_error(&res))
	{
		tut1_error_printf(&res, "Could not enumerate devices\n");
		goto exit_bad_enumerate;
	}

	/* Set up devices */
	for (uint32_t i = 0; i < dev_count; ++i)
	{
		devs_touched = i + 1;
		res = tut2_setup(&phy_devs[i], &devs[i], VK_QUEUE_COMPUTE_BIT);
		if (!tut1_error_is_success(&res))
		{
			tut1_error_printf(&res, "Could not setup logical device %u, command pools and queues\n", i);
			goto exit_bad_setup;
		}
	}

	/* Load our compute shader */
	for (uint32_t i = 0; i < dev_count; ++i)
	{
		res = tut3_load_shader(&devs[i], argv[1], &shaders[i]);
		if (!tut1_error_is_success(&res))
		{
			tut1_error_printf(&res, "Could not load shader on device %u\n", i);
			goto exit_bad_shader;
		}
	}

	/*
	 * Create the pipelines.  There are as many pipelines created as command buffers (just for example).  If
	 * there are not actually enough resources for them, as many as possible are created.  In this test, we are
	 * not going to handle the case where some pipelines are not created.
	 */
	for (uint32_t i = 0; i < dev_count; ++i)
	{
		pipelines_touched = i + 1;
		res = tut3_make_compute_pipeline(&devs[i], &pipelines[i], shaders[i]);
		if (!tut1_error_is_success(&res))
		{
			tut1_error_printf(&res, "Could not allocate enough pipelines on device %u\n", i);
			goto exit_bad_pipeline;
		}
	}

	/*
	 * Prepare our test.  Both the buffers and threads are divided near-equally among the physical devices, which
	 * are likely to be just 1 in your case, but who knows.  (The divisions by dev_count below are safe: the loop
	 * body only runs when dev_count is at least 1.)
	 */
	for (uint32_t i = 0; i < dev_count; ++i)
	{
		size_t this_buffer_size = buffer_size / dev_count;
		size_t this_thread_count = thread_count / dev_count;

		/* Make sure the last device gets all the left-over */
		if (i == dev_count - 1)
		{
			this_buffer_size = buffer_size - buffer_size / dev_count * (dev_count - 1);
			this_thread_count = thread_count - thread_count / dev_count * (dev_count - 1);
		}

		tests_touched = i + 1;
		res = tut4_prepare_test(&phy_devs[i], &devs[i], &pipelines[i], &test_data[i], this_buffer_size, this_thread_count);
		if (!tut1_error_is_success(&res))
		{
			tut1_error_printf(&res, "Could not allocate resources on device %u\n", i);
			goto exit_bad_test_prepare;
		}
	}

	/*
	 * Ok, this was a LOT of initializing!  But we are finally ready to run something.  tut4_start_test() creates
	 * a test thread for us, which further spawns the corresponding device's thread_count threads that do the
	 * calculations.  We then wait for the tests to finish with tut4_wait_test_end().
	 */
	for (uint32_t i = 0; i < dev_count; ++i)
	{
		if (tut4_start_test(&test_data[i], busy_threads))
		{
			/* Reported but not fatal: the remaining devices still get their test started */
			printf("Could not start the test threads for device %u\n", i);
			perror("Error");
		}
	}

	printf("Running the tests...\n");

	for (uint32_t i = 0; i < dev_count; ++i)
		tut4_wait_test_end(&test_data[i]);

	/* Collect the per-device verdicts; a single failing device fails the whole run */
	success = 1;
	for (uint32_t i = 0; i < dev_count; ++i)
		if (!test_data[i].success)
		{
			if (!tut1_error_is_success(&test_data[i].error))
				tut1_error_printf(&test_data[i].error, "Error starting test on device %u\n", i);
			else
				printf("The test didn't produce expected results (device %u)\n", i);
			success = 0;
		}

	if (success)
		printf("Everything went well :) We just wasted your GPU doing something stupid\n");

	/*
	 * You can time the execution of the program with time(1):
	 *
	 *     $ time ./tut4/tut4 shaders/tut3.comp.spv <threads> ...
	 *
	 * Then try to play with different number of threads and see if the total execution time of the application
	 * changes and how!
	 *
	 * ...
	 *
	 * Did you try that?  Already?  Well, that was disappointing.  More threads probably resulted in higher
	 * execution time, right?  That actually makes sense.  You see, we have N data to compute, and whether you tell
	 * the GPU to do N computations from one thread, or N/T computations each from T threads, you aren't actually
	 * doing any less computation.  You probably just have more overhead from the threads.
	 *
	 * So what's the deal with multi-threaded and Vulkan?  Well, the problem is that this test was heavily
	 * GPU-bound, and as you have noticed, multi-CPU-threaded doesn't help.  For this reason, this test has a
	 * little feature to "fake" some execution on the CPU threads as well.  If you run the program like this:
	 *
	 *     $ time ./tut4/tut4 shaders/tut3.comp.spv <threads> <fake> ...
	 *
	 * where <fake> can be either 0 (no CPU usage) or 1 (some fake CPU usage), and then experiment with different
	 * number of threads, you can see the benefit of multi-threading.  In this case, while the GPU is working, the
	 * CPU thread spends time fake-doing something.  If there is only one thread, the CPU cannot keep the GPU
	 * constantly busy, so the computation slows down.  On the other hand, with multiple threads, the same amount
	 * of CPU work is spread out and done in parallel, so the threads together can feed the GPU with instructions
	 * faster.
	 *
	 * In this test, the total amount of time to waste is 3.2 seconds (32ms for each "render" operation, and there
	 * are a hundred of them).  Depending on your GPU, you may notice that above a certain number of threads, there
	 * is no longer any speedup.  That is when the amount of time spent in each CPU thread becomes less than the
	 * time spent in the GPU for that thread's task, so whether the CPU spent time doing something before waiting
	 * for the GPU doesn't make a difference in the execution time.
	 */

	/* Only report success to the caller when every device's test actually passed */
	if (success)
		retval = 0;

	/*
	 * Cleanup after yourself.  Each label is the landing point for a failure at the matching stage, and the
	 * stages are undone in reverse order.  The *_touched counters keep the loops away from array entries that
	 * were never passed to a setup function.
	 */

exit_bad_test_prepare:
	for (uint32_t i = 0; i < tests_touched; ++i)
		tut4_free_test(&devs[i], &test_data[i]);

exit_bad_pipeline:
	for (uint32_t i = 0; i < pipelines_touched; ++i)
		tut3_destroy_pipeline(&devs[i], &pipelines[i]);

exit_bad_shader:
	/* Every device is set up by the time this label can be reached; shaders never loaded are still 0 */
	for (uint32_t i = 0; i < dev_count; ++i)
		tut3_free_shader(&devs[i], shaders[i]);

exit_bad_setup:
	for (uint32_t i = 0; i < devs_touched; ++i)
		tut2_cleanup(&devs[i]);

exit_bad_enumerate:
	tut1_exit(vk);

exit_bad_init:
	return retval;
}
Example #3
0
File: main.c Project: ShabbyX/vktut
/*
 * Entry point of the shader/pipeline tutorial.
 *
 * Usage: <program> shader_file
 *
 * Initializes Vulkan, sets up every enumerated device, loads the compute shader given in argv[1] on each of
 * them, creates as many compute pipelines as possible and prints how many were created per device.
 *
 * Returns 0 if the program got as far as creating the pipelines, EXIT_FAILURE if the arguments are missing
 * or if initialization, enumeration, device setup or shader loading failed.
 */
int main(int argc, char **argv)
{
	tut1_error res;
	int retval = EXIT_FAILURE;
	VkInstance vk;
	struct tut1_physical_device phy_devs[MAX_DEVICES];
	struct tut2_device devs[MAX_DEVICES];
	uint32_t dev_count = MAX_DEVICES;
	/*
	 * Initialized with 0 rather than NULL: a shader module is a non-dispatchable handle, which is an integer
	 * (not a pointer) on some platforms, and 0 is a valid "no handle" initializer for both representations.
	 */
	VkShaderModule shaders[MAX_DEVICES] = {0};
	struct tut3_pipelines pipelines[MAX_DEVICES];

	/*
	 * How many entries of devs have been handed to tut2_setup() so far.  The array lives uninitialized on the
	 * stack, so the cleanup code must only visit the entries setup was actually called on (including the one
	 * that failed, if any), never the rest.
	 */
	uint32_t devs_touched = 0;

	if (argc < 2)
	{
		printf("Usage: %s shader_file\n\n", argv[0]);
		return EXIT_FAILURE;
	}

	/* Fire up Vulkan */
	res = tut1_init(&vk);
	if (!tut1_error_is_success(&res))
	{
		tut1_error_printf(&res, "Could not initialize Vulkan\n");
		goto exit_bad_init;
	}

	/* Enumerate devices */
	res = tut1_enumerate_devices(vk, phy_devs, &dev_count);
	if (tut1_error_is_error(&res))
	{
		tut1_error_printf(&res, "Could not enumerate devices\n");
		goto exit_bad_enumerate;
	}

	/* Set up devices */
	for (uint32_t i = 0; i < dev_count; ++i)
	{
		devs_touched = i + 1;
		res = tut2_setup(&phy_devs[i], &devs[i], VK_QUEUE_COMPUTE_BIT);
		if (!tut1_error_is_success(&res))
		{
			tut1_error_printf(&res, "Could not setup logical device %u, command pools and queues\n", i);
			goto exit_bad_setup;
		}
	}

	/* Load our compute shader */
	for (uint32_t i = 0; i < dev_count; ++i)
	{
		res = tut3_load_shader(&devs[i], argv[1], &shaders[i]);
		if (!tut1_error_is_success(&res))
		{
			tut1_error_printf(&res, "Could not load shader on device %u\n", i);
			goto exit_bad_shader;
		}
	}

	printf("Loaded the shader, awesome!\n");

	/*
	 * Create the pipelines.  There are as many pipelines created as command buffers (just for example).  If
	 * there are not actually enough resources for them, as many as possible are created.  The return value is
	 * deliberately not checked: the loop further down counts the pipelines that did get created instead.
	 */
	for (uint32_t i = 0; i < dev_count; ++i)
		tut3_make_compute_pipeline(&devs[i], &pipelines[i], shaders[i]);

	/*
	 * Like tutorial 2, we have covered a lot of ground in this tutorial.  Let's keep actual usage of our compute
	 * shader to the next tutorial, where we would see the effect of multiple threads on the processing speed.
	 */
	for (uint32_t i = 0; i < dev_count; ++i)
	{
		/* A pipeline slot counts as created when its handle is non-zero */
		uint32_t count = 0;
		for (uint32_t j = 0; j < pipelines[i].pipeline_count; ++j)
			if (pipelines[i].pipelines[j].pipeline)
				++count;

		printf("Created %u pipeline%s on device %u\n", count, count == 1?"":"s", i);
	}

	retval = 0;

	/*
	 * Cleanup after yourself.  The pipelines are only destroyed on the success path, as no error path jumps
	 * past their creation.  Each label below is the landing point for a failure at the matching stage.
	 */

	for (uint32_t i = 0; i < dev_count; ++i)
		tut3_destroy_pipeline(&devs[i], &pipelines[i]);

exit_bad_shader:
	/* Every device is set up by the time this label can be reached; shaders never loaded are still 0 */
	for (uint32_t i = 0; i < dev_count; ++i)
		tut3_free_shader(&devs[i], shaders[i]);

exit_bad_setup:
	for (uint32_t i = 0; i < devs_touched; ++i)
		tut2_cleanup(&devs[i]);

exit_bad_enumerate:
	tut1_exit(vk);

exit_bad_init:
	return retval;
}