/// The assumed layout of the area for mutable WeightVars is: /// data | gpu_0/data | results static uint8_t *allocateMutableWeightVars(const BundleConfig &config) { auto *weights = static_cast<uint8_t *>( alignedAlloc(config, config.mutableWeightVarsMemSize)); printf("Allocated mutable weight variables of size: %lu\n", config.mutableWeightVarsMemSize); return weights; }
/// Initialize the constant weights memory block by loading the weights from the /// weights file. static uint8_t *initConstantWeights(const char *weightsFileName, const BundleConfig &config) { // Load weights. FILE *weightsFile = fopen(weightsFileName, "rb"); if (!weightsFile) { fprintf(stderr, "Could not open the weights file: %s\n", weightsFileName); exit(1); } fseek(weightsFile, 0, SEEK_END); size_t fileSize = ftell(weightsFile); fseek(weightsFile, 0, SEEK_SET); uint8_t *baseConstantWeightVarsAddr = static_cast<uint8_t *>(alignedAlloc(config, fileSize)); printf("Allocated weights of size: %lu\n", fileSize); printf("Expected weights of size: %lu\n", config.constantWeightVarsMemSize); assert(fileSize == config.constantWeightVarsMemSize && "Wrong weights file size"); int result = fread(baseConstantWeightVarsAddr, fileSize, 1, weightsFile); if (result != 1) { perror("Could not read the weights file"); } else { printf("Loaded weights of size: %lu from the file %s\n", fileSize, weightsFileName); } fclose(weightsFile); return baseConstantWeightVarsAddr; }
/// Hands out size_bytes of memory from the current memory unit, advancing the
/// unit's top pointer by the request rounded up to a multiple of `alignment`.
///
/// When the current unit does not have enough room left, a fresh unit of the
/// full per-unit capacity (`size`) is allocated and becomes the current unit.
///
/// @param size_bytes number of bytes requested; must not exceed `size`.
/// @return pointer to the start of the reserved region, or NULL when the
///         request is larger than a unit or a new unit could not be allocated.
void * StackAllocator::alloc(uint size_bytes)
{
    //NEWLOG("[StackAllocator] INFO: requested %u bytes.", size_bytes);
    if (size_bytes > size)
    {
        NEWLOG("[StackAllocator] ERROR: this StackAllocator is unable to hold such large objects. %u bytes requested, %u per unit available.", size_bytes, size);
        return NULL;
    }

    // Round the request up to the next multiple of the alignment.
    // NOTE(review): if `size` is not itself a multiple of `alignment`, the
    // rounded size can exceed a unit's capacity - confirm callers avoid this.
    uint actual_size = size_bytes;
    if (actual_size % alignment != 0u)
        actual_size = ((actual_size / alignment) + 1u) * alignment;

    if (getMemoryLeftInPresentUnit() < actual_size)
    {
        // units.size() is not a uint, so convert it explicitly for the %u specifier.
        NEWLOG("[StackAllocator] Warning: not enough memory, allocating new Unit, #%u", static_cast<uint>(units.size()));

        // A unit must always provide the full per-unit capacity, exactly as the
        // constructor allocates it; sizing it by the current request would let
        // later allocations run past the end of the unit.
        void * new_begin = alignedAlloc(alignment, size);
        if (new_begin == NULL)
        {
            NEWLOG("[StackAllocator] ERROR: failed to allocate a new Unit of %u bytes.", size);
            return NULL;
        }
        units.push_back(MemoryUnit(new_begin));
        // Re-take the address after push_back, which may have moved the elements.
        currentUnit = &units[units.size() - 1];
    }

    void * result = currentUnit->top;
    // Byte-wise pointer arithmetic instead of a round trip through an integer.
    currentUnit->top = static_cast<void *>(static_cast<char *>(result) + actual_size);
    return result;
}
/// Builds an allocator whose memory units are a_size bytes each and whose
/// allocations are rounded to a_alignment.
///
/// @param a_size      capacity requested for every memory unit.
/// @param a_alignment alignment passed to alignedAlloc(); asserted to be a
///                    non-zero power of two.
StackAllocator::StackAllocator(uint a_size, uint a_alignment)
    : size(a_size),
      alignment(a_alignment)
{
    // Validate the alignment before any memory is requested.
    HASSERT(a_alignment > 0);
    HASSERT(!(a_alignment & (a_alignment - 1))); //true for powers of two

    // Create the first unit and make it the one allocations are served from.
    units.push_back(MemoryUnit(alignedAlloc(a_alignment, a_size)));
    currentUnit = &units[0];
}
// Prepare and initialize uniform buffer containing shader uniforms void prepareUniformBuffers() { // Allocate data for the dynamic uniform buffer object // We allocate this manually as the alignment of the offset differs between GPUs // Calculate required alignment depending on device limits size_t uboAlignment = vulkanDevice->properties.limits.minUniformBufferOffsetAlignment; dynamicAlignment = (sizeof(glm::mat4) / uboAlignment) * uboAlignment + ((sizeof(glm::mat4) % uboAlignment) > 0 ? uboAlignment : 0); size_t bufferSize = OBJECT_INSTANCES * dynamicAlignment; uboDataDynamic.model = (glm::mat4*)alignedAlloc(bufferSize, dynamicAlignment); assert(uboDataDynamic.model); std::cout << "minUniformBufferOffsetAlignment = " << uboAlignment << std::endl; std::cout << "dynamicAlignment = " << dynamicAlignment << std::endl; // Vertex shader uniform buffer block // Static shared uniform buffer object with projection and view matrix VK_CHECK_RESULT(vulkanDevice->createBuffer( VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, &uniformBuffers.view, sizeof(uboVS))); // Uniform buffer object with per-object matrices VK_CHECK_RESULT(vulkanDevice->createBuffer( VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, &uniformBuffers.dynamic, bufferSize)); // Map persistent VK_CHECK_RESULT(uniformBuffers.view.map()); VK_CHECK_RESULT(uniformBuffers.dynamic.map()); // Prepare per-object matrices with offsets and random rotations std::mt19937 rndGen(static_cast<uint32_t>(time(0))); std::normal_distribution<float> rndDist(-1.0f, 1.0f); for (uint32_t i = 0; i < OBJECT_INSTANCES; i++) { rotations[i] = glm::vec3(rndDist(rndGen), rndDist(rndGen), rndDist(rndGen)) * 2.0f * (float)M_PI; rotationSpeeds[i] = glm::vec3(rndDist(rndGen), rndDist(rndGen), rndDist(rndGen)); } updateUniformBuffers(); updateDynamicUniformBuffer(true); }
/// Allocate the memory block used for activations.
///
/// \param config bundle configuration; config.activationsMemSize gives the
///        number of bytes requested from alignedAlloc().
/// \returns the pointer obtained from alignedAlloc(), viewed as bytes.
static uint8_t *initActivations(const BundleConfig &config) {
  void *const block = alignedAlloc(config, config.activationsMemSize);
  return static_cast<uint8_t *>(block);
}