inline __device__ float get_feature_value_tex2d( const int x, const int y, const channel_index_t channel_index, const box_coordinate_t box_min_corner_x, const box_coordinate_t box_min_corner_y, const box_coordinate_t box_max_corner_x, const box_coordinate_t box_max_corner_y, const int integral_channels_height) { // if x or y are too high, some of these indices may be fall outside the channel memory const float y_offset = y + channel_index*integral_channels_height; // in CUDA 5 (4.2 ?) references to textures are not allowed, we use macro work around // gpu_integral_channels_2d_texture_t &tex = integral_channels_2d_texture; #define tex integral_channels_2d_texture //const gpu_integral_channels_t::Type // could cause overflows during a + c const float a = tex2D(tex, x + box_min_corner_x, box_min_corner_y + y_offset), // top left b = tex2D(tex, x + box_max_corner_x, box_min_corner_y + y_offset), // top right c = tex2D(tex, x + box_max_corner_x, box_max_corner_y + y_offset), // bottom right d = tex2D(tex, x + box_min_corner_x, box_max_corner_y + y_offset); // bottom left #undef tex const float feature_value = a +c -b -d; return feature_value; }
static D3DPtr<IDirect3DTexture9> LoadDDS(const void* data, size_t size, float sample_ratio, TextureInfo& info) { D3DPtr<IDirect3DTexture9> ret; auto tex = gli::load(static_cast<const char*>(data), size); if (tex.format() == gli::FORMAT_UNDEFINED) return nullptr; auto format = static_cast<D3DFORMAT>(gli::dx{}.translate(tex.format()).D3DFormat); gli::texture2d tex2D(tex); if (tex2D.empty()) return nullptr; auto Width = tex2D.extent().x; auto Height = tex2D.extent().y; auto Levels = tex2D.levels(); // The mipmap level that we will use as the main level for the new texture. // The reason for this is resizing: The mipmaps already contain downsampled // versions, so using one of these avoids a resize step being done at runtime. auto InitialMipmapLevel = GetInitialMipmapLevel(Width, Height, Levels, sample_ratio); if (InitialMipmapLevel > 0) { Width = std::max(Width >> InitialMipmapLevel, 4); Height = std::max(Height >> InitialMipmapLevel, 4); Levels -= InitialMipmapLevel; }
PS_OUT PS_MAIN(PS_IN Input) { PS_OUT Out = (PS_OUT)0; Out.vAlbedo = vector(tex2D(BaseTexture , Input.vTexUV)); return Out; }
__device__ inline bc2s compute_feature_tex(const i_int2& n) { bc2s b; for(int i = 0; i < 8; i ++) { b[i] = tex2D(bc2s_tex_s1, n.c() + circle_r3[i][1], n.r() + circle_r3[i][0]).x; } for(int i = 0; i < 8; i ++) { b[i+8] = tex2D(bc2s_tex_s2, n.c() + circle_r3[i][1] * 2, n.r() + circle_r3[i][0] * 2).x; } return b; }
PS_OUT PS_MAIN(PS_IN In) { PS_OUT Out = (PS_OUT)0; Out.vColor = tex2D(BaseSampler, In.vUV); Out.vColor.a = 0.f; return Out; }
PS_OUT PS_MAIN(PS_IN In) { PS_OUT Out = (PS_OUT)0; Out.vColor = tex2D(BaseSampler, In.vUV); //float fAlpha = Out.vColor.r + Out.vColor.g + Out.vColor.b; return Out; }
__device__ __forceinline__ T operator ()(float y, float x) const { #if CV_CUDEV_ARCH < 300 // Use the texture reference return tex2D(CvCudevTextureRef<T>::ref, x, y); #else // Use the texture object return tex2D<T>(texObj, x, y); #endif }
__device__ inline int distance_tex(const bc2s& a, const i_short2& n, const unsigned scale = 1) { int d = 0; if (scale == 1) { for(int i = 0; i < 8; i ++) { int v = tex2D(bc2s_tex_s1, n.c() + circle_r3[i][1], n.r() + circle_r3[i][0]).x; d += ::abs(v - a[i]); } } //else { for(int i = 0; i < 8; i ++) { int v = tex2D(bc2s_tex_s2, n.c() + circle_r3[i][1] * 2, n.r() + circle_r3[i][0] * 2).x; d += ::abs(v - a[8+i]) * 5; } } // return d / (255.f * 16.f); return d; }
__global__ void convolve_kernel(I, kernel_image2d<O> out, unsigned kernelsize) { i_int2 p = thread_pos2d(); if (!out.has(p)) return; bt_change_vtype(O, type_mult(bt_vtype(O), float)) r = zero(); for(int i = 0; i < kernelsize; i++) { float w = tex1Dfetch(tex_weights, i); point2d<int> n = i_int2(tex1Dfetch(tex_dpoints, i)) + p; if (out.has(n)) r += O(tex2D(conv_input_tex<I>::tex(), n)) * w; } out(p) = r; }
/*! \fn void SeparableFilter::createAndLoadRowShaders_ATI(); \brief Procedurally Generate the Row Filter Shaders */ void SeparableFilter::createAndLoadRowShaders_ATI() { int i,p; for (i=0; i<NUM_FILTER_LEVELS; i++) row_shader[i]=NULL; char fragmentSource[4*4096]; for (p=0;p< num_kernel_levels; p++) { i = kernel_levels[p]; /////////////// Shader for ROW CONVOLUTION WITH BOUNDARY PADDING ////////////////////////////// sprintf(fragmentSource,"\n \ fragout main( vf30 IN, uniform sampler2D texture, \n \ uniform float offset, \n \ uniform float kernel1[%d], \n \ uniform float kernel2[%d]) { \n \ fragout OUT; \n \ float4 vec0, vec1, vec2, vec3, vec4, vec5, acc1, acc2; \n \ float sum1,sum2; \n \ vec0 = float4(kernel2[%d], kernel2[%d], kernel2[%d], 0.0 ); \n \ vec1 = float4(kernel1[%d], kernel1[%d], kernel1[%d], 0.0 ); \n \ vec2.x = tex2D(texture, IN.TEX1.xy).r; \n \ vec2.y = tex2D(texture, IN.TEX0.xy).r; \n \ vec2.z = tex2D(texture, IN.TEX2.xy).r; \n \ vec2.w = 0.0; \n \ acc1 = vec1.xyzw * vec2.xyzw; \n \ acc2 = vec0.xyzw * vec2.xyzw; \n \ sum1 = acc1.r + acc1.g + acc1.b ; \n \ sum2 = acc2.r + acc2.g + acc2.b ; \n \ \n", 2*i+1, 2*i+1, i-1, i, i+1, i-1, i, i+1 ); for (int j = 1; j <= 1; j++) // HACK HACK !!! Save 2 ms by doing only 1 iteration. Actually j <=i/2 { char loopSource[4096]; if (j==1) { sprintf(loopSource,"\n \ vec3 = float4( kernel1[%2d], kernel1[%2d], kernel1[%2d], kernel1[%2d]); \n \ vec5 = float4( kernel2[%2d], kernel2[%2d], kernel2[%2d], kernel2[%2d]); \n \ vec4.x = tex2D(texture, IN.TEX3.xy).r; \n \ vec4.y = tex2D(texture, IN.TEX4.xy).r; \n \ vec4.z = tex2D(texture, IN.TEX5.xy).r; \n \ vec4.w = tex2D(texture, IN.TEX6.xy).r; \n \ acc1 = vec3.xyzw * vec4.xyzw; \n \ acc2 = vec5.xyzw * vec4.xyzw; \n \ sum1 = sum1 + acc1.r + acc1.g + acc1.b + acc1.a; \n \ sum2 = sum2 + acc2.r + acc2.g + acc2.b + acc2.a; \n", i-2*j-1 , i-2*j , i+2*j , i+2*j+1, i-2*j-1 , i-2*j , i+2*j , i+2*j+1, 2*j+1, 2*j , 2*j , 2*j+1); } else { sprintf(loopSource,"\n \ vec3 = float4( kernel1[%2d], kernel1[%2d], kernel1[%2d], kernel1[%2d]); \n \ vec5 = float4( kernel2[%2d], kernel2[%2d], kernel2[%2d], kernel2[%2d]); \n \ vec4.x = tex2D(texture, float2(IN.TEX0.x - %2d*offset , IN.TEX0.y)).r; \n \ vec4.y = tex2D(texture, float2(IN.TEX0.x - %2d*offset , IN.TEX0.y)).r; \n \ vec4.z = tex2D(texture, float2(IN.TEX0.x + %2d*offset , IN.TEX0.y)).r; \n \ vec4.w = tex2D(texture, float2(IN.TEX0.x + %2d*offset , IN.TEX0.y)).r; \n \ acc1 = vec3.xyzw * vec4.xyzw; \n \ acc2 = vec5.xyzw * vec4.xyzw; \n \ sum1 = sum1 + acc1.r + acc1.g + acc1.b + acc1.a; \n \ sum2 = sum2 + acc2.r + acc2.g + acc2.b + acc2.a; \n", i-2*j-1 , i-2*j , i+2*j , i+2*j+1, i-2*j-1 , i-2*j , i+2*j , i+2*j+1, 2*j+1, 2*j , 2*j , 2*j+1); } strcat(fragmentSource,loopSource); } strcat(fragmentSource,"\n \ OUT.col = float4(sum1, 0.5 + 0.5 * sum2 ,1.0,1.0); \n \ return OUT; \n \ } \n");
float4 test(float2 p, float2 d, float range){ if(p.x > range || p.y > range) return float4(1.0,1.0,1.0,1.0); return tex2D(gphTexture0,(p*0.5)/float2(range,range)); }
void loadTexture(std::string fileName, VkFormat format, bool forceLinearTiling) { #if defined(__ANDROID__) // Textures are stored inside the apk on Android (compressed) // So they need to be loaded via the asset manager AAsset* asset = AAssetManager_open(androidApp->activity->assetManager, fileName.c_str(), AASSET_MODE_STREAMING); assert(asset); size_t size = AAsset_getLength(asset); assert(size > 0); void *textureData = malloc(size); AAsset_read(asset, textureData, size); AAsset_close(asset); gli::texture2d tex2D(gli::load((const char*)textureData, size)); #else gli::texture2d tex2D(gli::load(fileName)); #endif assert(!tex2D.empty()); VkFormatProperties formatProperties; texture.width = static_cast<uint32_t>(tex2D[0].extent().x); texture.height = static_cast<uint32_t>(tex2D[0].extent().y); // calculate num of mip maps // numLevels = 1 + floor(log2(max(w, h, d))) // Calculated as log2(max(width, height, depth))c + 1 (see specs) texture.mipLevels = floor(log2(std::max(texture.width, texture.height))) + 1; // Get device properites for the requested texture format vkGetPhysicalDeviceFormatProperties(physicalDevice, format, &formatProperties); // Mip-chain generation requires support for blit source and destination assert(formatProperties.optimalTilingFeatures & VK_FORMAT_FEATURE_BLIT_SRC_BIT); assert(formatProperties.optimalTilingFeatures & VK_FORMAT_FEATURE_BLIT_DST_BIT); VkMemoryAllocateInfo memAllocInfo = vkTools::initializers::memoryAllocateInfo(); VkMemoryRequirements memReqs = {}; // Create a host-visible staging buffer that contains the raw image data VkBuffer stagingBuffer; VkDeviceMemory stagingMemory; VkBufferCreateInfo bufferCreateInfo = vkTools::initializers::bufferCreateInfo(); bufferCreateInfo.size = tex2D.size(); // This buffer is used as a transfer source for the buffer copy bufferCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT; bufferCreateInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE; VK_CHECK_RESULT(vkCreateBuffer(device, &bufferCreateInfo, nullptr, &stagingBuffer)); vkGetBufferMemoryRequirements(device, stagingBuffer, &memReqs); memAllocInfo.allocationSize = memReqs.size; memAllocInfo.memoryTypeIndex = vulkanDevice->getMemoryType(memReqs.memoryTypeBits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT); VK_CHECK_RESULT(vkAllocateMemory(device, &memAllocInfo, nullptr, &stagingMemory)); VK_CHECK_RESULT(vkBindBufferMemory(device, stagingBuffer, stagingMemory, 0)); // Copy texture data into staging buffer uint8_t *data; VK_CHECK_RESULT(vkMapMemory(device, stagingMemory, 0, memReqs.size, 0, (void **)&data)); memcpy(data, tex2D.data(), tex2D.size()); vkUnmapMemory(device, stagingMemory); // Create optimal tiled target image VkImageCreateInfo imageCreateInfo = vkTools::initializers::imageCreateInfo(); imageCreateInfo.imageType = VK_IMAGE_TYPE_2D; imageCreateInfo.format = format; imageCreateInfo.mipLevels = texture.mipLevels; imageCreateInfo.arrayLayers = 1; imageCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT; imageCreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL; imageCreateInfo.usage = VK_IMAGE_USAGE_SAMPLED_BIT; imageCreateInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE; imageCreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; imageCreateInfo.extent = { texture.width, texture.height, 1 }; imageCreateInfo.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_SAMPLED_BIT; VK_CHECK_RESULT(vkCreateImage(device, &imageCreateInfo, nullptr, &texture.image)); vkGetImageMemoryRequirements(device, texture.image, &memReqs); memAllocInfo.allocationSize = memReqs.size; memAllocInfo.memoryTypeIndex = vulkanDevice->getMemoryType(memReqs.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); VK_CHECK_RESULT(vkAllocateMemory(device, &memAllocInfo, nullptr, &texture.deviceMemory)); VK_CHECK_RESULT(vkBindImageMemory(device, texture.image, texture.deviceMemory, 0)); VkCommandBuffer copyCmd = VulkanExampleBase::createCommandBuffer(VK_COMMAND_BUFFER_LEVEL_PRIMARY, true); VkImageSubresourceRange subresourceRange = {}; subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; subresourceRange.levelCount = 1; subresourceRange.layerCount = 1; // Optimal image will be used as destination for the copy, so we must transfer from our initial undefined image layout to the transfer destination layout vkTools::setImageLayout(copyCmd, texture.image, VK_IMAGE_ASPECT_COLOR_BIT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, subresourceRange); // Copy the first mip of the chain, remaining mips will be generated VkBufferImageCopy bufferCopyRegion = {}; bufferCopyRegion.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; bufferCopyRegion.imageSubresource.mipLevel = 0; bufferCopyRegion.imageSubresource.baseArrayLayer = 0; bufferCopyRegion.imageSubresource.layerCount = 1; bufferCopyRegion.imageExtent.width = texture.width; bufferCopyRegion.imageExtent.height = texture.height; bufferCopyRegion.imageExtent.depth = 1; vkCmdCopyBufferToImage(copyCmd, stagingBuffer, texture.image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &bufferCopyRegion); // Transition first mip level to transfer source for read during blit texture.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; vkTools::setImageLayout( copyCmd, texture.image, VK_IMAGE_ASPECT_COLOR_BIT, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, subresourceRange); VulkanExampleBase::flushCommandBuffer(copyCmd, queue, true); // Clean up staging resources vkFreeMemory(device, stagingMemory, nullptr); vkDestroyBuffer(device, stagingBuffer, nullptr); // Generate the mip chain // --------------------------------------------------------------- // We copy down the whole mip chain doing a blit from mip-1 to mip // An alternative way would be to always blit from the first mip level and sample that one down VkCommandBuffer blitCmd = VulkanExampleBase::createCommandBuffer(VK_COMMAND_BUFFER_LEVEL_PRIMARY, true); // Copy down mips from n-1 to n for (int32_t i = 1; i < texture.mipLevels; i++) { VkImageBlit imageBlit{}; // Source imageBlit.srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; imageBlit.srcSubresource.layerCount = 1; imageBlit.srcSubresource.mipLevel = i-1; imageBlit.srcOffsets[1].x = int32_t(texture.width >> (i - 1)); imageBlit.srcOffsets[1].y = int32_t(texture.height >> (i - 1)); imageBlit.srcOffsets[1].z = 1; // Destination imageBlit.dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; imageBlit.dstSubresource.layerCount = 1; imageBlit.dstSubresource.mipLevel = i; imageBlit.dstOffsets[1].x = int32_t(texture.width >> i); imageBlit.dstOffsets[1].y = int32_t(texture.height >> i); imageBlit.dstOffsets[1].z = 1; VkImageSubresourceRange mipSubRange = {}; mipSubRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; mipSubRange.baseMipLevel = i; mipSubRange.levelCount = 1; mipSubRange.layerCount = 1; // Transiton current mip level to transfer dest vkTools::setImageLayout( blitCmd, texture.image, VK_IMAGE_ASPECT_COLOR_BIT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, mipSubRange, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT); // Blit from previous level vkCmdBlitImage( blitCmd, texture.image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, texture.image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &imageBlit, VK_FILTER_LINEAR); // Transiton current mip level to transfer source for read in next iteration vkTools::setImageLayout( blitCmd, texture.image, VK_IMAGE_ASPECT_COLOR_BIT, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, mipSubRange, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT); } // After the loop, all mip layers are in TRANSFER_SRC layout, so transition all to SHADER_READ subresourceRange.levelCount = texture.mipLevels; vkTools::setImageLayout( blitCmd, texture.image, VK_IMAGE_ASPECT_COLOR_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, texture.imageLayout, subresourceRange); VulkanExampleBase::flushCommandBuffer(blitCmd, queue, true); // --------------------------------------------------------------- // Create samplers samplers.resize(3); VkSamplerCreateInfo sampler = vkTools::initializers::samplerCreateInfo(); sampler.magFilter = VK_FILTER_LINEAR; sampler.minFilter = VK_FILTER_LINEAR; sampler.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR; sampler.addressModeU = VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT; sampler.addressModeV = VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT; sampler.addressModeW = VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT; sampler.mipLodBias = 0.0f; sampler.compareOp = VK_COMPARE_OP_NEVER; sampler.minLod = 0.0f; sampler.maxLod = 0.0f; sampler.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE; sampler.maxAnisotropy = 1.0; sampler.anisotropyEnable = VK_FALSE; // Without mip mapping VK_CHECK_RESULT(vkCreateSampler(device, &sampler, nullptr, &samplers[0])); // With mip mapping sampler.maxLod = (float)texture.mipLevels; VK_CHECK_RESULT(vkCreateSampler(device, &sampler, nullptr, &samplers[1])); // With mip mapping and anisotropic filtering if (vulkanDevice->features.samplerAnisotropy) { sampler.maxAnisotropy = vulkanDevice->properties.limits.maxSamplerAnisotropy; sampler.anisotropyEnable = VK_TRUE; } VK_CHECK_RESULT(vkCreateSampler(device, &sampler, nullptr, &samplers[2])); // Create image view VkImageViewCreateInfo view = vkTools::initializers::imageViewCreateInfo(); view.image = texture.image; view.viewType = VK_IMAGE_VIEW_TYPE_2D; view.format = format; view.components = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A }; view.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; view.subresourceRange.baseMipLevel = 0; view.subresourceRange.baseArrayLayer = 0; view.subresourceRange.layerCount = 1; view.subresourceRange.levelCount = texture.mipLevels; VK_CHECK_RESULT(vkCreateImageView(device, &view, nullptr, &texture.view)); }
void loadTexture(const char* fileName, VkFormat format, bool forceLinearTiling) { VkFormatProperties formatProperties; VkResult err; AAsset* asset = AAssetManager_open(app->activity->assetManager, fileName, AASSET_MODE_STREAMING); assert(asset); size_t size = AAsset_getLength(asset); assert(size > 0); void *textureData = malloc(size); AAsset_read(asset, textureData, size); AAsset_close(asset); gli::texture2D tex2D(gli::load((const char*)textureData, size)); assert(!tex2D.empty()); texture.width = tex2D[0].dimensions().x; texture.height = tex2D[0].dimensions().y; texture.mipLevels = tex2D.levels(); // Get device properites for the requested texture format vkGetPhysicalDeviceFormatProperties(physicalDevice, format, &formatProperties); // Only use linear tiling if requested (and supported by the device) // Support for linear tiling is mostly limited, so prefer to use // optimal tiling instead // On most implementations linear tiling will only support a very // limited amount of formats and features (mip maps, cubemaps, arrays, etc.) VkBool32 useStaging = true; // Only use linear tiling if forced if (forceLinearTiling) { // Don't use linear if format is not supported for (linear) shader sampling useStaging = !(formatProperties.linearTilingFeatures & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT); } VkImageCreateInfo imageCreateInfo = vkTools::initializers::imageCreateInfo(); imageCreateInfo.imageType = VK_IMAGE_TYPE_2D; imageCreateInfo.format = format; imageCreateInfo.mipLevels = 1; imageCreateInfo.arrayLayers = 1; imageCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT; imageCreateInfo.tiling = VK_IMAGE_TILING_LINEAR; imageCreateInfo.usage = (useStaging) ? VK_IMAGE_USAGE_TRANSFER_SRC_BIT : VK_IMAGE_USAGE_SAMPLED_BIT; imageCreateInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE; imageCreateInfo.flags = 0; imageCreateInfo.extent = { texture.width, texture.height, 1 }; VkMemoryAllocateInfo memAllocInfo = vkTools::initializers::memoryAllocateInfo(); VkMemoryRequirements memReqs; startSetupCommandBuffer(); if (useStaging) { // Load all available mip levels into linear textures // and copy to optimal tiling target struct MipLevel { VkImage image; VkDeviceMemory memory; }; std::vector<MipLevel> mipLevels; mipLevels.resize(texture.mipLevels); // Copy mip levels for (uint32_t level = 0; level < texture.mipLevels; ++level) { imageCreateInfo.extent.width = tex2D[level].dimensions().x; imageCreateInfo.extent.height = tex2D[level].dimensions().y; imageCreateInfo.extent.depth = 1; err = vkCreateImage(device, &imageCreateInfo, nullptr, &mipLevels[level].image); assert(!err); vkGetImageMemoryRequirements(device, mipLevels[level].image, &memReqs); memAllocInfo.allocationSize = memReqs.size; getMemoryType(memReqs.memoryTypeBits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, &memAllocInfo.memoryTypeIndex); err = vkAllocateMemory(device, &memAllocInfo, nullptr, &mipLevels[level].memory); assert(!err); err = vkBindImageMemory(device, mipLevels[level].image, mipLevels[level].memory, 0); assert(!err); VkImageSubresource subRes = {}; subRes.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; VkSubresourceLayout subResLayout; void *data; vkGetImageSubresourceLayout(device, mipLevels[level].image, &subRes, &subResLayout); assert(!err); err = vkMapMemory(device, mipLevels[level].memory, 0, memReqs.size, 0, &data); assert(!err); size_t levelSize = tex2D[level].size(); memcpy(data, tex2D[level].data(), levelSize); vkUnmapMemory(device, mipLevels[level].memory); LOGW("setImageLayout %d", 1); // Image barrier for linear image (base) // Linear image will be used as a source for the copy vkTools::setImageLayout( setupCmdBuffer, mipLevels[level].image, VK_IMAGE_ASPECT_COLOR_BIT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); } // Setup texture as blit target with optimal tiling imageCreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL; imageCreateInfo.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT; imageCreateInfo.mipLevels = texture.mipLevels; imageCreateInfo.extent = { texture.width, texture.height, 1 }; err = vkCreateImage(device, &imageCreateInfo, nullptr, &texture.image); assert(!err); vkGetImageMemoryRequirements(device, texture.image, &memReqs); memAllocInfo.allocationSize = memReqs.size; getMemoryType(memReqs.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, &memAllocInfo.memoryTypeIndex); err = vkAllocateMemory(device, &memAllocInfo, nullptr, &texture.deviceMemory); assert(!err); err = vkBindImageMemory(device, texture.image, texture.deviceMemory, 0); assert(!err); // Image barrier for optimal image (target) // Optimal image will be used as destination for the copy vkTools::setImageLayout( setupCmdBuffer, texture.image, VK_IMAGE_ASPECT_COLOR_BIT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); // Copy mip levels one by one for (uint32_t level = 0; level < texture.mipLevels; ++level) { // Copy region for image blit VkImageCopy copyRegion = {}; copyRegion.srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; copyRegion.srcSubresource.baseArrayLayer = 0; copyRegion.srcSubresource.mipLevel = 0; copyRegion.srcSubresource.layerCount = 1; copyRegion.srcOffset = { 0, 0, 0 }; copyRegion.dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; copyRegion.dstSubresource.baseArrayLayer = 0; // Set mip level to copy the linear image to copyRegion.dstSubresource.mipLevel = level; copyRegion.dstSubresource.layerCount = 1; copyRegion.dstOffset = { 0, 0, 0 }; copyRegion.extent.width = tex2D[level].dimensions().x; copyRegion.extent.height = tex2D[level].dimensions().y; copyRegion.extent.depth = 1; // Put image copy into command buffer vkCmdCopyImage( setupCmdBuffer, mipLevels[level].image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, texture.image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, ©Region); // Change texture image layout to shader read after the copy texture.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; vkTools::setImageLayout( setupCmdBuffer, texture.image, VK_IMAGE_ASPECT_COLOR_BIT, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, texture.imageLayout); } // Clean up linear images // No longer required after mip levels // have been transformed over to optimal tiling for (auto& level : mipLevels) { vkDestroyImage(device, level.image, nullptr); vkFreeMemory(device, level.memory, nullptr); } } else { // Prefer using optimal tiling, as linear tiling // may support only a small set of features // depending on implementation (e.g. no mip maps, only one layer, etc.) VkImage mappableImage; VkDeviceMemory mappableMemory; // Load mip map level 0 to linear tiling image err = vkCreateImage(device, &imageCreateInfo, nullptr, &mappableImage); assert(!err); // Get memory requirements for this image // like size and alignment vkGetImageMemoryRequirements(device, mappableImage, &memReqs); // Set memory allocation size to required memory size memAllocInfo.allocationSize = memReqs.size; // Get memory type that can be mapped to host memory getMemoryType(memReqs.memoryTypeBits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, &memAllocInfo.memoryTypeIndex); // Allocate host memory err = vkAllocateMemory(device, &memAllocInfo, nullptr, &mappableMemory); assert(!err); // Bind allocated image for use err = vkBindImageMemory(device, mappableImage, mappableMemory, 0); assert(!err); // Get sub resource layout // Mip map count, array layer, etc. VkImageSubresource subRes = {}; subRes.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; VkSubresourceLayout subResLayout; void *data; // Get sub resources layout // Includes row pitch, size offsets, etc. vkGetImageSubresourceLayout(device, mappableImage, &subRes, &subResLayout); assert(!err); // Map image memory err = vkMapMemory(device, mappableMemory, 0, memReqs.size, 0, &data); assert(!err); // Copy image data into memory memcpy(data, tex2D[subRes.mipLevel].data(), tex2D[subRes.mipLevel].size()); vkUnmapMemory(device, mappableMemory); // Linear tiled images don't need to be staged // and can be directly used as textures texture.image = mappableImage; texture.deviceMemory = mappableMemory; texture.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; // Setup image memory barrier vkTools::setImageLayout( setupCmdBuffer, texture.image, VK_IMAGE_ASPECT_COLOR_BIT, VK_IMAGE_LAYOUT_UNDEFINED, texture.imageLayout); } flushSetupCommandBuffer(); // Create sampler // In Vulkan textures are accessed by samplers // This separates all the sampling information from the // texture data // This means you could have multiple sampler objects // for the same texture with different settings // Similar to the samplers available with OpenGL 3.3 VkSamplerCreateInfo sampler = vkTools::initializers::samplerCreateInfo(); sampler.magFilter = VK_FILTER_LINEAR; sampler.minFilter = VK_FILTER_LINEAR; sampler.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR; sampler.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; sampler.addressModeV = sampler.addressModeU; sampler.addressModeW = sampler.addressModeU; sampler.mipLodBias = 0.0f; sampler.compareOp = VK_COMPARE_OP_NEVER; sampler.minLod = 0.0f; // Max level-of-detail should match mip level count sampler.maxLod = (useStaging) ? (float)texture.mipLevels : 0.0f; // Enable anisotropic filtering sampler.maxAnisotropy = 8; sampler.anisotropyEnable = VK_TRUE; sampler.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE; err = vkCreateSampler(device, &sampler, nullptr, &texture.sampler); assert(!err); // Create image view // Textures are not directly accessed by the shaders and // are abstracted by image views containing additional // information and sub resource ranges VkImageViewCreateInfo view = vkTools::initializers::imageViewCreateInfo(); view.image = VK_NULL_HANDLE; view.viewType = VK_IMAGE_VIEW_TYPE_2D; view.format = format; view.components = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A }; view.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; view.subresourceRange.baseMipLevel = 0; view.subresourceRange.baseArrayLayer = 0; view.subresourceRange.layerCount = 1; // Linear tiling usually won't support mip maps // Only set mip map count if optimal tiling is used view.subresourceRange.levelCount = (useStaging) ? texture.mipLevels : 1; view.image = texture.image; err = vkCreateImageView(device, &view, nullptr, &texture.view); assert(!err); }
PS_OUTPUT ps_main(PS_INPUT input ) { \n\ PS_OUTPUT output; \n\ float4 texColor = tex2D(Tex0, input.TexCoord); \n\ output.Color = Color * texColor; \n\ return output; \n\ } \n";
void FFAST4(thread_info<CPU> ti, kernel_image2d<i_float4> frame_color, kernel_image2d<V> frame_s1, kernel_image2d<V> frame_s2, // kernel_image2d<dffast4> out, kernel_image2d<i_float1> pertinence, float grad_thresh) { point2d<int> p = thread_pos2d(ti); if (!frame_s1.has(p))//; || track(p).x == 0) return; if (p.row() < 6 || p.row() >= pertinence.domain().nrows() - 6 || p.col() < 6 || p.col() >= pertinence.domain().ncols() - 6) { pertinence(p).x = 0.f; return; } gl01f pv; { float min_diff = 9999999.f; float max_single_diff = 0.f; pv = V(tex2D(flag<CPU>(), s1_tex, frame_s1, p)); int sign = 0; for(int i = 0; i < 8; i++) { gl01f v1 = V(tex2D(flag<CPU>(), s1_tex, frame_s1, p.col() + circle_r3_h[i][1], p.row() + circle_r3_h[i][0])); gl01f v2 = V(tex2D(flag<CPU>(), s1_tex, frame_s1, p.col() + circle_r3_h[(i+8)][1], p.row() + circle_r3_h[(i+8)][0])); { float diff = pv - (v1 + v2) / 2.f; float adiff = fabs(diff); if (adiff < min_diff) { min_diff = adiff; if (min_diff < 0.001f) break; } if (max_single_diff < adiff) max_single_diff = adiff; } } pv = V(tex2D(flag<CPU>(), s2_tex, frame_s2, p.col()/2, p.row()/2)); float min_diff_large = 9999999.f; float max_single_diff_large = 0.f; //int min_orientation_large; for(int i = 0; i < 8; i++) { gl01f v1 = V(tex2D(flag<CPU>(), s2_tex, frame_s2, p.col()/2 + circle_r3_h[i][1], p.row()/2 + circle_r3_h[i][0])); gl01f v2 = V(tex2D(flag<CPU>(), s2_tex, frame_s2, p.col()/2 + circle_r3_h[(i+8)][1], p.row()/2 + circle_r3_h[(i+8)][0])); { float diff = pv - (v1 + v2) / 2.f; float adiff = fabs(diff); if (adiff < min_diff_large) { min_diff_large = adiff; if (min_diff_large < 0.001f) break; } if (max_single_diff_large < adiff) max_single_diff_large = adiff; } } if (min_diff < min_diff_large) { min_diff = min_diff_large; max_single_diff = max_single_diff_large; } if (max_single_diff >= grad_thresh) { min_diff = min_diff / max_single_diff; } else min_diff = 0; pertinence(p) = min_diff; // pertinence(p) = p.col() / float(frame_s1.ncols()); // pertinence(p) = float(frame_s1(p)) / 255.f; // out(p) = distances; } }
void main (void) { vec2 MotionBlurPos=vec2 (0.0,0.0); //float ff=tex2D (texture2d_1,oUV.xy).x; //ff=abs (ff-cl.x); //ff=0.5+0.5*sin(ff*10000.0); //cl=ff*vec4 (1.0); /* float PI=3.1415926535; float2 UV=oUV.xy; UV-=float2 (0.5,0.5); UV*=2.0; float3 DirV =float3 (UV.x,UV.y,0.6); float lgn=length (DirV); DirV/=lgn; UV.xy=UV.xy+(DirV.xy-UV.xy)*-0.0; UV*=0.5; UV+=float2 (0.5,0.5); */ float2 UV=oUV.xy; float4 cl=tex2D (texture2d_1,UV.xy); cl=float4 (0.0); float3 Direction=tex2D (texture2d_velocities,UV.xy).xyz; //Direction.xy=float2 (0.5); Direction.xyz-=float3 (0.5); Direction*=2.0; float alpha=1.0*length (Direction.xyz); alpha=clamp (alpha,0.0,1.0); //alpha=1-alpha; //alpha*=alpha; //alpha=1-alpha; //alpha*=alpha; //alpha=0.1; Direction*=0.01; if (alpha>0.01) { for (int t=0;t<10;t++) { UV.xy-=Direction.xy; cl+=tex2D (texture2d_1,UV.xy); }; cl*=1.0/10.0; //cl.x+=0.15*alpha; //=float4 (1.0,0.0,0.0,1.0)*alpha; } else cl=tex2D (texture2d_1,UV.xy); float4 clnow=tex2D( texture2d_1,oUV.xy); cl=cl+(clnow-cl)*0.5; UV=oUV.xy; // blur effect float lgn2=length (float2 (1.0,2.0)*(UV.xy-float2 (0.5))); lgn2*=lgn2; float xxx=1.0+(gl_Color.x)*5.0; //xxx=1.0; xxx=clamp (xxx,1.0,100.0); //pow(lgn2,1.8); int count=0; { for (int y=-3;y<3;y++) { for (int x=-3;x<3;x++){ count++; float2 dd=float2 (x,y); if ( (x)==(y)) { //dd*=2.0; } float2 Dist=float2 (x,y)*0.0025*lgn2; Dist=dd*0.0018*lgn2*xxx; Dist+=MotionBlurPos; MotionBlurPos+=MotionBlur.xy*0.01; cl+=tex2D(texture2d_1,UV.xy+Dist); } } cl/=float (count)*(lgn2+(1.0-lgn2)*0.85); } vec4 cll=cl*1.03; cll+=gl_Color; //cll=tex2D (texture2d_1,oUV.xy); /* //Depth blurring float dpth=0.0; count=0; float Limit2=1.0-Limit; for (int y=-4;y<5;y++) { for (int x=-4;x<5;x++){ count++; float2 dd=float2 (x,y)*0.7*Limit2; //dd=ray.xy*5.0; float2 Dist=dd*0.0018; dpth+=tex2D(texture2d_2,UV.xy+Dist).x; } } dpth/=float (count); //dpth=sin(dpth*3.14159); //dpth+=0.1; dpth=pow(dpth,126.0); //dpth+=Limit; dpth=1.0-1.0*dpth; //if (dpth<0.7) dpth=0.0; count=0; cl=vec4(0.0); dpth=clamp(dpth,0.0,1.0); dpth=Limit+((1.0-Limit)-(Limit))*dpth; dpth*=Flo; //dpth=1.0; float vel=tex2D(texture2d_velocities,UV.xy).x; if (dpth>0.02) { for (int y=-4;y<5;y++) { for (int x=-4;x<5;x++){ count++; float2 dd=float2 (x,y)*0.7;//;*vec2 (3.0,3.0); //float2 dd2=float((x+4)*9+(y+4))*float2 (-1.0,1.0)*0.4; //dd=dd+(dd2-dd)*-vel*0.4; float2 Dist=dd*0.0018*dpth; cl+=tex2D(texture2d_1,UV.xy+Dist); //cl.x=dpth; } } cl/=float (count); } else cl=tex2D(texture2d_1,UV.xy); if (vel>0.01) { vec4 newcl=vec4 (0.0); for (int x=0;x<15;x++){ float2 dd2=float(x)*float2 (-1.0,1.0)*0.003*vel; newcl+=tex2D(texture2d_1,UV.xy+dd2); } newcl*=1.0/15.0; cl=cl+(newcl-cl)*0.5; } //cl*=0.4; //cl+=tex2D(texture2d_velocities,UV.xy)*0.5; //cl=tex2D(texture2d_1,UV.xy); //cl=vec4 (dpth); //cl=vec4 (tex2D(texture2d_2,UV.xy).x); */ cl=cll; gl_FragData[0]=cl; gl_FragData[1]=cl; }
//-- CgShaderCelScreen::CgShaderCelScreen(CGcontext cgContext) : CgShader(cgContext, CgShader::VertexFragment) { // [rad] This is vertex and fragment shader // [rad] Setup vertex shader: // [rad] Setup vertex shader entry point SetEntry(CgShader::Vertex, "main"); static const std::string sProgramVertex = " \ \ void main( float4 inPosition : POSITION, \ float2 inTexCoord : TEXCOORD0, \ \ out float4 outPosition : POSITION, \ out float2 outTexCoord : TEXCOORD0, \ \ uniform float4x4 uniModelViewProjMat) \ { \ outPosition = mul(uniModelViewProjMat, inPosition); \ outTexCoord = inTexCoord; \ } "; // [rad] Setup vertex shader program SetProgram(CgShader::Vertex, sProgramVertex); // [rad] Setup fragment shader: // [rad] Setup fragment shader entry point SetEntry(CgShader::Fragment, "main"); static const std::string sProgramFragment = " \ \ void main( float2 inTexCoord : TEXCOORD0, \ \ out float4 outColor : COLOR, \ \ uniform int2 uniTexSize, \ uniform sampler2D uniTexCel, \ uniform sampler2D uniTexOutline) \ { \ float4 cel = tex2D(uniTexCel, inTexCoord); \ float4 outline = tex2D(uniTexOutline, inTexCoord); \ \ if(outline.x == 0) \ { \ outColor = float4(0, 0, 0, 1); \ } \ else \ { \ outColor = cel; \ } \ } "; // [rad] Setup fragment shader program SetProgram(CgShader::Fragment, sProgramFragment); // [rad] Create shaders (both fragment and vertex) Create(); // [rad] Set params m_cgParamModelViewProjMatrix = cgGetNamedParameter(m_cgShaderVertex, "uniModelViewProjMat"); m_cgParamSamplerCel = cgGetNamedParameter(m_cgShaderFragment, "uniTexCel"); m_cgParamSamplerOutline = cgGetNamedParameter(m_cgShaderFragment, "uniTexOutline"); m_cgParamSamplerSize = cgGetNamedParameter(m_cgShaderFragment, "uniTexSize"); }
__device__ void FFAST4(thread_info<GPU> ti, kernel_image2d<i_float4> frame_color, kernel_image2d<V> frame_s1, kernel_image2d<V> frame_s2, kernel_image2d<i_float1> pertinence, float grad_thresh) { point2d<int> p = thread_pos2d(ti); if (!frame_s1.has(p))//; || track(p).x == 0) return; if (p.row() < 6 || p.row() >= pertinence.domain().nrows() - 6 || p.col() < 6 || p.col() >= pertinence.domain().ncols() - 6) { pertinence(p).x = 0.f; return; } float pv; { float min_diff = 9999999.f; float max_single_diff = 0.f; pv = tex2D(flag<GPU>(), s1_tex, frame_s1, p).x; int sign = 0; for(int i = 0; i < 8; i++) { gl01f v1 = tex2D(flag<GPU>(), s1_tex, frame_s1, p.col() + circle_r3[i][1], p.row() + circle_r3[i][0]).x; gl01f v2 = tex2D(flag<GPU>(), s1_tex, frame_s1, p.col() + circle_r3[(i+8)][1], p.row() + circle_r3[(i+8)][0]).x; { float diff = pv - (v1 + v2) / 2.f; float adiff = fabs(diff); if (adiff < min_diff) { min_diff = adiff; if (min_diff < 0.01) { min_diff = 0; break; } } if (max_single_diff < adiff) max_single_diff = adiff; } } pv = tex2D(flag<GPU>(), s2_tex, frame_s2, p.col()/2, p.row()/2).x; float min_diff_large = 9999999.f; float max_single_diff_large = 0.f; //int min_orientation_large; for(int i = 0; i < 8; i++) { gl01f v1 = tex2D(flag<GPU>(), s2_tex, frame_s2, p.col()/2 + circle_r3[i][1], p.row()/2 + circle_r3[i][0]); gl01f v2 = tex2D(flag<GPU>(), s2_tex, frame_s2, p.col()/2 + circle_r3[(i+8)][1], p.row()/2 + circle_r3[(i+8)][0]); { float diff = pv - (v1 + v2) / 2.f; float adiff = fabs(diff); if (adiff < min_diff_large) { min_diff_large = adiff; if (min_diff_large < 0.01) { min_diff_large = 0; break; } } if (max_single_diff_large < adiff) max_single_diff_large = adiff; } } if (min_diff < min_diff_large) { min_diff = min_diff_large; max_single_diff = max_single_diff_large; } if (max_single_diff >= grad_thresh) { min_diff = min_diff / max_single_diff; } else min_diff = 0; pertinence(p) = min_diff; } }