コード例 #1
0
inline
__device__
float get_feature_value_tex2d(
        const int x, const int y,
        const channel_index_t channel_index,
        const box_coordinate_t box_min_corner_x,
        const box_coordinate_t box_min_corner_y,
        const box_coordinate_t box_max_corner_x,
        const box_coordinate_t box_max_corner_y,
        const int integral_channels_height)
{
    // if x or y are too high, some of these indices may be fall outside the channel memory
    const float y_offset = y + channel_index*integral_channels_height;

    // in CUDA 5 (4.2 ?) references to textures are not allowed, we use macro work around
    //    gpu_integral_channels_2d_texture_t &tex = integral_channels_2d_texture;
#define tex integral_channels_2d_texture

    //const gpu_integral_channels_t::Type  // could cause overflows during a + c
    const float
            a = tex2D(tex, x + box_min_corner_x, box_min_corner_y + y_offset), // top left
            b = tex2D(tex, x + box_max_corner_x, box_min_corner_y + y_offset), // top right
            c = tex2D(tex, x + box_max_corner_x, box_max_corner_y + y_offset), // bottom right
            d = tex2D(tex, x + box_min_corner_x, box_max_corner_y + y_offset); // bottom left
#undef tex

    const float feature_value = a +c -b -d;
    return feature_value;
}
コード例 #2
0
ファイル: TextureLoader.cpp プロジェクト: Asunaya/RefinedGunz
static D3DPtr<IDirect3DTexture9> LoadDDS(const void* data, size_t size,
	float sample_ratio, TextureInfo& info)
{
	D3DPtr<IDirect3DTexture9> ret;

	auto tex = gli::load(static_cast<const char*>(data), size);
	if (tex.format() == gli::FORMAT_UNDEFINED)
		return nullptr;
	auto format = static_cast<D3DFORMAT>(gli::dx{}.translate(tex.format()).D3DFormat);

	gli::texture2d tex2D(tex);
	if (tex2D.empty())
		return nullptr;

	auto Width = tex2D.extent().x;
	auto Height = tex2D.extent().y;
	auto Levels = tex2D.levels();

	// The mipmap level that we will use as the main level for the new texture.
	// The reason for this is resizing: The mipmaps already contain downsampled
	// versions, so using one of these avoids a resize step being done at runtime.
	auto InitialMipmapLevel = GetInitialMipmapLevel(Width, Height, Levels, sample_ratio);
	if (InitialMipmapLevel > 0)
	{
		Width = std::max(Width >> InitialMipmapLevel, 4);
		Height = std::max(Height >> InitialMipmapLevel, 4);
		Levels -= InitialMipmapLevel;
	}
コード例 #3
0
PS_OUT PS_MAIN(PS_IN Input)
{
	PS_OUT		Out = (PS_OUT)0;

	Out.vAlbedo = vector(tex2D(BaseTexture , Input.vTexUV));
	return Out;
}
コード例 #4
0
ファイル: bc2s_feature.hpp プロジェクト: syjman/cuimg
    __device__ inline bc2s
    compute_feature_tex(const i_int2& n)
    {
      bc2s b;

      for(int i = 0; i < 8; i ++)
      {
	b[i] = tex2D(bc2s_tex_s1, n.c() + circle_r3[i][1], n.r() + circle_r3[i][0]).x;
      }

      for(int i = 0; i < 8; i ++)
      {
        b[i+8] = tex2D(bc2s_tex_s2, n.c() + circle_r3[i][1] * 2, n.r() + circle_r3[i][0] * 2).x;
      }

      return b;
    }
コード例 #5
0
ファイル: Shader_Default.hpp プロジェクト: chanona/Guardians
PS_OUT PS_MAIN(PS_IN In)
{
	PS_OUT		Out = (PS_OUT)0;

	Out.vColor = tex2D(BaseSampler, In.vUV);

	Out.vColor.a = 0.f;

	return Out;
}
コード例 #6
0
ファイル: Shader_UI.hpp プロジェクト: chanona/Guardians
PS_OUT PS_MAIN(PS_IN In)
{
	PS_OUT		Out = (PS_OUT)0;

	Out.vColor = tex2D(BaseSampler, In.vUV);

	//float	fAlpha = Out.vColor.r + Out.vColor.g + Out.vColor.b;

	return Out;
}
コード例 #7
0
ファイル: texture.hpp プロジェクト: Achraf33/opencv
 __device__ __forceinline__ T operator ()(float y, float x) const
 {
 #if CV_CUDEV_ARCH < 300
     // Use the texture reference
     return tex2D(CvCudevTextureRef<T>::ref, x, y);
 #else
     // Use the texture object
     return tex2D<T>(texObj, x, y);
 #endif
 }
コード例 #8
0
ファイル: bc2s_feature.hpp プロジェクト: syjman/cuimg
    __device__ inline int
    distance_tex(const bc2s& a, const i_short2& n, const unsigned scale = 1)
    {
      int d = 0;
      if (scale == 1)
      {
	for(int i = 0; i < 8; i ++)
	{
	  int v = tex2D(bc2s_tex_s1, n.c() + circle_r3[i][1], n.r() + circle_r3[i][0]).x;
	  d += ::abs(v - a[i]);
	}
      }
      //else
      {
	for(int i = 0; i < 8; i ++)
	{
	  int v = tex2D(bc2s_tex_s2, n.c() + circle_r3[i][1] * 2, n.r() + circle_r3[i][0] * 2).x;
	  d += ::abs(v - a[8+i]) * 5;
	}
      }

      // return d / (255.f * 16.f);
      return d;
    }
コード例 #9
0
ファイル: convolve.hpp プロジェクト: RobertoMalatesta/cuimg
    __global__ void convolve_kernel(I, kernel_image2d<O> out, unsigned kernelsize)
    {
      i_int2 p = thread_pos2d();

      if (!out.has(p))
        return;

      bt_change_vtype(O, type_mult(bt_vtype(O), float)) r  = zero();
      for(int i = 0; i < kernelsize; i++)
      {
        float w = tex1Dfetch(tex_weights, i);
        point2d<int> n = i_int2(tex1Dfetch(tex_dpoints, i)) + p;
        if (out.has(n))
          r += O(tex2D(conv_input_tex<I>::tex(), n)) * w;
      }
      out(p) = r;
    }
コード例 #10
0
/*!
 \fn void SeparableFilter::createAndLoadRowShaders_ATI();
 \brief Procedurally Generate the Row Filter Shaders
 */
void SeparableFilter::createAndLoadRowShaders_ATI() 
{
  int i,p;

  for (i=0; i<NUM_FILTER_LEVELS; i++)
	  row_shader[i]=NULL;

  char fragmentSource[4*4096];

  for (p=0;p< num_kernel_levels; p++) 
  {
	i = kernel_levels[p];
	/////////////// Shader for ROW CONVOLUTION WITH BOUNDARY PADDING //////////////////////////////
	sprintf(fragmentSource,"\n \
		fragout main( vf30 IN, uniform sampler2D texture,                \n \
					  uniform float offset,                             \n \
					  uniform float kernel1[%d],                         \n \
					  uniform float kernel2[%d]) {                       \n \
		fragout OUT;                                                     \n \
		float4  vec0, vec1, vec2, vec3, vec4, vec5, acc1, acc2;          \n \
		float   sum1,sum2;                                               \n \
		vec0   = float4(kernel2[%d], kernel2[%d], kernel2[%d], 0.0 );    \n \
		vec1   = float4(kernel1[%d], kernel1[%d], kernel1[%d], 0.0 );    \n \
		vec2.x = tex2D(texture, IN.TEX1.xy).r;                           \n \
		vec2.y = tex2D(texture, IN.TEX0.xy).r;                           \n \
		vec2.z = tex2D(texture, IN.TEX2.xy).r;                           \n \
		vec2.w = 0.0;                                                    \n \
		acc1    = vec1.xyzw * vec2.xyzw;                                 \n \
		acc2    = vec0.xyzw * vec2.xyzw;                                 \n \
		sum1    = acc1.r + acc1.g + acc1.b ;                             \n \
		sum2    = acc2.r + acc2.g + acc2.b ;                             \n \
																		 \n",
			2*i+1, 2*i+1, 
			i-1, i, i+1, 
			i-1, i, i+1 );
	
	for (int j = 1; j <= 1; j++) //  HACK HACK !!! Save 2 ms by doing only 1 iteration. Actually j <=i/2
	{                                                        
      char loopSource[4096];                              
      if (j==1)
	  {
	  sprintf(loopSource,"\n \
		vec3   = float4( kernel1[%2d], kernel1[%2d], kernel1[%2d], kernel1[%2d]);  \n \
		vec5   = float4( kernel2[%2d], kernel2[%2d], kernel2[%2d], kernel2[%2d]);  \n \
		vec4.x = tex2D(texture, IN.TEX3.xy).r;									   \n \
		vec4.y = tex2D(texture, IN.TEX4.xy).r;								       \n \
		vec4.z = tex2D(texture, IN.TEX5.xy).r;									   \n \
		vec4.w = tex2D(texture, IN.TEX6.xy).r;									   \n \
		acc1    = vec3.xyzw * vec4.xyzw;                                           \n \
		acc2    = vec5.xyzw * vec4.xyzw;                                           \n \
		sum1    = sum1 + acc1.r + acc1.g + acc1.b + acc1.a;                        \n \
		sum2    = sum2 + acc2.r + acc2.g + acc2.b + acc2.a; \n",
			i-2*j-1 , i-2*j , i+2*j  , i+2*j+1, 
			i-2*j-1 , i-2*j , i+2*j  , i+2*j+1,
			2*j+1,    2*j ,   2*j ,   2*j+1);
	  }
	  else 
	  {
		sprintf(loopSource,"\n \
		vec3   = float4( kernel1[%2d], kernel1[%2d], kernel1[%2d], kernel1[%2d]);  \n \
		vec5   = float4( kernel2[%2d], kernel2[%2d], kernel2[%2d], kernel2[%2d]);  \n \
		vec4.x = tex2D(texture, float2(IN.TEX0.x - %2d*offset , IN.TEX0.y)).r;     \n \
		vec4.y = tex2D(texture, float2(IN.TEX0.x - %2d*offset , IN.TEX0.y)).r;     \n \
		vec4.z = tex2D(texture, float2(IN.TEX0.x + %2d*offset , IN.TEX0.y)).r;     \n \
		vec4.w = tex2D(texture, float2(IN.TEX0.x + %2d*offset , IN.TEX0.y)).r;     \n \
		acc1    = vec3.xyzw * vec4.xyzw;                                           \n \
		acc2    = vec5.xyzw * vec4.xyzw;                                           \n \
		sum1    = sum1 + acc1.r + acc1.g + acc1.b + acc1.a;                        \n \
		sum2    = sum2 + acc2.r + acc2.g + acc2.b + acc2.a; \n",
			i-2*j-1 , i-2*j , i+2*j  , i+2*j+1, 
			i-2*j-1 , i-2*j , i+2*j  , i+2*j+1,
			2*j+1,    2*j ,   2*j ,   2*j+1);
	  }
      strcat(fragmentSource,loopSource);
    } 
	strcat(fragmentSource,"\n \
		OUT.col = float4(sum1, 0.5 + 0.5 * sum2 ,1.0,1.0);                         \n \
	    return OUT;                                                                \n \
	} \n");
コード例 #11
0
ファイル: roundleft.cpp プロジェクト: jae686/nctoolkit
float4 test(float2 p, float2 d, float range){
	if(p.x > range || p.y > range) return float4(1.0,1.0,1.0,1.0);
	return tex2D(gphTexture0,(p*0.5)/float2(range,range));
}
コード例 #12
0
	void loadTexture(std::string fileName, VkFormat format, bool forceLinearTiling)
	{
#if defined(__ANDROID__)
		// Textures are stored inside the apk on Android (compressed)
		// So they need to be loaded via the asset manager
		AAsset* asset = AAssetManager_open(androidApp->activity->assetManager, fileName.c_str(), AASSET_MODE_STREAMING);
		assert(asset);
		size_t size = AAsset_getLength(asset);
		assert(size > 0);

		void *textureData = malloc(size);
		AAsset_read(asset, textureData, size);
		AAsset_close(asset);

		gli::texture2d tex2D(gli::load((const char*)textureData, size));
#else
		gli::texture2d tex2D(gli::load(fileName));
#endif

		assert(!tex2D.empty());

		VkFormatProperties formatProperties;

		texture.width = static_cast<uint32_t>(tex2D[0].extent().x);
		texture.height = static_cast<uint32_t>(tex2D[0].extent().y);

		// calculate num of mip maps
		// numLevels = 1 + floor(log2(max(w, h, d)))
		// Calculated as log2(max(width, height, depth))c + 1 (see specs)
		texture.mipLevels = floor(log2(std::max(texture.width, texture.height))) + 1;

		// Get device properites for the requested texture format
		vkGetPhysicalDeviceFormatProperties(physicalDevice, format, &formatProperties);

		// Mip-chain generation requires support for blit source and destination
		assert(formatProperties.optimalTilingFeatures & VK_FORMAT_FEATURE_BLIT_SRC_BIT);
		assert(formatProperties.optimalTilingFeatures & VK_FORMAT_FEATURE_BLIT_DST_BIT);

		VkMemoryAllocateInfo memAllocInfo = vkTools::initializers::memoryAllocateInfo();
		VkMemoryRequirements memReqs = {};

		// Create a host-visible staging buffer that contains the raw image data
		VkBuffer stagingBuffer;
		VkDeviceMemory stagingMemory;

		VkBufferCreateInfo bufferCreateInfo = vkTools::initializers::bufferCreateInfo();
		bufferCreateInfo.size = tex2D.size();
		// This buffer is used as a transfer source for the buffer copy
		bufferCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
		bufferCreateInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;		
		VK_CHECK_RESULT(vkCreateBuffer(device, &bufferCreateInfo, nullptr, &stagingBuffer));
		vkGetBufferMemoryRequirements(device, stagingBuffer, &memReqs);
		memAllocInfo.allocationSize = memReqs.size;
		memAllocInfo.memoryTypeIndex = vulkanDevice->getMemoryType(memReqs.memoryTypeBits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
		VK_CHECK_RESULT(vkAllocateMemory(device, &memAllocInfo, nullptr, &stagingMemory));
		VK_CHECK_RESULT(vkBindBufferMemory(device, stagingBuffer, stagingMemory, 0));

		// Copy texture data into staging buffer
		uint8_t *data;
		VK_CHECK_RESULT(vkMapMemory(device, stagingMemory, 0, memReqs.size, 0, (void **)&data));
		memcpy(data, tex2D.data(), tex2D.size());
		vkUnmapMemory(device, stagingMemory);

		// Create optimal tiled target image
		VkImageCreateInfo imageCreateInfo = vkTools::initializers::imageCreateInfo();
		imageCreateInfo.imageType = VK_IMAGE_TYPE_2D;
		imageCreateInfo.format = format;
		imageCreateInfo.mipLevels = texture.mipLevels;
		imageCreateInfo.arrayLayers = 1;
		imageCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT;
		imageCreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
		imageCreateInfo.usage = VK_IMAGE_USAGE_SAMPLED_BIT;
		imageCreateInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
		imageCreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
		imageCreateInfo.extent = { texture.width, texture.height, 1 };
		imageCreateInfo.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
		VK_CHECK_RESULT(vkCreateImage(device, &imageCreateInfo, nullptr, &texture.image));
		vkGetImageMemoryRequirements(device, texture.image, &memReqs);
		memAllocInfo.allocationSize = memReqs.size;
		memAllocInfo.memoryTypeIndex = vulkanDevice->getMemoryType(memReqs.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
		VK_CHECK_RESULT(vkAllocateMemory(device, &memAllocInfo, nullptr, &texture.deviceMemory));
		VK_CHECK_RESULT(vkBindImageMemory(device, texture.image, texture.deviceMemory, 0));

		VkCommandBuffer copyCmd = VulkanExampleBase::createCommandBuffer(VK_COMMAND_BUFFER_LEVEL_PRIMARY, true);

		VkImageSubresourceRange subresourceRange = {};
		subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
		subresourceRange.levelCount = 1;
		subresourceRange.layerCount = 1;

		// Optimal image will be used as destination for the copy, so we must transfer from our initial undefined image layout to the transfer destination layout
		vkTools::setImageLayout(copyCmd, texture.image, VK_IMAGE_ASPECT_COLOR_BIT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, subresourceRange);

		// Copy the first mip of the chain, remaining mips will be generated
		VkBufferImageCopy bufferCopyRegion = {};
		bufferCopyRegion.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
		bufferCopyRegion.imageSubresource.mipLevel = 0;
		bufferCopyRegion.imageSubresource.baseArrayLayer = 0;
		bufferCopyRegion.imageSubresource.layerCount = 1;
		bufferCopyRegion.imageExtent.width = texture.width;
		bufferCopyRegion.imageExtent.height = texture.height;
		bufferCopyRegion.imageExtent.depth = 1;

		vkCmdCopyBufferToImage(copyCmd, stagingBuffer, texture.image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &bufferCopyRegion);

		// Transition first mip level to transfer source for read during blit
		texture.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
		vkTools::setImageLayout(
			copyCmd,
			texture.image,
			VK_IMAGE_ASPECT_COLOR_BIT,
			VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
			VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
			subresourceRange);

		VulkanExampleBase::flushCommandBuffer(copyCmd, queue, true);

		// Clean up staging resources
		vkFreeMemory(device, stagingMemory, nullptr);
		vkDestroyBuffer(device, stagingBuffer, nullptr);

		// Generate the mip chain
		// ---------------------------------------------------------------
		// We copy down the whole mip chain doing a blit from mip-1 to mip
		// An alternative way would be to always blit from the first mip level and sample that one down
		VkCommandBuffer blitCmd = VulkanExampleBase::createCommandBuffer(VK_COMMAND_BUFFER_LEVEL_PRIMARY, true);

		// Copy down mips from n-1 to n
		for (int32_t i = 1; i < texture.mipLevels; i++)
		{
			VkImageBlit imageBlit{};				

			// Source
			imageBlit.srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
			imageBlit.srcSubresource.layerCount = 1;
			imageBlit.srcSubresource.mipLevel = i-1;
			imageBlit.srcOffsets[1].x = int32_t(texture.width >> (i - 1));
			imageBlit.srcOffsets[1].y = int32_t(texture.height >> (i - 1));
			imageBlit.srcOffsets[1].z = 1;

			// Destination
			imageBlit.dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
			imageBlit.dstSubresource.layerCount = 1;
			imageBlit.dstSubresource.mipLevel = i;
			imageBlit.dstOffsets[1].x = int32_t(texture.width >> i);
			imageBlit.dstOffsets[1].y = int32_t(texture.height >> i);
			imageBlit.dstOffsets[1].z = 1;

			VkImageSubresourceRange mipSubRange = {};
			mipSubRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
			mipSubRange.baseMipLevel = i;
			mipSubRange.levelCount = 1;
			mipSubRange.layerCount = 1;

			// Transiton current mip level to transfer dest
			vkTools::setImageLayout(
				blitCmd,
				texture.image,
				VK_IMAGE_ASPECT_COLOR_BIT,
				VK_IMAGE_LAYOUT_UNDEFINED,
				VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
				mipSubRange,
				VK_PIPELINE_STAGE_TRANSFER_BIT,
				VK_PIPELINE_STAGE_HOST_BIT);

			// Blit from previous level
			vkCmdBlitImage(
				blitCmd,
				texture.image,
				VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
				texture.image,
				VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
				1,
				&imageBlit,
				VK_FILTER_LINEAR);

			// Transiton current mip level to transfer source for read in next iteration
			vkTools::setImageLayout(
				blitCmd,
				texture.image,
				VK_IMAGE_ASPECT_COLOR_BIT,
				VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
				VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
				mipSubRange,
				VK_PIPELINE_STAGE_HOST_BIT,
				VK_PIPELINE_STAGE_TRANSFER_BIT);
		}

		// After the loop, all mip layers are in TRANSFER_SRC layout, so transition all to SHADER_READ
		subresourceRange.levelCount = texture.mipLevels;
		vkTools::setImageLayout(
			blitCmd,
			texture.image,
			VK_IMAGE_ASPECT_COLOR_BIT,
			VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
			texture.imageLayout,
			subresourceRange);

		VulkanExampleBase::flushCommandBuffer(blitCmd, queue, true);
		// ---------------------------------------------------------------

		// Create samplers
		samplers.resize(3);
		VkSamplerCreateInfo sampler = vkTools::initializers::samplerCreateInfo();
		sampler.magFilter = VK_FILTER_LINEAR;
		sampler.minFilter = VK_FILTER_LINEAR;
		sampler.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR;
		sampler.addressModeU = VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT;
		sampler.addressModeV = VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT;
		sampler.addressModeW = VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT;
		sampler.mipLodBias = 0.0f;
		sampler.compareOp = VK_COMPARE_OP_NEVER;
		sampler.minLod = 0.0f;
		sampler.maxLod = 0.0f;
		sampler.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE;
		sampler.maxAnisotropy = 1.0;
		sampler.anisotropyEnable = VK_FALSE;

		// Without mip mapping
		VK_CHECK_RESULT(vkCreateSampler(device, &sampler, nullptr, &samplers[0]));

		// With mip mapping
		sampler.maxLod = (float)texture.mipLevels;
		VK_CHECK_RESULT(vkCreateSampler(device, &sampler, nullptr, &samplers[1]));

		// With mip mapping and anisotropic filtering
		if (vulkanDevice->features.samplerAnisotropy)
		{
			sampler.maxAnisotropy = vulkanDevice->properties.limits.maxSamplerAnisotropy;
			sampler.anisotropyEnable = VK_TRUE;
		}
		VK_CHECK_RESULT(vkCreateSampler(device, &sampler, nullptr, &samplers[2]));

		// Create image view
		VkImageViewCreateInfo view = vkTools::initializers::imageViewCreateInfo();
		view.image = texture.image;
		view.viewType = VK_IMAGE_VIEW_TYPE_2D;
		view.format = format;
		view.components = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A };
		view.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
		view.subresourceRange.baseMipLevel = 0;
		view.subresourceRange.baseArrayLayer = 0;
		view.subresourceRange.layerCount = 1;
		view.subresourceRange.levelCount = texture.mipLevels;
		VK_CHECK_RESULT(vkCreateImageView(device, &view, nullptr, &texture.view));
	}
コード例 #13
0
ファイル: main.cpp プロジェクト: AsGreyWolf/Vulkan
	void loadTexture(const char* fileName, VkFormat format, bool forceLinearTiling)
	{
		VkFormatProperties formatProperties;
		VkResult err;

		AAsset* asset = AAssetManager_open(app->activity->assetManager, fileName, AASSET_MODE_STREAMING);
		assert(asset);
		size_t size = AAsset_getLength(asset);
		assert(size > 0);

		void *textureData = malloc(size);
		AAsset_read(asset, textureData, size);
		AAsset_close(asset);

		gli::texture2D tex2D(gli::load((const char*)textureData, size));
		assert(!tex2D.empty());

		texture.width = tex2D[0].dimensions().x;
		texture.height = tex2D[0].dimensions().y;
		texture.mipLevels = tex2D.levels();

		// Get device properites for the requested texture format
		vkGetPhysicalDeviceFormatProperties(physicalDevice, format, &formatProperties);

		// Only use linear tiling if requested (and supported by the device)
		// Support for linear tiling is mostly limited, so prefer to use
		// optimal tiling instead
		// On most implementations linear tiling will only support a very
		// limited amount of formats and features (mip maps, cubemaps, arrays, etc.)
		VkBool32 useStaging = true;

		// Only use linear tiling if forced
		if (forceLinearTiling)
		{
			// Don't use linear if format is not supported for (linear) shader sampling
			useStaging = !(formatProperties.linearTilingFeatures & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT);
		}

		VkImageCreateInfo imageCreateInfo = vkTools::initializers::imageCreateInfo();
		imageCreateInfo.imageType = VK_IMAGE_TYPE_2D;
		imageCreateInfo.format = format;
		imageCreateInfo.mipLevels = 1;
		imageCreateInfo.arrayLayers = 1;
		imageCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT;
		imageCreateInfo.tiling = VK_IMAGE_TILING_LINEAR;
		imageCreateInfo.usage = (useStaging) ? VK_IMAGE_USAGE_TRANSFER_SRC_BIT : VK_IMAGE_USAGE_SAMPLED_BIT;
		imageCreateInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
		imageCreateInfo.flags = 0;
		imageCreateInfo.extent = { texture.width, texture.height, 1 };

		VkMemoryAllocateInfo memAllocInfo = vkTools::initializers::memoryAllocateInfo();
		VkMemoryRequirements memReqs;

		startSetupCommandBuffer();

		if (useStaging)
		{
			// Load all available mip levels into linear textures
			// and copy to optimal tiling target
			struct MipLevel {
				VkImage image;
				VkDeviceMemory memory;
			};
			std::vector<MipLevel> mipLevels;
			mipLevels.resize(texture.mipLevels);

			// Copy mip levels
			for (uint32_t level = 0; level < texture.mipLevels; ++level)
			{
				imageCreateInfo.extent.width = tex2D[level].dimensions().x;
				imageCreateInfo.extent.height = tex2D[level].dimensions().y;
				imageCreateInfo.extent.depth = 1;

				err = vkCreateImage(device, &imageCreateInfo, nullptr, &mipLevels[level].image);
				assert(!err);

				vkGetImageMemoryRequirements(device, mipLevels[level].image, &memReqs);
				memAllocInfo.allocationSize = memReqs.size;
				getMemoryType(memReqs.memoryTypeBits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, &memAllocInfo.memoryTypeIndex);
				err = vkAllocateMemory(device, &memAllocInfo, nullptr, &mipLevels[level].memory);
				assert(!err);
				err = vkBindImageMemory(device, mipLevels[level].image, mipLevels[level].memory, 0);
				assert(!err);

				VkImageSubresource subRes = {};
				subRes.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;

				VkSubresourceLayout subResLayout;
				void *data;

				vkGetImageSubresourceLayout(device, mipLevels[level].image, &subRes, &subResLayout);
				assert(!err);
				err = vkMapMemory(device, mipLevels[level].memory, 0, memReqs.size, 0, &data);
				assert(!err);
				size_t levelSize = tex2D[level].size();
				memcpy(data, tex2D[level].data(), levelSize);
				vkUnmapMemory(device, mipLevels[level].memory);

				LOGW("setImageLayout %d", 1);

				// Image barrier for linear image (base)
				// Linear image will be used as a source for the copy
				vkTools::setImageLayout(
					setupCmdBuffer,
					mipLevels[level].image,
					VK_IMAGE_ASPECT_COLOR_BIT,
					VK_IMAGE_LAYOUT_UNDEFINED,
					VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
			}

			// Setup texture as blit target with optimal tiling
			imageCreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
			imageCreateInfo.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
			imageCreateInfo.mipLevels = texture.mipLevels;
			imageCreateInfo.extent = { texture.width, texture.height, 1 };

			err = vkCreateImage(device, &imageCreateInfo, nullptr, &texture.image);
			assert(!err);

			vkGetImageMemoryRequirements(device, texture.image, &memReqs);

			memAllocInfo.allocationSize = memReqs.size;

			getMemoryType(memReqs.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, &memAllocInfo.memoryTypeIndex);
			err = vkAllocateMemory(device, &memAllocInfo, nullptr, &texture.deviceMemory);
			assert(!err);
			err = vkBindImageMemory(device, texture.image, texture.deviceMemory, 0);
			assert(!err);

			// Image barrier for optimal image (target)
			// Optimal image will be used as destination for the copy
			vkTools::setImageLayout(
				setupCmdBuffer,
				texture.image,
				VK_IMAGE_ASPECT_COLOR_BIT,
				VK_IMAGE_LAYOUT_UNDEFINED,
				VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);

			// Copy mip levels one by one
			for (uint32_t level = 0; level < texture.mipLevels; ++level)
			{
				// Copy region for image blit
				VkImageCopy copyRegion = {};

				copyRegion.srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
				copyRegion.srcSubresource.baseArrayLayer = 0;
				copyRegion.srcSubresource.mipLevel = 0;
				copyRegion.srcSubresource.layerCount = 1;
				copyRegion.srcOffset = { 0, 0, 0 };

				copyRegion.dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
				copyRegion.dstSubresource.baseArrayLayer = 0;
				// Set mip level to copy the linear image to
				copyRegion.dstSubresource.mipLevel = level;
				copyRegion.dstSubresource.layerCount = 1;
				copyRegion.dstOffset = { 0, 0, 0 };

				copyRegion.extent.width = tex2D[level].dimensions().x;
				copyRegion.extent.height = tex2D[level].dimensions().y;
				copyRegion.extent.depth = 1;

				// Put image copy into command buffer
				vkCmdCopyImage(
					setupCmdBuffer,
					mipLevels[level].image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
					texture.image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
					1, &copyRegion);

				// Change texture image layout to shader read after the copy
				texture.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
				vkTools::setImageLayout(
					setupCmdBuffer,
					texture.image,
					VK_IMAGE_ASPECT_COLOR_BIT,
					VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
					texture.imageLayout);
			}

			// Clean up linear images 
			// No longer required after mip levels
			// have been transformed over to optimal tiling
			for (auto& level : mipLevels)
			{
				vkDestroyImage(device, level.image, nullptr);
				vkFreeMemory(device, level.memory, nullptr);
			}
		}
		else
		{
			// Prefer using optimal tiling, as linear tiling 
			// may support only a small set of features 
			// depending on implementation (e.g. no mip maps, only one layer, etc.)

			VkImage mappableImage;
			VkDeviceMemory mappableMemory;

			// Load mip map level 0 to linear tiling image
			err = vkCreateImage(device, &imageCreateInfo, nullptr, &mappableImage);
			assert(!err);

			// Get memory requirements for this image 
			// like size and alignment
			vkGetImageMemoryRequirements(device, mappableImage, &memReqs);
			// Set memory allocation size to required memory size
			memAllocInfo.allocationSize = memReqs.size;

			// Get memory type that can be mapped to host memory
			getMemoryType(memReqs.memoryTypeBits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, &memAllocInfo.memoryTypeIndex);

			// Allocate host memory
			err = vkAllocateMemory(device, &memAllocInfo, nullptr, &mappableMemory);
			assert(!err);

			// Bind allocated image for use
			err = vkBindImageMemory(device, mappableImage, mappableMemory, 0);
			assert(!err);

			// Get sub resource layout
			// Mip map count, array layer, etc.
			VkImageSubresource subRes = {};
			subRes.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;

			VkSubresourceLayout subResLayout;
			void *data;

			// Get sub resources layout 
			// Includes row pitch, size offsets, etc.
			vkGetImageSubresourceLayout(device, mappableImage, &subRes, &subResLayout);
			assert(!err);

			// Map image memory
			err = vkMapMemory(device, mappableMemory, 0, memReqs.size, 0, &data);
			assert(!err);

			// Copy image data into memory
			memcpy(data, tex2D[subRes.mipLevel].data(), tex2D[subRes.mipLevel].size());

			vkUnmapMemory(device, mappableMemory);

			// Linear tiled images don't need to be staged
			// and can be directly used as textures
			texture.image = mappableImage;
			texture.deviceMemory = mappableMemory;
			texture.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;

			// Setup image memory barrier
			vkTools::setImageLayout(
				setupCmdBuffer,
				texture.image,
				VK_IMAGE_ASPECT_COLOR_BIT, VK_IMAGE_LAYOUT_UNDEFINED,
				texture.imageLayout);
		}

		flushSetupCommandBuffer();

		// Create sampler
		// In Vulkan textures are accessed by samplers
		// This separates all the sampling information from the 
		// texture data
		// This means you could have multiple sampler objects
		// for the same texture with different settings
		// Similar to the samplers available with OpenGL 3.3
		VkSamplerCreateInfo sampler = vkTools::initializers::samplerCreateInfo();
		sampler.magFilter = VK_FILTER_LINEAR;
		sampler.minFilter = VK_FILTER_LINEAR;
		sampler.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR;
		sampler.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
		sampler.addressModeV = sampler.addressModeU;
		sampler.addressModeW = sampler.addressModeU;
		sampler.mipLodBias = 0.0f;
		sampler.compareOp = VK_COMPARE_OP_NEVER;
		sampler.minLod = 0.0f;
		// Max level-of-detail should match mip level count
		sampler.maxLod = (useStaging) ? (float)texture.mipLevels : 0.0f;
		// Enable anisotropic filtering
		sampler.maxAnisotropy = 8;
		sampler.anisotropyEnable = VK_TRUE;
		sampler.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE;
		err = vkCreateSampler(device, &sampler, nullptr, &texture.sampler);
		assert(!err);

		// Create image view
		// Textures are not directly accessed by the shaders and
		// are abstracted by image views containing additional
		// information and sub resource ranges
		VkImageViewCreateInfo view = vkTools::initializers::imageViewCreateInfo();
		view.image = VK_NULL_HANDLE;
		view.viewType = VK_IMAGE_VIEW_TYPE_2D;
		view.format = format;
		view.components = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A };
		view.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
		view.subresourceRange.baseMipLevel = 0;
		view.subresourceRange.baseArrayLayer = 0;
		view.subresourceRange.layerCount = 1;
		// Linear tiling usually won't support mip maps
		// Only set mip map count if optimal tiling is used
		view.subresourceRange.levelCount = (useStaging) ? texture.mipLevels : 1;
		view.image = texture.image;
		err = vkCreateImageView(device, &view, nullptr, &texture.view);
		assert(!err);
	}
コード例 #14
0
 PS_OUTPUT ps_main(PS_INPUT input ) {                    \n\
     PS_OUTPUT output;                                   \n\
     float4 texColor = tex2D(Tex0, input.TexCoord);      \n\
     output.Color = Color * texColor;			        \n\
     return output;                                      \n\
 }                                                       \n";
コード例 #15
0
  void FFAST4(thread_info<CPU> ti,
                 kernel_image2d<i_float4> frame_color,
                 kernel_image2d<V> frame_s1,
                 kernel_image2d<V> frame_s2,
                 // kernel_image2d<dffast4> out,
                 kernel_image2d<i_float1> pertinence,
                 float grad_thresh)
  {

    point2d<int> p = thread_pos2d(ti);
    if (!frame_s1.has(p))//; || track(p).x == 0)
      return;


    if (p.row() < 6 || p.row() >= pertinence.domain().nrows() - 6 ||
        p.col() < 6 || p.col() >= pertinence.domain().ncols() - 6)
    {
      pertinence(p).x = 0.f;
      return;
    }

    gl01f pv;

    {
      float min_diff = 9999999.f;
      float max_single_diff = 0.f;
      pv = V(tex2D(flag<CPU>(), s1_tex, frame_s1, p));
      int sign = 0;
      for(int i = 0; i < 8; i++)
      {

        gl01f v1 = V(tex2D(flag<CPU>(), s1_tex, frame_s1,
			   p.col() + circle_r3_h[i][1],
			   p.row() + circle_r3_h[i][0]));

        gl01f v2 = V(tex2D(flag<CPU>(), s1_tex, frame_s1,
			   p.col() + circle_r3_h[(i+8)][1],
			   p.row() + circle_r3_h[(i+8)][0]));

        {
          float diff = pv -
            (v1 + v2) / 2.f;

          float adiff = fabs(diff);

          if (adiff < min_diff)
	  {
            min_diff = adiff;
            if (min_diff < 0.001f) break;
	  }



          if (max_single_diff < adiff) max_single_diff = adiff;

        }
      }

      pv = V(tex2D(flag<CPU>(), s2_tex, frame_s2, p.col()/2, p.row()/2));
      float min_diff_large = 9999999.f;
      float max_single_diff_large = 0.f;
      //int min_orientation_large;
      for(int i = 0; i < 8; i++)
      {

        gl01f v1 = V(tex2D(flag<CPU>(), s2_tex, frame_s2,
                         p.col()/2 + circle_r3_h[i][1],
			   p.row()/2 + circle_r3_h[i][0]));

        gl01f v2 = V(tex2D(flag<CPU>(), s2_tex, frame_s2,
                         p.col()/2 + circle_r3_h[(i+8)][1],
                         p.row()/2 + circle_r3_h[(i+8)][0]));

        {
          float diff = pv - (v1 + v2) / 2.f;
          float adiff = fabs(diff);

          if (adiff < min_diff_large)
          {
            min_diff_large = adiff;
            if (min_diff_large < 0.001f) break;
          }

          if (max_single_diff_large < adiff) max_single_diff_large = adiff;
        }

      }


      if (min_diff < min_diff_large)
      {
        min_diff = min_diff_large;
        max_single_diff = max_single_diff_large;
      }


      if (max_single_diff >= grad_thresh)
      {
        min_diff = min_diff / max_single_diff;
      }
      else
        min_diff = 0;

       pertinence(p) = min_diff;
      // pertinence(p) = p.col() / float(frame_s1.ncols());
      // pertinence(p) = float(frame_s1(p)) / 255.f;
      // out(p) = distances;

    }

  }
コード例 #16
0
ファイル: f_blurpass2.cpp プロジェクト: armak/Shader_Minifier
void main (void) 
{

vec2 MotionBlurPos=vec2 (0.0,0.0);



//float ff=tex2D (texture2d_1,oUV.xy).x;
//ff=abs (ff-cl.x);
//ff=0.5+0.5*sin(ff*10000.0);
//cl=ff*vec4 (1.0);



/*


float PI=3.1415926535;
float2 UV=oUV.xy;

UV-=float2 (0.5,0.5);
UV*=2.0;


float3 DirV =float3 (UV.x,UV.y,0.6);
float lgn=length (DirV);
DirV/=lgn;

UV.xy=UV.xy+(DirV.xy-UV.xy)*-0.0;
UV*=0.5;
UV+=float2 (0.5,0.5);
*/
	float2 UV=oUV.xy;
float4 cl=tex2D (texture2d_1,UV.xy);

cl=float4 (0.0);
float3 Direction=tex2D (texture2d_velocities,UV.xy).xyz;
//Direction.xy=float2 (0.5);
Direction.xyz-=float3 (0.5);
Direction*=2.0;

float alpha=1.0*length (Direction.xyz);
alpha=clamp (alpha,0.0,1.0);
//alpha=1-alpha;
//alpha*=alpha;
//alpha=1-alpha;

//alpha*=alpha;
//alpha=0.1;
Direction*=0.01;
if (alpha>0.01)
{
	for (int t=0;t<10;t++) { 
UV.xy-=Direction.xy;
	cl+=tex2D (texture2d_1,UV.xy);
};
cl*=1.0/10.0;
//cl.x+=0.15*alpha;
//=float4 (1.0,0.0,0.0,1.0)*alpha;
}
else
cl=tex2D (texture2d_1,UV.xy);

float4 clnow=tex2D( texture2d_1,oUV.xy);

cl=cl+(clnow-cl)*0.5;

UV=oUV.xy;






// blur effect
float lgn2=length (float2 (1.0,2.0)*(UV.xy-float2 (0.5)));
lgn2*=lgn2;
float xxx=1.0+(gl_Color.x)*5.0;
//xxx=1.0;
xxx=clamp (xxx,1.0,100.0);
//pow(lgn2,1.8);
int count=0;
{
	for (int y=-3;y<3;y++) {
	for (int x=-3;x<3;x++){
count++;

float2 dd=float2 (x,y);
if ( (x)==(y)) {
	//dd*=2.0;
}

float2 Dist=float2 (x,y)*0.0025*lgn2;
Dist=dd*0.0018*lgn2*xxx;

Dist+=MotionBlurPos;
MotionBlurPos+=MotionBlur.xy*0.01;

cl+=tex2D(texture2d_1,UV.xy+Dist);

}
}
cl/=float (count)*(lgn2+(1.0-lgn2)*0.85);
}

vec4 cll=cl*1.03;
cll+=gl_Color;
//cll=tex2D (texture2d_1,oUV.xy);



/*



 
//Depth blurring

float dpth=0.0;
 count=0;
 float Limit2=1.0-Limit;
for (int y=-4;y<5;y++) {
	for (int x=-4;x<5;x++){
count++;
float2 dd=float2 (x,y)*0.7*Limit2;

//dd=ray.xy*5.0;

float2 Dist=dd*0.0018;
dpth+=tex2D(texture2d_2,UV.xy+Dist).x;
}
}
dpth/=float (count);
//dpth=sin(dpth*3.14159);
//dpth+=0.1;
dpth=pow(dpth,126.0);
//dpth+=Limit;
dpth=1.0-1.0*dpth;
//if (dpth<0.7) dpth=0.0;
count=0;
cl=vec4(0.0);
dpth=clamp(dpth,0.0,1.0);

dpth=Limit+((1.0-Limit)-(Limit))*dpth;
dpth*=Flo;
//dpth=1.0;

float vel=tex2D(texture2d_velocities,UV.xy).x;
if (dpth>0.02)
{ 
for (int y=-4;y<5;y++) {
	for (int x=-4;x<5;x++){
count++;

float2 dd=float2 (x,y)*0.7;//;*vec2 (3.0,3.0);

//float2 dd2=float((x+4)*9+(y+4))*float2 (-1.0,1.0)*0.4;
//dd=dd+(dd2-dd)*-vel*0.4;


float2 Dist=dd*0.0018*dpth;
cl+=tex2D(texture2d_1,UV.xy+Dist);
//cl.x=dpth;
	}
}

cl/=float (count);
}
else
cl=tex2D(texture2d_1,UV.xy);



if (vel>0.01) { 
vec4 newcl=vec4 (0.0);
for (int x=0;x<15;x++){
float2 dd2=float(x)*float2 (-1.0,1.0)*0.003*vel;
newcl+=tex2D(texture2d_1,UV.xy+dd2);
}
newcl*=1.0/15.0;

cl=cl+(newcl-cl)*0.5;
}



//cl*=0.4;

//cl+=tex2D(texture2d_velocities,UV.xy)*0.5;


//cl=tex2D(texture2d_1,UV.xy);
//cl=vec4 (dpth);

//cl=vec4 (tex2D(texture2d_2,UV.xy).x);




*/

cl=cll;



gl_FragData[0]=cl;
	gl_FragData[1]=cl;

}
コード例 #17
0
ファイル: CgShaderCelScreen.cpp プロジェクト: ttvd/cg-shaders
	//--
	CgShaderCelScreen::CgShaderCelScreen(CGcontext cgContext) : 
		CgShader(cgContext, CgShader::VertexFragment)
	{
		// [rad] This is vertex and fragment shader
	
		// [rad] Setup vertex shader:
		
		// [rad] Setup vertex shader entry point
		SetEntry(CgShader::Vertex, "main");

		static const std::string sProgramVertex = "							\
																			\
		void main(	float4 inPosition			: POSITION,					\
					float2 inTexCoord			: TEXCOORD0,				\
																			\
					out float4 outPosition		: POSITION,					\
					out float2 outTexCoord		: TEXCOORD0,				\
																			\
					uniform float4x4 uniModelViewProjMat)					\
		{																	\
			outPosition = mul(uniModelViewProjMat, inPosition);				\
			outTexCoord = inTexCoord;										\
		}																	";
		
		// [rad] Setup vertex shader program
		SetProgram(CgShader::Vertex, sProgramVertex);
		
		// [rad] Setup fragment shader:
		
		// [rad] Setup fragment shader entry point
		SetEntry(CgShader::Fragment, "main");
		
		static const std::string sProgramFragment = "							\
																				\
		void main(	float2 inTexCoord : TEXCOORD0,								\
																				\
					out float4 outColor : COLOR,								\
																				\
					uniform int2 uniTexSize,									\
					uniform sampler2D uniTexCel,								\
					uniform sampler2D uniTexOutline)							\
		{																		\
			float4 cel = tex2D(uniTexCel, inTexCoord);							\
			float4 outline = tex2D(uniTexOutline, inTexCoord);					\
																				\
			if(outline.x == 0)													\
			{																	\
				outColor = float4(0, 0, 0, 1);									\
			}																	\
			else																\
			{																	\
				outColor = cel;													\
			}																	\
		}																		";
		
		// [rad] Setup fragment shader program
		SetProgram(CgShader::Fragment, sProgramFragment);
		
		// [rad] Create shaders (both fragment and vertex)
		Create();
		
		// [rad] Set params 
		m_cgParamModelViewProjMatrix = cgGetNamedParameter(m_cgShaderVertex, "uniModelViewProjMat");	
		
		m_cgParamSamplerCel = cgGetNamedParameter(m_cgShaderFragment, "uniTexCel");	
		m_cgParamSamplerOutline = cgGetNamedParameter(m_cgShaderFragment, "uniTexOutline");
		
		m_cgParamSamplerSize = cgGetNamedParameter(m_cgShaderFragment, "uniTexSize");
	}
コード例 #18
0
  __device__ void FFAST4(thread_info<GPU> ti,
                            kernel_image2d<i_float4> frame_color,
                            kernel_image2d<V> frame_s1,
                            kernel_image2d<V> frame_s2,
                            kernel_image2d<i_float1> pertinence,
                            float grad_thresh)
  {
    point2d<int> p = thread_pos2d(ti);
    if (!frame_s1.has(p))//; || track(p).x == 0)
      return;


    if (p.row() < 6 || p.row() >= pertinence.domain().nrows() - 6 ||
        p.col() < 6 || p.col() >= pertinence.domain().ncols() - 6)
    {
      pertinence(p).x = 0.f;
      return;
    }

    float pv;

    {
      float min_diff = 9999999.f;
      float max_single_diff = 0.f;
      pv = tex2D(flag<GPU>(), s1_tex, frame_s1, p).x;
      int sign = 0;
      for(int i = 0; i < 8; i++)
      {

        gl01f v1 = tex2D(flag<GPU>(), s1_tex, frame_s1,
			   p.col() + circle_r3[i][1],
			   p.row() + circle_r3[i][0]).x;

        gl01f v2 = tex2D(flag<GPU>(), s1_tex, frame_s1,
			   p.col() + circle_r3[(i+8)][1],
			   p.row() + circle_r3[(i+8)][0]).x;

        {
          float diff = pv -
            (v1 + v2) / 2.f;


          float adiff = fabs(diff);

          if (adiff < min_diff)
	  {
            min_diff = adiff;

	    if (min_diff < 0.01)
	    {
	      min_diff = 0;
	      break;
	    }
	  }
          if (max_single_diff < adiff) max_single_diff = adiff;
        }
      }

      pv = tex2D(flag<GPU>(), s2_tex, frame_s2, p.col()/2, p.row()/2).x;
      float min_diff_large = 9999999.f;
      float max_single_diff_large = 0.f;
      //int min_orientation_large;
      for(int i = 0; i < 8; i++)
      {

        gl01f v1 = tex2D(flag<GPU>(), s2_tex, frame_s2,
			   p.col()/2 + circle_r3[i][1],
			   p.row()/2 + circle_r3[i][0]);

        gl01f v2 = tex2D(flag<GPU>(), s2_tex, frame_s2,
			   p.col()/2 + circle_r3[(i+8)][1],
			   p.row()/2 + circle_r3[(i+8)][0]);

        {
          float diff = pv - (v1 + v2) / 2.f;
          float adiff = fabs(diff);

          if (adiff < min_diff_large)
          {
            min_diff_large = adiff;
	    if (min_diff_large < 0.01)
	    {
	      min_diff_large = 0;
	      break;
	    }
          }

          if (max_single_diff_large < adiff) max_single_diff_large = adiff;
        }

      }


      if (min_diff < min_diff_large)
      {
        min_diff = min_diff_large;
        max_single_diff = max_single_diff_large;
      }


      if (max_single_diff >= grad_thresh)
      {
        min_diff = min_diff / max_single_diff;
      }
      else
        min_diff = 0;

      pertinence(p) = min_diff;

    }

  }