Beispiel #1
0
// Initialise the decoder message arrays from the per-column value matrix_n[c].
//   matrix_n[c] ==  0 -> (p0, p1) = (1.0, 0.0)
//   matrix_n[c] ==  1 -> (p0, p1) = (0.0, 1.0)
//   matrix_n[c] == -1 -> (p0, p1) = (0.5, 0.5)
// Any other value leaves every output untouched.
// Work-items with r < ldpc_m clear r0/r1 and store (p0, p1) into q0/q1 at
// [r*ldpc_n + c]; the work-items with r == ldpc_m store (p0, p1) into
// prior_p0/prior_p1 at [c]. matrix_h is not used by this kernel.
__kernel void ieee_ldpc_decode_init(const int ldpc_n, const int ldpc_m, __global float * r0, __global float *r1, __global float *q0, __global float * q1, __global float * prior_p0, __global float * prior_p1,__global char * matrix_n,__global char * matrix_h){
	const size_t row=get_global_id(0);//ldpc_m
	const size_t col=get_global_id(1);//ldpc_n
	float p0;
	float p1;

	// Select the probability pair for this column.
	switch(matrix_n[col]){
	case 0:
		p0=1.0f;
		p1=0.0f;
		break;
	case 1:
		p0=0.0f;
		p1=1.0f;
		break;
	case -1:
		p0=0.5f;
		p1=0.5f;
		break;
	default:
		return;
	}

	if(row<ldpc_m){
		const size_t cell=row*ldpc_n+col;
		r0[cell]=0;
		r1[cell]=0;
		q0[cell]=p0;
		q1[cell]=p1;
	}else if(row==ldpc_m){
		// One extra row of work-items fills the per-column priors.
		prior_p0[col]=p0;
		prior_p1[col]=p1;
	}
}
 // Escape-time fractal over the rows [config[7], config[7] + config[8]).
 // config layout as read below: [0] max iterations, [1] width, [2] height,
 // [3] min_re, [4] max_re, [5] min_im, [6] max_im, [7] first row, [8] row count.
 // Each step computes z_re = re^2 - im^2 + c_re and z_im = -(2*re*im) + c_im
 // (note the negated imaginary part), stopping once |z|^2 > 4 or the
 // iteration limit is reached. The iteration count is stored (narrowed to
 // unsigned short) at output[x + (y - first_row) * width].
 __kernel void calculate_fractal(__global float*    config,
                                 __global unsigned short* output) {
   const unsigned first_row = (unsigned) config[7];
   const unsigned row_count = (unsigned) config[8];
   const unsigned x = get_global_id(0);
   const unsigned y = get_global_id(1);

   // Rows outside this kernel's slice do nothing.
   if (y < first_row || y >= (first_row + row_count)) {
     return;
   }

   const unsigned max_iter = config[0];
   const unsigned width = config[1];
   const unsigned height = config[2];
   const float min_re = config[3];
   const float max_re = config[4];
   const float min_im = config[5];
   const float max_im = config[6];

   // Map the pixel to a point in the complex plane.
   const float step_re = (max_re - min_re) / (width - 1);
   const float step_im = (max_im - min_im) / (height - 1);
   const float c_re = min_re + x * step_re;
   const float c_im = max_im - y * step_im;

   float z_re = c_re;
   float z_im = c_im;
   float mag2 = 0;
   unsigned cnt = 0;
   do {
     const float prev_re = z_re;
     const float prev_im = z_im;
     z_re = ( prev_re * prev_re - prev_im * prev_im ) + c_re;
     z_im = -1 * ( 2 * prev_re * prev_im ) + c_im;
     mag2 = z_re * z_re + z_im * z_im;
     ++cnt;
   } while (cnt < max_iter && mag2 <= 4.0f);

   output[x + (y - first_row) * width] = cnt;
 }
Beispiel #3
0
/*
 * Host-side OpenCL setup: queries platforms and devices through the
 * get_platforms()/get_devices() helpers, selects platform 0 / device 0,
 * creates a context and a profiling-enabled command queue, and defines the
 * source text of the "sum" kernel.
 * NOTE(review): only the beginning of this function is visible here; the
 * program build, kernel creation and the closing brace are outside this view.
 * NOTE(review): `context` and `queue` are not declared in this function --
 * presumably file-scope variables; `checkError` is presumably a macro that
 * inspects `errcode`. Confirm against the rest of the file.
 */
void cl_initilize() {
	cl_int errcode;

	cl_platform_id platform[10];
	get_platforms(platform);

	cl_device_id devices[10];
	int platform_index = 0;
	get_devices(platform[platform_index], devices);

	int device_index = 0;
	show_device_info(devices[device_index]);

	context = clCreateContext(NULL, 1, &devices[device_index], NULL, NULL, &errcode);
	checkError(clCreateContext);

	queue = clCreateCommandQueue (context, devices[device_index], CL_QUEUE_PROFILING_ENABLE, &errcode); // third argument: queue properties
	checkError(clCreateCommandQueue);

	/*
	 * Embedded kernel source. For work-item (i, j) it copies three bytes from
	 * src element (i*n + (n-1 - j)) to trg element (j*m + i).
	 * NOTE(review): the guards are `i > m` / `j > n`, so i == m and j == n
	 * still reach the copy -- presumably `>=` was intended; confirm against
	 * the global work size used at launch.
	 */
	char* source = "\n\
			  __kernel void sum(__global const uchar *src, __global uchar *trg, int m, int n)\n\
			  {\n\
			    int  i = get_global_id(0);\n\
			    int  j = get_global_id(1);\n\
 	            int SIdx = (i*n + (n-1 - j)) ;\n\
		    	int DIdx = (j*m + i) ;\n\
				if (i > m) return;\
				if (j > n) return;\
            	for (int c = 0; c < 3; c++)\n\
   		            trg[DIdx*3+c] = src[SIdx*3+c];\n\
			  }";
Beispiel #4
0
// Fill the vertex buffer with absolute atom positions.
// Work-item (i_atom, ibox) owns three consecutive floats starting at
// vertices[i_atom*3 + N_MAX_ATOMS*ibox*3]. When i_atom < n_atoms[ibox] they
// receive the box centre plus the atom position read at
// [i_atom + N_MAX_ATOMS*ibox]; otherwise they are zeroed.
__kernel void Viz(
        __global real_t* x_pos,
        __global real_t* y_pos,
        __global real_t* z_pos,
        const __global int* n_atoms,
        __global real_t* x_cen,
        __global real_t* y_cen,
        __global real_t* z_cen, 
        __global float* vertices) 
{
  const int atom = get_global_id(0);
  const int box = get_global_id(1);
  const int stride = N_MAX_ATOMS;

  // First of the three output floats for this (atom, box) pair.
  const int out = atom*3 + stride*box*3;

  if (atom >= n_atoms[box])
  {
    // Slot beyond the populated atoms of this box: emit a zero vertex.
    vertices[out + 0] = 0.0f;
    vertices[out + 1] = 0.0f;
    vertices[out + 2] = 0.0f;
    return;
  }

  const int src = atom + stride*box;
  vertices[out + 0] = x_cen[box] + x_pos[src];
  vertices[out + 1] = y_cen[box] + y_pos[src];
  vertices[out + 2] = z_cen[box] + z_pos[src];
}
Beispiel #5
0
// 3x3 convolution of an 8-bit single-channel image.
//   image           input pixels, indexed [y * image_width + x]
//   filtered_image  output pixels, same indexing
//   gaussian_kernel nine coefficients, consumed in row order (y = -1..1, x = -1..1)
//   image_dims      [0] = height, [1] = width
//   weight          divisor applied to the accumulated sum
// Work-items for which OutsideImage() returns true do nothing.
// NOTE(review): neighbour coordinates are not clamped here, so border handling
// depends entirely on what OutsideImage() rejects -- confirm that it also
// excludes the outermost pixel ring, otherwise x-1 / y-1 reads leave the row
// or the buffer.
kernel void gaussian_filter (global uchar* image,
                             global uchar* filtered_image,
                             global char*  gaussian_kernel,
                             global int*   image_dims,
                                    short   weight)
{
   const int image_height = image_dims[0];
   const int image_width = image_dims[1];

   const int global_x = get_global_id(0);
   const int global_y = get_global_id(1);
   const int2 pixel_pos = { global_x, global_y };

   if (OutsideImage(pixel_pos, image_width, image_height))
      return;

   short sum = 0;
   int index = 0;
   int2 pos;

   /* 3x3 Convolution */
   for(int y= -1; y<=1; y++)
   {
      for(int x=-1; x<=1; x++)
      {
         pos = pixel_pos + (int2)(x,y);
         sum += gaussian_kernel[index++] * image[pos.y * image_width + pos.x];
      }
   }

   sum /= weight;

   // Fixed: the original indexed with the undeclared identifier `img_width`.
   filtered_image[global_y * image_width + global_x] = (uchar) sum;
}
// Accumulate weight updates for one work-item's batch of images.
// For every image in the batch and every perceptron, the mean weighted input
// (dot product / IMAGE_SIZE) is computed. From the second image onwards the
// change of that value relative to the previous image ("activation") scales
// the negated image, which is added into this work-item's slice of `updates`.
// `updates` is accumulated with +=, so its prior contents are added to.
// IMAGE_SIZE, BATCH_ITEMS and NO_PERCEPTRONS come from outside this block.
__kernel void train(__global const float *perceptrons, __global const short *images, __global float *updates )
{
	int imagesOffset = get_global_id(0)*IMAGE_SIZE*BATCH_ITEMS;
	int updatesOffset = get_global_id(0)*NO_PERCEPTRONS*IMAGE_SIZE;

	int perceptron;
	float sum;
	int offset;
	int batch;
	float activationOld[NO_PERCEPTRONS];

	for(batch=0;batch<BATCH_ITEMS;batch++)
	{
		for(perceptron=0;perceptron<NO_PERCEPTRONS;perceptron++)
		{
			sum=0;
			for(offset=0;offset<IMAGE_SIZE;offset++)
			{
				sum+=perceptrons[perceptron*IMAGE_SIZE+offset]*images[imagesOffset+batch*IMAGE_SIZE+offset];
			}
			sum/=IMAGE_SIZE;

			// The first image only establishes the baseline. The original read
			// activationOld[] here before it had ever been written.
			if(batch>0)
			{
				const float activation = sum-activationOld[perceptron];
				for(offset=0;offset<IMAGE_SIZE;offset++)
				{
					updates[updatesOffset+perceptron*IMAGE_SIZE+offset]+=-images[imagesOffset+batch*IMAGE_SIZE+offset]*activation;
				}
			}
			activationOld[perceptron]=sum;
		}
	}
}
Beispiel #7
0
// Write each work-item's linearised 2-D global id into out[id].
// x runs along dimension 0, so the row stride is the extent of dimension 0.
// The original multiplied y by get_global_size(1), which produces colliding
// or out-of-range ids whenever the two extents differ.
void _Kernel_global_id2d(int* out)
{
   unsigned x = get_global_id(0);
   unsigned y = get_global_id(1);
   unsigned id = (y * get_global_size(0)) + x;
   out[id] = id;
}
Beispiel #8
0
// Parity check of one row for one batch entry.
// Work-items with nodeInd < ldpcM walk the linked list of that row
// (hRowFirstPtr -> hRowNextPtr, terminated by -1) and sum the src values of
// batch `batchInd` at the columns given by hCols. An odd sum increments
// flags[batchInd] atomically and prints the row number.
// Work-item (0, 0) additionally prints the first 100 entries of src.
// NOTE(review): that dump reads src[0..99] unconditionally -- confirm the
// buffer always holds at least 100 ints. `nonZeros` is not used here.
kernel void checkResult(global int* src,global int* hCols,global int* hRowFirstPtr,global int* hRowNextPtr,const int ldpcM,const int ldpcN,const int nonZeros,global int* flags){
    const int batch=get_global_id(0);
    const int node=get_global_id(1);//nonZeros

    if(node<ldpcM){
        // Here the node index is the row index.
        int parity=0;
        int ptr=hRowFirstPtr[node];
        while(ptr!=-1){
            parity+=src[batch*ldpcN+hCols[ptr]];
            ptr=hRowNextPtr[ptr];
        }
        if(parity%2!=0){
            atomic_inc(&flags[batch]);
            printf("!%d ",node);
        }
    }

    if(batch==0&&node==0){
        printf("CheckResult: src=");
        for(int i=0;i<100;++i){
            printf("%d ",src[i]);
        }
        printf("\n");
    }
}
Beispiel #9
0
// Subtract, in place, the mean over all volumes from every voxel time series.
// One work-item handles one (x, y, z) voxel; work-items outside
// DATA_W x DATA_H x DATA_D return immediately.
__kernel void RemoveMean(__global float* Volumes, 
					     __private int DATA_W, 
						 __private int DATA_H, 
						 __private int DATA_D, 
						 __private int NUMBER_OF_VOLUMES)
{
	const int x = get_global_id(0);
	const int y = get_global_id(1);
	const int z = get_global_id(2);

	if (!(x < DATA_W && y < DATA_H && z < DATA_D))
	{
		return;
	}

	// First pass: accumulate the series and derive its mean.
	float mean = 0.0f;
	int v = 0;
	while (v < NUMBER_OF_VOLUMES)
	{
		mean += Volumes[Calculate4DIndex(x,y,z,v,DATA_W,DATA_H,DATA_D)];
		v++;
	}
	mean /= (float)NUMBER_OF_VOLUMES;

	// Second pass: store the residual.
	v = 0;
	while (v < NUMBER_OF_VOLUMES)
	{
		Volumes[Calculate4DIndex(x,y,z,v,DATA_W,DATA_H,DATA_D)] -= mean;
		v++;
	}
}
Beispiel #10
0
// Clears *flag when the current decision vector is not acceptable:
//  - work-items in row 0 clear it if their column's matrix_n entry is -1;
//  - work-items in column 0 count, over their row, the positions where
//    matrix_h[r][k] * matrix_n[k] == 1 and clear it if that count is odd.
// The kernel only ever writes 0 to *flag. r0, r1, q0, q1, prior_p0 and
// prior_p1 are not touched here.
__kernel void ieee_ldpc_decode_iteration3(const int ldpc_n, const int ldpc_m , __global float * r0, __global float *r1, __global float *q0, __global float * q1, __global float * prior_p0, __global float * prior_p1,__global char * matrix_n,__global char * matrix_h,__global int *flag){
	const int row=get_global_id(0);//ldpc_m
	const int col=get_global_id(1);//ldpc_n

	if(row==0 && matrix_n[col]==-1){
		*flag=0;
	}

	// Only the first work-item of each row performs the parity count.
	if(col!=0){
		return;
	}

	int ones=0;
	for(int k=0;k<ldpc_n;k++){
		if(matrix_h[row*ldpc_n+k]*matrix_n[k]==1){
			++ones;
		}
	}
	if(ones%2!=0){
		*flag=0;
	}
}
// Add this threshold level's contribution to the per-voxel TFCE value:
// sqrt(cluster size) * threshold^2, for voxels inside the mask whose cluster
// index is below DATA_W * DATA_H * DATA_D. Other voxels are left unchanged.
__kernel void CalculateTFCEValues(__global float* TFCE_Values,
								  __global const float* Mask,
	  	    					  __private float threshold,
								  __global const unsigned int* Cluster_Indices,
							      __global const unsigned int* Cluster_Sizes,
								  __private int DATA_W,
								  __private int DATA_H,
								  __private int DATA_D)
{
	const int x = get_global_id(0);
	const int y = get_global_id(1);
	const int z = get_global_id(2);

	if (x >= DATA_W || y >= DATA_H || z >= DATA_D)
		return;

	// Linear voxel index, shared by every per-voxel array below.
	const int voxel = Calculate3DIndex(x,y,z,DATA_W,DATA_H);

	if ( Mask[voxel] != 1.0f )
		return;

	// Indices at or above the voxel count mean "not in a cluster".
	const unsigned int cluster = Cluster_Indices[voxel];
	if ( !(cluster < (DATA_W * DATA_H * DATA_D)) )
		return;

	const float extent = (float)Cluster_Sizes[cluster];
	const float contribution = sqrt(extent) * threshold * threshold;

	TFCE_Values[voxel] += contribution;
}
// Accumulate the mass of every cluster: for each masked voxel whose statistic
// exceeds the threshold, add (statistic * 10000) truncated to unsigned int to
// the entry of Cluster_Masses selected by the voxel's cluster index.
__kernel void CalculateClusterMasses(__global unsigned int* Cluster_Indices,
						  	  	     volatile __global unsigned int* Cluster_Masses,
						  	  	     __global const float* Data,
						  	  	     __global const float* Mask,
						  	  	     __private float threshold,
									 __private int contrast,
						  	  	     __private int DATA_W,
						  	  	     __private int DATA_H,
						  	  	     __private int DATA_D)
{
	const int x = get_global_id(0);
	const int y = get_global_id(1);
	const int z = get_global_id(2);

	if (x >= DATA_W || y >= DATA_H || z >= DATA_D)
		return;

	const int voxel = Calculate3DIndex(x,y,z,DATA_W,DATA_H);
	if ( Mask[voxel] != 1.0f )
		return;

	const float statistic = Data[Calculate4DIndex(x,y,z,contrast,DATA_W,DATA_H,DATA_D)];
	if ( !(statistic > threshold) )
		return;

	// The float mass is scaled and accumulated through an integer atomic.
	atomic_add( &Cluster_Masses[Cluster_Indices[voxel]], (unsigned int)(statistic * 10000.0f) );
}
Beispiel #13
0
// Classify one image per work-item.
// Each class owns IMAGE_SIZE weights followed by one bias in `perceptrons`.
// The score is sum(weight * pixel / 128) + bias, and the index of the highest
// score is written to labels[get_global_id(0)].
// Changes from the original:
//  - maxLabel starts at 0, so a defined label is stored even when no score
//    exceeds -FLT_MAX (NaN scores, or NO_CLASSES == 0);
//  - the divisor is the float literal 128.0f rather than the double literal
//    128., so the kernel no longer depends on double-precision support.
__kernel void test(__global const float *perceptrons, __global const short *images, __global short *labels )
{
    const int imagesOffset = get_global_id(0)*IMAGE_SIZE;
    const int labelsOffset = get_global_id(0);
    float max=-FLT_MAX;
    int maxLabel=0;

    for(int perceptron=0; perceptron<NO_CLASSES; perceptron++)
    {
        // Row of this class: IMAGE_SIZE weights, then the bias.
        const int row=perceptron*(IMAGE_SIZE+1);
        float sum=0;
        for(int offset=0; offset<IMAGE_SIZE; offset++)
        {
            sum+=perceptrons[row+offset]*images[imagesOffset+offset]/128.0f;
        }
        sum+=perceptrons[row+IMAGE_SIZE];
        if(sum>max)
        {
            max=sum;
            maxLabel=perceptron;
        }
    }
    labels[labelsOffset]=maxLabel;
}
// Give every voxel its starting cluster label.
// Masked voxels above the threshold get their own linear index as the label;
// every other voxel gets DATA_W * DATA_H * DATA_D * 3.
__kernel void SetStartClusterIndicesKernel(__global unsigned int* Cluster_Indices,
										   __global const float* Data,
										   __global const float* Mask,
										   __private float threshold,
 									       __private int contrast,
										   __private int DATA_W,
										   __private int DATA_H,
										   __private int DATA_D)
{
	const int x = get_global_id(0);
	const int y = get_global_id(1);
	const int z = get_global_id(2);

	if (x >= DATA_W || y >= DATA_H || z >= DATA_D)
		return;

	const int voxel = Calculate3DIndex(x,y,z,DATA_W,DATA_H);

	// Data is only read when the mask test passes, as in a short-circuit &&.
	const bool active = (Mask[voxel] == 1.0f)
		&& (Data[Calculate4DIndex(x,y,z,contrast,DATA_W,DATA_H,DATA_D)] > threshold);

	// Inactive voxels receive a label larger than any voxel index.
	Cluster_Indices[voxel] = active
		? (unsigned int)voxel
		: (unsigned int)(DATA_W * DATA_H * DATA_D * 3);
}
// Count the voxels of every cluster: each masked voxel above the threshold
// atomically adds one to the Cluster_Sizes entry selected by its cluster index.
__kernel void CalculateClusterSizes(__global unsigned int* Cluster_Indices,
						  	  	    volatile __global unsigned int* Cluster_Sizes,
						  	  	    __global const float* Data,
						  	  	    __global const float* Mask,
						  	  	    __private float threshold,	
									__private int contrast,
						  	  	    __private int DATA_W,
						  	  	    __private int DATA_H,
						  	  	    __private int DATA_D)
{
	const int x = get_global_id(0);
	const int y = get_global_id(1);
	const int z = get_global_id(2);

	if (x >= DATA_W || y >= DATA_H || z >= DATA_D)
		return;

	const int voxel = Calculate3DIndex(x,y,z,DATA_W,DATA_H);
	if ( Mask[voxel] != 1.0f )
		return;

	// Sub-threshold voxels do not belong to any cluster.
	if ( !(Data[Calculate4DIndex(x,y,z,contrast,DATA_W,DATA_H,DATA_D)] > threshold) )
		return;

	atomic_add(&Cluster_Sizes[Cluster_Indices[voxel]], 1u);
}
// Replace each active voxel's label with the root of its label chain.
// Starting from the voxel's current label, Cluster_Indices[label] is followed
// until an entry that refers to itself is reached; that value is stored back.
// Voxels outside the mask or at/below the threshold are left untouched.
__kernel void ClusterizeRelabel(__global unsigned int* Cluster_Indices,
						  	  	__global const float* Data,
						  	  	__global const float* Mask,
						  	  	__private float threshold,
								__private int contrast,
						  	  	__private int DATA_W,
						  	  	__private int DATA_H,
						  	  	__private int DATA_D)
{
	const int x = get_global_id(0);
	const int y = get_global_id(1);
	const int z = get_global_id(2);

	if (x >= DATA_W || y >= DATA_H || z >= DATA_D)
		return;

	const int voxel = Calculate3DIndex(x,y,z,DATA_W,DATA_H);

	if ( Mask[voxel] != 1.0f )
		return;
	if ( !(Data[Calculate4DIndex(x,y,z,contrast,DATA_W,DATA_H,DATA_D)] > threshold) )
		return;

	// Chase the chain of labels to its fixed point.
	unsigned int label = Cluster_Indices[voxel];
	for (unsigned int next = Cluster_Indices[label]; next != label; next = Cluster_Indices[label])
	{
		label = next;
	}
	Cluster_Indices[voxel] = label;
}
Beispiel #17
0
// Build, for every (image nth, feature kth) pair, a transformed copy of the
// feature image cur_y[kth] and then combine the transformed features of each
// image into z_by_ry.
//   cur_y          feature images, D ints per feature
//   cur_z          per (image, feature) value; 0 means the feature is skipped
//   cur_r          NUM_TRANS ints per (image, feature): V_SCALE, H_SCALE,
//                  V_TRANS, H_TRANS, in that order
//   transformed_y  output/scratch, D ints per (image, feature)
//   temp_y         scratch, D ints per (image, feature)
//   z_by_ry        output, D ints per image
//   N              not used inside this kernel
//   D, K           ints per feature image / features per image
//   f_img_width    feature image width; the height is derived as D / f_img_width
// scale_global, v_translate_global and h_translate_global are defined outside
// this block; the comments on them below restate the original comments only.
kernel void compute_z_by_ry(global int *cur_y, global int *cur_z, global int *cur_r,
			    global int *transformed_y, global int *temp_y, global int *z_by_ry,
			    uint N, uint D, uint K, uint f_img_width) {

  const uint V_SCALE = 0, H_SCALE = 1, V_TRANS = 2, H_TRANS = 3, NUM_TRANS = 4;
  uint nth = get_global_id(0); // nth is the index of images
  uint kth = get_global_id(1); // kth is the index of features
  uint f_img_height = D / f_img_width;
  
  // A feature whose cur_z entry is 0 contributes an all-zero image.
  if (cur_z[nth * K + kth] == 0) {
    for (int dth = 0; dth < D; dth++) {
      transformed_y[nth * K * D + kth * D + dth] = 0;
    }
  } else {

  // Start from an untouched copy of the feature image in this pair's temp_y slice.
  for (int dth = 0; dth < D; dth++) {
    temp_y[nth * K * D + kth * D + dth] = cur_y[kth * D + dth];
  }
  
  // vertically scale the feature image
  uint v_scale = cur_r[nth * (K * NUM_TRANS) + kth * NUM_TRANS + V_SCALE];
  scale_global(temp_y, transformed_y, nth, kth, f_img_height, f_img_width, 0, v_scale, K, D);
  // Copy the result back so the next step uses it as its input.
  for (int dth = 0; dth < D; dth++) {
    temp_y[nth * K * D + kth * D + dth] = transformed_y[nth * K * D + kth * D + dth];
  }
  
  // horizontal scale the feature image
  uint h_scale = cur_r[nth * (K * NUM_TRANS) + kth * NUM_TRANS + H_SCALE];
  scale_global(temp_y, transformed_y, nth, kth, f_img_height, f_img_width, h_scale, 0, K, D);
  for (int dth = 0; dth < D; dth++) {
    temp_y[nth * K * D + kth * D + dth] = transformed_y[nth * K * D + kth * D + dth];
  }

  // vertically translate the feature image
  uint v_dist = cur_r[nth * (K * NUM_TRANS) + kth * NUM_TRANS + V_TRANS];
  v_translate_global(temp_y, transformed_y, nth, kth, f_img_height, f_img_width, v_dist, K, D);
  for (int dth = 0; dth < D; dth++) {
    temp_y[nth * K * D + kth * D + dth] = transformed_y[nth * K * D + kth * D + dth];
  }
  
  // horizontally translate the feature image
  uint h_dist = cur_r[nth * (K * NUM_TRANS) + kth * NUM_TRANS + H_TRANS];
  h_translate_global(temp_y, transformed_y, nth, kth, f_img_height, f_img_width, h_dist, K, D);
  }
  // wait until copying is done
  // NOTE(review): barrier() synchronizes only the work-items of one work-group,
  // while the reduction below reads transformed_y slices written by every kth
  // of the same image -- presumably all K features of an image are launched in
  // the same work-group; confirm the launch configuration.
  barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE); 

  
  /* at this point, for each object, a transformed y has been generated */
  
  // One work-item per image (kth == 0) sums transformed_y * cur_z over all features.
  if (kth == 0) {
    for (int dth = 0; dth < D; dth++) {
      z_by_ry[nth * D + dth] = 0;
      for (int k = 0; k < K; k++) {
	z_by_ry[nth * D + dth] += transformed_y[nth * K * D + k * D + dth] * cur_z[nth * K + k];
      }
    }
  }
}
// Transpose the aRows x aColumns row-major matrix `a` into `out`
// (aColumns x aRows, row-major). Work-item (i, j) copies a[j][i] to out[i][j].
// Work-items outside the matrix return without touching memory, so the global
// work size may be rounded up past the matrix dimensions; the original read
// and wrote out of bounds in that case.
__kernel void transpose(
    __global const double* a, int aRows, int aColumns,
    __global double* out
) {
    const int i = get_global_id(0);
    const int j = get_global_id(1);

    if (i >= aColumns || j >= aRows) {
        return;
    }

    const int outColumns = aRows;
    out[i * outColumns + j] = a[j * aColumns + i];
}
// Single-precision transpose of the aRows x aColumns row-major matrix `a` into
// `out` (aColumns x aRows, row-major). Work-item (i, j) copies a[j][i] to
// out[i][j]. Work-items outside the matrix return without touching memory, so
// the global work size may be rounded up past the matrix dimensions; the
// original read and wrote out of bounds in that case.
__kernel void transposeFloat(
    __global const float* a, int aRows, int aColumns,
    __global float* out
) {
    const int i = get_global_id(0);
    const int j = get_global_id(1);

    if (i >= aColumns || j >= aRows) {
        return;
    }

    const int outColumns = aRows;
    out[i * outColumns + j] = a[j * aColumns + i];
}
Beispiel #20
0
// Ray-march one pixel through the volume `data`.
// The ray runs from the point stored in `entry` to the point stored in `exit`
// (both sampled at pixel / 512), in floor(100 * ray length) equal steps. At
// each step evaldx() yields a scalar that is looked up in the `transfer`
// image; the colour is accumulated into result.xyz while result.w is
// multiplied by pow(colour.w, 1/100). Marching stops early once result.w drops
// below 0.05, in which case result.w is stored as 0.
// `samplersrc` and `evaldx` are defined outside this block.
__kernel void kern(
    __read_only image2d_t entry,
    __read_only image2d_t exit,
    __write_only image2d_t tex,
    __read_only image1d_t transfer,
    __global float * data,
    int width,
    int height,
    int depth
    )
{
    const int px = get_global_id(0);
    const int py = get_global_id(1);

    const int2 coords = (int2)(px,py);
    const float2 tcoords = (float2)(px,py)/512.0f;

    const float Samplings = 100.0f;
    const float delta = 1.0f/Samplings;

    // Ray end points and per-step increment.
    float3 pos = read_imagef(entry,samplersrc,tcoords).xyz;
    const float3 last = read_imagef(exit,samplersrc,tcoords).xyz;
    const float3 ray = last - pos;
    const int steps = (int)(floor(Samplings * length(ray)));
    const float3 stride = ray / (float)(steps);

    float4 result = (float4)(0.0f,0.0f,0.0f,1.0f);

    for (int i=0; i<steps; i++) {
        float valuex = 0.0f;
        float3 valued = 0.0f;

        // Sample the volume (value and gradient) at the current position.
        evaldx(data, pos.x, pos.y, pos.z, width, height, depth, &valuex, &valued);

        // Classify the sample through the transfer function.
        const float4 color = read_imagef(transfer,samplersrc,valuex);

        result.w *= pow(color.w,delta);
        result.xyz += result.w*color.xyz*delta;
        if (result.w < 0.05f) {
            result.w = 0.0f;
            break;
        }
        pos += stride;
    }

    write_imagef(tex, coords, result);
}
// One cell of a cellular neural network, integrated over 50 steps of
// dt = 1/50. Each step sums the 5x5 neighbourhood of Vy weighted by template
// A and of Vu weighted by template B (neighbours outside the image are
// clamped to the nearest edge cell), updates the state
// Vx += (a + b + threshold - Vx) * dt and writes the state clipped to
// [-1, 1] into Vy.
// NOTE(review): the accumulators a and b are never reset between steps, so
// each step adds to the previous sums -- confirm this is intended.
// NOTE(review): neighbouring cells are read from Vy while other work-items
// write their own Vy entry; no synchronization is visible in this block.
__kernel void do_cellular(
    __global int *mode, __global int *in_width, __global int *in_height,
    __global double *in_threshold,
    __global double *Vx, __global double *Vu, __global double *Vy) {

  const int cmode  = (*mode);
  const int X      = get_global_id(0);
  const int Y      = get_global_id(1);
  const int width  = (*in_width);
  const int height = (*in_height);
  const double thr = (*in_threshold);
  const int cell   = Y * width + X;

  // Set up the CNN templates.
  double A[5][5], B[5][5];
  set_template(cmode, A, B);

  // Number of integration sub-intervals and the resulting step size.
  const int n = 50;
  const double d = 0.0, u = 1.0;
  const double dt = (u - d) / n;

  double a = 0.0, b = 0.0;

  for(int cnt = 0; cnt < n; cnt++) {
    // Apply the templates to the 5x5 neighbourhood.
    for(int y = 0; y < 5; y++) {
      const int posY = Y + y - 2;
      const int rowY = (posY < 0 ? 0 : ((posY > (height - 1)) ? height - 1: posY));
      for(int x = 0; x < 5; x++) {
        const int posX = X + x - 2;
        const int colX = ((posX < 0) ? 0 : ((posX > (width - 1)) ? width - 1: posX));
        const int neighbour = rowY * width + colX;
        a += A[y][x] * Vy[neighbour];
        b += B[y][x] * Vu[neighbour];
      }
    }

    const double output_vx = a + b + thr - Vx[cell];
    Vx[cell] += output_vx * dt;

    // Output is the state clipped to [-1, 1].
    const double vx = Vx[cell];
    Vy[cell] = (vx > 1.0) ? 1.0 : ((vx < -1.0) ? -1.0 : vx);
  }
}
Beispiel #22
0
// Smoke-test kernel: only work-item (0, 0) does anything. It stores 3 in
// flags[0] when codes[0] equals 0, otherwise 2, and then sets src[0] to 1.
kernel void test(global int* src,global int* flags,global float* codes){
    const int batch=get_global_id(0);
    const int node=get_global_id(1);//nonZeros

    if(batch!=0 || node!=0){
        return;
    }

    flags[0]=(codes[0]==0) ? 3 : 2;
    src[0]=1;
}
Beispiel #23
0
		// Element-wise masking: for column o (dimension 0) of batch row b
		// (dimension 1), output = mask[o] * input, with rows input_dim
		// elements apart and each buffer shifted by its own offset.
		__kernel void forward(
			const __global float* input, const int input_offset,
			__global float* output, const int output_offset,
			const __global float* mask,
			const int input_dim)
		{
			const int col = get_global_id(0);
			const int row = get_global_id(1);

			// Position inside the batch, before the per-buffer offset.
			const int pos = col+input_dim*row;

			output[pos+output_offset] = mask[col]*input[pos+input_offset];
		}
Beispiel #24
0
// Emit the four vertices of one arrow per line segment.
//   pointData      3 floats per line: centre of the arrow
//   directionData  3 floats per line: arrow vector
//   vertexBuffer   12 floats per line: tail, head and the two barb vertices
//   camPos         camera position, used to orient the barbs
//   Nlines         number of lines; work-items at or above it do nothing
// The tail and head are pos -/+ 0.5*dir. The barbs sit at pos + 0.3*dir,
// offset by 0.1*|dir| to either side along normalize(cross(head - camPos, dir)).
// The original scaled the offset with the double literal 0.1, making the
// scalar wider than the float4 it multiplies; the literal is now 0.1f.
__kernel void
LineRenderKernel(const __global float* pointData,
		 const __global float* directionData,
		 __global float * vertexBuffer,
		 float4 camPos, uint Nlines)
{
  if (get_global_id(0) >= Nlines) return;

  // Advance each pointer to this line's record.
  pointData += get_global_id(0) * 3;
  directionData += get_global_id(0) * 3;
  vertexBuffer += 4 * 3 * get_global_id(0);

  float4 pos;
  pos.x = pointData[0];
  pos.y = pointData[1];
  pos.z = pointData[2];
  pos.w = 0;

  float4 dir;
  dir.x = directionData[0];
  dir.y = directionData[1];
  dir.z = directionData[2];
  dir.w = 0;

  // Arrow tail
  float4 point = pos - 0.5f * dir;
  vertexBuffer[0] = point.x;
  vertexBuffer[1] = point.y;
  vertexBuffer[2] = point.z;

  // Arrow head
  point = pos + 0.5f * dir;
  vertexBuffer[3] = point.x;
  vertexBuffer[4] = point.y;
  vertexBuffer[5] = point.z;

  // Sideways direction, perpendicular to both the view ray and the arrow.
  const float4 pointToView = point - camPos;
  const float4 sidesVec = normalize(cross(pointToView, dir));
  const float4 barbOffset = 0.1f * length(dir) * sidesVec;

  // Barb vertices
  point = pos + 0.3f * dir + barbOffset;
  vertexBuffer[6] = point.x;
  vertexBuffer[7] = point.y;
  vertexBuffer[8] = point.z;

  point = pos + 0.3f * dir - barbOffset;
  vertexBuffer[9] = point.x;
  vertexBuffer[10] = point.y;
  vertexBuffer[11] = point.z;
}
Beispiel #25
0
// Recompute the r0/r1 messages of one node for one batch entry.
// The product of (q0 - q1) is taken over every node in the same row as
// nodeInd (linked list hRowFirstPtr -> hRowNextPtr, terminated by -1),
// including nodeInd itself if the list contains it; then
// r0 = (1 + product) / 2 and r1 = (1 - product) / 2.
// `ldpcM` is not used inside this kernel.
kernel void refreshR(global int* hRows,global float* q0,global float* q1, global float* r0,global float* r1,global int* hRowFirstPtr,global int* hRowNextPtr,const int ldpcM,const int nonZeros){
    const int batch=get_global_id(0);
    const int node=get_global_id(1);//nonZeros
    const int row=hRows[node];//0-ldpcM

    // Start of this batch entry's slice in the q/r arrays.
    const int base=batch*nonZeros;

    float product=1;
    int ptr=hRowFirstPtr[row];
    while(ptr!=-1){
        product*=q0[base+ptr]-q1[base+ptr];
        ptr=hRowNextPtr[ptr];
    }

    r0[base+node]=(1+product)/2;
    r1[base+node]=(1-product)/2;
}
Beispiel #26
0
// Fill the field entry of pixel (y, x) with a random target position and its
// distance: nff(y,x,0) and nff(y,x,1) receive the random row/column drawn from
// [0, effectiveHeight) / [0, effectiveWidth) as interpreted by random(), and
// nff(y,x,2) receives D(y, x, ty, tx).
// nff, random and D are defined outside this block.
// NOTE(review): nff and D are presumably macros that refer to the kernel's
// parameters by name; the parameter names are therefore kept unchanged.
// Fixed: the seed was written `y<<16+x`, which parses as `y << (16 + x)`
// because + binds tighter than <<; it is now `(y << 16) + x`.
__kernel void randomfill(const int patchHeight, const int patchWidth,
			const int height,const int width,
			__global double *img1,__global double *img2, __global double * output)
{
	const int effectiveWidth=width-patchWidth;
	const int effectiveHeight=height-patchHeight;
	int y = get_global_id(0);
	int x = get_global_id(1);
	int seed=(y<<16)+x;
	// The row draw is also stored back into seed and feeds the column draw.
	int ty=seed=nff(y,x,0)=random(0,effectiveHeight,seed);
	int tx=nff(y,x,1)=random(0,effectiveWidth,seed);
	nff(y,x,2)=D(y,x, ty,tx );
}
Beispiel #27
0
// Render one pixel: dimension 0 is the row (y), dimension 1 the column (x).
// The pixel's random seed is y*w + x. The colour returned by
// getPixelAntialiased() is scaled by 255 and written with its components in
// z, y, x order plus a fourth component of 255.
// Stack, Viewport, Vector, setupViewport and getPixelAntialiased are defined
// outside this block.
__kernel void render(__write_only __global image2d_t targetImage, int w, int h, int animation) {
	const int row = get_global_id(0);
	const int col = get_global_id(1);

	ulong seed = row*w+col;

	Stack stack;
	stack.size=0;

	Viewport viewport = setupViewport(col, row, w, h, animation);
	Vector color = getPixelAntialiased(viewport, &stack, &seed);

	uint4 intColor = {color.z*255, color.y*255, color.x*255, 255};
	int2 posOut = {col, row};
	write_imageui(targetImage, posOut, intColor);
}
    // Sobel gradient magnitude, computed independently per colour channel.
    // Neighbour naming: the first word is the row (y-1 = "low", y+1 = "high"),
    // the second the column (x-1 = "left", x+1 = "right").
    //   gx = (right column) - (left column), middle row weighted by 2
    //   gy = (low row) - (high row), middle column weighted by 2
    // The output pixel is (sqrt(gx^2 + gy^2), 1.0). `sampler` is defined
    // outside this block.
    kernel void sobel_rgb(read_only image2d_t src, write_only image2d_t dst)
    {
        const int x = (int)get_global_id(0);
        const int y = (int)get_global_id(1);

        if (x >= get_image_width(src) || y >= get_image_height(src))
                return;

        // Row y-1
        const float4 lowLeft   = read_imagef(src, sampler, (int2)(x - 1, y - 1));
        const float4 lowMid    = read_imagef(src, sampler, (int2)(x,     y - 1));
        const float4 lowRight  = read_imagef(src, sampler, (int2)(x + 1, y - 1));

        // Row y (the centre pixel itself has weight 0 in both kernels)
        const float4 midLeft   = read_imagef(src, sampler, (int2)(x - 1, y));
        const float4 midRight  = read_imagef(src, sampler, (int2)(x + 1, y));

        // Row y+1
        const float4 highLeft  = read_imagef(src, sampler, (int2)(x - 1, y + 1));
        const float4 highMid   = read_imagef(src, sampler, (int2)(x,     y + 1));
        const float4 highRight = read_imagef(src, sampler, (int2)(x + 1, y + 1));

        // Horizontal gradient
        const float3 gx = -lowLeft.xyz + lowRight.xyz +
                          2.0f * (midRight.xyz - midLeft.xyz)
                          -highLeft.xyz + highRight.xyz;

        // Vertical gradient
        const float3 gy = lowLeft.xyz + lowRight.xyz +
                          2.0f * (- highMid.xyz + lowMid.xyz) -
                          highLeft.xyz - highRight.xyz;

        // Gradient magnitude per channel
        const float3 g = native_sqrt(gx * gx + gy * gy);

        write_imagef(dst, (int2)(x, y), (float4)(g.x, g.y, g.z, 1.0f));
    }
Beispiel #29
0
// Composite two source images over the previous destination image.
// Source A is sampled at the destination pixel shifted by (xA, yA), source B
// at the pixel shifted by (xB, yB). Each layer is applied as
// layer + below * (1 - layer.w), A first and then B. `sampler` is defined
// outside this block.
__kernel void merge(__write_only image2d_t destination, __read_only image2d_t previousDestination, __read_only image2d_t sourceA, __read_only image2d_t sourceB, int xA, int yA, int xB, int yB)
{
    const int2 dst = (int2) (get_global_id(0), get_global_id(1));
    const int2 posA = (int2) (dst.x + xA, dst.y + yA);
    const int2 posB = (int2) (dst.x + xB, dst.y + yB);

    float4 blended = read_imagef(previousDestination, sampler, dst);
    const float4 layerA = read_imagef(sourceA, sampler, posA);
    const float4 layerB = read_imagef(sourceB, sampler, posB);

    blended = layerA + blended * (1 - layerA.w);
    blended = layerB + blended * (1 - layerB.w);

    write_imagef(destination, dst, blended);
}
// Per-voxel permutation p-value for cluster-mass inference.
// For a masked voxel whose test value exceeds the threshold, the value stored
// for its cluster in Cluster_Sizes is divided by 10000 and compared against
// every entry of c_Max_Values; the stored result is the fraction of
// permutations it exceeds. Every other voxel inside the volume gets 0.
__kernel void CalculatePermutationPValuesClusterMassInference(__global float* P_Values,
														      __global const float* Test_Values,
															  __global const unsigned int* Cluster_Indices,
															  __global const unsigned int* Cluster_Sizes,
							   	   	   	   	   	  	  	  	  __global const float* Mask,
							   	   	   	   	   	  	  	  	  __global const float* c_Max_Values,
							   	   	   	   	   	  	  	  	  __private float threshold,
							   	   	   	   	   	  	  	  	  __private int contrast,
							   	   	   	   	   	  	  	  	  __private int DATA_W,
							   	   	   	   	   	  	  	  	  __private int DATA_H,
							   	   	   	   	   	  	  	  	  __private int DATA_D,
							   	   	   	   	   	  	  	  	  __private int NUMBER_OF_PERMUTATIONS)
{
	const int x = get_global_id(0);
	const int y = get_global_id(1);
	const int z = get_global_id(2);

	if (x >= DATA_W || y >= DATA_H || z >= DATA_D)
		return;

	const int voxel3 = Calculate3DIndex(x, y, z, DATA_W, DATA_H);
	const int voxel4 = Calculate4DIndex(x, y, z, contrast, DATA_W, DATA_H, DATA_D);

	// Voxels outside the mask or outside any cluster keep a p-value of 0.
	float pValue = 0.0f;

	if ( (Mask[voxel3] == 1.0f) && (Test_Values[voxel4] > threshold) )
	{
		// The stored value is divided by 10 000 to undo the scaling applied
		// when cluster masses were accumulated as integers.
		const float clusterMass = ((float)Cluster_Sizes[Cluster_Indices[voxel3]]) / 10000.0f;

		float exceeded = 0.0f;
		for (int p = 0; p < NUMBER_OF_PERMUTATIONS; p++)
		{
			if (clusterMass > c_Max_Values[p])
			{
				exceeded += 1.0f;
			}
		}
		pValue = exceeded / (float)NUMBER_OF_PERMUTATIONS;
	}

	P_Values[voxel4] = pValue;
}