/*
 * Initialise the LDPC decoder message arrays and the per-column priors.
 *
 * Work-item layout: dimension 0 is the row index r, dimension 1 the column
 * index c.  Rows 0 .. ldpc_m-1 fill the r0/r1/q0/q1 entries at
 * r*ldpc_n + c; the additional row r == ldpc_m writes prior_p0/prior_p1[c].
 *
 * matrix_n[c] selects the (p0, p1) pair stored in q0/q1 and in the priors:
 *      0 -> (1.0, 0.0)     1 -> (0.0, 1.0)     -1 -> (0.5, 0.5)
 * Any other value leaves every output untouched.  r0/r1 are always zeroed.
 *
 * matrix_h is part of the signature but is not read by this kernel.
 */
__kernel void ieee_ldpc_decode_init(const int ldpc_n, const int ldpc_m,
                                    __global float * r0, __global float *r1,
                                    __global float *q0, __global float * q1,
                                    __global float * prior_p0, __global float * prior_p1,
                                    __global char * matrix_n, __global char * matrix_h)
{
    const size_t r = get_global_id(0);
    const size_t c = get_global_id(1);

    /* Work-items past the last column (padded launch) must not touch memory. */
    if (c >= (size_t)ldpc_n) {
        return;
    }

    float p0;
    float p1;
    switch (matrix_n[c]) {
    case 0:
        p0 = 1.0f;
        p1 = 0.0f;
        break;
    case 1:
        p0 = 0.0f;
        p1 = 1.0f;
        break;
    case -1:
        p0 = 0.5f;
        p1 = 0.5f;
        break;
    default:
        return; /* unrecognised marker: nothing is initialised */
    }

    if (r < (size_t)ldpc_m) {
        const size_t cell = r * ldpc_n + c;
        r0[cell] = 0;
        r1[cell] = 0;
        q0[cell] = p0;
        q1[cell] = p1;
    } else if (r == (size_t)ldpc_m) {
        prior_p0[c] = p0;
        prior_p1[c] = p1;
    }
}
/*
 * Escape-time fractal renderer for a horizontal band of an image.
 *
 * config layout (all values stored as float):
 *   [0] iteration limit   [1] image width    [2] image height
 *   [3] min real          [4] max real       [5] min imaginary
 *   [6] max imaginary     [7] first row of the band   [8] rows in the band
 *
 * Work-item (x, y) handles one pixel.  Only rows offset <= y < offset+rows
 * are processed, and the result is stored relative to the band start at
 * output[x + (y - offset) * width].
 *
 * The iteration negates the imaginary part of z*z before adding the
 * constant, i.e. z_im' = -(2*re*im) + c_im.
 */
__kernel void calculate_fractal(__global float* config, __global unsigned short* output)
{
    const unsigned offset = (unsigned) config[7];
    const unsigned rows = (unsigned) config[8];
    const unsigned x = get_global_id(0);
    const unsigned y = get_global_id(1);

    if (y < offset || y >= (offset + rows)) {
        return;
    }

    const unsigned iterations = config[0];
    const unsigned width = config[1];
    const unsigned height = config[2];

    /* A padded launch must not spill into the next row or past the buffer. */
    if (x >= width) {
        return;
    }

    const float min_re = config[3];
    const float max_re = config[4];
    const float min_im = config[5];
    const float max_im = config[6];

    const float re_factor = (max_re - min_re) / (width - 1);
    const float im_factor = (max_im - min_im) / (height - 1);

    /* The pixel's position in the complex plane is both z0 and the constant. */
    const float const_re = min_re + x * re_factor;
    const float const_im = max_im - y * im_factor;
    float z_re = const_re;
    float z_im = const_im;

    /* At least one iteration is always performed (do/while). */
    unsigned cnt = 0;
    float cond = 0;
    do {
        const float tmp_re = z_re;
        const float tmp_im = z_im;
        z_re = ( tmp_re * tmp_re - tmp_im * tmp_im ) + const_re;
        z_im = -1 * ( 2 * tmp_re * tmp_im ) + const_im;
        cond = z_re * z_re + z_im * z_im;
        ++cnt;
    } while (cnt < iterations && cond <= 4.0f);

    output[x + (y - offset) * width] = cnt;
}
void cl_initilize() { cl_int errcode; cl_platform_id platform[10]; get_platforms(platform); cl_device_id devices[10]; int platform_index = 0; get_devices(platform[platform_index], devices); int device_index = 0; show_device_info(devices[device_index]); context = clCreateContext(NULL, 1, &devices[device_index], NULL, NULL, &errcode); checkError(clCreateContext); queue = clCreateCommandQueue (context, devices[device_index], CL_QUEUE_PROFILING_ENABLE, &errcode); // третий параметр - свойства checkError(clCreateCommandQueue); char* source = "\n\ __kernel void sum(__global const uchar *src, __global uchar *trg, int m, int n)\n\ {\n\ int i = get_global_id(0);\n\ int j = get_global_id(1);\n\ int SIdx = (i*n + (n-1 - j)) ;\n\ int DIdx = (j*m + i) ;\n\ if (i > m) return;\ if (j > n) return;\ for (int c = 0; c < 3; c++)\n\ trg[DIdx*3+c] = src[SIdx*3+c];\n\ }";
/*
 * Build a flat xyz vertex array from per-box atom positions.
 *
 * Work-item (i_atom, ibox) writes three consecutive floats at
 * vertices[(i_atom + N_MAX_ATOMS*ibox) * 3].  For atoms below n_atoms[ibox]
 * the vertex is the box centre plus the atom position stored at
 * i_atom + N_MAX_ATOMS*ibox; the remaining slots of the box are zeroed.
 */
__kernel void Viz(
        __global real_t* x_pos,
        __global real_t* y_pos,
        __global real_t* z_pos,
        const __global int* n_atoms,
        __global real_t* x_cen,
        __global real_t* y_cen,
        __global real_t* z_cen,
        __global float* vertices)
{
    const int atom = get_global_id(0);
    const int box = get_global_id(1);
    const int stride = N_MAX_ATOMS;

    const int src = atom + stride*box;        // slot in the position arrays
    const int dst = atom*3 + stride*box*3;    // first float of the vertex

    float vx = 0.0f;
    float vy = 0.0f;
    float vz = 0.0f;

    if (atom < n_atoms[box]) {
        vx = x_cen[box] + x_pos[src];
        vy = y_cen[box] + y_pos[src];
        vz = z_cen[box] + z_pos[src];
    }

    vertices[dst + 0] = vx;
    vertices[dst + 1] = vy;
    vertices[dst + 2] = vz;
}
/*
 * 3x3 convolution of an 8-bit single-channel image.
 *
 * image_dims[0] is the image height and image_dims[1] the width.
 * gaussian_kernel holds the nine coefficients in row-major order (top-left
 * first) and weight is the divisor applied to the accumulated sum.
 *
 * Work-items for which OutsideImage() is true do nothing.
 * NOTE(review): neighbour reads are not clamped here, so this relies on
 * OutsideImage() also rejecting pixels whose 3x3 window leaves the image -
 * confirm against its definition.
 */
kernel void gaussian_filter (global uchar* image, global uchar* filtered_image,
                             global char* gaussian_kernel, global int* image_dims,
                             short weight)
{
    const int image_height = image_dims[0];
    const int image_width = image_dims[1];

    const int global_x = get_global_id(0);
    const int global_y = get_global_id(1);
    const int2 pixel_pos = { global_x, global_y };

    if (OutsideImage(pixel_pos, image_width, image_height))
        return;

    short sum = 0;
    int index = 0;

    /* 3x3 convolution */
    for (int y = -1; y <= 1; y++) {
        for (int x = -1; x <= 1; x++) {
            const int2 pos = pixel_pos + (int2)(x, y);
            sum += gaussian_kernel[index++] * image[pos.y * image_width + pos.x];
        }
    }

    sum /= weight;

    /* Was indexed with the undeclared name img_width; the row stride is image_width. */
    filtered_image[global_y * image_width + global_x] = (uchar) sum;
}
/*
 * Accumulate weight updates for one batch of images per work-item.
 *
 * Work-item g processes BATCH_ITEMS consecutive images starting at
 * images[g*IMAGE_SIZE*BATCH_ITEMS] and adds into its own slice
 * updates[g*NO_PERCEPTRONS*IMAGE_SIZE ...].  The slice is modified with
 * "+=", so it must hold meaningful values before the launch.
 *
 * For every perceptron the mean weighted input ("sum") of the current image
 * is compared with the one of the previous image of the batch; the
 * difference scales the negated current image that is added to the update.
 * The first image of a batch only records its sum.
 */
__kernel void train(__global const float *perceptrons,
                    __global const short *images,
                    __global float *updates )
{
    int imagesOffset = get_global_id(0)*IMAGE_SIZE*BATCH_ITEMS;
    int updatesOffset = get_global_id(0)*NO_PERCEPTRONS*IMAGE_SIZE;

    /* Mean weighted input of the previous batch item, per perceptron. */
    float activationOld[NO_PERCEPTRONS];

    for (int batch = 0; batch < BATCH_ITEMS; batch++) {
        const int imageBase = imagesOffset+batch*IMAGE_SIZE;

        for (int perceptron = 0; perceptron < NO_PERCEPTRONS; perceptron++) {
            float sum = 0;
            for (int offset = 0; offset < IMAGE_SIZE; offset++) {
                sum += perceptrons[perceptron*IMAGE_SIZE+offset]*images[imageBase+offset];
            }
            sum /= IMAGE_SIZE;

            if (batch == 0) {
                /* Nothing to compare against yet; do not read the
                 * still-uninitialised activationOld entry. */
                activationOld[perceptron] = sum;
                continue;
            }

            const float activation = sum-activationOld[perceptron];
            activationOld[perceptron] = sum;

            for (int offset = 0; offset < IMAGE_SIZE; offset++) {
                updates[updatesOffset+perceptron*IMAGE_SIZE+offset] += -images[imageBase+offset]*activation;
            }
        }
    }
}
/*
 * Write each work-item's linear id into out[id] for a 2D range.
 *
 * The linear id is row-major with x (dimension 0) varying fastest, so the
 * row stride is the extent of dimension 0.  The previous code multiplied y
 * by get_global_size(1), which is only correct for square ranges and makes
 * ids collide or run past the buffer otherwise.
 */
void _Kernel_global_id2d(int* out)
{
    const unsigned x = get_global_id(0);
    const unsigned y = get_global_id(1);
    const unsigned id = (y * get_global_size(0)) + x;

    out[id] = id;
}
/*
 * Parity check of decoded words against a linked-list sparse matrix.
 *
 * Work-item (batchInd, nodeInd): for nodeInd < ldpcM the work-item treats
 * nodeInd as a row, walks the row's list (hRowFirstPtr -> hRowNextPtr,
 * terminated by -1) and sums src[batchInd*ldpcN + hCols[entry]].  An odd sum
 * increments flags[batchInd] atomically and prints the row number.
 *
 * Work-item (0, 0) additionally dumps the first 100 values of src for
 * debugging.  nonZeros is not used by this kernel.
 */
kernel void checkResult(global int* src, global int* hCols,
                        global int* hRowFirstPtr, global int* hRowNextPtr,
                        const int ldpcM, const int ldpcN, const int nonZeros,
                        global int* flags)
{
    const int batch = get_global_id(0);
    const int node = get_global_id(1);

    if (node < ldpcM) {
        int paritySum = 0;
        int entry = hRowFirstPtr[node];
        while (entry != -1) {
            paritySum += src[batch*ldpcN + hCols[entry]];
            entry = hRowNextPtr[entry];
        }

        if (paritySum % 2 != 0) {
            atomic_inc(&flags[batch]);
            printf("!%d ", node);
        }
    }

    if (batch == 0 && node == 0) {
        printf("CheckResult: src=");
        for (int i = 0; i < 100; ++i) {
            printf("%d ", src[i]);
        }
        printf("\n");
    }
}
/*
 * Subtract the temporal mean from every voxel time series, in place.
 *
 * One work-item per voxel (x, y, z); work-items outside the
 * DATA_W x DATA_H x DATA_D volume return immediately.  The mean is taken
 * over the NUMBER_OF_VOLUMES samples addressed through Calculate4DIndex().
 */
__kernel void RemoveMean(__global float* Volumes,
                         __private int DATA_W,
                         __private int DATA_H,
                         __private int DATA_D,
                         __private int NUMBER_OF_VOLUMES)
{
    const int x = get_global_id(0);
    const int y = get_global_id(1);
    const int z = get_global_id(2);

    const bool inside = (x < DATA_W) && (y < DATA_H) && (z < DATA_D);
    if (!inside)
        return;

    // First pass: accumulate the voxel's samples
    float total = 0.0f;
    int t = 0;
    while (t < NUMBER_OF_VOLUMES)
    {
        total += Volumes[Calculate4DIndex(x,y,z,t,DATA_W,DATA_H,DATA_D)];
        t++;
    }
    const float mean = total / (float)NUMBER_OF_VOLUMES;

    // Second pass: store the residuals
    for (t = 0; t < NUMBER_OF_VOLUMES; t++)
    {
        Volumes[Calculate4DIndex(x,y,z,t,DATA_W,DATA_H,DATA_D)] -= mean;
    }
}
/*
 * Clear *flag when the current word fails one of two checks.
 *
 * Work-item (r, c):
 *   - row 0 work-items clear the flag if matrix_n[c] is still -1;
 *   - column 0 work-items count the k in [0, ldpc_n) for which
 *     matrix_h[r*ldpc_n + k] * matrix_n[k] == 1 and clear the flag when
 *     that count is odd.
 * The flag is only ever written with 0; setting it beforehand is left to
 * the caller.  r0, r1, q0, q1, prior_p0, prior_p1 and ldpc_m are unused.
 */
__kernel void ieee_ldpc_decode_iteration3(const int ldpc_n, const int ldpc_m,
                                          __global float * r0, __global float *r1,
                                          __global float *q0, __global float * q1,
                                          __global float * prior_p0, __global float * prior_p1,
                                          __global char * matrix_n, __global char * matrix_h,
                                          __global int *flag)
{
    const int r = get_global_id(0);
    const int c = get_global_id(1);

    if (r == 0 && matrix_n[c] == -1) {
        *flag = 0;
    }

    if (c != 0) {
        return;
    }

    int ones = 0;
    for (int k = 0; k < ldpc_n; k++) {
        if (matrix_h[r*ldpc_n+k] * matrix_n[k] == 1) {
            ones++;
        }
    }

    if (ones % 2 != 0) {
        *flag = 0;
    }
}
/*
 * Add one threshold level's contribution to the per-voxel TFCE values.
 *
 * For a voxel inside the volume whose Mask value is exactly 1.0f and whose
 * cluster index is below DATA_W*DATA_H*DATA_D, the kernel adds
 * sqrt(cluster size) * threshold * threshold to TFCE_Values.  All other
 * voxels are left unchanged.
 */
__kernel void CalculateTFCEValues(__global float* TFCE_Values,
                                  __global const float* Mask,
                                  __private float threshold,
                                  __global const unsigned int* Cluster_Indices,
                                  __global const unsigned int* Cluster_Sizes,
                                  __private int DATA_W,
                                  __private int DATA_H,
                                  __private int DATA_D)
{
    const int x = get_global_id(0);
    const int y = get_global_id(1);
    const int z = get_global_id(2);

    if (x >= DATA_W || y >= DATA_H || z >= DATA_D)
        return;

    const int voxel = Calculate3DIndex(x,y,z,DATA_W,DATA_H);

    if (Mask[voxel] != 1.0f)
        return;

    // Indices at or above the voxel count mark voxels outside any cluster
    const unsigned int cluster = Cluster_Indices[voxel];
    if (!(cluster < (DATA_W * DATA_H * DATA_D)))
        return;

    const float extent = (float)Cluster_Sizes[cluster];
    const float contribution = sqrt(extent) * threshold * threshold;

    TFCE_Values[voxel] += contribution;
}
/*
 * Accumulate the mass (sum of supra-threshold data values) of each cluster.
 *
 * Voxels inside the volume with Mask == 1.0f and Data > threshold add their
 * value, scaled by 10000 and truncated to unsigned int, to
 * Cluster_Masses[cluster index of the voxel].  The fixed-point scaling is
 * used because the accumulation is done with an integer atomic_add.
 */
__kernel void CalculateClusterMasses(__global unsigned int* Cluster_Indices,
                                     volatile __global unsigned int* Cluster_Masses,
                                     __global const float* Data,
                                     __global const float* Mask,
                                     __private float threshold,
                                     __private int contrast,
                                     __private int DATA_W,
                                     __private int DATA_H,
                                     __private int DATA_D)
{
    const int x = get_global_id(0);
    const int y = get_global_id(1);
    const int z = get_global_id(2);

    if (x >= DATA_W || y >= DATA_H || z >= DATA_D)
        return;

    const int voxel = Calculate3DIndex(x,y,z,DATA_W,DATA_H);
    if (Mask[voxel] != 1.0f)
        return;

    const float value = Data[Calculate4DIndex(x,y,z,contrast,DATA_W,DATA_H,DATA_D)];
    if (!(value > threshold))
        return;

    // Fixed-point increment of the owning cluster's mass
    const unsigned int scaled = (unsigned int)(value * 10000.0f);
    atomic_add(&Cluster_Masses[Cluster_Indices[voxel]], scaled);
}
/*
 * Classify one image per work-item with a bank of linear perceptrons.
 *
 * Each perceptron occupies IMAGE_SIZE+1 consecutive floats: IMAGE_SIZE
 * weights followed by one additive term.  The score of a perceptron is the
 * sum of weight * pixel / 128 over the image plus that additive term; the
 * index of the highest score is stored in labels[work-item].
 *
 * If no score exceeds -FLT_MAX (for example when every score is NaN) label
 * 0 is stored instead of an indeterminate value.  The division uses a float
 * literal so the kernel does not require double-precision support.
 */
__kernel void test(__global const float *perceptrons,
                   __global const short *images,
                   __global short *labels )
{
    int imagesOffset = get_global_id(0)*IMAGE_SIZE;
    int labelsOffset = get_global_id(0);

    float max = -FLT_MAX;
    int maxLabel = 0;

    for (int perceptron = 0; perceptron < NO_CLASSES; perceptron++) {
        const int weights = perceptron*(IMAGE_SIZE+1);

        float sum = 0;
        for (int offset = 0; offset < IMAGE_SIZE; offset++) {
            sum += perceptrons[weights+offset]*images[imagesOffset+offset]/128.0f;
        }
        sum += perceptrons[weights+IMAGE_SIZE];

        if (sum > max) {
            max = sum;
            maxLabel = perceptron;
        }
    }

    labels[labelsOffset] = maxLabel;
}
/*
 * Seed the cluster labelling: every voxel gets its initial cluster index.
 *
 * A voxel with Mask == 1.0f and Data > threshold is labelled with its own
 * linear 3D index; every other voxel inside the volume receives
 * DATA_W*DATA_H*DATA_D*3, a value larger than any valid voxel index.
 */
__kernel void SetStartClusterIndicesKernel(__global unsigned int* Cluster_Indices,
                                           __global const float* Data,
                                           __global const float* Mask,
                                           __private float threshold,
                                           __private int contrast,
                                           __private int DATA_W,
                                           __private int DATA_H,
                                           __private int DATA_D)
{
    const int x = get_global_id(0);
    const int y = get_global_id(1);
    const int z = get_global_id(2);

    if (x >= DATA_W || y >= DATA_H || z >= DATA_D)
        return;

    const int voxel = Calculate3DIndex(x,y,z,DATA_W,DATA_H);

    const bool active = (Mask[voxel] == 1.0f) &&
                        (Data[Calculate4DIndex(x,y,z,contrast,DATA_W,DATA_H,DATA_D)] > threshold);

    // Active voxels start as their own cluster; the rest get an out-of-range label
    Cluster_Indices[voxel] = active ? (unsigned int)voxel
                                    : (unsigned int)(DATA_W * DATA_H * DATA_D * 3);
}
/*
 * Count the voxels belonging to each cluster.
 *
 * Every voxel inside the volume with Mask == 1.0f and Data > threshold
 * atomically adds 1 to Cluster_Sizes[cluster index of the voxel].
 */
__kernel void CalculateClusterSizes(__global unsigned int* Cluster_Indices,
                                    volatile __global unsigned int* Cluster_Sizes,
                                    __global const float* Data,
                                    __global const float* Mask,
                                    __private float threshold,
                                    __private int contrast,
                                    __private int DATA_W,
                                    __private int DATA_H,
                                    __private int DATA_D)
{
    const int x = get_global_id(0);
    const int y = get_global_id(1);
    const int z = get_global_id(2);

    if (x >= DATA_W || y >= DATA_H || z >= DATA_D)
        return;

    const int voxel = Calculate3DIndex(x,y,z,DATA_W,DATA_H);
    if (Mask[voxel] != 1.0f)
        return;

    const float value = Data[Calculate4DIndex(x,y,z,contrast,DATA_W,DATA_H,DATA_D)];
    if (value > threshold)
    {
        // One more voxel for the cluster this voxel is labelled with
        const unsigned int increment = 1;
        atomic_add(&Cluster_Sizes[Cluster_Indices[voxel]], increment);
    }
}
/*
 * Replace each active voxel's label by the root of its label chain.
 *
 * For a voxel with Mask == 1.0f and Data > threshold the kernel follows
 * Cluster_Indices[label] repeatedly until it reaches an entry that points
 * at itself, then stores that root label for the voxel.  Other voxels are
 * left untouched.
 */
__kernel void ClusterizeRelabel(__global unsigned int* Cluster_Indices,
                                __global const float* Data,
                                __global const float* Mask,
                                __private float threshold,
                                __private int contrast,
                                __private int DATA_W,
                                __private int DATA_H,
                                __private int DATA_D)
{
    const int x = get_global_id(0);
    const int y = get_global_id(1);
    const int z = get_global_id(2);

    if (x >= DATA_W || y >= DATA_H || z >= DATA_D)
        return;

    const int voxel = Calculate3DIndex(x,y,z,DATA_W,DATA_H);

    if (Mask[voxel] != 1.0f)
        return;
    if (!(Data[Calculate4DIndex(x,y,z,contrast,DATA_W,DATA_H,DATA_D)] > threshold))
        return;

    // Chase parent links until a self-referencing (root) label is found
    unsigned int root = Cluster_Indices[voxel];
    for (unsigned int parent = Cluster_Indices[root];
         parent != root;
         parent = Cluster_Indices[root])
    {
        root = parent;
    }

    Cluster_Indices[voxel] = root;
}
kernel void compute_z_by_ry(global int *cur_y, global int *cur_z, global int *cur_r, global int *transformed_y, global int *temp_y, global int *z_by_ry, uint N, uint D, uint K, uint f_img_width) { const uint V_SCALE = 0, H_SCALE = 1, V_TRANS = 2, H_TRANS = 3, NUM_TRANS = 4; uint nth = get_global_id(0); // nth is the index of images uint kth = get_global_id(1); // kth is the index of features uint f_img_height = D / f_img_width; if (cur_z[nth * K + kth] == 0) { for (int dth = 0; dth < D; dth++) { transformed_y[nth * K * D + kth * D + dth] = 0; } } else { for (int dth = 0; dth < D; dth++) { temp_y[nth * K * D + kth * D + dth] = cur_y[kth * D + dth]; } // vertically scale the feature image uint v_scale = cur_r[nth * (K * NUM_TRANS) + kth * NUM_TRANS + V_SCALE]; scale_global(temp_y, transformed_y, nth, kth, f_img_height, f_img_width, 0, v_scale, K, D); for (int dth = 0; dth < D; dth++) { temp_y[nth * K * D + kth * D + dth] = transformed_y[nth * K * D + kth * D + dth]; } // horizontal scale the feature image uint h_scale = cur_r[nth * (K * NUM_TRANS) + kth * NUM_TRANS + H_SCALE]; scale_global(temp_y, transformed_y, nth, kth, f_img_height, f_img_width, h_scale, 0, K, D); for (int dth = 0; dth < D; dth++) { temp_y[nth * K * D + kth * D + dth] = transformed_y[nth * K * D + kth * D + dth]; } // vertically translate the feature image uint v_dist = cur_r[nth * (K * NUM_TRANS) + kth * NUM_TRANS + V_TRANS]; v_translate_global(temp_y, transformed_y, nth, kth, f_img_height, f_img_width, v_dist, K, D); for (int dth = 0; dth < D; dth++) { temp_y[nth * K * D + kth * D + dth] = transformed_y[nth * K * D + kth * D + dth]; } // horizontally translate the feature image uint h_dist = cur_r[nth * (K * NUM_TRANS) + kth * NUM_TRANS + H_TRANS]; h_translate_global(temp_y, transformed_y, nth, kth, f_img_height, f_img_width, h_dist, K, D); } // wait until copying is done barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE); /* at this point, for each object, a transformed y has been generated */ 
if (kth == 0) { for (int dth = 0; dth < D; dth++) { z_by_ry[nth * D + dth] = 0; for (int k = 0; k < K; k++) { z_by_ry[nth * D + dth] += transformed_y[nth * K * D + k * D + dth] * cur_z[nth * K + k]; } } } }
/*
 * Transpose a row-major aRows x aColumns matrix of doubles.
 *
 * Work-item (i, j) copies a[j][i] to out[i][j], where out is row-major with
 * aRows columns.  Dimension 0 therefore spans the columns of a and
 * dimension 1 its rows; work-items outside that range (padded launch) do
 * nothing instead of accessing memory past the matrices.
 */
__kernel void transpose(
    __global const double* a,
    int aRows,
    int aColumns,
    __global double* out
) {
    const int i = get_global_id(0);   /* column of a, row of out */
    const int j = get_global_id(1);   /* row of a, column of out */

    if (i >= aColumns || j >= aRows) {
        return;
    }

    const int outColumns = aRows;
    out[i * outColumns + j] = a[j * aColumns + i];
}
/*
 * Transpose a row-major aRows x aColumns matrix of floats.
 *
 * Work-item (i, j) copies a[j][i] to out[i][j], where out is row-major with
 * aRows columns.  Dimension 0 therefore spans the columns of a and
 * dimension 1 its rows; work-items outside that range (padded launch) do
 * nothing instead of accessing memory past the matrices.
 */
__kernel void transposeFloat(
    __global const float* a,
    int aRows,
    int aColumns,
    __global float* out
) {
    const int i = get_global_id(0);   /* column of a, row of out */
    const int j = get_global_id(1);   /* row of a, column of out */

    if (i >= aColumns || j >= aRows) {
        return;
    }

    const int outColumns = aRows;
    out[i * outColumns + j] = a[j * aColumns + i];
}
/*
 * Ray-march a volume between per-pixel entry and exit points.
 *
 * For pixel (x, y) the ray start and end are read from the entry and exit
 * images at (x, y)/512.  The ray is split into floor(100 * ray length)
 * steps; at each step evaldx() evaluates the volume at the current
 * position, the returned scalar is looked up in the 1D transfer image, and
 * the colour is accumulated into the result whose w component acts as the
 * running attenuation factor (starts at 1, multiplied by
 * pow(colour.w, 1/100) each step).  Marching stops early, with w forced to
 * 0, once w drops below 0.05.  The result is written to tex at (x, y).
 */
__kernel void kern( __read_only image2d_t entry, __read_only image2d_t exit,
                    __write_only image2d_t tex, __read_only image1d_t transfer,
                    __global float * data, int width, int height, int depth )
{
    const int x = get_global_id(0);
    const int y = get_global_id(1);
    const int2 coords = (int2)(x,y);
    const float2 tcoords = (float2)(x,y)/512.0f;

    const float Samplings = 100.0f;
    const float delta = 1.0f/Samplings;

    float3 pos = read_imagef(entry,samplersrc,tcoords).xyz;
    const float3 end = read_imagef(exit,samplersrc,tcoords).xyz;
    const float3 ray = end - pos;

    const int steps = (int)(floor(Samplings * length(ray)));
    const float3 stride = ray / (float)(steps);

    float4 result = (float4)(0.0f,0.0f,0.0f,1.0f);

    for (int i = 0; i < steps; i++)
    {
        float sample = 0.0f;
        float3 gradient = 0.0f;

        // Evaluate the volume (value and gradient) at the current position
        evaldx(data, pos.x, pos.y, pos.z, width, height, depth, &sample, &gradient);

        // Classify the sample through the transfer function
        const float4 color = read_imagef(transfer,samplersrc,sample);

        result.w *= pow(color.w,delta);
        result.xyz += result.w*color.xyz*delta;

        // Nothing further along the ray can contribute visibly
        if (result.w < 0.05f)
        {
            result.w = 0.0f;
            break;
        }

        pos += stride;
    }

    write_imagef(tex, coords, result);
}
/*
 * Integrate one cell of a cellular neural network for 50 Euler sub-steps.
 *
 * Work-item (X, Y) owns cell Y*width + X.  set_template() fills the 5x5
 * feedback template A and control template B for the given mode.  In each
 * sub-step the templates are applied to the 5x5 neighbourhoods of Vy
 * (outputs) and Vu (inputs), with coordinates clamped to the image border,
 * then
 *      Vx += (a + b + threshold - Vx) * dt      with dt = 1/50
 *      Vy  = Vx clamped to [-1, 1].
 *
 * The accumulators a and b are initialised once and are not reset between
 * sub-steps, so they carry over from one sub-step to the next.
 */
__kernel void do_cellular( __global int *mode, __global int *in_width, __global int *in_height,
                           __global double *in_threshold,
                           __global double *Vx, __global double *Vu, __global double *Vy)
{
    const int cmode = (*mode);
    const int X = get_global_id(0);
    const int Y = get_global_id(1);
    const int width = (*in_width);
    const int height = (*in_height);
    const double thr = (*in_threshold);

    // Set up the CNN templates
    double A[5][5], B[5][5];
    set_template(cmode, A, B);

    // Number of integration sub-steps over the interval [d, u]
    const int n = 50;
    const double d = 0.0, u = 1.0;
    const double dt = (u - d) / n;

    const int self = Y * width + X;

    double a = 0.0, b = 0.0;

    for (int cnt = 0; cnt < n; cnt++) {
        // Template computation over the clamped 5x5 neighbourhood
        for (int y = 0; y < 5; y++) {
            const int posY = Y + y - 2;
            const int rowY = (posY < 0) ? 0 : ((posY > (height - 1)) ? height - 1 : posY);

            for (int x = 0; x < 5; x++) {
                const int posX = X + x - 2;
                const int colX = (posX < 0) ? 0 : ((posX > (width - 1)) ? width - 1 : posX);
                const int neighbour = rowY * width + colX;

                a += A[y][x] * Vy[neighbour];
                b += B[y][x] * Vu[neighbour];
            }
        }

        const double output_vx = a + b + thr - Vx[self];
        Vx[self] += output_vx * dt;

        // Piecewise-linear output: saturate the state to [-1, 1]
        const double vx = Vx[self];
        Vy[self] = (vx > 1.0) ? 1.0 : ((vx < -1.0) ? -1.0 : vx);
    }
}
/*
 * Small probe kernel: only work-item (0, 0) does anything.
 *
 * It stores 3 in flags[0] when codes[0] equals 0 and 2 otherwise, then sets
 * src[0] to 1.
 */
kernel void test(global int* src, global int* flags, global float* codes)
{
    const int batch = get_global_id(0);
    const int node = get_global_id(1);

    if (batch != 0 || node != 0) {
        return;
    }

    flags[0] = (codes[0] == 0) ? 3 : 2;
    src[0] = 1;
}
/*
 * Element-wise masking of a batch of rows.
 *
 * Work-item (o, b) handles element o of row b, rows being input_dim floats
 * apart:  output[output_offset + b*input_dim + o] =
 *             mask[o] * input[input_offset + b*input_dim + o].
 */
__kernel void forward(
        const __global float* input,
        const int input_offset,
        __global float* output,
        const int output_offset,
        const __global float* mask,
        const int input_dim)
{
    const int o = get_global_id(0);   // element within the row
    const int b = get_global_id(1);   // row (batch entry)

    const int cell = o + input_dim*b;

    const float scaled = mask[o] * input[cell + input_offset];
    output[cell + output_offset] = scaled;
}
/*
 * Generate the four vertices of an arrow glyph for each line.
 *
 * Line g reads its centre from pointData[3g .. 3g+2] and its direction from
 * directionData[3g .. 3g+2] and writes 12 floats to vertexBuffer[12g ...]:
 *   [0..2]   arrow bottom  = centre - 0.5 * dir
 *   [3..5]   arrow head    = centre + 0.5 * dir
 *   [6..8]   first barb    = centre + 0.3 * dir + 0.1 * |dir| * side
 *   [9..11]  second barb   = centre + 0.3 * dir - 0.1 * |dir| * side
 * where side is the normalised cross product of (head - camPos) and dir.
 * Work-items with id >= Nlines do nothing.
 *
 * All constants are float literals: a double literal scaling a float4 is
 * not valid OpenCL C when doubles are enabled and needs fp64 otherwise.
 */
__kernel void LineRenderKernel(const __global float* pointData,
                               const __global float* directionData,
                               __global float * vertexBuffer,
                               float4 camPos, uint Nlines)
{
    const size_t line = get_global_id(0);
    if (line >= Nlines) return;

    const __global float* centre = pointData + line * 3;
    const __global float* direction = directionData + line * 3;
    __global float* out = vertexBuffer + 4 * 3 * line;

    const float4 pos = (float4)(centre[0], centre[1], centre[2], 0.0f);
    const float4 dir = (float4)(direction[0], direction[1], direction[2], 0.0f);

    const float4 bottom = pos - 0.5f * dir;
    const float4 head = pos + 0.5f * dir;

    /* The barbs are offset sideways, perpendicular to both the viewing
     * direction and the arrow. */
    const float4 pointToView = head - camPos;
    const float4 sidesVec = normalize(cross(pointToView, dir));
    const float4 barbOffset = (0.1f * length(dir)) * sidesVec;
    const float4 barbBase = pos + 0.3f * dir;

    const float4 barbA = barbBase + barbOffset;
    const float4 barbB = barbBase - barbOffset;

    out[0] = bottom.x;
    out[1] = bottom.y;
    out[2] = bottom.z;

    out[3] = head.x;
    out[4] = head.y;
    out[5] = head.z;

    out[6] = barbA.x;
    out[7] = barbA.y;
    out[8] = barbA.z;

    out[9] = barbB.x;
    out[10] = barbB.y;
    out[11] = barbB.z;
}
/*
 * Recompute the check-to-variable messages r0/r1 for one matrix entry.
 *
 * Work-item (batchInd, nodeInd): nodeInd is an entry of the sparse matrix
 * and hRows[nodeInd] its row.  The kernel multiplies (q0 - q1) over every
 * entry of that row's linked list (hRowFirstPtr -> hRowNextPtr, terminated
 * by -1; the list is walked in full, nodeInd's own entry included) and
 * stores
 *      r0 = (1 + product) / 2,   r1 = (1 - product) / 2
 * at batchInd*nonZeros + nodeInd.  ldpcM is not used.
 */
kernel void refreshR(global int* hRows, global float* q0, global float* q1,
                     global float* r0, global float* r1,
                     global int* hRowFirstPtr, global int* hRowNextPtr,
                     const int ldpcM, const int nonZeros)
{
    const int batch = get_global_id(0);
    const int node = get_global_id(1);
    const int row = hRows[node];

    float product = 1;
    int entry = hRowFirstPtr[row];
    while (entry != -1) {
        product *= q0[batch*nonZeros+entry] - q1[batch*nonZeros+entry];
        entry = hRowNextPtr[entry];
    }

    r0[batch*nonZeros+node] = (1 + product) / 2;
    r1[batch*nonZeros+node] = (1 - product) / 2;
}
/*
 * Fill the field entry of pixel (y, x) with a random candidate patch.
 *
 * nff(), random() and D() are defined outside this block; as used here,
 * nff(y,x,k) is an assignable slot, random(lo, hi, seed) yields a value
 * that is also used as the next seed, and D(y,x,ty,tx) yields the value
 * stored in slot 2.
 * NOTE(review): presumably slots 0/1 hold the candidate row/column and
 * slot 2 the patch distance - confirm against the macro definitions.
 *
 * The initial seed combines the pixel coordinates as (y << 16) + x.  The
 * previous spelling y<<16+x parsed as y << (16 + x), which gives an
 * unrelated value and an undefined shift count for x >= 16.
 */
__kernel void randomfill(const int patchHeight, const int patchWidth,
                         const int height, const int width,
                         __global double *img1, __global double *img2,
                         __global double * output)
{
    const int effectiveWidth=width-patchWidth;
    const int effectiveHeight=height-patchHeight;

    int y = get_global_id(0);
    int x = get_global_id(1);

    int seed = (y << 16) + x;

    /* The drawn row is stored and doubles as the seed for the column draw. */
    int ty=seed=nff(y,x,0)=random(0,effectiveHeight,seed);
    int tx=nff(y,x,1)=random(0,effectiveWidth,seed);

    nff(y,x,2)=D(y,x, ty,tx );
}
/*
 * Render one pixel of the target image.
 *
 * Dimension 0 of the range is the pixel row y and dimension 1 the column x.
 * The pixel's random seed is y*w + x.  The colour returned by
 * getPixelAntialiased() is scaled by 255 and written with its x and z
 * components swapped, with a fourth component of 255.
 */
__kernel void render(__write_only __global image2d_t targetImage, int w, int h, int animation)
{
    const int y = get_global_id(0);
    const int x = get_global_id(1);

    Stack stack;
    stack.size = 0;

    ulong seed = y*w+x;

    Viewport viewport = setupViewport(x, y, w, h, animation);
    Vector color = getPixelAntialiased(viewport, &stack, &seed);

    // Stored order is (z, y, x, 255)
    uint4 intColor = {color.z*255, color.y*255, color.x*255, 255};

    int2 posOut = {x, y};
    write_imageui(targetImage, posOut, intColor);
}
/*
 * Sobel edge magnitude, computed independently for the R, G and B channels.
 *
 * Neighbourhood naming (row y+1 on top, as in the diagram):
 *      [ul, up, ur]      (x-1, y+1) (x, y+1) (x+1, y+1)
 *      [lf, --, rt]      (x-1, y  )          (x+1, y  )
 *      [dl, dn, dr]      (x-1, y-1) (x, y-1) (x+1, y-1)
 *
 *      Gx = [-1 0 +1]        Gy = [-1 -2 -1]
 *           [-2 0 +2]             [ 0  0  0]
 *           [-1 0 +1]             [+1 +2 +1]
 *
 * The output pixel is (sqrt(Gx^2 + Gy^2), 1.0) using native_sqrt.
 * Work-items outside the source image return without writing.
 */
kernel void sobel_rgb(read_only image2d_t src, write_only image2d_t dst)
{
    const int x = (int)get_global_id(0);
    const int y = (int)get_global_id(1);

    if (x >= get_image_width(src) || y >= get_image_height(src))
        return;

    const int left = x - 1, right = x + 1;
    const int below = y - 1, above = y + 1;

    // Bottom row (y - 1)
    const float3 dl = read_imagef(src, sampler, (int2)(left,  below)).xyz;
    const float3 dn = read_imagef(src, sampler, (int2)(x,     below)).xyz;
    const float3 dr = read_imagef(src, sampler, (int2)(right, below)).xyz;

    // Middle row; the centre pixel has zero weight in both kernels
    const float3 lf = read_imagef(src, sampler, (int2)(left,  y)).xyz;
    const float3 rt = read_imagef(src, sampler, (int2)(right, y)).xyz;

    // Top row (y + 1)
    const float3 ul = read_imagef(src, sampler, (int2)(left,  above)).xyz;
    const float3 up = read_imagef(src, sampler, (int2)(x,     above)).xyz;
    const float3 ur = read_imagef(src, sampler, (int2)(right, above)).xyz;

    // Horizontal gradient
    const float3 gx = -dl + dr + 2.0f * (rt - lf) - ul + ur;

    // Vertical gradient
    const float3 gy = dl + dr + 2.0f * (- up + dn) - ul - ur;

    // Gradient magnitude per channel
    const float3 g = native_sqrt(gx * gx + gy * gy);

    write_imagef(dst, (int2)(x, y), (float4)(g.x, g.y, g.z, 1.0f));
}
/*
 * Composite two source images over the previous destination image.
 *
 * For destination pixel p the kernel reads previousDestination at p,
 * sourceA at p + (xA, yA) and sourceB at p + (xB, yB), then applies
 *      out = A + prev * (1 - A.w)
 *      out = B + out  * (1 - B.w)
 * i.e. A is layered first and B on top, each weighting what lies beneath by
 * one minus its own w component.  The result is written to destination at p.
 */
__kernel void merge(__write_only image2d_t destination,
                    __read_only image2d_t previousDestination,
                    __read_only image2d_t sourceA,
                    __read_only image2d_t sourceB,
                    int xA, int yA, int xB, int yB)
{
    const int2 here = (int2) (get_global_id(0), get_global_id(1));
    const int2 inA = (int2) (here.x + xA, here.y + yA);
    const int2 inB = (int2) (here.x + xB, here.y + yB);

    const float4 below = read_imagef(previousDestination, sampler, here);
    const float4 layerA = read_imagef(sourceA, sampler, inA);
    const float4 layerB = read_imagef(sourceB, sampler, inB);

    const float4 withA = layerA + below * (1 - layerA.w);
    const float4 withB = layerB + withA * (1 - layerB.w);

    write_imagef(destination, here, withB);
}
__kernel void CalculatePermutationPValuesClusterMassInference(__global float* P_Values, __global const float* Test_Values, __global const unsigned int* Cluster_Indices, __global const unsigned int* Cluster_Sizes, __global const float* Mask, __global const float* c_Max_Values, __private float threshold, __private int contrast, __private int DATA_W, __private int DATA_H, __private int DATA_D, __private int NUMBER_OF_PERMUTATIONS) { int x = get_global_id(0); int y = get_global_id(1); int z = get_global_id(2); if (x >= DATA_W || y >= DATA_H || z >= DATA_D) return; if ( Mask[Calculate3DIndex(x, y, z, DATA_W, DATA_H)] == 1.0f ) { // Check if the current voxel belongs to a cluster if ( Test_Values[Calculate4DIndex(x, y, z, contrast, DATA_W, DATA_H, DATA_D)] > threshold ) { // Get cluster mass of current cluster, divide by 10 000 as 10 000 is multiplied with in the CalculateClusterMasses kernel float Test_Value = ((float)Cluster_Sizes[Cluster_Indices[Calculate3DIndex(x, y, z, DATA_W, DATA_H)]]) / 10000.0f; float sum = 0.0f; for (int p = 0; p < NUMBER_OF_PERMUTATIONS; p++) { if (Test_Value > c_Max_Values[p]) { sum += 1.0f; } } P_Values[Calculate4DIndex(x, y, z, contrast, DATA_W, DATA_H, DATA_D)] = sum / (float)NUMBER_OF_PERMUTATIONS; } // Voxel is not part of a cluster, so p-value should be 0 else { P_Values[Calculate4DIndex(x, y, z, contrast, DATA_W, DATA_H, DATA_D)] = 0.0f; } } else { P_Values[Calculate4DIndex(x, y, z, contrast, DATA_W, DATA_H, DATA_D)] = 0.0f; } }