C++ (Cpp) buildKernel 예제들

예제 #1

0

파일 보기

파일: clKernelBundle.cpp 프로젝트: DonjuanChu/OpenSubdiv

bool
OsdCLKernelBundle::Compile(cl_context clContext,
                           int numVertexElements, int numVaryingElements) {

    cl_int ciErrNum;

    _numVertexElements = numVertexElements;
    _numVaryingElements = numVaryingElements;

    char constantDefine[256];
    snprintf(constantDefine, sizeof(constantDefine),
             "#define NUM_VERTEX_ELEMENTS %d\n"
             "#define NUM_VARYING_ELEMENTS %d\n",
             numVertexElements, numVaryingElements);

    const char *sources[] = { constantDefine, clSource };

    _clProgram = clCreateProgramWithSource(clContext, 2, sources, 0, &ciErrNum);
    CL_CHECK_ERROR(ciErrNum, "clCreateProgramWithSource\n");

    ciErrNum = clBuildProgram(_clProgram, 0, NULL, NULL, NULL, NULL);
    if (ciErrNum != CL_SUCCESS) {
        OsdError(OSD_CL_PROGRAM_BUILD_ERROR, "CLerr=%d", ciErrNum);

        cl_int numDevices = 0;
        clGetContextInfo(clContext, CL_CONTEXT_NUM_DEVICES,
                         sizeof(cl_uint), &numDevices, NULL);
        cl_device_id *devices = new cl_device_id[numDevices];
        clGetContextInfo(clContext, CL_CONTEXT_DEVICES,
                         sizeof(cl_device_id)*numDevices, devices, NULL);
        for (int i = 0; i < numDevices; ++i) {
            char cBuildLog[10240];
            clGetProgramBuildInfo(_clProgram, devices[i], CL_PROGRAM_BUILD_LOG,
                                  sizeof(cBuildLog), cBuildLog, NULL);
            OsdError(OSD_CL_PROGRAM_BUILD_ERROR, cBuildLog);
        }
        delete[] devices;
        return false;
    }

    _clBilinearEdge   = buildKernel(_clProgram, "computeBilinearEdge");
    _clBilinearVertex = buildKernel(_clProgram, "computeBilinearVertex");
    _clCatmarkFace    = buildKernel(_clProgram, "computeFace");
    _clCatmarkEdge    = buildKernel(_clProgram, "computeEdge");
    _clCatmarkVertexA = buildKernel(_clProgram, "computeVertexA");
    _clCatmarkVertexB = buildKernel(_clProgram, "computeVertexB");
    _clLoopEdge       = buildKernel(_clProgram, "computeEdge");
    _clLoopVertexA    = buildKernel(_clProgram, "computeVertexA");
    _clLoopVertexB    = buildKernel(_clProgram, "computeLoopVertexB");
    _clVertexEditAdd  = buildKernel(_clProgram, "editVertexAdd");

    return true;
}

예제 #2

0

파일 보기

파일: device.cpp 프로젝트: pdhahn/occa

  kernel device::buildKernelFromString(const std::string &content,
                                       const std::string &kernelName,
                                       const occa::properties &props) const {

    occa::properties allProps = props + kernelProperties();
    allProps["mode"] = mode();

    hash_t kernelHash = (hash()
                         ^ occa::hash(allProps)
                         ^ occa::hash(content));

    io::lock_t lock(kernelHash, "occa-device");
    std::string stringSourceFile = io::hashDir(kernelHash);
    stringSourceFile += "stringSource.okl";

    if (lock.isMine()) {
      if (!sys::fileExists(stringSourceFile)) {
        io::write(stringSourceFile, content);
      }
      lock.release();
    }

    return buildKernel(stringSourceFile,
                       kernelName,
                       props);
  }

예제 #3

0

파일 보기

파일: DeconvolutionTool.cpp 프로젝트: 413x/SmartDeblur

bool DeconvolutionTool::doDeconvolution(QImage *inputImage, QImage *outputImage, Blur* blur) {
    isProcessingCancelled = false;
    // Create kernel
    buildKernel(kernelMatrix, width, height, blur);
    fftw_execute(realForwardKernelPlan);
    // Fill processingContext
    ProcessingContext* processingContext = new ProcessingContext();
    processingContext->inputImage = inputImage;
    processingContext->outputImage = outputImage;
    processingContext->inputImageMatrix = inputImageMatrix;
    processingContext->outputImageMatrix = outputImageMatrix;
    processingContext->kernelFFT = kernelFFT;
    processingContext->width = width;
    processingContext->height = height;
    processingContext->blur = blur;

    if (blur->mode == PREVIEW_GRAY) {
        doDeconvolutionForChannel(processingContext, GRAY);
    } else {
        QString progressText= "High-Quality";
        if (blur->mode == PREVIEW_COLOR) {
            progressText = "Color Preview";
        }
        setProgressInterval(10,40, progressText);
        doDeconvolutionForChannel(processingContext, RED);
        setProgressInterval(40,70, progressText);
        doDeconvolutionForChannel(processingContext, GREEN);
        setProgressInterval(70,100, progressText);
        doDeconvolutionForChannel(processingContext, BLUE);

    }

    delete(processingContext);
    return !isProcessingCancelled;
}

예제 #4

0

파일 보기

파일: OpenCL.cpp 프로젝트: Goursat/rodinia

void OpenCL::init(int isGPU)
{
	if (isGPU)
		getDevices(CL_DEVICE_TYPE_GPU);
	else
		getDevices(CL_DEVICE_TYPE_CPU);

	buildKernel();
}

예제 #5

0

파일 보기

파일: PtexTriangleFilter.cpp 프로젝트: DINKIN/ptex

PTEX_NAMESPACE_BEGIN

void PtexTriangleFilter::eval(float* result, int firstChan, int nChannels,
                              int faceid, float u, float v,
                              float uw1, float vw1, float uw2, float vw2,
                              float width, float blur)
{
    // init
    if (!_tx || nChannels <= 0) return;
    if (faceid < 0 || faceid >= _tx->numFaces()) return;
    _ntxchan = _tx->numChannels();
    _dt = _tx->dataType();
    _firstChanOffset = firstChan*DataSize(_dt);
    _nchan = PtexUtils::min(nChannels, _ntxchan-firstChan);

    // get face info
    const FaceInfo& f = _tx->getFaceInfo(faceid);

    // if neighborhood is constant, just return constant value of face
    if (f.isNeighborhoodConstant()) {
        PtexPtr<PtexFaceData> data ( _tx->getData(faceid, 0) );
        if (data) {
            char* d = (char*) data->getData() + _firstChanOffset;
            Ptex::ConvertToFloat(result, d, _dt, _nchan);
        }
        return;
    }

    // clamp u and v
    u = PtexUtils::clamp(u, 0.0f, 1.0f);
    v = PtexUtils::clamp(v, 0.0f, 1.0f);

    // build kernel
    PtexTriangleKernel k;
    buildKernel(k, u, v, uw1, vw1, uw2, vw2, width, blur, f.res);

    // accumulate the weight as we apply
    _weight = 0;

    // allocate temporary result
    _result = (float*) alloca(sizeof(float)*_nchan);
    memset(_result, 0, sizeof(float)*_nchan);

    // apply to faces
    splitAndApply(k, faceid, f);

    // normalize (both for data type and cumulative kernel weight applied)
    // and output result
    float scale = 1.0f / (_weight * OneValue(_dt));
    for (int i = 0; i < _nchan; i++) result[i] = float(_result[i] * scale);

    // clear temp result
    _result = 0;
}

예제 #6

0

파일 보기

파일: device.cpp 프로젝트: pdhahn/occa

  void device::loadKernels(const std::string &library) {
    // TODO 1.1: Load kernels
#if 0
    std::string devHash = hash().toFullString();
    strVector dirs = io::directories("occa://" + library);
    const int dirCount = (int) dirs.size();
    int kernelsLoaded = 0;

    for (int d = 0; d < dirCount; ++d) {
      const std::string buildFile = dirs[d] + kc::buildFile;

      if (!sys::fileExists(buildFile)) {
        continue;
      }

      json info = json::read(buildFile)["info"];
      if ((std::string) info["device/hash"] != devHash) {
        continue;
      }
      ++kernelsLoaded;

      const std::string sourceFilename = dirs[d] + kc::parsedSourceFile;

      json &kInfo = info["kernel"];
      hash_t hash = hash_t::fromString(kInfo["hash"]);
      jsonArray metadataArray = kInfo["metadata"].array();
      occa::properties kernelProps = kInfo["props"];

      // Ignore how the kernel was setup, turn off verbose
      kernelProps["verbose"] = false;

      const int kernels = metadataArray.size();
      for (int k = 0; k < kernels; ++k) {
        buildKernel(sourceFilename,
                    hash,
                    kernelProps,
                    lang::kernelMetadata::fromJson(metadataArray[k]));
      }
    }

    // Print loaded info
    if (properties().get("verbose", false) && kernelsLoaded) {
      std::cout << "Loaded " << kernelsLoaded;
      if (library.size()) {
        std::cout << " ["<< library << "]";
      } else {
        std::cout << " cached";
      }
      std::cout << ((kernelsLoaded == 1)
                    ? " kernel\n"
                    : " kernels\n");
    }
#endif
  }

예제 #7

0

파일 보기

파일: engine.cpp 프로젝트: piesbert/nameless

bool Engine::start() {
    bool retval {false};

    m_media = std::make_unique<Media>();

    if (m_media->init()) {
        buildKernel();
        if (buildModules()) {
            m_kernel->start();
            retval = true;
        }
    }

    return retval;
}

예제 #8

0

파일 보기

파일: clDispatcher.cpp 프로젝트: BenjaminCoquelle/OpenSubdiv

bool
OsdClKernelDispatcher::ClKernel::Compile(cl_context clContext, int numVertexElements, int numVaryingElements) {

    cl_int ciErrNum;

    _numVertexElements = numVertexElements;
    _numVaryingElements = numVaryingElements;

    char constantDefine[256];
    snprintf(constantDefine, 256, "#define NUM_VERTEX_ELEMENTS %d\n"
             "#define NUM_VARYING_ELEMENTS %d\n", numVertexElements, numVaryingElements);

    const char *sources[] = { constantDefine, clSource };

    _clProgram = clCreateProgramWithSource(clContext, 2, sources, 0, &ciErrNum);
    CL_CHECK_ERROR(ciErrNum, "clCreateProgramWithSource\n");

    ciErrNum = clBuildProgram(_clProgram, 0, NULL, NULL, NULL, NULL);
    if (ciErrNum != CL_SUCCESS) {
        OSD_ERROR("ERROR in clBuildProgram %d\n", ciErrNum);
        char cBuildLog[10240];
        clGetProgramBuildInfo(_clProgram, _clDevice, CL_PROGRAM_BUILD_LOG,
                              sizeof(cBuildLog), cBuildLog, NULL);
        OSD_ERROR(cBuildLog);
        return false;
    }

    // -------
    _clBilinearEdge   = buildKernel(_clProgram, "computeBilinearEdge");
    _clBilinearVertex = buildKernel(_clProgram, "computeBilinearVertex");
    _clCatmarkFace    = buildKernel(_clProgram, "computeFace");
    _clCatmarkEdge    = buildKernel(_clProgram, "computeEdge");
    _clCatmarkVertexA = buildKernel(_clProgram, "computeVertexA");
    _clCatmarkVertexB = buildKernel(_clProgram, "computeVertexB");
    _clLoopEdge       = buildKernel(_clProgram, "computeEdge");
    _clLoopVertexA    = buildKernel(_clProgram, "computeVertexA");
    _clLoopVertexB    = buildKernel(_clProgram, "computeLoopVertexB");

    return true;
}

예제 #9

0

파일 보기

파일: volumemaxclprocessor.cpp 프로젝트: david12345678901/inviwo

VolumeMaxCLProcessor::VolumeMaxCLProcessor()
    : Processor()
    , ProcessorKernelOwner(this)
    , inport_("inVolume")
    , outport_("outVolume")
    , volumeRegionSize_("region", "Region size", 8, 1, 100)
    , workGroupSize_("wgsize", "Work group size", ivec3(8), ivec3(0), ivec3(256))
    , useGLSharing_("glsharing", "Use OpenGL sharing", true)
    , supportsVolumeWrite_(false)
    , tmpVolume_(nullptr)
    , kernel_(nullptr) {
    addPort(inport_);
    addPort(outport_);

    addProperty(volumeRegionSize_);
    addProperty(workGroupSize_);
    addProperty(useGLSharing_);

    buildKernel();
}

예제 #10

0

파일 보기

파일: FBlur.cpp 프로젝트: zhexuany/FlashPhoto

///apply filter effect to the PixelBuffer* buffer passed into this function
void FBlur::applyFilter(PixelBuffer* imageBuffer){
  // if kernel is already initialized, do not need initialize it again.
  kernel = buildKernel(std::round(getFloatParameter()));

  // printKernel();

  if(getName() == "FEdgeDetection"){
    imageBuffer -> convertToLuminance();
  }

  int width = imageBuffer -> getWidth();
  int height = imageBuffer -> getHeight();
  //create a new pixel buffer for storing the convolution result.
  PixelBuffer* newImageBuffer
    = new PixelBuffer(width, height, imageBuffer -> getBackgroundColor());
  for(int i = 0; i < width; i++){
    for(int j = 0; j < height; j++){
      float newRed = 0;
      float newGreen = 0;
      float newBlue = 0;
      for(size_t filterI = 0; filterI < kernel.size(); filterI++){
        for(size_t filterJ = 0; filterJ < kernel[filterI].size(); filterJ++){
          //The location imageI and imageJ is calculated so that
          //for the center element of the filter it'll be i, j
          int imageI = (i - kernel.size()/2 + filterI + width) % width;
          int imageJ = (j - kernel[filterI].size()/2 + filterJ + height) % height;
          ColorData currPixel = imageBuffer -> getPixel(imageI, imageJ);
          newRed += currPixel.getRed() * kernel[filterI][filterJ];
          newGreen += currPixel.getGreen()*kernel[filterI][filterJ];
          newBlue += currPixel.getBlue()*kernel[filterI][filterJ];
        }
      }
      ColorData newPixel = ColorData(newRed, newGreen, newBlue);
      newPixel = newPixel.clampedColor();
      newImageBuffer -> setPixel(i, j, newPixel);
    }
  }
  newImageBuffer -> copyPixelBuffer(newImageBuffer, imageBuffer);
  delete newImageBuffer;
}

예제 #11

0

파일 보기

파일: kernel-dens.c 프로젝트: miguel-porto/ecoSpace

JNIEXPORT jint JNICALL Java_pt_floraon_ecospace_nativeFunctions_computeKernelDensities(JNIEnv *env, jclass obj, jstring filename, jstring outfilename, jintArray variables,jint freqthresh,jfloat sigmapercent,jboolean downweight) {
// NOTE: "variables" must be in increasing order! 0 is latitude, 1 is longitude, 2... are the other climatic variables
	const char *pfilename=(*env)->GetStringUTFChars(env, filename , NULL );
	const char *poutfilename=(*env)->GetStringUTFChars(env, outfilename , NULL );
	
	FILE *varsfile,*densfile,*freqfile;
	int nvars,i,j,k,*freqs,ntaxafiltered=0,*mapIDs,*outIDs,d1,d2,d1from,d1to,d2from,d2to,d3from,d3to,d1kern,d2kern,sidesq;
	int lastID=-1,d2p,kernelhalfside,kernelside,kernelsidesq;
	register int d3p,d3;
	register float *d3kern;
	jsize nrecs;
	jint ntaxa,*pIDs;
	VARIABLE *vararray;
	VARIABLEHEADER *varheader;
	int nvarstouse=(int)(*env)->GetArrayLength(env,variables);		// how many vars will be used for kernel density
	jint *pvariables=(*env)->GetIntArrayElements(env, variables, 0);
	float sigma;
	float *kernel,*tmpdens;
	unsigned long *weight;
	DENSITY *densities;
	bool anythingtosave=false,skiprec;
	size_t dummy;
	
	varsfile=fopen(STANDARDVARIABLEFILE(pfilename),"r");
	dummy=fread(&nrecs,sizeof(jsize),1,varsfile);
	dummy=fread(&ntaxa,sizeof(jint),1,varsfile);
	dummy=fread(&nvars,sizeof(int),1,varsfile);
//	fseek(varsfile,nvars*sizeof(tmp.filename)+sizeof(long)*ntaxa,SEEK_CUR);
	fseek(varsfile,sizeof(long)*ntaxa,SEEK_CUR);	// skip index

	{	// redirect stdout to file
		int fd;
		fpos_t pos;
		fflush(stdout);
		fgetpos(stdout, &pos);
		fd = dup(fileno(stdout));
		FILE *dummy=freopen("logfile.txt", "a", stdout);
	}


	vararray=malloc(nvarstouse*nrecs*sizeof(VARIABLE));
	varheader=malloc(nvarstouse*sizeof(VARIABLEHEADER));
	pIDs=malloc(nrecs*sizeof(jint));
	dummy=fread(pIDs,sizeof(jint),nrecs,varsfile);
	fseek(varsfile,2*sizeof(jfloat)*nrecs,SEEK_CUR);	// skip original coordinates
	weight=malloc(sizeof(long)*nrecs);
	dummy=fread(weight,sizeof(long),nrecs,varsfile);

	for(i=0;i<nvarstouse;i++) {
		fseek(varsfile,(sizeof(VARIABLEHEADER) + sizeof(VARIABLE)*nrecs)*(pvariables[i]-(i==0 ? 0 : (pvariables[i-1]+1))),SEEK_CUR);
		dummy=fread(&varheader[i],sizeof(VARIABLEHEADER),1,varsfile);
		printf("Variable %d: min %f max %f\n",pvariables[i],varheader[i].min,varheader[i].max);
		dummy=fread(&vararray[i*nrecs],sizeof(VARIABLE),nrecs,varsfile);
	}
	
// count the # of records of each taxon
	freqs=calloc(ntaxa,sizeof(int));
	for(i=0;i<nrecs;i++) freqs[pIDs[i]]++;
// write out frequencies in a text file (for java)
	freqfile=fopen(FREQANALYSISFILE(pfilename),"w");
	for(i=0;i<ntaxa;i++) fprintf(freqfile,"%d\n",freqs[i]);
	fclose(freqfile);
// count the # of taxa after filtering out rarest	
	for(i=0;i<ntaxa;i++) if(freqs[i] >= freqthresh) ntaxafiltered++;
// create a mapping of IDs: because some IDs were removed, make them sequential without holes (remember that memory is the limiting factor here!)
	mapIDs=malloc(ntaxa*sizeof(int));
	for(i=0;i<ntaxa;i++) mapIDs[i]=-1;
	for(i=0,j=0;i<nrecs;i++) {
		if(freqs[pIDs[i]] >= freqthresh) {
			if(mapIDs[pIDs[i]]==-1) {
				mapIDs[pIDs[i]]=j;
				j++;
			}
		}
	}
//for(i=0;i<ntaxa;i++) printf("%d ",mapIDs[i]);
	
// compute the resolution of the multidimensional space so that not too much memory is occupied
	side=(int)pow((float)(MAXMEMORYPERBATCH)/(float)ntaxafiltered,(float)1/nvarstouse);
	if(side>MAXSIDE) side=MAXSIDE;
//side=40;
	sidesq=side*side;
	sigma=side*sigmapercent;
	arraysize=(int)pow(side,nvarstouse);

	printf("Using a grid with a side of %d cells, in %d variables (dimensions).\n",side,nvarstouse);
	printf("Reading %d variables of %d records of %d taxa (after filtering out those with less than %d occurrences)...\n",nvars,nrecs,ntaxafiltered,freqthresh);
// build the kernel for this case
	kernel=buildKernel(side,sigma,nvarstouse,&kernelhalfside,&kernelside,&kernelsidesq);

// compute densities
// allocate N multidimensional arrays (multidimensional grids to compute kernel densities in each cell)
	densities=malloc(ntaxafiltered*sizeof(DENSITY));
	outIDs=calloc(ntaxafiltered,sizeof(int));

	for(i=0;i<ntaxafiltered;i++) {
		densities[i].density=malloc(arraysize);
		memset(densities[i].density,0,arraysize);
	}

	printf("Computing kernel densities");fflush(stdout);

// scale the variables to the size of the grid
	for(i=0;i<nrecs;i++) {	
		for(k=0;k<nvarstouse;k++) {
			if(vararray[i+nrecs*k]!=RASTERNODATA) vararray[i+nrecs*k]=(vararray[i+nrecs*k]*side)/10000;
		}
	}
	
	#pragma omp parallel private(i,k,skiprec,tmpdens,anythingtosave,d1from,d1to,d1,d1kern,d2,d2from,d2to,d2p,d2kern,d3,d3from,d3to,d3p,d3kern)
	{
		tmpdens=malloc(arraysize*sizeof(float));
		memset(tmpdens,0,arraysize*sizeof(float));

		#pragma omp for
		for(j=0;j<ntaxa;j++) {		// NOTE: this loop doesn't need the records to be sorted, that's why it takes much longer
// TODO: we might have an index here, i.e. for each taxon, a list of tthe respective records, but it's so fast that maybe it's not a big issue, we're talking about a few thousands of taxa only.
			if(freqs[j]<freqthresh) continue;
			anythingtosave=false;
			for(i=0;i<nrecs;i++) {	// iterate through all records in search for this taxon ACK!! give me an index
				if(pIDs[i]!=j) continue;
				for(k=0,skiprec=false;k<nvarstouse;k++) {	// check if any one of the variables is NA. if it is, skip this record
					if(vararray[i+nrecs*k] == RASTERNODATA) {
						skiprec=true;
						break;
					}
				}
				if(skiprec) continue;		// skip NAs
	// now yeah, create density surface by summing the kernels record by record			
	// since it is not computationally feasible more than 3 dimensions, just make the optimized code for each case...
				switch(nvarstouse) {
					case 1:
						d1from=(vararray[i]-kernelhalfside)<0 ? 0 : (vararray[i]-kernelhalfside);
						d1to=(vararray[i]+kernelhalfside+1>side ? side : vararray[i]+kernelhalfside+1);
					
						for(d1=d1from,d1kern=vararray[i]-kernelhalfside<0 ? kernelhalfside-vararray[i] : 0;d1<d1to;d1++,d1kern++) {
							tmpdens[d1]+=kernel[d1kern] * (downweight ? ((float)weight[i]/MULTIPLIER) : 1);
						}
						anythingtosave=true;
					break;
				
					case 2:
						d1from=(vararray[i]-kernelhalfside)<0 ? 0 : (vararray[i]-kernelhalfside);
						d1to=(vararray[i]+kernelhalfside+1>side ? side : vararray[i]+kernelhalfside+1);
						d2from=(vararray[i+nrecs]-kernelhalfside)<0 ? 0 : (vararray[i+nrecs]-kernelhalfside);
						d2to=(vararray[i+nrecs]+kernelhalfside+1>side ? side : vararray[i+nrecs]+kernelhalfside+1);
					
						for(d1=d1from,d1kern=vararray[i]-kernelhalfside<0 ? kernelhalfside-vararray[i] : 0;d1<d1to;d1++,d1kern++) {
							for(d2=d2from,d2p=d1+d2from*side,d2kern=d1kern+((vararray[i+nrecs]-kernelhalfside)<0 ? (kernelhalfside-vararray[i+nrecs])*kernelside : 0);d2<d2to;d2++,d2p+=side,d2kern+=kernelside) {
								tmpdens[d2p]+=kernel[d2kern] * (downweight ? ((float)weight[i]/MULTIPLIER) : 1);
							}
						}
						anythingtosave=true;
					break;
				
					case 3:
						d1from=(vararray[i]-kernelhalfside)<0 ? 0 : (vararray[i]-kernelhalfside);
						d1to=(vararray[i]+kernelhalfside+1>side ? side : vararray[i]+kernelhalfside+1);
						d2from=(vararray[i+nrecs]-kernelhalfside)<0 ? 0 : (vararray[i+nrecs]-kernelhalfside);
						d2to=(vararray[i+nrecs]+kernelhalfside+1>side ? side : vararray[i+nrecs]+kernelhalfside+1);
						d3from=(vararray[i+nrecs*2]-kernelhalfside)<0 ? 0 : (vararray[i+nrecs*2]-kernelhalfside);
						d3to=(vararray[i+nrecs*2]+kernelhalfside+1>side ? side : vararray[i+nrecs*2]+kernelhalfside+1);
						for(d1=d1from,d1kern=vararray[i]-kernelhalfside<0 ? kernelhalfside-vararray[i] : 0;
							d1<d1to;
							d1++,d1kern++) {
							for(d2=d2from,d2p=d1+d2from*side,d2kern=d1kern+((vararray[i+nrecs]-kernelhalfside)<0 ? (kernelhalfside-vararray[i+nrecs])*kernelside : 0)
								;d2<d2to
								;d2++,d2p+=side,d2kern+=kernelside) {
								for(d3=d3from,d3p=d2p+d3from*sidesq
										,d3kern=&kernel[d2kern+((vararray[i+nrecs*2]-kernelhalfside)<0 ? (kernelhalfside-vararray[i+nrecs*2])*kernelsidesq : 0)]
									;d3<d3to
									;d3++,d3p+=sidesq,d3kern+=kernelsidesq) {
									tmpdens[d3p]+=*d3kern * (downweight ? ((float)weight[i]/MULTIPLIER) : 1);
									//kernel[d3kern];
								}
							}
						}
						anythingtosave=true;
					break;
				}
			}	// end record loop
			if(anythingtosave) {
				saveKernelDensity(tmpdens,freqs[j],&densities[mapIDs[j]]);		// save kernel density of previous taxon
				outIDs[mapIDs[j]]=j;
			} else {
				saveKernelDensity(NULL, freqs[j], &densities[mapIDs[j]]);		// save kernel density of previous taxon
				outIDs[mapIDs[j]]=j;
//				outIDs[mapIDs[j]]=-1;
			}
			anythingtosave = false;
			memset(tmpdens,0,arraysize*sizeof(float));
			printf(".");
			fflush(stdout);
		}	// end taxon loop
	}
/*	THIS is the working code. Above still developing.
	for(i=0;i<nrecs;i++) {	// iterate through all records IMPORTANT: records must be sorted by taxon ID
		if(freqs[pIDs[i]]>=freqthresh) {
			if(pIDs[i]!=lastID) {	// this record is already a different species
				if(anythingtosave) {
					saveKernelDensity(tmpdens,freqs[lastID],&densities[mapIDs[lastID]]);		// save kernel density of previous taxon
					outIDs[mapIDs[lastID]]=lastID;
				} else outIDs[mapIDs[lastID]]=-1;
				anythingtosave=false;
				memset(tmpdens,0,arraysize*sizeof(float));
				lastID=pIDs[i];
				printf(".");
				fflush(stdout);
			}
// scale the variables to the size of the grid
			for(j=0,skiprec=false;j<nvarstouse;j++) {
				if(vararray[i+nrecs*j]==RASTERNODATA) {
					skiprec=true;
					continue;
				} else vararray[i+nrecs*j]=(vararray[i+nrecs*j]*side)/10000;
			}
			if(skiprec) continue;		// skip NAs
// now yeah, create density surface by summing the kernels record by record			
// since it is not computationally feasible more than 3 dimensions, just make the optimized code for each case...
			switch(nvarstouse) {
				case 1:
					d1from=(vararray[i]-kernelhalfside)<0 ? 0 : (vararray[i]-kernelhalfside);
					d1to=(vararray[i]+kernelhalfside+1>side ? side : vararray[i]+kernelhalfside+1);
					
					for(d1=d1from,d1kern=vararray[i]-kernelhalfside<0 ? kernelhalfside-vararray[i] : 0;d1<d1to;d1++,d1kern++) {
						tmpdens[d1]+=kernel[d1kern] * (downweight ? ((float)weight[i]/MULTIPLIER) : 1);
					}
					anythingtosave=true;
				break;
				
				case 2:
					d1from=(vararray[i]-kernelhalfside)<0 ? 0 : (vararray[i]-kernelhalfside);
					d1to=(vararray[i]+kernelhalfside+1>side ? side : vararray[i]+kernelhalfside+1);
					d2from=(vararray[i+nrecs]-kernelhalfside)<0 ? 0 : (vararray[i+nrecs]-kernelhalfside);
					d2to=(vararray[i+nrecs]+kernelhalfside+1>side ? side : vararray[i+nrecs]+kernelhalfside+1);
					
					for(d1=d1from,d1kern=vararray[i]-kernelhalfside<0 ? kernelhalfside-vararray[i] : 0;d1<d1to;d1++,d1kern++) {
						for(d2=d2from,d2p=d1+d2from*side,d2kern=d1kern+((vararray[i+nrecs]-kernelhalfside)<0 ? (kernelhalfside-vararray[i+nrecs])*kernelside : 0);d2<d2to;d2++,d2p+=side,d2kern+=kernelside) {
							tmpdens[d2p]+=kernel[d2kern] * (downweight ? ((float)weight[i]/MULTIPLIER) : 1);
						}
					}
					anythingtosave=true;
				break;
				
				case 3:
					d1from=(vararray[i]-kernelhalfside)<0 ? 0 : (vararray[i]-kernelhalfside);
					d1to=(vararray[i]+kernelhalfside+1>side ? side : vararray[i]+kernelhalfside+1);
					d2from=(vararray[i+nrecs]-kernelhalfside)<0 ? 0 : (vararray[i+nrecs]-kernelhalfside);
					d2to=(vararray[i+nrecs]+kernelhalfside+1>side ? side : vararray[i+nrecs]+kernelhalfside+1);
					d3from=(vararray[i+nrecs*2]-kernelhalfside)<0 ? 0 : (vararray[i+nrecs*2]-kernelhalfside);
					d3to=(vararray[i+nrecs*2]+kernelhalfside+1>side ? side : vararray[i+nrecs*2]+kernelhalfside+1);
					for(d1=d1from,d1kern=vararray[i]-kernelhalfside<0 ? kernelhalfside-vararray[i] : 0;
						d1<d1to;
						d1++,d1kern++) {
						for(d2=d2from,d2p=d1+d2from*side,d2kern=d1kern+((vararray[i+nrecs]-kernelhalfside)<0 ? (kernelhalfside-vararray[i+nrecs])*kernelside : 0)
							;d2<d2to
							;d2++,d2p+=side,d2kern+=kernelside) {
							for(d3=d3from,d3p=d2p+d3from*sidesq
									,d3kern=&kernel[d2kern+((vararray[i+nrecs*2]-kernelhalfside)<0 ? (kernelhalfside-vararray[i+nrecs*2])*kernelsidesq : 0)]
								;d3<d3to
								;d3++,d3p+=sidesq,d3kern+=kernelsidesq) {
								tmpdens[d3p]+=*d3kern * (downweight ? ((float)weight[i]/MULTIPLIER) : 1);
								//kernel[d3kern];
							}
						}
					}
					anythingtosave=true;
				break;
			}
		}
	}
	saveKernelDensity(tmpdens,freqs[lastID],&densities[mapIDs[lastID]]);		// save kernel density of the last taxon
	outIDs[mapIDs[lastID]]=lastID;
*/

// scale all densities to the absolute maximum
/*	unsigned long maxmax=0;
	float factor;
	for(i=0;i<ntaxafiltered;i++) if(maxmax<densities[i].max) maxmax=densities[i].max;
	for(i=0;i<ntaxafiltered;i++) {
		factor=(float)densities[i].max/maxmax;
		for(j=0;j<arraysize;j++) densities[i].density[j]=(unsigned char)((float)densities[i].density[j]*factor);
	}
	*/
// now write to output file
	printf("\nWriting file...\n");
	densfile=fopen(DENSITYFILE(pfilename,poutfilename),"w");
	fwrite(&ntaxafiltered,sizeof(int),1,densfile);		// how many densities in file
	fwrite(outIDs,sizeof(int),ntaxafiltered,densfile);	// the real taxon IDs of each density
	fwrite(&side,sizeof(int),1,densfile);				// the size of the grid
	fwrite(&nvarstouse,sizeof(int),1,densfile);			// the number of variables
	for(k=0;k<ntaxafiltered;k++) {						// now the densities!
		fwrite(&densities[k],sizeof(DENSITY),1,densfile);	// of course, the pointer will be meaningless
		fwrite(densities[k].density,arraysize,1,densfile);
	}
	fclose(densfile);
#ifdef VERBOSE	
	for(k=3;k<4;k++) {	//ntaxafiltered
		printf("************* Taxon Nrecs: %d ************\n",densities[k].nrecords);
		switch(nvarstouse) {
			case 2:
				for(d1=0;d1<side;d1++) {
					for(d2=0;d2<side;d2++) {
							printf("%3d",densities[k].density[d1+d2*side]);
					}
					printf("\n");
				}
			break;
			
			case 3:
				for(d1=0;d1<5;d1++) {
					for(d2=0;d2<side;d2++) {
						for(d3=0;d3<side;d3++) {
							printf("%3d",densities[k].density[d1+d2*side+d3*sidesq]);
						}
						printf("\n");
					}
					printf("*****************\n");
				}
			break;
		}
		printf("********SUM: %f NRECS: %d*********\n",sum,densities[k].nrecords);
	}
	for(k=0;k<ntaxafiltered;k++) {
		for(d1=0,sum=0;d1<arraysize;d1++) sum+=(float)densities[k].density[d1]*densities[k].max/255.0;
		printf("SUM: %f NRECS: %d\n",sum,densities[k].nrecords);
	}
#endif
	
	
	fclose(varsfile);
	free(pIDs);
	free(freqs);
	free(vararray);
	free(varheader);
	free(kernel);
	free(mapIDs);
	free(tmpdens);
	for(i=0;i<ntaxafiltered;i++) free(densities[i].density);
	free(densities);
	free(outIDs);
	(*env)->ReleaseStringUTFChars(env,filename,pfilename);
	(*env)->ReleaseStringUTFChars(env,outfilename,poutfilename);
	return 1;
}

예제 #12

0

파일 보기

파일: PtexSeparableFilter.cpp 프로젝트: Alexander-Murashko/oiio

void PtexSeparableFilter::eval(float* result, int firstChan, int nChannels,
			       int faceid, float u, float v, 
			       float uw1, float vw1, float uw2, float vw2,
			       float width, float blur)
{
    // init
    if (!_tx || nChannels <= 0) return;
    if (faceid < 0 || faceid >= _tx->numFaces()) return;
    _ntxchan = _tx->numChannels();
    _dt = _tx->dataType();
    _firstChanOffset = firstChan*DataSize(_dt);
    _nchan = PtexUtils::min(nChannels, _ntxchan-firstChan);

    // get face info
    const FaceInfo& f = _tx->getFaceInfo(faceid);

    // if neighborhood is constant, just return constant value of face
    if (f.isNeighborhoodConstant()) {
	PtexPtr<PtexFaceData> data ( _tx->getData(faceid, 0) );
	if (data) {
	    char* d = (char*) data->getData() + _firstChanOffset;
	    Ptex::ConvertToFloat(result, d, _dt, _nchan);
	}
	return;
    }

    // find filter width as bounding box of vectors w1 and w2
    float uw = fabs(uw1) + fabs(uw2), vw = fabs(vw1) + fabs(vw2);

    // handle border modes
    switch (_uMode) {
    case m_clamp: u = PtexUtils::clamp(u, 0.0f, 1.0f); break;
    case m_periodic: u = u-floor(u); break;
    case m_black: break; // do nothing
    }

    switch (_vMode) {
    case m_clamp: v = PtexUtils::clamp(v, 0.0f, 1.0f); break;
    case m_periodic: v = v-floor(v);
    case m_black: break; // do nothing
    }

    // build kernel
    PtexSeparableKernel k;
    if (f.isSubface()) {
	// for a subface, build the kernel as if it were on a main face and then downres
	uw = uw * width + blur * 2;
	vw = vw * width + blur * 2;
	buildKernel(k, u*.5, v*.5, uw*.5, vw*.5, f.res);
	if (k.res.ulog2 == 0) k.upresU();
	if (k.res.vlog2 == 0) k.upresV();
	k.res.ulog2--; k.res.vlog2--;
    }
    else {
	uw = uw * width + blur;
	vw = vw * width + blur;
	buildKernel(k, u, v, uw, vw, f.res);
    }
    k.stripZeros();

    // check kernel (debug only)
    assert(k.uw > 0 && k.vw > 0);
    assert(k.uw <= PtexSeparableKernel::kmax && k.vw <= PtexSeparableKernel::kmax);
    _weight = k.weight();

    // allocate temporary double-precision result
    _result = (double*) alloca(sizeof(double)*_nchan);
    memset(_result, 0, sizeof(double)*_nchan);

    // apply to faces
    splitAndApply(k, faceid, f);

    // normalize (both for data type and cumulative kernel weight applied)
    // and output result
    double scale = 1.0 / (_weight * OneValue(_dt));
    for (int i = 0; i < _nchan; i++) result[i] = float(_result[i] * scale);

    // clear temp result
    _result = 0;
}

예제 #13

0

파일 보기

CLKernel *EasyCL::buildKernel(string kernelfilepath, string kernelname) {
    return buildKernel(kernelfilepath, kernelname, "");
}

예제 #14

0

파일 보기

파일: volumemaxclprocessor.cpp 프로젝트: sunwj/inviwo

void VolumeMaxCLProcessor::initialize() {
    Processor::initialize();
    buildKernel();
}

예제 #15

0

파일 보기

파일: raycast.c 프로젝트: jdsteve2/parallel-c-programs

// Serial ray casting
unsigned char* raycast_serial(unsigned char* data, unsigned char* region){
    unsigned char* image = (unsigned char*)malloc(sizeof(unsigned char)*IMAGE_DIM*IMAGE_DIM);
    
    // Camera/eye position, and direction of viewing. These can be changed to look
    // at the volume from different angles.
    float3 camera = {.x=1000,.y=1000,.z=1000};
    float3 forward = {.x=-1, .y=-1, .z=-1};
    float3 z_axis = {.x=0, .y=0, .z = 1};
    
    // Finding vectors aligned with the axis of the image
    float3 right = cross(forward, z_axis);
    float3 up = cross(right, forward);
    
    // Creating unity lenght vectors
    forward = normalize(forward);
    right = normalize(right);
    up = normalize(up);
    
    float fov = 3.14/4;
    float pixel_width = tan(fov/2.0)/(IMAGE_DIM/2);
    float step_size = 0.5;
    
    // For each pixel
    for(int y = -(IMAGE_DIM/2); y < (IMAGE_DIM/2); y++){
        for(int x = -(IMAGE_DIM/2); x < (IMAGE_DIM/2); x++){
            
            // Find the ray for this pixel
            float3 screen_center = add(camera, forward);
            float3 ray = add(add(screen_center, scale(right, x*pixel_width)), scale(up, y*pixel_width));
            ray = add(ray, scale(camera, -1));
            ray = normalize(ray);
            float3 pos = camera;
            
            // Move along the ray, we stop if the color becomes completely white,
            // or we've done 5000 iterations (5000 is a bit arbitrary, it needs 
            // to be big enough to let rays pass through the entire volume)
            int i = 0;
            float color = 0;
            while(color < 255 && i < 5000){
                i++;
                pos = add(pos, scale(ray, step_size));          // Update position
                int r = value_at(pos, region);                  // Check if we're in the region
                color += value_at(pos, data)*(0.01 + r) ;       // Update the color based on data value, and if we're in the region
            }
            
            // Write final color to image
            image[(y+(IMAGE_DIM/2)) * IMAGE_DIM + (x+(IMAGE_DIM/2))] = color > 255 ? 255 : color;
        }
    }
    
    return image;
}


// Check if two values are similar, threshold can be changed.
int similar(unsigned char* data, int3 a, int3 b){
    unsigned char va = data[a.z * DATA_DIM*DATA_DIM + a.y*DATA_DIM + a.x];
    unsigned char vb = data[b.z * DATA_DIM*DATA_DIM + b.y*DATA_DIM + b.x];
    
    int i = abs(va-vb) < 1;
    return i;
}


// Serial region growing, same algorithm as in assignment 2
unsigned char* grow_region_serial(unsigned char* data){
    unsigned char* region = (unsigned char*)calloc(sizeof(unsigned char), DATA_DIM*DATA_DIM*DATA_DIM);
    
    stack_t* stack = new_stack();
    
    int3 seed = {.x=50, .y=300, .z=300};
    push(stack, seed);
    region[seed.z *DATA_DIM*DATA_DIM + seed.y*DATA_DIM + seed.x] = 1;
    
    int dx[6] = {-1,1,0,0,0,0};
    int dy[6] = {0,0,-1,1,0,0};
    int dz[6] = {0,0,0,0,-1,1};
    
    while(stack->size > 0){
        int3 pixel = pop(stack);
        for(int n = 0; n < 6; n++){
            int3 candidate = pixel;
            candidate.x += dx[n];
            candidate.y += dy[n];
            candidate.z += dz[n];
            
            if(!inside_int(candidate)){
                continue;
            }
            
            if(region[candidate.z * DATA_DIM*DATA_DIM + candidate.y*DATA_DIM + candidate.x]){
                continue;
            }
            
            if(similar(data, pixel, candidate)){
                push(stack, candidate);
                region[candidate.z * DATA_DIM*DATA_DIM + candidate.y*DATA_DIM + candidate.x] = 1;
            }
        }
    }
    
    return region;
}

unsigned char* grow_region_gpu(unsigned char* data){
    cl_platform_id platform;
    cl_device_id device;
    cl_context context;
    cl_command_queue queue;
    cl_kernel kernel;
    cl_int err;
    char *source;
    int i;
    
    clGetPlatformIDs(1, &platform, NULL);
    clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
    context = clCreateContext(NULL, 1, &device, NULL, NULL, &err);
    
    printPlatformInfo(platform);

    queue = clCreateCommandQueue(context, device, 0, &err);
    kernel = buildKernel("region.cl", "region", NULL, context, device);

    //Host variables
    unsigned char* host_region = (unsigned char*)calloc(sizeof(unsigned char), DATA_SIZE);
    int            host_unfinished;

    cl_mem device_region     = clCreateBuffer(context, CL_MEM_READ_WRITE, DATA_SIZE * sizeof(cl_uchar) ,NULL,&err);
    cl_mem device_data       = clCreateBuffer(context, CL_MEM_READ_ONLY, DATA_SIZE * sizeof(cl_uchar), NULL,&err);
    cl_mem device_unfinished = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL,&err);
    clError("Error allocating memory", err);

    //plant seed
    int3 seed = {.x=50, .y=300, .z=300};
    host_region[index(seed.z, seed.y, seed.x)] = 2;

    //Copy data to the device
    clEnqueueWriteBuffer(queue, device_data  , CL_FALSE, 0, DATA_SIZE * sizeof(cl_uchar), data       , 0, NULL, NULL);
    clEnqueueWriteBuffer(queue, device_region, CL_FALSE, 0, DATA_SIZE * sizeof(cl_uchar), host_region, 0, NULL, NULL);

    //Calculate block and grid sizes
    size_t global[] = { 512, 512, 512 }; 
    size_t local[] = { 8, 8, 8 };


    //Run kernel untill completion
    do{
        host_unfinished = 0;
        clEnqueueWriteBuffer(queue, device_unfinished, CL_FALSE, 0, sizeof(cl_int), &host_unfinished  , 0, NULL, NULL);
        clFinish(queue);

        err = clSetKernelArg(kernel, 0, sizeof(device_data), (void*)&device_data);
        err = clSetKernelArg(kernel, 1, sizeof(device_region), (void*)&device_region);
        err = clSetKernelArg(kernel, 2, sizeof(device_unfinished), (void*)&device_unfinished);
        clError("Error setting arguments", err);

        //Run the kernel
        clEnqueueNDRangeKernel(queue, kernel, 3, NULL, &global, &local, 0, NULL, NULL);
        clFinish(queue);
        clError("Error running kernel", err);

        err = clEnqueueReadBuffer(queue, device_unfinished, CL_TRUE, 0, sizeof(cl_int), &host_unfinished, 0, NULL, NULL);
        clFinish(queue);
        clError("Error reading buffer 1", err);

    }while(host_unfinished);

    //Copy result to host
    err = clEnqueueReadBuffer(queue, device_region, CL_TRUE, 0, DATA_SIZE * sizeof(cl_uchar), host_region, 0, NULL, NULL);
    clFinish(queue);
    clError("Error reading buffer 2", err);

    return host_region;
}

unsigned char* raycast_gpu(unsigned char* data, unsigned char* region){
    cl_platform_id platform;
    cl_device_id device;
    cl_context context;
    cl_command_queue queue;
    cl_kernel kernel;
    cl_int err;
    char *source;
    int i;
    
    clGetPlatformIDs(1, &platform, NULL);
    clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
    context = clCreateContext(NULL, 1, &device, NULL, NULL, &err);

    printPlatformInfo(platform);
    printDeviceInfo(device);

    queue = clCreateCommandQueue(context, device, 0, &err);
    kernel = buildKernel("raycast.cl", "raycast", NULL, context, device);


    cl_mem device_region = clCreateBuffer(context, CL_MEM_READ_ONLY, DATA_SIZE * sizeof(cl_uchar) ,NULL,&err);
    cl_mem device_data   = clCreateBuffer(context, CL_MEM_READ_ONLY, DATA_SIZE * sizeof(cl_uchar), NULL,&err);
    cl_mem device_image  = clCreateBuffer(context, CL_MEM_READ_WRITE, IMAGE_SIZE * sizeof(cl_uchar),NULL,&err);
    clError("Error allocating memory", err);
    //Copy data to the device
    clEnqueueWriteBuffer(queue, device_data  , CL_FALSE, 0, DATA_SIZE * sizeof(cl_uchar), data  , 0, NULL, NULL);
    clEnqueueWriteBuffer(queue, device_region, CL_FALSE, 0, DATA_SIZE * sizeof(cl_uchar), region, 0, NULL, NULL);

    int grid_size = IMAGE_DIM;
    int block_size = IMAGE_DIM;


    //Set up kernel arguments
    err = clSetKernelArg(kernel, 0, sizeof(device_data), (void*)&device_data);
    err = clSetKernelArg(kernel, 1, sizeof(device_region), (void*)&device_region);
    err = clSetKernelArg(kernel, 2, sizeof(device_image), (void*)&device_image);
    clError("Error setting arguments", err);

    //Run the kernel
    const size_t globalws[2] = {IMAGE_DIM, IMAGE_DIM};
    const size_t localws[2] = {8, 8};
    clEnqueueNDRangeKernel(queue, kernel, 2, NULL, &globalws, &localws, 0, NULL, NULL);
    
    clFinish(queue);


    //Allocate memory for the result
    unsigned char* host_image = (unsigned char*)malloc(IMAGE_SIZE_BYTES);

    //Copy result from device
    err = clEnqueueReadBuffer(queue, device_image, CL_TRUE, 0, IMAGE_SIZE * sizeof(cl_uchar), host_image, 0, NULL, NULL);
    clFinish(queue);


    //Free device memory
    return host_image;
}


int main(int argc, char** argv){
    
    unsigned char* data = create_data();
    
    unsigned char* region = grow_region_gpu(data);
    
    unsigned char* image = raycast_gpu(data, region);
    
    write_bmp(image, IMAGE_DIM, IMAGE_DIM);
}