Ejemplo n.º 1
0
void VHParticlesRender::draw(){

	if (pSys->nParts == -1)
		return;

	if(displayMode == SHADOWED_SPRITES) {

			calcVectors();
			cu::float3 halfVec = cu::make_float3(halfVector.x,halfVector.y,halfVector.z); 

			calcDepthCu(pSys->dev_pos, pSys->dev_keys, pSys->dev_indices, halfVec, pSys->nParts);

			if (sortParts)
				cudppSort(m_sortHandle, pSys->dev_keys, pSys->dev_indices, 32, pSys->nParts);
	}

	if((displayMode == SPRITES || displayMode == POINTS) && sortParts) {

		glGetFloatv(GL_MODELVIEW_MATRIX, (float *) modelView.get_value());
		 viewVector = -vec3f(modelView.get_row(2));

		cu::float3 viewVec = cu::make_float3(viewVector.x, viewVector.y, viewVector.y);
		//printf("view vec : %f, %f, %f \n", viewVector.x, viewVector.y, viewVector.z);
		calcDepthCu(pSys->dev_pos, pSys->dev_keys, pSys->dev_indices, viewVec, pSys->nParts);

		cudppSort(m_sortHandle, pSys->dev_keys, pSys->dev_indices, 32, pSys->nParts);


	}

	pSys->posVbo->map();
	pSys->colourVbo->map();
	pSys->indexVbo->map();

	pSys->updateVBOs();

	pSys->posVbo->unmap();
	pSys->colourVbo->unmap();
	pSys->indexVbo->unmap();

	switch (displayMode) {

		case POINTS:

			glPointSize(pointSize);

			glDisable(GL_DEPTH_TEST);

			glEnable(GL_BLEND);

			pSys->posVbo->bind();
			glVertexPointer(3, GL_FLOAT, 0, 0);
			glEnableClientState(GL_VERTEX_ARRAY);

			pSys->colourVbo->bind();
			glColorPointer(4, GL_FLOAT, 0, 0);
			glEnableClientState(GL_COLOR_ARRAY);

			if (blendingMode == ADD) {
				glBlendFunc( GL_SRC_ALPHA, GL_ONE );
				glDrawArrays(GL_POINTS, 0, pSys->nParts);
			} else {
				glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
				if(sortParts){
					pSys->indexVbo->bind();
					glDrawElements(GL_POINTS, pSys->nParts, GL_UNSIGNED_INT, 0);
					pSys->indexVbo->unbind();
				} else {
					glDrawArrays(GL_POINTS, 0, pSys->nParts);
				}
			}


			pSys->posVbo->unbind();

			glDisableClientState(GL_VERTEX_ARRAY);
			glDisableClientState(GL_COLOR_ARRAY);

			glDisable(GL_BLEND);

			break;

		case LINES:

			glDisable(GL_DEPTH_TEST);

			glEnable(GL_BLEND);
			glBlendFunc( GL_SRC_ALPHA, GL_ONE );

			pSys->posVbo->bind();
			glVertexPointer(3, GL_FLOAT, 0, 0);
			glEnableClientState(GL_VERTEX_ARRAY);

			pSys->colourVbo->bind();
			glColorPointer(4, GL_FLOAT, 0, 0);
			glEnableClientState(GL_COLOR_ARRAY);

			glLineWidth(lineWidth);
			for (int i = 0; i<pSys->nLeadParts; i++) {
				glDrawArrays(GL_LINE_STRIP, i*pSys->trailLength, pSys->trailLength);
			}

			pSys->posVbo->unbind();

			glDisableClientState(GL_VERTEX_ARRAY);
			glDisableClientState(GL_COLOR_ARRAY);

			glDisable(GL_BLEND);

			break;

		case SPRITES:


			glDisable(GL_DEPTH_TEST);

			glEnable(GL_TEXTURE_2D);

			glEnable(GL_BLEND);
			glBlendFunc( GL_SRC_ALPHA, GL_ONE );

			pSys->posVbo->bind();
			glVertexPointer(3, GL_FLOAT, 0, 0);
			glEnableClientState(GL_VERTEX_ARRAY);

			pSys->colourVbo->bind();
			glColorPointer(4, GL_FLOAT, 0, 0);
			glEnableClientState(GL_COLOR_ARRAY);

			simpleSpriteProg->enable();
			simpleSpriteProg->setUniform1f("pointRadius",pointSize);
			simpleSpriteProg->bindTexture("sDiffuseMap",TextureManager::Inst()->m_texID[id1],GL_TEXTURE_2D,0);

			if (blendingMode == ADD) {
				glBlendFunc( GL_SRC_ALPHA, GL_ONE );
				glDrawArrays(GL_POINTS, 0, pSys->nParts);
			} else {
				glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);

				if(sortParts){
					pSys->indexVbo->bind();
					glDrawElements(GL_POINTS, pSys->nParts, GL_UNSIGNED_INT, 0);
					pSys->indexVbo->unbind();
				} else {
					glDrawArrays(GL_POINTS, 0, pSys->nParts);
				}
			}

			simpleSpriteProg->disable();

			pSys->posVbo->unbind();

			glDisableClientState(GL_VERTEX_ARRAY);
			glDisableClientState(GL_COLOR_ARRAY);

			glDisable(GL_BLEND);

			glDisable(GL_TEXTURE_2D);

			break;

		case SHADOWED_SPRITES :

			GLfloat currentViewport[4];
			glGetFloatv(GL_VIEWPORT, currentViewport);

			if(width != currentViewport[2] || height != currentViewport[3])
				initFbos(currentViewport[2],currentViewport[3], false);

			drawSlices();

			//glutReportErrors();

			Fbo::unbind();


			glViewport(0, 0, width, height);
			glDisable(GL_DEPTH_TEST);
			glBlendFunc(GL_ONE, GL_ONE_MINUS_SRC_ALPHA);
			glEnable(GL_BLEND);

			int mm;
			glGetIntegerv ( GL_MATRIX_MODE, &mm );

			displayTexProg->enable();
			displayTexProg->bindTexture("tex", imageTex, GL_TEXTURE_2D, 0);
		    
			drawQuad();

			displayTexProg->disable();

			if(displayLightBuffer) {

				displayTexProg->bindTexture("tex", lightTex[srcLightTexture], GL_TEXTURE_2D, 0);
				glViewport(0, 0, lightBufferSize, lightBufferSize);
				drawQuad();
				displayTexProg->disable();
			}

			//calcVectors();

			glViewport(0, 0, width, height);

 			if (displayVectors) {
				debugVectors();
			}

			glutReportErrors();

			break;

	}

}
Ejemplo n.º 2
0
  /**
   * Sorts (key, value) int pairs with CUDPP and derives per-unique-key
   * metadata from the sorted keys.
   *
   * Inputs with more than 32 * 1048576 pairs are handed to
   * executeOnCPUAsync() unchanged.
   *
   * @param keys          Host buffer of numKeys ints. On return its first
   *                      numUniqueKeys entries hold the compacted keys; the
   *                      remaining entries are left as they were.
   * @param vals          Host buffer of numKeys ints; overwritten with the
   *                      values in sorted-key order.
   * @param numKeys       Number of (key, value) pairs. Values <= 0 are
   *                      treated as an empty input.
   * @param numUniqueKeys Out: first element of the backward exclusive scan of
   *                      the unique flags, plus one (0 for empty input).
   * @param keyOffsets    Out: always set to NULL by this path; the compacted
   *                      keys are returned through `keys` instead.
   * @param valOffsets    Out: buffer of numUniqueKeys ints obtained from
   *                      cudacpp::Runtime::mallocHost, holding the forward
   *                      exclusive scan of *numVals (NULL for empty input).
   * @param numVals       Out: buffer of numUniqueKeys ints obtained from
   *                      cudacpp::Runtime::mallocHost, holding the per-key
   *                      value counts (NULL for empty input).
   *
   * NOTE(review): the mallocHost buffers are not released here; presumably
   * the caller owns them -- confirm.
   * NOTE(review): keys are sorted as CUDPP_UINT, so negative keys would
   * presumably order after non-negative ones -- confirm keys are >= 0.
   */
  void IntIntSorter::executeOnGPUAsync(void * const keys, void * const vals, const int numKeys, int & numUniqueKeys, int ** keyOffsets, int ** valOffsets, int ** numVals)
  {
    // Nothing to sort. A negative count is treated the same way so that it
    // never reaches the device allocator as a huge unsigned size.
    if (numKeys <= 0)
    {
      numUniqueKeys = 0;
      *keyOffsets = *valOffsets = NULL;
      *numVals = NULL;
      return;
    }
    if (numKeys > 32 * 1048576)
    {
      executeOnCPUAsync(keys, vals, numKeys, numUniqueKeys, keyOffsets, valOffsets, numVals);
      return;
    }

    // This path never produces a key-offset array; give the caller a
    // well-defined value instead of leaving the pointer untouched.
    *keyOffsets = NULL;

    CUDPPConfiguration cudppConfig;
    CUDPPHandle planHandle;

    // Four device buffers of numKeys ints each; they are reused for several
    // intermediate results below.
    void * gpuInputKeys   = cudacpp::Runtime::malloc(sizeof(int) * numKeys);
    void * gpuInputVals   = cudacpp::Runtime::malloc(sizeof(int) * numKeys);
    void * gpuUniqueFlags = cudacpp::Runtime::malloc(sizeof(int) * numKeys);
    void * gpuValOffsets  = cudacpp::Runtime::malloc(sizeof(int) * numKeys);

    cudacpp::Runtime::memcpyHtoD(gpuInputKeys, keys, sizeof(int) * numKeys);
    cudacpp::Runtime::memcpyHtoD(gpuInputVals, vals, sizeof(int) * numKeys);

    /*
      what we need to get out of here:

      1 - sorted keys and values
      2 - num unique keys
      3 - number of values for each key
      4 - value offsets
      5 - compacted keys

      to get:
          simply sort
          A = find unique values
          B = reverse exclusive scan of "A"
          C = if A[i] == 1
                C[B[0] - B[i]] = i
          D = [0] = C[0] + 1
              [N] = #keys - C[#keys - 1]
              [i] = C[i + 1] - C[i]
          E = forward exclusive scan D
          F = keys[E[i]]

      1 = result of sort (only copy the values)
      2 = B[0]
      3 = D
      4 = E
      5 = F
    */

    // 1 - key/value radix sort; only the sorted values are copied back here.
    cudppConfig.algorithm  = CUDPP_SORT_RADIX;
    cudppConfig.op         = CUDPP_ADD; // ignored by the sort
    cudppConfig.datatype   = CUDPP_UINT;
    cudppConfig.options    = CUDPP_OPTION_KEY_VALUE_PAIRS;
    cudppPlan(&planHandle, cudppConfig, numKeys, 1, numKeys * sizeof(int));
    cudppSort(planHandle, gpuInputKeys, gpuInputVals, sizeof(int) * 8, numKeys);
    cudppDestroyPlan(planHandle);
    cudacpp::Runtime::sync();
    // cudacpp::Runtime::memcpyDtoH(keys, gpuInputKeys, sizeof(int) * numKeys);
    cudacpp::Runtime::memcpyDtoH(vals, gpuInputVals, sizeof(int) * numKeys);

    // A = gpuUniqueFlags (written by the mark-unique helper)
    gpmrIntIntSorterMarkUnique(gpuInputKeys, gpuUniqueFlags, numKeys);

    // B = gpuValOffsets: backward exclusive sum scan of A
    cudppConfig.algorithm  = CUDPP_SCAN;
    cudppConfig.op         = CUDPP_ADD; // scan operator: sum
    cudppConfig.datatype   = CUDPP_INT;
    cudppConfig.options    = CUDPP_OPTION_EXCLUSIVE | CUDPP_OPTION_BACKWARD;
    cudppPlan(&planHandle, cudppConfig, numKeys, 1, numKeys * sizeof(int));
    cudppScan(planHandle, gpuValOffsets, gpuUniqueFlags, numKeys);
    cudppDestroyPlan(planHandle);
    cudacpp::Runtime::sync();

    // 2 - B[0] is an exclusive sum, so it does not include A[0]; the
    // increment adds one more key to that count.
    cudacpp::Runtime::memcpyDtoH(&numUniqueKeys, gpuValOffsets, sizeof(int));
    ++numUniqueKeys;

    // C = gpuInputVals and
    // 3 - D = gpuValOffsets
    // gpuValOffsets is passed twice: once holding B and once as the
    // destination for D (presumably in-place safe -- confirm in the helper).
    cudacpp::Runtime::sync();
    gpmrIntIntSorterFindOffsets(gpuInputKeys, gpuUniqueFlags, gpuValOffsets, gpuInputVals, gpuValOffsets, numKeys, numUniqueKeys);
    *numVals = reinterpret_cast<int * >(cudacpp::Runtime::mallocHost(numUniqueKeys * sizeof(int)));
    cudacpp::Runtime::sync();
    cudacpp::Runtime::memcpyDtoH(*numVals, gpuValOffsets, sizeof(int) * numUniqueKeys);
    cudacpp::Runtime::sync();

    // 4 - E = gpuUniqueFlags: forward exclusive sum scan of D.
    // The scan runs over all numKeys elements, but only the first
    // numUniqueKeys results are copied out below.
    cudppConfig.algorithm  = CUDPP_SCAN;
    cudppConfig.op         = CUDPP_ADD; // scan operator: sum
    cudppConfig.datatype   = CUDPP_INT;
    cudppConfig.options    = CUDPP_OPTION_EXCLUSIVE | CUDPP_OPTION_FORWARD;
    cudppPlan(&planHandle, cudppConfig, numKeys, 1, numKeys * sizeof(int));
    cudppScan(planHandle, gpuUniqueFlags, gpuValOffsets, numKeys);
    cudppDestroyPlan(planHandle);
    cudacpp::Runtime::sync();
    *valOffsets = reinterpret_cast<int * >(cudacpp::Runtime::mallocHost(numUniqueKeys * sizeof(int)));
    cudacpp::Runtime::memcpyDtoH(*valOffsets, gpuUniqueFlags, sizeof(int) * numUniqueKeys);

    // 5 - F = gpuInputVals: compacted keys, copied over the front of `keys`.
    gpmrIntIntSorterSetCompactedKeys(gpuInputKeys, gpuUniqueFlags, gpuInputVals, numUniqueKeys);
    cudacpp::Runtime::memcpyDtoH(keys, gpuInputVals, sizeof(int) * numUniqueKeys);

    cudacpp::Runtime::free(gpuInputKeys);
    cudacpp::Runtime::free(gpuInputVals);
    cudacpp::Runtime::free(gpuUniqueFlags);
    cudacpp::Runtime::free(gpuValOffsets);
  }