Exemple #1
0
int main(int argc, char *argv[])
{
    bool bench = false;
    int benchFrames = 0;

    // GLUT may remove from args
    glutInit(&argc,argv);

#ifndef PACKAGE_STRING
#define PACKAGE_STRING "Renderer 2.x"
#endif

#ifndef __TIMESTAMP__
#define __TIMESTAMP__ "Unknown"
#endif
    if (argc < 2)
	panic("%s\nBuilt on: %s\nUsage: %s [-b frames] <filenameOf3dObject.ply>\n",
	    PACKAGE_STRING, __TIMESTAMP__, argv[0]);

    printf("%s\nBuilt on: %s\n", PACKAGE_STRING, __TIMESTAMP__);

    // In case we want to benchmark
    if (!strcmp(argv[1], "-b")) {
	bench = true;
	benchFrames = atoi(argv[2]);
    }

    float maxi = load_object(argv[argc-1]);

    UpdateBoundingVolumeHierarchy(argv[argc-1]);

    // SDL startup

    if ( SDL_Init(SDL_INIT_VIDEO) < 0 )
        panic("Couldn't initialize SDL: %s\n", SDL_GetError());
    atexit(SDL_Quit); // Clean up on exit
    if (!SDL_SetVideoMode( MAXX, MAXY, 0, SDL_OPENGL))
        panic("Couldn't set video mode: %s\n", SDL_GetError());
    // Ignore mice, we love keyboards
    SDL_EventState(SDL_MOUSEMOTION, SDL_IGNORE);

    // OpenGL startup

    // We need an OpenGL PBO...
    cout << "Loading extensions: " << glewGetErrorString(glewInit()) << endl;
    if (!glewIsSupported( "GL_VERSION_1_5 GL_ARB_pixel_buffer_object" ))
	panic("Missing GL_ARB_pixel_buffer_object\n");

    glViewport(0, 0, MAXX, MAXY);
    glClearColor(0.3f, 0.3f, 0.3f, 0.5f);
    glEnable(GL_TEXTURE_2D);
    glLoadIdentity();

    create_buffer(&buffer);
    create_texture(&tex);
    SAFE( cudaGLRegisterBufferObject(buffer) ); // Tell CUDA about the PBO

    // Now, allocate the CUDA side of the data (in CUDA global memory,
    // in preparation for the textures that will store them...)

    Vertex *cudaVertices;
    Triangle *cudaTriangles;

    float *pVerticesData = (float*)malloc(g_verticesNo*8*sizeof(float));
    for(unsigned f=0; f<g_verticesNo; f++) {
	// Texture-wise:
	//
	// first float4
	pVerticesData[f*8+0]=g_vertices[f]._x;
	pVerticesData[f*8+1]=g_vertices[f]._y;
	pVerticesData[f*8+2]=g_vertices[f]._z;
	pVerticesData[f*8+3]=g_vertices[f]._ambientOcclusionCoeff;
	// second float4
	pVerticesData[f*8+4]=g_vertices[f]._normal._x;
	pVerticesData[f*8+5]=g_vertices[f]._normal._y;
	pVerticesData[f*8+6]=g_vertices[f]._normal._z;
	pVerticesData[f*8+7]=0.f;
    }
    SAFE( cudaMalloc((void**)&cudaVertices, g_verticesNo*8*sizeof(float)) );
    SAFE( cudaMemcpy(cudaVertices, pVerticesData, g_verticesNo*8*sizeof(float), cudaMemcpyHostToDevice) );

    float *pTrianglesIntersectionData = (float *)malloc(g_trianglesNo*20*sizeof(float));
    for(unsigned e=0; e<g_trianglesNo; e++) {
	// Texture-wise:
	//
	// first float4
	pTrianglesIntersectionData[20*e+0]  = g_triangles[e]._center._x;
	pTrianglesIntersectionData[20*e+1]  = g_triangles[e]._center._y;
	pTrianglesIntersectionData[20*e+2]  = g_triangles[e]._center._z;
	pTrianglesIntersectionData[20*e+3]  = g_triangles[e]._twoSided?1.0f:0.0f;
	// second float4
	pTrianglesIntersectionData[20*e+4]  = g_triangles[e]._normal._x;
	pTrianglesIntersectionData[20*e+5]  = g_triangles[e]._normal._y;
	pTrianglesIntersectionData[20*e+6]  = g_triangles[e]._normal._z;
	pTrianglesIntersectionData[20*e+7]  = g_triangles[e]._d;
	// third float4
	pTrianglesIntersectionData[20*e+8]  = g_triangles[e]._e1._x;
	pTrianglesIntersectionData[20*e+9]  = g_triangles[e]._e1._y;
	pTrianglesIntersectionData[20*e+10] = g_triangles[e]._e1._z;
	pTrianglesIntersectionData[20*e+11] = g_triangles[e]._d1;
	// fourth float4
	pTrianglesIntersectionData[20*e+12] = g_triangles[e]._e2._x;
	pTrianglesIntersectionData[20*e+13] = g_triangles[e]._e2._y;
	pTrianglesIntersectionData[20*e+14] = g_triangles[e]._e2._z;
	pTrianglesIntersectionData[20*e+15] = g_triangles[e]._d2;
	// fifth float4
	pTrianglesIntersectionData[20*e+16] = g_triangles[e]._e3._x;
	pTrianglesIntersectionData[20*e+17] = g_triangles[e]._e3._y;
	pTrianglesIntersectionData[20*e+18] = g_triangles[e]._e3._z;
	pTrianglesIntersectionData[20*e+19] = g_triangles[e]._d3;
    }
    float *cudaTriangleIntersectionData;
    SAFE( cudaMalloc((void**)&cudaTriangleIntersectionData, g_trianglesNo*20*sizeof(float)) );
    SAFE( cudaMemcpy(cudaTriangleIntersectionData, pTrianglesIntersectionData, g_trianglesNo*20*sizeof(float), cudaMemcpyHostToDevice) );

    SAFE( cudaMalloc((void**)&cudaTriangles, g_trianglesNo*sizeof(Triangle)) );
    SAFE( cudaMemcpy(cudaTriangles, g_triangles, g_trianglesNo*sizeof(Triangle), cudaMemcpyHostToDevice) );
//    SAFE( cudaMemcpyToSymbol("VERTICES", &g_verticesNo, sizeof(int)) );
//    SAFE( cudaMemcpyToSymbol("TRIANGLES", &g_trianglesNo, sizeof(int)) );
//    SAFE( cudaMemcpyToSymbol("VERTICES", &g_verticesNo, sizeof(int)) );
//    SAFE( cudaMemcpyToSymbol("TRIANGLES", &g_trianglesNo, sizeof(int)) );

    float angle1=0.0f;
    float angle2=0.0f;
    float angle3=45.0f*M_PI/180.f;
    const coord EyeDistanceFactor = 4.0;
    const coord LightDistanceFactor = 4.0;

    Vector3 light(LightDistanceFactor*maxi, LightDistanceFactor*maxi, LightDistanceFactor*maxi);
    Vector3 *pLight = &light;

    int framesDrawn = 0;
    bool autoRotate = true;

    Keyboard keys;
    Vector3 eye(maxi*EyeDistanceFactor, 0.0, 0.0);
    Vector3 lookat(eye._x + 1.0f*cos(angle2)*cos(angle1),
		 eye._y + 1.0f*cos(angle2)*sin(angle1),
		 eye._z + 1.0f*sin(angle2));

    Camera sony(eye, lookat);

    Clock watch;

#define DEGREES_TO_RADIANS(x) ((coord)((x)*M_PI/180.0))
    coord dAngle = DEGREES_TO_RADIANS(0.3f);

    keys.poll();

    // Allocate CUDA-side data (global memory) for positions and transformations matrices
    Matrix3 *cudaSony = NULL;
    SAFE( cudaMalloc((void**)&cudaSony, sizeof(sony._mv)) );
    Vector3 *cudaEye = NULL;
    SAFE( cudaMalloc((void**)&cudaEye, sizeof(Vector3)) );
    Vector3 *cudaLightInWorldSpace = NULL;
    SAFE( cudaMalloc((void**)&cudaLightInWorldSpace, sizeof(Vector3)) );

    // Allocate CUDA-side data (global memory for corresponding textures) for Bounding Volume Hierarchy data
    // See BVH.h for the data we are storing (from CacheFriendlyBVHNode)
    //
    // Leaf nodes triangle lists (indices to global triangle list)
    int *cudaTriIdxList = NULL;
    SAFE( cudaMalloc((void**)&cudaTriIdxList, g_triIndexListNo*sizeof(int)) );
    SAFE( cudaMemcpy(cudaTriIdxList, g_triIndexList, g_triIndexListNo*sizeof(int), cudaMemcpyHostToDevice) );

    // Bounding box checks need bottom._x,top._x,bottom._y,top._y,bottom._z,top._z...
    float *pLimits = (float *)malloc(g_pCFBVH_No*6*sizeof(float));
    for(unsigned h=0; h<g_pCFBVH_No; h++) {
	// Texture-wise:
	//
	// First float2
	pLimits[6*h+0] = g_pCFBVH[h]._bottom._x;
	pLimits[6*h+1] = g_pCFBVH[h]._top._x;
	// Second float2
	pLimits[6*h+2] = g_pCFBVH[h]._bottom._y;
	pLimits[6*h+3] = g_pCFBVH[h]._top._y;
	// Third float2
	pLimits[6*h+4] = g_pCFBVH[h]._bottom._z;
	pLimits[6*h+5] = g_pCFBVH[h]._top._z;
    }
    float *cudaBVHlimits = NULL;
    SAFE( cudaMalloc((void**)&cudaBVHlimits, g_pCFBVH_No*6*sizeof(float)) );
    SAFE( cudaMemcpy(cudaBVHlimits, pLimits, g_pCFBVH_No*6*sizeof(float), cudaMemcpyHostToDevice) );

    // ..and finally, from CacheFriendlyBVHNode, the 4 integer values:
    int *pIndexesOrTrilists = (int *)malloc(g_pCFBVH_No*4*sizeof(unsigned));
    for(unsigned g=0; g<g_pCFBVH_No; g++) {
	// Texture-wise:
	//
	// A single uint4
	pIndexesOrTrilists[4*g+0] = g_pCFBVH[g].u.leaf._count;
	pIndexesOrTrilists[4*g+1] = g_pCFBVH[g].u.inner._idxRight;
	pIndexesOrTrilists[4*g+2] = g_pCFBVH[g].u.inner._idxLeft;
	pIndexesOrTrilists[4*g+3] = g_pCFBVH[g].u.leaf._startIndexInTriIndexList;
    }
    int *cudaBVHindexesOrTrilists = NULL;
    SAFE( cudaMalloc((void**)&cudaBVHindexesOrTrilists, g_pCFBVH_No*4*sizeof(unsigned)) );
    SAFE( cudaMemcpy(cudaBVHindexesOrTrilists, pIndexesOrTrilists, g_pCFBVH_No*4*sizeof(unsigned), cudaMemcpyHostToDevice) );

    // Allocate and calculate CUDA-side data (global memory) for Morton-order table
    //
    unsigned *cudaMortonTable = NULL;
    SAFE( cudaMalloc((void**)&cudaMortonTable, MAXX*MAXY*sizeof(unsigned)) );
    unsigned *pMortonTable = (unsigned *)malloc(MAXX*MAXY*sizeof(unsigned));
    // Final largest dimension
    unsigned maxDim = MAXX;
    if (maxDim<MAXY) maxDim = MAXY;
    // Find minimum power of two >= maxDim
    maxDim--;
    maxDim |= maxDim >> 1;
    maxDim |= maxDim >> 2;
    maxDim |= maxDim >> 4;
    maxDim |= maxDim >> 8;
    maxDim |= maxDim >> 16;
    maxDim++;
    // Build morton table for our pixels
    int ofs = 0;
    for(unsigned work=0; work<maxDim*maxDim; work++) {
	unsigned topBit = 0x80000000;
	unsigned w = work;
	int x=0, y=0;
	for(unsigned i=0; i<32; i+=2) {
	    y = (y<<1) | ((w&topBit)?1:0);
	    w<<=1;
	    x = (x<<1) | ((w&topBit)?1:0);
	    w<<=1;
	}
	if (x>=0 && y>=0 && x<MAXX && y<MAXY) {
	    unsigned value = x | (y<<16);
	    pMortonTable[ofs++] = value;
	    if (ofs == MAXX*MAXY)
		break;
	}
    }
    SAFE( cudaMemcpy(cudaMortonTable, pMortonTable, ofs*sizeof(unsigned), cudaMemcpyHostToDevice) );

    //
    // Game time...
    //

    pLight->_x = LightDistanceFactor*maxi*cos(angle3);
    pLight->_y = LightDistanceFactor*maxi*sin(angle3);

    g_benchmark = bench;

    ModeDescription();
    watch.reset();
    while(!keys.Abort()) {
	framesDrawn++;
	if (bench && framesDrawn>benchFrames)
	    break;
	if (!bench) {
	    // Only allow keyboard control if we are not benchmarking
	    if (keys.Left())
		angle1-=dAngle;
	    if (keys.Right())
		angle1+=dAngle;
	    if (keys.Up())
		angle2=min(angle2+dAngle, DEGREES_TO_RADIANS(89.0f));
	    if (keys.Down())
		angle2=max(angle2-dAngle, DEGREES_TO_RADIANS(-89.0f));
	    if (keys.Forward() || keys.Backward()) {
		Vector3 fromEyeToLookat(lookat);
		fromEyeToLookat -= eye;
		if (autoRotate)
		    fromEyeToLookat *= 0.05f;
		else
		    fromEyeToLookat *= 0.05f*maxi;
		/*
		cout << "At " << eye._x << " " << eye._y << " " << eye._z << endl;
		cout << "Looking at " << lookat._x << " " << lookat._y << " " << lookat._z << endl;
		cout << "Adding " << fromEyeToLookat._x << " " << fromEyeToLookat._y << " " << fromEyeToLookat._z << endl;
		*/
		if (keys.Forward())
		    eye += fromEyeToLookat;
		else
		    eye -= fromEyeToLookat;
	    }
	    if (keys.isF4()) {
		watch.reset(); framesDrawn = 1;
		while (keys.isF4()) keys.poll(); g_bUsePoints       = !g_bUsePoints;       ModeDescription(); }
	    if (keys.isF5()) {
		watch.reset(); framesDrawn = 1;
		while (keys.isF5()) keys.poll(); g_bUseSpecular     = !g_bUseSpecular;     ModeDescription(); }
	    if (keys.isF6()) {
		watch.reset(); framesDrawn = 1;
		while (keys.isF6()) keys.poll(); g_bUsePhongInterp  = !g_bUsePhongInterp;  ModeDescription(); }
	    if (keys.isF7()) {
		watch.reset(); framesDrawn = 1;
		while (keys.isF7()) keys.poll(); g_bUseReflections  = !g_bUseReflections;  ModeDescription(); }
	    if (keys.isF8()) {
		watch.reset(); framesDrawn = 1;
		while (keys.isF8()) keys.poll(); g_bUseShadows      = !g_bUseShadows;      ModeDescription(); }
	    if (keys.isF9()) {
		watch.reset(); framesDrawn = 1;
		while (keys.isF9()) keys.poll(); g_bUseAntialiasing = !g_bUseAntialiasing; ModeDescription(); }

	    if (keys.isS() || keys.isF() || keys.isE() || keys.isD()) {
		Vector3 eyeToLookatPoint = lookat;
		eyeToLookatPoint -= eye;
		eyeToLookatPoint.normalize();
		Vector3 zenith(0., 0., 1.);
		Vector3 rightAxis = cross(eyeToLookatPoint, zenith);
		rightAxis.normalize();
		Vector3 upAxis = cross(rightAxis, eyeToLookatPoint);
		upAxis.normalize();
		if (keys.isS()) { rightAxis *= 0.05f*maxi; eye -= rightAxis; }
		if (keys.isF()) { rightAxis *= 0.05f*maxi; eye += rightAxis; }
		if (keys.isD()) { upAxis *= 0.05f*maxi; eye -= upAxis; }
		if (keys.isE()) { upAxis *= 0.05f*maxi; eye += upAxis; }
	    }
	    if (keys.isR()) {
		while(keys.isR())
		    keys.poll();
		autoRotate = !autoRotate;
		if (!autoRotate) {
		    Vector3 eyeToAxes = eye;
		    eyeToAxes.normalize();
#if defined(__GNUC__)
		    // Hideous GCC code generation bug - unless I print them, eyeToAxes are not normalized!
		    cout << "Moving to " << eyeToAxes._x << "," <<
			eyeToAxes._y << "," << eyeToAxes._z << endl;
#endif
		    angle2 = asin(-eyeToAxes._z);
		    angle1 = (eye._y<0)?acos(eyeToAxes._x/cos(angle2)):-acos(eyeToAxes._x/cos(angle2));
		} else {
		    angle1 = -angle1;
		    angle2 = -angle2;
		}
		watch.reset();
		framesDrawn = 1;
	    }
	    if (keys.Light()) {
		angle3 += 4*dAngle;
		pLight->_x = LightDistanceFactor*maxi*cos(angle3);
		pLight->_y = LightDistanceFactor*maxi*sin(angle3);
	    }
	    if (keys.Light2()) {
		angle3 -= 4*dAngle;
		pLight->_x = LightDistanceFactor*maxi*cos(angle3);
		pLight->_y = LightDistanceFactor*maxi*sin(angle3);
	    }
	    if (keys.isH()) {
		while(keys.isH()) keys.poll();
		glClear(GL_COLOR_BUFFER_BIT);
		glDisable(GL_LIGHTING);
		glDisable(GL_TEXTURE_2D);
		glColor3f(1.f, 1.f, 1.f);
		const char *help =
		    "KEYS: (available only when not benchmarking)\n"
		    "\n"
		    "- Hit 'R' to stop/start auto-rotation of the camera around the object.\n"
		    "- Fly using the cursor keys,A,Z - and rotate the light with W and Q.\n"
		    "- S and F are 'strafe' left/right\n"
		    "- E and D are 'strafe' up/down\n"
		    "  (strafe keys don't work in auto-rotation mode).\n"
		    "- F4 toggles points mode\n"
		    "- F5 toggles specular lighting\n"
		    "- F6 toggles phong normal interpolation\n"
		    "- F7 toggles reflections\n"
		    "- F8 toggles shadows\n"
		    "- F9 toggles anti-aliasing\n"
		    "- ESC quits.\n\n"
		    "Now press H to get out of this help screen\n";

		glRasterPos2f(-0.95, 0.9);
		int lines = 0;
		for(const char *p=help; *p; p++) {
		    if (*p == '\n')
			// 12 point helvetica => 12+3=15 points per line
			glRasterPos2f(-0.95, 0.9 - ((12+3)*2.f/MAXY)*lines++);
		    else
			glutBitmapCharacter(GLUT_BITMAP_HELVETICA_12, *p);
		}
		SDL_GL_SwapBuffers();
		while(!keys.isH() && !keys.Abort()) keys.poll();
		while(keys.isH() || keys.Abort()) keys.poll();
		framesDrawn = 1;
		watch.reset();
	    }
	}
	if (!autoRotate) {
	    lookat._x = eye._x - 1.0f*cos(angle2)*cos(angle1);
	    lookat._y = eye._y + 1.0f*cos(angle2)*sin(angle1);
	    lookat._z = eye._z + 1.0f*sin(angle2);
	} else {
	    angle1-=dAngle;
	    lookat._x = 0;
	    lookat._y = 0;
	    lookat._z = 0;
	    coord distance = sqrt(eye._x*eye._x + eye._y*eye._y + eye._z*eye._z);
	    eye._x = distance*cos(angle2)*cos(angle1);
	    eye._y = distance*cos(angle2)*sin(angle1);
	    eye._z = distance*sin(angle2);
	}

	sony.set(eye, lookat);

	// Update the CUDA side of the eye/light positions and x-form matrix
	SAFE( cudaMemcpy(cudaEye, (Vector3*)&sony, sizeof(Vector3), cudaMemcpyHostToDevice) );
	SAFE( cudaMemcpy(cudaLightInWorldSpace, pLight, sizeof(Vector3), cudaMemcpyHostToDevice) );
	SAFE( cudaMemcpy(cudaSony, &sony._mv, sizeof(sony._mv), cudaMemcpyHostToDevice) );

	// ...and render!
	CudaRender(
	    cudaSony,
	    cudaVertices, cudaTriangles, cudaTriangleIntersectionData,
	    cudaTriIdxList, cudaBVHlimits, cudaBVHindexesOrTrilists,
	    cudaEye, cudaLightInWorldSpace,
	    cudaMortonTable);

	keys.poll();

	// The more frames per sec, the smaller the dAngle should be
	// The less frames per sec, the larger  the dAngle should be
	//   so...
	// Move dAngle towards 36/framesPerSec in 15 frames (steps)
	//   for 100fps, dAngle = 0.36
	//   for 10 fps, dAngle = 3.6
	// In both cases, we rotate 360 degrees in 10 seconds
	//dAngle += ((36.0/(framesDrawn/(watch.readMS()/1000.0)))-dAngle)/15.;
	//
	// Changed to 18/framesPerSec (20 seconds needed for a 360 rotation)
	dAngle += (DEGREES_TO_RADIANS(9.0f/(framesDrawn/(watch.readMS()/1000.0f)))-dAngle)/15.0f;
    }
    framesDrawn--;

#ifdef _WIN32
    stringstream speed;
    speed << "Rendering " << framesDrawn << " frames in " << (watch.readMS()/1000.0) << " seconds. (" << framesDrawn/(watch.readMS()/1000.0) << " fps)\n";
    MessageBoxA(0, (LPCSTR) speed.str().c_str(), (const char *)"Speed of rendering", MB_OK);
#else
    cout << "Rendering " << framesDrawn << " frames in ";
    cout << (watch.readMS()/1000.0) << " seconds. (";
    cout << framesDrawn/(watch.readMS()/1000.0) << " fps)\n";
#endif

    SAFE(cudaFree(cudaMortonTable));
    SAFE(cudaFree(cudaBVHindexesOrTrilists));
    SAFE(cudaFree(cudaBVHlimits));
    SAFE(cudaFree(cudaTriIdxList));
    SAFE(cudaFree(cudaLightInWorldSpace));
    SAFE(cudaFree(cudaEye));
    SAFE(cudaFree(cudaSony));
    SAFE(cudaFree(cudaTriangles));
    SAFE(cudaFree(cudaTriangleIntersectionData));
    SAFE(cudaFree(cudaVertices));
    SAFE(cudaGLUnregisterBufferObject(buffer));
    destroy_texture(&tex);
    destroy_buffer(&buffer);

    cudaThreadExit();

    return 0;
}