// Builds a tetrahedron surface mesh in which every one of the four faces
// ("planes") is a triangular grid subdivided into size*size small triangles.
//
// size: number of subdivisions along each tetrahedron edge.
//
// Each plane is walked row by row (y = 0..size, x = 0..y). A vertex that lies
// on an edge already produced by an earlier plane reuses that plane's vertex
// index instead of creating a duplicate, so `vertices` holds every position
// exactly once. The per-plane index tables are scratch storage owned by this
// constructor and are released before initCudaBuffers() is called.
TetrahedronMesh::TetrahedronMesh(const int size)
{
	int trianglesPerFace = size*size;
	numFaces = trianglesPerFace * 4;
	// 4 corners + 6 edges with (size-1) interior vertices each
	// + 4 faces with (size-1)*(size-2)/2 interior vertices each.
	vertices.resize(4 + 6 * (size - 1) + 2 * (size - 2)*(size - 1));
	triangles.resize(4 * trianglesPerFace);

	// Float arithmetic: a double expression inside a float3 brace-initialiser
	// is a narrowing conversion.
	const float half = size / 2.0f;
	float3 initialPos = { half, half, half };
	float3 deltaX = { 0, 1, -1 };
	float3 deltaY = { -1, -1, 0 };

	// Scratch tables mapping (x, y) grid coordinates of each plane to an index
	// into `vertices`. All four tables live in one allocation that is freed at
	// the end of the constructor. Every entry is written before it is read, so
	// no initialisation is required.
	const int dim = (size >= 0) ? size + 1 : 0;
	int*  indexPool = new int[4 * dim * dim];
	int** indexRows = new int*[4 * dim];
	for (int r = 0; r < 4 * dim; r++){
		indexRows[r] = indexPool + r * dim;
	}
	int** indices1 = indexRows;
	int** indices2 = indexRows + dim;
	int** indices3 = indexRows + 2 * dim;
	int** indices4 = indexRows + 3 * dim;
	int coordCount = 0, triangleCount = 0;

	// Plane 1: every vertex is new.
	for (int y = 0; y <= size; y++){
		float3 currentPos = initialPos + deltaY * y;
		for (int x = 0; x < y + 1; x++){
			indices1[x][y] = coordCount;
			vertices[coordCount++] = currentPos;
			if (x > 0) {
				// Triangle using the previous vertex of this row and the row above.
				triangles[triangleCount++] = { indices1[x][y], indices1[x - 1][y], indices1[x - 1][y - 1] };
				if (x < y) {
					// Second triangle of the quad; absent at the end of the row.
					triangles[triangleCount++] = { indices1[x][y], indices1[x - 1][y - 1], indices1[x][y - 1] };
				}
			}
			currentPos += deltaX;
		}
	}

	// Plane 2: shares its x == 0 edge with plane 1. The last two indices of
	// each triangle are swapped relative to plane 1 (presumably to keep the
	// winding consistent on this face).
	deltaX = { 1, 0, -1 };
	for (int y = 0; y <= size; y++){
		float3 currentPos = initialPos + deltaY * y;
		for (int x = 0; x < y + 1; x++){
			if (x == 0){
				indices2[x][y] = indices1[x][y];
			}
			else{
				indices2[x][y] = coordCount;
				vertices[coordCount++] = currentPos;
			}
			if (x > 0) {
				triangles[triangleCount++] = { indices2[x][y], indices2[x - 1][y - 1], indices2[x - 1][y] };
				if (x < y) {
					triangles[triangleCount++] = { indices2[x][y], indices2[x][y - 1], indices2[x - 1][y - 1] };
				}
			}
			currentPos += deltaX;
		}
	}

	// Plane 3: starts from a different corner. Its last row (y == size) reuses
	// the diagonal of plane 2 and its own diagonal (x == y) reuses the diagonal
	// of plane 1, both traversed in reverse.
	initialPos = { -half, half, -half };
	deltaY = { 1, -1, 0 };
	deltaX = { 0, 1, 1 };
	for (int y = 0; y <= size; y++){
		float3 currentPos = initialPos + deltaY * y;
		for (int x = 0; x < y + 1; x++){
			if (y == size){
				indices3[x][y] = indices2[size - x][size - x];
			}
			else if (x == y){
				indices3[x][y] = indices1[size - x][size - x];
			}
			else{
				indices3[x][y] = coordCount;
				vertices[coordCount++] = currentPos;
			}
			if (x > 0) {
				triangles[triangleCount++] = { indices3[x][y], indices3[x - 1][y], indices3[x - 1][y - 1] };
				if (x < y) {
					triangles[triangleCount++] = { indices3[x][y], indices3[x - 1][y - 1], indices3[x][y - 1] };
				}
			}
			currentPos += deltaX;
		}
	}

	// Plane 4: same origin and deltaY as plane 3. Its last row reuses the last
	// row of plane 2, its diagonal reuses the last row of plane 1 (both
	// reversed), and its x == 0 edge reuses the x == 0 edge of plane 3.
	deltaX = { -1, 0, 1 };
	for (int y = 0; y <= size; y++){
		float3 currentPos = initialPos + deltaY * y;
		for (int x = 0; x < y + 1; x++){
			if (y == size){
				indices4[x][y] = indices2[size - x][size];
			}
			else if (x == y){
				indices4[x][y] = indices1[size - y][size];
			}
			else if (x == 0){
				indices4[x][y] = indices3[x][y];
			}
			else{
				indices4[x][y] = coordCount;
				vertices[coordCount++] = currentPos;
			}
			if (x > 0) {
				triangles[triangleCount++] = { indices4[x][y], indices4[x - 1][y - 1], indices4[x - 1][y] };
				if (x < y) {
					triangles[triangleCount++] = { indices4[x][y], indices4[x][y - 1], indices4[x - 1][y - 1] };
				}
			}
			currentPos += deltaX;
		}
	}

	// The index tables are only needed while stitching the planes together.
	delete[] indexRows;
	delete[] indexPool;

	initCudaBuffers();
}
////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
// Entry point of the sample.
//
// Parses the command line (-file, -image, -threads, -sigma, -benchmark), loads
// the input PPM image, selects a CUDA device (with or without OpenGL interop)
// and then runs exactly one of three modes:
//   * automated test against a reference file (when -file is given),
//   * benchmark (when -benchmark is given or a Tesla device is detected),
//   * interactive GLUT display loop.
// Every mode terminates the process through exit().
int
main(int argc, char **argv)
{
    pArgc = &argc;
    pArgv = argv;
    char *ref_file = NULL;

#if defined(__linux__)
    // Only sets DISPLAY when it is not already defined (overwrite flag is 0).
    setenv ("DISPLAY", ":0", 0);
#endif

    printf("%s Starting...\n\n", sSDKsample);

    printf("NOTE: The CUDA Samples are not meant for performance measurements. Results may vary when GPU Boost is enabled.\n\n");

    // A reference file switches the sample into automated-test mode.
    if (argc > 1)
    {
        if (checkCmdLineFlag(argc, (const char **)argv, "file"))
        {
            getCmdLineArgumentString(argc, (const char **)argv, "file", &ref_file);
            fpsLimit = frameCheckNumber;
        }
    }

    // Optional override of the default image file name.
    char *filename;

    if (getCmdLineArgumentString(argc, (const char **) argv, "image", &filename))
    {
        image_filename = filename;
    }

    // Locate and load the image.
    char *image_path = sdkFindFilePath(image_filename, argv[0]);

    if (image_path == NULL)
    {
        fprintf(stderr, "Error unable to find and load image file: '%s'\n", image_filename);
        exit(EXIT_FAILURE);
    }

    sdkLoadPPM4ub(image_path, (unsigned char **)&h_img, &width, &height);

    if (!h_img)
    {
        printf("Error unable to load PPM file: '%s'\n", image_path);
        exit(EXIT_FAILURE);
    }

    printf("Loaded '%s', %d x %d pixels\n", image_path, width, height);

    if (checkCmdLineFlag(argc, (const char **)argv, "threads"))
    {
        nthreads = getCmdLineArgumentInt(argc, (const char **) argv, "threads");
    }

    if (checkCmdLineFlag(argc, (const char **)argv, "sigma"))
    {
        sigma = getCmdLineArgumentFloat(argc, (const char **) argv, "sigma");
    }

    runBenchmark = checkCmdLineFlag(argc, (const char **) argv, "benchmark");

    // Query the current device so that Tesla boards can be forced into
    // benchmark mode. The return codes are checked because prop.name must not
    // be read when the query failed.
    int device = 0;
    struct cudaDeviceProp prop;
    cudaError_t queryStatus = cudaGetDevice(&device);

    if (queryStatus == cudaSuccess)
    {
        queryStatus = cudaGetDeviceProperties(&prop, device);
    }

    if (queryStatus != cudaSuccess)
    {
        fprintf(stderr, "Warning: unable to query the current CUDA device (%s), skipping Tesla detection\n",
                cudaGetErrorString(queryStatus));
    }
    else if (!strncmp("Tesla", prop.name, 5))
    {
        printf("Tesla card detected, running the test in benchmark mode (no OpenGL display)\n");
        runBenchmark = true;
    }

    // Benchmark or AutoTest mode detected, no OpenGL.
    // The device is chosen from the command line if specified there.
    if (runBenchmark == true || ref_file != NULL)
    {
        findCudaDevice(argc, (const char **)argv);
    }
    else
    {
        // First initialize OpenGL context, so we can properly set the GL for CUDA.
        // This is necessary in order to achieve optimal performance with OpenGL/CUDA interop.
        initGL(&argc, argv);
        findCudaGLDevice(argc, (const char **)argv);
    }

    initCudaBuffers();

    if (ref_file)
    {
        printf("(Automated Testing)\n");
        bool testPassed = runSingleTest(ref_file, argv[0]);

        cleanup();

        // cudaDeviceReset causes the driver to clean up all state. While
        // not mandatory in normal operation, it is good practice.  It is also
        // needed to ensure correct operation when the application is being
        // profiled. Calling cudaDeviceReset causes all profile data to be
        // flushed before the application exits
        cudaDeviceReset();

        exit(testPassed ? EXIT_SUCCESS : EXIT_FAILURE);
    }

    if (runBenchmark)
    {
        printf("(Run Benchmark)\n");
        benchmark(100);

        cleanup();

        // cudaDeviceReset causes the driver to clean up all state. While
        // not mandatory in normal operation, it is good practice.  It is also
        // needed to ensure correct operation when the application is being
        // profiled. Calling cudaDeviceReset causes all profile data to be
        // flushed before the application exits
        cudaDeviceReset();

        exit(EXIT_SUCCESS);
    }

    // Interactive mode: hand control to GLUT.
    initGLBuffers();
    glutMainLoop();

    exit(EXIT_SUCCESS);
}
////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
// Entry point of the sample (legacy cutil-based variant).
//
// Parses the command line (-noqatest, -noprompt, -glverify, -image, -threads,
// -sigma, -bench, -device), loads the input PPM image, selects a CUDA device
// (with or without OpenGL interop) and then runs the automated readback test,
// the benchmark, or the interactive GLUT display loop.
int
main( int argc, char** argv) 
{
    // Automated readback testing is enabled unless "noqatest" is given, and
    // always when "noprompt" is given.
    if (!cutCheckCmdLineFlag(argc, (const char **)argv, "noqatest") ||
		cutCheckCmdLineFlag(argc, (const char **)argv, "noprompt")) 
	{
        g_bQAReadback = true;
        fpsLimit = frameCheckNumber;
    }
    if (argc > 1) {

        if (cutCheckCmdLineFlag(argc, (const char **)argv, "glverify")) {
            g_bOpenGLQA = true;
            fpsLimit = frameCheckNumber;
        }
    }

    printf("[%s] ", sSDKsample);
    if (g_bQAReadback) printf("(Automated Testing)\n");
    if (g_bOpenGLQA)   printf("(OpenGL Readback)\n");

    // Optional override of the default image file name
    char *filename;
    if (cutGetCmdLineArgumentstr(argc, (const char**) argv, "image", &filename)) {
        image_filename = filename;
    }
    // load image
    char* image_path = cutFindFilePath(image_filename, argv[0]);
    if (image_path == 0) {
        fprintf(stderr, "Error finding image file '%s'\n", image_filename);
        cudaThreadExit();
        exit(EXIT_FAILURE);
    }

    cutilCheckError( cutLoadPPM4ub(image_path, (unsigned char **) &h_img, &width, &height));
    if (!h_img) {
        printf("Error opening file '%s'\n", image_path);
        cudaThreadExit();
        exit(-1);
    }
    printf("Loaded '%s', %d x %d pixels\n", image_path, width, height);

    cutGetCmdLineArgumenti(argc, (const char**) argv, "threads", &nthreads);
    cutGetCmdLineArgumentf(argc, (const char**) argv, "sigma", &sigma);
    runBenchmark = cutCheckCmdLineFlag(argc, (const char**) argv, "bench");

    // Query the current device so that Tesla boards can be forced into the
    // non-OpenGL readback path. The return codes are checked because
    // prop.name must not be read when the query failed.
    int device = 0;
    struct cudaDeviceProp prop;
    cudaError_t queryStatus = cudaGetDevice( &device );
    if( queryStatus == cudaSuccess ) {
        queryStatus = cudaGetDeviceProperties( &prop, device );
    }
    if( queryStatus != cudaSuccess ) {
        fprintf(stderr, "Warning: unable to query the current CUDA device (%s), skipping Tesla detection\n",
                cudaGetErrorString( queryStatus ));
    } else if( !strncmp( "Tesla", prop.name, 5 ) ) {
        printf("Tesla card detected, running the test in benchmark mode (no OpenGL display)\n");
//        runBenchmark = CUTTrue;
        g_bQAReadback = true;
    }        

    // Benchmark or AutoTest mode detected, no OpenGL.
    // Use the command-line specified CUDA device, otherwise the device with
    // the highest Gflops/s.
    if (runBenchmark == CUTTrue || g_bQAReadback) {
        if( cutCheckCmdLineFlag( argc, (const char **)argv, "device" ) ) 
            cutilDeviceInit( argc, argv );
        else 
            cudaSetDevice( cutGetMaxGflopsDeviceId() );
    } else {

        // First initialize OpenGL context, so we can properly set the GL for CUDA.
        // This is necessary in order to achieve optimal performance with OpenGL/CUDA interop.
        initGL(argc, argv);

        if( cutCheckCmdLineFlag( argc, (const char **)argv, "device" ) ) 
            cutilGLDeviceInit( argc, argv );
        else 
            cudaGLSetGLDevice( cutGetMaxGflopsDeviceId() );
    }

    initCudaBuffers();

    // The readback branch below installs its own CheckBackBuffer in
    // g_CheckRender. Creating one here as well when both flags are set would
    // only produce an object that is overwritten and never released.
    if (g_bOpenGLQA && !g_bQAReadback) {
        g_CheckRender = new CheckBackBuffer(width, height, 4);
        g_CheckRender->setPixelFormat(GL_RGBA);
        g_CheckRender->setExecPath(argv[0]);
        g_CheckRender->EnableQAReadback(true);
    }

    if (g_bQAReadback) {
        // This is the automated testing path
        g_CheckRender = new CheckBackBuffer(width, height, 4, false);
        g_CheckRender->setPixelFormat(GL_RGBA);
        g_CheckRender->setExecPath(argv[0]);
        g_CheckRender->EnableQAReadback(true);

        runAutoTest(argc, argv); 
        cleanup();
        cudaThreadExit();
        cutilExit(argc, argv);
    }

    if (runBenchmark) {
        benchmark(100);
        cleanup();
        cudaThreadExit();
        exit(0);
    }

    // Interactive mode: hand control to GLUT; cleanup runs at process exit.
    initGLBuffers();
    
    atexit(cleanup);
    
    glutMainLoop();

    cudaThreadExit();
    cutilExit(argc, argv);
}