int main(void) { // Setup scene details scene.fogDensity = 0.003f; scene.ambientColour = vec3(0.10f, 0.16f, 0.20f); scene.lightColour = vec3(0.63f, 0.55f, 0.5f); scene.lightDirection = normalize(vec3(1.0f, 0.25f, 0.5f)); // Setup raytracer raytracer = new Raytracer(WIDTH, HEIGHT, scene, voxels, camera); // Setup SDL SDL_Init(SDL_INIT_EVERYTHING); SDL_SetVideoMode(WIDTH, HEIGHT, 32, SDL_OPENGL | (FULLSCREEN ? SDL_FULLSCREEN : 0)); TTF_Init(); // Setup screen setupOpenGL(); gui = new GUI(ivec2(WIDTH, HEIGHT)); // Get platform and device information cl_platform_id platform = selectPlatform(); cl_device_id device = selectDevice(platform); // Create an OpenCL context cl_int ret; cl_context_properties properties[] = { CL_GL_CONTEXT_KHR, (cl_context_properties)glXGetCurrentContext(), CL_GLX_DISPLAY_KHR, (cl_context_properties)glXGetCurrentDisplay(), CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0 }; cl_context context = clCreateContext(properties, 1, &device, openCLLog, NULL, &ret); printCLErrorCode("createContext", ret); // Create a command queue cl_command_queue command_queue = clCreateCommandQueue(context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &ret); // Create texture outputTexture = createTexture(); screenTexture[0] = createTexture(); screenTexture[1] = createTexture(); finalTexture = createTexture(); // Load textures grassTexture = new Texture("textures/grass.png"); // Create pixel array int bytes = WIDTH * HEIGHT * sizeof(vec4); vec4 *pixels = (vec4*)malloc(bytes); glBindTexture(GL_TEXTURE_2D, outputTexture); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F, WIDTH, HEIGHT, 0, GL_RGBA, GL_FLOAT, NULL); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glBindTexture(GL_TEXTURE_2D, 0); glBindTexture(GL_TEXTURE_2D, screenTexture[0]); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F, WIDTH, HEIGHT, 0, GL_RGBA, GL_FLOAT, NULL); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glBindTexture(GL_TEXTURE_2D, 0); glBindTexture(GL_TEXTURE_2D, screenTexture[1]); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F, WIDTH, HEIGHT, 0, GL_RGBA, GL_FLOAT, NULL); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glBindTexture(GL_TEXTURE_2D, 0); glBindTexture(GL_TEXTURE_2D, finalTexture); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F, WIDTH, HEIGHT, 0, GL_RGBA, GL_FLOAT, NULL); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glBindTexture(GL_TEXTURE_2D, 0); // Create memory buffers on the device for each vector cl_mem cameraPosMemory = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(vec3), NULL, &ret); cl_mem cameraViewMatrixMemory = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(mat4), NULL, &ret); cl_mem lightDirectionMemory = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(vec3), NULL, &ret); cl_mem voxelSizeMemory = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(int) * 3, NULL, &ret); printCLErrorCode("clCreateBuffer", ret); if (ret != 0) return 1; cl_mem outputMemory = clCreateFromGLTexture2D(context, CL_MEM_READ_WRITE, GL_TEXTURE_2D, 0, outputTexture, &ret); cl_mem screenMemory[2] = { clCreateFromGLTexture2D(context, CL_MEM_READ_WRITE, GL_TEXTURE_2D, 0, screenTexture[0], &ret), clCreateFromGLTexture2D(context, CL_MEM_READ_WRITE, GL_TEXTURE_2D, 0, screenTexture[1], &ret) }; cl_mem finalMemory = clCreateFromGLTexture2D(context, CL_MEM_WRITE_ONLY, GL_TEXTURE_2D, 0, finalTexture, &ret); // Create the OpenCL kernel Kernel* kernelFast = new Kernel("Fast", "raytracer_fast.cl", "raytrace", context, device); Kernel* kernelNormal = new Kernel("AO", "raytracer_normal.cl", "raytrace", context, device); Kernel* kernelSlow = new Kernel("GI", "raytracer.cl", "raytrace", context, device); Kernel* kernelPath = new Kernel("Path", "raytracer_path.cl", "raytrace", context, device); vector<Kernel*> raytracerKernels = { kernelFast, kernelNormal, kernelSlow, kernelPath }; for (Kernel* kernel : raytracerKernels) kernel->build(); Kernel postprocess("postprocess", "postprocess.cl", "main", context, device); postprocess.build(); Kernel tonemapper("tonemapper", "tonemap.cl", "main", context, device); tonemapper.build(); bool reloadKernel = true; int kernelType = 0; Kernel* kernel = kernelFast; // Create voxels const int AIR = 0, GRASS = 1, GROUND = 2, STONE = 3, WATER = 4, WOOD = 5, LEAVES = 6; #pragma omp parallel for schedule(dynamic) for (int z = 0; z < voxelSize.z; z++) for (int y = 0; y < voxelSize.y; y++) for (int x = 0; x < voxelSize.x; x++) { float h = (perlinNoise(0.6f, 6, x / 128.0f, y / 128.0f, z / 128.0f, 0) - (0.5f - (float)y / voxelSize.y)) * 3.0f; int v = AIR; if (h >= 1.2f) v = STONE; else if (h >= 1.0f) v = GROUND; if (y > 0 && v == GROUND && voxels.get(x, y - 1, z) == AIR) v = GRASS; if (v == 0 && y > voxelSize.y - seaLevel) v = WATER; voxels.get(x, y, z) = v; } int trees = voxelSize.x*voxelSize.z*0.01f; int plantedTrees = 0; for (int i = 0; i < trees*trees && plantedTrees < trees; i++) { int x = noise(i, 10) * (voxelSize.x - 8) + 4; int y = 0; int z = noise(i, 20) * (voxelSize.z - 8) + 4; bool valid = true; for (y = 0; y < voxelSize.y; y++) { int type = voxels.get(x, y, z); valid = (type == GRASS); if (type != AIR) break; } if (!valid || y < 10) continue; plantedTrees++; for (int yt = y - 6; yt < y; yt++) voxels.get(x, yt, z) = WOOD; for (int yt = y - 8; yt < y - 2; yt++) { for (int xt = -4; xt <= 4; xt++) for (int zt = -4; zt <= 4; zt++) { int yft = y - yt - 6; int dist = zt*zt + xt*xt + yft*yft + perlinNoise(0.5, 3, (x+xt)/2.0f, yt, (z+zt)/2.0f, 13) * 8.0f; if (dist <= 12) voxels.get(x+xt, yt, z+zt) = LEAVES; } } } voxels.generateOctree(); voxelsDirty = false; //voxels.remake(); cout << "Node: " << sizeof(Node) << endl; // Create OpenCL data buffers int voxelsSpace = voxels.space * 1.2; int voxelsPtrSpace = voxels.ptrSpace * 1.5; size_t gpuMemoryEstimate = sizeof(uint8_t) * voxelCount + sizeof(Node) * voxelsSpace + sizeof(uint32_t) * voxelsPtrSpace; cout << "GPU Memory Estimate: " << ((gpuMemoryEstimate / 1024.0) / 1024) << "MB" << endl; cl_mem voxelMemory, octreeMemory, ptrTableMemory; voxelMemory = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(uint8_t) * voxelCount, NULL, &ret); octreeMemory = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(Node) * voxelsSpace, NULL, &ret); ptrTableMemory = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(uint32_t) * voxelsPtrSpace, NULL, &ret); // Main loop SDL_Event event; float delta = -1.0f; long last = SDL_GetTicks(); long lastTimePhysics = last; long nextReport = last; bool running = true; bool postprocessEnabled = true; int framesLeftToRender = -1; int forceMixOff = 2; int succesiveFrames = 1; while (running) { bool onceBomb = false; while (SDL_PollEvent(&event)) switch (event.type) { case SDL_QUIT: running = false; break; case SDL_KEYUP: if (!grab) gui->keyPress(event.key.keysym.sym, false); key[event.key.keysym.sym] = false; if (event.key.keysym.sym == SDLK_ESCAPE) { running = false; } else if (event.key.keysym.sym == SDLK_t) { kernelType = (kernelType + 1) % raytracerKernels.size(); kernel = raytracerKernels[kernelType]; cout << "Kernel Type: " << kernelType << endl; cout << "Kernel: " << kernel->getName() << endl; reloadKernel = true; } else if (event.key.keysym.sym == SDLK_y) { if (framesLeftToRender == -1) { framesLeftToRender = 3; kernel = kernelPath; kernelType = 3; } else { framesLeftToRender = -1; kernel = kernelFast; kernelType = 0; } reloadKernel = true; delta = 0; // Reset delta smoothing } else if (event.key.keysym.sym == SDLK_l) { opencl = !opencl; delta = 0; // Reset delta smoothing } else if (event.key.keysym.sym == SDLK_g) { grab = !grab; if (grab) { SDL_ShowCursor(0); SDL_WM_GrabInput(SDL_GRAB_ON); } else { SDL_ShowCursor(1); SDL_WM_GrabInput(SDL_GRAB_OFF); } } else if (event.key.keysym.sym == SDLK_i) { cout << camera.position.x << ", " << camera.position.y << ", " << camera.position.z << endl; } else if (event.key.keysym.sym == SDLK_o) { ofstream out("camera.txt"); out << camera.position.x << " " << camera.position.y << " " << camera.position.z; out << " " << camera.getYaw() << " " << camera.getPitch() << endl; out.close(); cout << "Camera state saved" << endl; } else if (event.key.keysym.sym == SDLK_p) { ifstream in("camera.txt"); float yaw, pitch; in >> camera.position.x >> camera.position.y >> camera.position.z; in >> yaw >> pitch; in.close(); camera.setYaw(yaw); camera.setPitch(pitch); cout << "Camera state restored" << endl; } //if (event.key.keysym.sym == SDLK_ESCAPE) running = false; break; case SDL_KEYDOWN: if (!grab) gui->keyPress(event.key.keysym.sym, true); if (event.key.keysym.sym == SDLK_n) onceBomb = true; key[event.key.keysym.sym] = true; break; case SDL_MOUSEBUTTONDOWN: if (!grab) { gui->mouseButton(ivec2(event.button.x, event.button.y), true); gui->mouseMove(ivec2(event.button.x, event.button.y)); } else if (event.button.button == 1) deleteBlock(); break; case SDL_MOUSEBUTTONUP: if (!grab) { gui->mouseButton(ivec2(event.button.x, event.button.y), false); gui->mouseMove(ivec2(event.button.x, event.button.y)); } break; case SDL_MOUSEMOTION: if (grab) { if (event.motion.xrel != 0 || event.motion.yrel != 0) forceMixOff = 1; camera.changeYaw(0.012f * event.motion.xrel); camera.changePitch(-0.015f * event.motion.yrel); } else gui->mouseMove(ivec2(event.motion.x, event.motion.y)); break; } if (!running) break; if (framesLeftToRender > 0) { if (--framesLeftToRender == 0) cout << "Paused" << endl; } else if (framesLeftToRender == 0) continue; if (gui->getOption(1, 0).choice != kernelType) { kernelType = gui->getOption(1, 0).choice; kernel = raytracerKernels[kernelType]; cout << "Kernel Type: " << kernelType << endl; cout << "Kernel: " << kernel->getName() << endl; reloadKernel = true; } { int newState = gui->getOption(2, 1).choice == 1; if (postprocessEnabled != newState) { postprocessEnabled = newState; forceMixOff = 1; } } { timeOfDay = gui->getOption(0, 0).value; //timeOfDay = (timeOfDay + delta * 0.025f); if (timeOfDay > 24.0f) timeOfDay -= 24.0f; float sunAngle = timeOfDay * M_PI / 12.0f; scene.lightDirection = normalize(vec3(sin(sunAngle), -cos(sunAngle), 0.25f)); } { exposure = gui->getOption(2, 0).value; exposure *= exposure; } { float speed = 8.0f; //acceleration.y = 9.8f * 4.0f; velocity.x *= delta * 0.1f; velocity.y *= delta * 0.1f; velocity.z *= delta * 0.1f; vec3 up = vec3(0.0f, 1.0f, 0.0f); vec3 forward = camera.getWalkDirection(); vec3 right = cross(camera.getWalkDirection(), up); if (key[SDLK_a]) { velocity += right * speed; forceMixOff = 1; } if (key[SDLK_d]) { velocity -= right * speed; forceMixOff = 1; } if (key[SDLK_w]) { velocity += forward * speed; forceMixOff = 1; } if (key[SDLK_s]) { velocity -= forward * speed; forceMixOff = 1; } if (key[SDLK_q]) { velocity -= up * speed; forceMixOff = 1; } if (key[SDLK_z]) { velocity += up * speed; forceMixOff = 1; } if (key[SDLK_b] || onceBomb) { Ray actionRay = Ray(camera.position, vec3(camera.getViewMatrix() * vec4(0.0f, 0.0f, 1.0f, 0.0f))); VoxelGrid::Result result = voxels.intersect(actionRay, 100.0f); if (result.hit) { voxelsDirty = true; for (int x = -4; x <= 4; x++) for (int y = -4; y <= 4; y++) for (int z = -4; z <= 4; z++) if (x*x + y*y + z*z < 16) { int xp = x + result.pos.x, yp = y + result.pos.y, zp = z + result.pos.z; if (xp < 0 || xp >= voxelSize.x || yp < 0 || yp >= voxelSize.y || zp < 0 || zp >= voxelSize.z) continue; voxels.get(xp, yp, zp) = 0; } } } else if (key[SDLK_DELETE]) { deleteBlock(); } else if (key[SDLK_RETURN]) { Ray actionRay(camera.position, vec3(camera.getViewMatrix() * vec4(0.0f, 0.0f, 1.0f, 0.0f))); VoxelGrid::Result result = voxels.intersect(actionRay, 100.0f); if (result.hit) voxels.get(result.pos.x, result.pos.y, result.pos.z) = 3; } long now = SDL_GetTicks(); bool firstPhysics = true; for (;lastTimePhysics < now; lastTimePhysics += 10) { float pdelta = 10.0f / 1000.0f; // Integrate position camera.position += velocity * pdelta + acceleration * 0.5f * pdelta * pdelta; velocity += acceleration * pdelta; // Move out of terrain Ray groundRay(camera.position, vec3(0.0f, 1.0f, 0.0f)); VoxelGrid::Result groundResult = voxels.intersect(groundRay, 100.0f); if (groundResult.hit && groundResult.t < 2.0f) { camera.position.y -= 2.0f - groundResult.t; if (velocity.y > 0.0f) velocity.y = 0.0f; if (key[SDLK_SPACE] && velocity.y > -0.1f && firstPhysics) velocity.y = -20.0f; } firstPhysics = false; } } if (opencl) { static int bufferIndex = 1; bufferIndex = 1 - bufferIndex; if (reloadKernel) { reloadKernel = false; ret |= clSetKernelArg(kernel->kernel, 1, sizeof(cl_mem), (void *)&outputMemory); ret |= clSetKernelArg(kernel->kernel, 2, sizeof(cl_mem), (void *)&cameraPosMemory); ret |= clSetKernelArg(kernel->kernel, 3, sizeof(cl_mem), (void *)&cameraViewMatrixMemory); ret |= clSetKernelArg(kernel->kernel, 4, sizeof(cl_mem), (void *)&lightDirectionMemory); ret |= clSetKernelArg(kernel->kernel, 6, sizeof(cl_mem), (void *)&voxelSizeMemory); ret |= clEnqueueWriteBuffer(command_queue, voxelMemory, CL_TRUE, 0, sizeof(uint8_t) * voxelCount, voxels.getVoxels(), 0, NULL, NULL); ret |= clEnqueueWriteBuffer(command_queue, octreeMemory, CL_TRUE, 0, sizeof(Node) * voxels.space, voxels.nodes, 0, NULL, NULL); ret |= clEnqueueWriteBuffer(command_queue, ptrTableMemory, CL_TRUE, 0, sizeof(uint32_t) * voxels.ptrSpace, voxels.ptrTable, 0, NULL, NULL); ret |= clEnqueueWriteBuffer(command_queue, voxelSizeMemory, CL_TRUE, 0, sizeof(int) * 3, &voxelSize, 0, NULL, NULL); forceMixOff = 2; } if (forceMixOff > 0) { forceMixOff--; succesiveFrames = 1; } else if (succesiveFrames < 128) succesiveFrames++; float mixFactor = 1.0f - 1.0f / succesiveFrames; static int k = 0; k++; vec3 cameraPos = camera.position; mat4 cameraViewMatrix = transpose(camera.getViewMatrix()); ret = clEnqueueWriteBuffer(command_queue, cameraPosMemory, CL_FALSE, 0, sizeof(vec3), &cameraPos, 0, NULL, NULL); ret = clEnqueueWriteBuffer(command_queue, cameraViewMatrixMemory, CL_FALSE, 0, sizeof(mat4), &cameraViewMatrix, 0, NULL, NULL); ret = clEnqueueWriteBuffer(command_queue, lightDirectionMemory, CL_FALSE, 0, sizeof(vec3), &scene.lightDirection, 0, NULL, NULL); if (voxelsDirty) { voxelsDirty = false; voxels.generateOctree(); ret |= clEnqueueWriteBuffer(command_queue, voxelMemory, CL_FALSE, 0, sizeof(uint8_t) * voxelCount, voxels.getVoxels(), 0, NULL, NULL); ret |= clEnqueueWriteBuffer(command_queue, octreeMemory, CL_FALSE, 0, sizeof(Node) * voxels.space, voxels.nodes, 0, NULL, NULL); ret |= clEnqueueWriteBuffer(command_queue, ptrTableMemory, CL_FALSE, 0, sizeof(uint32_t) * voxels.ptrSpace, voxels.ptrTable, 0, NULL, NULL); } // Execute the OpenCL kernel on the list ret = clSetKernelArg(kernel->kernel, 0, sizeof(int), (void *)&k); ret = clSetKernelArg(kernel->kernel, 5, sizeof(cl_mem), (void *)&voxelMemory); ret = clSetKernelArg(kernel->kernel, 7, sizeof(cl_mem), (void *)&octreeMemory); ret = clSetKernelArg(kernel->kernel, 8, sizeof(cl_mem), (void *)&ptrTableMemory); glFinish(); ret = clEnqueueAcquireGLObjects(command_queue, 1, &outputMemory, 0, NULL, NULL); ret = clEnqueueAcquireGLObjects(command_queue, 1, &screenMemory[0], 0, NULL, NULL); ret = clEnqueueAcquireGLObjects(command_queue, 1, &screenMemory[1], 0, NULL, NULL); ret = clEnqueueAcquireGLObjects(command_queue, 1, &finalMemory, 0, NULL, NULL); size_t global_item_size[2]; size_t local_item_size[2] = {8, 8}; global_item_size[0] = WIDTH;//((WIDTH + 7) >> 3) << 3; global_item_size[1] = HEIGHT;//((HEIGHT + 15) >> 4) << 4; clEnqueueNDRangeKernel(command_queue, kernel->kernel, 2, NULL, global_item_size, local_item_size, 0, NULL, NULL); if (postprocessEnabled) { clSetKernelArg(postprocess.kernel, 0, sizeof(int), (void *)&k); clSetKernelArg(postprocess.kernel, 1, sizeof(float), (void *)&mixFactor); clSetKernelArg(postprocess.kernel, 2, sizeof(cl_mem), (void *)&outputMemory); clSetKernelArg(postprocess.kernel, 3, sizeof(cl_mem), (void *)&screenMemory[1-bufferIndex]); clSetKernelArg(postprocess.kernel, 4, sizeof(cl_mem), (void *)&screenMemory[bufferIndex]); clEnqueueNDRangeKernel(command_queue, postprocess.kernel, 2, NULL, global_item_size, local_item_size, 0, NULL, NULL); } if (postprocessEnabled) clSetKernelArg(tonemapper.kernel, 0, sizeof(cl_mem), (void *)&screenMemory[bufferIndex]); else clSetKernelArg(tonemapper.kernel, 0, sizeof(cl_mem), (void *)&outputMemory); clSetKernelArg(tonemapper.kernel, 1, sizeof(cl_mem), (void *)&finalMemory); clSetKernelArg(tonemapper.kernel, 2, sizeof(float), (void *)&exposure); clEnqueueNDRangeKernel(command_queue, tonemapper.kernel, 2, NULL, global_item_size, local_item_size, 0, NULL, NULL); clEnqueueReleaseGLObjects(command_queue, 1, &outputMemory, 0, NULL, NULL); clEnqueueReleaseGLObjects(command_queue, 1, &screenMemory[0], 0, NULL, NULL); clEnqueueReleaseGLObjects(command_queue, 1, &screenMemory[1], 0, NULL, NULL); clEnqueueReleaseGLObjects(command_queue, 1, &finalMemory, 0, NULL, NULL); clFinish(command_queue); drawToScreen(); } else { static int k = 0; #pragma omp parallel for schedule(dynamic) lastprivate(k) for (int y = 0; y < HEIGHT; y++) for (int x = 0; x < WIDTH; x++) pixels[x + y * WIDTH] = raytracer->trace(x, y, x + y * WIDTH + k++); glBindTexture(GL_TEXTURE_2D, screenTexture[0]); glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, WIDTH, HEIGHT, GL_RGBA, GL_FLOAT, (GLvoid*)pixels); glBindTexture(GL_TEXTURE_2D, 0); drawToScreen(); } long now = SDL_GetTicks(); //if (now - start > 10000) break; float cdelta = (now - last) * 0.001f; last = now; delta = (delta > 0.0f) ? (delta * 2.0f + cdelta) * 0.333f : cdelta; if (nextReport < now) { cout << fixed << setprecision(2) << (delta * 1000) << "ms - FPS: " << (1.0f / delta) << endl; nextReport = now + 500; } gui->update(delta, !grab); }