void mesh_border::load(const string& filename_1, const string& filename_2) { stringstream border_buffer[BORDER_BUFFER_COUNT]; for(size_t border = 0; border < BORDER_BUFFER_COUNT; border++) { const string& filename = (border == 0 ? filename_1 : filename_2); if(!f->file_to_buffer(filename, border_buffer[border])) { a2e_error("couldn't open border file #%u: %s!", border, filename); return; } // read elements size_t line_count = 0; while(!border_buffer[border].eof() && border_buffer[border].good()) { float u1, v1, u2, v2, nx, ny, dist; border_buffer[border] >> u1; border_buffer[border] >> v1; border_buffer[border] >> u2; border_buffer[border] >> v2; border_buffer[border] >> nx; border_buffer[border] >> ny; border_buffer[border] >> dist; dist /= 10.0f; borders[border].vertices[0].push_back(float2(u1, v1 - 10.0f) / 10.0f); borders[border].vertices[0].push_back(float2(u2, v2 - 10.0f) / 10.0f); borders[border].vertices[1].push_back(float2(-u1, v1 - 10.0f) / 10.0f); borders[border].vertices[1].push_back(float2(-u2, v2 - 10.0f) / 10.0f); borders[border].normals[0].push_back(float4(nx, ny, dist+0.001f, 1.0f)); borders[border].normals[0].push_back(float4(nx, ny, 0.001f, 1.0f)); borders[border].normals[1].push_back(float4(-nx, ny, dist+0.001f, 1.0f)); borders[border].normals[1].push_back(float4(-nx, ny, 0.001f, 1.0f)); line_count++; } } // for(size_t j = 0; j < BORDER_BUFFER_COUNT; j++) { glGenBuffers(2, &border_gl_data[j].vertices_vbo[0]); glGenBuffers(2, &border_gl_data[j].normals_vbo[0]); for(size_t i = 0; i < 2; i++) { glBindBuffer(GL_ARRAY_BUFFER, border_gl_data[j].vertices_vbo[i]); glBufferData(GL_ARRAY_BUFFER, sizeof(float2) * borders[j].vertices[i].size(), &borders[j].vertices[i][0], GL_STATIC_DRAW); glBindBuffer(GL_ARRAY_BUFFER, border_gl_data[j].normals_vbo[i]); glBufferData(GL_ARRAY_BUFFER, sizeof(float4) * borders[j].normals[i].size(), &borders[j].normals[i][0], GL_STATIC_DRAW); } } glBindBuffer(GL_ARRAY_BUFFER, 0); }
bool init_efx_funcs() { alGenEffects = (LPALGENEFFECTS)alGetProcAddress("alGenEffects"); alDeleteEffects = (LPALDELETEEFFECTS)alGetProcAddress("alDeleteEffects"); alIsEffect = (LPALISEFFECT)alGetProcAddress("alIsEffect"); alGenAuxiliaryEffectSlots = (LPALGENAUXILIARYEFFECTSLOTS)alGetProcAddress("alGenAuxiliaryEffectSlots"); alEffecti = (LPALEFFECTI)alGetProcAddress("alEffecti"); alEffectf = (LPALEFFECTF)alGetProcAddress("alEffectf"); alGenFilters = (LPALGENFILTERS)alGetProcAddress("alGenFilters"); alIsFilter = (LPALISFILTER)alGetProcAddress("alIsFilter"); alFilteri = (LPALFILTERI)alGetProcAddress("alFilteri"); alFilterf = (LPALFILTERF)alGetProcAddress("alFilterf"); alAuxiliaryEffectSloti = (LPALAUXILIARYEFFECTSLOTI)alGetProcAddress("alAuxiliaryEffectSloti"); alDeleteAuxiliaryEffectSlots = (LPALDELETEAUXILIARYEFFECTSLOTS)alGetProcAddress("alDeleteAuxiliaryEffectSlots"); alDeleteFilters = (LPALDELETEFILTERS)alGetProcAddress("alDeleteFilters"); const auto fail = [](const string& name) -> bool { a2e_error("failed to get function pointer for \"%s\"!", name); return false; }; if(alGenEffects == nullptr) return fail("alGenEffects"); if(alDeleteEffects == nullptr) return fail("alDeleteEffects"); if(alIsEffect == nullptr) return fail("alIsEffect"); if(alGenAuxiliaryEffectSlots == nullptr) return fail("alGenAuxiliaryEffectSlots"); if(alEffecti == nullptr) return fail("alEffecti"); if(alEffectf == nullptr) return fail("alEffectf"); if(alGenFilters == nullptr) return fail("alGenFilters"); if(alIsFilter == nullptr) return fail("alIsFilter"); if(alFilteri == nullptr) return fail("alFilteri"); if(alFilterf == nullptr) return fail("alFilterf"); if(alAuxiliaryEffectSloti == nullptr) return fail("alAuxiliaryEffectSloti"); if(alDeleteAuxiliaryEffectSlots == nullptr) return fail("alDeleteAuxiliaryEffectSlots"); if(alDeleteFilters == nullptr) return fail("alDeleteFilters"); return true; }
int main(int argc, char *argv[]) { // initialize the engine e = new engine(argv[0], (const char*)"../../data/"); e->init(); e->set_caption(APPLICATION_NAME); const xml::xml_doc& config_doc = e->get_config_doc(); // init class pointers fio = e->get_file_io(); eevt = e->get_event(); egfx = e->get_gfx(); t = e->get_texman(); ocl = e->get_opencl(); exts = e->get_ext(); s = e->get_shader(); r = e->get_rtt(); f = new fft(config_doc.get<bool>("config.audio.fake_spectrum", false)); ah = new audio_handler(f, config_doc.get<bool>("config.audio.playback", false)); sce = new scene(e); cam = new camera(e); // for debugging purposes debug_tex = a2e_texture(new texture_object()); debug_tex->width = e->get_width(); debug_tex->height = e->get_height(); // compile additional shaders const string ar_shaders[][2] = { { "IR_GP_SKINNING", "inferred/gp_skinning.a2eshd" }, { "IR_MP_SKINNING", "inferred/mp_skinning.a2eshd" }, { "RTT_MESH", "misc/rtt_mesh.a2eshd" }, { "PARTICLE DEBUG", "particle/particle_debug.a2eshd" }, { "MOTION BLUR", "misc/motion_blur.a2eshd" }, }; for(size_t i = 0; i < A2E_ARRAY_LENGTH(ar_shaders); i++) { if(!s->add_a2e_shader(ar_shaders[i][0], ar_shaders[i][1])) { a2e_error("couldn't add a2e-shader \"%s\"!", ar_shaders[i][1]); done = true; } } // compile additional kernels const string ar_kernels[][4] = { // identifier, kernel name, file name, build options { "PARTICLE MESH INIT", "particle_init", "particle_mesh_spawn.cl", " -DA2E_PARTICLE_INIT" }, { "PARTICLE MESH RESPAWN", "particle_respawn", "particle_mesh_spawn.cl", "" }, { "PARTICLE MESH COMPUTE", "particle_compute", "particle_mesh_compute.cl", " -DSPECTRUM_WIDTH="+size_t2string(FFT_CL_BUFFER_WIDTH) +" -DSPECTRUM_HEIGHT="+size_t2string(FFT_CL_BUFFER_HEIGHT) }, }; for(size_t i = 0; i < A2E_ARRAY_LENGTH(ar_kernels); i++) { bool success = ocl->add_kernel_file(ar_kernels[i][0], ocl->make_kernel_path(ar_kernels[i][2].c_str()), ar_kernels[i][1], ar_kernels[i][3].c_str()) != nullptr; if(!success) { a2e_error("couldn't 
add opencl kernel \"%s\"!", ar_kernels[i][2]); done = true; } } // initialize the camera cam->set_rotation_speed(300.0f); cam->set_cam_speed(5.0f); cam->set_mouse_input(false); cam->set_keyboard_input(true); cam->set_wasd_input(true); // get camera settings from the config const float3 cam_pos = config_doc.get<float3>("config.camera.position", float3(0.0f, -12.0f, -5.0f)); const float2 cam_rot = config_doc.get<float2>("config.camera.rotation", float2(0.0f, 180.0f)); cam->set_position(cam_pos); cam->set_rotation(cam_rot.x, cam_rot.y, 0.0f); // create the scene create_scene(); // load model model_loader ml(e); bmodel = ml.load(e->data_path("NI-Elem.txt"), e->data_path("NI-Vrts.txt"), e->data_path("NI-Tex0.txt"), e->data_path("NI-boneW.txt"), e->data_path("NI-boneI.txt"), e->data_path("NI-bindMatrix-CM.txt")); // render mesh border and mesh push mesh_border* mb = new mesh_border(); mb->load(e->data_path("NI-Border1.txt"), e->data_path("NI-Border2.txt")); mb->render(); mesh_push* mp = new mesh_push(); mp->load(e->data_path("NI-Push1.txt")); mp->render(); // init openni const string oni_file = (argc > 1 ? 
string(argv[1]) : ""); const int init_ret = ni_handler::init(oni_file, mb, mp); if(init_ret != XN_STATUS_OK) { a2e_error("couldn't initialize openni: %u", init_ret); done = true; } // add event handlers event::handler key_handler_fnctr(&key_handler); eevt->add_event_handler(key_handler_fnctr, EVENT_TYPE::KEY_DOWN, EVENT_TYPE::KEY_UP, EVENT_TYPE::KEY_PRESSED); event::handler mouse_handler_fnctr(&mouse_handler); eevt->add_event_handler(mouse_handler_fnctr, EVENT_TYPE::MOUSE_RIGHT_CLICK); event::handler quit_handler_fnctr(&quit_handler); eevt->add_event_handler(quit_handler_fnctr, EVENT_TYPE::QUIT); // additional debug stuff const int2 buffer_size(1024); particle_debug_fbo = r->add_buffer(buffer_size.x, buffer_size.y, GL_TEXTURE_2D, texture_object::TF_POINT, rtt::TAA_NONE, GL_CLAMP_TO_EDGE, GL_CLAMP_TO_EDGE, GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT, 1, rtt::DT_NONE); // main loop while(!done) { // event handling eevt->handle_events(); // set caption (app name and fps count) if(e->is_new_fps_count()) { static stringstream caption; caption << APPLICATION_NAME << " | FPS: " << e->get_fps(); caption << " | Cam: " << float3(-*e->get_position()); caption << " " << cam->get_rotation(); e->set_caption(caption.str().c_str()); core::reset(&caption); } // render e->start_draw(); if(ni_update) ni->run(); cam->run(); sce->draw(); // draw debug texture if(show_debug_texture) { if(debug_tex->width > 0 && debug_tex->height > 0) { e->start_2d_draw(); size_t draw_width = debug_tex->width, draw_height = debug_tex->height; float ratio = float(draw_width) / float(draw_height); float scale = 1.0f; if(ratio >= 1.0f && draw_width > e->get_width()) { scale = float(e->get_width()) / float(draw_width); } else if(ratio < 1.0f && draw_height > e->get_height()) { scale = float(e->get_height()) / float(draw_height); } draw_width *= scale; draw_height *= scale; egfx->draw_textured_color_rectangle(gfx::rect(0, 0, (unsigned int)draw_width, (unsigned int)draw_height), coord(0.0f, 1.0f), coord(1.0f, 0.0f), 
float4(1.0f, 1.0f, 1.0f, 0.0f), float4(0.0f, 0.0f, 0.0f, 1.0f), debug_tex->tex()); e->stop_2d_draw(); } } e->stop_draw(); // for debugging purposes only: reset players (set if opencl kernels are reloaded) if(debug_players_reset) { debug_players_reset = false; ni_handler::reset_players(); } } debug_tex->tex_num = 0; // cleanup eevt->remove_event_handler(key_handler_fnctr); eevt->remove_event_handler(mouse_handler_fnctr); eevt->remove_event_handler(quit_handler_fnctr); r->delete_buffer(particle_debug_fbo); delete ah; delete f; ni_handler::destroy(); delete mb; delete mp; delete mat; for(const auto& model : models) { delete model; } models.clear(); for(const auto& l : lights) { delete l; } lights.clear(); if(bmodel != nullptr) delete bmodel; delete sce; delete cam; delete e; return 0; }
/*! loads a bus model from a set of whitespace separated text files
 *  @param elements_filename triangle indices, three unsigned ints per record
 *  @param vertices_filename vertex positions, three floats per record
 *  @param tex_coords_filename texture coordinates, two floats per record
 *  @param bone_weights_filename bone weights, four floats per record
 *  @param bone_indices_filename bone indices, four unsigned ints per record
 *  @param matrices_filename exactly PLAYERS_BONE_COUNT matrices, 16 floats each
 *  @return the newly allocated model (owned by the caller), or nullptr if a file couldn't be
 *          opened or the matrix count doesn't match PLAYERS_BONE_COUNT
 *
 *  the normals are always read from "NI-Nrms.txt" in the engine data path.
 *  all vertex positions and the elements 3, 7 and 11 of every stored matrix are scaled by 1/10;
 *  the joint positions are set from the negated, unscaled elements 3, 7 and 11.
 */
bus_model* model_loader::load(const string& elements_filename,
							  const string& vertices_filename,
							  const string& tex_coords_filename,
							  const string& bone_weights_filename,
							  const string& bone_indices_filename,
							  const string& matrices_filename) {
	stringstream elements_buffer, vertices_buffer, tc_buffer, bweights_buffer, bindices_buffer, bm_buffer;
	if(!f->file_to_buffer(elements_filename, elements_buffer)) {
		a2e_error("couldn't open elements file: %s!", elements_filename);
		return nullptr;
	}
	if(!f->file_to_buffer(vertices_filename, vertices_buffer)) {
		a2e_error("couldn't open vertices file: %s!", vertices_filename);
		return nullptr;
	}
	if(!f->file_to_buffer(tex_coords_filename, tc_buffer)) {
		a2e_error("couldn't open tex coords file: %s!", tex_coords_filename);
		return nullptr;
	}
	if(!f->file_to_buffer(bone_weights_filename, bweights_buffer)) {
		a2e_error("couldn't open bone weights file: %s!", bone_weights_filename);
		return nullptr;
	}
	if(!f->file_to_buffer(bone_indices_filename, bindices_buffer)) {
		a2e_error("couldn't open bone indices file: %s!", bone_indices_filename);
		return nullptr;
	}
	if(!f->file_to_buffer(matrices_filename, bm_buffer)) {
		a2e_error("couldn't open matrices file: %s!", matrices_filename);
		return nullptr;
	}
	
	stringstream normals_buffer;
	if(!f->file_to_buffer(e->data_path("NI-Nrms.txt"), normals_buffer)) {
		a2e_error("couldn't open normals file: %s!", "NI-Nrms.txt");
		return nullptr;
	}
	
	// NOTE: allocated only after all files were read successfully; every later error path
	// must delete it again
	bus_model* bmodel = new bus_model();
	static const float model_scale = 1.0f/10.0f;
	
	// NOTE: all readers use the extraction itself as the loop condition, so an empty file,
	// trailing whitespace or an incomplete last record never adds a garbage entry
	
	// read elements
	unsigned int i1, i2, i3;
	while(elements_buffer >> i1 >> i2 >> i3) {
		bmodel->indices.push_back(index3(i1, i2, i3));
	}
	
	// read vertices
	float vx, vy, vz;
	while(vertices_buffer >> vx >> vy >> vz) {
		bmodel->vertices.push_back(float3(vx, vy, vz));
	}
	
	// read tex coords
	float tu, tv;
	while(tc_buffer >> tu >> tv) {
		bmodel->tex_coords.push_back(coord(tu, tv));
	}
	
	// read normals
	float nx, ny, nz;
	while(normals_buffer >> nx >> ny >> nz) {
		bmodel->normals.push_back(float3(nx, ny, nz));
	}
	
	// read bone weights
	float wx, wy, wz, ww;
	while(bweights_buffer >> wx >> wy >> wz >> ww) {
		bmodel->bone_weights.push_back(float4(wx, wy, wz, ww));
	}
	
	// read bone indices
	unsigned int ix, iy, iz, iw;
	while(bindices_buffer >> ix >> iy >> iz >> iw) {
		bmodel->bone_indices.push_back(uint4(ix, iy, iz, iw));
	}
	
	// read inverse matrices
	size_t m_index = 0;
	for(;;) {
		matrix4f m;
		for(size_t i = 0; i < 16; i++) {
			bm_buffer >> m.data[i];
		}
		// no further complete matrix available
		if(bm_buffer.fail()) break;
		
		if(m_index >= PLAYERS_BONE_COUNT) {
			a2e_error("too many matrices!");
			delete bmodel;
			return nullptr;
		}
		
		bmodel->matrices[m_index] = m;
		bmodel->matrices[m_index][3] *= model_scale;
		bmodel->matrices[m_index][7] *= model_scale;
		bmodel->matrices[m_index][11] *= model_scale;
		// joint position uses the values as read from the file (not scaled)
		bmodel->joint_positions[m_index].set(-m.data[3], -m.data[7], -m.data[11], 1.0f);
		m_index++;
	}
	if(m_index != PLAYERS_BONE_COUNT) {
		a2e_error("too few matrices!");
		delete bmodel;
		return nullptr;
	}
	
	for(auto& vertex : bmodel->vertices) {
		vertex *= model_scale;
	}
	
	a2e_debug("model (%s, %s, %s) read: #indices: %u, #vertices: %u, #normals, %u, #tex coords: %u",
			  elements_filename, vertices_filename, tex_coords_filename,
			  bmodel->indices.size(), bmodel->vertices.size(), bmodel->normals.size(), bmodel->tex_coords.size());
	
	return bmodel;
}
void kernel_to_ptx(const string& identifier, const string& file_name, const string& func_name, std::function<string(const CC_TARGET&)> additional_options_fnc) { a2e_debug("compiling \"%s\" kernel!", identifier); // stringstream buffer(stringstream::in | stringstream::out); if(!file_io::file_to_buffer(file_name, buffer)) { a2e_error("failed to read kernel source!"); return; } const string src(buffer.str()); // nvcc compile for(const auto& target : cc_targets) { const string cc_target_str = target.second; // generate options string options = "-I " + kernel_path; string nvcc_log = ""; const string additional_options(additional_options_fnc(target.first)); if(!additional_options.empty()) { // convert all -DDEFINEs to -D DEFINE options += " " + core::find_and_replace(additional_options, "-D", "-D "); } // add kernel const string tmp_name = "/tmp/cudacl_tmp_"+identifier+"_"+cc_target_str+"_"+size_t2string(SDL_GetPerformanceCounter()); vector<cudacl_kernel_info> kernels_info; string cuda_source = ""; cudacl_translate(tmp_name, src.c_str(), options, cuda_source, kernels_info); // create tmp cu file fstream cu_file(tmp_name+".cu", fstream::out); cu_file << cuda_source << endl; cu_file.close(); // string build_cmd = "/usr/local/cuda/bin/nvcc --ptx --machine 64 -arch sm_" + cc_target_str + " -O3"; build_cmd += " " + options; // build_cmd += " -D NVIDIA"; build_cmd += " -D GPU"; build_cmd += " -D PLATFORM_NVIDIA"; build_cmd += " -o "+tmp_name+".ptx"; build_cmd += " "+tmp_name+".cu"; //build_cmd += " 2>&1"; core::system(build_cmd.c_str(), nvcc_log); // read ptx stringstream ptx_buffer(stringstream::in | stringstream::out); if(!file_io::file_to_buffer(tmp_name+".ptx", ptx_buffer)) { a2e_error("ptx file \"%s\" doesn't exist!", tmp_name+".ptx"); return; } // write to cache // ptx: file_io ptx_out(cache_path+identifier+"_"+cc_target_str+".ptx", file_io::OPEN_TYPE::WRITE); if(!ptx_out.is_open()) { a2e_error("couldn't create ptx cache file for %s!", identifier); return; } const string 
ptx_data(ptx_buffer.str()); ptx_out.write_block(ptx_data.c_str(), ptx_data.size()); ptx_out.close(); // kernel info: file_io info_out(cache_path+identifier+"_info_"+cc_target_str+".txt", file_io::OPEN_TYPE::WRITE); if(!info_out.is_open()) { a2e_error("couldn't create kernel info cache file for %s!", identifier); return; } auto& info_stream = *info_out.get_filestream(); bool found = false; for(const auto& info : kernels_info) { if(info.name == func_name) { found = true; info_stream << info.name << " " << info.parameters.size(); for(const auto& param : info.parameters) { info_stream << " " << get<0>(param); info_stream << " " << (unsigned int)get<1>(param); info_stream << " " << (unsigned int)get<2>(param); info_stream << " " << (unsigned int)get<3>(param); } break; } } info_out.close(); if(!found) a2e_error("kernel function \"%s\" does not exist in source file!", func_name); } task_counter--; }
int main(int argc, char *argv[]) { logger::init(); a2e_log("kernelcacher v%u.%u.%u - %s %s", KERNELCACHER_MAJOR_VERSION, KERNELCACHER_MINOR_VERSION, KERNELCACHER_REVISION_VERSION, KERNELCACHER_BUILT_DATE, KERNELCACHER_BUILT_TIME); string usage = "usage: kernelcacher /path/to/data/kernels"; if(argc == 1) { a2e_error("no kernel path specified!\n%s", usage.c_str()); return 0; } kernel_path = argv[1]; if(kernel_path.back() != '/') kernel_path.push_back('/'); cache_path = kernel_path.substr(0, kernel_path.rfind('/', kernel_path.length()-2)) + "/cache/"; a2e_debug("caching kernels from \"%s\" to \"%s\" ...", kernel_path, cache_path); // compile kernels const string lsl_sm_1x_str = " -DLOCAL_SIZE_LIMIT=512"; const string lsl_sm_20p_str = " -DLOCAL_SIZE_LIMIT=1024"; vector<tuple<string, string, string, std::function<string(const CC_TARGET&)>>> internal_kernels { { make_tuple("PARTICLE_INIT", "particle_spawn.cl", "particle_init", [](const CC_TARGET&) { return " -DA2E_PARTICLE_INIT"; }), make_tuple("PARTICLE_RESPAWN", "particle_spawn.cl", "particle_respawn", [](const CC_TARGET&) { return ""; }), make_tuple("PARTICLE_COMPUTE", "particle_compute.cl", "particle_compute", [](const CC_TARGET&) { return ""; }), make_tuple("PARTICLE_SORT_LOCAL", "particle_sort.cl", "bitonicSortLocal", [&](const CC_TARGET& cc_target) { return (cc_target <= CC_TARGET::SM_13 ? lsl_sm_1x_str : lsl_sm_20p_str); }), make_tuple("PARTICLE_SORT_MERGE_GLOBAL", "particle_sort.cl", "bitonicMergeGlobal", [&](const CC_TARGET& cc_target) { return (cc_target <= CC_TARGET::SM_13 ? lsl_sm_1x_str : lsl_sm_20p_str); }), make_tuple("PARTICLE_SORT_MERGE_LOCAL", "particle_sort.cl", "bitonicMergeLocal", [&](const CC_TARGET& cc_target) { return (cc_target <= CC_TARGET::SM_13 ? lsl_sm_1x_str : lsl_sm_20p_str); }), make_tuple("PARTICLE_COMPUTE_DISTANCES", "particle_sort.cl", "compute_distances", [&](const CC_TARGET& cc_target) { return (cc_target <= CC_TARGET::SM_13 ? 
lsl_sm_1x_str : lsl_sm_20p_str); }), } }; task_counter = internal_kernels.size() + 1; for(const auto& int_kernel : internal_kernels) { task::spawn([=]() { kernel_to_ptx(get<0>(int_kernel), kernel_path+get<1>(int_kernel), get<2>(int_kernel), get<3>(int_kernel)); }); } // task::spawn([=]() { file_io crc_file(cache_path+"CACHECRC", file_io::OPEN_TYPE::WRITE); if(!crc_file.is_open()) { a2e_error("couldn't create crc file!"); return; } auto& crc_fstream = *crc_file.get_filestream(); const auto kernel_files = core::get_file_list(kernel_path); for(const auto& kfile : kernel_files) { if(kfile.first[0] == '.') continue; stringstream buffer(stringstream::in | stringstream::out); if(!file_io::file_to_buffer(kernel_path+kfile.first, buffer)) { a2e_error("failed to read kernel source!"); return; } const string src(buffer.str()); const unsigned int crc = crc32(crc32(0L, Z_NULL, 0), (const Bytef*)src.c_str(), (uInt)src.size()); crc_fstream << kfile.first << " " << hex << crc << dec << endl; } crc_file.close(); task_counter--; }); // while(task_counter != 0) { SDL_Delay(100); } // done! logger::destroy(); return 0; }