void db_print_nl_buff(size_t start, size_t limit) {
	/*	Debug dump of the nl_tk_diff fields of nl_buff[start..limit-1] to
		Debug_File. A diagnostic is printed instead when nl_buff is not
		set or when start or limit exceeds nl_free.
	*/
	fprintf(Debug_File, "\n**** DB_NL_BUFF ****\n");

	if (!nl_buff) {
		fprintf(Debug_File, ">>>> NO NL_BUFF\n\n");
	} else if (start > nl_free) {
		fprintf(Debug_File, ">>>> start (%s) > nl_free (%s)\n\n",
			size_t2string(start), size_t2string(nl_free)
		);
	} else if (limit > nl_free) {
		fprintf(Debug_File, ">>>> limit (%s) > nl_free (%s)\n\n",
			size_t2string(limit), size_t2string(nl_free)
		);
	} else {
		size_t idx;

		fprintf(Debug_File, "nl_buff: %s entries:\n",
			size_t2string(nl_free));
		for (idx = start; idx < limit; idx++) {
			fprintf(Debug_File, "nl_tk_diff = %d\n",
				nl_buff[idx].nl_tk_diff);
		}
		fprintf(Debug_File, "\n");
	}
}
static void db_print_text(const struct text *txt) { /* prints a text (in compressed form) */ size_t i; fprintf(Debug_File, "\n\n**** DB_PRINT_TEXT ****\n"); fprintf(Debug_File, "File \"%s\", %s %ss, ", txt->tx_fname, size_t2string(txt->tx_limit - txt->tx_start), Token_Name ); fprintf(Debug_File, "txt->tx_start = %s, txt->tx_limit = %s\n", size_t2string(txt->tx_start), size_t2string(txt->tx_limit) ); int BoL = 1; for (i = txt->tx_start; i < txt->tx_limit; i++) { if (BoL) { fprintf(Debug_File, "[%s]:", size_t2string(i)); BoL = 0; } fprintf(Debug_File, " "); fprint_token(Debug_File, Token_Array[i]); if ((i - txt->tx_start + 1) % 10 == 0) { fprintf(Debug_File, "\n"); BoL = 1; } } fprintf(Debug_File, "\n"); }
void db_run_info(const char *msg, const struct run *run, int lines_too) {
	/*	Writes a description of a run to Debug_File: the optional
		message, the two file names, the first and last token counts of
		both chunks, the line counts when lines_too is non-zero, and the
		size of the run.
	*/
	const struct chunk *first_chunk = &run->rn_chunk0;
	const struct chunk *second_chunk = &run->rn_chunk1;
	const char *plural_suffix = (run->rn_size == 1 ? "" : "s");

	if (msg != 0) {
		fprintf(Debug_File, "%s: ", msg);
	}

	/* the files involved */
	fprintf(Debug_File, "\"%s\" / \"%s\":\n",
		first_chunk->ch_text->tx_fname,
		second_chunk->ch_text->tx_fname
	);

	/* the token positions */
	fprintf(Debug_File, "from %s %s/%s to %s/%s:", token_name,
		size_t2string(first_chunk->ch_first.ps_tk_cnt),
		size_t2string(second_chunk->ch_first.ps_tk_cnt),
		size_t2string(first_chunk->ch_last.ps_tk_cnt),
		size_t2string(second_chunk->ch_last.ps_tk_cnt)
	);

	/* the line positions, on request */
	if (lines_too) {
		fprintf(Debug_File, " from lines %s/%s to %s/%s:",
			size_t2string(first_chunk->ch_first.ps_nl_cnt),
			size_t2string(second_chunk->ch_first.ps_nl_cnt),
			size_t2string(first_chunk->ch_last.ps_nl_cnt),
			size_t2string(second_chunk->ch_last.ps_nl_cnt)
		);
	}

	/* the size, with a grammatically correct unit */
	fprintf(Debug_File, " %s %s%s\n",
		size_t2string(run->rn_size), token_name, plural_suffix
	);
}
string oclraster_program::create_entry_function_parameters() const { const string fixed_params = get_fixed_entry_function_parameters(); string entry_function_params = ""; for(size_t i = 0, struct_count = structs.size(); i < struct_count; i++) { if(structs[i]->type != STRUCT_TYPE::BUFFERS) { const string qualifier = get_qualifier_for_struct_type(structs[i]->type); if(qualifier != "") entry_function_params += qualifier + " "; entry_function_params += structs[i]->name + "* " + structs[i]->object_name + ", "; } else { for(size_t j = 0, buffer_entries = structs[i]->variables.size(); j < buffer_entries; j++) { entry_function_params += "global " + structs[i]->variable_types[j] + " " + structs[i]->variables[j] + ", "; } } } for(size_t i = 0, image_count = images.image_names.size(); i < image_count; i++) { // framebuffer is passed in separately if(images.is_framebuffer[i]) continue; // for images: only add a placeholder (will be replaced by the actual image type later on) entry_function_params += "###OCLRASTER_IMAGE_"+size_t2string(i)+"###, "; } entry_function_params += fixed_params; return entry_function_params; }
static void fprint_count(FILE *f, size_t cnt, const char *unit) {
	/*	Writes the count followed by its unit to f, appending the
		plural -s to the unit for every count except 1.
	*/
	const char *suffix = "s";

	if (cnt == 1) {
		suffix = "";
	}
	fprintf(f, "%s %s%s", size_t2string(cnt), unit, suffix);
}
// Application entry point: sets up the engine and all global subsystem pointers, compiles the
// additional shaders and OpenCL kernels, loads the model/meshes, initializes OpenNI, runs the
// render loop until "done" is set and finally tears everything down again.
// argv[1] (optional) is handed to ni_handler::init as the .oni file name; an empty string is
// passed when no argument is given.
// Always returns 0 - initialization failures only set "done", so the main loop is skipped.
int main(int argc, char *argv[]) {
	// initialize the engine
	e = new engine(argv[0], (const char*)"../../data/");
	e->init();
	e->set_caption(APPLICATION_NAME);
	const xml::xml_doc& config_doc = e->get_config_doc();
	
	// init class pointers
	fio = e->get_file_io();
	eevt = e->get_event();
	egfx = e->get_gfx();
	t = e->get_texman();
	ocl = e->get_opencl();
	exts = e->get_ext();
	s = e->get_shader();
	r = e->get_rtt();
	// fft and audio handler are configured through the config document (both options default to false)
	f = new fft(config_doc.get<bool>("config.audio.fake_spectrum", false));
	ah = new audio_handler(f, config_doc.get<bool>("config.audio.playback", false));
	sce = new scene(e);
	cam = new camera(e);
	
	// for debugging purposes
	// (debug texture object that is initialized with the engine's width/height)
	debug_tex = a2e_texture(new texture_object());
	debug_tex->width = e->get_width();
	debug_tex->height = e->get_height();
	
	// compile additional shaders
	// (each entry: identifier, file name)
	const string ar_shaders[][2] = {
		{ "IR_GP_SKINNING", "inferred/gp_skinning.a2eshd" },
		{ "IR_MP_SKINNING", "inferred/mp_skinning.a2eshd" },
		{ "RTT_MESH", "misc/rtt_mesh.a2eshd" },
		{ "PARTICLE DEBUG", "particle/particle_debug.a2eshd" },
		{ "MOTION BLUR", "misc/motion_blur.a2eshd" },
	};
	for(size_t i = 0; i < A2E_ARRAY_LENGTH(ar_shaders); i++) {
		if(!s->add_a2e_shader(ar_shaders[i][0], ar_shaders[i][1])) {
			// a failed shader doesn't abort initialization, it only prevents the main loop from running
			a2e_error("couldn't add a2e-shader \"%s\"!", ar_shaders[i][1]);
			done = true;
		}
	}
	
	// compile additional kernels
	const string ar_kernels[][4] = {
		// identifier, kernel name, file name, build options
		{ "PARTICLE MESH INIT", "particle_init", "particle_mesh_spawn.cl", " -DA2E_PARTICLE_INIT" },
		{ "PARTICLE MESH RESPAWN", "particle_respawn", "particle_mesh_spawn.cl", "" },
		{ "PARTICLE MESH COMPUTE", "particle_compute", "particle_mesh_compute.cl",
			" -DSPECTRUM_WIDTH="+size_t2string(FFT_CL_BUFFER_WIDTH)
			+" -DSPECTRUM_HEIGHT="+size_t2string(FFT_CL_BUFFER_HEIGHT) },
	};
	for(size_t i = 0; i < A2E_ARRAY_LENGTH(ar_kernels); i++) {
		// add_kernel_file returning nullptr is treated as failure
		bool success = ocl->add_kernel_file(ar_kernels[i][0],
											ocl->make_kernel_path(ar_kernels[i][2].c_str()),
											ar_kernels[i][1],
											ar_kernels[i][3].c_str()) != nullptr;
		if(!success) {
			// note: the error message reports the kernel's file name, not its identifier
			a2e_error("couldn't add opencl kernel \"%s\"!", ar_kernels[i][2]);
			done = true;
		}
	}
	
	// initialize the camera
	cam->set_rotation_speed(300.0f);
	cam->set_cam_speed(5.0f);
	cam->set_mouse_input(false);
	cam->set_keyboard_input(true);
	cam->set_wasd_input(true);
	
	// get camera settings from the config
	// (the second argument of each get<>() call is the default value)
	const float3 cam_pos = config_doc.get<float3>("config.camera.position", float3(0.0f, -12.0f, -5.0f));
	const float2 cam_rot = config_doc.get<float2>("config.camera.rotation", float2(0.0f, 180.0f));
	cam->set_position(cam_pos);
	cam->set_rotation(cam_rot.x, cam_rot.y, 0.0f);
	
	// create the scene
	create_scene();
	
	// load model
	model_loader ml(e);
	bmodel = ml.load(e->data_path("NI-Elem.txt"),
					 e->data_path("NI-Vrts.txt"),
					 e->data_path("NI-Tex0.txt"),
					 e->data_path("NI-boneW.txt"),
					 e->data_path("NI-boneI.txt"),
					 e->data_path("NI-bindMatrix-CM.txt"));
	
	// render mesh border and mesh push
	// (both objects are handed to ni_handler::init below and deleted in the cleanup section)
	mesh_border* mb = new mesh_border();
	mb->load(e->data_path("NI-Border1.txt"), e->data_path("NI-Border2.txt"));
	mb->render();
	mesh_push* mp = new mesh_push();
	mp->load(e->data_path("NI-Push1.txt"));
	mp->render();
	
	// init openni
	// (uses the first command line argument as the .oni file name, an empty string if there is none)
	const string oni_file = (argc > 1 ? string(argv[1]) : "");
	const int init_ret = ni_handler::init(oni_file, mb, mp);
	if(init_ret != XN_STATUS_OK) {
		a2e_error("couldn't initialize openni: %u", init_ret);
		done = true;
	}
	
	// add event handlers
	// (the handler objects are kept as locals, since they are needed again for removal in the cleanup section)
	event::handler key_handler_fnctr(&key_handler);
	eevt->add_event_handler(key_handler_fnctr, EVENT_TYPE::KEY_DOWN, EVENT_TYPE::KEY_UP, EVENT_TYPE::KEY_PRESSED);
	event::handler mouse_handler_fnctr(&mouse_handler);
	eevt->add_event_handler(mouse_handler_fnctr, EVENT_TYPE::MOUSE_RIGHT_CLICK);
	event::handler quit_handler_fnctr(&quit_handler);
	eevt->add_event_handler(quit_handler_fnctr, EVENT_TYPE::QUIT);
	
	// additional debug stuff
	const int2 buffer_size(1024);
	particle_debug_fbo = r->add_buffer(buffer_size.x, buffer_size.y, GL_TEXTURE_2D, texture_object::TF_POINT, rtt::TAA_NONE, GL_CLAMP_TO_EDGE, GL_CLAMP_TO_EDGE, GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT, 1, rtt::DT_NONE);
	
	// main loop
	// (runs until "done" is set; it is already set here if any of the initialization steps above failed)
	while(!done) {
		// event handling
		eevt->handle_events();
		
		// set caption (app name and fps count)
		if(e->is_new_fps_count()) {
			// the stream is static and is reset after each use
			static stringstream caption;
			caption << APPLICATION_NAME << " | FPS: " << e->get_fps();
			caption << " | Cam: " << float3(-*e->get_position());
			caption << " " << cam->get_rotation();
			e->set_caption(caption.str().c_str());
			core::reset(&caption);
		}
		
		// render
		e->start_draw();
		
		if(ni_update) ni->run();
		cam->run();
		sce->draw();
		
		// draw debug texture
		if(show_debug_texture) {
			if(debug_tex->width > 0 && debug_tex->height > 0) {
				e->start_2d_draw();
				size_t draw_width = debug_tex->width, draw_height = debug_tex->height;
				// scale the texture down (keeping its aspect ratio) if its dominant side is larger than the screen
				float ratio = float(draw_width) / float(draw_height);
				float scale = 1.0f;
				if(ratio >= 1.0f && draw_width > e->get_width()) {
					scale = float(e->get_width()) / float(draw_width);
				}
				else if(ratio < 1.0f && draw_height > e->get_height()) {
					scale = float(e->get_height()) / float(draw_height);
				}
				// note: size_t *= float, the result is converted back to an integer size
				draw_width *= scale;
				draw_height *= scale;
				egfx->draw_textured_color_rectangle(gfx::rect(0, 0,
															  (unsigned int)draw_width,
															  (unsigned int)draw_height),
													coord(0.0f, 1.0f), coord(1.0f, 0.0f),
													float4(1.0f, 1.0f, 1.0f, 0.0f),
													float4(0.0f, 0.0f, 0.0f, 1.0f),
													debug_tex->tex());
				e->stop_2d_draw();
			}
		}
		
		e->stop_draw();
		
		// for debugging purposes only: reset players (set if opencl kernels are reloaded)
		if(debug_players_reset) {
			debug_players_reset = false;
			ni_handler::reset_players();
		}
	}
	
	// NOTE(review): presumably cleared so that the debug texture object doesn't release
	// a texture it doesn't own - confirm against texture_object
	debug_tex->tex_num = 0;
	
	// cleanup
	eevt->remove_event_handler(key_handler_fnctr);
	eevt->remove_event_handler(mouse_handler_fnctr);
	eevt->remove_event_handler(quit_handler_fnctr);
	
	r->delete_buffer(particle_debug_fbo);
	
	delete ah;
	delete f;
	
	ni_handler::destroy();
	delete mb;
	delete mp;
	
	// "mat", "models" and "lights" are not created in this function
	// (presumably by create_scene() - verify)
	delete mat;
	for(const auto& model : models) {
		delete model;
	}
	models.clear();
	for(const auto& l : lights) {
		delete l;
	}
	lights.clear();
	if(bmodel != nullptr) delete bmodel;
	
	// scene and camera were constructed with the engine pointer and are deleted before the engine itself
	delete sce;
	delete cam;
	delete e;
	
	return 0;
}
void oclraster_program::process_program(const string& raw_code, const kernel_spec default_spec) { // preprocess const string code = preprocess_code(raw_code); // parse static const array<const pair<const char*, const STRUCT_TYPE>, 6> oclraster_struct_types { { { u8"oclraster_in", STRUCT_TYPE::INPUT }, { u8"oclraster_out", STRUCT_TYPE::OUTPUT }, { u8"oclraster_uniforms", STRUCT_TYPE::UNIFORMS }, { u8"oclraster_buffers", STRUCT_TYPE::BUFFERS }, { u8"oclraster_images", STRUCT_TYPE::IMAGES }, { u8"oclraster_framebuffer", STRUCT_TYPE::FRAMEBUFFER } } }; static const set<const string> specifiers { "read_only", "write_only", "read_write" }; // current oclraster_struct grammar limitations/requirements: // * no interior/nested structs/unions // * no multi-variable declarations (e.g. "float x, y, z;") // * no __attribute__ (oclraster_structs already have a __attribute__ qualifier) // * use of any oclraster_struct specifier in other places is disallowed (no typedefs, comments, ...) // * otherwise standard OpenCL C // // example: // oclraster_in vertex_input { // float4 vertex; // float4 normal; // float2 tex_coord; // } inputs; // vector<size2> image_struct_positions; try { // parse and extract for(const auto& type : oclraster_struct_types) { size_t struct_pos = 0; while((struct_pos = code.find(type.first, struct_pos)) != string::npos) { // find first ' ' space char and open '{' bracket and extract the structs name const size_t space_pos = code.find_first_of(" {", struct_pos); const size_t open_bracket_pos = code.find("{", struct_pos); if(space_pos == string::npos || code[space_pos] == '{') throw oclraster_exception("no struct name"); if(open_bracket_pos == string::npos) throw oclraster_exception("no struct open bracket"); const string struct_name = core::trim(code.substr(space_pos+1, open_bracket_pos-space_pos-1)); //oclr_msg("struct type: \"%s\"", type.first); //oclr_msg("struct name: \"%s\"", struct_name); // open/close bracket match size_t bracket_pos = open_bracket_pos; 
size_t open_bracket_count = 1; while(open_bracket_count > 0) { bracket_pos = code.find_first_of("{}", bracket_pos + 1); if(bracket_pos == string::npos) throw oclraster_exception("struct open/close bracket mismatch"); code[bracket_pos] == '{' ? open_bracket_count++ : open_bracket_count--; } const size_t close_bracket_pos = bracket_pos; // const size_t end_semicolon_pos = code.find(";", close_bracket_pos+1); if(end_semicolon_pos == string::npos) { throw oclraster_exception("end-semicolon missing from struct \""+struct_name+"\"!"); } const string object_name = core::trim(code.substr(close_bracket_pos+1, end_semicolon_pos-close_bracket_pos-1)); //oclr_msg("object name: \"%s\"", object_name); // string struct_interior = code.substr(open_bracket_pos+1, close_bracket_pos-open_bracket_pos-1); //oclr_msg("struct interior:\n\t%s\n", struct_interior); // strip unnecessary whitespace and comments, and condense static const regex rx_space("\\s+", regex::optimize); static const regex rx_semicolon_space("[ ]*;[ ]*", regex::optimize); static const regex rx_newline("\n|\r", regex::optimize); static const regex rx_comments_sl("//(.*)", regex::optimize); static const regex rx_comments_ml("/\\*(.*)\\*/", regex::optimize); struct_interior = regex_replace(struct_interior, rx_comments_sl, ""); struct_interior = regex_replace(struct_interior, rx_newline, ""); struct_interior = regex_replace(struct_interior, rx_comments_ml, ""); struct_interior = regex_replace(struct_interior, rx_space, " "); struct_interior = regex_replace(struct_interior, rx_semicolon_space, ";"); struct_interior = core::trim(struct_interior); //oclr_msg("post-regex interior: >%s<", struct_interior); // extract all member variables vector<string> variable_names, variable_types, variable_specifiers; size_t semicolon_pos = 0, last_semicolon_pos = 0; while((semicolon_pos = struct_interior.find(";", last_semicolon_pos)) != string::npos) { const string var_decl = struct_interior.substr(last_semicolon_pos, 
semicolon_pos-last_semicolon_pos); //oclr_msg("decl: >%s<", var_decl); const size_t name_start_pos = var_decl.rfind(" "); if(name_start_pos == string::npos) { throw oclraster_exception("invalid variable declaration: \""+var_decl+"\""); } const string var_name = var_decl.substr(name_start_pos+1, var_decl.length()-name_start_pos-1); //oclr_msg("name: >%s<", var_name); variable_names.emplace_back(var_name); // check if type has an additional specifier (for images: read_only, write_only, read_write) const size_t type_start_pos = var_decl.find(" "); const string start_token = var_decl.substr(0, type_start_pos); if(specifiers.find(start_token) != specifiers.end()) { const string var_type = regex_replace(var_decl.substr(type_start_pos+1, name_start_pos-type_start_pos-1), rx_space, ""); // need to strip any whitespace //oclr_msg("type (s): >%s<", var_type); variable_types.emplace_back(var_type); const string var_spec = var_decl.substr(0, type_start_pos); //oclr_msg("spec: >%s<", var_spec); variable_specifiers.emplace_back(var_spec); } else { const string var_type = core::trim(var_decl.substr(0, name_start_pos)); //oclr_msg("type: >%s<", var_type); variable_types.emplace_back(var_type); variable_specifiers.emplace_back(""); } // continue last_semicolon_pos = semicolon_pos+1; } // create info struct if(type.second != STRUCT_TYPE::IMAGES && type.second != STRUCT_TYPE::FRAMEBUFFER) { const bool empty = (variable_names.size() == 0); // can't use variable_names when moving structs.push_back(new oclraster_struct_info { type.second, size2(struct_pos, end_semicolon_pos+1), struct_name, object_name, std::move(variable_names), std::move(variable_types), std::move(variable_specifiers), empty, {} }); } else { image_struct_positions.emplace_back(size2 { struct_pos, end_semicolon_pos+1 }); process_image_struct(variable_names, variable_types, variable_specifiers, (type.second == STRUCT_TYPE::FRAMEBUFFER)); } // continue struct_pos++; } } // process found structs for(auto& oclr_struct : 
structs) { if(oclr_struct->empty) continue; if(oclr_struct->type == STRUCT_TYPE::BUFFERS) continue; generate_struct_info_cl_program(*oclr_struct); } // order sort(structs.begin(), structs.end(), [](const oclraster_struct_info* info_0, const oclraster_struct_info* info_1) -> bool { return info_0->code_pos.x < info_1->code_pos.x; }); // write framebuffer struct bool has_framebuffer = false; string framebuffer_code = "typedef struct __attribute__((packed)) {\n"; for(size_t i = 0, fb_img_idx = 0, image_count = images.image_names.size(); i < image_count; i++) { if(!images.is_framebuffer[i]) continue; has_framebuffer = true; // TODO: for now, let the access always be read/write, so "const data" (depth) can be modified // between user program calls (downside: user has also read/write access -> create 2 structs?) /*if(images.image_specifiers[i] == ACCESS_TYPE::READ) { framebuffer_code += "const "; }*/ framebuffer_code += "###OCLRASTER_FRAMEBUFFER_IMAGE_" + size_t2string(fb_img_idx) + "### " + images.image_names[i] + ";\n"; fb_img_idx++; } framebuffer_code += "} oclraster_framebuffer;\n"; // recreate structs (in reverse, so that the offsets stay valid) processed_code = code; const size_t struct_count = structs.size() + image_struct_positions.size(); for(size_t i = 0, cur_struct = structs.size(), cur_image = image_struct_positions.size(); i < struct_count; i++) { // figure out which struct comes next (normal struct or image struct) size_t image_code_pos = 0, struct_code_pos = 0; if(cur_image > 0) image_code_pos = image_struct_positions[cur_image-1].x; if(cur_struct > 0) struct_code_pos = structs[cur_struct-1]->code_pos.x; // image if(image_code_pos > struct_code_pos) { cur_image--; processed_code.erase(image_struct_positions[cur_image].x, image_struct_positions[cur_image].y - image_struct_positions[cur_image].x); // insert framebuffer struct code at the last image or framebuffer struct position if(has_framebuffer && cur_image == (image_struct_positions.size()-1)) { 
processed_code.insert(image_struct_positions[cur_image].x, framebuffer_code); } } // struct else { cur_struct--; const oclraster_struct_info& oclr_struct = *structs[cur_struct]; processed_code.erase(oclr_struct.code_pos.x, oclr_struct.code_pos.y - oclr_struct.code_pos.x); if(!oclr_struct.empty && oclr_struct.type != STRUCT_TYPE::BUFFERS) { string struct_code = ""; switch(oclr_struct.type) { case STRUCT_TYPE::INPUT: struct_code += "oclraster_in"; break; case STRUCT_TYPE::OUTPUT: struct_code += "oclraster_out"; break; case STRUCT_TYPE::UNIFORMS: struct_code += "oclraster_uniforms"; break; case STRUCT_TYPE::BUFFERS: case STRUCT_TYPE::IMAGES: case STRUCT_TYPE::FRAMEBUFFER: oclr_unreachable(); } struct_code += " {\n"; for(size_t var_index = 0; var_index < oclr_struct.variables.size(); var_index++) { struct_code += oclr_struct.variable_types[var_index] + " " + oclr_struct.variables[var_index] + ";\n"; } struct_code += "} " + oclr_struct.name + ";\n"; processed_code.insert(oclr_struct.code_pos.x, struct_code); } } } // remove empty structs for(auto iter = structs.begin(); iter != structs.end();) { if((*iter)->empty) { delete *iter; iter = structs.erase(iter); } else iter++; } // build entry function parameter string const string entry_function_params = create_entry_function_parameters(); // check if entry function exists, and if so, replace it with a modified function name const regex rx_entry_function("("+entry_function+")\\s*\\(\\s*\\)", regex::optimize); if(!regex_search(code, rx_entry_function)) { throw oclraster_exception("entry function \""+entry_function+"\" not found!"); } processed_code = regex_replace(processed_code, rx_entry_function, "OCLRASTER_FUNC oclraster_user_"+entry_function+"("+entry_function_params+")"); // create default/first/hinted image spec, do the final processing and compile kernel_spec spec { default_spec }; if(!images.image_names.empty() && spec.image_spec.size() != images.image_names.size()) { // create kernel image spec for the hinted or 
default image specs // note: if default_spec already contains some image_spec entries, only insert the remaining ones for(size_t i = spec.image_spec.size(); i < images.image_names.size(); i++) { spec.image_spec.emplace_back(images.image_hints[i].is_valid() ? images.image_hints[i] : image_type { IMAGE_TYPE::UINT_8, IMAGE_CHANNEL::RGBA }); } } else if(images.image_names.empty() && !spec.image_spec.empty()) { // default spec contains image_spec entries, but the are no images -> clear spec.image_spec.clear(); } // else: no images in kernel/program -> just one kernel / "empty image spec" build_kernel(spec); } catch(oclraster_exception& ex) { invalidate(ex.what()); } valid = true; }
string oclraster_program::create_user_kernel_parameters(const kernel_spec& spec, vector<string>& image_decls, const bool const_output) const { // creates user buffer dependent kernel parameters string (buffers will be called "user_buffer_#") string kernel_parameters = ""; size_t user_buffer_count = 0; for(const auto& oclr_struct : structs) { // if(oclr_struct->type == oclraster_program::STRUCT_TYPE::BUFFERS) { for(size_t i = 0, buffer_entries = oclr_struct->variables.size(); i < buffer_entries; i++) { kernel_parameters += "global " + oclr_struct->variable_types[i] + " user_buffer_"+size_t2string(user_buffer_count)+",\n"; user_buffer_count++; } continue; } // switch(oclr_struct->type) { case oclraster_program::STRUCT_TYPE::INPUT: kernel_parameters += "global const "; break; case oclraster_program::STRUCT_TYPE::OUTPUT: kernel_parameters += "global "; if(const_output) kernel_parameters += "const "; break; case oclraster_program::STRUCT_TYPE::UNIFORMS: kernel_parameters += "constant "; break; case oclraster_program::STRUCT_TYPE::BUFFERS: case oclraster_program::STRUCT_TYPE::IMAGES: case oclraster_program::STRUCT_TYPE::FRAMEBUFFER: oclr_unreachable(); } kernel_parameters += oclr_struct->name + "* user_buffer_"+size_t2string(user_buffer_count)+",\n"; user_buffer_count++; } for(size_t i = 0, img_count = images.image_names.size(); i < img_count; i++) { string type_str = ""; if(!spec.image_spec[i].native) { // buffer based image type_str = "global "; if(images.image_specifiers[i] == ACCESS_TYPE::READ && !images.is_framebuffer[i]) { type_str += "const "; } if(spec.image_spec[i].data_type == IMAGE_TYPE::FLOAT_16) { // if cl_khr_fp16 is not supported (-> all implementations ...), half vector types are not supported // and structs containing halfs (the workaround) are not allowed as kernel function parameter types // -> use custom half pointer type for distinction and later type-casting (note: type is correctly aligned) type_str += "oclr_"; } type_str += 
spec.image_spec[i].to_string(); type_str += "* "; if(images.is_framebuffer[i]) type_str += "oclr_framebuffer_"; } else { // native image if(images.image_specifiers[i] == ACCESS_TYPE::READ) { type_str += "read_only "; } else if(images.image_specifiers[i] == ACCESS_TYPE::WRITE) { type_str += "write_only "; } else { // this shouldn't actually happen and be caught much earlier, but you never know ... oclr_error("native images can not have a read_write access qualifier!"); type_str += "read_only "; // default to read_only } type_str += "image2d_t "; } type_str += images.image_names[i]; kernel_parameters += type_str+",\n"; image_decls.emplace_back(type_str); } return kernel_parameters; }
static void show_run(const struct run *run) { /* The animals came in two by two ... */ const struct chunk *cnk0 = &run->rn_chunk0; const struct chunk *cnk1 = &run->rn_chunk1; size_t nl_cnt0 = cnk0->ch_last.ps_nl_cnt - cnk0->ch_first.ps_nl_cnt; size_t nl_cnt1 = cnk1->ch_last.ps_nl_cnt - cnk1->ch_first.ps_nl_cnt; FILE *f0; FILE *f1; /* display heading of chunk */ if (!is_set_option('d')) { /* no assumptions about the lengths of the file names! */ size_t size = run->rn_size; int pos = 0; pos += pr_head(cnk0); while (pos < max_line_length + 1) { pos += prs(" "); } pos += prs("|"); pos += pr_head(cnk1); while (pos < 2*max_line_length - unslen(size)) { pos += prs(" "); } fprintf(Output_File, "[%s]\n", size_t2string(size)); } else { (void)pr_head(cnk0); fprintf(Output_File, "\n"); (void)pr_head(cnk1); fprintf(Output_File, "\n"); } /* stop if that suffices */ if (is_set_option('n')) return; /* ... had enough so soon ... */ /* open the files that hold the chunks */ f0 = open_chunk(cnk0); f1 = open_chunk(cnk1); /* display the chunks in the required format */ if (!is_set_option('d')) { /* fill 2-column lines and print them */ while (nl_cnt0 != 0 || nl_cnt1 != 0) { if (nl_cnt0) { fill_line(f0, line0); nl_cnt0--; } else { clear_line(line0); } if (nl_cnt1) { fill_line(f1, line1); nl_cnt1--; } else { clear_line(line1); } show_2C_line(line0, line1); } } else { /* display the lines in a diff(1)-like format */ while (nl_cnt0--) { show_1C_line(f0, "<"); } fprintf(Output_File, "---\n"); while (nl_cnt1--) { show_1C_line(f1, ">"); } } /* close the pertinent files */ fclose(f0); fclose(f1); }
static int pru(size_t u) {
	/*	Writes u to Output_File and returns the value of unslen(u),
		which the callers add to their output position.
	*/
	const char *text = size_t2string(u);

	fprintf(Output_File, "%s", text);
	return unslen(u);
}
static void show_run(const struct run *run) { max_line_length = Page_Width / 2 - 1; max_line_length_UTF8 = max_line_length * FONT_SIZE; /* The animals came in two by two ... */ const struct chunk *cnk0 = &run->rn_chunk0; const struct chunk *cnk1 = &run->rn_chunk1; size_t nl_cnt0 = cnk0->ch_last.ps_nl_cnt - cnk0->ch_first.ps_nl_cnt; size_t nl_cnt1 = cnk1->ch_last.ps_nl_cnt - cnk1->ch_first.ps_nl_cnt; FILE *f0; FILE *f1; /* display heading of chunk */ if (!is_set_option('d')) { /* no assumptions about the lengths of the file names! */ size_t size = run->rn_size; int pos = print_header(cnk0); print_spaces(max_line_length - pos); print_char('|'); pos = print_header(cnk1); print_spaces(max_line_length - pos - length_size_t(size) - 2); fprintf(Output_File, "[%s]\n", size_t2string(size)); } else { /* diff-like format */ (void)print_header(cnk0); print_char('\n'); (void)print_header(cnk1); print_char('\n'); } /* stop if that suffices */ if (is_set_option('n')) return; /* ... had enough so soon ... */ /* open the files that hold the chunks */ f0 = open_chunk(cnk0); f1 = open_chunk(cnk1); /* display the chunks in the required format */ if (!is_set_option('d')) { /* print 2-column format */ while (nl_cnt0 != 0 || nl_cnt1 != 0) { int pos_UTF8 = 0; if (nl_cnt0) { pos_UTF8 = print_UTF8_line(f0); nl_cnt0--; } print_UTF8_spaces(max_line_length_UTF8 - pos_UTF8); print_char('|'); if (nl_cnt1) { (void)print_UTF8_line(f1); nl_cnt1--; } print_char('\n'); } } else { /* display the chunks in a diff(1)-like format */ while (nl_cnt0--) { show_1C_line(f0, "<"); } (void)print_string("---\n"); while (nl_cnt1--) { show_1C_line(f1, ">"); } } /* close the pertinent files */ fclose(f0); fclose(f1); }
static int print_size_t(size_t u) {
	/*	Writes u to Output_File and returns the value of
		length_size_t(u).
	*/
	const char *text = size_t2string(u);

	fprintf(Output_File, "%s", text);
	return length_size_t(u);
}
string transform_program::specialized_processing(const string& code, const kernel_spec& spec) { // insert (processed) user code into template program string program_code = template_transform_program; core::find_and_replace(program_code, "//###OCLRASTER_USER_CODE###", code); // vector<string> image_decls; const string kernel_parameters { create_user_kernel_parameters(spec, image_decls, false) }; core::find_and_replace(program_code, "//###OCLRASTER_USER_STRUCTS###", kernel_parameters); // insert main call + prior buffer handling string buffer_handling_code = ""; string output_handling_code = ""; string main_call_parameters = ""; size_t cur_user_buffer = 0; for(const auto& oclr_struct : structs) { const string cur_user_buffer_str = size_t2string(cur_user_buffer); switch(oclr_struct->type) { case oclraster_program::STRUCT_TYPE::INPUT: buffer_handling_code += oclr_struct->name + " user_buffer_element_" + cur_user_buffer_str + " = user_buffer_"+cur_user_buffer_str+"[vertex_id];\n"; main_call_parameters += "&user_buffer_element_" + cur_user_buffer_str + ", "; break; case oclraster_program::STRUCT_TYPE::OUTPUT: buffer_handling_code += oclr_struct->name + " user_buffer_element_" + cur_user_buffer_str + ";\n"; main_call_parameters += "&user_buffer_element_" + cur_user_buffer_str + ", "; for(const auto& var : oclr_struct->variables) { output_handling_code += "user_buffer_" + cur_user_buffer_str + "[instance_vertex_id]." + var + " = "; output_handling_code += "user_buffer_element_" + cur_user_buffer_str + "." 
+ var + ";\n"; } break; case oclraster_program::STRUCT_TYPE::UNIFORMS: buffer_handling_code += ("const " + oclr_struct->name + " user_buffer_element_" + cur_user_buffer_str + " = *user_buffer_" + cur_user_buffer_str + ";\n"); main_call_parameters += "&user_buffer_element_" + cur_user_buffer_str + ", "; break; case oclraster_program::STRUCT_TYPE::BUFFERS: { const size_t buffer_entries = oclr_struct->variables.size(); if(buffer_entries > 0) { for(size_t i = 0; i < buffer_entries; i++) { main_call_parameters += "user_buffer_" + size_t2string(cur_user_buffer) + ", "; cur_user_buffer++; } cur_user_buffer--; // prevent double-increase } } break; case oclraster_program::STRUCT_TYPE::IMAGES: case oclraster_program::STRUCT_TYPE::FRAMEBUFFER: oclr_unreachable(); } cur_user_buffer++; } for(const auto& img : images.image_names) { main_call_parameters += img + ", "; } main_call_parameters += "vertex_id, instance_id, camera_position"; // the same for all transform programs core::find_and_replace(program_code, "//###OCLRASTER_USER_PRE_MAIN_CALL###", buffer_handling_code); core::find_and_replace(program_code, "//###OCLRASTER_USER_MAIN_CALL###", "oclraster_user_"+entry_function+"("+main_call_parameters+");"); core::find_and_replace(program_code, "//###OCLRASTER_USER_OUTPUT_COPY###", output_handling_code); // replace remaining image placeholders for(size_t i = 0, img_count = image_decls.size(); i < img_count; i++) { core::find_and_replace(program_code, "//###OCLRASTER_IMAGE_"+size_t2string(i)+"###", image_decls[i]); } // done //oclr_msg("generated transform user program: %s", program_code); return program_code; }
void kernel_to_ptx(const string& identifier, const string& file_name, const string& func_name, std::function<string(const CC_TARGET&)> additional_options_fnc) { a2e_debug("compiling \"%s\" kernel!", identifier); // stringstream buffer(stringstream::in | stringstream::out); if(!file_io::file_to_buffer(file_name, buffer)) { a2e_error("failed to read kernel source!"); return; } const string src(buffer.str()); // nvcc compile for(const auto& target : cc_targets) { const string cc_target_str = target.second; // generate options string options = "-I " + kernel_path; string nvcc_log = ""; const string additional_options(additional_options_fnc(target.first)); if(!additional_options.empty()) { // convert all -DDEFINEs to -D DEFINE options += " " + core::find_and_replace(additional_options, "-D", "-D "); } // add kernel const string tmp_name = "/tmp/cudacl_tmp_"+identifier+"_"+cc_target_str+"_"+size_t2string(SDL_GetPerformanceCounter()); vector<cudacl_kernel_info> kernels_info; string cuda_source = ""; cudacl_translate(tmp_name, src.c_str(), options, cuda_source, kernels_info); // create tmp cu file fstream cu_file(tmp_name+".cu", fstream::out); cu_file << cuda_source << endl; cu_file.close(); // string build_cmd = "/usr/local/cuda/bin/nvcc --ptx --machine 64 -arch sm_" + cc_target_str + " -O3"; build_cmd += " " + options; // build_cmd += " -D NVIDIA"; build_cmd += " -D GPU"; build_cmd += " -D PLATFORM_NVIDIA"; build_cmd += " -o "+tmp_name+".ptx"; build_cmd += " "+tmp_name+".cu"; //build_cmd += " 2>&1"; core::system(build_cmd.c_str(), nvcc_log); // read ptx stringstream ptx_buffer(stringstream::in | stringstream::out); if(!file_io::file_to_buffer(tmp_name+".ptx", ptx_buffer)) { a2e_error("ptx file \"%s\" doesn't exist!", tmp_name+".ptx"); return; } // write to cache // ptx: file_io ptx_out(cache_path+identifier+"_"+cc_target_str+".ptx", file_io::OPEN_TYPE::WRITE); if(!ptx_out.is_open()) { a2e_error("couldn't create ptx cache file for %s!", identifier); return; } const string 
ptx_data(ptx_buffer.str()); ptx_out.write_block(ptx_data.c_str(), ptx_data.size()); ptx_out.close(); // kernel info: file_io info_out(cache_path+identifier+"_info_"+cc_target_str+".txt", file_io::OPEN_TYPE::WRITE); if(!info_out.is_open()) { a2e_error("couldn't create kernel info cache file for %s!", identifier); return; } auto& info_stream = *info_out.get_filestream(); bool found = false; for(const auto& info : kernels_info) { if(info.name == func_name) { found = true; info_stream << info.name << " " << info.parameters.size(); for(const auto& param : info.parameters) { info_stream << " " << get<0>(param); info_stream << " " << (unsigned int)get<1>(param); info_stream << " " << (unsigned int)get<2>(param); info_stream << " " << (unsigned int)get<3>(param); } break; } } info_out.close(); if(!found) a2e_error("kernel function \"%s\" does not exist in source file!", func_name); } task_counter--; }