示例#1
0
void kernel_to_ptx(const string& identifier, const string& file_name, const string& func_name,
				   std::function<string(const CC_TARGET&)> additional_options_fnc) {
	a2e_debug("compiling \"%s\" kernel!", identifier);
	
	//
	stringstream buffer(stringstream::in | stringstream::out);
	if(!file_io::file_to_buffer(file_name, buffer)) {
		a2e_error("failed to read kernel source!");
		return;
	}
	const string src(buffer.str());
	
	// nvcc compile
	for(const auto& target : cc_targets) {
		const string cc_target_str = target.second;
		
		// generate options
		string options = "-I " + kernel_path;
		string nvcc_log = "";
		const string additional_options(additional_options_fnc(target.first));
		if(!additional_options.empty()) {
			// convert all -DDEFINEs to -D DEFINE
			options += " " + core::find_and_replace(additional_options, "-D", "-D ");
		}
		
		// add kernel
		const string tmp_name = "/tmp/cudacl_tmp_"+identifier+"_"+cc_target_str+"_"+size_t2string(SDL_GetPerformanceCounter());
		vector<cudacl_kernel_info> kernels_info;
		string cuda_source = "";
		cudacl_translate(tmp_name, src.c_str(), options, cuda_source, kernels_info);
		
		// create tmp cu file
		fstream cu_file(tmp_name+".cu", fstream::out);
		cu_file << cuda_source << endl;
		cu_file.close();
		
		//
		string build_cmd = "/usr/local/cuda/bin/nvcc --ptx --machine 64 -arch sm_" + cc_target_str + " -O3";
		build_cmd += " " + options;
		
		//
		build_cmd += " -D NVIDIA";
		build_cmd += " -D GPU";
		build_cmd += " -D PLATFORM_NVIDIA";
		build_cmd += " -o "+tmp_name+".ptx";
		build_cmd += " "+tmp_name+".cu";
		//build_cmd += " 2>&1";
		core::system(build_cmd.c_str(), nvcc_log);
		
		// read ptx
		stringstream ptx_buffer(stringstream::in | stringstream::out);
		if(!file_io::file_to_buffer(tmp_name+".ptx", ptx_buffer)) {
			a2e_error("ptx file \"%s\" doesn't exist!", tmp_name+".ptx");
			return;
		}
		
		// write to cache
		// ptx:
		file_io ptx_out(cache_path+identifier+"_"+cc_target_str+".ptx", file_io::OPEN_TYPE::WRITE);
		if(!ptx_out.is_open()) {
			a2e_error("couldn't create ptx cache file for %s!", identifier);
			return;
		}
		const string ptx_data(ptx_buffer.str());
		ptx_out.write_block(ptx_data.c_str(), ptx_data.size());
		ptx_out.close();
		
		// kernel info:
		file_io info_out(cache_path+identifier+"_info_"+cc_target_str+".txt", file_io::OPEN_TYPE::WRITE);
		if(!info_out.is_open()) {
			a2e_error("couldn't create kernel info cache file for %s!", identifier);
			return;
		}
		auto& info_stream = *info_out.get_filestream();
		bool found = false;
		for(const auto& info : kernels_info) {
			if(info.name == func_name) {
				found = true;
				info_stream << info.name << " " << info.parameters.size();
				for(const auto& param : info.parameters) {
					info_stream << " " << get<0>(param);
					info_stream << " " << (unsigned int)get<1>(param);
					info_stream << " " << (unsigned int)get<2>(param);
					info_stream << " " << (unsigned int)get<3>(param);
				}
				break;
			}
		}
		info_out.close();
		if(!found) a2e_error("kernel function \"%s\" does not exist in source file!", func_name);
	}
	task_counter--;
}
示例#2
0
// Loads a bus_model from a set of whitespace separated text files.
//
// elements_filename:     triangle indices, 3 unsigned ints per entry
// vertices_filename:     vertex positions, 3 floats per entry
// tex_coords_filename:   texture coordinates, 2 floats per entry
// bone_weights_filename: bone weights, 4 floats per entry
// bone_indices_filename: bone indices, 4 unsigned ints per entry
// matrices_filename:     exactly PLAYERS_BONE_COUNT matrices, 16 values each
// Normals are additionally read from the fixed data file "NI-Nrms.txt".
//
// Vertices and the matrix elements 3, 7 and 11 are scaled by 1/10.
// Returns a newly allocated model (owned by the caller), or nullptr if a file can't be read or
// the matrix file doesn't contain exactly PLAYERS_BONE_COUNT matrices.
// Incomplete trailing entries (e.g. only 2 of 3 vertex components) are ignored.
bus_model* model_loader::load(const string& elements_filename,
                              const string& vertices_filename,
                              const string& tex_coords_filename,
                              const string& bone_weights_filename,
                              const string& bone_indices_filename,
                              const string& matrices_filename) {
    stringstream elements_buffer, vertices_buffer, tc_buffer, bweights_buffer, bindices_buffer, bm_buffer;
    if(!f->file_to_buffer(elements_filename, elements_buffer)) {
        a2e_error("couldn't open elements file: %s!", elements_filename);
        return nullptr;
    }
    if(!f->file_to_buffer(vertices_filename, vertices_buffer)) {
        a2e_error("couldn't open vertices file: %s!", vertices_filename);
        return nullptr;
    }
    if(!f->file_to_buffer(tex_coords_filename, tc_buffer)) {
        a2e_error("couldn't open tex coords file: %s!", tex_coords_filename);
        return nullptr;
    }
    if(!f->file_to_buffer(bone_weights_filename, bweights_buffer)) {
        a2e_error("couldn't open bone weights file: %s!", bone_weights_filename);
        return nullptr;
    }
    if(!f->file_to_buffer(bone_indices_filename, bindices_buffer)) {
        a2e_error("couldn't open bone indices file: %s!", bone_indices_filename);
        return nullptr;
    }
    if(!f->file_to_buffer(matrices_filename, bm_buffer)) {
        a2e_error("couldn't open matrices file: %s!", matrices_filename);
        return nullptr;
    }

    stringstream normals_buffer;
    if(!f->file_to_buffer(e->data_path("NI-Nrms.txt"), normals_buffer)) {
        a2e_error("couldn't open normals file: %s!", "NI-Nrms.txt");
        return nullptr;
    }

    // all files are in memory -> allocate the model (must be deleted on every later error path)
    bus_model* bmodel = new bus_model();
    static const float model_scale = 1.0f/10.0f;

    // NOTE: all loops below test the extraction itself instead of eof()/good() *before* reading.
    // checking the stream state up front would add one bogus entry (built from failed reads)
    // whenever a file is empty or ends with whitespace / a newline.

    // read elements
    for(unsigned int i1, i2, i3; elements_buffer >> i1 >> i2 >> i3;) {
        bmodel->indices.push_back(index3(i1, i2, i3));
    }

    // read vertices
    for(float x, y, z; vertices_buffer >> x >> y >> z;) {
        bmodel->vertices.push_back(float3(x, y, z));
    }

    // read tex coords
    for(float u, v; tc_buffer >> u >> v;) {
        bmodel->tex_coords.push_back(coord(u, v));
    }

    // read normals
    for(float x, y, z; normals_buffer >> x >> y >> z;) {
        bmodel->normals.push_back(float3(x, y, z));
    }

    // read bone weights
    for(float wx, wy, wz, ww; bweights_buffer >> wx >> wy >> wz >> ww;) {
        bmodel->bone_weights.push_back(float4(wx, wy, wz, ww));
    }

    // read bone indices
    for(unsigned int ix, iy, iz, iw; bindices_buffer >> ix >> iy >> iz >> iw;) {
        bmodel->bone_indices.push_back(uint4(ix, iy, iz, iw));
    }

    // read inverse matrices
    size_t m_index = 0;
    for(;;) {
        // try to read one complete matrix, stop at the first value that can't be extracted
        matrix4f m;
        bool complete = true;
        for(size_t i = 0; i < 16; i++) {
            if(!(bm_buffer >> m.data[i])) {
                complete = false;
                break;
            }
        }
        if(!complete) break;

        // only a fully read matrix counts as "one too many"
        if(m_index >= PLAYERS_BONE_COUNT) {
            a2e_error("too many matrices!");
            delete bmodel;
            return nullptr;
        }

        bmodel->matrices[m_index] = m;
        // elements 3, 7 and 11 are brought to model scale
        bmodel->matrices[m_index][3] *= model_scale;
        bmodel->matrices[m_index][7] *= model_scale;
        bmodel->matrices[m_index][11] *= model_scale;
        // the joint position uses the negated, *unscaled* values of the matrix as read from file
        bmodel->joint_positions[m_index].set(-m.data[3], -m.data[7], -m.data[11], 1.0f);
        m_index++;
    }
    if(m_index != PLAYERS_BONE_COUNT) {
        a2e_error("too few matrices!");
        delete bmodel;
        return nullptr;
    }

    for(auto& vertex : bmodel->vertices) {
        vertex *= model_scale;
    }

    a2e_debug("model (%s, %s, %s) read: #indices: %u, #vertices: %u, #normals, %u, #tex coords: %u",
              elements_filename, vertices_filename, tex_coords_filename,
              bmodel->indices.size(), bmodel->vertices.size(), bmodel->normals.size(), bmodel->tex_coords.size());

    return bmodel;
}
示例#3
0
// kernelcacher entry point.
// Expects the kernel directory as the first argument, derives the cache directory from it
// ("<parent of kernel dir>/cache/"), then spawns one task per internal kernel (compiled via
// kernel_to_ptx) plus one task that writes a "CACHECRC" file containing the crc32 of every
// non-hidden file in the kernel directory. Blocks until task_counter has dropped to zero.
// Always returns 0.
int main(int argc, char *argv[]) {
	logger::init();
	
	a2e_log("kernelcacher v%u.%u.%u - %s %s", KERNELCACHER_MAJOR_VERSION, KERNELCACHER_MINOR_VERSION, KERNELCACHER_REVISION_VERSION, KERNELCACHER_BUILT_DATE, KERNELCACHER_BUILT_TIME);
	
	string usage = "usage: kernelcacher /path/to/data/kernels";
	// an empty argument is treated like a missing one: calling back() on an empty string
	// (as done below) would be undefined behavior
	if(argc < 2 || argv[1][0] == '\0') {
		a2e_error("no kernel path specified!\n%s", usage.c_str());
		// shut the logger down on this exit path as well (same as on the regular exit)
		logger::destroy();
		return 0;
	}
	kernel_path = argv[1];
	if(kernel_path.back() != '/') kernel_path.push_back('/');
	// strip the last path component (ignoring the trailing '/') and append "/cache/"
	cache_path = kernel_path.substr(0, kernel_path.rfind('/', kernel_path.length()-2)) + "/cache/";
	a2e_debug("caching kernels from \"%s\" to \"%s\" ...", kernel_path, cache_path);
	
	// compile kernels
	// local size limit defines, selected by compute capability (<= sm_13 vs. everything newer)
	const string lsl_sm_1x_str = " -DLOCAL_SIZE_LIMIT=512";
	const string lsl_sm_20p_str = " -DLOCAL_SIZE_LIMIT=1024";
	
	// <identifier, source file, kernel function name, per-target additional options>
	vector<tuple<string, string, string, std::function<string(const CC_TARGET&)>>> internal_kernels {
		{
			make_tuple("PARTICLE_INIT", "particle_spawn.cl", "particle_init",
					   [](const CC_TARGET&) { return " -DA2E_PARTICLE_INIT"; }),
			make_tuple("PARTICLE_RESPAWN", "particle_spawn.cl", "particle_respawn",
					   [](const CC_TARGET&) { return ""; }),
			make_tuple("PARTICLE_COMPUTE", "particle_compute.cl", "particle_compute",
					   [](const CC_TARGET&) { return ""; }),
			make_tuple("PARTICLE_SORT_LOCAL", "particle_sort.cl", "bitonicSortLocal",
					   [&](const CC_TARGET& cc_target) { return (cc_target <= CC_TARGET::SM_13 ? lsl_sm_1x_str : lsl_sm_20p_str); }),
			make_tuple("PARTICLE_SORT_MERGE_GLOBAL", "particle_sort.cl", "bitonicMergeGlobal",
					   [&](const CC_TARGET& cc_target) { return (cc_target <= CC_TARGET::SM_13 ? lsl_sm_1x_str : lsl_sm_20p_str); }),
			make_tuple("PARTICLE_SORT_MERGE_LOCAL", "particle_sort.cl", "bitonicMergeLocal",
					   [&](const CC_TARGET& cc_target) { return (cc_target <= CC_TARGET::SM_13 ? lsl_sm_1x_str : lsl_sm_20p_str); }),
			make_tuple("PARTICLE_COMPUTE_DISTANCES", "particle_sort.cl", "compute_distances",
					   [&](const CC_TARGET& cc_target) { return (cc_target <= CC_TARGET::SM_13 ? lsl_sm_1x_str : lsl_sm_20p_str); }),
		}
	};
	// one task per kernel + the crc task below; must be set before any task is spawned
	task_counter = internal_kernels.size() + 1;
	
	for(const auto& int_kernel : internal_kernels) {
		// the tuple is captured by copy; the option lambdas inside it reference the lsl_* strings
		// above, which stay alive because main() waits for all tasks before returning
		task::spawn([=]() {
			kernel_to_ptx(get<0>(int_kernel),
						  kernel_path+get<1>(int_kernel),
						  get<2>(int_kernel),
						  get<3>(int_kernel));
		});
	}
	
	// crc task: writes "<file name> <crc32 in hex>" for each kernel file
	task::spawn([=]() {
		// the wait loop at the end of main() only terminates once task_counter is zero, so this
		// task must be counted as done on *every* exit path (incl. the error returns below).
		// declared first so that the counter is decremented after the crc file has been closed.
		struct task_done_guard {
			~task_done_guard() { task_counter--; }
		} done_guard;
		
		file_io crc_file(cache_path+"CACHECRC", file_io::OPEN_TYPE::WRITE);
		if(!crc_file.is_open()) {
			a2e_error("couldn't create crc file!");
			return;
		}
		auto& crc_fstream = *crc_file.get_filestream();
		
		const auto kernel_files = core::get_file_list(kernel_path);
		for(const auto& kfile : kernel_files) {
			// skip hidden files (and "." / "..")
			if(kfile.first[0] == '.') continue;
			stringstream buffer(stringstream::in | stringstream::out);
			if(!file_io::file_to_buffer(kernel_path+kfile.first, buffer)) {
				a2e_error("failed to read kernel source!");
				return;
			}
			const string src(buffer.str());
			const unsigned int crc = crc32(crc32(0L, Z_NULL, 0), (const Bytef*)src.c_str(), (uInt)src.size());
			crc_fstream << kfile.first << " " << hex << crc << dec << endl;
		}
		crc_file.close();
	});
	
	// wait until all spawned tasks have finished
	while(task_counter != 0) {
		SDL_Delay(100);
	}

	// done!
	logger::destroy();
	return 0;
}