/// Builds a wavelet-tree index from the contents of `file` and swaps it into `idx`.
///
/// Two paths exist:
///  - Direct path: taken when the index alphabet width is 8 and `num_bytes <= 1`,
///    or when the width is 0 and `num_bytes != 'd'`. The input file is opened
///    through an `int_vector_buffer` and handed to the index constructor.
///  - Fallback path: the input is loaded into an `int_vector`, written to a
///    temporary cache file, and the index is built from a buffer over that file.
///    The temporary file is removed afterwards, also when construction throws.
///
/// \param idx        Index that receives the constructed structure (via swap).
/// \param file       Name of the input file.
/// \param config     Cache configuration; used to derive the temporary file name.
/// \param num_bytes  Input format selector forwarded to the loaders.
///                   NOTE(review): 'd' presumably marks a decimal text format -
///                   confirm against load_vector_from_file.
void construct(t_index& idx, const std::string& file, cache_config& config, uint8_t num_bytes, wt_tag)
{
    auto event = memory_monitor::event("construct wavelet tree");

    const bool byte_input_direct = (t_index::alphabet_category::WIDTH == 8 and num_bytes <= 1);
    const bool int_input_direct  = (t_index::alphabet_category::WIDTH == 0 and num_bytes != 'd');

    if (byte_input_direct or int_input_direct) {
        // The file can be read in place through a buffer; no intermediate copy is needed.
        int_vector_buffer<t_index::alphabet_category::WIDTH> text_buf(
            file, std::ios::in, 1024*1024, num_bytes*8, static_cast<bool>(num_bytes));
        t_index tmp(text_buf, text_buf.size());
        idx.swap(tmp);
    } else {
        int_vector<t_index::alphabet_category::WIDTH> text;
        load_vector_from_file(text, file, num_bytes);

        // Temporary file name is built from the process id and a library-provided id.
        std::string tmp_key = util::to_string(util::pid())+"_"+util::to_string(util::id());
        std::string tmp_file_name = cache_file_name(tmp_key, config);

        try {
            store_to_file(text, tmp_file_name);
            // Release the in-memory copy before the index is built.
            util::clear(text);

            int_vector_buffer<t_index::alphabet_category::WIDTH> text_buf(tmp_file_name);
            t_index tmp(text_buf, text_buf.size());
            idx.swap(tmp);
        } catch (...) {
            // text_buf has already been destroyed by stack unwinding here, so the
            // temporary file can be removed before the exception propagates.
            sdsl::remove(tmp_file_name);
            throw;
        }
        // text_buf went out of scope with the try block; drop the temporary file.
        sdsl::remove(tmp_file_name);
    }
}
void construct(t_index& idx, const std::string& file, cache_config& config, uint8_t num_bytes, csa_tag) { auto event = memory_monitor::event("construct CSA"); const char* KEY_TEXT = key_text_trait<t_index::alphabet_category::WIDTH>::KEY_TEXT; const char* KEY_BWT = key_bwt_trait<t_index::alphabet_category::WIDTH>::KEY_BWT; typedef int_vector<t_index::alphabet_category::WIDTH> text_type; { auto event = memory_monitor::event("parse input text"); // (1) check, if the text is cached if (!cache_file_exists(KEY_TEXT, config)) { text_type text; load_vector_from_file(text, file, num_bytes); if (contains_no_zero_symbol(text, file)) { append_zero_symbol(text); store_to_cache(text,KEY_TEXT, config); } } register_cache_file(KEY_TEXT, config); } { // (2) check, if the suffix array is cached auto event = memory_monitor::event("SA"); if (!cache_file_exists(conf::KEY_SA, config)) { construct_sa<t_index::alphabet_category::WIDTH>(config); } register_cache_file(conf::KEY_SA, config); } { // (3) construct BWT auto event = memory_monitor::event("BWT"); if (!cache_file_exists(KEY_BWT, config)) { construct_bwt<t_index::alphabet_category::WIDTH>(config); } register_cache_file(KEY_BWT, config); } { // (4) use BWT to construct the CSA auto event = memory_monitor::event("construct CSA"); t_index tmp(config); idx.swap(tmp); } if (config.delete_files) { auto event = memory_monitor::event("delete temporary files"); util::delete_all_files(config.file_map); } }