void testUtf8String(std::string const & fn) { ::libmaus::util::Utf8String::shared_ptr_type us = ::libmaus::util::Utf8String::constructRaw(fn); ::libmaus::util::Utf8StringPairAdapter usp(us); ::libmaus::util::Utf8DecoderWrapper decwr(fn); uint64_t const numsyms = ::libmaus::util::GetFileSize::getFileSize(decwr); assert ( us->size() == numsyms ); for ( uint64_t i = 0; i < numsyms; ++i ) { assert ( static_cast<wchar_t>(decwr.get()) == us->get(i) ); assert ( us->get(i) == ((usp[2*i] << 12) | (usp[2*i+1])) ); } ::std::map<int64_t,uint64_t> const chist = us->getHistogramAsMap(); ::libmaus::huffman::HuffmanTreeNode::shared_ptr_type htree = ::libmaus::huffman::HuffmanBase::createTree(chist); ::libmaus::huffman::EncodeTable<1> ET(htree.get()); for ( ::std::map<int64_t,uint64_t>::const_iterator ita = chist.begin(); ita != chist.end(); ++ita ) { ::libmaus::util::UTF8::encodeUTF8(ita->first,std::cerr); std::cerr << "\t" << ita->second << "\t" << ET.printCode(ita->first) << std::endl; } }
/**
 * Build a reformatted copy of sp: units are emitted in an order derived from
 * unit_species_comparerator, and non-empty, non-wildcard bond labels
 * (site.second.second) are replaced by consecutive decimal numbers starting
 * at "1", assigned in order of first appearance in the new unit order.
 * Equal original labels map to the same new label.
 *
 * @param sp species to reformat (not modified)
 * @return a new Species holding the relabelled copies of the units of sp
 */
Species format_species(const Species& sp)
{
    typedef unit_species_comparerator::index_type index_type;
    typedef std::vector<index_type> index_vector;
    typedef utils::get_mapper_mf<std::string, std::string>::type label_map_type;

    unit_species_comparerator comp(sp);

    // Identity ordering 0 .. num_units-1, then sorted with the comparator.
    index_vector units;
    for (index_type n(0); n < sp.num_units(); ++n)
    {
        units.push_back(n);
    }
    std::sort(units.begin(), units.end(), comp);

    // Every slot starts at num_units(). reorder_units is called once per unit,
    // in sorted order, sharing one running counter.
    // NOTE(review): presumably this fills `next` with a position for every
    // unit -- the inversion below relies on that; confirm in reorder_units.
    index_vector next(sp.num_units(), sp.num_units());
    unsigned int stride(0);
    for (index_vector::const_iterator u(units.begin()); u != units.end(); ++u)
    {
        const index_type idx(*u);
        comp.reorder_units(next, idx, stride);
    }

    // Invert the mapping: units[p] becomes the unit whose `next` entry is p.
    for (unsigned int unit(0); unit < sp.num_units(); ++unit)
    {
        units[next[unit]] = unit;
    }

    Species newsp;
    label_map_type cache;   // original bond label -> new bond label
    std::stringstream ss;   // reused for number-to-string conversion
    stride = 1;             // next fresh bond number

    for (index_vector::const_iterator u(units.begin()); u != units.end(); ++u)
    {
        UnitSpecies usp(sp.at(*u));
        const UnitSpecies::container_type::size_type nsites(
            static_cast<UnitSpecies::container_type::size_type>(usp.num_sites()));

        for (UnitSpecies::container_type::size_type s(0); s < nsites; ++s)
        {
            UnitSpecies::container_type::value_type& site(usp.at(s));

            // Unbound sites and wildcard bonds keep their label.
            if (site.second.second == "" || is_wildcard(site.second.second))
            {
                continue;
            }

            label_map_type::const_iterator known(cache.find(site.second.second));
            if (known != cache.end())
            {
                // Label seen before: reuse the number it was given.
                site.second.second = (*known).second;
                continue;
            }

            // First occurrence: hand out the next number and remember it.
            ss << stride;
            cache.insert(std::make_pair(site.second.second, ss.str()));
            site.second.second = ss.str();
            ++stride;

            // Reset the stream (state flags and buffer) for the next use.
            ss.clear();
            ss.str("");
        }

        newsp.add_unit(usp);
    }

    return newsp;
}