Example #1
0
void testUtf8String(std::string const & fn)
{
	::libmaus::util::Utf8String::shared_ptr_type us = ::libmaus::util::Utf8String::constructRaw(fn);
	::libmaus::util::Utf8StringPairAdapter usp(us);
	
	::libmaus::util::Utf8DecoderWrapper decwr(fn);
	uint64_t const numsyms = ::libmaus::util::GetFileSize::getFileSize(decwr);
	assert ( us->size() == numsyms );

	for ( uint64_t i = 0; i < numsyms; ++i )
	{
		assert ( static_cast<wchar_t>(decwr.get()) == us->get(i) );
		assert ( 
			us->get(i) ==
			((usp[2*i] << 12) | (usp[2*i+1]))
		);
	}
	
	::std::map<int64_t,uint64_t> const chist = us->getHistogramAsMap();
	::libmaus::huffman::HuffmanTreeNode::shared_ptr_type htree = ::libmaus::huffman::HuffmanBase::createTree(chist);
	::libmaus::huffman::EncodeTable<1> ET(htree.get());
	
	for ( ::std::map<int64_t,uint64_t>::const_iterator ita = chist.begin(); ita != chist.end(); ++ita )
	{
		::libmaus::util::UTF8::encodeUTF8(ita->first,std::cerr);
		std::cerr << "\t" << ita->second << "\t" << ET.printCode(ita->first) << std::endl;
	}
}
Example #2
0
/**
 * Build a new Species from sp with its units re-ordered and the non-empty,
 * non-wildcard values of site.second.second renumbered.
 *
 * Steps:
 *  1. Sort the unit indices 0 .. num_units()-1 with a
 *     unit_species_comparerator constructed from sp.
 *  2. Call reorder_units() on the comparator for every unit, in sorted
 *     order, to fill a position table, then invert that table to obtain
 *     the final unit order.
 *  3. Copy the units in that order. Every site whose second.second value
 *     is neither "" nor accepted by is_wildcard() (presumably the bond
 *     label -- confirm against UnitSpecies) is replaced by a decimal
 *     counter starting at "1", assigned in order of first appearance;
 *     equal original values receive the same replacement.
 *
 * @param sp species to format; it is not modified
 * @return the newly assembled Species
 */
Species format_species(const Species& sp)
{
    typedef unit_species_comparerator::index_type index_type;
    typedef std::vector<index_type> index_container;
    typedef utils::get_mapper_mf<std::string, std::string>::type label_map;
    typedef UnitSpecies::container_type::size_type site_index;

    unit_species_comparerator comparator(sp);

    // Identity ordering, then sorted by the comparator.
    index_container order;
    for (index_type pos(0); pos < sp.num_units(); ++pos)
    {
        order.push_back(pos);
    }
    std::sort(order.begin(), order.end(), comparator);

    // positions[u] is filled in by reorder_units(); every slot starts at
    // num_units(), which is not a valid index.
    index_container positions(sp.num_units(), sp.num_units());
    unsigned int visit_counter(0);
    for (index_container::const_iterator it(order.begin());
        it != order.end(); ++it)
    {
        const index_type unit(*it);
        comparator.reorder_units(positions, unit, visit_counter);
    }

    // Invert the table: the unit assigned position p ends up at order[p].
    for (unsigned int unit(0); unit < sp.num_units(); ++unit)
    {
        order[positions[unit]] = unit;
    }

    Species formatted;
    label_map renamed;          // original value -> replacement value
    unsigned int next_label(1);
    for (index_container::const_iterator it(order.begin());
        it != order.end(); ++it)
    {
        // Work on a copy so that sp itself stays untouched.
        UnitSpecies unit(sp.at(*it));
        const site_index num_sites(
            static_cast<site_index>(unit.num_sites()));
        for (site_index k(0); k < num_sites; ++k)
        {
            UnitSpecies::container_type::value_type& site(unit.at(k));
            if (!(site.second.second == "")
                && !is_wildcard(site.second.second))
            {
                label_map::const_iterator found(
                    renamed.find(site.second.second));
                if (found != renamed.end())
                {
                    // Value seen before: reuse its replacement.
                    site.second.second = (*found).second;
                }
                else
                {
                    // First occurrence: hand out the next counter value.
                    std::stringstream buffer;
                    buffer << next_label;
                    const std::string label(buffer.str());
                    renamed.insert(
                        std::make_pair(site.second.second, label));
                    site.second.second = label;
                    ++next_label;
                }
            }
        }
        formatted.add_unit(unit);
    }
    return formatted;
}