Exemplo n.º 1
0
/// Feeds every feature string of one line into `trie`.
///
/// `textfeats` is walked in order: the outer vector first, then each inner
/// vector of feature strings. For every string, trie.update() is called with
/// the string's bytes, its length and the value 1 (presumably a count
/// increment for that key -- confirm against trie_t).
///
/// @param trie       trie that receives the update() calls.
/// @param textfeats  feature strings, grouped in inner vectors.
/// @return always true.
bool LccrFeaturizer::AccumulateFeatureCountFromLine(
    trie_t &trie, const std::vector<std::vector<std::string>> &textfeats) {
  for (auto group = textfeats.begin(); group != textfeats.end(); ++group) {
    for (auto feature = group->begin(); feature != group->end(); ++feature) {
      const std::string &text = *feature;
      trie.update(text.c_str(), text.size(), 1);
    }
  }
  return true;
}
Exemplo n.º 2
0
/// Copies the keys of `raw` whose value reaches `cutoff` into `filtered` and
/// gives each copied key an id.
///
/// `raw` is traversed with begin()/next(); the int those calls return is
/// compared against `cutoff`. For every key that passes, the key text is
/// recovered with suffix() and inserted into `filtered`. The value stored for
/// it is filtered.num_keys() as read just before the insertion, so with an
/// initially empty `filtered` the ids are 0, 1, 2, ... in traversal order.
///
/// @param raw       trie to read keys and values from.
/// @param filtered  trie that receives the surviving keys and their ids.
/// @param cutoff    inclusive lower bound a key's value must reach to be kept.
void LccrFeaturizer::FilterFeatureWithCount(trie_t &raw, trie_t &filtered,
                                            int cutoff) {
  namespace signal = boost::signals2::detail;
  // Small keys stay in the in-object storage of MAXFEATLEN bytes; the buffer
  // falls back to a larger allocation only for keys that do not fit.
  typedef signal::auto_buffer<
      char, signal::store_n_bytes<LccrFeaturizer::MAXFEATLEN>>
      key_buffer_t;

  size_t from(0), p(0);
  for (int featcnt = raw.begin(from, p); featcnt != trie_t::CEDAR_NO_PATH;
       featcnt = raw.next(from, p)) {
    if (featcnt < cutoff) {
      continue;
    }
    // suffix() is handed a key length of p; per the cedar documentation it
    // needs p + 1 bytes (key plus terminating NUL). Sizing the buffer per key
    // avoids overrunning a fixed MAXFEATLEN-byte buffer when a stored key is
    // MAXFEATLEN bytes or longer.
    key_buffer_t buffer(p + 1, '\0');
    raw.suffix(buffer.data(), p, from);
    // Read the id before inserting so the new key gets the pre-insert count.
    const int key = filtered.num_keys();
    filtered.update(buffer.data(), p, 0) = key;
  }
}
Exemplo n.º 3
0
/// Scans every location in cfg.m_locations and builds the file-name index.
///
/// For each file reported through the SearchDirectory callback:
///  - a name not seen before (looked up through icasecompare) gets a new
///    Props entry holding the lower-cased name and the path, and the
///    lower-cased name is inserted into `trie` with the entry's index as value;
///  - a name seen before only has the new path appended to the existing
///    entry's m_fpath.
///
/// `props` is assigned only after all locations were processed. If `abort` is
/// observed as true before a location is scanned, or a std::regex_error is
/// caught, the function returns with `props` untouched, while `trie` may
/// already contain the entries inserted so far.
///
/// @param trie   trie receiving lower-cased file names mapped to indices.
/// @param props  receives the collected entries on successful completion.
/// @param abort  checked before each location and also passed to
///               SearchDirectory (presumably for cancellation inside the
///               scan -- confirm against SearchDirectory).
/// @param cfg    configuration providing the locations to scan.
void makeIndex (trie_t & trie, props_t & props, bool & abort, Config const & cfg)
{
	try
	{
		props_t tmp_props;
		tmp_props.reserve(1024);
		// File name (compared with icasecompare) -> index of its entry in tmp_props.
		std::map<tstring, int, icasecompare> tmp_propmap;
		for (SearchLocationInfo const & info : cfg.m_locations)
		{
			if (abort)
				return;

			SearchDirectory(
				  info.m_dir_path, info.m_includes, info.m_excludes, info.m_recursive, info.m_follow_symlinks
				, TEXT("")
				// Accept-all predicate: both arguments are intentionally ignored.
				, [] (tstring const &, tstring const &) { return true; }
				, [&trie, &tmp_props, &tmp_propmap] (tstring const & fname, tstring const & fpath)
					 {
						// `auto` keeps the iterator type in sync with tmp_propmap's
						// comparator; std::map<tstring, int>::iterator is formally a
						// different type from the iterator of a map using icasecompare.
						auto const it = tmp_propmap.find(fname);
						if (it == tmp_propmap.end())
						{
							tstring fname_lwr = fname;
							boost::algorithm::to_lower(fname_lwr);
							tmp_props.push_back(Props(fname_lwr, fpath));
							// The index of the entry just appended is the value stored in the trie.
							trie_t::result_type const id = static_cast<trie_t::result_type>(tmp_props.size() - 1);
							tmp_propmap[fname_lwr] = id;
							//dbg_printf("insert: fname=%s fpath=%s idx=%i\n", fname_lwr.c_str(), fpath.c_str(), id);
							trie.update(fname_lwr.c_str(), fname_lwr.length(), id);
						}
						else
						{
							// Known file name: record one more path for it.
							tmp_props[it->second].m_fpath.push_back(fpath);
							//dbg_printf("update: fname=%s fpath=%s idx=%i\n", fname.c_str(), fpath.c_str(), it->second);
						}
					 }
				, abort);
		}
		// Publish the result only once every location has been scanned.
		props = tmp_props;
	}
	catch (std::regex_error const & e)
	{
		dbg_printf("Exception caught: %s", e.what());
	}

	//printf("keys: %ld\n", trie.num_keys ());
	//printf("size: %ld\n", trie.size ());
}