Ejemplo n.º 1
0
void construct(t_index& idx, const std::string& file, cache_config& config, uint8_t num_bytes, lcp_tag)
{
    auto event = memory_monitor::event("construct compressed LCP");
    const char* KEY_TEXT = key_text_trait<t_width>::KEY_TEXT;
    typedef int_vector<t_width> text_type;
    {
        // (2) check, if the longest common prefix array is cached
        auto event = memory_monitor::event("LCP");
        if (!cache_file_exists(conf::KEY_LCP, config)) {
            {
                auto event = memory_monitor::event("parse input text");
                // (1) check, if the text is cached
                if (!cache_file_exists(KEY_TEXT, config)) {
                    text_type text;
                    load_vector_from_file(text, file, num_bytes);
                    if (contains_no_zero_symbol(text, file)) {
                        append_zero_symbol(text);
                        store_to_cache(text,KEY_TEXT, config);
                    }
                }
                register_cache_file(KEY_TEXT, config);
            }
            {
                // (2) check, if the suffix array is cached
                auto event = memory_monitor::event("SA");
                if (!cache_file_exists(conf::KEY_SA, config)) {
                    construct_sa<t_width>(config);
                }
                register_cache_file(conf::KEY_SA, config);
            }
            if (t_width==8) {
                construct_lcp_semi_extern_PHI(config);
            } else {
                construct_lcp_PHI<t_width>(config);
            }
        }
        register_cache_file(conf::KEY_LCP, config);
    }
    {
        auto event = memory_monitor::event("compressed LCP");
        t_index tmp(config);
        tmp.swap(idx);
    }
    if (config.delete_files) {
        auto event = memory_monitor::event("delete temporary files");
        util::delete_all_files(config.file_map);
    }
}
Ejemplo n.º 2
0
static bool
cache_file_valid(struct file *f)
{
  char *cp;
  int result;
  struct stat st;

  if(!cache_valid(f))
    return false;

  if(!cache_file_exists(f))
    return false;

  cp = cache_path(f);
  result = stat(cp, &st);
  free(cp);

  if(result != 0)
    return false;

  if(f->st->st_mtime > st.st_mtime)
    return false;

  return true;
}
Ejemplo n.º 3
0
Archivo: dir.c Proyecto: B-Rich/git
static enum path_treatment treat_one_path(struct dir_struct *dir,
					  struct strbuf *path,
					  const struct path_simplify *simplify,
					  int dtype, struct dirent *de)
{
	int exclude;
	int has_path_in_index = !!cache_file_exists(path->buf, path->len, ignore_case);

	if (dtype == DT_UNKNOWN)
		dtype = get_dtype(de, path->buf, path->len);

	/* Always exclude indexed files */
	if (dtype != DT_DIR && has_path_in_index)
		return path_none;

	/*
	 * When we are looking at a directory P in the working tree,
	 * there are three cases:
	 *
	 * (1) P exists in the index.  Everything inside the directory P in
	 * the working tree needs to go when P is checked out from the
	 * index.
	 *
	 * (2) P does not exist in the index, but there is P/Q in the index.
	 * We know P will stay a directory when we check out the contents
	 * of the index, but we do not know yet if there is a directory
	 * P/Q in the working tree to be killed, so we need to recurse.
	 *
	 * (3) P does not exist in the index, and there is no P/Q in the index
	 * to require P to be a directory, either.  Only in this case, we
	 * know that everything inside P will not be killed without
	 * recursing.
	 */
	if ((dir->flags & DIR_COLLECT_KILLED_ONLY) &&
	    (dtype == DT_DIR) &&
	    !has_path_in_index &&
	    (directory_exists_in_index(path->buf, path->len) == index_nonexistent))
		return path_none;

	exclude = is_excluded(dir, path->buf, &dtype);

	/*
	 * Excluded? If we don't explicitly want to show
	 * ignored files, ignore it
	 */
	if (exclude && !(dir->flags & (DIR_SHOW_IGNORED|DIR_SHOW_IGNORED_TOO)))
		return path_excluded;

	switch (dtype) {
	default:
		return path_none;
	case DT_DIR:
		strbuf_addch(path, '/');
		return treat_directory(dir, path->buf, path->len, exclude,
			simplify);
	case DT_REG:
	case DT_LNK:
		return exclude ? path_excluded : path_untracked;
	}
}
Ejemplo n.º 4
0
csa_wt<t_wt, t_dens, t_inv_dens, t_sa_sample_strat, t_isa, t_alphabet_strat>::csa_wt(cache_config& config)
{
    if (!cache_file_exists(key_trait<alphabet_type::int_width>::KEY_BWT, config)) {
        return;
    }
    {
        auto event = memory_monitor::event("construct csa-alpbabet");
        int_vector_buffer<alphabet_type::int_width> bwt_buf(cache_file_name(key_trait<alphabet_type::int_width>::KEY_BWT,config));
        size_type n = bwt_buf.size();
        alphabet_type tmp_alphabet(bwt_buf, n);
        m_alphabet.swap(tmp_alphabet);
    }
    {
        auto event = memory_monitor::event("construct wavelet tree");
        int_vector_buffer<alphabet_type::int_width> bwt_buf(cache_file_name(key_trait<alphabet_type::int_width>::KEY_BWT,config));
        size_type n = bwt_buf.size();
        wavelet_tree_type tmp_wt(bwt_buf, n);
        m_wavelet_tree.swap(tmp_wt);
    }
    {
        auto event = memory_monitor::event("sample SA");
        sa_sample_type tmp_sa_sample(config);
        m_sa_sample.swap(tmp_sa_sample);
    }
    {
        auto event = memory_monitor::event("sample ISA");
        isa_sample_type isa_s(config, &m_sa_sample);
        util::swap_support(m_isa_sample, isa_s, &m_sa_sample, &m_sa_sample);
    }
}
Ejemplo n.º 5
0
Archivo: dir.c Proyecto: B-Rich/git
static struct dir_entry *dir_add_name(struct dir_struct *dir, const char *pathname, int len)
{
	if (cache_file_exists(pathname, len, ignore_case))
		return NULL;

	ALLOC_GROW(dir->entries, dir->nr+1, dir->alloc);
	return dir->entries[dir->nr++] = dir_entry_new(pathname, len);
}
Ejemplo n.º 6
0
void construct(t_index& idx, const std::string& file, cache_config& config, uint8_t num_bytes, csa_tag)
{
    auto event = memory_monitor::event("construct CSA");
    const char* KEY_TEXT = key_text_trait<t_index::alphabet_category::WIDTH>::KEY_TEXT;
    const char* KEY_BWT  = key_bwt_trait<t_index::alphabet_category::WIDTH>::KEY_BWT;
    typedef int_vector<t_index::alphabet_category::WIDTH> text_type;
    {
        auto event = memory_monitor::event("parse input text");
        // (1) check, if the text is cached
        if (!cache_file_exists(KEY_TEXT, config)) {
            text_type text;
            load_vector_from_file(text, file, num_bytes);
            if (contains_no_zero_symbol(text, file)) {
                append_zero_symbol(text);
                store_to_cache(text,KEY_TEXT, config);
            }
        }
        register_cache_file(KEY_TEXT, config);
    }
    {
        // (2) check, if the suffix array is cached
        auto event = memory_monitor::event("SA");
        if (!cache_file_exists(conf::KEY_SA, config)) {
            construct_sa<t_index::alphabet_category::WIDTH>(config);
        }
        register_cache_file(conf::KEY_SA, config);
    }
    {
        //  (3) construct BWT
        auto event = memory_monitor::event("BWT");
        if (!cache_file_exists(KEY_BWT, config)) {
            construct_bwt<t_index::alphabet_category::WIDTH>(config);
        }
        register_cache_file(KEY_BWT, config);
    }
    {
        //  (4) use BWT to construct the CSA
        auto event = memory_monitor::event("construct CSA");
        t_index tmp(config);
        idx.swap(tmp);
    }
    if (config.delete_files) {
        auto event = memory_monitor::event("delete temporary files");
        util::delete_all_files(config.file_map);
    }
}
Ejemplo n.º 7
0
void construct(t_index& idx, const std::string& file, cache_config& config, uint8_t num_bytes, cst_tag)
{
    auto event = memory_monitor::event("construct CST");
    const char* KEY_TEXT = key_text_trait<t_index::alphabet_category::WIDTH>::KEY_TEXT;
    const char* KEY_BWT  = key_bwt_trait<t_index::alphabet_category::WIDTH>::KEY_BWT;
    csa_tag csa_t;
    {
        // (1) check, if the compressed suffix array is cached
        typename t_index::csa_type csa;
        if (!cache_file_exists(std::string(conf::KEY_CSA)+"_"+util::class_to_hash(csa), config)) {
            cache_config csa_config(false, config.dir, config.id, config.file_map);
            construct(csa, file, csa_config, num_bytes, csa_t);
            auto event = memory_monitor::event("store CSA");
            config.file_map = csa_config.file_map;
            store_to_cache(csa,std::string(conf::KEY_CSA)+"_"+util::class_to_hash(csa), config);
        }
        register_cache_file(std::string(conf::KEY_CSA)+"_"+util::class_to_hash(csa), config);
    }
    {
        // (2) check, if the longest common prefix array is cached
        auto event = memory_monitor::event("LCP");
        register_cache_file(KEY_TEXT, config);
        register_cache_file(KEY_BWT, config);
        register_cache_file(conf::KEY_SA, config);
        if (!cache_file_exists(conf::KEY_LCP, config)) {
            if (t_index::alphabet_category::WIDTH==8) {
                construct_lcp_semi_extern_PHI(config);
            } else {
                construct_lcp_PHI<t_index::alphabet_category::WIDTH>(config);
            }
        }
        register_cache_file(conf::KEY_LCP, config);
    }
    {
        auto event = memory_monitor::event("CST");
        t_index tmp(config);
        tmp.swap(idx);
    }
    if (config.delete_files) {
        auto event = memory_monitor::event("delete temporary files");
        util::delete_all_files(config.file_map);
    }
}
Ejemplo n.º 8
0
	rank_bm25(cache_config& cconfig) {
		uint64_t num_terms;
        load_from_cache(num_terms, surf::KEY_COLLEN, cconfig);
        if (!cache_file_exists(surf::KEY_DOC_LENGTHS, cconfig)){
            surf::construct_doc_lengths<sdsl::int_alphabet_tag::WIDTH>(cconfig);
        }
        load_from_cache(doc_lengths, surf::KEY_DOC_LENGTHS, cconfig);
		num_docs = doc_lengths.size();
        std::cerr<<"num_docs = "<<num_docs<<std::endl;
	    avg_doc_len = (double)num_terms / (double)num_docs;
        std::cerr<<"avg_doc_len = "<<avg_doc_len<<std::endl;
	}
Ejemplo n.º 9
0
	rank_tfidf(cache_config& cconfig) {
        load_from_cache(num_terms, surf::KEY_COLLEN, cconfig);
        if (!cache_file_exists(surf::KEY_DOC_LENGTHS, cconfig)){
            surf::construct_doc_lengths<sdsl::int_alphabet_tag::WIDTH>(cconfig);
        }
        load_from_cache(doc_lengths, surf::KEY_DOC_LENGTHS, cconfig);
		num_docs = doc_lengths.size();
        std::cerr<<"num_docs = "<<num_docs<<std::endl;
	    auto min_itr = std::min_element(doc_lengths.begin(),doc_lengths.end());
	    min_doc_len = *min_itr;
        std::cerr<<"min_doc_len = "<<min_doc_len<<std::endl;
	}
Ejemplo n.º 10
0
csa_sada<t_enc_vec, t_dens, t_inv_dens, t_sa_sample_strat, t_isa, t_alphabet_strat>::csa_sada(cache_config& config)
{
    create_buffer();
    if (!cache_file_exists(key_trait<alphabet_type::int_width>::KEY_BWT, config)) {
        return;
    }
    int_vector_buffer<alphabet_type::int_width> bwt_buf(cache_file_name(key_trait<alphabet_type::int_width>::KEY_BWT,config));
    size_type n = bwt_buf.size();
    {
        auto event = memory_monitor::event("construct csa-alpbabet");
        alphabet_type tmp_alphabet(bwt_buf, n);
        m_alphabet.swap(tmp_alphabet);
    }

    int_vector<> cnt_chr(sigma, 0, bits::hi(n)+1);
    for (typename alphabet_type::sigma_type i=0; i < sigma; ++i) {
        cnt_chr[i] = C[i];
    }
    // calculate psi
    {
        auto event = memory_monitor::event("construct PSI");
        // TODO: move PSI construct into construct_PSI.hpp
        int_vector<> psi(n, 0, bits::hi(n)+1);
        for (size_type i=0; i < n; ++i) {
            psi[ cnt_chr[ char2comp[bwt_buf[i]] ]++ ] = i;
        }
        std::string psi_file = cache_file_name(conf::KEY_PSI, config);
        if (!store_to_cache(psi, conf::KEY_PSI, config)) {
            return;
        }
    }
    {
        auto event = memory_monitor::event("encode PSI");
        int_vector_buffer<> psi_buf(cache_file_name(conf::KEY_PSI, config));
        t_enc_vec tmp_psi(psi_buf);
        m_psi.swap(tmp_psi);
    }
    {
        auto event = memory_monitor::event("sample SA");
        sa_sample_type tmp_sa_sample(config);
        m_sa_sample.swap(tmp_sa_sample);
    }
    {
        auto event = memory_monitor::event("sample ISA");
        isa_sample_type isa_s(config, &m_sa_sample);
        util::swap_support(m_isa_sample, isa_s, &m_sa_sample, (const sa_sample_type*)nullptr);
    }
}
Ejemplo n.º 11
0
Archivo: dir.c Proyecto: B-Rich/git
static int get_index_dtype(const char *path, int len)
{
	int pos;
	const struct cache_entry *ce;

	ce = cache_file_exists(path, len, 0);
	if (ce) {
		if (!ce_uptodate(ce))
			return DT_UNKNOWN;
		if (S_ISGITLINK(ce->ce_mode))
			return DT_DIR;
		/*
		 * Nobody actually cares about the
		 * difference between DT_LNK and DT_REG
		 */
		return DT_REG;
	}

	/* Try to look it up as a directory */
	pos = cache_name_pos(path, len);
	if (pos >= 0)
		return DT_UNKNOWN;
	pos = -pos-1;
	while (pos < active_nr) {
		ce = active_cache[pos++];
		if (strncmp(ce->name, path, len))
			break;
		if (ce->name[len] > '/')
			break;
		if (ce->name[len] < '/')
			continue;
		if (!ce_uptodate(ce))
			break;	/* continue? */
		return DT_DIR;
	}
	return DT_UNKNOWN;
}