Esempio n. 1
0
std::string write_random_tmp_file(
    size_t filesize,
    const std::string& extension
)
{
    std::vector<uint8_t> output_vector;
    randomize_vector(filesize, &output_vector);

    std::string tmp_filepath = get_random_filepath(extension);
    write_vector_to_file(output_vector, tmp_filepath);

    return tmp_filepath;
}
Esempio n. 2
0
int main(int argc, char * argv[])
{
    Options options = ProcessOptions(argc,argv);
    auto str_path_in = options.path_in.string();
    auto path_out=options.path_out;
    if (boost::filesystem::create_directory(path_out))
    {
        std::cerr << "creating target directory\n";
    }
    provenance = std::string();
    provenance += "vocab collected on " + get_str_time();
    provenance += "source corpus : " + str_path_in + "\n";

    std::cerr<<"assigning ids\n";
    vocab.read_from_dir(str_path_in);

    provenance = provenance + "words in corpus : "+ FormatHelper::ConvertToStr(vocab.cnt_words_processed)+"\n";
    provenance = provenance + "unique words : "+ FormatHelper::ConvertToStr(vocab.cnt_words)+"\n";
    
    vocab.reduce(options.min_frequency);
    provenance=provenance+"filtered with minimal frequency: "+FormatHelper::ConvertToStr(options.min_frequency)+"\n";
    provenance = provenance + "unique words : "+ FormatHelper::ConvertToStr(vocab.cnt_words)+"\n";

    std::cerr<<"creating list of frequencies\n";

    vocab.freq_per_id.resize(vocab.cnt_words);
    vocab.lst_id2word.resize(vocab.cnt_words);
    std::fill (vocab.freq_per_id.begin(),vocab.freq_per_id.end(),0);   
    std::cerr<<"populating frequencies\n";
    vocab.populate_frequency();
    vocab.reassign_ids(vocab.freq_per_id);
    vocab.populate_frequency();
    vocab.populate_ids();
    
    std::cerr<<"dumping ids and frequencies\n";

    vocab.dump_ids((path_out / boost::filesystem::path("ids")).string());
    vocab.dump_frequency((path_out / boost::filesystem::path("frequencies")).string());
    write_value_to_file((path_out / boost::filesystem::path("cnt_unique_words")).string(),vocab.cnt_words);
    write_value_to_file((path_out / boost::filesystem::path("cnt_words")).string(),vocab.cnt_words_processed);
    write_vector_to_file((path_out / boost::filesystem::path("freq_per_id")).string(),vocab.freq_per_id);
    write_value_to_file((path_out / boost::filesystem::path("provenance.txt")).string(),provenance);

    return 0;
}
Esempio n. 3
0
void write_vector(FILE *to, BITVEC *vector) {
    terminate_vector_for_writing(vector);
    write_vector_to_file(vector, to);
    initialise_vector(vector);
}