Example #1
DataGenerator::DataGenerator()
{
    srand(0);
    t = 0;
    current_id = 0;
    for (int i = 0; i < NUM_POINTS; i++)
    {
        pts[i * 3 + 0] = rand() % (6 * MAX_BOX) - 3 * MAX_BOX;
        pts[i * 3 + 1] = rand() % (6 * MAX_BOX) - 3 * MAX_BOX;
        pts[i * 3 + 2] = rand() % (6 * MAX_BOX) - 3 * MAX_BOX;
    }
    Ric <<
        0, 0, -1,
        -1, 0, 0,
        0, 1, 0;
    //Tic << 4, 5, 6;
    Tic << 0.02, -0.14, 0.0;
    //acc_cov << 1.3967e-04, 1.4357e-06, 2.1468e-06,
    //        1.4357e-06, 1.4352e-04, 5.7168e-05,
    //        2.1468e-06, 5.7168e-05, 1.5757e-04;
    //acc_cov << 1.3967e-04, 0, 0,
    //        0, 1.4352e-04, 0,
    //        0, 0, 1.5757e-04;
    acc_cov = 1e-2 * Matrix3d::Identity();
    gyr_cov = 1e-4 * Matrix3d::Identity();


    pts_cov << .1 * .1 / 3.6349576068362910e+02 / 3.6349576068362910e+02, 0,
            0, .1 * .1 / 3.6356654972681025e+02 / 3.6356654972681025e+02;

    generator = default_random_engine(0);
    distribution = normal_distribution<double>(0.0, 1);
}
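The engine, the unit-variance normal_distribution, and the covariance scales initialized above are not exercised in this excerpt. Below is a standalone sketch of the usual pattern for drawing the corresponding Gaussian noise; the function name and signature are illustrative assumptions, not part of the original class.

#include <random>
#include <Eigen/Dense>

// Illustrative sketch (assumption): additive zero-mean Gaussian noise on a 3-vector,
// mirroring the generator / distribution / acc_cov members set up in the constructor.
Eigen::Vector3d add_gaussian_noise(const Eigen::Vector3d& clean,
                                   const Eigen::Matrix3d& cov,
                                   std::default_random_engine& gen,
                                   std::normal_distribution<double>& dist)
{
    Eigen::Vector3d n(dist(gen), dist(gen), dist(gen));
    // For a diagonal covariance such as 1e-2 * Identity, the per-axis standard
    // deviation is the square root of the corresponding diagonal entry.
    return clean + cov.cwiseSqrt() * n;
}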
/*******************************************************************************
Function:	Randomize
Author:		Stephanie Athow
Description: 
	Randomize the order of the vectors in a vector of vectors, but does not
	randomize the contents of each inner vector.
Parameters:
	in/out:	data	A vector of vectors where one vector contains the year,
					burned acres, and 12 months of PDSI data
Returns:
	none
*******************************************************************************/
void randomize( vector< vector<double> > & data )
{
	// seeding for random number generator
	unsigned seed = chrono::system_clock::now().time_since_epoch().count();

	// shuffle the outer vector (reorder the rows) without touching the contents of each row
	shuffle( data.begin(), data.end(), default_random_engine( seed ) ); 
	
}
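A minimal usage sketch for randomize; the sample rows are made-up values following the layout described in the comment (year, burned acres, then 12 monthly PDSI values).

#include <vector>
using namespace std;

void randomize( vector< vector<double> > & data );   // documented above

int main()
{
	vector< vector<double> > data = {
		{ 2001, 150.0,  0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2 },
		{ 2002, 300.0, -0.1, -0.2, -0.3, -0.4, -0.5, -0.6, -0.7, -0.8, -0.9, -1.0, -1.1, -1.2 }
	};
	randomize( data );   // row order is shuffled; each row's contents stay intact
	return 0;
}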
Example #3
int main(int argc, char* argv[]) {
    if (argc < 4) {
        cout << "usage: ./bayes train-set-file test-set-file mode:n|t [size-of-train-set] [debug-output:f|t]" << endl;
    } else {
        string trainSetFile = argv[1];
        string testSetFile = argv[2];
        bool treeAugmented = argv[3][0] == 't';
        int sizeOfTrainSet = argc >= 5 ? atoi(argv[4]) : 0;
        bool debugOutput = argc >= 6 && argv[5][0] == 't';
        
        shared_ptr<Dataset> dataset(Dataset::loadDataset(trainSetFile, testSetFile));
        const DatasetMetadata* metadata = dataset->getMetadata();
        
        vector<Instance*> trainSet(dataset->getTrainSet().begin(), dataset->getTrainSet().end());
        if (sizeOfTrainSet > 0 && sizeOfTrainSet < trainSet.size()) {
            unsigned int seed = (unsigned int)chrono::system_clock::now().time_since_epoch().count();
            shuffle (trainSet.begin(), trainSet.end(), default_random_engine(seed));
            trainSet.resize(sizeOfTrainSet);
        }
        
        BayesNet bayesNet(metadata, trainSet, treeAugmented);
        
        if (debugOutput) {
            cout << bayesNet.getMutualInfoTable() << endl;
            cout << bayesNet.getMaximalSpanningTree() << endl;
            cout << bayesNet.getProbabilityTables() << endl;
        }
        
        cout << bayesNet.getBayesNet() << endl;
        
        const vector<Instance*>& testSet = dataset->getTestSet();
        int correctCount = 0;
        cout << "<Predictions for Test-set Instances>" << endl;
        cout << "Predicted" << DELIMITER << "Actual" << DELIMITER << "Probability" << endl;
        cout.setf(ios::fixed, ios::floatfield);
        cout.precision(PRECISION);
        for (int i = 0; i < testSet.size(); ++i) {
            Instance* inst = testSet[i];
            double prob = 0.0;
            string predicted = bayesNet.predict(inst, &prob);
            string actual = inst->toString(metadata, true);
            
            if (predicted == actual)
                correctCount++;
            
            cout << predicted << DELIMITER << actual << DELIMITER << prob << endl;
        }
        cout << correctCount << " out of " << testSet.size() << " test instances were correctly classified" << endl;
    }
}
Example #4
int main(int argc, const char * argv[]) {
    if (argc < 4) {
        cout << "usage: ./dt-learn train-set-file test-set-file m [percentage-of-train-set]" << endl;
    } else {
        string trainSetFile = argv[1];
        string testSetFile = argv[2];
        int stopThreshold = atoi(argv[3]);
        int percentageOfTrainSet = argc == 5 ? atoi(argv[4]) : 100;
        
        shared_ptr<Dataset> dataset(Dataset::loadDataset(trainSetFile, testSetFile));
        const DatasetMetadata* metadata = dataset->getMetadata();
    
        vector<Instance*> trainSet(dataset->getTrainSet().begin(), dataset->getTrainSet().end());
        if (percentageOfTrainSet < 100) {
            unsigned int seed = (unsigned int)chrono::system_clock::now().time_since_epoch().count();
            shuffle (trainSet.begin(), trainSet.end(), default_random_engine(seed));
            int newSize = (int)trainSet.size() * percentageOfTrainSet / 100;
            trainSet.resize(newSize);
        }
    
        DecisionTree tree(metadata, trainSet, stopThreshold);
        cout << tree.toString();
    
        const vector<Instance*>& testSet = dataset->getTestSet();
        int correctCount = 0;
        cout << "<Predictions for the Test Set Instances>" << endl;
        for (int i = 0; i < testSet.size(); ++i) {
            Instance* inst = testSet[i];
            string predicted = tree.predict(inst);
            string actual = inst->toString(metadata, true);
            if (predicted == actual)
                correctCount++;
            cout << setfill(' ') << setw(3) << (i + 1) << ": ";
            cout << "Actual: " << actual << "  Predicted: " << predicted<< endl;
        }
        cout << "Number of correctly classified: " << correctCount << "  Total number of test instances: " << testSet.size() << endl;
    }
}
Example #5
/* Finalizes a chord progression graph and sets up the random integer generator.
   Note: any chords added after calling this function are inaccessible to the
   getRandomChord method. */
void ProgressionGraph::finalize() {
	ChordRNG = uniform_int_distribution<>(0, chords.size() - 1);

	unsigned seed = std::chrono::system_clock::now().time_since_epoch().count();
	generator = default_random_engine(seed);
}
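The comment above refers to a getRandomChord method. Below is a hedged sketch of how it presumably draws from the members set up in finalize; the Chord return type and the chords container are assumptions about the rest of the class.

/* Hypothetical sketch (assumption): return a uniformly chosen chord. Because
   ChordRNG was fixed to [0, chords.size() - 1] in finalize(), chords added
   afterwards can never be selected. */
Chord ProgressionGraph::getRandomChord() {
	return chords[ChordRNG(generator)];
}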
    void generate_data_q2()
    {
        long compressed_bytes_docs = 0;
        long compressed_bytes_terms = 0;
        long compressed_bytes_freqs = 0;
        
        a_exact_docs_oo = input::docs_bench_items(); // check at index 5
        
        show_info("[P1] Generating random data...");
        a_p1_terms = new unsigned short[input::NUM_TUPLES];
        int index = 0;
        
        for (int t = 0; t < input::T_PM; ++t)
        {
            int cnt = pubmed::get_group_by_term(t);
            
            for (int d = 0; d < cnt; ++d)
            {
                a_p1_terms[index++] = t;
            }
        }
        
        // shuffle
        show_info("[P1] Shuffle...");
        shuffle(a_p1_terms, a_p1_terms + input::NUM_TUPLES, default_random_engine(42));
        
#ifdef HUFFMAN
        // compress
        show_info("[P1] Generating Huffman tree...");
        generate_array_tree_representation(a_p1_terms, input::NUM_TUPLES, a_p1_huffman_array, a_p1_terminator_array, a_p1_tree);
        encoding_dict<unsigned short> encoding_dict_terms;
        build_inverse_mapping(a_p1_tree, encoding_dict_terms);
        delete a_p1_tree;
#endif
        
        // single lists
        show_info("[P1] Generating single lists...");
        a_p1_terms_fragments = new unsigned short*[input::D_PM];
        
        index = 0;
        for (int d = 0; d < input::D_PM; ++d)
        {
            int num_terms = pubmed::get_group_by_doc(d);
            a_p1_terms_fragments[d] = new unsigned short[num_terms];
            
            for (int t = 0; t < num_terms; ++t)
            {
                a_p1_terms_fragments[d][t] = a_p1_terms[index++];
            }
        }
        
        delete[] a_p1_terms;
        
#ifdef HUFFMAN
        // compress
        show_info("[P1] Compressing...");
        a_p1_terms_fragments_compressed = new char*[input::D_PM];
        a_p1_terms_fragments_compressed_bytes = new int[input::D_PM];
        
        for (int d = 0; d < input::D_PM; ++d)
        {
            a_p1_terms_fragments_compressed_bytes[d] = encode(a_p1_terms_fragments[d], pubmed::get_group_by_doc(d), a_p1_terms_fragments_compressed[d], encoding_dict_terms);
            
            compressed_bytes_terms += a_p1_terms_fragments_compressed_bytes[d];
            delete[] a_p1_terms_fragments[d];
        }
        delete[] a_p1_terms_fragments;
#endif
        
        show_info("[P2] Generating random data...");
        a_p2_docs = new int[input::NUM_TUPLES];
        a_p2_freqs = new unsigned char[input::NUM_TUPLES];
        index = 0;
        
        for (int d = 0; d < input::D_PM; ++d)
        {
            int cnt = pubmed::get_group_by_doc(d);
            
            for (int t = 0; t < cnt; ++t)
            {
                int r = rand() % 100;
                if (r < 25) a_p2_freqs[index] = 1;
                else if (r < 45) a_p2_freqs[index] = 2;
                else if (r < 60) a_p2_freqs[index] = 3;
                else if (r < 70) a_p2_freqs[index] = 4;
                else if (r < 75) a_p2_freqs[index] = 5;
                else if (r < 77) a_p2_freqs[index] = 6;
                else a_p2_freqs[index] = rand() % 40;
                
                a_p2_docs[index++] = d;
            }
        }
        
        // shuffle
        show_info("[P2] Shuffle...");
        shuffle(a_p2_docs, a_p2_docs + input::NUM_TUPLES, default_random_engine(42));
        shuffle(a_p2_freqs, a_p2_freqs + input::NUM_TUPLES, default_random_engine(42));
        
#ifdef HUFFMAN
        // generate Huffman tree
        show_info("[P2] Generating Huffman tree...");
        generate_array_tree_representation(a_p2_docs, input::NUM_TUPLES, a_p2_huffman_array, a_p2_terminator_array, a_p2_tree);
        encoding_dict<int> encoding_dict_docs;
        build_inverse_mapping(a_p2_tree, encoding_dict_docs);
        
        generate_array_tree_representation(a_p2_freqs, input::NUM_TUPLES, a_p2_f_huffman_array, a_p2_f_terminator_array, a_p2_f_tree);
        encoding_dict<unsigned char> encoding_dict_freqs;
        build_inverse_mapping(a_p2_f_tree, encoding_dict_freqs);
        delete a_p2_tree;
        delete a_p2_f_tree;
#endif
        
        // single lists
        show_info("[P2] Generating single lists...");
        a_p2_docs_fragments = new int*[input::T_PM];
        a_p2_freqs_fragments = new unsigned char*[input::T_PM];
        
        index = 0;
        for (int t = 0; t < input::T_PM; ++t)
        {
            int num_docs = pubmed::get_group_by_term(t);
            a_p2_docs_fragments[t] = new int[num_docs];
            a_p2_freqs_fragments[t] = new unsigned char[num_docs];
            
            for (int d = 0; d < num_docs; ++d)
            {
                a_p2_freqs_fragments[t][d] = a_p2_freqs[index];
                a_p2_docs_fragments[t][d] = a_p2_docs[index++];
            }
        }
        
        delete[] a_p2_docs;
        delete[] a_p2_freqs;
        
#ifdef HUFFMAN
        // compress
        show_info("[P2] Compressing...");
        a_p2_docs_fragments_compressed = new char*[input::T_PM];
        a_p2_freqs_fragments_compressed = new char*[input::T_PM];
        
        a_p2_docs_fragments_compressed_bytes = new int[input::T_PM];
        a_p2_freqs_fragments_compressed_bytes = new int[input::T_PM];
        
        for (int t = 0; t < input::T_PM; ++t)
        {
            a_p2_docs_fragments_compressed_bytes[t] = encode(a_p2_docs_fragments[t], pubmed::get_group_by_term(t), a_p2_docs_fragments_compressed[t], encoding_dict_docs);
            delete[] a_p2_docs_fragments[t];
            
            a_p2_freqs_fragments_compressed_bytes[t] = encode(a_p2_freqs_fragments[t], pubmed::get_group_by_term(t), a_p2_freqs_fragments_compressed[t], encoding_dict_freqs);
            delete[] a_p2_freqs_fragments[t];
            
            compressed_bytes_freqs += a_p2_freqs_fragments_compressed_bytes[t];
            compressed_bytes_docs += a_p2_docs_fragments_compressed_bytes[t];
        }
        delete[] a_p2_docs_fragments;
        delete[] a_p2_freqs_fragments;
#endif
        
        show_info("Swapping prevention...");
#ifdef HUFFMAN
        int tmp = 0;
        
        for (int d = 0; d < input::D_PM; ++d)
        {
            for (int t = 0; t < a_p1_terms_fragments_compressed_bytes[d]; ++t)
            {
                tmp = (tmp + a_p1_terms_fragments_compressed[d][t]) % 256;
            }
        }
        
        for (int t = 0; t < input::T_PM; ++t)
        {
            for (int d = 0; d < a_p2_docs_fragments_compressed_bytes[t]; ++d)
            {
                tmp = (tmp + a_p2_docs_fragments_compressed[t][d]) % 256;
            }
            for (int d = 0; d < a_p2_freqs_fragments_compressed_bytes[t]; ++d)
            {
                tmp = (tmp + a_p2_freqs_fragments_compressed[t][d]) % 256;
            }
        }
#else
#ifndef FASTBIT
        int tmp = 0;
        
        for (int d = 0; d < input::D_PM; ++d)
        {
            for (int t = 0; t < pubmed::get_group_by_doc(d); ++t)
            {
                tmp = (tmp + a_p1_terms_fragments[d][t]) % 256;
            }
        }
        
        for (int t = 0; t < input::T_PM; ++t)
        {
            for (int d = 0; d < pubmed::get_group_by_doc(t); ++d)
            {
                tmp = (tmp + a_p2_docs_fragments[t][d] + a_p2_freqs_fragments[t][d]) % 256;
            }
        }
#endif
#endif
        
        show_info("Compressed bytes terms: " << compressed_bytes_terms);
        show_info("Compressed bytes docs: " << compressed_bytes_docs);
        show_info("Compressed bytes freqs: " << compressed_bytes_freqs);
        show_info("DONE.");
    }
    void huffman_query_generate_lists()
    {
        output::start_timer("run/bench_huffman_query_generate");
        
        // generate tuples
        show_info("[1] Generating tuples...");
        show_info("Allocating " << input::NUM_TUPLES << " shorts/chars (array).");
        tuples = new unsigned short[input::NUM_TUPLES];
        tuples_freq = new unsigned char[input::NUM_TUPLES];
        show_info("Alloc success");
        
        long next_index = 0;
        
        for (short t = 0; t < input::T_PM; ++t)
        {
            for (long c = 0; c < pubmed::get_group_by_term(t); ++c)
            {
                if (rand() % 100 < 20) {
                    // ~20% of the time: a larger frequency (rand() truncated to unsigned char)
                    tuples_freq[next_index] = rand();
                } else {
                    tuples_freq[next_index] = rand() % 15;
                }
                
                tuples[next_index++] = t;
            }
            
            if (t % (input::T_PM/1000) == 0) debug_n("  " << t*100.0/input::T_PM << " % complete.    ");
        }
        debug_n("  " << 100 << " % complete.    \n");
        
        // shuffle
        show_info("[2] Shuffling...");
        shuffle(tuples, tuples + input::NUM_TUPLES, default_random_engine(42));
        
        // generate terms per doc
        show_info("[3] Generating separate lists...");
        terms_per_doc = new unsigned short*[input::D_PM];
        terms_per_doc_size = new unsigned short[input::D_PM];
        freqs_per_doc = new unsigned char*[input::D_PM];
        
        next_index = 0;
        for (long d = 0; d < input::D_PM; ++d)
        {
            unsigned short* list = new unsigned short[pubmed::get_group_by_doc(d)];
            memcpy(list, tuples + next_index, pubmed::get_group_by_doc(d) * sizeof(unsigned short));
            terms_per_doc[d] = list;
            
            unsigned char* freqs_list = new unsigned char[pubmed::get_group_by_doc(d)];
            memcpy(freqs_list, tuples_freq + next_index, pubmed::get_group_by_doc(d) * sizeof(unsigned char));
            freqs_per_doc[d] = freqs_list;
            
            terms_per_doc_size[d] = pubmed::get_group_by_doc(d);
            
            if (d % (input::D_PM/1000) == 0) debug_n("  " << d*100.0/input::D_PM << " % complete.    ");
        }
        debug_n("  " << 100 << " % complete.    \n");
        
        
#ifdef s_compress
#ifndef use_fastbit
                // compress with Huffman
                show_info("[4] Generating Huffman tree for terms...");
                terms_per_doc_compressed = new char*[input::D_PM];
                
                generate_array_tree_representation(tuples, input::NUM_TUPLES, huffman_array_terms, terminator_array_terms, tree);
                encoding_dict<unsigned short> encoding_dict_terms;
                build_inverse_mapping(tree, encoding_dict_terms);
                
                delete[] tuples;
                
                show_info("[5] Compressing terms...");
                for (long d = 0; d < input::D_PM; ++d)
                {
                    char* terms_compressed;
                    terms_bytes_uncompressed += terms_per_doc_size[d] * sizeof(unsigned short);
                    terms_bytes_compressed += encode(terms_per_doc[d], terms_per_doc_size[d], terms_compressed, encoding_dict_terms);
                    terms_per_doc_compressed[d] = terms_compressed;
                    delete[] terms_per_doc[d];
                    
                    if (d % (input::D_PM/1000) == 0) debug_n("  " << d*100.0/input::D_PM << " % complete.    ");
                }
                debug_n("  " << 100 << " % complete.    \n");
#else
                // compress with FastBit
                show_info("[4] Compressing terms with bit vector...");
                terms_per_doc_bitvector = new ibis::bitvector*[input::D_PM];
                
                delete[] tuples;
                
                for (long d = 0; d < input::D_PM; ++d)
                {
                    ibis::bitvector* terms_compressed = new ibis::bitvector();
                    terms_bytes_uncompressed += terms_per_doc_size[d] * sizeof(unsigned short);
                    
                    for (long term_index = 0; term_index < terms_per_doc_size[d]; ++ term_index)
                    {
                        terms_compressed->setBit(terms_per_doc[d][term_index], 1);
                    }
                    
                    
                    terms_compressed->compress();
                    // force new allocate
                    ibis::array_t<uint32_t>* arr = new ibis::array_t<uint32_t>();
                    terms_compressed->write(*arr);
                    delete terms_compressed;
                    terms_compressed = new ibis::bitvector(*arr);
                    delete arr;
                    
                    delete[] terms_per_doc[d];
                    
                    terms_per_doc_bitvector[d] = terms_compressed;
                    terms_bytes_compressed += terms_compressed->bytes();
                    
                    
                    if (d % (input::D_PM/1000) == 0)
                    {
                       debug_n("  " << d*100.0/input::D_PM << " % complete. Using " << terms_bytes_compressed << " / " << terms_bytes_uncompressed << " bytes.   ");
                    }
                }
                
                debug_n("  " << 100 << " % complete.    \n");
                
                show_info("[5] n/a");
#endif
            
            // compress
            show_info("[6] Generating Huffman tree for frequencies...");
            freqs_per_doc_compressed = new char*[input::D_PM];

            generate_array_tree_representation(tuples_freq, input::NUM_TUPLES, huffman_array_freqs, terminator_array_freqs, tree_freqs);
            encoding_dict<unsigned char> encoding_dict_freqs;
            build_inverse_mapping(tree_freqs, encoding_dict_freqs);
            
            delete[] tuples_freq;
            
            show_info("[7] Compressing frequencies...");
            for (long d = 0; d < input::D_PM; ++d)
            {
                char* freqs_compressed;
                freqs_bytes_uncompressed += terms_per_doc_size[d] * sizeof(unsigned char);
                freqs_bytes_compressed += encode(freqs_per_doc[d], terms_per_doc_size[d], freqs_compressed, encoding_dict_freqs);
                freqs_per_doc_compressed[d] = freqs_compressed;
                delete[] freqs_per_doc[d];
                
                if (d % (input::D_PM/1000) == 0) debug_n("  " << d*100.0/input::D_PM << " % complete.    ");
            }
            debug_n("  " << 100 << " % complete.    \n");
            
            delete[] freqs_per_doc;
            delete[] terms_per_doc;
            
            show_info("terms bytes uncompressed: " << terms_bytes_uncompressed);
            show_info("terms bytes compressed: " << terms_bytes_compressed);
            show_info("freqs bytes uncompressed: " << freqs_bytes_uncompressed);
            show_info("freqs bytes compressed: " << freqs_bytes_compressed);
#else
            delete[] tuples;
            delete[] tuples_freq;
            show_info("No compression.");
#endif
        
        exact_docs_a = input::docs_bench_items();
        
        output::stop_timer("run/bench_huffman_query_generate");
        
        show_info("Done.");
    }
 /** @param head The linked list's head.
     Note that the head is guaranteed to be not null, so it contains at least one node. */
 Solution(ListNode* head) {
     this->head = head;
     generator = default_random_engine();
 }
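Only the constructor is shown. Below is a hedged sketch of a getRandom() using reservoir sampling with the stored head and engine; the method body and the ListNode fields (val, next) are assumptions about the rest of the class.

 /** Hypothetical sketch (assumption): return a node value chosen uniformly at random
     via reservoir sampling, reusing the engine created in the constructor. */
 int getRandom() {
     int result = 0;
     int seen = 0;
     for (ListNode* node = head; node != nullptr; node = node->next) {
         ++seen;
         uniform_int_distribution<int> pick(0, seen - 1);
         if (pick(generator) == 0)    // keep the current value with probability 1/seen
             result = node->val;
     }
     return result;
 }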
	default_random_engine randomizer::chance()
	{
		random_device rd;
		return default_random_engine(rd());
	}
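A short usage sketch for the factory above; the die-roll distribution is purely illustrative.

	// Illustrative usage: obtain a freshly seeded engine and draw from a distribution.
	default_random_engine engine = randomizer::chance();
	uniform_int_distribution<int> die(1, 6);
	int roll = die(engine);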
namespace Diehard
{
    default_random_engine ProblemTweet::random_engine = default_random_engine(random_device()());
    
    ProblemTweet::ProblemTweet()
    : ProblemDot(*GetRandomCapacities()) {}
    
    shared_ptr<list<string> > ProblemTweet::GetTweets() const
    {
        list<const Node*> nodes_route;
        auto node_goal = GetGoalNodeRandomly();
        if (!node_goal) return nullptr;
        
        auto node_ptr = node_goal;
        while (node_ptr)
        {
            nodes_route.push_front(node_ptr);
            if (node_ptr && node_ptr->GetSum() == 0) break;
            node_ptr = GetFromNodeRandomly(node_ptr);
        }
        
        auto tweets = shared_ptr<list<string> >(new list<string>());
        {
            stringstream ss;
            ss << "Capacities: " << GetName();
            ss << " -> ";
            ss << "Request: " << goal_sum;
            tweets->push_back(ss.str());
        }
        for_each(nodes_route.begin(), nodes_route.end(), [&](const Node* node)
                 {
                     stringstream ss;
                     ss << "Bucket: " << node->GetName(false);
                     tweets->push_back(ss.str());
                 });
        {
            stringstream ss;
            ss << "Final result: ";
            for (Dimention d = 0; d < capacities.size(); d++)
            {
                if (d != 0) ss << "+";
                ss << node_goal->volumes[d];
            }
            ss << "=" << node_goal->GetSum();
            tweets->push_back(ss.str());
        }
        
        return tweets;
    }
    
#pragma mark Random generator
    
    const Node* ProblemTweet::GetGoalNodeRandomly() const
    {
        list<const Node*> nodes_goal;
        for (auto& node : nodes)
        {
            if (
                node.is_used &&
                node.cost < Node::cost_max &&
                node.GetSum() == goal_sum)
                nodes_goal.push_back(&node);
        }
        
        return GetLowestCostRandomly(nodes_goal);
    }
    
    const Node* ProblemTweet::GetFromNodeRandomly(const Node* node)
    {
        list<const Node*> nodes_from(node->from.begin(), node->from.end());
        return GetLowestCostRandomly(nodes_from);
    }
    
    const Node* ProblemTweet::GetLowestCostRandomly(const std::list<const Node*>& nodes)
    {
        Node::Cost cost_min = Node::cost_max;
        for_each(nodes.begin(), nodes.end(), [&](const Node* node)
                 {
                     if (node->cost < cost_min) cost_min = node->cost;
                 });
        
        vector<const Node*> nodes_lowcost;
        for_each(nodes.begin(), nodes.end(), [&](const Node* node)
                 {
                     if (node->cost == cost_min) nodes_lowcost.push_back(node);
                 });
        
        if (nodes_lowcost.empty())
        {
            return nullptr;
        }
        else
        {
            uniform_int_distribution<size_t> dist_idx(0, nodes_lowcost.size() - 1);
            return nodes_lowcost[dist_idx(random_engine)];
        }
    }
    
    shared_ptr<vector<Volume> > ProblemTweet::GetRandomCapacities()
    {
        uniform_int_distribution<Volume> dist_capacity(2, 200);
        uniform_int_distribution<Dimention> dist_dimention(2, 4);
        auto rand_capacity = bind(dist_capacity, random_engine);
        auto dimention = dist_dimention(random_engine);
        
        shared_ptr<vector<Volume> > capacities(new vector<Volume>(dimention));
        for (Dimention i = 0; i < dimention; i++)
        {
            (*capacities)[i] = rand_capacity();
        }
        
        return capacities;
    }
    
    Volume ProblemTweet::GetRandomGoal()
    {
        Volume sum = 0;
        for_each(capacities.begin(), capacities.end(), [&](Volume v) { sum += v; });
        
        uniform_int_distribution<Volume> dist_goal(1, sum);
        return dist_goal(random_engine);
    }
    
}
 void generate_tuples_q5_omc()
 {
     // scale
     int index = 0;
     
     show_info("[1] Generating terms per doc fragments...");
     t_terms_per_doc = new int[input::NUM_TUPLES];
     
     for (int term = 0; term < input::T_PM; ++term)
     {
         for (int i = 0; i < pubmed::get_group_by_term(term); ++i)
         {
             t_terms_per_doc[index++] = term;
         }
     }
     
     shuffle(t_terms_per_doc, t_terms_per_doc + input::NUM_TUPLES, default_random_engine(42));
     
     // split
     index = 0;
     t_terms_per_doc_docs = new rle_tuple[input::D_PM];
     for (int doc = 0; doc < input::D_PM; ++doc)
     {
         t_terms_per_doc_docs[doc].row_id = index;
         t_terms_per_doc_docs[doc].length = pubmed::get_group_by_doc(doc);
         t_terms_per_doc_docs[doc].id = doc;
         index += pubmed::get_group_by_doc(doc);
     }
     
     show_info("[2] Generating docs per term fragments...");
     t_docs_per_term = new int[input::NUM_TUPLES];
     index = 0;
     
     for (int doc = 0; doc < input::D_PM; ++doc)
     {
         for (int i = 0; i < pubmed::get_group_by_doc(doc); ++i)
         {
             t_docs_per_term[index++] = doc;
         }
     }
     
     shuffle(t_docs_per_term, t_docs_per_term + input::NUM_TUPLES, default_random_engine(42));
     
     // split
     index = 0;
     t_docs_per_term_terms = new rle_tuple[input::T_PM];
     for (int term = 0; term < input::T_PM; ++term)
     {
         t_docs_per_term_terms[term].row_id = index;
         t_docs_per_term_terms[term].length = pubmed::get_group_by_term(term);
         t_docs_per_term_terms[term].id = term;
         index += pubmed::get_group_by_term(term);
     }
     
     show_info("[3] Generate authors per doc fragments...");
     t_authors_per_doc = new int[input::NUM_TUPLES_DA];
     index = 0;
     
     for (int author = 0; author < input::A_PM; ++author)
     {
         for (int i = 0; i < pubmed::get_DA_group_by_author(author); ++i)
         {
             t_authors_per_doc[index++] = author;
         }
     }
     
     shuffle(t_authors_per_doc, t_authors_per_doc + input::NUM_TUPLES_DA, default_random_engine(42));
     
     // split
     index = 0;
     t_authors_per_doc_docs = new rle_tuple[input::D_PM];
     for (int doc = 0; doc < input::D_PM; ++doc)
     {
         t_authors_per_doc_docs[doc].row_id = index;
         t_authors_per_doc_docs[doc].length = pubmed::get_DA_group_by_doc(doc);
         t_authors_per_doc_docs[doc].id = doc;
         index += pubmed::get_DA_group_by_doc(doc);
     }
     
     show_info("[4] Generate docs per author fragments...");
     t_docs_per_author = new int[input::NUM_TUPLES_DA];
     index = 0;
     
     for (int doc = 0; doc < input::D_PM; ++doc)
     {
         for (int i = 0; i < pubmed::get_DA_group_by_doc(doc); ++i)
         {
             t_docs_per_author[index++] = doc;
         }
     }
     
     shuffle(t_docs_per_author, t_docs_per_author + input::NUM_TUPLES_DA, default_random_engine(42));
     
     // split
     index = 0;
     t_docs_per_author_authors = new rle_tuple[input::A_PM];
     for (int author = 0; author < input::A_PM; ++author)
     {
         t_docs_per_author_authors[author].row_id = index;
         t_docs_per_author_authors[author].length = pubmed::get_DA_group_by_author(author);
         t_docs_per_author_authors[author].id = author;
         index += pubmed::get_DA_group_by_author(author);
     }
     
     show_info("[5] Generating years per doc...");
     year_doc = new int[input::D_PM];
     for (int i = 0; i < input::D_PM; ++i)
     {
         year_doc[i] = rand() % 100 + 1915;
     }
 }
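Each rle_tuple built above records where a slice starts in the corresponding flat array and how long it is. Below is a hedged lookup sketch; the function name and the placeholder loop body are illustrative assumptions.

 // Hypothetical scan (assumption): visit every term assigned to document doc_id
 // through the fragment index built in generate_tuples_q5_omc().
 void scan_terms_of_doc(int doc_id)
 {
     const rle_tuple& rt = t_terms_per_doc_docs[doc_id];
     for (int i = 0; i < rt.length; ++i)
     {
         int term = t_terms_per_doc[rt.row_id + i];
         (void)term;   // ... use term here ...
     }
 }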
    void generate_random_tuples()
    {
        debug("Generating " << input::NUM_TUPLES / TUPLES_DIVIDER << " random tuples.");
        
        output::start_timer("run/top_k_column_db_tf_in_documents_generate_random");
        
        if (TUPLES_DIVIDER > 1)
        {
            show_info("Running benchmark with " << input::NUM_TUPLES/TUPLES_DIVIDER << " instead of " << input::NUM_TUPLES << " tuples.");
        }

        c_term = new unsigned short[input::NUM_TUPLES/TUPLES_DIVIDER];
        debug("c_term alloc success");
        c_doc = new unsigned int[input::NUM_TUPLES/TUPLES_DIVIDER];
        debug("c_doc alloc success");
        c_freq = new unsigned char[input::NUM_TUPLES/TUPLES_DIVIDER];
        debug("c_freq alloc success");
        
        // offsets mark where each cluster of identical ids begins; entries are pre-filled with a large sentinel value
        term_offsets = new long[input::T_PM + 1];
        for (DOMAIN_TYPE i = 0; i < input::T_PM + 1; ++i) term_offsets[i] = 20000000000L;
        
        doc_offsets = new long[input::D_PM + 1];
        for (DOMAIN_TYPE i = 0; i < input::D_PM + 1; ++i) doc_offsets[i] = 200000000000L;

        int next_index = 0;
        for (long term = 0; term < input::T_PM; ++term)
        {
            long times = MAX(1, pubmed::get_group_by_term(term) / TUPLES_DIVIDER);
            term_offsets[term] = next_index;
            for (int i = 0; i < times; ++i)
            {
                if (next_index < input::NUM_TUPLES / TUPLES_DIVIDER)
                {
                    c_freq[next_index] = rand() % input::b_MAX_FREQUENCY;
                    c_term[next_index++] = term;
                }
            }
            
        }

        if (sorted_by_term == S_UNOPTIMIZED)
        {
            shuffle(c_term, c_term + input::NUM_TUPLES / TUPLES_DIVIDER, default_random_engine(42));
        }
        debug("Done generating terms and frequencies.");

        next_index = 0;
        for (long doc = 0; doc < input::D_PM; ++doc)
        {
            long times = MAX(1, pubmed::get_group_by_doc(doc) / TUPLES_DIVIDER);
            doc_offsets[doc] = next_index;
            for (int i = 0; i < times; ++i)
            {
                if (next_index < input::NUM_TUPLES / TUPLES_DIVIDER)
                {
                    c_doc[next_index++] = doc;
                }
            }
        }

        if (sorted_by_doc == S_UNOPTIMIZED)
        {
            shuffle(c_doc, c_doc + input::NUM_TUPLES / TUPLES_DIVIDER, default_random_engine(42));
        }
        debug("Done generating documents.");

        output::stop_timer("run/top_k_column_db_tf_in_documents_generate_random");
    }