/**
 * Perform a series of zipfian range reads against a cog.
 *
 * Every read draws two keys with zipf(alpha, range), puts them in ascending
 * order and passes the pair to crack() together with the current cog.
 *
 * @param cog - the given cog
 * @param alpha - zipfian rate of decay
 * @param number - number of reads to do on the cog
 * @param range - the key range for reads
 * @return the cog returned by the last crack() call, or the input cog when
 *         no read is performed (number <= 0)
 */
struct cog *zipfianReads(struct cog *cog, double alpha, long number, long range) {
  for (long remaining = number; remaining > 0; --remaining) {
    /* Draw order matters for reproducibility: first draw, then second. */
    long low = zipf(alpha, range);
    long high = zipf(alpha, range);

    /* Ensure low <= high before cracking. */
    if (low > high) {
      long swapped = low;
      low = high;
      high = swapped;
    }

    cog = crack(cog, low, high);
  }
  return cog;
}
int main(int argc, char *argv[]) { int k = 100000; int exp = 1; // double c0 = 2, c1 = 3, c2 = 1; // double c0 = 1.3, c1 = 8, c2 = 1.5; std::CommandLine cmd; cmd.AddValue ("exp", "", exp); cmd.Parse (argc, argv); // Set sample size for test std::vector<int> testN; long n = 1000; for ( int i = 0; i < 15; i++ ) { n *= 2; } testN.push_back( n ); // Set distribution for test std::vector<double> p; switch(exp) { case 0: p = uniform(k); break; case 1: p = zipf(k); break; case 2: p = zipfd5(k); break; case 3: p = mixgeozipf(k); break; } // Set estimator Entropy entropy( k ); entropy.setDegree( 18 ); entropy.setInterval( 40 ); entropy.setThreshold( 18 ); printf("Alphabet size=%d.\n", entropy.getAlphabetSize()); printf("Polynoimal degree=%d.\n", entropy.getDegree()); printf("Approximation interval=[0,%.2f/n].\n", entropy.getInterval()); printf("Plug-in threshold=%d.\n",(int)floor(entropy.getThreshold())+1); printf("Unit: bits\n"); // TEST_fixed_P(p, entropy, testN); const int trials = 50; TEST_fixed_P_RMSE(p, entropy, testN, trials); return 0; }
// Decides whether an event happens or not, according to the Zipf probability.
// u = number of universities, i = Ui, p = size of the vector to be filled
// with the probabilities.
//
// For each of the draws j = 1 .. p-1 a uniform random number in [0, 1] is
// generated and vet[k] (k = 1 .. u-1) is set to 1 when that number is below
// probs[k], 0 otherwise. Only the flags from the last draw remain in vet[].
//
// NOTE(review): vet[0] is never written and only p-1 draws are made because
// both loops start at 1 -- confirm whether 1-based indexing is intended.
// NOTE(review): every probs[k] receives the same zipf(u, i) value, since the
// call does not depend on k -- confirm against the definition of zipf().
//
// Exits the process with status 1 if the temporary buffer cannot be allocated.
void DistribZipf(int vet[], int u, int i, int p)
{
    double rand_prob;
    double *probs;
    int j;
    int k;

    // Nothing to fill; also avoids a zero or negative allocation size.
    if (u <= 0)
        return;

    // The buffer holds exactly the u entries written below. It used to be
    // sized i-p+1, which overflowed whenever u was larger than that.
    probs = (double *) malloc((size_t)u * sizeof(double));
    if (probs == NULL)
        exit(1);

    // Fill the vector with the probability of each i
    for (k = 0; k < u; k++) {
        probs[k] = zipf(u, i);
    }

    // For each draw, generate a random probability between 0 and 1 and check
    // in which entries it falls below the Zipf probability.
    for (j = 1; j < p; j++) {
        rand_prob = (double)rand() / (double)RAND_MAX;
        for (k = 1; k < u; k++) {
            if (rand_prob < probs[k])
                vet[k] = 1;
            else
                vet[k] = 0;
        }
    }

    // The trailing comparison that read probs[k] after free() was removed:
    // it accessed freed memory out of bounds and had no effect on the result.
    free(probs);
}
/**
 * Build one YCSB query containing g_req_per_query requests on distinct keys.
 *
 * For every request a partition is chosen, a row id is drawn with
 * zipf(table_size - 1, g_zipf_theta) and the primary key is computed as
 * row_id * g_part_cnt + partition_id. A candidate whose key is already part
 * of the query is discarded and drawn again, so the loop only ends once
 * g_req_per_query distinct keys have been produced.
 *
 * @param home_partition_id  partition used for the first request when
 *                           FIRST_PART_LOCAL is set
 * @param h_wl               not used by this function
 * @return the newly allocated query; its `partitions` list holds every
 *         partition touched by at least one request
 */
BaseQuery * YCSBQueryGenerator::gen_requests_zipf(uint64_t home_partition_id, Workload * h_wl) {
  YCSBQuery * query = (YCSBQuery*) mem_allocator.alloc(sizeof(YCSBQuery));
  new(query) YCSBQuery();
  query->requests.init(g_req_per_query);

  set<uint64_t> all_keys;
  set<uint64_t> partitions_accessed;
  uint64_t table_size = g_synth_table_size / g_part_cnt;

  // Drawn once per query and compared with g_txn_read_perc for every request.
  double r_twr = (double)(mrand->next() % 10000) / 10000;

  int rid = 0;
  // i counts accepted requests only; a rejected duplicate leaves it unchanged
  // so that the slot is drawn again.
  for (UInt32 i = 0; i < g_req_per_query; ) {
    double r = (double)(mrand->next() % 10000) / 10000;

    uint64_t partition_id;
    if ( FIRST_PART_LOCAL && rid == 0) {
      partition_id = home_partition_id;
    } else {
      partition_id = mrand->next() % g_part_cnt;
      if (g_strict_ppt && g_part_per_txn <= g_part_cnt) {
        // Redraw while the candidate is already used and fewer than
        // g_part_per_txn partitions are in use, or while exactly
        // g_part_per_txn partitions are in use and the candidate is not one
        // of them.
        while ( (partitions_accessed.size() < g_part_per_txn &&
                 partitions_accessed.count(partition_id) > 0) ||
                (partitions_accessed.size() == g_part_per_txn &&
                 partitions_accessed.count(partition_id) == 0)) {
          partition_id = mrand->next() % g_part_cnt;
        }
      }
    }

    uint64_t row_id = zipf(table_size - 1, g_zipf_theta);
    assert(row_id < table_size);
    uint64_t primary_key = row_id * g_part_cnt + partition_id;
    assert(primary_key < g_synth_table_size);

    // Drawn before the duplicate check so that the sequence of random draws
    // per candidate stays the same as before.
    auto value = mrand->next() % (1<<8);

    // Make sure a single row is not accessed twice. The request object is
    // allocated only after this check; allocating it earlier leaked one
    // ycsb_request for every rejected duplicate.
    if (!all_keys.insert(primary_key).second) {
      // Need to have the full g_req_per_query amount
      continue;
    }

    ycsb_request * req = (ycsb_request*) mem_allocator.alloc(sizeof(ycsb_request));
    if (r_twr < g_txn_read_perc || r < g_tup_read_perc)
      req->acctype = RD;
    else
      req->acctype = WR;
    req->key = primary_key;
    req->value = value;

    partitions_accessed.insert(partition_id);
    rid ++;
    query->requests.add(req);
    i ++;
  }
  assert(query->requests.size() == g_req_per_query);

  // Sort the requests in key order (adjacent-swap bubble sort).
  if (g_key_order) {
    for (uint64_t i = 0; i < query->requests.size(); i++) {
      for (uint64_t j = query->requests.size() - 1; j > i ; j--) {
        if (query->requests[j]->key < query->requests[j-1]->key) {
          query->requests.swap(j,j-1);
        }
      }
    }
    //std::sort(query->requests.begin(),query->requests.end(),[](ycsb_request lhs, ycsb_request rhs) { return lhs.key < rhs.key;});
  }

  // Record every partition touched by the query.
  query->partitions.init(partitions_accessed.size());
  for (auto it = partitions_accessed.begin(); it != partitions_accessed.end(); ++it) {
    query->partitions.add(*it);
  }

  //query->print();
  return query;
}
/* Returns the FILE_ORDER entry selected by a zipf(FILE_ZIPF) draw. */
int spec_file()
{
    const int rank = zipf(FILE_ZIPF);
    return FILE_ORDER[rank];
}
/* Returns the value of a zipf(DIR_ZIPF) draw. */
int spec_dir()
{
    const int dir = zipf(DIR_ZIPF);
    return dir;
}
/*
 * Advance *pos to the argument following the current option and return it.
 * usage() is called when there is no further argument or when it starts
 * with '-'.
 */
static char *option_value(int argc, char *argv[], int *pos)
{
    if (++(*pos) >= argc || *argv[*pos] == '-') {
        usage();
    }
    return argv[*pos];
}

/*
 * Print a table of "value<TAB>weight" lines for ranks 1..cnt of the selected
 * distribution.
 *
 * Options:
 *   -zipf <alpha>        use zipf(rank, alpha)
 *   -pareto <alpha> <k>  use pareto(rank, alpha, k)
 *   -uniform             use uniform(rank, cnt)
 *   -cnt <n>             number of ranks to print (default 100)
 *   -scale <s>           multiply every value by s (default 1.0)
 *   -limit <l>           cap values above l (0 disables the cap)
 *   -trunc               truncate values to integers
 *   -weight <w>          weight printed next to every value (default 1)
 *   -v                   verbose: echo the options that follow it and print
 *                        summary statistics to stderr
 *
 * Returns 0.
 */
int main(int argc, char *argv[])
{
    int arg, rank;
    int dist = 0, count = 100, as_int = 0, verbose = 0, weight = 1;
    float alpha = 0, k = 0, value = 0, scale = 1.0, limit = 0;
    float total = 0, median = -1, lowest = 0, highest = 0;

    /*
     * parse the command line.
     */
    for (arg = 1; arg < argc; arg++) {
        const char *opt = argv[arg];

        if (strcmp(opt, "-zipf") == 0) {
            dist = DIST_ZIPF;
            alpha = atof(option_value(argc, argv, &arg));
            if (verbose) {
                fprintf(stderr, "zipf(a=%0.2f)\n", alpha);
            }
        } else if (strcmp(opt, "-pareto") == 0) {
            dist = DIST_PARETO;
            alpha = atof(option_value(argc, argv, &arg));
            k = atof(option_value(argc, argv, &arg));
            if (verbose) {
                fprintf(stderr, "pareto(a=%0.2f, k=%0.2f)\n", alpha, k);
            }
        } else if (strcmp(opt, "-uniform") == 0) {
            dist = DIST_UNIFORM;
            if (verbose) {
                fprintf(stderr, "uniform()\n");
            }
        } else if (strcmp(opt, "-cnt") == 0) {
            count = atoi(option_value(argc, argv, &arg));
            if (verbose) {
                fprintf(stderr, "cnt=%d\n", count);
            }
        } else if (strcmp(opt, "-scale") == 0) {
            scale = atof(option_value(argc, argv, &arg));
            if (verbose) {
                fprintf(stderr, "scale=%0.2f\n", scale);
            }
        } else if (strcmp(opt, "-limit") == 0) {
            limit = atof(option_value(argc, argv, &arg));
            if (verbose) {
                fprintf(stderr, "limit=%0.2f\n", limit);
            }
        } else if (strcmp(opt, "-trunc") == 0) {
            as_int = 1;
            if (verbose) {
                fprintf(stderr, "truncating values\n");
            }
        } else if (strcmp(opt, "-weight") == 0) {
            weight = atoi(option_value(argc, argv, &arg));
            if (verbose) {
                fprintf(stderr, "weight=%d\n", weight);
            }
        } else if (strcmp(opt, "-v") == 0) {
            verbose = 1;
            fprintf(stderr, "verbose output\n");
        } else {
            fprintf(stderr, "unknown option \"%s\"\n", opt);
            usage();
        }
    }

    for (rank = 1; rank <= count; rank++) {
        /*
         * raw value for this rank.
         */
        if (dist == DIST_ZIPF) {
            value = zipf(rank, alpha);
        } else if (dist == DIST_PARETO) {
            value = pareto(rank, alpha, k);
        } else if (dist == DIST_UNIFORM) {
            value = uniform(rank, count);
        } else {
            usage();
        }

        /*
         * scale value.
         */
        value = value * scale;

        /*
         * optionally truncate values exceeding limit.
         */
        if (limit && value > limit) {
            value = limit;
        }

        /*
         * optionally truncate to an integer, then print in the matching
         * format.
         */
        if (as_int) {
            value = (int)value;
            printf("%d\t%d\n", (int)value, weight);
        } else {
            printf("%0.2f\t%d\n", value, weight);
        }

        /*
         * statistics: the first rank is recorded as max, rank cnt/2 as
         * median and the last rank as min; total feeds the average.
         */
        if (rank == 1) {
            highest = value;
        }
        if (rank == count / 2) {
            median = value;
        }
        if (rank == count) {
            lowest = value;
        }
        total += value;
    }

    if (verbose) {
        fprintf(stderr, "total = %0.2f\n", total);
        fprintf(stderr, "min = %0.2f (%0.2f%% of total)\n", lowest, lowest / total * 100.0);
        fprintf(stderr, "max = %0.2f (%0.2f%% of total)\n", highest, highest / total * 100.0);
        fprintf(stderr, "average = %0.2f (%0.2f%% of total)\n", total/(float)count, (total/(float)count) / total * 100.0);
        fprintf(stderr, "median = %0.2f (%0.2f%% of total)\n", median, median / total * 100.0);
    }

    return 0;
}
//===== Helper: read one input token ========================================
// Reads one whitespace-delimited token of at most 255 characters from stdin
// into buf, which must have room for 256 bytes. Exits with status 1 when no
// token can be read (end of input or read error), so callers never parse an
// uninitialized buffer.
static void read_token(char buf[256])
{
  if (scanf("%255s", buf) != 1)
  {
    printf("ERROR in reading input \n");
    exit(1);
  }
}

//===== Main program ========================================================
// Prompts for an output file name, a random number seed, the alpha and N
// parameters and the number of values, then writes that many zipf(alpha, n)
// values to the output file, one per line. Returns 0 on success; exits with
// status 1 if the output file cannot be created or an input cannot be read.
int main(void)
{
  FILE   *fp;                   // File pointer to output file
  char   file_name[256];        // Output file name string
  char   temp_string[256];      // Temporary string variable
  double alpha;                 // Alpha parameter
  double n;                     // N parameter
  int    num_values;            // Number of values
  int    zipf_rv;               // Zipf random variable
  int    i;                     // Loop counter

  // Output banner
  printf("---------------------------------------- genzipf.c ----- \n");
  printf("- Program to generate Zipf random variables - \n");
  printf("-------------------------------------------------------- \n");

  // Prompt for output filename and then create/open the file
  printf("Output file name ===================================> ");
  read_token(file_name);
  fp = fopen(file_name, "w");
  if (fp == NULL)
  {
    printf("ERROR in creating output file (%s) \n", file_name);
    exit(1);
  }

  // Prompt for random number seed and then use it
  printf("Random number seed (greater than 0) ================> ");
  read_token(temp_string);
  rand_val((int) atoi(temp_string));

  // Prompt for alpha value
  printf("Alpha value ========================================> ");
  read_token(temp_string);
  alpha = atof(temp_string);

  // Prompt for N value
  printf("N value ============================================> ");
  read_token(temp_string);
  n = atoi(temp_string);

  // Prompt for number of values to generate
  printf("Number of values to generate =======================> ");
  read_token(temp_string);
  num_values = atoi(temp_string);

  // Output "generating" message
  printf("-------------------------------------------------------- \n");
  printf("- Generating samples to file - \n");
  printf("-------------------------------------------------------- \n");

  // Generate and output zipf random variables
  for (i=0; i<num_values; i++)
  {
    zipf_rv = zipf(alpha, n);
    fprintf(fp, "%d \n", zipf_rv);
  }

  // Output "done" message and close the output file
  printf("-------------------------------------------------------- \n");
  printf("- Done! \n");
  printf("-------------------------------------------------------- \n");
  fclose(fp);

  return 0;
}
void ycsb_query::gen_requests(uint64_t thd_id, workload * h_wl) { #if CC_ALG == HSTORE assert(g_virtual_part_cnt == g_part_cnt); #endif int access_cnt = 0; set<uint64_t> all_keys; part_num = 0; double r = 0; int64_t rint64 = 0; drand48_r(&_query_thd->buffer, &r); lrand48_r(&_query_thd->buffer, &rint64); if (r < g_perc_multi_part) { for (UInt32 i = 0; i < g_part_per_txn; i++) { if (i == 0 && FIRST_PART_LOCAL) part_to_access[part_num] = thd_id % g_virtual_part_cnt; else { part_to_access[part_num] = rint64 % g_virtual_part_cnt; } UInt32 j; for (j = 0; j < part_num; j++) if ( part_to_access[part_num] == part_to_access[j] ) break; if (j == part_num) part_num ++; } } else { part_num = 1; if (FIRST_PART_LOCAL) part_to_access[0] = thd_id % g_part_cnt; else part_to_access[0] = rint64 % g_part_cnt; } int rid = 0; for (UInt32 tmp = 0; tmp < g_req_per_query; tmp ++) { double r; drand48_r(&_query_thd->buffer, &r); ycsb_request * req = &requests[rid]; if (r < g_read_perc) { req->rtype = RD; } else if (r >= g_read_perc && r <= g_write_perc + g_read_perc) { req->rtype = WR; } else { req->rtype = SCAN; req->scan_len = SCAN_LEN; } // the request will access part_id. 
uint64_t ith = tmp * part_num / g_req_per_query; uint64_t part_id = part_to_access[ ith ]; uint64_t table_size = g_synth_table_size / g_virtual_part_cnt; uint64_t row_id = zipf(table_size - 1, g_zipf_theta); assert(row_id < table_size); uint64_t primary_key = row_id * g_virtual_part_cnt + part_id; req->key = primary_key; int64_t rint64; lrand48_r(&_query_thd->buffer, &rint64); req->value = rint64 % (1<<8); // Make sure a single row is not accessed twice if (req->rtype == RD || req->rtype == WR) { if (all_keys.find(req->key) == all_keys.end()) { all_keys.insert(req->key); access_cnt ++; } else continue; } else { bool conflict = false; for (UInt32 i = 0; i < req->scan_len; i++) { primary_key = (row_id + i) * g_part_cnt + part_id; if (all_keys.find( primary_key ) != all_keys.end()) conflict = true; } if (conflict) continue; else { for (UInt32 i = 0; i < req->scan_len; i++) all_keys.insert( (row_id + i) * g_part_cnt + part_id); access_cnt += SCAN_LEN; } } rid ++; } request_cnt = rid; // Sort the requests in key order. if (g_key_order) { for (int i = request_cnt - 1; i > 0; i--) for (int j = 0; j < i; j ++) if (requests[j].key > requests[j + 1].key) { ycsb_request tmp = requests[j]; requests[j] = requests[j + 1]; requests[j + 1] = tmp; } for (UInt32 i = 0; i < request_cnt - 1; i++) assert(requests[i].key < requests[i + 1].key); } }