Beispiel #1
0
/**
 * Run a series of range reads against a cog, drawing both endpoints of every
 * range from zipf(alpha, range).
 *
 * @param cog - the cog to read from
 * @param alpha - zipfian rate of decay
 * @param number - how many reads to perform
 * @param range - the key range the endpoints are drawn from
 * @return the cog returned by the last crack() call, or the input cog when
 *         no read was performed
 */
struct cog *zipfianReads(struct cog *cog, double alpha, long number, long range) {
  struct cog *current = cog;
  for (long remaining = number; remaining > 0; --remaining) {
    const long first = zipf(alpha, range);
    const long second = zipf(alpha, range);

    /* Order the two draws so the read always covers [low, high]. */
    long low = first;
    long high = second;
    if (first > second) {
      low = second;
      high = first;
    }

    current = crack(current, low, high);
  }
  return current;
}
Beispiel #2
0
/**
 * Entry point of the entropy-estimator experiment.
 *
 * Reads the experiment id from the command line ("exp", default 1), builds
 * the matching distribution over k symbols, configures the Entropy estimator
 * and runs TEST_fixed_P_RMSE with 50 trials.
 *
 * Experiment ids: 0 = uniform, 1 = zipf, 2 = zipfd5, 3 = mixgeozipf.
 *
 * @return 0 on success, 1 when the experiment id is not one of the above.
 */
int main(int argc, char *argv[])
{
    int k = 100000;
    int exp = 1;
    // double c0 = 2, c1 = 3, c2 = 1;
    // double c0 = 1.3, c1 = 8, c2 = 1.5;

    std::CommandLine cmd;
    cmd.AddValue ("exp",  "", exp);
    cmd.Parse (argc, argv);

    // Set sample size for test: 1000 doubled 15 times. Only this final
    // size is pushed, so the test runs for a single sample size.
    std::vector<int> testN;
    long n = 1000;
    for ( int doubling = 0; doubling < 15; doubling++ )
    {
        n *= 2;
    }
    testN.push_back( n );

    // Set distribution for test
    std::vector<double> p;
    switch(exp)
    {
    case 0: p = uniform(k); break;
    case 1: p = zipf(k); break;
    case 2: p = zipfd5(k); break;
    case 3: p = mixgeozipf(k); break;
    default:
        // Without this branch an unknown id left p empty and the test ran
        // on an empty distribution without any diagnostic.
        fprintf(stderr, "Unknown experiment id %d (expected 0-3).\n", exp);
        return 1;
    }

    // Set estimator
    Entropy entropy( k );
    entropy.setDegree( 18 );
    entropy.setInterval( 40 );
    entropy.setThreshold( 18 );
    printf("Alphabet size=%d.\n", entropy.getAlphabetSize());
    printf("Polynoimal degree=%d.\n", entropy.getDegree());
    printf("Approximation interval=[0,%.2f/n].\n", entropy.getInterval());
    printf("Plug-in threshold=%d.\n",(int)floor(entropy.getThreshold())+1);
    printf("Unit: bits\n");

    // TEST_fixed_P(p, entropy, testN);

    const int trials = 50;
    TEST_fixed_P_RMSE(p, entropy, testN, trials);

    return 0;
}
Beispiel #3
0
// Marks in vet[] whether an event happens or not, according to Zipf
// probabilities.
//
// vet - output flags; entries 1..u-1 are set to 1 or 0 (entry 0 is not written)
// u   - number of universities
// i   - Ui, forwarded as the second argument of zipf()
// p   - size of the vector to be filled with the probabilities; the draw
//       loop below runs p-1 times
//
// Exits with status 1 if the scratch buffer cannot be allocated.
void DistribZipf(int vet[], int u, int i, int p) {

    double rand_prob;
    int j;
    int k;

    // One slot per university. The buffer used to be sized i-p+1, which has
    // no relation to the u entries written below and could overflow the heap.
    // At least one slot is requested so that a non-positive u is not mistaken
    // for an allocation failure.
    size_t count = (u > 0) ? (size_t)u : 1;
    double *probs = (double *) malloc(count*sizeof(double));
    if (probs == NULL) exit(1);

    // Fill the buffer with the probability for each position
    for (k=0;k<u;k++){
        probs[k]=zipf(u,i);
    }

    // For every draw, generate a random probability in [0, 1] and flag each
    // position whose probability exceeds it.
    // NOTE(review): both loops start at 1, so only p-1 draws are made, vet[0]
    // is never written, and each draw overwrites the flags of the previous
    // one. Left as-is because the intended semantics are not visible here;
    // confirm against the caller.
    for (j=1;j<p;j++){
        rand_prob = (double)rand()/(double)RAND_MAX;
        for (k=1;k<u;k++){
            vet[k] = (rand_prob < probs[k]) ? 1 : 0;
        }
    }

    // The former trailing check read probs[k] after this free (and past the
    // end of the buffer); both of its outcomes returned, so it was dropped.
    free(probs);
}
Beispiel #4
0
/**
 * Build one YCSB query of g_req_per_query requests whose row ids come from
 * zipf(table_size - 1, g_zipf_theta).
 *
 * Every request targets a distinct primary key; a candidate whose key was
 * already chosen is discarded and re-drawn until the query is full. When
 * g_key_order is set the requests are sorted by ascending key. The set of
 * partitions touched is stored in query->partitions.
 *
 * @param home_partition_id partition used for the first request when
 *                          FIRST_PART_LOCAL is set
 * @param h_wl              not used by this function
 * @return the newly allocated query (obtained from mem_allocator)
 */
BaseQuery * YCSBQueryGenerator::gen_requests_zipf(uint64_t home_partition_id, Workload * h_wl) {
  YCSBQuery * query = (YCSBQuery*) mem_allocator.alloc(sizeof(YCSBQuery));
  new(query) YCSBQuery();
  query->requests.init(g_req_per_query);

  set<uint64_t> all_keys;
  set<uint64_t> partitions_accessed;
  uint64_t table_size = g_synth_table_size / g_part_cnt;

  // Drawn once per query: below g_txn_read_perc every request is a read.
  double r_twr = (double)(mrand->next() % 10000) / 10000;

  // Number of requests accepted so far. Looping on this counter replaces the
  // former "i--; continue;" retry, which relied on unsigned wrap-around.
  UInt32 accepted = 0;
  while (accepted < g_req_per_query) {
    // Per-request draw deciding read vs. write.
    double r = (double)(mrand->next() % 10000) / 10000;

    uint64_t partition_id;
    if (FIRST_PART_LOCAL && accepted == 0) {
      partition_id = home_partition_id;
    } else {
      partition_id = mrand->next() % g_part_cnt;
      if (g_strict_ppt && g_part_per_txn <= g_part_cnt) {
        // Re-draw while the candidate would break the partitions-per-txn
        // target: either there is still room for new partitions but the
        // candidate was already used, or the quota is reached and the
        // candidate is new.
        while ((partitions_accessed.size() < g_part_per_txn && partitions_accessed.count(partition_id) > 0) ||
               (partitions_accessed.size() == g_part_per_txn && partitions_accessed.count(partition_id) == 0)) {
          partition_id = mrand->next() % g_part_cnt;
        }
      }
    }

    uint64_t row_id = zipf(table_size - 1, g_zipf_theta);
    assert(row_id < table_size);
    uint64_t primary_key = row_id * g_part_cnt + partition_id;
    assert(primary_key < g_synth_table_size);

    // Drawn before the duplicate check so mrand is consumed in the same
    // order as before, whether or not the candidate is kept.
    auto value = mrand->next() % (1<<8);

    // Make sure a single row is not accessed twice. The request object is
    // only allocated once the key is known to be new; it used to be
    // allocated up front and abandoned whenever the key was a duplicate.
    if (!all_keys.insert(primary_key).second) {
      // Need to have the full g_req_per_query amount
      continue;
    }

    ycsb_request * req = (ycsb_request*) mem_allocator.alloc(sizeof(ycsb_request));
    if (r_twr < g_txn_read_perc || r < g_tup_read_perc)
      req->acctype = RD;
    else
      req->acctype = WR;
    req->key = primary_key;
    req->value = value;

    partitions_accessed.insert(partition_id);
    query->requests.add(req);
    accepted++;
  }
  assert(query->requests.size() == g_req_per_query);

  // Sort the requests in key order (bubble sort via the container's swap()).
  if (g_key_order) {
    for (uint64_t i = 0; i < query->requests.size(); i++) {
      for (uint64_t j = query->requests.size() - 1; j > i; j--) {
        if (query->requests[j]->key < query->requests[j-1]->key) {
          query->requests.swap(j, j-1);
        }
      }
    }
    //std::sort(query->requests.begin(),query->requests.end(),[](ycsb_request lhs, ycsb_request rhs) { return lhs.key < rhs.key;});
  }

  query->partitions.init(partitions_accessed.size());
  for (auto it = partitions_accessed.begin(); it != partitions_accessed.end(); ++it) {
    query->partitions.add(*it);
  }

  //query->print();
  return query;

}
Beispiel #5
0
/* Returns the FILE_ORDER entry indexed by the value of zipf(FILE_ZIPF). */
int spec_file() {
  const int slot = zipf(FILE_ZIPF);
  return FILE_ORDER[slot];
}
Beispiel #6
0
/* Returns the value of zipf(DIR_ZIPF). */
int spec_dir() {
  const int dir = zipf(DIR_ZIPF);
  return dir;
}
Beispiel #7
0
/*
 * Step *idx to the argument following the current option and return it.
 * usage() is called when that argument is missing or starts with '-'
 * (i.e. looks like another option).
 */
static const char *
option_value(int argc, char *argv[], int *idx)
{
	if (++(*idx) >= argc || *argv[*idx] == '-') usage();
	return argv[*idx];
}

/*
 * Print cnt "value<TAB>weight" lines for the selected distribution.
 *
 * Options:
 *   -zipf <alpha>        values from zipf(i, alpha)
 *   -pareto <alpha> <k>  values from pareto(i, alpha, k)
 *   -uniform             values from uniform(i, cnt)
 *   -cnt <n>             number of values to print (default 100)
 *   -scale <s>           multiply each value by s (default 1.0)
 *   -limit <l>           clamp values above l (0 disables the clamp)
 *   -trunc               truncate values to integers
 *   -weight <w>          weight printed next to each value (default 1)
 *   -v                   verbose: echo options seen after -v and print
 *                        summary statistics to stderr
 *
 * Returns 0.
 */
int
main(int argc, char *argv[])
{
	int i, which = 0, cnt = 100, trunc = 0, verbose = 0, weight = 1;
	float alpha = 0, k = 0, value = 0, scale = 1.0, limit = 0;
	float total = 0, median = -1, min = 0, max = 0;
	int median_idx;

	for (i=1 ; i<argc ; i++) {
		if (strcmp(argv[i], "-zipf") == 0) {
			which = DIST_ZIPF;
			alpha = atof(option_value(argc, argv, &i));
			if (verbose) {
				fprintf(stderr, "zipf(a=%0.2f)\n",
					alpha);
			}
		}
		else if (strcmp(argv[i], "-pareto") == 0) {
			which = DIST_PARETO;
			alpha = atof(option_value(argc, argv, &i));
			k = atof(option_value(argc, argv, &i));
			if (verbose) {
				fprintf(stderr, "pareto(a=%0.2f, k=%0.2f)\n",
					alpha, k);
			}
		}
		else if (strcmp(argv[i], "-uniform") == 0) {
			which = DIST_UNIFORM;
			if (verbose) {
				fprintf(stderr, "uniform()\n");
			}
		}
		else if (strcmp(argv[i], "-cnt") == 0) {
			cnt = atoi(option_value(argc, argv, &i));
			if (verbose) {
				fprintf(stderr, "cnt=%d\n", cnt);
			}
		}
		else if (strcmp(argv[i], "-scale") == 0) {
			scale = atof(option_value(argc, argv, &i));
			if (verbose) {
				fprintf(stderr, "scale=%0.2f\n", scale);
			}
		}
		else if (strcmp(argv[i], "-limit") == 0) {
			limit = atof(option_value(argc, argv, &i));
			if (verbose) {
				fprintf(stderr, "limit=%0.2f\n", limit);
			}
		}
		else if (strcmp(argv[i], "-trunc") == 0) {
			trunc = 1;
			if (verbose) {
				fprintf(stderr, "truncating values\n");
			}
		}
		else if (strcmp(argv[i], "-weight") == 0) {
			weight = atoi(option_value(argc, argv, &i));
			if (verbose) {
				fprintf(stderr, "weight=%d\n", weight);
			}
		}
		else if (strcmp(argv[i], "-v") == 0) {
			verbose = 1;
			fprintf(stderr, "verbose output\n");
		}
		else {
			fprintf(stderr, "unknown option \"%s\"\n", argv[i]);
			usage();
		}
	}

	/*
	 * 1-based position of the middle value. Rounding up makes odd counts
	 * pick the true middle (cnt/2 picked one position too early and never
	 * matched at all for cnt == 1); even counts are unchanged.
	 */
	median_idx = cnt / 2 + cnt % 2;

	for (i=1 ; i<=cnt ; i++) {
		switch (which) {
		case DIST_ZIPF:
			value = zipf(i, alpha);
			break;
		case DIST_PARETO:
			value = pareto(i, alpha, k);
			break;
		case DIST_UNIFORM:
			value = uniform(i, cnt);
			break;
		default:
			usage();
			break;
		}

		/*
		 * scale value.
		 */
		value = value * scale;

		/*
		 * optionally truncate values exceeding limit.
		 */
		if (limit && value > limit) {
			value = limit;
		}

		/*
		 * optionally truncate values to integers, and print the value
		 * with the matching decimal format.
		 */
		if (trunc) {
			value = (int)value;
			printf("%d\t%d\n", (int)value, weight);
		} else {
			printf("%0.2f\t%d\n", value, weight);
		}

		/*
		 * statistics: max, median, min, and total (for avg).
		 * The first value is taken as max and the last as min, which
		 * presumes the generated sequence is non-increasing.
		 */
		if (i == 1) {
			max = value;
		}
		if (i == median_idx) {
			median = value;
		}
		if (i == cnt) {
			min = value;
		}
		total += value;
	}

	if (verbose) {
		fprintf(stderr, "total = %0.2f\n", total);
		fprintf(stderr, "min = %0.2f (%0.2f%% of total)\n", 
			min, min / total * 100.0);
		fprintf(stderr, "max = %0.2f (%0.2f%% of total)\n", 
			max, max / total * 100.0);
		fprintf(stderr, "average = %0.2f (%0.2f%% of total)\n", 
			total/(float)cnt, (total/(float)cnt) / total * 100.0);
		fprintf(stderr, "median = %0.2f (%0.2f%% of total)\n", 
			median, median / total * 100.0);
	}

	return 0;
}
Beispiel #8
0
//===== Read one whitespace-delimited token from stdin =======================
//  buffer must hold at least 256 bytes. Input is capped at 255 characters so
//  an over-long token cannot overrun it (the excess stays in stdin and is
//  picked up by the next read). Exits with status 1 when no token can be read.
static void read_token(char *buffer)
{
  if (scanf("%255s", buffer) != 1)
  {
    printf("ERROR in reading input \n");
    exit(1);
  }
}

//===== Main program ========================================================
//  Prompts for an output file name, a random number seed, alpha, N and the
//  number of values, then writes that many zipf(alpha, n) values to the
//  output file, one per line. Returns 0 on success; exits with status 1 if
//  the output file cannot be created or an answer cannot be read.
int main(void)
{
  FILE   *fp;                   // File pointer to output file
  char   file_name[256];        // Output file name string
  char   temp_string[256];      // Temporary string variable 
  double alpha;                 // Alpha parameter
  double n;                     // N parameter
  int    num_values;            // Number of values
  int    zipf_rv;               // Zipf random variable
  int    i;                     // Loop counter

  // Output banner
  printf("---------------------------------------- genzipf.c ----- \n");
  printf("-     Program to generate Zipf random variables        - \n");
  printf("-------------------------------------------------------- \n");

  // Prompt for output filename and then create/open the file
  printf("Output file name ===================================> ");
  read_token(file_name);
  fp = fopen(file_name, "w");
  if (fp == NULL)
  {
    printf("ERROR in creating output file (%s) \n", file_name);
    exit(1);
  }

  // Prompt for random number seed and then use it
  printf("Random number seed (greater than 0) ================> ");
  read_token(temp_string);
  rand_val((int) atoi(temp_string));

  // Prompt for alpha value
  printf("Alpha value ========================================> ");
  read_token(temp_string);
  alpha = atof(temp_string);

  // Prompt for N value
  printf("N value ============================================> ");
  read_token(temp_string);
  n = atoi(temp_string);

  // Prompt for number of values to generate
  printf("Number of values to generate =======================> ");
  read_token(temp_string);
  num_values = atoi(temp_string);

  // Output "generating" message
  printf("-------------------------------------------------------- \n");
  printf("-  Generating samples to file                          - \n");
  printf("-------------------------------------------------------- \n");

  // Generate and output zipf random variables
  for (i=0; i<num_values; i++)
  {
    zipf_rv = zipf(alpha, n);
    fprintf(fp, "%d \n", zipf_rv);
  }

  // Output "done" message and close the output file
  printf("-------------------------------------------------------- \n");
  printf("-  Done! \n");
  printf("-------------------------------------------------------- \n");
  fclose(fp);

  return 0;
}
Beispiel #9
0
/**
 * Fill this query with up to g_req_per_query YCSB requests.
 *
 * First chooses the partitions to access (several when the initial draw falls
 * below g_perc_multi_part, otherwise one), then generates read / write / scan
 * requests whose row ids come from zipf(table_size - 1, g_zipf_theta). A
 * candidate that would touch a key already used by this query is dropped, so
 * request_cnt may end up smaller than g_req_per_query. When g_key_order is
 * set the requests are sorted by ascending key.
 *
 * @param thd_id  used to pick the local partition when FIRST_PART_LOCAL is set
 * @param h_wl    not used by this function
 */
void ycsb_query::gen_requests(uint64_t thd_id, workload * h_wl) {
#if CC_ALG == HSTORE
	assert(g_virtual_part_cnt == g_part_cnt);
#endif
	set<uint64_t> all_keys;
	part_num = 0;
	double r = 0;
	int64_t rint64 = 0;
	drand48_r(&_query_thd->buffer, &r);
	lrand48_r(&_query_thd->buffer, &rint64);
	if (r < g_perc_multi_part) {
		for (UInt32 i = 0; i < g_part_per_txn; i++) {
			if (i == 0 && FIRST_PART_LOCAL)
				part_to_access[part_num] = thd_id % g_virtual_part_cnt;
			else {
				part_to_access[part_num] = rint64 % g_virtual_part_cnt;
				// Draw a fresh number for the next pick. The single
				// pre-drawn value used to be reused on every iteration,
				// so all later picks were duplicates of the first and
				// were discarded by the check below.
				lrand48_r(&_query_thd->buffer, &rint64);
			}
			// Keep the candidate only if it is not already in the list.
			UInt32 j;
			for (j = 0; j < part_num; j++) 
				if ( part_to_access[part_num] == part_to_access[j] )
					break;
			if (j == part_num)
				part_num ++;
		}
	} else {
		part_num = 1;
		if (FIRST_PART_LOCAL)
			part_to_access[0] = thd_id % g_part_cnt;
		else
			part_to_access[0] = rint64 % g_part_cnt;
	}

	// Index of the next free slot in requests[]; only advanced when a
	// candidate is accepted, so a rejected candidate's slot is reused.
	int rid = 0;
	for (UInt32 tmp = 0; tmp < g_req_per_query; tmp ++) {		
		double op_draw;
		drand48_r(&_query_thd->buffer, &op_draw);
		ycsb_request * req = &requests[rid];
		if (op_draw < g_read_perc) {
			req->rtype = RD;
		} else if (op_draw <= g_write_perc + g_read_perc) {
			req->rtype = WR;
		} else {
			req->rtype = SCAN;
			req->scan_len = SCAN_LEN;
		}

		// the request will access part_id; attempts are spread evenly
		// over the chosen partitions.
		uint64_t ith = tmp * part_num / g_req_per_query;
		uint64_t part_id = 
			part_to_access[ ith ];
		uint64_t table_size = g_synth_table_size / g_virtual_part_cnt;
		uint64_t row_id = zipf(table_size - 1, g_zipf_theta);
		assert(row_id < table_size);
		uint64_t primary_key = row_id * g_virtual_part_cnt + part_id;
		req->key = primary_key;
		int64_t value_draw;
		lrand48_r(&_query_thd->buffer, &value_draw);
		req->value = value_draw % (1<<8);
		// Make sure a single row is not accessed twice
		if (req->rtype == RD || req->rtype == WR) {
			if (!all_keys.insert(req->key).second)
				continue;
		} else {
			// Scan keys are built with g_virtual_part_cnt, the same
			// stride as req->key above. They used to be built with
			// g_part_cnt, so the duplicate check compared keys from two
			// different schemes whenever the two counts differ.
			bool conflict = false;
			for (UInt32 i = 0; i < req->scan_len; i++) {
				primary_key = (row_id + i) * g_virtual_part_cnt + part_id;
				if (all_keys.find( primary_key )
					!= all_keys.end())
					conflict = true;
			}
			if (conflict) continue;
			for (UInt32 i = 0; i < req->scan_len; i++)
				all_keys.insert( (row_id + i) * g_virtual_part_cnt + part_id);
		}
		rid ++;
	}
	request_cnt = rid;

	// Sort the requests in key order.
	if (g_key_order) {
		for (int i = request_cnt - 1; i > 0; i--) 
			for (int j = 0; j < i; j ++)
				if (requests[j].key > requests[j + 1].key) {
					ycsb_request swapped = requests[j];
					requests[j] = requests[j + 1];
					requests[j + 1] = swapped;
				}
		// "i + 1 < request_cnt" rather than "i < request_cnt - 1" so an
		// empty request list cannot underflow the bound.
		for (UInt32 i = 0; i + 1 < request_cnt; i++)
			assert(requests[i].key < requests[i + 1].key);
	}

}