Beispiel #1
0
// Loads the file named by argv[1] into a std::unordered_set<std::string>
// (through the openFile/readFile helpers, which are defined elsewhere) and
// prints statistics about the resulting hash table: element count, load
// factor, bucket count, the size of every non-empty bucket, and the contents
// of every bucket that holds more than one element.
//
// Returns 1 after printing a usage line when no file name was supplied.
int main(int argc, char* argv[])
{
  // argv[1] may only be touched when an argument was actually passed.
  if (argc < 2)
  {
    std::cerr << "Usage: " << (argc > 0 ? argv[0] : "program") << " <file>\n";
    return 1;
  }

  std::ifstream in;

  openFile(argv[1], in);

  const auto us = readFile<std::unordered_set<std::string>>(in);

  std::cout << "Read " << us.size() << " words from " << argv[1] << ".\n\n";
  std::cout << "Hashtable load factor is: " << us.load_factor() << ".\n";

  const auto bc = us.bucket_count();
  std::cout << "Bucket count is: " << bc << ".\n";

  // Use the container's own unsigned index type to avoid a signed/unsigned
  // comparison against the bucket count.
  for (decltype(us.bucket_count()) b = 0; b < bc; ++b)
  {
    const auto items = us.bucket_size(b);
    if (items == 0)
      continue;

    std::cout << "Bucket " << b << " contains " << items << " items.\n";

    // Only buckets with collisions get their contents listed.
    if (items > 1)
    {
      std::copy(us.cbegin(b), us.cend(b), std::ostream_iterator<std::string>(std::cout, " "));
      std::cout << '\n';
    }
  }

  return 0;
}
Beispiel #2
0
  /* Calculates an approximation of the p-th percentile of the original
   * distribution from the histogram.
   *
   * p must lie in [0.0, 1.0] (checked with assert). Returns 0.0 when the
   * histogram has no entries.
   *
   * The quality of the value depends on how much information was lost when
   * creating the histogram. Inside a bucket, linear interpolation is used:
   * the result is the lower edge of the target bucket plus the fraction x of
   * the bucket's entries that lie below the target rank.
   *
   * Buckets are taken to be laid out upwards from _min, each bucket_size()
   * wide, so bucket i starts at _min + i * bucket_size(). Both scan
   * directions use that same formula so they agree with each other.
   */
  double percentile( double p )
  {
    assert( p >= 0.0 && p <= 1.0 && "p must be within [0.0 1.0]" );
    if ( !num_entries() )
      return 0.0;

    // Rank (number of entries) that has to lie below the returned value.
    size_t target = static_cast<size_t>( p * num_entries() );

    // Performance Optimization: We assume a roughly balanced distribution,
    // so for p <= 0.5 we start from min counting upwards, otherwise from max counting downwards
    if ( p <= 0.5 )
    {
      size_t count = 0;
      for ( size_t i = 0, size = data().size(); i < size; ++i )
      {
        // Entries in all buckets strictly below bucket i.
        const size_t below = count;
        count += data()[ i ];
        if ( count >= target )
        {
          // We reached the target bucket: below <= target <= count.

          // Fraction of this bucket lying below the target rank. The casts
          // keep the division in floating point; dividing the raw counts
          // would truncate if they are integral.
          double x = data()[ i ] ? static_cast<double>( target - below ) / static_cast<double>( data()[ i ] ) : 0.0;
          assert( x >= 0.0 && x <= 1.0 );

          // Return result
          return _min + ( i + x ) * bucket_size();
        }
      }
    }
    else
    {
      size_t count = num_entries();
      for ( int i = static_cast< int >( data().size() ) - 1; i >= 0; --i )
      {
        // After the subtraction, count is the number of entries in all
        // buckets strictly below bucket i.
        count -= data()[ i ];
        if ( count <= target )
        {
          // We reached the target bucket.

          // Fraction of this bucket lying below the target rank (see above
          // for why the operands are cast to double).
          double x = data()[ i ] ? static_cast<double>( target - count ) / static_cast<double>( data()[ i ] ) : 0.0;
          assert( x >= 0.0 && x <= 1.0 );

          // Return result. Bucket indices count upwards from _min in this
          // branch as well, so the same lower-edge formula applies.
          return _min + ( i + x ) * bucket_size();
        }
      }
    }

    // Not reachable as long as the bucket counts add up to num_entries().
    assert( false ); return 0.0;
  }
Beispiel #3
0
/* Handler for the 'g' graphing command: buckets the values of one column
   into GRAPHROWS ranges and prints how many values landed in each range.
   col is the column number as supplied by the caller, one higher than the
   array index used internally.
*/
void
do_graph1(csv_t *D, int col) {
	/* per-bucket tallies, all starting at zero */
	int counts[GRAPHROWS] = {0};
	row_buckets_t buckets;
	/* shift the column number down by one to get the array index */
	int idx = col - 1;

	/* range covered by the chosen column */
	buckets.min = find_min(D, idx);
	buckets.max = find_max(D, idx);

	/* width of a single bucket, derived from that range */
	buckets.bucket_step_size =
		bucket_size(buckets.max, buckets.min, GRAPHROWS);

	/* record the lower end of every bucket's range */
	row_bucket_values(&buckets);

	/* tally how many values of the column fall into each bucket */
	fill_buckets(&buckets, D, idx, counts);

	/* draw the tallies, labelled with the column's heading */
	print_bucket_graph(&buckets, D->labs[idx], counts);
}
/*
 * Rebuilds array.
 *
 * Splits the NUM_BUCKETS sorted buckets in done[] into NUM_IO_CORES
 * consecutive groups of BUCKETS_PER_CORE buckets and spawns one thread per
 * group (thread_main). Each thread is told which buckets it owns
 * (args.i0 .. args.in, end exclusive) and at which offset of array[] its
 * output starts (args.j0). Returns once every thread has been joined.
 *
 * NOTE(review): buckets beyond NUM_IO_CORES*BUCKETS_PER_CORE are not
 * processed when NUM_BUCKETS is not a multiple of NUM_IO_CORES, and the
 * result of pthread_create() is not checked.
 */
static void rebuild_array(struct bucket **done, int *array)
{
	int j;    /* array[] offset. */
	int i, k; /* Loop index.     */
	
	#define BUCKETS_PER_CORE (NUM_BUCKETS/NUM_IO_CORES)
	
	/* Spawn threads. */
	j = 0;
	for (i = 0; i < NUM_IO_CORES; i++)
	{
		tdata[i].args.i0 = i*BUCKETS_PER_CORE;
		tdata[i].args.in = (i + 1)*BUCKETS_PER_CORE;
		/*
		 * Hand the thread its starting offset in array[]. thread_main
		 * reads args.j0; without this assignment the offset computed
		 * below was never passed on.
		 */
		tdata[i].args.j0 = j;
		tdata[i].args.done = done;
		tdata[i].args.array = array;
		pthread_create(&tdata[i].tid, NULL, thread_main, (void *)&tdata[i]);
		
		/* Advance the offset past everything this thread will write. */
		for (k = i*BUCKETS_PER_CORE; k < (i + 1)*BUCKETS_PER_CORE; k++)
			j += bucket_size(done[k]);
	}
	
	/* Join threads. */
	for (i = 0; i < NUM_IO_CORES; i++)
		pthread_join(tdata[i].tid, NULL);
}
Beispiel #5
0
/* Handler for the 'p' plot command: draws a 2d plot relating two columns.
   col1 is bucketed into GRAPHROWS ranges along the vertical axis and col2
   into GRAPHCOLS ranges along the horizontal axis. Both column numbers are
   one higher than the array indices used internally.
*/
void   
do_graph2(csv_t *D, int col1, int col2) {
	/* number of data points per (row bucket, column bucket) cell */
	int cells[GRAPHROWS][GRAPHCOLS] = {{0}};
	row_buckets_t rows;
	col_buckets_t cols;
	/* shift the column numbers down by one to get array indices */
	int row_idx = col1 - 1;
	int col_idx = col2 - 1;

	/* range covered by each of the two columns */
	rows.min = find_min(D, row_idx);
	rows.max = find_max(D, row_idx);
	cols.min = find_min(D, col_idx);
	cols.max = find_max(D, col_idx);

	/* width of one bucket on each axis, derived from those ranges */
	rows.bucket_step_size = bucket_size(rows.max, rows.min, GRAPHROWS);
	cols.bucket_step_size = bucket_size(cols.max, cols.min, GRAPHCOLS);

	/* record the lower end of every bucket's range on both axes */
	row_bucket_values(&rows);
	col_bucket_values(&cols);

	/* count the data points that fall into each cell */
	fill_plot_array(&rows, &cols, D, row_idx, col_idx, cells);

	/* draw the finished plot */
	print_2d_plot(D, &rows, row_idx, col_idx, cells);
}
/*
 * Worker thread entry point.
 *
 * args points to this thread's struct tdata. The thread walks the buckets
 * done[i0] .. done[in - 1], merging each one into array[] at the current
 * offset (which starts at j0) and then moving the offset forward by the
 * size of the bucket just merged.
 */
static void *thread_main(void *args)
{
	struct tdata *self = (struct tdata *)args; /* Thread's data.      */
	int bucket = self->args.i0;                /* Current bucket.     */
	int offset = self->args.j0;                /* Offset in array[].  */

	/* Rebuild this thread's part of the array. */
	while (bucket < self->args.in)
	{
		bucket_merge(self->args.done[bucket], &self->args.array[offset]);
		offset += bucket_size(self->args.done[bucket]);
		bucket++;
	}

	pthread_exit(NULL);
	return (NULL);
}
Beispiel #7
0
/*
 * Splits bucket1 in two.
 *
 * Every entry of bucket1 is handed to split_funct(data, key). Entries for
 * which it returns false stay in bucket1; entries for which it returns true
 * are placed in a newly allocated bucket, which is returned.
 *
 * bucket1 and split_funct must not be NULL (checked with assert). data is
 * passed through to split_funct untouched.
 *
 * Returns the new bucket, or NULL if an allocation or insertion failed. On
 * failure bucket1's head is put back exactly as it was on entry; if the
 * failure happened before bucket1 was touched, it is simply left alone.
 *
 * NOTE(review): entries that had already been re-inserted into bucket1
 * before an insertion failure are not released here - confirm against
 * bucket_init()/bucket_node_insert() whether that storage can leak.
 */
bucket_t* bucket_split(bucket_t* bucket1, bool (*split_funct)(void* data, unsigned int key), void* data)
{
	assert(bucket1 != NULL && split_funct != NULL);
	
	unsigned int i;
	unsigned int curr_node_size;
	bool error = false;
	bool moved = false; /* true once bucket1's head was moved to src_bucket */
	
	bucket_t* bucket2 = NULL;
	bucket_t* dst_bucket = NULL;
	
	bucket_t* src_bucket = NULL;
	bucket_node_t* src_bucket_node = NULL;
	
	/* Allocate memory for the new bucket */
	
	if((bucket2 = bucket_init(bucket1->node_capacity)) == NULL)
		error = true;
	
	/* Allocate a head that will take over the old bucket's contents */
	
	if(!error && (src_bucket = bucket_init(bucket1->node_capacity)) == NULL)
		error = true;
	
	if(!error)
	{
		/* Move internal data from the old bucket' head to the new
		   bucket head. No deep copy is done, the chain is moved too. */
		
		*src_bucket = *bucket1;
		
		/* Reset the given bucket's head */
		
		*bucket1 = *bucket2;
		moved = true;
		
		/* Initialize src_bucket_node */
		
		src_bucket_node = src_bucket->chain;
	}
	
	/* For every bucket_node in the chain... */
	
	while(!error && src_bucket_node != NULL)
	{
		/* Get the current bucket_node's size */
		
		curr_node_size = bucket_node_size(src_bucket, src_bucket_node);
		
		/* For every entry in the bucket_node... */
		
		for(i=0; !error && i < curr_node_size; i++)
		{
			/* Call split_funct to get the target bucket */
			
			if(!(*split_funct)(data, src_bucket_node->key[i]))
				dst_bucket = bucket1;
			else
				dst_bucket = bucket2;
			
			/* Append to the appropriate new bucket (entries are already sorted) */
			
			if(!bucket_node_insert(dst_bucket, bucket_size(dst_bucket), src_bucket_node->key[i], src_bucket_node->value[i]))
				error = true;
		}
		
		src_bucket_node = src_bucket_node->overflow;
	}
	
	if(error)
	{
		if(moved)
		{
			/* Restore the old bucket in its original condition. This is
			   only valid when the head was really moved: otherwise
			   src_bucket is either NULL or a fresh empty head, and
			   copying it would crash or wipe bucket1. */
			
			*bucket1 = *src_bucket;
			
			/* Prevent old bucket chain from being destroyed */
			
			src_bucket->chain = NULL;
		}
		
		/* Free allocated memory */
		
		if(bucket2 != NULL)
			bucket_destroy(&bucket2);
		
		/* Make sure the caller sees NULL on failure */
		
		bucket2 = NULL;
	}
	
	/* Destroy the old bucket (if it was allocated at all) */
	
	if(src_bucket != NULL)
		bucket_destroy(&src_bucket);
	
	return bucket2;
}
/*
 * Bucket-sort algorithm.
 */
extern void bucketsort(int *array, int n)
{
	int max;                  /* Maximum number.      */
	int i, j;                 /* Loop indexes.        */
	int range;                /* Bucket range.        */
	struct minibucket *minib; /* Working mini-bucket. */
	struct message *msg;      /* Working message.     */
	struct bucket **todo;     /* Todo buckets.        */
	struct bucket **done;     /* Done buckets.        */
	uint64_t start, end;      /* Timers.              */
	
	/* Setup slaves. */
	open_noc_connectors();
	spawn_slaves();
	sync_slaves();
	
	todo = smalloc(NUM_BUCKETS*sizeof(struct bucket *));
	done = smalloc(NUM_BUCKETS*sizeof(struct bucket *));
	for (i = 0; i < NUM_BUCKETS; i++)
	{
		done[i] = bucket_create();
		todo[i] = bucket_create();
	}

	/* Find max number in the array. */
	start = timer_get();
	max = INT_MIN;
	for (i = 0; i < n; i++)
	{
		/* Found. */
		if (array[i] > max)
			max = array[i];
	}

	/* Distribute numbers. */
	range = max/NUM_BUCKETS;
	for (i = 0; i < n; i++)
	{
		j = array[i]/range;
		if (j >= NUM_BUCKETS)
			j = NUM_BUCKETS - 1;
		
		bucket_insert(&todo[j], array[i]);
	}
	end = timer_get();
	master += timer_diff(start, end);

	/* Sort buckets. */
	j = 0;
	for (i = 0; i < NUM_BUCKETS; i++)
	{	
		while (bucket_size(todo[i]) > 0)
		{
			minib = bucket_pop(todo[i]);
			
			/* Send message. */
			msg = message_create(SORTWORK, i, minib->size);
			message_send(outfd[j], msg);
			message_destroy(msg);
			
			/* Send data. */
			communication += 
				data_send(outfd[j], minib->elements, minib->size*sizeof(int));
			minibucket_destroy(minib);
			
			j++;
			
			/* 
			 * Slave processes are busy.
			 * So let's wait for results.
			 */
			if (j == nclusters)
			{	
				/* Receive results. */
				for (/* NOOP */ ; j > 0; j--)
				{					
					/* Receive message. */
					msg = message_receive(infd[nclusters - j]);
					
					/* Receive mini-bucket. */
					minib = minibucket_create();
					minib->size = msg->u.sortresult.size;
					communication += data_receive(infd[nclusters -j], minib->elements, 
													minib->size*sizeof(int));
					
					bucket_push(done[msg->u.sortresult.id], minib);
					
					message_destroy(msg);
				}
			}
		}
	}

	/* Receive results. */
	for (/* NOOP */ ; j > 0; j--)
	{						
		/* Receive message. */
		msg = message_receive(infd[j - 1]);
					
		/* Receive bucket. */
		minib = minibucket_create();
		minib->size = msg->u.sortresult.size;
		communication += 
			data_receive(infd[j - 1], minib->elements, minib->size*sizeof(int));
					
		bucket_push(done[msg->u.sortresult.id], minib);
					
		message_destroy(msg);
	}

	start = timer_get();
	rebuild_array(done, array);
	end = timer_get();
	master += timer_diff(start, end);
	
	/* House keeping. */
	for (i = 0; i < NUM_BUCKETS; i++)
	{
		bucket_destroy(todo[i]);
		bucket_destroy(done[i]);
	}
	free(done);
	free(todo);
	join_slaves();
	close_noc_connectors();
}