Ejemplo n.º 1
0
/*
 * Demo driver for optimized_matrix_multiplication().
 *
 * Builds a single heap buffer laid out as
 *
 *   | width (int) | A (WIDTH*WIDTH floats) | B (same) | C (same) |
 *
 * fills A and B with init_mtx(), hands the whole buffer to the multiply
 * routine and prints C afterwards.
 *
 * Returns 0 on success, 1 if the buffer cannot be allocated.
 */
int main()
{
	int matrix_width = WIDTH,
	    matrix_size = WIDTH * WIDTH;
	void *params = malloc(sizeof(int) +
		3 * matrix_size * sizeof(float));

	/* The very next statement writes through params, so bail out early. */
	if (params == NULL) {
		fprintf(stderr, "Failed to allocate the parameter buffer\n");
		return 1;
	}

	/* The matrices start right after the leading width field. */
	*((int *) params) = matrix_width;
	float *A = (float *) (((char *) params) + sizeof(int));
	float *B = A + matrix_size;
	float *C = B + matrix_size;

	init_mtx(A, matrix_size);
	init_mtx(B, matrix_size);

	/* Only A is printed; the message below states B looks the same. */
	printf("Both Matrices A and B look like this:\n");
	print_mtx(A, matrix_width);
	printf("\n");

	/*
	 * NOTE(review): C is not initialised here; this assumes the multiply
	 * routine overwrites every element of C -- confirm against its
	 * implementation.
	 */
	optimized_matrix_multiplication(params);

	printf("Result looks like this:\n");
	print_mtx(C, matrix_width);

	free(params);
	return 0;
}
Ejemplo n.º 2
0
/*
 * Command-line driver: <program> <data_file> <dimension>
 *
 * Opens the data file, builds a matrix from it with prepare_prox_mtx(),
 * generates a first guess, runs metric_mds() (with the J_ee/DJ_ee pair when
 * dim > 1, the OneJ_ee/DOneJ_ee pair otherwise), converts the result with
 * mds2res(), passes it to compare_results() and finally prints it.
 *
 * Exits with status 1 on a usage error, an unreadable data file or a
 * non-positive dimension; returns 0 otherwise.
 */
int main(int ac, char **av)
{
	FILE *fp;
	matrix prox=NULL,prepare_prox_mtx(),first_guess=NULL;
	matrix TMP=NULL,RES=NULL,mds2res();
	int dim ;


	if (ac != 3) {
		printf("USAGE: %s <data_file> <dimenssion> \n",av[0]);
		exit(1);
	}

	fp = fopen(av[1],"r");
	if (fp == NULL) {
		/* Do not hand a NULL stream to prepare_prox_mtx(). */
		perror(av[1]);
		exit(1);
	}

	/* atoi() yields 0 for non-numeric input, which is rejected here too. */
	dim = atoi(av[2]);
	if (dim < 1) {
		printf("USAGE: %s <data_file> <dimenssion> \n",av[0]);
		exit(1);
	}

	/*
	 * NOTE(review): fp is never closed in this function; whether
	 * prepare_prox_mtx() takes ownership of the stream is not visible
	 * here -- confirm before adding an fclose().
	 */
	prox = prepare_prox_mtx(fp,dim);

	/* pnts is defined outside this function. */
	first_guess = generate_first_guess(dim,pnts,50);
	if (dim >1 ) TMP = metric_mds(dim,pnts,first_guess,prox,J_ee,DJ_ee);
	else  TMP = metric_mds(dim,pnts,first_guess,prox,OneJ_ee,DOneJ_ee);

	RES = mds2res(TMP,dim);
	compare_results(RES,prox,dim); 

	/* RES is deliberately not released here: it is still printed below. */
	free_all_mtx(dim,&TMP,&prox,&first_guess);

        printf("Points coordinates are: \n \n");
        print_mtx(RES);   

	return 0;
}
Ejemplo n.º 3
0
/*
 * SCIF client: sends a one-word request, receives a length-prefixed reply
 * and prints its contents.
 *
 * Reply layout (after the leading size_t that carries its length):
 *
 *   --------------------------------------------------
 *   | type (int) | threads (int) | id (int) | params |
 *   --------------------------------------------------
 *
 * The interpretation of params depends on type; see the switch below.
 *
 * argc/argv are unused.  Returns EXIT_SUCCESS, or terminates with
 * EXIT_FAILURE when a transfer, the allocation or scif_close() fails or
 * when the reply is too short to hold the fixed header.
 */
int main(int argc, char *argv[])
{
	scif_epd_t epd;
	int bytes_sent, bytes_received, count;

	/* message state related variables */
	int *id, *type, *threads;
	size_t message_size, request, request_size;
	void *message, *params;

	(void) argc;
	(void) argv;

	/* do the standard open, bind, connect in SCIF */
	epd = scif_obc();

	/* Create request */
	request_size = sizeof(size_t);
	request = 1;

	printf("= About to send %zu bytes\n", request_size);

	/* Send message */
	bytes_sent = scif_send(epd, &request, request_size, 1);
	if (bytes_sent < 0 || (size_t) bytes_sent != request_size) {
		fprintf(stderr, "scif_send failed (returned %d) with error %d\n",
			bytes_sent, errno);
		exit(EXIT_FAILURE);
	}
	printf("= Sent %d bytes\n= Waiting for reply ...\n", bytes_sent);

	/* Receive size of the reply */
	bytes_received = scif_recv(epd, &message_size, sizeof(size_t), 1);
	if (bytes_received < 0 || (size_t) bytes_received != sizeof(size_t)) {
		/* message_size would be indeterminate; do not use it. */
		fprintf(stderr, "scif_recv failed (returned %d) with error %d\n",
			bytes_received, errno);
		exit(EXIT_FAILURE);
	}
	printf("= Received %d bytes. Expecting a message of size %zu bytes\n",
		bytes_received, message_size);

	/* The three header ints are read unconditionally further down. */
	if (message_size < 3 * sizeof(int)) {
		fprintf(stderr, "reply of %zu bytes is too short for the header\n",
			message_size);
		exit(EXIT_FAILURE);
	}

	/* Receive the actual reply */
	message = malloc(message_size);
	if (message == NULL) {
		fprintf(stderr, "failed to allocate %zu bytes for the reply\n",
			message_size);
		exit(EXIT_FAILURE);
	}
	count = scif_recv(epd, message, message_size, 1);
	if (count < 0 || (size_t) count != message_size) {
		fprintf(stderr, "scif_recv failed (returned %d) with error %d\n",
			count, errno);
		free(message);
		exit(EXIT_FAILURE);
	}
	bytes_received += count;
	printf("= Received %d bytes. Total bytes received: %d bytes\n",
		count, bytes_received);

	/*
	 * Extract the message received:
	 * --------------------------------------------------
	 * | type (int) | threads (int) | id (int) | params |
	 * --------------------------------------------------
	 */
	type = (int *) message;
	threads = type + 1;
	id = threads + 1;
	params = (void *) ((char *) id + sizeof(int));

	printf("= Content size: %zu bytes - Type: %d - Threads: %d - ID: %d\n",
		message_size, *type, *threads, *id);


	/*
	 * output results
	 *
	 * NOTE(review): the payload lengths below are taken on trust from the
	 * sender; only the fixed header has been validated against
	 * message_size.
	 */
	int matrix_width, matrix_size;
	float *result;
	switch(*type) {
	case 1:
		/* params holds a single unsigned int */
		printf("= Sleep duration left: %u\n", *((unsigned int *) params));
		break;
	case 2:
		/* params: width (int) followed directly by the matrix to print */
		matrix_width = *((int *) params);
		result = (float *) (((char *) params) + sizeof(int));
		print_mtx(result, matrix_width);
		break;
	case 3: case 4: case 6:
		/* params: width (int), then two matrices to skip, then the result */
		matrix_width = *((int *) params);
		matrix_size = matrix_width * matrix_width;
		result = (float *) (((char *) params) + sizeof(int)
			+ 2 * matrix_size * sizeof(float));
		print_mtx(result, matrix_width);
		break;
	case 5:
		/* as above, but the width is the second of two leading ints */
		matrix_width = *((int *) params + 1);
		matrix_size = matrix_width * matrix_width;
		result = (float *) (((char *) params) + 2 * sizeof(int)
			+ 2 * matrix_size * sizeof(float));
		print_mtx(result, matrix_width);
		break;
	default:
		printf("= Dat shit cray!\n");
		break;
	}

	free(message);

	if (scif_close(epd) != 0) {
		fprintf(stderr, "scif_close failed with error %d\n", errno);
		exit(EXIT_FAILURE);
	}
	printf("= scif_close success\n");

	return EXIT_SUCCESS;
}
Ejemplo n.º 4
0
/*
 * Spinlock hand-off experiment driver.
 *
 * Usage: ./test_numa_comb n_of_threads n_of_threads_on_node0 n_of_threads_on_node1
 *
 * Steps:
 *   1. Parse and validate the thread counts and build thread_cpu_map: the
 *      first n_left threads get CPUs 3, 2, 1, ... and the remaining ones get
 *      CPUs 7, 6, 5, ...
 *   2. Allocate one spinlock on NUMA node `node_id`, start n_threads workers
 *      running work(), raise start_work_flag and join them all.
 *   3. Sort the global timestamp log g_tss and derive, per ordered thread
 *      pair (i, j), how often j's entry directly followed i's (cs_matrix),
 *      the summed gap between them (delay_matrix), the resulting
 *      probabilities (prob_matrix) and rates (rate_matrix).
 *   4. Print the four quadrants of the probability and rate matrices and the
 *      local/remote aggregates.
 *
 * Exits with status -1 on bad arguments, allocation failure or when too few
 * timestamps were recorded to analyse.
 */
int main(int argc, char *argv[])
{
	int node_id = 0;
	int arrival_lambda = 10;
	int thread_cpu_map[N_THREADS];
	int i,j,k;
	int n_threads;
	int n_left;
	int n_right;
	int next_index_left = 3;
	int next_index_right = 7;
	float local_square = 0.0, remote_square = 0.0;


	/***************** make sure #args is correct and get the n_threads, n_left and n_right */
	if(argc < 4)
	{
		printf("Usage: ./test_numa_comb n_of_threads n_of_threads_on_node0 n_of_threads_on_node1\n");
		exit(-1);
	}
	n_threads = atoi(argv[1]);
	n_left = atoi(argv[2]);
	n_right = atoi(argv[3]);

	/*
	 * n_threads indexes thread_cpu_map[N_THREADS] and sizes several
	 * variable-length arrays below, so it must be positive and bounded;
	 * n_left splits that range and must lie inside it.
	 */
	if(n_threads < 1 || n_threads > (int)N_THREADS || n_left < 0 || n_left > n_threads)
	{
		printf("n_of_threads must be in [1, %d] and n_of_threads_on_node0 in [0, n_of_threads]\n", (int)N_THREADS);
		exit(-1);
	}

	/******************* Set the thread_cpu_map according to the n_left and n_right */
	/*
	 * NOTE(review): the CPU numbers count down from the hard-coded 3 and 7,
	 * so more than four threads on a side produces CPU ids that are negative
	 * or overlap the other side -- confirm the intended topology.
	 */
	printf("n_threads: %d, n_left: %d, n_right: %d\n",n_threads,n_left,n_right);
	for(i = 0; i < n_left; i++)
	{
		thread_cpu_map[i] = next_index_left;
		next_index_left--;
	}
	for(i = n_left; i < n_threads; i++)
	{
		thread_cpu_map[i] = next_index_right;
		next_index_right--;
	}
	for(i = 0; i < n_threads; i++)
	{
		printf("Thread %d is on cpu %d\n",i,thread_cpu_map[i]);
	}



	thread_params para[n_threads]; //The parameters to pass to the threads

	//printf("The return value of numa_get_run_node_mask(void) is %d\n",numa_get_run_node_mask());
	//printf("The return value of numa_max_node(void) is %d\n",numa_max_node());
	//numa_tonode_memory((void *)spinlock_ptr,sizeof(pthread_spinlock_t),node_id); //This doesn't work

	//initilize the spinlock pointer and put it on a specific node
	pthread_spinlock_t *spinlock_ptr = numa_alloc_onnode(sizeof(pthread_spinlock_t),node_id);

	if(spinlock_ptr == NULL) //error handling of the allocating of a spinlock pointer on a specific node
	{
		printf("alloc of spinlock on a node failed.\n");
		exit(-1);
	}

	/* initialise  syncs */
	pthread_barrier_init(&fin_barrier, NULL, n_threads);
	pthread_spin_init(spinlock_ptr,0);
	int rc;
	//create the threads
	for(i = 0; i < n_threads; i++)
	{

		para[i].thread_id = i;
		para[i].arrival_lambda = arrival_lambda;
		para[i].spinlock_ptr = spinlock_ptr;
		/*
		 * cpuset[i] is only filled in here; it is not passed to
		 * pthread_create(), so presumably work() applies it -- confirm.
		 */
		CPU_ZERO(&cpuset[i]);
		CPU_SET(thread_cpu_map[i],&cpuset[i]);
		rc = pthread_create(&threads[i],NULL,work,(void*)&para[i]);
		E (rc);


	}
	start_work_flag = 1; 

	/* wait here */
	for(i = 0; i < n_threads; i++)
	    pthread_join(threads[i],NULL);


	pthread_barrier_destroy(&fin_barrier);

	/*
	for(i = 0; i < n_threads; i++)
	{
		printf("The time to get one lock for thread %d is : %.9f\n",i,time_in_cs[i]/num_access_each_thread[i]);
		printf("The number of lock accesses for thread %d is : %d\n",i,num_access_each_thread[i]);
	}
	*/

	qsort((void*)g_tss,(size_t)access_count,(size_t)sizeof(timestamp),cmp_timestamp);
	/*
	for (i = 0; i < access_count; i++)
		printf("%lu with id %d\n",g_tss[i].ts,g_tss[i].id);
	*/

	/* for (i = 0; i < access_count; i++)
	 * {
	 *     printf ("%lu %d\n", g_tss[i].ts, g_tss[i].id);
	 * } */

	/*
	 * cs_order below has access_count/2 elements; a zero-length
	 * variable-length array is undefined, so stop if nothing usable was
	 * recorded.
	 */
	if(access_count < 2)
	{
		printf("not enough lock accesses recorded to analyse.\n");
		pthread_spin_destroy(spinlock_ptr);
		numa_free((void *)spinlock_ptr,sizeof(pthread_spinlock_t));
		exit(-1);
	}

	/*
	 * Take the thread id of every even-indexed entry of the sorted log.
	 * NOTE(review): this presumes the log alternates enter/leave records
	 * per critical section -- confirm against work().
	 */
	int cs_order[access_count/2];
	for(i = 0; i < access_count/2; i++)
	{
		cs_order[i] = g_tss[i*2].id;
		//printf("%d in cs\n",cs_order[i]);
	}
	int cs_matrix[n_threads][n_threads];
	uint64_t delay_matrix[n_threads][n_threads];
	float prob_matrix[n_threads][n_threads];
	float rate_matrix[n_threads][n_threads];

	// zero out all the matrices
	// (rate_matrix is skipped: every element is assigned before it is read)
	memset(&cs_matrix, '\0', n_threads*n_threads*sizeof(int));
	memset(&delay_matrix, '\0', n_threads*n_threads*sizeof(uint64_t));
	memset(&prob_matrix, '\0', n_threads*n_threads*sizeof(float));


	/*
	 * For each consecutive pair (k, k+1) in cs_order, count the i -> j
	 * transition and accumulate the gap between log entries 2k+1 and 2k+2.
	 */
	int local_count2 = 0, remote_count2 = 0;
	uint64_t diff;
	for(i = 0; i < n_threads; i++)
	    for(j = 0; j < n_threads; j++)
		for(k = 0; k < access_count/2 -1 ; k++)
		{
		    if(cs_order[k] == i && cs_order[k+1] == j)
		    {
			cs_matrix[i][j]++;
			diff = g_tss[2*k+2].ts - g_tss[2*k+1].ts; 
			delay_matrix[i][j] += diff;
			if(is_on_same_node(i, j, n_threads, n_left, n_right))
			{
			    dprintf("local_delay: %lu\n", diff);
			    local_square += sqr(diff);
			    local_count2++;
			}
			else
			{
			    dprintf("remote_delay: %lu\n", diff);
			    remote_square += sqr(diff);
			    remote_count2++;
			}
		    }
		}

	/*
	 * Per-thread entry counts.  A variable-length array cannot take an
	 * initialiser, so clear it explicitly before incrementing.
	 */
	int num_access[n_threads];
	memset(num_access, 0, sizeof(num_access));
	for(i = 0; i < access_count/2 -1; i++)
	    for(j = 0; j < n_threads; j++)
	    {
		if (cs_order[i] == j) num_access[j]++;
	    }

	for(i = 0; i < n_threads; i++)
		printf("num_access[%d]:%d\n",i,num_access[i]);

	/*
	 * Pairs that never occurred divide by zero in floating point here and
	 * therefore show up as inf/nan in the printed matrices.
	 */
	for(i = 0; i < n_threads; i++)
		for(j = 0; j < n_threads ; j++)
		{
			prob_matrix[i][j] = (float)cs_matrix[i][j]/(float)num_access[i];
			rate_matrix[i][j] = 1.0/((delay_matrix[i][j]/(float)cs_matrix[i][j])/CPU_FREQ);
		}


	printf ("\n***************** PROBS *******************\n");
	printf ("Lock is on LP, [L, R] is [%d, %d]:\n", n_left - 1, n_right);
	// tl
	printf ("L -> L\n");
	print_mtx (n_threads, n_threads, prob_matrix,
			   0, 0, n_left, n_left, 0);
    // tr
	printf ("L -> R\n");
	print_mtx (n_threads, n_threads, prob_matrix,
			   n_left, 0, n_threads, n_left, 0);

	printf ("Lock is on RP, [L, R] is [%d, %d]:\n", n_left, n_right - 1);
	// br
	printf ("R -> R\n");
	print_mtx (n_threads, n_threads, prob_matrix,
			   n_left, n_left, n_threads, n_threads, 0);
	// bl
	printf ("R -> L\n");
	print_mtx (n_threads, n_threads, prob_matrix,
			   0, n_left, n_left, n_threads, 0);
	

	printf ("\n***************** RATES *******************\n");

	printf ("Lock is on LP, [L, R] is [%d, %d]:\n", n_left - 1, n_right);
	// tl
	printf ("L -> L\n");
	print_mtx (n_threads, n_threads, rate_matrix,
			   0, 0, n_left, n_left, 1);
    // tr
	printf ("L -> R\n");
	print_mtx (n_threads, n_threads, rate_matrix,
			   n_left, 0, n_threads, n_left, 1);

	printf ("Lock is on RP, [L, R] is [%d, %d]:\n", n_left, n_right - 1);
	// br
	printf ("R -> R\n");
	print_mtx (n_threads, n_threads, rate_matrix,
			   n_left, n_left, n_threads, n_threads, 1);
	// bl
	printf ("R -> L\n");
	print_mtx (n_threads, n_threads, rate_matrix,
			   0, n_left, n_left, n_threads, 1);




	//print the intra-core and inter-core delay
	//thread 0 - n_left -1 are on the left core, n_left to n_threads are on the right core
	uint64_t local_delay = 0, remote_delay = 0;
	int local_count = 0, remote_count = 0;
	float local_prob = 0.0, remote_prob = 0.0;

	for(i = 0; i < n_threads; i++)
	    for(j = 0; j < n_threads; j++)
	    {
			/* a thread following itself is excluded from the aggregates */
			if (j == i)
				continue;
			if(is_on_same_node(i, j, n_threads, n_left, n_right))
			{
				//printf("%d and %d on the same node\n",i,j);
				local_delay += delay_matrix[i][j];
				local_count += cs_matrix[i][j];
				/*
				 * NOTE(review): the probability is read transposed
				 * ([j][i]) while delay and count use [i][j] --
				 * confirm this is intended.
				 */
				local_prob += prob_matrix[j][i];
			}
			else
			{
				//printf("%d and %d not the same node\n",i,j);
				remote_delay += delay_matrix[i][j];
				remote_count += cs_matrix[i][j];
				remote_prob += prob_matrix[j][i];
			}
	    }


	float local = (float)local_delay/(local_count);
	float remote = (float)remote_delay/(remote_count);

	printf("\n\n**************************** Aggregates ***************************\n");
	printf("local delay: %f, remote_delay: %f, local_count: %d, remote_count: %d\n",(float)local_delay/(local_count),(float)remote_delay/(remote_count),local_count,remote_count);
	printf("local prob:%f, remote prob: %f\n",local_prob/n_threads, remote_prob/n_threads);
	/* variance computed as mean of squares minus square of the mean */
	printf("local delay variance:%f, remote delay variance: %f\n",local_square/local_count - local*local, remote_square/remote_count - remote*remote);
	printf("local count2: %d, remote_count2:%d\n",local_count2, remote_count2);
	pthread_spin_destroy(spinlock_ptr);
	numa_free((void *)spinlock_ptr,sizeof(pthread_spinlock_t));
	/* pthread_exit() does not return, so the return below is never reached */
	pthread_exit(NULL);
	return 0;
}