/*
 * Demo driver for optimized_matrix_multiplication().
 *
 * A single parameter buffer is allocated and laid out as
 *
 *   | width (int) | A (width*width floats) | B (same) | C (same) |
 *
 * A and B are filled by init_mtx(), the buffer is handed to
 * optimized_matrix_multiplication(), and C is printed afterwards.
 *
 * Returns 0 on success, 1 if the buffer cannot be allocated.
 */
int main()
{
    int matrix_width = WIDTH, matrix_size = WIDTH * WIDTH;

    /*
     * calloc instead of malloc: C is printed after the multiplication, so it
     * must hold defined values even if the kernel accumulates into it or does
     * not write every element.
     */
    void *params = calloc(1, sizeof(int) + 3 * matrix_size * sizeof(float));
    if (params == NULL) {
        fprintf(stderr, "Could not allocate the matrix parameter buffer\n");
        return 1;
    }

    /* Header: the matrix width sits at the very start of the buffer. */
    *((int *) params) = matrix_width;

    /* The three matrices follow the header back to back. */
    float *A = (float *) (((char *) params) + sizeof(int));
    float *B = A + matrix_size;
    float *C = B + matrix_size;

    init_mtx(A, matrix_size);
    init_mtx(B, matrix_size);

    /* Only A is printed; the message states that B looks the same. */
    printf("Both Matrices A and B look like this:\n");
    print_mtx(A, matrix_width);
    printf("\n");

    optimized_matrix_multiplication(params);

    printf("Result looks like this:\n");
    print_mtx(C, matrix_width);

    free(params);
    return 0;
}
main(int ac, char **av) { FILE *fp; matrix prox=NULL,prepare_prox_mtx(),first_guess=NULL; matrix TMP=NULL,RES=NULL,mds2res(); int dim ; if (ac != 3) { printf("USAGE: %s <data_file> <dimenssion> \n",av[0]); exit(1); } fp = fopen(av[1],"r"); dim = atoi(av[2]); prox = prepare_prox_mtx(fp,dim); first_guess = generate_first_guess(dim,pnts,50); if (dim >1 ) TMP = metric_mds(dim,pnts,first_guess,prox,J_ee,DJ_ee); else TMP = metric_mds(dim,pnts,first_guess,prox,OneJ_ee,DOneJ_ee); RES = mds2res(TMP,dim); compare_results(RES,prox,dim); free_all_mtx(dim,&TMP,&prox,&first_guess); printf("Points coordinates are: \n \n"); print_mtx(RES); }
/*
 * SCIF client: sends a one-word request, receives a length-prefixed reply and
 * prints its payload.
 *
 * Reply layout:
 * --------------------------------------------------
 * | type (int) | threads (int) | id (int) | params |
 * --------------------------------------------------
 *
 * The meaning of params depends on type (see the switch below).
 *
 * Returns EXIT_SUCCESS when the reply was received and the endpoint closed;
 * EXIT_FAILURE on any send/receive/allocation error or a truncated reply.
 * The receive buffer is freed and the endpoint is closed on every path.
 *
 * NOTE(review): the literal flag 1 passed to scif_send()/scif_recv() is
 * presumably the blocking flag - confirm against scif.h.
 */
int main(int argc, char *argv[])
{
    scif_epd_t epd;
    int bytes_sent, bytes_received, count;
    int status = EXIT_FAILURE;

    /* message state related variables */
    int *id, *type, *threads;
    size_t message_size, request, request_size;
    void *message = NULL, *params;

    /* payload decoding */
    int matrix_width, matrix_size;
    float *result;

    (void) argc;
    (void) argv;

    /* do the standard open, bind, connect in SCIF */
    epd = scif_obc();

    /* Create request */
    request_size = sizeof(size_t);
    request = 1;
    printf("= About to send %zu bytes\n", request_size);

    /* Send message */
    bytes_sent = scif_send(epd, &request, request_size, 1);
    if (bytes_sent < 0) {
        fprintf(stderr, "scif_send failed with error %d\n", errno);
        goto out;
    }
    printf("= Sent %d bytes\n= Waiting for reply ...\n", bytes_sent);

    /* Receive size of the reply */
    bytes_received = scif_recv(epd, &message_size, sizeof(size_t), 1);
    if (bytes_received != (int) sizeof(size_t)) {
        /* A short or failed read would leave message_size indeterminate. */
        fprintf(stderr, "scif_recv of the reply size failed (returned %d, error %d)\n",
                bytes_received, errno);
        goto out;
    }
    printf("= Received %d bytes. Expecting a message of size %zu bytes\n",
           bytes_received, message_size);

    /* The header alone is three ints; anything shorter cannot be decoded. */
    if (message_size < 3 * sizeof(int)) {
        fprintf(stderr, "reply of %zu bytes is too short for the message header\n",
                message_size);
        goto out;
    }

    /* Receive the actual reply */
    message = malloc(message_size);
    if (message == NULL) {
        fprintf(stderr, "could not allocate %zu bytes for the reply\n", message_size);
        goto out;
    }

    count = scif_recv(epd, message, message_size, 1);
    if (count < 0 || (size_t) count != message_size) {
        fprintf(stderr, "scif_recv of the reply failed (returned %d, error %d)\n",
                count, errno);
        goto out;
    }
    bytes_received += count;
    printf("= Received %d bytes. Total bytes received: %d bytes\n", count, bytes_received);

    /* Split the reply into its three header ints and the payload after them. */
    type = (int *) message;
    threads = type + 1;
    id = threads + 1;
    params = (void *) ((char *) id + sizeof(int));
    printf("= Content size: %zu bytes - Type: %d - Threads: %d - ID: %d\n",
           message_size, *type, *threads, *id);

    /*
     * output results
     *
     * NOTE(review): the payload offsets below trust the width stored in the
     * message; they are not checked against message_size.
     */
    switch (*type) {
    case 1:
        /* payload: a single unsigned int */
        printf("= Sleep duration left: %u\n", *((unsigned int *) params));
        break;
    case 2:
        /* payload: width, then the matrix to print */
        matrix_width = *((int *) params);
        result = (float *) (((char *) params) + sizeof(int));
        print_mtx(result, matrix_width);
        break;
    case 3:
    case 4:
    case 6:
        /* payload: width, then three matrices; the third one is printed */
        matrix_width = *((int *) params);
        matrix_size = matrix_width * matrix_width;
        result = (float *) (((char *) params) + sizeof(int)
                            + 2 * matrix_size * sizeof(float));
        print_mtx(result, matrix_width);
        break;
    case 5:
        /* payload: two ints (width is the second), then three matrices */
        matrix_width = *((int *) params + 1);
        matrix_size = matrix_width * matrix_width;
        result = (float *) (((char *) params) + 2 * sizeof(int)
                            + 2 * matrix_size * sizeof(float));
        print_mtx(result, matrix_width);
        break;
    default:
        printf("= Dat shit cray!\n");
        break;
    }

    status = EXIT_SUCCESS;

out:
    free(message);

    if (scif_close(epd) != 0) {
        fprintf(stderr, "scif_close failed with error %d\n", errno);
        exit(EXIT_FAILURE);
    }
    printf("= scif_close success\n");

    return status;
}
int main(int argc, char *argv[]) { int node_id = 0; int arrival_lambda = 10; int thread_cpu_map[N_THREADS]; int i,j,k; int n_threads; int n_left; int n_right; int next_index_left = 3; int next_index_right = 7; float local_square = 0.0, remote_square = 0.0; /***************** make sure #args is correct and get the n_threads, n_left and n_right */ if(argc < 4) { printf("Usage: ./test_numa_comb n_of_threads n_of_threads_on_node0 n_of_threads_on_node1\n"); exit(-1); } n_threads = atoi(argv[1]); n_left = atoi(argv[2]); n_right = atoi(argv[3]); /******************* Set the thread_cpu_map according to the n_left and n_right */ printf("n_threads: %d, n_left: %d, n_right: %d\n",n_threads,n_left,n_right); for(i = 0; i < n_left; i++) { thread_cpu_map[i] = next_index_left; next_index_left--; } for(i = n_left; i < n_threads; i++) { thread_cpu_map[i] = next_index_right; next_index_right--; } for(i = 0; i < n_threads; i++) { printf("Thread %d is on cpu %d\n",i,thread_cpu_map[i]); } thread_params para[n_threads]; //The parameters to pass to the threads //printf("The return value of numa_get_run_node_mask(void) is %d\n",numa_get_run_node_mask()); //printf("The return value of numa_max_node(void) is %d\n",numa_max_node()); //numa_tonode_memory((void *)spinlock_ptr,sizeof(pthread_spinlock_t),node_id); //This doesn't work //initilize the spinlock pointer and put it on a specific node pthread_spinlock_t *spinlock_ptr = numa_alloc_onnode(sizeof(pthread_spinlock_t),node_id); if(spinlock_ptr == NULL) //error handling of the allocating of a spinlock pointer on a specific node { printf("alloc of spinlock on a node failed.\n"); exit(-1); } /* initialise syncs */ pthread_barrier_init(&fin_barrier, NULL, n_threads); pthread_spin_init(spinlock_ptr,0); int rc; //create the threads for(i = 0; i < n_threads; i++) { para[i].thread_id = i; para[i].arrival_lambda = arrival_lambda; para[i].spinlock_ptr = spinlock_ptr; CPU_ZERO(&cpuset[i]); CPU_SET(thread_cpu_map[i],&cpuset[i]); rc = 
pthread_create(&threads[i],NULL,work,(void*)¶[i]); E (rc); } start_work_flag = 1; /* wait here */ for(i = 0; i < n_threads; i++) pthread_join(threads[i],NULL); pthread_barrier_destroy(&fin_barrier); /* for(i = 0; i < n_threads; i++) { printf("The time to get one lock for thread %d is : %.9f\n",i,time_in_cs[i]/num_access_each_thread[i]); printf("The number of lock accesses for thread %d is : %d\n",i,num_access_each_thread[i]); } */ qsort((void*)g_tss,(size_t)access_count,(size_t)sizeof(timestamp),cmp_timestamp); /* for (i = 0; i < access_count; i++) printf("%lu with id %d\n",g_tss[i].ts,g_tss[i].id); */ /* for (i = 0; i < access_count; i++) * { * printf ("%lu %d\n", g_tss[i].ts, g_tss[i].id); * } */ /* */ int cs_order[access_count/2]; for(i = 0; i < access_count/2; i++) { cs_order[i] = g_tss[i*2].id; //printf("%d in cs\n",cs_order[i]); } int cs_matrix[n_threads][n_threads]; uint64_t delay_matrix[n_threads][n_threads]; float prob_matrix[n_threads][n_threads]; float rate_matrix[n_threads][n_threads]; // zero out all the matrices memset(&cs_matrix, '\0', n_threads*n_threads*sizeof(int)); memset(&delay_matrix, '\0', n_threads*n_threads*sizeof(uint64_t)); memset(&prob_matrix, '\0', n_threads*n_threads*sizeof(float)); int local_count2 = 0, remote_count2 = 0; uint64_t diff; for(i = 0; i < n_threads; i++) for(j = 0; j < n_threads; j++) for(k = 0; k < access_count/2 -1 ; k++) { if(cs_order[k] == i && cs_order[k+1] == j) { cs_matrix[i][j]++; diff = g_tss[2*k+2].ts - g_tss[2*k+1].ts; delay_matrix[i][j] += diff; if(is_on_same_node(i, j, n_threads, n_left, n_right)) { dprintf("local_delay: %lu\n", diff); local_square += sqr(diff); local_count2++; } else { dprintf("remote_delay: %lu\n", diff); remote_square += sqr(diff); remote_count2++; } } } int num_access[n_threads]; for(i = 0; i < access_count/2 -1; i++) for(j = 0; j < n_threads; j++) { if (cs_order[i] == j) num_access[j]++; } for(i = 0; i < n_threads; i++) printf("num_access[%d]:%d\n",i,num_access[i]); for(i = 0; i < 
n_threads; i++) for(j = 0; j < n_threads ; j++) { prob_matrix[i][j] = (float)cs_matrix[i][j]/(float)num_access[i]; rate_matrix[i][j] = 1.0/((delay_matrix[i][j]/(float)cs_matrix[i][j])/CPU_FREQ); } printf ("\n***************** PROBS *******************\n"); printf ("Lock is on LP, [L, R] is [%d, %d]:\n", n_left - 1, n_right); // tl printf ("L -> L\n"); print_mtx (n_threads, n_threads, prob_matrix, 0, 0, n_left, n_left, 0); // tr printf ("L -> R\n"); print_mtx (n_threads, n_threads, prob_matrix, n_left, 0, n_threads, n_left, 0); printf ("Lock is on RP, [L, R] is [%d, %d]:\n", n_left, n_right - 1); // br printf ("R -> R\n"); print_mtx (n_threads, n_threads, prob_matrix, n_left, n_left, n_threads, n_threads, 0); // bl printf ("R -> L\n"); print_mtx (n_threads, n_threads, prob_matrix, 0, n_left, n_left, n_threads, 0); printf ("\n***************** RATES *******************\n"); printf ("Lock is on LP, [L, R] is [%d, %d]:\n", n_left - 1, n_right); // tl printf ("L -> L\n"); print_mtx (n_threads, n_threads, rate_matrix, 0, 0, n_left, n_left, 1); // tr printf ("L -> R\n"); print_mtx (n_threads, n_threads, rate_matrix, n_left, 0, n_threads, n_left, 1); printf ("Lock is on RP, [L, R] is [%d, %d]:\n", n_left, n_right - 1); // br printf ("R -> R\n"); print_mtx (n_threads, n_threads, rate_matrix, n_left, n_left, n_threads, n_threads, 1); // bl printf ("R -> \n"); print_mtx (n_threads, n_threads, rate_matrix, 0, n_left, n_left, n_threads, 1); //print the intra-core and inter-core delay //thread 0 - n_left -1 are on the left core, n_left to n_threads are on the right core uint64_t local_delay = 0, remote_delay = 0; int local_count = 0, remote_count = 0; float local_prob = 0.0, remote_prob = 0.0; for(i = 0; i < n_threads; i++) for(j = 0; j < n_threads; j++) { if (j == i) continue; if(is_on_same_node(i, j, n_threads, n_left, n_right)) { //printf("%d and %d on the same node\n",i,j); local_delay += delay_matrix[i][j]; local_count += cs_matrix[i][j]; local_prob += prob_matrix[j][i]; } 
else { //printf("%d and %d not the same node\n",i,j); remote_delay += delay_matrix[i][j]; remote_count += cs_matrix[i][j]; remote_prob += prob_matrix[j][i]; } } float local = (float)local_delay/(local_count); float remote = (float)remote_delay/(remote_count); printf("\n\n**************************** Aggregates ***************************\n"); printf("local delay: %f, remote_delay: %f, local_count: %d, remote_count: %d\n",(float)local_delay/(local_count),(float)remote_delay/(remote_count),local_count,remote_count); printf("local prob:%f, remote prob: %f\n",local_prob/n_threads, remote_prob/n_threads); printf("local delay variance:%f, remote delay variance: %f\n",local_square/local_count - local*local, remote_square/remote_count - remote*remote); printf("local count2: %d, remote_count2:%d\n",local_count2, remote_count2); pthread_spin_destroy(spinlock_ptr); numa_free((void *)spinlock_ptr,sizeof(pthread_spinlock_t)); pthread_exit(NULL); return 0; }