/*
 * Build a new shared doubly-linked-list cell holding `element`, wired
 * between `prev` and `next`.  The cell is upc_alloc'd in the calling
 * thread's shared space; ownership passes to the caller (free with upc_free).
 * NOTE: the upc_alloc result is used unchecked, matching the file's style.
 */
shr_dlist shr_dcons(shared void *element, shr_dlist prev, shr_dlist next) {
    shr_dlist cell = upc_alloc(sizeof(struct shr_dcell));

    cell->element = element;
    cell->next    = next;
    cell->prev    = prev;
    return cell;
}
local_shared_block_ptrs shared_2d_array_alloc(int sizex, int sizey, int offsetx, int offsety){ long int alloc_size = sizex * sizey * sizeof(DTYPE); local_shared_block ptr; ptr = upc_alloc(alloc_size); if(ptr == NULL) bail_out("Failing shared allocation of %d bytes", alloc_size); long int line_ptrs_size = sizeof(local_shared_block) * sizey; local_shared_block_ptrs line_ptrs = upc_alloc(line_ptrs_size); if(line_ptrs == NULL) bail_out("Failing shared allocation of %d bytes", line_ptrs_size); for(int y=0; y<sizey; y++){ line_ptrs[y] = ptr + (y * sizex) - offsetx; } line_ptrs -= offsety; return line_ptrs; }
local_shared_block_ptrs shared_2d_array_alloc(int sizex, int sizey, int offsetx, int offsety){ size_t alloc_size = (size_t)sizex * sizey * sizeof(double); local_shared_block ptr; debug("Allocating main array size(%d, %d) offset(%d, %d) %zu", sizex, sizey, offsetx, offsety, alloc_size); ptr = upc_alloc(alloc_size); if(ptr == NULL) die("Failing shared allocation of %d bytes", alloc_size); int line_ptrs_size = sizeof(local_shared_block) * sizey; debug("Allocating ptr array %d", line_ptrs_size); local_shared_block_ptrs line_ptrs = upc_alloc(line_ptrs_size); if(line_ptrs == NULL) die("Failing shared allocation of %d bytes", line_ptrs_size); for(long y=0; y<sizey; y++){ line_ptrs[y] = ptr + (y * sizex) - offsetx; } line_ptrs -= offsety; return line_ptrs; }
/*
 * Collectively allocate the shared directory state: per-thread counters,
 * the strict sentinel array, a per-thread directory of up to max_dir_size
 * ints, and one lock per thread.
 *
 * Must be called by ALL threads: every upc_all_alloc here is a collective
 * operation, so the calls must occur in the same order on every thread.
 * Allocation results are used unchecked (NULL on failure would fault).
 */
void setup_limited_directory(int max_dir_size) {
    /* One int per thread for each shared counter array. */
    s_time = (shared int *) upc_all_alloc( THREADS, sizeof(int) );
    s_read = (shared int *) upc_all_alloc( THREADS, sizeof(int) );
    s_write = (shared int *) upc_all_alloc( THREADS, sizeof(int) );
    /* strict qualifier: accesses to sentinel are strictly ordered across threads. */
    sentinel = (shared strict int *) upc_all_alloc( THREADS, sizeof(int) );
    /* Shared table of per-thread directory pointers; each thread then
     * fills in its own slot with a locally-affine block of max_dir_size ints. */
    s_directory = (shared sintpt *) upc_all_alloc( THREADS, sizeof(sintpt) );
    s_directory[MYTHREAD] = (sintpt) upc_alloc(max_dir_size * sizeof(int));
    /* Shared table of per-thread locks; each thread installs its own lock. */
    dir_locks = (shared slockpt *) upc_all_alloc(THREADS, sizeof(slockpt));
    dir_locks[MYTHREAD] = upc_global_lock_alloc();
    /* Ensure every thread's slot is populated before anyone proceeds. */
    upc_barrier;
}
/*
 * Initialize the work-stealing runtime for this thread: record the task
 * handler and task geometry, allocate the shared deque and transfer cells,
 * and cache local (private) pointers to this thread's shared slots.
 *
 * func     - task handler invoked for each dequeued task.
 * input_s  - size in bytes of a task's input payload.
 * output_s - size in bytes of a task's output payload.
 *
 * Must be called by all threads (upc_all_alloc is collective).
 */
void ws_init(void *func, size_t input_s, size_t output_s) {
    // store input parameters
    input_size = input_s;
    output_size = output_s;
    handler = func;
    // allocate the shared private deque collectively
    // (task layout: 8-byte tag word followed by input then output payload)
    task_size = sizeof(uint64_t) + input_s + output_s;
    block_size = WS_DEQUE_SIZE * task_size;
    // NOTE(review): only thread 0 calls upc_alloc here, yet the comment says
    // "collectively" and every thread later derives deque_p via
    // WS_DEQUE_ELEM/upc_cast — confirm whether upc_all_alloc was intended.
    if (MYTHREAD == 0) deque = upc_alloc(block_size * 4);
    // allocate and initialize the 'empty task' (tag marks the slot as free)
    empty_task = malloc(task_size);
    *(uint64_t*)empty_task = WS_TASK_EMPTY;
    has_last_hint = false;
    // allocate shared transfer cells, one task-sized cell per thread
    transfer = upc_all_alloc(THREADS, task_size);
    // get local pointers to shared data owned by this thread
    deque_p = upc_cast(WS_DEQUE_ELEM(WS_DEQUE_OFFSET));
    deque_head_p = upc_cast(WS_DEQUE_ELEM(WS_DEQUE_OFFSET));
    transfer_p = upc_cast(WS_TRANS_ELEM(MYTHREAD));
    request_p = upc_cast(&request[MYTHREAD]);
    term_p = upc_cast(&term[MYTHREAD]);
    // assign initial values: no pending steal request, empty deque
    *request_p = 0;
    head = 0;
    tail = 0;
    // initialize victim arrays and semaphores
    init_victim_array();
    init_comp_semaphores();
}
int main(int argc, char **argv) { const int matrixDim = atoi(argv[1]); const int blockDim = atoi(argv[2]); const int c_rep = atoi(argv[3]); const int big_blockDim = atoi(argv[4]); const int num_blocks_dim = matrixDim/blockDim; const int num_pes_dim = sqrt(THREADS/c_rep); const int my_num_blocks_dim = num_blocks_dim/num_pes_dim; const int num_big_blocks_dim = matrixDim/big_blockDim; const int num_small_in_big_blk = big_blockDim/blockDim; const int my_num_small_in_big_blk = num_small_in_big_blk/num_pes_dim; const int threads_per_layer = THREADS/c_rep; upccoll_team_t my_row_team, my_column_team, my_peers_team, my_layer_team; int row_rank, column_rank, my_layer_rank, my_layer_id, my_row_rank, my_column_rank, my_peer_rank, layer_rank; /* Create teams per layer */ upccoll_team_split(UPC_TEAM_ALL, MYTHREAD/threads_per_layer, MYTHREAD%threads_per_layer, &my_layer_team); upccoll_team_rank(my_layer_team, &my_layer_rank); /* Create teams for intra-layer broadcasts */ upccoll_team_split(my_layer_team, my_layer_rank/num_pes_dim, my_layer_rank%num_pes_dim, &my_row_team); upccoll_team_rank(my_row_team, &my_row_rank); upccoll_team_split(my_layer_team, my_layer_rank%num_pes_dim, my_layer_rank/num_pes_dim, &my_column_team); upccoll_team_rank(my_column_team, &my_column_rank); /* Create teams for inter-layer broadcasts (peers per layer) */ upccoll_team_split(UPC_TEAM_ALL, MYTHREAD%threads_per_layer, MYTHREAD/threads_per_layer, &my_peers_team); upccoll_team_rank(my_peers_team, &my_peer_rank); int layer_rank = my_layer_rank; double * priv_A; int i_big, width, blocks_to_sub, blocks_to_proceed ; /* Count how many elements each process should allocate, 2 cases: (1) If a process is above diagonal, then it owns (my_num_blocks_dim * (my_num_blocks_dim-1))/2 blocks (2) If a process is on/below diagonal then it owns (my_num_blocks_dim * (my_num_blocks_dim+1))/2 blocks */ received_factorized_block[MYTHREAD] = (mytype_ptr) upc_alloc(chunk_dim * chunk_dim * sizeof(double)); int above_diag = 
(my_layer_rank/num_pes_dim) < (my_layer_rank%num_pes_dim); if (above_diag) { // case (1) distr_A[MYTHREAD] = (mytype_ptr) upc_alloc(((my_num_blocks_dim * (my_num_blocks_dim-1)) * blockDim * blockDim / 2)* sizeof(double)); priv_A = (double *) distr_A[MYTHREAD]; } else { // case(2) distr_A[MYTHREAD] = (mytype_ptr) upc_alloc(((my_num_blocks_dim * (my_num_blocks_dim+1)) * blockDim * blockDim / 2)* sizeof(double)); priv_A = (double *) distr_A[MYTHREAD]; } /* Create symmetric positive definite (Lehmer) matrix and store lower half */ /* TODO */ /* Replicate input matrix A */ /* TODO */ for (i_big=0; i_big < num_big_blocks_dim; i_big++) { /* Offset private A by the nymber of big blocks we have already factorized */ width = i_big * my_num_small_in_big_blk - 1 + above_diag ; blocks_to_sub = width * (1+width) / 2 ; blocks_to_proceed = my_num_blocks_dim * i_big * my_num_small_in_big_blk - blocks_to_sub; } }