int main(int argc, char *argv[]) //int main() { isCreditScheduler = 0; if(argc > 1) if(*argv[1] == '1') isCreditScheduler = 1; isCreditScheduler = 1; uthread_arg_t *uarg; int inx,i; // kthread_block_signal(SIGVTALRM); // kthread_block_signal(SIGUSR1); gtthread_app_init(); uthread_info_init(); // initialises the uthread info that is shared among all uthreads. Used for logging purposes. init_possible_groups(); for(i=0;i<TOTAL_GROUPS;i++) init_matrices(&possible_groups[i]); //int current_matrix_size_index, current_row_index, current_credit_value_index, current_num_thread_per_group_index, current_num_group_index; int size, rows_per_thread, current_row = 0, group_id = 0; for(inx=0; inx<NUM_THREADS; inx++) { size = possible_groups[group_id].matrix_size; rows_per_thread = size / (THREADS_PER_GROUP); uarg = &uargs[inx]; uarg->_A = matrices_A[group_id]; uarg->_B = matrices_B[group_id]; uarg->_C = matrices_C[group_id]; uarg->tid = inx; uarg->gid = group_id; uarg->start_row = current_row; uarg->end_row = current_row + rows_per_thread; uarg->start_col = 0; uarg->end_col = size; int credit = matrices_A[group_id]->matrix_group->credit_value; printf("group_id: %d, current_row: %d, rows_per_thread: %d, size: %d, credits: %d, inx: %d, NUM_THREADS: %d\n", group_id, current_row, rows_per_thread, size, credit, inx, NUM_THREADS); current_row = current_row + rows_per_thread; if(current_row == size) { current_row = 0; group_id++; } //#ifdef GT_GROUP_SPLIT // Wanted to split the columns by groups !!! * //uarg->start_col = (uarg->gid * PER_GROUP_COLS); //#endif // printf("going to uthread_create\n"); uthread_create(&utids[inx], uthread_mulmat, uarg, uarg->gid, credit, possible_groups[group_id].matrix_size); //` printf("exit from uthread_create\n"); } // kthread_unblock_signal(SIGVTALRM); // kthread_unblock_signal(SIGUSR1); FILE *fp, *fp1; fp = fopen("stat.txt","w+"); fp1 = fopen("stat1.txt","w+"); gtthread_app_exit(); print_statistics(fp,fp1); init_group_stats(); print_group_stats(fp,fp1); verify_answer(); fclose(fp); fclose(fp1); // for(i=0;i<TOTAL_GROUPS;i++) // { // int size = possible_groups[i].matrix_size; // printf("Matrix A, i:%d, size: %d, credits: %d\n", i, size, possible_groups[i].credit_value); // print_matrix(matrices_A[i], size); // printf("Matrix B, i:%d, size: %d, credits: %d\n", i, size, possible_groups[i].credit_value); // print_matrix(matrices_B[i], size); // printf("Matrix C, i:%d, size: %d, credits: %d\n", i, size, possible_groups[i].credit_value); // print_matrix(matrices_C[i], size); // } // print_matrix(&C); // fprintf(stderr, "********************************"); return(0); }
void test_transpose(int size, int offset, CL_MAGMA_RT *rt) { int M, N, K; float *A, *B; cl_mem cmB; // OpenCL buffers for M, V, and W unsigned int mem_size_A, mem_size_B; cl_int ciErrNum; // Error code var if (offset%128!=0) { printf ("Error: offset must be a multiple of %d\n", NB); return; } M=N=K=size; printf ("--------------------------------------------------\n"); printf ("testing size %d, %d/%d=%d\n", size, size, 32, size/32); //--------- Allocate and initialize host arrays -----------// { printf ("allocate host arrays..."); fflush (stdout); mem_size_A = M * K * sizeof(float); A = (float*)malloc(mem_size_A); mem_size_B = K * N * sizeof(float); B = (float*)malloc(mem_size_B); if (!A || !B) { printf ("Error: could not allocating host matrices\n"); return; } int j; for (j = 0; j < M*K; j++) A[j] = rand() / (float)RAND_MAX; memcpy (B, A, M*K*sizeof(float)); } //----------- Allocate the OpenCL buffer memory objects ----------------// { printf ("allocate gpu arrays of size %d, %p...", mem_size_B, rt); fflush (stdout); cmB = clCreateBuffer(rt->GetContext(), CL_MEM_READ_WRITE, mem_size_B, NULL, &ciErrNum); if (ciErrNum != CL_SUCCESS) { printf("Error: clCreateBuffer at %d in file %s!\n", __LINE__, __FILE__); return; } printf ("done\n"); } //-------------- write data to GPU device ----------------// { printf ("copy data to GPU..."); fflush (stdout); ciErrNum = clEnqueueWriteBuffer(rt->GetCommandQueue(0), cmB, CL_TRUE, 0, mem_size_B, B, 0, NULL, NULL); if (ciErrNum != CL_SUCCESS) { printf("Error: clEnqueueWriteBuffer at %d in file %s!\n", __LINE__, __FILE__); return; } printf ("done\n"); } int lda=M, ldb=K; offset = 128; int offsetrange = offset*offset; //----------- call cpu sinplace ------------// { printf ("cpu transpose..."); fflush (stdout); magmablas_sinplace_transpose_cpu(A+offsetrange, lda, M-offset); } //----------- call opencl sgemm -------------// printf ("gpu transpose..."); fflush (stdout); magma_sinplace_transpose(cmB, offsetrange, ldb, K-offset, rt->GetCommandQueue(0) ); //----------- check result ---------------// // Read back results and check accumulated errors ciErrNum = clEnqueueReadBuffer(rt->GetCommandQueue(0), cmB, CL_TRUE, 0, mem_size_B, (void*)B, 0, NULL, NULL); if (ciErrNum != CL_SUCCESS) { printf("Error: clEnqueueWriteBuffer at %d in file %s!\n", __LINE__, __FILE__); return; } float resid; verify_answer (M, N, A, B, resid); printf ("resid=%e\n", resid); // clean up if (cmB)clReleaseMemObject(cmB); free(A); free(B); }