Example #1
0
/*
 * Entry point of the matrix-multiplication driver.
 *
 * Initialises the gtthread runtime and the per-group matrices, splits each
 * group's matrix into bands of rows, creates one uthread per band running
 * uthread_mulmat, then calls gtthread_app_exit() and writes statistics to
 * stat.txt and stat1.txt before verifying the result.
 *
 * argv[1]: a leading '1' requests the credit scheduler (but see the note on
 *          isCreditScheduler below: the flag is currently forced on).
 *
 * Returns 0 on success, 1 if either statistics file could not be opened.
 */
int main(int argc, char *argv[])
{
	isCreditScheduler = 0;
	if(argc > 1 && *argv[1] == '1')
		isCreditScheduler = 1;

	/* NOTE(review): this unconditional assignment overrides the command-line
	 * selection above, so the credit scheduler is always used. Kept to
	 * preserve current behaviour; confirm whether it is a debugging
	 * leftover. */
	isCreditScheduler = 1;

	uthread_arg_t *uarg;
	int inx,i;
	FILE *fp, *fp1;
	int stats_ok;

	gtthread_app_init();
	uthread_info_init(); // initialises the uthread info that is shared among all uthreads. Used for logging purposes.

	init_possible_groups();
	for(i=0;i<TOTAL_GROUPS;i++)
		init_matrices(&possible_groups[i]);

	int size, rows_per_thread, current_row = 0, group_id = 0;
	for(inx=0; inx<NUM_THREADS; inx++)
	{
		size = possible_groups[group_id].matrix_size;
		rows_per_thread = size / (THREADS_PER_GROUP);
		int credit = matrices_A[group_id]->matrix_group->credit_value;

		/* Each thread gets rows [start_row, end_row) and every column of
		 * its group's matrices. */
		uarg = &uargs[inx];
		uarg->_A = matrices_A[group_id];
		uarg->_B = matrices_B[group_id];
		uarg->_C = matrices_C[group_id];

		uarg->tid = inx;
		uarg->gid = group_id;
		uarg->start_row = current_row;
		uarg->end_row = current_row + rows_per_thread;
		uarg->start_col = 0;
		uarg->end_col = size;

		printf("group_id: %d, current_row: %d, rows_per_thread: %d, size: %d, credits: %d, inx: %d, NUM_THREADS: %d\n", 
			group_id, current_row, rows_per_thread, size,  credit, inx, NUM_THREADS);

		/* Pass the size captured for THIS thread's group. Re-reading
		 * possible_groups[group_id] after the advance below would hand the
		 * last thread of every group the next group's size and index past
		 * the initialised groups for the final thread. */
		uthread_create(&utids[inx], uthread_mulmat, uarg, uarg->gid, credit, size);

		/* Move on to the next group once this one's rows are all assigned.
		 * The equality test only fires when the bands add up to exactly
		 * `size` rows. */
		current_row = current_row + rows_per_thread;
		if(current_row == size)
		{
			current_row = 0;
			group_id++;
		}
	}

	/* Report open failures right away, while errno still describes them. */
	fp = fopen("stat.txt","w+");
	if(fp == NULL)
		perror("stat.txt");
	fp1 = fopen("stat1.txt","w+");
	if(fp1 == NULL)
		perror("stat1.txt");

	gtthread_app_exit();

	/* Only the calls that write to the files are skipped when a file is
	 * missing; the rest of the shutdown sequence runs in its original order. */
	stats_ok = (fp != NULL && fp1 != NULL);
	if(stats_ok)
		print_statistics(fp,fp1);
	init_group_stats();
	if(stats_ok)
		print_group_stats(fp,fp1);
	verify_answer();

	if(fp != NULL)
		fclose(fp);
	if(fp1 != NULL)
		fclose(fp1);

	return(stats_ok ? 0 : 1);
}
Example #2
0
/*
 * Compares the CPU and OpenCL in-place transpose routines on a
 * size x size single-precision matrix.
 *
 * Fills a host matrix with random values in [0, 1], copies it to a device
 * buffer, runs magmablas_sinplace_transpose_cpu on the host copy and
 * magma_sinplace_transpose on the device copy, reads the device result back
 * and hands both to verify_answer, printing the residual afterwards.
 *
 * size   - dimension of the square test matrix (M = N = K = size).
 * offset - must be a multiple of 128, otherwise the test is skipped.
 *          NOTE(review): after validation the value is overwritten with 128,
 *          so the caller's offset currently only gates the test.
 * rt     - runtime object supplying the OpenCL context and command queue 0.
 *
 * Host and device allocations are released on every exit path.
 */
void test_transpose(int size, int offset, CL_MAGMA_RT *rt)
{
    // All locals are declared up front so that the jumps to `cleanup`
    // never cross an initialised declaration.
    int M, N, K;
    int lda, ldb, offsetrange;
    float *A = NULL, *B = NULL;
    float resid = 0.0f;
    cl_mem cmB = NULL;              // device copy of B
    unsigned int mem_size_A, mem_size_B;
    cl_int ciErrNum;                // Error code var
    
    // NOTE(review): the check uses the literal 128 while the message prints
    // NB; confirm the two are meant to be the same value.
    if (offset%128!=0)
    {
        printf ("Error: offset must be a multiple of %d\n", NB);
        return;
    }
    
    M=N=K=size;
    printf ("--------------------------------------------------\n");
    printf ("testing size %d, %d/%d=%d\n", size, size, 32, size/32);
    
    //--------- Allocate and initialize host arrays -----------//
    {
        printf ("allocate host arrays..."); fflush (stdout);
        mem_size_A = M * K * sizeof(float);
        A = (float*)malloc(mem_size_A);
        
        mem_size_B = K * N * sizeof(float);
        B = (float*)malloc(mem_size_B);
        
        if (!A || !B)
        {
            printf ("Error: could not allocating host matrices\n");
            goto cleanup;   // frees whichever of A/B did get allocated
        }
        
        int j;
        for (j = 0; j < M*K; j++)
            A[j] = rand() / (float)RAND_MAX;
        
        // B starts as an exact copy of A so both transposes see the same input.
        memcpy (B, A, M*K*sizeof(float));
    }
    
    //----------- Allocate the OpenCL buffer memory objects ----------------//
    {
        printf ("allocate gpu arrays of size %u, %p...", mem_size_B, (void*)rt); fflush (stdout);
        cmB = clCreateBuffer(rt->GetContext(), CL_MEM_READ_WRITE, mem_size_B, NULL, &ciErrNum);
        if (ciErrNum != CL_SUCCESS)
        {
            printf("Error: clCreateBuffer at %d in file %s!\n", __LINE__, __FILE__);
            cmB = NULL;     // make sure cleanup does not release a bogus handle
            goto cleanup;
        }
        
        printf ("done\n");
    }
    
    //-------------- write data to GPU device ----------------//
    {
        printf ("copy data to GPU..."); fflush (stdout);
        ciErrNum = clEnqueueWriteBuffer(rt->GetCommandQueue(0), cmB, CL_TRUE, 0, mem_size_B, B, 0, NULL, NULL);
        if (ciErrNum != CL_SUCCESS)
        {
            printf("Error: clEnqueueWriteBuffer at %d in file %s!\n", __LINE__, __FILE__);
            goto cleanup;
        }
        printf ("done\n");
    }
    
    lda = M;
    ldb = K;
    
    // NOTE(review): overrides the caller-supplied offset; kept to preserve
    // the existing test behaviour.
    offset = 128;
    offsetrange = offset*offset;
    
    //----------- call cpu sinplace ------------//
    // Both transposes start offsetrange elements into their buffer and work
    // on (size - offset) as the dimension argument.
    {
        printf ("cpu transpose..."); fflush (stdout);
        magmablas_sinplace_transpose_cpu(A+offsetrange, lda, M-offset);
    }
    
    //----------- call opencl transpose -------------//
    printf ("gpu transpose..."); fflush (stdout);
    magma_sinplace_transpose(cmB, offsetrange, ldb, K-offset, rt->GetCommandQueue(0) );
    
    //----------- check result ---------------//
    // Read back results and check accumulated errors
    ciErrNum = clEnqueueReadBuffer(rt->GetCommandQueue(0), cmB, CL_TRUE, 0, mem_size_B, (void*)B, 0, NULL, NULL);
    if (ciErrNum != CL_SUCCESS)
    {
        printf("Error: clEnqueueReadBuffer at %d in file %s!\n", __LINE__, __FILE__);
        goto cleanup;
    }
    
    verify_answer (M, N, A, B, resid);
    
    printf ("resid=%e\n", resid);
    
cleanup:
    // Single exit path: release the device buffer and both host arrays.
    if (cmB)clReleaseMemObject(cmB);
    free(A);
    free(B);
}