/// Decide whether a Python object can be converted by this converter.
///
/// Returns 0 (not convertible) without raising when the pointer is null,
/// when the object is not a numpy array, or when
/// TypeToNumPy<scalar_t>::canConvert() rejects the array's element type.
/// An array whose dimension count is neither 1 nor 2 is reported through
/// THROW_TYPE_ERROR instead.  Size validation is delegated to
/// checkVectorSizes() (1-D) or checkMatrixSizes() (otherwise).
///
/// @param obj_ptr  candidate Python object
/// @return obj_ptr itself when all checks pass, 0 otherwise
static void* convertible(PyObject *obj_ptr)
  {
    // Null pointers and non-numpy objects are silently declined.
    if (obj_ptr == 0 || !PyArray_Check(obj_ptr))
      {
        return 0;
      }

    // The element type must be one this scalar type accepts.
    const int arrayType = PyArray_ObjectType(obj_ptr, 0);
    if (!TypeToNumPy<scalar_t>::canConvert(arrayType))
      {
        return 0;
      }

    // Only vectors (1-D) and matrices (2-D) are supported.
    const int dimCount = PyArray_NDIM(obj_ptr);
    const bool isVector = (dimCount == 1);

    if (!isVector && dimCount != 2)
      {
        THROW_TYPE_ERROR("Conversion is only valid for arrays with 1 or 2 dimensions. Argument has " << dimCount << " dimensions");
      }

    if (isVector)
      {
        checkVectorSizes(obj_ptr);
      }
    else
      {
        // Two-dimensional matrix type.
        checkMatrixSizes(obj_ptr);
      }

    return obj_ptr;
  }
Beispiel #2
0
/**
 * Validate the arguments of a ROT request, populate the kernel arguments
 * and run the resulting solution sequence.
 *
 * Checks are made in this order and the first failure is returned:
 * library initialisation, memory objects, size of X, size of Y,
 * command queues, event wait list.
 *
 * @return the failing validation status, otherwise the status reported by
 *         makeSolutionSeq()/executeSolutionSeq() cast to clblasStatus.
 */
clblasStatus
doRot(
    CLBlasKargs *kargs,
    size_t N,
    const cl_mem X,
    size_t offx,
    int incx,
    cl_mem Y,
    size_t offy,
    int incy,
    cl_uint numCommandQueues,
    cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList,
    const cl_event *eventWaitList,
    cl_event *events)
{
    ListHead steps;
    cl_int status;
    clblasStatus rc = clblasSuccess;

    if (!clblasInitialized) {
        return clblasNotInitialized;
    }

    /* X and Y must both be valid memory objects. */
    rc = checkMemObjects(X, Y, X, false, X_VEC_ERRSET, Y_VEC_ERRSET, X_VEC_ERRSET);
    if (rc) {
#ifdef DEBUG_ROT
        printf("Invalid mem object..\n");
#endif
        return rc;
    }

    /* Check whether enough memory was allocated for each vector. */
    rc = checkVectorSizes(kargs->dtype, N, X, offx, incx, X_VEC_ERRSET);
    if (rc) {
#ifdef DEBUG_ROT
        printf("Invalid Size for X\n");
#endif
        return rc;
    }
    rc = checkVectorSizes(kargs->dtype, N, Y, offy, incy, Y_VEC_ERRSET);
    if (rc) {
#ifdef DEBUG_ROT
        printf("Invalid Size for Y\n");
#endif
        return rc;
    }

    if (commandQueues == NULL || numCommandQueues == 0) {
        return clblasInvalidValue;
    }

    /* Only one queue is used for now: no multi-GPU support. */
    numCommandQueues = 1;
    if (commandQueues[0] == NULL) {
        return clblasInvalidCommandQueue;
    }

    if (numEventsInWaitList != 0 && eventWaitList == NULL) {
        return clblasInvalidEventWaitList;
    }

    /* X travels in the A slot, Y in the B slot; ldb/ldc carry incx/incy. */
    kargs->A = X;
    kargs->B = Y;
    kargs->N = N;
    kargs->offBX = offx;
    kargs->offCY = offy;
    kargs->ldb.vector = incx;
    kargs->ldc.vector = incy;
    /* The ROTM kernel is reused for ROT; pigFuncID records the real routine. */
    kargs->pigFuncID = CLBLAS_ROT;

    listInitHead(&steps);
    status = makeSolutionSeq(CLBLAS_ROTM, kargs, numCommandQueues, commandQueues,
                             numEventsInWaitList, eventWaitList, events, &steps);
    if (status == CL_SUCCESS) {
        status = executeSolutionSeq(&steps);
    }
    freeSolutionSeq(&steps);

    return (clblasStatus)status;
}
Beispiel #3
0
/**
 * Validate the arguments of a GER request, populate the kernel arguments
 * and run the resulting solution sequence.
 *
 * Checks are made in this order and the first failure is returned:
 * library initialisation, memory objects, size of A, size of X (length M),
 * size of Y (length N), command queues, event wait list.
 *
 * @param doConj  forwarded to the kernel through kargs->K
 * @return the failing validation status, otherwise the status reported by
 *         makeSolutionSeq()/executeSolutionSeq() cast to clblasStatus.
 */
clblasStatus
doGer(
    CLBlasKargs *kargs,
    clblasOrder order,
    size_t M,
    size_t N,
    const cl_mem X,
    size_t offx,
    int incx,
    const cl_mem Y,
    size_t offy,
    int incy,
    cl_mem  A,
    size_t offa,
    size_t lda,
    int doConj,
    cl_uint numCommandQueues,
    cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList,
    const cl_event *eventWaitList,
    cl_event *events)
{
    ListHead steps;
    cl_int status;
    clblasStatus rc = clblasSuccess;

    if (!clblasInitialized) {
        return clblasNotInitialized;
    }

    /* A, X and Y must all be valid memory objects. */
    rc = checkMemObjects(A, X, Y, true, A_MAT_ERRSET, X_VEC_ERRSET, Y_VEC_ERRSET);
    if (rc) {
#ifdef DEBUG_GER
        printf("Invalid mem object..\n");
#endif
        return rc;
    }

    /* Check whether enough memory was allocated for the matrix and vectors. */
    rc = checkMatrixSizes(kargs->dtype, order, clblasNoTrans, M, N, A, offa, lda, A_MAT_ERRSET);
    if (rc) {
#ifdef DEBUG_GER
        printf("Invalid Size for A %d\n", rc);
#endif
        return rc;
    }
    rc = checkVectorSizes(kargs->dtype, M, X, offx, incx, X_VEC_ERRSET);
    if (rc) {
#ifdef DEBUG_GER
        printf("Invalid Size for X\n");
#endif
        return rc;
    }
    rc = checkVectorSizes(kargs->dtype, N, Y, offy, incy, Y_VEC_ERRSET);
    if (rc) {
#ifdef DEBUG_GER
        printf("Invalid Size for Y\n");
#endif
        return rc;
    }

    if (commandQueues == NULL || numCommandQueues == 0) {
        return clblasInvalidValue;
    }

    /* Only one queue is used for now: no multi-GPU support. */
    numCommandQueues = 1;
    if (commandQueues[0] == NULL) {
        return clblasInvalidCommandQueue;
    }

    if (numEventsInWaitList != 0 && eventWaitList == NULL) {
        return clblasInvalidEventWaitList;
    }

    /* Problem shape. */
    kargs->order = order;
    kargs->M = M;
    kargs->N = N;
    kargs->offsetM = 0;
    kargs->offsetN = 0;

    /* Matrix operand; both offset fields receive the same value. */
    kargs->A = A;
    kargs->offa = offa;
    kargs->offA = offa;
    kargs->lda.matrix = lda;

    /* Vector operands; ldb/ldc carry incx/incy. */
    kargs->B = X;
    kargs->offBX = offx;
    kargs->ldb.vector = incx;
    kargs->C = Y;
    kargs->offCY = offy;
    kargs->ldc.vector = incy;

    kargs->scimage[0] = 0;
    kargs->scimage[1] = 0;
    /* K is reused to carry the doConj flag. */
    kargs->K = (size_t)doConj;

#ifdef DEBUG_GER
    printf("Calling makeSolutionSeq from DoGer: GER\n");
#endif

    listInitHead(&steps);
    status = makeSolutionSeq(CLBLAS_GER, kargs, numCommandQueues, commandQueues,
                             numEventsInWaitList, eventWaitList, events, &steps);
    if (status == CL_SUCCESS) {
        status = executeSolutionSeq(&steps);
    }
    freeSolutionSeq(&steps);

    return (clblasStatus)status;
}
Beispiel #4
0
/**
 * Validate the arguments of a HEMV request and run it as two chained
 * CLBLAS_HEMV solution sequences.
 *
 * The first sequence runs with transA = clblasNoTrans / diag = clblasNonUnit
 * and signals a local event; the second runs with
 * transA = clblasConjTrans / diag = clblasUnit, waits on that event and
 * reports completion through @p events.
 *
 * Checks are made in this order and the first failure is returned:
 * library initialisation, memory objects, size of A (N x N), size of x,
 * size of y, command queues, event wait list.
 *
 * @return the failing validation status, otherwise the status reported by
 *         makeSolutionSeq()/executeSolutionSeq() cast to clblasStatus.
 */
static clblasStatus
doHemv(
    CLBlasKargs *kargs,
    clblasOrder order,
    clblasUplo uplo,
    size_t N,
    const cl_mem A,
    size_t offA,
    size_t lda,
    const cl_mem x,
    size_t offx,
    int incx,
    cl_mem y,
    size_t offy,
    int incy,
    cl_uint numCommandQueues,
    cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList,
    const cl_event *eventWaitList,
    cl_event *events)
{
    cl_int err;
    ListHead seq1, seq2;
    /* NOTE(review): first_event is never released in this function -- confirm ownership. */
    cl_event first_event;
    clblasStatus retCode = clblasSuccess;

    if (!clblasInitialized) {
        return clblasNotInitialized;
    }

    /* Validate arguments */

    if ((retCode = checkMemObjects(A, x, y, true, A_MAT_ERRSET, X_VEC_ERRSET, Y_VEC_ERRSET))) {
        return retCode;
    }
    if ((retCode = checkMatrixSizes(kargs->dtype, order, clblasNoTrans, N, N,
                                    A, offA, lda, A_MAT_ERRSET))) {
        return retCode;
    }
    if ((retCode = checkVectorSizes(kargs->dtype, N, x, offx, incx, X_VEC_ERRSET))) {
        return retCode;
    }
    if ((retCode = checkVectorSizes(kargs->dtype, N, y, offy, incy, Y_VEC_ERRSET))) {
        return retCode;
    }
    if ((commandQueues == NULL) || (numCommandQueues == 0)) {
        return clblasInvalidValue;
    }
    /* Reject a null first queue up front, as the sibling routines do. */
    if (commandQueues[0] == NULL) {
        return clblasInvalidCommandQueue;
    }
    if ((numEventsInWaitList != 0) && (eventWaitList == NULL)) {
        return clblasInvalidEventWaitList;
    }

    /* Only the first command queue is used. */
    numCommandQueues = 1;

    kargs->order = order;
    kargs->uplo = uplo;
    kargs->N = N;
    kargs->A = A;
    kargs->offA = offA;
    kargs->offa = offA;
    kargs->lda.matrix = lda;
    kargs->B = x;
    kargs->offBX = offx;
    /* The member is spelled "vector" (lower case) throughout the library. */
    kargs->ldb.vector = incx;
    kargs->C = y;
    kargs->offCY = offy;
    kargs->ldc.vector = incy;
    kargs->transA = clblasNoTrans;
    kargs->diag = clblasNonUnit;

    /* First pass: signals first_event. */
    listInitHead(&seq1);
    err = makeSolutionSeq(CLBLAS_HEMV, kargs, numCommandQueues, commandQueues,
                          numEventsInWaitList, eventWaitList, &first_event, &seq1);
    if (err == CL_SUCCESS) {
        err = executeSolutionSeq(&seq1);
        if (err == CL_SUCCESS) {
            /* Second pass: waits on first_event, reports through events. */
            listInitHead(&seq2);
            kargs->transA = clblasConjTrans;
            kargs->diag   = clblasUnit;
            err = makeSolutionSeq(CLBLAS_HEMV, kargs, numCommandQueues, commandQueues,
                                  1, &first_event, events, &seq2);
            if (err == CL_SUCCESS) {
                err = executeSolutionSeq(&seq2);
            }
            freeSolutionSeq(&seq2);
        }
    }

    freeSolutionSeq(&seq1);
    return (clblasStatus)err;
}
Beispiel #5
0
/**
 * Validate the arguments of a SYMV request, populate the kernel arguments
 * and run the solution sequence.
 *
 * Without USE_SYMV a single CLBLAS_SYMV sequence is built and executed.
 * With USE_SYMV two CLBLAS_HEMV sequences are built (NoTrans/NonUnit, then
 * Trans/Unit waiting on the first one's event); the first node of the second
 * sequence is appended to the first sequence, which is then executed.
 *
 * Checks are made in this order and the first failure is returned:
 * library initialisation, memory objects, size of A (N x N), size of x,
 * size of y.
 *
 * @return the failing validation status, otherwise the status reported by
 *         makeSolutionSeq()/executeSolutionSeq() cast to clblasStatus.
 */
static clblasStatus
doSymv(
    CLBlasKargs *kargs,
    clblasOrder order,
    clblasUplo uplo,
    size_t N,
    const cl_mem A,
    size_t offA,
    size_t lda,
    const cl_mem x,
    size_t offx,
    int incx,
    cl_mem y,
    size_t offy,
    int incy,
    cl_uint numCommandQueues,
    cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList,
    const cl_event *eventWaitList,
    cl_event *events)
{
    ListHead steps;
    cl_int status;
    clblasStatus rc = clblasSuccess;
#ifdef USE_SYMV
    ListHead secondSteps;
    ListNode *secondNode;
    cl_event firstDone;
#endif

    if (!clblasInitialized) {
        return clblasNotInitialized;
    }

    /* Argument validation. */
    rc = checkMemObjects(A, x, y, true, A_MAT_ERRSET, X_VEC_ERRSET, Y_VEC_ERRSET);
    if (rc) {
        return rc;
    }
    rc = checkMatrixSizes(kargs->dtype, order, clblasNoTrans, N, N,
                          A, offA, lda, A_MAT_ERRSET);
    if (rc) {
        return rc;
    }
    rc = checkVectorSizes(kargs->dtype, N, x, offx, incx, X_VEC_ERRSET);
    if (rc) {
        return rc;
    }
    rc = checkVectorSizes(kargs->dtype, N, y, offy, incy, Y_VEC_ERRSET);
    if (rc) {
        return rc;
    }

    /* Problem shape; K keeps a copy of the original N. */
    kargs->order = order;
    kargs->uplo = uplo;
    kargs->N = N;
    kargs->K = N;

    /* Matrix operand; both offset fields receive the same value. */
    kargs->A = A;
    kargs->offA = offA;
    kargs->offa = offA;
    kargs->lda.matrix = lda;

    /* Vector operands; ldb/ldc carry incx/incy. */
    kargs->B = x;
    kargs->offBX = offx;
    kargs->ldb.vector = incx;
    kargs->C = y;
    kargs->offCY = offy;
    kargs->ldc.vector = incy;

#ifndef USE_SYMV

    listInitHead(&steps);
    status = makeSolutionSeq(CLBLAS_SYMV, kargs, numCommandQueues, commandQueues,
                             numEventsInWaitList, eventWaitList, events, &steps);
    if (status == CL_SUCCESS) {
        status = executeSolutionSeq(&steps);
    }

#else   /* version of SYMV using kprintf */

    numCommandQueues = 1;
    listInitHead(&steps);

    kargs->transA = clblasNoTrans;
    kargs->diag = clblasNonUnit;
    status = makeSolutionSeq(CLBLAS_HEMV, kargs, numCommandQueues, commandQueues,
                             numEventsInWaitList, eventWaitList, &firstDone, &steps);
    if (status == CL_SUCCESS) {
        listInitHead(&secondSteps);

        kargs->transA = clblasTrans;
        kargs->diag = clblasUnit;
        status = makeSolutionSeq(CLBLAS_HEMV, kargs, numCommandQueues, commandQueues,
                                 1, &firstDone, events, &secondSteps);

        if (status == CL_SUCCESS) {
            /* Append the second sequence's first node to the main sequence. */
            secondNode = listNodeFirst(&secondSteps);
            listAddToTail(&steps, secondNode);

            /* Executes both kernels in the sequence one after the other. */
            status = executeSolutionSeq(&steps);
        }
    }

#endif

    freeSolutionSeq(&steps);
    return (clblasStatus)status;
}
Beispiel #6
0
/**
 * Validate the arguments of a HER2 request, populate the kernel arguments
 * and run the resulting solution sequence.
 *
 * For clblasRowMajor the routine swaps the roles of X and Y and flips uplo
 * before handing the arguments to the kernel; otherwise they are passed
 * through unchanged.
 *
 * Checks are made in this order and the first failure is returned:
 * library initialisation, memory objects, size of A (N x N), size of X,
 * size of Y, command queues, event wait list.
 *
 * @return the failing validation status, otherwise the status reported by
 *         makeSolutionSeq()/executeSolutionSeq() cast to clblasStatus.
 */
clblasStatus
doHer2(
    CLBlasKargs *kargs,
    clblasOrder order,
    clblasUplo uplo,
    size_t N,
    const cl_mem X,
    size_t offx,
    int incx,
    const cl_mem Y,
    size_t offy,
    int incy,
    cl_mem A,
    size_t offa,
    size_t lda,
    cl_uint numCommandQueues,
    cl_command_queue* commandQueue,
    cl_uint numEventsInWaitList,
    const cl_event* eventWaitList,
    cl_event* events)
{
    ListHead steps;
    cl_int status;
    clblasStatus rc = clblasSuccess;
    /* Row-major input is handled by exchanging X/Y and inverting uplo. */
    const int swapOperands = (order == clblasRowMajor);

    if (!clblasInitialized) {
        return clblasNotInitialized;
    }

#ifdef DEBUG_HER2
    printf("doHer2 called\n");
#endif

    /* Argument validation. */
    rc = checkMemObjects(A, X, Y, true, A_MAT_ERRSET, X_VEC_ERRSET, Y_VEC_ERRSET);
    if (rc) {
#ifdef DEBUG_HER2
        printf("Invalid mem object..\n");
#endif
        return rc;
    }

    rc = checkMatrixSizes(kargs->dtype, order, clblasNoTrans, N, N, A, offa, lda, A_MAT_ERRSET);
    if (rc) {
#ifdef DEBUG_HER2
        printf("Invalid Size for A\n");
#endif
        return rc;
    }

    rc = checkVectorSizes(kargs->dtype, N, X, offx, incx, X_VEC_ERRSET);
    if (rc) {
#ifdef DEBUG_HER2
        printf("Invalid Size for X\n");
#endif
        return rc;
    }

    rc = checkVectorSizes(kargs->dtype, N, Y, offy, incy, Y_VEC_ERRSET);
    if (rc) {
#ifdef DEBUG_HER2
        printf("Invalid Size for Y\n");
#endif
        return rc;
    }

    if (commandQueue == NULL || numCommandQueues == 0) {
        return clblasInvalidValue;
    }

    if (numEventsInWaitList != 0 && eventWaitList == NULL) {
        return clblasInvalidEventWaitList;
    }

    kargs->order = order;
    if (swapOperands) {
        kargs->uplo = (uplo == clblasUpper) ? clblasLower : clblasUpper;
    } else {
        kargs->uplo = uplo;
    }

    /* B slot: Y when swapped, X otherwise. */
    kargs->B = swapOperands ? Y : X;
    kargs->ldb.vector = swapOperands ? incy : incx;
    kargs->offBX = swapOperands ? offy : offx;

    /* C slot: X when swapped, Y otherwise. */
    kargs->C = swapOperands ? X : Y;
    kargs->ldc.vector = swapOperands ? incx : incy;
    kargs->offCY = swapOperands ? offx : offy;

    /* Matrix operand; both offset fields receive the same value. */
    kargs->N = N;
    kargs->A = A;
    kargs->lda.matrix = lda;
    kargs->offa = offa;
    kargs->offA = offa;

#ifdef DEBUG_HER2
    printf("Calling makeSolutionSeq : HER2\n");
#endif

    /*
     * Always use command queue 0.
     * PENDING:
     * 1. No multi-GPU / multi-command-queue support.
     * 2. Could be optimized to pick the queue with the higher memory
     *    bandwidth that supports the data type and the LDA.
     */
    numCommandQueues = 1;

    listInitHead(&steps);
    status = makeSolutionSeq(CLBLAS_HER2, kargs, numCommandQueues, commandQueue,
                             numEventsInWaitList, eventWaitList, events, &steps);
    if (status == CL_SUCCESS) {
        status = executeSolutionSeq(&steps);
    }
    freeSolutionSeq(&steps);

    return (clblasStatus)status;
}
Beispiel #7
0
/**
 * Validate the arguments of an HPMV request and run it as two chained
 * CLBLAS_TRMV solution sequences, with pigFuncID set to CLBLAS_HPMV.
 *
 * The first sequence runs with transA = clblasNoTrans / diag = clblasNonUnit
 * and signals a local event; the second runs with
 * transA = clblasConjTrans / diag = clblasUnit, waits on that event and
 * reports completion through @p events.
 *
 * Checks are made in this order and the first failure is returned:
 * library initialisation, memory objects, size of AP (checked with lda 0),
 * size of X, size of Y, command queues, event wait list.
 *
 * @return the failing validation status, otherwise the status reported by
 *         makeSolutionSeq()/executeSolutionSeq() cast to clblasStatus.
 */
static clblasStatus
doHpmv(
    CLBlasKargs *kargs,
    clblasOrder order,
    clblasUplo uplo,
    size_t N,
    const cl_mem AP,
    size_t offa,
    const cl_mem X,
    size_t offx,
    int incx,
    cl_mem Y,
    size_t offy,
    int incy,
    cl_uint numCommandQueues,
    cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList,
    const cl_event *eventWaitList,
    cl_event *events)
{
    ListHead firstPass, secondPass;
    cl_event firstDone;
    cl_int status;
    clblasStatus rc = clblasSuccess;

    if (!clblasInitialized) {
        return clblasNotInitialized;
    }

    /* Argument validation. */
    rc = checkMemObjects(AP, X, Y, true, A_MAT_ERRSET, X_VEC_ERRSET, Y_VEC_ERRSET);
    if (rc) {
        return rc;
    }
    rc = checkMatrixSizes(kargs->dtype, order, clblasNoTrans, N, N,
                          AP, offa, 0, A_MAT_ERRSET);
    if (rc) {
        return rc;
    }
    rc = checkVectorSizes(kargs->dtype, N, X, offx, incx, X_VEC_ERRSET);
    if (rc) {
        return rc;
    }
    rc = checkVectorSizes(kargs->dtype, N, Y, offy, incy, Y_VEC_ERRSET);
    if (rc) {
        return rc;
    }
    if (commandQueues == NULL || numCommandQueues == 0) {
        return clblasInvalidValue;
    }
    if (numEventsInWaitList != 0 && eventWaitList == NULL) {
        return clblasInvalidEventWaitList;
    }

    /* Only the first command queue is used. */
    numCommandQueues = 1;

    kargs->order = order;
    kargs->uplo = uplo;
    kargs->N = N;

    /* Packed matrix operand: lda is set to zero. */
    kargs->A = AP;
    kargs->offA = offa;
    kargs->offa = offa;
    kargs->lda.matrix = 0;

    /* Vector operands; ldb/ldc carry incx/incy. */
    kargs->B = X;
    kargs->offBX = offx;
    kargs->ldb.vector = incx;
    kargs->C = Y;
    kargs->offCY = offy;
    kargs->ldc.vector = incy;

    kargs->transA = clblasNoTrans;
    kargs->diag = clblasNonUnit;
    kargs->pigFuncID = CLBLAS_HPMV;

    /* First pass: signals firstDone. */
    listInitHead(&firstPass);
    status = makeSolutionSeq(CLBLAS_TRMV, kargs, numCommandQueues, commandQueues,
                             numEventsInWaitList, eventWaitList, &firstDone, &firstPass);
    if (status == CL_SUCCESS) {
        status = executeSolutionSeq(&firstPass);
    }

    /* Second pass only when the first was built and executed successfully. */
    if (status == CL_SUCCESS) {
        listInitHead(&secondPass);
        kargs->transA = clblasConjTrans;
        kargs->diag = clblasUnit;
        status = makeSolutionSeq(CLBLAS_TRMV, kargs, numCommandQueues, commandQueues,
                                 1, &firstDone, events, &secondPass);
        if (status == CL_SUCCESS) {
            status = executeSolutionSeq(&secondPass);
        }
        freeSolutionSeq(&secondPass);
    }

    freeSolutionSeq(&firstPass);
    return (clblasStatus)status;
}
Beispiel #8
0
/**
 * Validate the arguments of a SCAL request, populate the kernel arguments
 * and run the resulting solution sequence.
 *
 * Checks are made in this order and the first failure is returned:
 * library initialisation, memory object, size of X, command queues,
 * event wait list.  A negative incx returns clblasSuccess without building
 * or executing any solution sequence (kargs has already been filled in at
 * that point).
 *
 * Diagnostic output is emitted only when DEBUG_SCAL is defined.
 *
 * @return the failing validation status, clblasSuccess for negative incx,
 *         otherwise the status reported by
 *         makeSolutionSeq()/executeSolutionSeq() cast to clblasStatus.
 */
clblasStatus
doScal(
    CLBlasKargs *kargs,
    size_t N,
    cl_mem X,
    size_t offx,
    int incx,
    cl_uint numCommandQueues,
    cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList,
    const cl_event *eventWaitList,
    cl_event *events)
{
    cl_int err;
    ListHead seq;
    clblasStatus retCode = clblasSuccess;

    if (!clblasInitialized) {
        return clblasNotInitialized;
    }

    /* Validate arguments */

    retCode = checkMemObjects(X, X, X, false, X_VEC_ERRSET, X_VEC_ERRSET, X_VEC_ERRSET);
    if (retCode) {
#ifdef DEBUG_SCAL
        printf("Invalid mem object..\n");
#endif
        return retCode;
    }

    /* Check whether enough memory was allocated */

    if ((retCode = checkVectorSizes(kargs->dtype, N, X, offx, incx, X_VEC_ERRSET))) {
#ifdef DEBUG_SCAL
        printf("Invalid Size for X\n");
#endif
        return retCode;
    }

    if ((commandQueues == NULL) || (numCommandQueues == 0)) {
        return clblasInvalidValue;
    }

    /* numCommandQueues will be hardcoded to 1 as of now. No multi-gpu support */
    numCommandQueues = 1;
    if (commandQueues[0] == NULL) {
        return clblasInvalidCommandQueue;
    }

    if ((numEventsInWaitList != 0) && (eventWaitList == NULL)) {
        return clblasInvalidEventWaitList;
    }

    kargs->N = N;
    kargs->A = X;
    kargs->offBX = offx;
    kargs->ldb.vector = incx;   /* Will be using this as incx */

    /* According to Netlib - return for negative incx */
    if (incx < 0) {
        return clblasSuccess;
    }

#ifdef DEBUG_SCAL
    printf("Calling makeSolutionSeq from DoScal: SCAL\n");
#endif

    listInitHead(&seq);
    err = makeSolutionSeq(CLBLAS_SCAL, kargs, numCommandQueues, commandQueues,
                          numEventsInWaitList, eventWaitList, events, &seq);
    if (err == CL_SUCCESS) {
        err = executeSolutionSeq(&seq);
    }

    freeSolutionSeq(&seq);

    return (clblasStatus)err;
}
Beispiel #9
0
/**
 * Validate the arguments of a symmetric/Hermitian banded matrix-vector
 * request and run it through the CLBLAS_GBMV solution, with
 * M = N, KL = KU = K and transA = clblasNoTrans.
 *
 * Checks are made in this order and the first failure is returned:
 * library initialisation, command queues, event wait list, memory objects,
 * banded size of A, size of x, size of y.
 *
 * @return the failing validation status, otherwise the status reported by
 *         makeSolutionSeq()/executeSolutionSeq() cast to clblasStatus.
 */
static clblasStatus
doSHbmv(
    CLBlasKargs *kargs,
    clblasOrder order,
    clblasUplo uplo,
    size_t N,
    size_t K,
    const cl_mem A,
    size_t offa,
    size_t lda,
    const cl_mem x,
    size_t offx,
    int incx,
    cl_mem y,
    size_t offy,
    int incy,
    cl_uint numCommandQueues,
    cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList,
    const cl_event *eventWaitList,
    cl_event *events)
{
    ListHead steps;
    cl_int status;
    clblasStatus rc = clblasSuccess;

    if (!clblasInitialized) {
        return clblasNotInitialized;
    }

    /* Queue and wait-list sanity comes before the buffer checks here. */
    if (commandQueues == NULL || numCommandQueues == 0) {
        return clblasInvalidValue;
    }
    if (commandQueues[0] == NULL) {
        return clblasInvalidCommandQueue;
    }
    if (numEventsInWaitList != 0 && eventWaitList == NULL) {
        return clblasInvalidEventWaitList;
    }

    /* Buffer validation. */
    rc = checkMemObjects(A, x, y, true, A_MAT_ERRSET, X_VEC_ERRSET, Y_VEC_ERRSET);
    if (rc) {
        return rc;
    }
    rc = checkBandedMatrixSizes(kargs->dtype, order, clblasNoTrans,
                                N, N, K, 0, A, offa, lda, A_MAT_ERRSET);
    if (rc) {
        return rc;
    }
    rc = checkVectorSizes(kargs->dtype, N, x, offx, incx, X_VEC_ERRSET);
    if (rc) {
        return rc;
    }
    rc = checkVectorSizes(kargs->dtype, N, y, offy, incy, Y_VEC_ERRSET);
    if (rc) {
        return rc;
    }

    /* Only one queue is used for now: no multi-GPU support. */
    numCommandQueues = 1;

    /* Square problem with identical lower and upper band widths. */
    kargs->order = order;
    kargs->uplo = uplo;
    kargs->transA = clblasNoTrans;
    kargs->M = N;
    kargs->N = N;
    kargs->KL = K;
    kargs->KU = K;

    /* Matrix operand; both offset fields receive the same value. */
    kargs->A = A;
    kargs->offA = offa;
    kargs->offa = offa;
    kargs->lda.matrix = lda;

    /* Vector operands; ldb/ldc carry incx/incy. */
    kargs->B = x;
    kargs->offBX = offx;
    kargs->ldb.vector = incx;
    kargs->C = y;
    kargs->offCY = offy;
    kargs->ldc.vector = incy;

    listInitHead(&steps);
    status = makeSolutionSeq(CLBLAS_GBMV, kargs, numCommandQueues, commandQueues,
                             numEventsInWaitList, eventWaitList, events, &steps);
    if (status == CL_SUCCESS) {
        status = executeSolutionSeq(&steps);
    }
    freeSolutionSeq(&steps);

    return (clblasStatus)status;
}
Beispiel #10
0
/**
 * Validate the arguments of a GEMV request, populate the kernel arguments
 * and run the resulting solution sequence.
 *
 * The expected vector lengths depend on transA: for clblasNoTrans x is
 * checked against N and y against M; otherwise x against M and y against N.
 *
 * Checks are made in this order and the first failure is returned:
 * library initialisation, memory objects, size of A (M x N), size of x,
 * size of y.
 *
 * @return the failing validation status, otherwise the status reported by
 *         makeSolutionSeq()/executeSolutionSeq() cast to clblasStatus.
 */
static clblasStatus
doGemv(
    CLBlasKargs *kargs,
    clblasOrder order,
    clblasTranspose transA,
    size_t M,
    size_t N,
    const cl_mem A,
    size_t offA,
    size_t lda,
    const cl_mem x,
    size_t offx,
    int incx,
    cl_mem y,
    size_t offy,
    int incy,
    cl_uint numCommandQueues,
    cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList,
    const cl_event *eventWaitList,
    cl_event *events)
{
    ListHead steps;
    cl_int status;
    size_t xLen;
    size_t yLen;
    clblasStatus rc = clblasSuccess;

    if (!clblasInitialized) {
        return clblasNotInitialized;
    }

    /* Argument validation. */
    rc = checkMemObjects(A, x, y, true, A_MAT_ERRSET, X_VEC_ERRSET, Y_VEC_ERRSET);
    if (rc) {
        return rc;
    }
    rc = checkMatrixSizes(kargs->dtype, order, clblasNoTrans,
                          M, N, A, offA, lda, A_MAT_ERRSET);
    if (rc) {
        return rc;
    }

    if (transA == clblasNoTrans) {
        xLen = N;
        yLen = M;
    } else {
        xLen = M;
        yLen = N;
    }

    rc = checkVectorSizes(kargs->dtype, xLen, x, offx, incx, X_VEC_ERRSET);
    if (rc) {
        return rc;
    }
    rc = checkVectorSizes(kargs->dtype, yLen, y, offy, incy, Y_VEC_ERRSET);
    if (rc) {
        return rc;
    }

    kargs->order = order;
    kargs->transA = transA;
    kargs->M = M;
    kargs->N = N;
    /*
     * K stores the original height of the matrix A.
     * FIXME: store it to a dedicated field
     */
    kargs->K = (transA == clblasNoTrans) ? M : N;

    kargs->A = A;
    kargs->offA = offA;
    kargs->lda.matrix = lda;

    /* Vector operands; ldb/ldc carry incx/incy. */
    kargs->B = x;
    kargs->offBX = offx;
    kargs->ldb.vector = incx;
    kargs->C = y;
    kargs->offCY = offy;
    kargs->ldc.vector = incy;

    listInitHead(&steps);
    status = makeSolutionSeq(CLBLAS_GEMV, kargs, numCommandQueues, commandQueues,
                             numEventsInWaitList, eventWaitList, events, &steps);
    if (status == CL_SUCCESS) {
        status = executeSolutionSeq(&steps);
    }
    freeSolutionSeq(&steps);

    return (clblasStatus)status;
}
Beispiel #11
0
/**
 * Validate the arguments of a ROTG request, populate the kernel arguments
 * and run the resulting solution sequence.
 *
 * A, B and S are size-checked as one element of kargs->dtype.  C is
 * size-checked as one element of the matching real type, because C is real
 * even for complex data.
 *
 * Checks are made in this order and the first failure is returned:
 * library initialisation, memory objects (A, B), memory objects (C, S),
 * sizes of A, B, C, S, command queues, event wait list.
 *
 * @return the failing validation status, otherwise the status reported by
 *         makeSolutionSeq()/executeSolutionSeq() cast to clblasStatus.
 */
clblasStatus
doRotg(
    CLBlasKargs *kargs,
    cl_mem A,
    size_t offA,
    cl_mem B,
    size_t offB,
    cl_mem C,
    size_t offC,
    cl_mem S,
    size_t offS,
    cl_uint numCommandQueues,
    cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList,
    const cl_event *eventWaitList,
    cl_event *events)
{
    ListHead steps;
    cl_int status;
    clblasStatus rc = clblasSuccess;
    /* Real counterpart of the data type, used for C. */
    DataType realType = kargs->dtype;

    if (kargs->dtype == TYPE_COMPLEX_FLOAT) {
        realType = TYPE_FLOAT;
    } else if (kargs->dtype == TYPE_COMPLEX_DOUBLE) {
        realType = TYPE_DOUBLE;
    }

    if (!clblasInitialized) {
        return clblasNotInitialized;
    }

    /* Memory objects A and B. */
    rc = checkMemObjects(A, B, A, false, X_VEC_ERRSET, Y_VEC_ERRSET, X_VEC_ERRSET);
    if (rc) {
        printf("Invalid mem object..\n");
        return rc;
    }
    /* Memory objects C and S. */
    rc = checkMemObjects(C, S, C, false, X_VEC_ERRSET, Y_VEC_ERRSET, X_VEC_ERRSET);
    if (rc) {
        printf("Invalid mem object..\n");
        return rc;
    }

    /* Check whether enough memory was allocated for each scalar buffer. */
    rc = checkVectorSizes(kargs->dtype, 1, A, offA, 1, X_VEC_ERRSET);
    if (rc) {
        printf("Invalid Size for A\n");
        return rc;
    }
    rc = checkVectorSizes(kargs->dtype, 1, B, offB, 1, Y_VEC_ERRSET);
    if (rc) {
        printf("Invalid Size for B\n");
        return rc;
    }
    rc = checkVectorSizes(realType, 1, C, offC, 1, X_VEC_ERRSET);
    if (rc) {
        printf("Invalid Size for C\n");
        return rc;
    }
    rc = checkVectorSizes(kargs->dtype, 1, S, offS, 1, Y_VEC_ERRSET);
    if (rc) {
        printf("Invalid Size for S\n");
        return rc;
    }

    if (commandQueues == NULL || numCommandQueues == 0) {
        return clblasInvalidValue;
    }

    /* Only one queue is used for now: no multi-GPU support. */
    numCommandQueues = 1;
    if (commandQueues[0] == NULL) {
        return clblasInvalidCommandQueue;
    }

    if (numEventsInWaitList != 0 && eventWaitList == NULL) {
        return clblasInvalidEventWaitList;
    }

    /* Buffers and their offsets, pairwise. */
    kargs->A = A;
    kargs->offa = offA;
    kargs->B = B;
    kargs->offb = offB;
    kargs->C = C;
    kargs->offc = offC;
    kargs->D = S;
    kargs->offd = offS;

    listInitHead(&steps);
    status = makeSolutionSeq(CLBLAS_ROTG, kargs, numCommandQueues, commandQueues,
                             numEventsInWaitList, eventWaitList, events, &steps);
    if (status == CL_SUCCESS) {
        status = executeSolutionSeq(&steps);
    }
    freeSolutionSeq(&steps);

    return (clblasStatus)status;
}
Beispiel #12
0
/**
 * Validate the arguments of a ROTMG request, populate the kernel arguments
 * and run the resulting solution sequence.
 *
 * D1, D2, X1 and Y1 are size-checked as one element each.  The param buffer
 * is size-checked for five elements, the length of the ROTMG parameter
 * array, so an undersized buffer is rejected before any kernel runs.
 *
 * Checks are made in this order and the first failure is returned:
 * library initialisation, memory objects (D1, D2, X1), memory objects
 * (Y1, param), sizes of D1, D2, X1, Y1, param, command queues,
 * event wait list.
 *
 * @return the failing validation status, otherwise the status reported by
 *         makeSolutionSeq()/executeSolutionSeq() cast to clblasStatus.
 */
clblasStatus
doRotmg(
    CLBlasKargs *kargs,
    cl_mem D1,
    size_t offD1,
    cl_mem D2,
    size_t offD2,
    cl_mem X1,
    size_t offX1,
    cl_mem Y1,
    size_t offY1,
    cl_mem param,
    size_t offParam,
    cl_uint numCommandQueues,
    cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList,
    const cl_event *eventWaitList,
    cl_event *events)
{
    cl_int err;
    ListHead seq;
    clblasStatus retCode = clblasSuccess;

    if (!clblasInitialized) {
        return clblasNotInitialized;
    }

    /* Validate arguments */

    /* Memory objects D1, D2 and X1. */
    retCode = checkMemObjects(D1, D2, X1, true, X_VEC_ERRSET, Y_VEC_ERRSET, X_VEC_ERRSET);
    if (retCode) {
#ifdef DEBUG_ROTMG
        printf("Invalid mem object..\n");
#endif
        return retCode;
    }
    /* Memory objects Y1 and param. */
    retCode = checkMemObjects(Y1, param, Y1, false, X_VEC_ERRSET, Y_VEC_ERRSET, X_VEC_ERRSET);
    if (retCode) {
#ifdef DEBUG_ROTMG
        printf("Invalid mem object..\n");
#endif
        return retCode;
    }

    /* Check whether enough memory was allocated */

    if ((retCode = checkVectorSizes(kargs->dtype, 1, D1, offD1, 1, X_VEC_ERRSET))) {
#ifdef DEBUG_ROTMG
        printf("Invalid Size for D1\n");
#endif
        return retCode;
    }
    if ((retCode = checkVectorSizes(kargs->dtype, 1, D2, offD2, 1, Y_VEC_ERRSET))) {
#ifdef DEBUG_ROTMG
        printf("Invalid Size for D2\n");
#endif
        return retCode;
    }
    if ((retCode = checkVectorSizes(kargs->dtype, 1, X1, offX1, 1, X_VEC_ERRSET))) {
#ifdef DEBUG_ROTMG
        printf("Invalid Size for X1\n");
#endif
        return retCode;
    }
    if ((retCode = checkVectorSizes(kargs->dtype, 1, Y1, offY1, 1, Y_VEC_ERRSET))) {
#ifdef DEBUG_ROTMG
        printf("Invalid Size for Y1\n");
#endif
        return retCode;
    }
    /* The ROTMG parameter array has 5 elements, not 1. */
    if ((retCode = checkVectorSizes(kargs->dtype, 5, param, offParam, 1, Y_VEC_ERRSET))) {
#ifdef DEBUG_ROTMG
        printf("Invalid Size for PARAM\n");
#endif
        return retCode;
    }

    if ((commandQueues == NULL) || (numCommandQueues == 0)) {
        return clblasInvalidValue;
    }

    /* numCommandQueues will be hardcoded to 1 as of now. No multi-gpu support */
    numCommandQueues = 1;
    if (commandQueues[0] == NULL) {
        return clblasInvalidCommandQueue;
    }

    if ((numEventsInWaitList != 0) && (eventWaitList == NULL)) {
        return clblasInvalidEventWaitList;
    }

    kargs->A = D1;
    kargs->B = D2;
    kargs->C = X1;
    kargs->D = Y1;
    kargs->E = param;
    kargs->offa = offD1;
    kargs->offb = offD2;
    kargs->offc = offX1;
    kargs->offd = offY1;
    kargs->offe = offParam;

    listInitHead(&seq);
    err = makeSolutionSeq(CLBLAS_ROTMG, kargs, numCommandQueues, commandQueues,
                          numEventsInWaitList, eventWaitList, events, &seq);
    if (err == CL_SUCCESS) {
        err = executeSolutionSeq(&seq);
    }

    freeSolutionSeq(&seq);

    return (clblasStatus)err;
}
Beispiel #13
0
/**
 * Validate the arguments of an ASUM request and run it as two chained
 * solution sequences: CLBLAS_ASUM, then CLBLAS_REDUCTION_EPILOGUE.
 *
 * The epilogue works on a copy of the kernel arguments taken before the
 * first sequence is built.  Its dtype is the real counterpart of
 * kargs->dtype and its N is the number of work-groups spawned by the first
 * step.  It waits on the first sequence's event and reports through
 * @p events.
 *
 * Checks are made in this order and the first failure is returned:
 * library initialisation, memory objects, size of X, size of scratchBuff
 * (N elements), size of asum (one real element), command queues,
 * event wait list.
 *
 * @return the failing validation status, otherwise the status reported by
 *         makeSolutionSeq()/executeSolutionSeq() cast to clblasStatus.
 */
clblasStatus
doAsum(
    CLBlasKargs *kargs,
    size_t N,
    cl_mem asum,
    size_t offAsum,
    const cl_mem X,
    size_t offx,
    int incx,
    cl_mem scratchBuff,
    cl_uint numCommandQueues,
    cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList,
    const cl_event *eventWaitList,
    cl_event *events)
{
    ListHead firstPass, epilogue;
    cl_event firstDone;
    CLBlasKargs redctnArgs;
    ListNode *firstNode;
    SolutionStep *firstStep;
    cl_int status;
    clblasStatus rc = clblasSuccess;
    /* Real counterpart of the data type, used for the result. */
    DataType realType = kargs->dtype;

    if (kargs->dtype == TYPE_COMPLEX_FLOAT) {
        realType = TYPE_FLOAT;
    } else if (kargs->dtype == TYPE_COMPLEX_DOUBLE) {
        realType = TYPE_DOUBLE;
    }

    if (!clblasInitialized) {
        return clblasNotInitialized;
    }

    /* Argument validation. */
    rc = checkMemObjects(scratchBuff, asum, X, true, X_VEC_ERRSET, X_VEC_ERRSET, X_VEC_ERRSET);
    if (rc) {
        printf("Invalid mem object..\n");
        return rc;
    }

    /* Check whether enough memory was allocated. */
    rc = checkVectorSizes(kargs->dtype, N, X, offx, incx, X_VEC_ERRSET);
    if (rc) {
        printf("Invalid Size for X\n");
        return rc;
    }
    /* Minimum size of scratchBuff is N. */
    rc = checkVectorSizes(kargs->dtype, N, scratchBuff, 0, 1, X_VEC_ERRSET);
    if (rc) {
        printf("Insufficient ScratchBuff\n");
        return rc;
    }
    rc = checkVectorSizes(realType, 1, asum, offAsum, 1, X_VEC_ERRSET);
    if (rc) {
        printf("Invalid Size for asum\n");
        return rc;
    }

    if (commandQueues == NULL || numCommandQueues == 0) {
        return clblasInvalidValue;
    }

    /* Only one queue is used for now: no multi-GPU support. */
    numCommandQueues = 1;
    if (commandQueues[0] == NULL) {
        return clblasInvalidCommandQueue;
    }

    if (numEventsInWaitList != 0 && eventWaitList == NULL) {
        return clblasInvalidEventWaitList;
    }

    /* With incx < 1 the problem size is forced to 1. */
    kargs->N = (incx < 1) ? 1 : N;
    kargs->A = asum;
    kargs->offA = offAsum;
    kargs->B = X;
    kargs->offBX = offx;
    kargs->ldb.vector = incx;   /* ldb carries incx */
    kargs->D = scratchBuff;
    kargs->redctnType = REDUCE_BY_SUM;

    /* Snapshot the arguments for the epilogue, which uses the real type. */
    memcpy(&redctnArgs, kargs, sizeof(CLBlasKargs));
    redctnArgs.dtype = realType;

    listInitHead(&firstPass);
    status = makeSolutionSeq(CLBLAS_ASUM, kargs, numCommandQueues, commandQueues,
                             numEventsInWaitList, eventWaitList, &firstDone, &firstPass);
    if (status == CL_SUCCESS) {
        status = executeSolutionSeq(&firstPass);
    }

    if (status == CL_SUCCESS) {
        /*
         * The second kernel call needs the number of work-groups used by
         * the first one; it is read from the first step and passed as N.
         */
        firstNode = listNodeFirst(&firstPass);
        firstStep = container_of(firstNode, node, SolutionStep);
        redctnArgs.N = firstStep->pgran.numWGSpawned[0];    /* 1D block was used */

        listInitHead(&epilogue);
        status = makeSolutionSeq(CLBLAS_REDUCTION_EPILOGUE, &redctnArgs, numCommandQueues, commandQueues,
                                 1, &firstDone, events, &epilogue);
        if (status == CL_SUCCESS) {
            status = executeSolutionSeq(&epilogue);
        }
        freeSolutionSeq(&epilogue);
    }

    freeSolutionSeq(&firstPass);
    return (clblasStatus)status;
}
Beispiel #14
0
/**
 * Validate the arguments of an iAMAX request and run it as two chained
 * solution sequences: CLBLAS_iAMAX, then CLBLAS_REDUCTION_EPILOGUE.
 *
 * Kernel argument slots: D is the scratch buffer, A is the output buffer
 * (iMax, with offA = offiMax) and B is the input vector X.
 *
 * The epilogue works on a copy of the kernel arguments taken before the
 * first sequence is built.  Its dtype is the real counterpart of the copied
 * dtype and its N is the number of work-groups spawned by the first step.
 * It waits on the first sequence's event and reports through @p events.
 *
 * Checks are made in this order and the first failure is returned:
 * library initialisation, memory objects, size of X, size of scratchBuf
 * (2 * N elements), size of iMax (one TYPE_UNSIGNED_INT element),
 * command queues, event wait list.
 *
 * @return the failing validation status, otherwise the status reported by
 *         makeSolutionSeq()/executeSolutionSeq() cast to clblasStatus.
 */
clblasStatus
doiAmax(
    CLBlasKargs *kargs,
    size_t N,
    cl_mem iMax,
    size_t offiMax,
    const cl_mem X,
    size_t offx,
    int incx,
    cl_mem scratchBuf,
    cl_uint numCommandQueues,
    cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList,
    const cl_event *eventWaitList,
    cl_event *events)
{
    ListHead firstPass, epilogue;
    cl_event firstDone;
    CLBlasKargs redctnArgs;
    ListNode *firstNode;
    SolutionStep *firstStep;
    cl_int status;
    clblasStatus rc = clblasSuccess;

    if (!clblasInitialized) {
        return clblasNotInitialized;
    }

    /* Argument validation. */
    rc = checkMemObjects(X, scratchBuf, iMax, true, X_VEC_ERRSET, A_MAT_ERRSET, X_VEC_ERRSET);
    if (rc) {
        printf("Invalid mem object..\n");
        return rc;
    }

    /* Check whether enough memory was allocated. */
    rc = checkVectorSizes(kargs->dtype, N, X, offx, incx, X_VEC_ERRSET);
    if (rc) {
        printf("Invalid Size for X\n");
        return rc;
    }
    /* Minimum size of the scratch buffer is 2 * N. */
    rc = checkVectorSizes(kargs->dtype, (2 * N), scratchBuf, 0, 1, A_MAT_ERRSET);
    if (rc) {
        printf("Insufficient ScratchBuff A\n");
        return rc;
    }
    rc = checkVectorSizes(TYPE_UNSIGNED_INT, 1, iMax, offiMax, 1, X_VEC_ERRSET);
    if (rc) {
        printf("Invalid Size for iX\n");
        return rc;
    }

    if (commandQueues == NULL || numCommandQueues == 0) {
        return clblasInvalidValue;
    }

    /* Only one queue is used for now: no multi-GPU support. */
    numCommandQueues = 1;
    if (commandQueues[0] == NULL) {
        return clblasInvalidCommandQueue;
    }

    if (numEventsInWaitList != 0 && eventWaitList == NULL) {
        return clblasInvalidEventWaitList;
    }

    /* With incx < 1 the problem size is forced to 1 (a single work-group). */
    kargs->N = (incx < 1) ? 1 : N;
    kargs->B = X;
    kargs->offb = offx;
    kargs->ldb.vector = incx;   /* ldb carries incx */
    kargs->D = scratchBuf;
    kargs->A = iMax;
    kargs->offA = offiMax;
#ifdef IAMAX_USE_ATOMIC_MIN
    kargs->redctnType = REDUCE_MAX_WITH_INDEX_ATOMICS;
#else
    kargs->redctnType = REDUCE_MAX_WITH_INDEX;
#endif

    /* Snapshot the arguments for the epilogue. */
    memcpy(&redctnArgs, kargs, sizeof(CLBlasKargs));

    listInitHead(&firstPass);
    status = makeSolutionSeq(CLBLAS_iAMAX, kargs, numCommandQueues, commandQueues,
                             numEventsInWaitList, eventWaitList, &firstDone, &firstPass);
    if (status == CL_SUCCESS) {
        status = executeSolutionSeq(&firstPass);
    }

    if (status == CL_SUCCESS) {
        /*
         * The second kernel call needs the number of work-groups used by
         * the first one; it is read from the first step and passed as N.
         */
        firstNode = listNodeFirst(&firstPass);
        firstStep = container_of(firstNode, node, SolutionStep);
        redctnArgs.N = firstStep->pgran.numWGSpawned[0];    /* 1D block was used */

        /* The epilogue runs on the real counterpart of a complex type. */
        if (redctnArgs.dtype == TYPE_COMPLEX_FLOAT) {
            redctnArgs.dtype = TYPE_FLOAT;
        } else if (redctnArgs.dtype == TYPE_COMPLEX_DOUBLE) {
            redctnArgs.dtype = TYPE_DOUBLE;
        }

        listInitHead(&epilogue);
        status = makeSolutionSeq(CLBLAS_REDUCTION_EPILOGUE, &redctnArgs, numCommandQueues, commandQueues,
                                 1, &firstDone, events, &epilogue);
        if (status == CL_SUCCESS) {
            status = executeSolutionSeq(&epilogue);
        }
        freeSolutionSeq(&epilogue);
    }

    freeSolutionSeq(&firstPass);
    return (clblasStatus)status;
}
Beispiel #15
0
/*
 * doTbsv - validate a TBSV request, populate the kernel argument block and
 * pass the prepared solution sequences to orchestrateTBSV().
 *
 * kargs                 argument block; kargs->dtype is read by the size
 *                       checks, the fields assigned below are overwritten.
 * order, uplo, trans,
 * diag                  copied into kargs unchanged.
 * N                     stored in both kargs->M and kargs->N.
 * K                     stored in kargs->K and passed to the banded-matrix
 *                       size check.
 * A, offa, lda          matrix buffer, its offset and leading dimension.
 * x, offx, incx         vector buffer; recorded as both the B and C operands
 *                       of kargs.
 * numCommandQueues,
 * commandQueues         must be non-zero / non-NULL; only commandQueues[0]
 *                       is inspected here and the count is then forced to 1.
 * numEventsInWaitList,
 * eventWaitList, events forwarded to makeSolutionSeq() and orchestrateTBSV().
 *
 * Returns clblasNotInitialized if the library flag is not set, the status
 * of the first failing validation helper, clblasInvalidValue,
 * clblasInvalidEventWaitList or clblasInvalidCommandQueue for bad queue /
 * event arguments, and otherwise the last status produced by
 * makeSolutionSeq() / orchestrateTBSV(), cast to clblasStatus.
 */
clblasStatus
doTbsv(
    CLBlasKargs *kargs,
    clblasOrder order,
    clblasUplo uplo,
    clblasTranspose trans,
    clblasDiag diag,
    size_t N,
    size_t K,
    const cl_mem A,
    size_t offa,
    size_t lda,
    cl_mem x,
    size_t offx,
    int incx,
    cl_uint numCommandQueues,
    cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList,
    const cl_event *eventWaitList,
    cl_event *events)
{
    clblasStatus status = clblasSuccess;
    cl_int err = clblasNotImplemented;
    CLBlasKargs gbmvArgs;
    ListHead trsvSeq;
    ListHead gbmvSeq;

    if (!clblasInitialized) {
        return clblasNotInitialized;
    }

    /* ---- Argument validation -------------------------------------- */

    status = checkMemObjects(A, x, (cl_mem) NULL, false,
                             A_MAT_ERRSET, X_VEC_ERRSET, END_ERRSET);
    if (status != clblasSuccess) {
#ifdef DEBUG_TBSV
        printf("Invalid mem object..\n");
#endif
        return status;
    }

    /*
     * NOTE(review): an earlier note at this spot said the matrix size check
     * did not account for "offa". "offa" is passed to
     * checkBandedMatrixSizes() here; confirm the helper actually uses it.
     */
    status = checkBandedMatrixSizes(kargs->dtype, order, trans, N, N, K, 0,
                                    A, offa, lda, A_MAT_ERRSET);
    if (status != clblasSuccess) {
#ifdef DEBUG_TBSV
        printf("Invalid Size for A\n");
#endif
        return status;
    }

    status = checkVectorSizes(kargs->dtype, N, x, offx, incx, X_VEC_ERRSET);
    if (status != clblasSuccess) {
#ifdef DEBUG_TBSV
        printf("Invalid Size for X\n");
#endif
        return status;
    }

#ifdef DEBUG_TBSV
    printf("DoTbsv being called...\n");
#endif

    /* Queue and event-list sanity checks; the order decides which code wins. */
    if ((numCommandQueues == 0) || (commandQueues == NULL)) {
        return clblasInvalidValue;
    }
    if ((eventWaitList == NULL) && (numEventsInWaitList != 0)) {
        return clblasInvalidEventWaitList;
    }
    if (commandQueues[0] == NULL) {
        return clblasInvalidCommandQueue;
    }

    /* Only a single command queue is used; the count is hard-coded to 1. */
    numCommandQueues = 1;

    /* ---- Fill in the kernel argument block ------------------------- */

    kargs->order = order;
    kargs->uplo = uplo;
    kargs->transA = trans;
    kargs->diag = diag;
    kargs->M = N;               /* keeps the original N */
    kargs->N = N;
    kargs->K = K;
    kargs->A = A;
    kargs->lda.matrix = lda;
    kargs->B = x;
    kargs->ldb.vector = incx;
    kargs->offBX = offx;
    kargs->offa = offa;
    kargs->offA = offa;
    kargs->C = x;               /* x is recorded as the C operand too */
    kargs->offCY = offx;
    kargs->ldc.vector = incx;
    kargs->startRow = 0;

    /*
     * endRow is N-1 when "trans is clblasNoTrans" and "order is
     * clblasRowMajor" are either both true or both false; otherwise it is N.
     */
    if ((trans == clblasNoTrans) == (order == clblasRowMajor)) {
        kargs->endRow = N - 1;
    } else {
        kargs->endRow = N;
    }

    /* The GBMV step starts from a copy of the same arguments. */
    memcpy(&gbmvArgs, kargs, sizeof(gbmvArgs));
    gbmvArgs.pigFuncID = CLBLAS_GBMV;

    /* Both lists are initialised up front so both can be freed on any path. */
    listInitHead(&trsvSeq);
    listInitHead(&gbmvSeq);

    /* ---- Build both sequences, then orchestrate -------------------- */

    err = makeSolutionSeq(CLBLAS_TRSV, kargs, numCommandQueues, commandQueues,
                          numEventsInWaitList, eventWaitList, events, &trsvSeq);

    if (err == CL_SUCCESS) {
        /* The GBMV sequence is created without wait list or output event. */
        err = makeSolutionSeq(CLBLAS_GBMV, &gbmvArgs, numCommandQueues,
                              commandQueues, 0, NULL, NULL, &gbmvSeq);
    }

    if (err == CL_SUCCESS) {
        err = orchestrateTBSV(kargs, &trsvSeq, &gbmvSeq,
                              numEventsInWaitList, eventWaitList, events);
    }

    freeSolutionSeq(&trsvSeq);
    freeSolutionSeq(&gbmvSeq);
    return (clblasStatus)err;
}