Exemplo n.º 1
0
  /**
   * Convertibility test: decides whether obj_ptr is accepted as input.
   *
   * Returns obj_ptr itself when every check passes.  Returns 0 (without
   * raising) when obj_ptr is null, is not a numpy array, or has an element
   * type rejected by TypeToNumPy<scalar_t>::canConvert.  An array whose
   * number of dimensions is neither 1 nor 2 is reported through
   * THROW_TYPE_ERROR instead.  The per-shape checks are delegated to
   * checkVectorSizes / checkMatrixSizes; how those report a mismatch is not
   * visible from here.
   */
  static void* convertible(PyObject *obj_ptr)
  {
    // A null pointer or a non-numpy object is simply "not convertible":
    // these cases return 0 rather than raising a type error.
    if (!obj_ptr || !PyArray_Check(obj_ptr))
      return 0;

    // The array's element type must be one that scalar_t can be built from;
    // a mismatch is also reported by returning 0.
    const int npyType = PyArray_ObjectType(obj_ptr, 0);
    if (!TypeToNumPy<scalar_t>::canConvert(npyType))
      return 0;

    // Only 1-D (vector) and 2-D (matrix) arrays are supported.
    const int nd = PyArray_NDIM(obj_ptr);
    const bool isVector = (nd == 1);

    if (!isVector && nd != 2)
      {
        THROW_TYPE_ERROR("Conversion is only valid for arrays with 1 or 2 dimensions. Argument has " << nd << " dimensions");
      }

    // Shape validation depends on the rank.
    if (isVector)
      checkVectorSizes(obj_ptr);
    else
      checkMatrixSizes(obj_ptr);

    return obj_ptr;
  }
Exemplo n.º 2
0
/*
 * Validate the HEMV arguments, record them in kargs and run the operation
 * as two chained CLBLAS_HEMV solution sequences.
 *
 * The first sequence is built with transA = clblasNoTrans and
 * diag = clblasNonUnit and is given a local event as its output event.
 * Only if it was built and executed successfully is the second sequence
 * built, with transA = clblasConjTrans and diag = clblasUnit; it waits on
 * that local event and reports through `events`.
 * NOTE(review): presumably each pass covers one triangle of the matrix -
 * confirm against the HEMV kernel generator.
 *
 * Returns clblasNotInitialized when the library is not initialized, the
 * code of the first failing argument check, clblasInvalidValue for a NULL
 * or empty queue list, clblasInvalidEventWaitList for a non-zero wait count
 * with a NULL list, and otherwise the status of the solution sequences cast
 * to clblasStatus.  Only one command queue is used: numCommandQueues is
 * forced to 1.
 */
static clblasStatus
doHemv(
    CLBlasKargs *kargs,
    clblasOrder order,
    clblasUplo uplo,
    size_t N,
    const cl_mem A,
    size_t offA,
    size_t lda,
    const cl_mem x,
    size_t offx,
    int incx,
    cl_mem y,
    size_t offy,
    int incy,
    cl_uint numCommandQueues,
    cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList,
    const cl_event *eventWaitList,
    cl_event *events)
{
    cl_int err;
    ListHead firstPass, secondPass;
    cl_event firstPassDone;
    clblasStatus status = clblasSuccess;

    if (!clblasInitialized) {
        return clblasNotInitialized;
    }

    /* Argument validation: the first failing check decides the result. */
    status = checkMemObjects(A, x, y, true, A_MAT_ERRSET, X_VEC_ERRSET, Y_VEC_ERRSET);
    if (status) {
        return status;
    }
    status = checkMatrixSizes(kargs->dtype, order, clblasNoTrans, N, N,
                              A, offA, lda, A_MAT_ERRSET);
    if (status) {
        return status;
    }
    status = checkVectorSizes(kargs->dtype, N, x, offx, incx, X_VEC_ERRSET);
    if (status) {
        return status;
    }
    status = checkVectorSizes(kargs->dtype, N, y, offy, incy, Y_VEC_ERRSET);
    if (status) {
        return status;
    }
    if (commandQueues == NULL || numCommandQueues == 0) {
        return clblasInvalidValue;
    }
    if (numEventsInWaitList != 0 && eventWaitList == NULL) {
        return clblasInvalidEventWaitList;
    }

    /* Whatever count the caller passed, only one queue is handed on. */
    numCommandQueues = 1;

    /* Problem description. */
    kargs->order = order;
    kargs->uplo = uplo;
    kargs->N = N;

    /* Matrix A; both offset fields receive the same value. */
    kargs->A = A;
    kargs->offA = offA;
    kargs->offa = offA;
    kargs->lda.matrix = lda;

    /* Vector x travels in the B slot, vector y in the C slot. */
    kargs->B = x;
    kargs->offBX = offx;
    kargs->ldb.Vector = incx;
    kargs->C = y;
    kargs->offCY = offy;
    kargs->ldc.Vector = incy;

    /* Settings for the first pass. */
    kargs->transA = clblasNoTrans;
    kargs->diag = clblasNonUnit;

    listInitHead(&firstPass);
    err = makeSolutionSeq(CLBLAS_HEMV, kargs, numCommandQueues, commandQueues,
                          numEventsInWaitList, eventWaitList,
                          &firstPassDone, &firstPass);
    if (err == CL_SUCCESS) {
        err = executeSolutionSeq(&firstPass);
    }

    /* The second pass runs only when the first was built and executed. */
    if (err == CL_SUCCESS) {
        listInitHead(&secondPass);
        kargs->transA = clblasConjTrans;
        kargs->diag = clblasUnit;
        err = makeSolutionSeq(CLBLAS_HEMV, kargs, numCommandQueues, commandQueues,
                              1, &firstPassDone, events, &secondPass);
        if (err == CL_SUCCESS) {
            err = executeSolutionSeq(&secondPass);
        }
        freeSolutionSeq(&secondPass);
    }

    freeSolutionSeq(&firstPass);
    return (clblasStatus)err;
}
Exemplo n.º 3
0
/*
 * Validate the HER2 arguments, record them in kargs and run one CLBLAS_HER2
 * solution sequence.
 *
 * For clblasRowMajor input the roles of X and Y are exchanged and uplo is
 * flipped before the values are stored in kargs; for any other order they
 * are stored as given.
 *
 * Returns clblasNotInitialized when the library is not initialized, the
 * code of the first failing argument check, clblasInvalidValue for a NULL
 * or empty queue list, clblasInvalidEventWaitList for a non-zero wait count
 * with a NULL list, and otherwise the status of the solution sequence cast
 * to clblasStatus.
 */
clblasStatus
doHer2(
    CLBlasKargs *kargs,
    clblasOrder order,
    clblasUplo uplo,
    size_t N,
    const cl_mem X,
    size_t offx,
    int incx,
    const cl_mem Y,
    size_t offy,
    int incy,
    cl_mem A,
    size_t offa,
    size_t lda,
    cl_uint numCommandQueues,
    cl_command_queue* commandQueue,
    cl_uint numEventsInWaitList,
    const cl_event* eventWaitList,
    cl_event* events)
{
    cl_int err;
    ListHead seq;
    clblasStatus status = clblasSuccess;
    int swapVectors;

    if (!clblasInitialized) {
        return clblasNotInitialized;
    }

#ifdef DEBUG_HER2
    printf("doHer2 called\n");
#endif

    /* Argument validation: the first failing check decides the result. */
    status = checkMemObjects(A, X, Y, true, A_MAT_ERRSET, X_VEC_ERRSET, Y_VEC_ERRSET);
    if (status) {
#ifdef DEBUG_HER2
        printf("Invalid mem object..\n");
#endif
        return status;
    }

    status = checkMatrixSizes(kargs->dtype, order, clblasNoTrans, N, N, A, offa, lda, A_MAT_ERRSET);
    if (status) {
#ifdef DEBUG_HER2
        printf("Invalid Size for A\n");
#endif
        return status;
    }

    status = checkVectorSizes(kargs->dtype, N, X, offx, incx, X_VEC_ERRSET);
    if (status) {
#ifdef DEBUG_HER2
        printf("Invalid Size for X\n");
#endif
        return status;
    }

    status = checkVectorSizes(kargs->dtype, N, Y, offy, incy, Y_VEC_ERRSET);
    if (status) {
#ifdef DEBUG_HER2
        printf("Invalid Size for Y\n");
#endif
        return status;
    }

    if (commandQueue == NULL || numCommandQueues == 0) {
        return clblasInvalidValue;
    }

    if (numEventsInWaitList != 0 && eventWaitList == NULL) {
        return clblasInvalidEventWaitList;
    }

    /* Row-major input is handled by inverting X, Y and uplo. */
    swapVectors = (order == clblasRowMajor);

    kargs->order = order;
    if (swapVectors) {
        kargs->uplo = (uplo == clblasUpper) ? clblasLower : clblasUpper;
    }
    else {
        kargs->uplo = uplo;
    }

    /* B slot: Y when swapped, X otherwise. */
    kargs->B = swapVectors ? Y : X;
    kargs->ldb.vector = swapVectors ? incy : incx;
    kargs->offBX = swapVectors ? offy : offx;

    /* C slot: the other vector. */
    kargs->C = swapVectors ? X : Y;
    kargs->ldc.vector = swapVectors ? incx : incy;
    kargs->offCY = swapVectors ? offx : offy;

    /* Matrix A; both offset fields receive the same value. */
    kargs->N = N;
    kargs->A = A;
    kargs->lda.matrix = lda;
    kargs->offa = offa;
    kargs->offA = offa;

#ifdef DEBUG_HER2
    printf("Calling makeSolutionSeq : HER2\n");
#endif

    /*
     * Only command queue 0 is used.
     * PENDING (from the original author):
     * 1. No multi-GPU / multi-command-queue support.
     * 2. Could be optimized to pick the queue with the higher memory
     *    bandwidth that supports the data type and the LDA.
     */
    numCommandQueues = 1;

    listInitHead(&seq);
    err = makeSolutionSeq(CLBLAS_HER2, kargs, numCommandQueues, commandQueue,
                          numEventsInWaitList, eventWaitList, events, &seq);
    if (err == CL_SUCCESS) {
        err = executeSolutionSeq(&seq);
    }
    freeSolutionSeq(&seq);

    return (clblasStatus)err;
}
Exemplo n.º 4
0
/*
 * Validate the GER arguments, record them in kargs and run one CLBLAS_GER
 * solution sequence.
 *
 * X is checked against length M and Y against length N; A is checked as an
 * M x N matrix.  The doConj flag is carried to the solver in kargs->K.
 *
 * Returns clblasNotInitialized when the library is not initialized, the
 * code of the first failing argument check, clblasInvalidValue for a NULL
 * or empty queue list, clblasInvalidCommandQueue when commandQueues[0] is
 * NULL, clblasInvalidEventWaitList for a non-zero wait count with a NULL
 * list, and otherwise the status of the solution sequence cast to
 * clblasStatus.
 */
clblasStatus
doGer(
    CLBlasKargs *kargs,
    clblasOrder order,
    size_t M,
    size_t N,
    const cl_mem X,
    size_t offx,
    int incx,
    const cl_mem Y,
    size_t offy,
    int incy,
    cl_mem  A,
    size_t offa,
    size_t lda,
    int doConj,
    cl_uint numCommandQueues,
    cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList,
    const cl_event *eventWaitList,
    cl_event *events)
{
    cl_int err;
    ListHead seq;
    clblasStatus status = clblasSuccess;

    if (!clblasInitialized) {
        return clblasNotInitialized;
    }

    /* Argument validation: the first failing check decides the result. */
    status = checkMemObjects(A, X, Y, true, A_MAT_ERRSET, X_VEC_ERRSET, Y_VEC_ERRSET);
    if (status) {
#ifdef DEBUG_GER
        printf("Invalid mem object..\n");
#endif
        return status;
    }

    /* Check whether the buffers are large enough for the requested sizes. */
    status = checkMatrixSizes(kargs->dtype, order, clblasNoTrans, M, N, A, offa, lda, A_MAT_ERRSET);
    if (status) {
#ifdef DEBUG_GER
        printf("Invalid Size for A %d\n",status );
#endif
        return status;
    }

    status = checkVectorSizes(kargs->dtype, M, X, offx, incx, X_VEC_ERRSET);
    if (status) {
#ifdef DEBUG_GER
        printf("Invalid Size for X\n");
#endif
        return status;
    }

    status = checkVectorSizes(kargs->dtype, N, Y, offy, incy, Y_VEC_ERRSET);
    if (status) {
#ifdef DEBUG_GER
        printf("Invalid Size for Y\n");
#endif
        return status;
    }

    if (commandQueues == NULL || numCommandQueues == 0) {
        return clblasInvalidValue;
    }

    /* The queue count is hard-coded to 1 for now: no multi-GPU support. */
    numCommandQueues = 1;
    if (commandQueues[0] == NULL) {
        return clblasInvalidCommandQueue;
    }

    if (numEventsInWaitList != 0 && eventWaitList == NULL) {
        return clblasInvalidEventWaitList;
    }

    /*
     * ASSUMPTION (carried over from the original comment): this function
     * works on command queue 0 only, and makeSolutionSeq is said to reflect
     * the same assumption.  Revisit this code if either changes.
     */

    /* Problem shape. */
    kargs->order = order;
    kargs->M = M;
    kargs->N = N;

    /* Matrix A; both offset fields receive the same value. */
    kargs->A = A;
    kargs->offa = offa;
    kargs->offA = offa;
    kargs->lda.matrix = lda;

    /* X goes in the B slot (ldb carries incx), Y in the C slot (ldc carries incy). */
    kargs->B = X;
    kargs->offBX = offx;
    kargs->ldb.vector = incx;
    kargs->C = Y;
    kargs->offCY = offy;
    kargs->ldc.vector = incy;

    kargs->offsetM = 0;
    kargs->offsetN = 0;
    kargs->scimage[0] = 0;
    kargs->scimage[1] = 0;

    /* K is reused to transport the doConj parameter. */
    kargs->K = (size_t)doConj;

#ifdef DEBUG_GER
    printf("Calling makeSolutionSeq from DoGer: GER\n");
#endif

    listInitHead(&seq);
    err = makeSolutionSeq(CLBLAS_GER, kargs, numCommandQueues, commandQueues,
                          numEventsInWaitList, eventWaitList, events, &seq);
    if (err == CL_SUCCESS) {
        err = executeSolutionSeq(&seq);
    }
    freeSolutionSeq(&seq);

    return (clblasStatus)err;
}
Exemplo n.º 5
0
/*
 * Validate the SYMV arguments, record them in kargs and run the operation.
 *
 * Two build variants exist:
 *  - without USE_SYMV: a single CLBLAS_SYMV solution sequence is built and
 *    executed with the caller's queue list and wait list;
 *  - with USE_SYMV (the original comment calls it the "version of SYMV
 *    using kprintf"): two CLBLAS_HEMV sequences are built, the first node
 *    of the second is appended to the first sequence, and the combined
 *    sequence is executed once using a single command queue.
 *
 * Returns clblasNotInitialized when the library is not initialized, the
 * code of the first failing argument check, and otherwise the status of
 * the solution sequence cast to clblasStatus.
 */
static clblasStatus
doSymv(
    CLBlasKargs *kargs,
    clblasOrder order,
    clblasUplo uplo,
    size_t N,
    const cl_mem A,
    size_t offA,
    size_t lda,
    const cl_mem x,
    size_t offx,
    int incx,
    cl_mem y,
    size_t offy,
    int incy,
    cl_uint numCommandQueues,
    cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList,
    const cl_event *eventWaitList,
    cl_event *events)
{
    cl_int err;
    ListHead seq;
    clblasStatus status = clblasSuccess;
#ifdef USE_SYMV
    ListHead tailSeq;
    ListNode *tailNode;
    cl_event firstDone;
#endif

    if (!clblasInitialized) {
        return clblasNotInitialized;
    }

    /* Argument validation: the first failing check decides the result. */
    status = checkMemObjects(A, x, y, true, A_MAT_ERRSET, X_VEC_ERRSET, Y_VEC_ERRSET);
    if (status) {
        return status;
    }
    status = checkMatrixSizes(kargs->dtype, order, clblasNoTrans, N, N,
                              A, offA, lda, A_MAT_ERRSET);
    if (status) {
        return status;
    }
    status = checkVectorSizes(kargs->dtype, N, x, offx, incx, X_VEC_ERRSET);
    if (status) {
        return status;
    }
    status = checkVectorSizes(kargs->dtype, N, y, offy, incy, Y_VEC_ERRSET);
    if (status) {
        return status;
    }

    /* Problem description; K keeps a copy of the original N. */
    kargs->order = order;
    kargs->uplo = uplo;
    kargs->N = N;
    kargs->K = N;

    /* Matrix A; both offset fields receive the same value. */
    kargs->A = A;
    kargs->offA = offA;
    kargs->offa = offA;
    kargs->lda.matrix = lda;

    /* Vector x travels in the B slot, vector y in the C slot. */
    kargs->B = x;
    kargs->offBX = offx;
    kargs->ldb.vector = incx;
    kargs->C = y;
    kargs->offCY = offy;
    kargs->ldc.vector = incy;

#ifdef USE_SYMV

    /* Two-kernel variant: only one command queue is used. */
    numCommandQueues = 1;
    listInitHead(&seq);

    kargs->transA = clblasNoTrans;
    kargs->diag = clblasNonUnit;
    err = makeSolutionSeq(CLBLAS_HEMV, kargs, numCommandQueues, commandQueues,
                          numEventsInWaitList, eventWaitList, &firstDone, &seq);
    if (err == CL_SUCCESS) {
        listInitHead(&tailSeq);

        kargs->transA = clblasTrans;
        kargs->diag = clblasUnit;
        /* The second sequence waits on the first one's event. */
        err = makeSolutionSeq(CLBLAS_HEMV, kargs, numCommandQueues, commandQueues,
                              1, &firstDone, events, &tailSeq);
        if (err == CL_SUCCESS) {
            /* Move the node from the second sequence onto the main one ... */
            tailNode = listNodeFirst(&tailSeq);
            listAddToTail(&seq, tailNode);

            /* ... so that one execution runs both kernels back to back. */
            err = executeSolutionSeq(&seq);
        }
    }

#else

    /* Single-sequence variant. */
    listInitHead(&seq);
    err = makeSolutionSeq(CLBLAS_SYMV, kargs, numCommandQueues, commandQueues,
                          numEventsInWaitList, eventWaitList, events, &seq);
    if (err == CL_SUCCESS) {
        err = executeSolutionSeq(&seq);
    }

#endif

    freeSolutionSeq(&seq);
    return (clblasStatus)err;
}
Exemplo n.º 6
0
/*
 * Validate the HPMV arguments, record them in kargs and run the operation
 * as two chained CLBLAS_TRMV solution sequences, with kargs->pigFuncID set
 * to CLBLAS_HPMV.
 *
 * AP is a packed matrix, so the leading dimension is passed and stored as
 * zero.  The first sequence is built with transA = clblasNoTrans and
 * diag = clblasNonUnit and is given a local event as its output event.
 * Only if it was built and executed successfully is the second sequence
 * built, with transA = clblasConjTrans and diag = clblasUnit; it waits on
 * that local event and reports through `events`.
 *
 * Returns clblasNotInitialized when the library is not initialized, the
 * code of the first failing argument check, clblasInvalidValue for a NULL
 * or empty queue list, clblasInvalidEventWaitList for a non-zero wait count
 * with a NULL list, and otherwise the status of the solution sequences cast
 * to clblasStatus.  Only one command queue is used.
 */
static clblasStatus
doHpmv(
    CLBlasKargs *kargs,
    clblasOrder order,
    clblasUplo uplo,
    size_t N,
    const cl_mem AP,
    size_t offa,
    const cl_mem X,
    size_t offx,
    int incx,
    cl_mem Y,
    size_t offy,
    int incy,
    cl_uint numCommandQueues,
    cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList,
    const cl_event *eventWaitList,
    cl_event *events)
{
    cl_int err;
    ListHead firstPass, secondPass;
    cl_event firstPassDone;
    clblasStatus status = clblasSuccess;

    if (!clblasInitialized) {
        return clblasNotInitialized;
    }

    /* Argument validation: the first failing check decides the result. */
    status = checkMemObjects(AP, X, Y, true, A_MAT_ERRSET, X_VEC_ERRSET, Y_VEC_ERRSET);
    if (status) {
        return status;
    }
    status = checkMatrixSizes(kargs->dtype, order, clblasNoTrans, N, N,
                              AP, offa, 0, A_MAT_ERRSET);
    if (status) {
        return status;
    }
    status = checkVectorSizes(kargs->dtype, N, X, offx, incx, X_VEC_ERRSET);
    if (status) {
        return status;
    }
    status = checkVectorSizes(kargs->dtype, N, Y, offy, incy, Y_VEC_ERRSET);
    if (status) {
        return status;
    }
    if (commandQueues == NULL || numCommandQueues == 0) {
        return clblasInvalidValue;
    }
    if (numEventsInWaitList != 0 && eventWaitList == NULL) {
        return clblasInvalidEventWaitList;
    }

    /* Whatever count the caller passed, only one queue is handed on. */
    numCommandQueues = 1;

    /* Problem description. */
    kargs->order = order;
    kargs->uplo = uplo;
    kargs->N = N;

    /* Packed matrix: both offset fields get offa, the leading dimension is zero. */
    kargs->A = AP;
    kargs->offA = offa;
    kargs->offa = offa;
    kargs->lda.matrix = 0;

    /* Vector X travels in the B slot, vector Y in the C slot. */
    kargs->B = X;
    kargs->offBX = offx;
    kargs->ldb.vector = incx;
    kargs->C = Y;
    kargs->offCY = offy;
    kargs->ldc.vector = incy;

    /* Settings for the first pass. */
    kargs->transA = clblasNoTrans;
    kargs->diag = clblasNonUnit;

    kargs->pigFuncID = CLBLAS_HPMV;

    listInitHead(&firstPass);
    err = makeSolutionSeq(CLBLAS_TRMV, kargs, numCommandQueues, commandQueues,
                          numEventsInWaitList, eventWaitList,
                          &firstPassDone, &firstPass);
    if (err == CL_SUCCESS) {
        err = executeSolutionSeq(&firstPass);
    }

    /* The second pass runs only when the first was built and executed. */
    if (err == CL_SUCCESS) {
        listInitHead(&secondPass);
        kargs->transA = clblasConjTrans;
        kargs->diag = clblasUnit;
        err = makeSolutionSeq(CLBLAS_TRMV, kargs, numCommandQueues, commandQueues,
                              1, &firstPassDone, events, &secondPass);
        if (err == CL_SUCCESS) {
            err = executeSolutionSeq(&secondPass);
        }
        freeSolutionSeq(&secondPass);
    }

    freeSolutionSeq(&firstPass);
    return (clblasStatus)err;
}
Exemplo n.º 7
0
/*
 * Validate the GEMV arguments, record them in kargs and run one CLBLAS_GEMV
 * solution sequence.
 *
 * A is always checked as an M x N matrix.  With transA == clblasNoTrans the
 * x vector is checked against length N and y against length M; for any
 * other transA value the two lengths are exchanged.
 *
 * Returns clblasNotInitialized when the library is not initialized, the
 * code of the first failing argument check, and otherwise the status of
 * the solution sequence cast to clblasStatus.
 */
static clblasStatus
doGemv(
    CLBlasKargs *kargs,
    clblasOrder order,
    clblasTranspose transA,
    size_t M,
    size_t N,
    const cl_mem A,
    size_t offA,
    size_t lda,
    const cl_mem x,
    size_t offx,
    int incx,
    cl_mem y,
    size_t offy,
    int incy,
    cl_uint numCommandQueues,
    cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList,
    const cl_event *eventWaitList,
    cl_event *events)
{
    cl_int err;
    ListHead seq;
    size_t xLen;
    size_t yLen;
    clblasStatus status = clblasSuccess;

    if (!clblasInitialized) {
        return clblasNotInitialized;
    }

    /* Expected vector lengths depend on whether A is transposed. */
    if (transA == clblasNoTrans) {
        xLen = N;
        yLen = M;
    }
    else {
        xLen = M;
        yLen = N;
    }

    /* Argument validation: the first failing check decides the result. */
    status = checkMemObjects(A, x, y, true, A_MAT_ERRSET, X_VEC_ERRSET, Y_VEC_ERRSET);
    if (status) {
        return status;
    }
    status = checkMatrixSizes(kargs->dtype, order, clblasNoTrans,
                              M, N, A, offA, lda, A_MAT_ERRSET);
    if (status) {
        return status;
    }
    status = checkVectorSizes(kargs->dtype, xLen, x, offx, incx, X_VEC_ERRSET);
    if (status) {
        return status;
    }
    status = checkVectorSizes(kargs->dtype, yLen, y, offy, incy, Y_VEC_ERRSET);
    if (status) {
        return status;
    }

    /* Problem shape. */
    kargs->order = order;
    kargs->transA = transA;
    kargs->M = M;
    kargs->N = N;

    /*
     * K stores what the original comment calls the original height of
     * matrix A: M when not transposed, N otherwise.
     * FIXME (from the original author): store it in a dedicated field.
     */
    kargs->K = (transA == clblasNoTrans) ? M : N;

    /* Matrix A. */
    kargs->A = A;
    kargs->offA = offA;
    kargs->lda.matrix = lda;

    /* Vector x travels in the B slot, vector y in the C slot. */
    kargs->B = x;
    kargs->offBX = offx;
    kargs->ldb.vector = incx;
    kargs->C = y;
    kargs->offCY = offy;
    kargs->ldc.vector = incy;

    listInitHead(&seq);
    err = makeSolutionSeq(CLBLAS_GEMV, kargs, numCommandQueues, commandQueues,
                          numEventsInWaitList, eventWaitList, events, &seq);
    if (err == CL_SUCCESS) {
        err = executeSolutionSeq(&seq);
    }
    freeSolutionSeq(&seq);

    return (clblasStatus)err;
}
Exemplo n.º 8
0
/*
 * Validate the GEMM arguments, record them in kargs and hand the work to
 * executeGEMM with kargs->pigFuncID set to CLBLAS_GEMM2.
 *
 * The size checks for A and B are skipped when K == 0; C is always checked
 * as an M x N matrix.  Only one command queue is used: numCommandQueues is
 * forced to 1 before executeGEMM is called.
 *
 * Returns clblasNotInitialized when the library is not initialized, the
 * code of the first failing argument check, clblasInvalidValue for a NULL
 * or empty queue list, clblasInvalidEventWaitList for a non-zero wait count
 * with a NULL list, and otherwise whatever executeGEMM returns.
 */
static clblasStatus
doGemm(
    CLBlasKargs *kargs,
    clblasOrder order,
    clblasTranspose transA,
    clblasTranspose transB,
    size_t M,
    size_t N,
    size_t K,
    const cl_mem A,
    size_t offA,
    size_t lda,
    const cl_mem B,
    size_t offB,
    size_t ldb,
    cl_mem C,
    size_t offC,
    size_t ldc,
    cl_uint numCommandQueues,
    cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList,
    const cl_event *eventWaitList,
    cl_event *events)
{
    clblasStatus err;
    clblasStatus retCode = clblasSuccess;

    if (!clblasInitialized) {
        return clblasNotInitialized;
    }

    /* Validate arguments */

    if ((retCode = checkMemObjects(A, B, C, true, A_MAT_ERRSET, B_MAT_ERRSET, C_MAT_ERRSET))) {
        return retCode;
    }
    /* With K == 0 the A and B size checks are not performed. */
    if (K != 0) {
        if ((retCode = checkMatrixSizes(kargs->dtype, order, transA, M, K, A, offA, lda, A_MAT_ERRSET))) {
            return retCode;
        }
        if ((retCode = checkMatrixSizes(kargs->dtype, order, transB, K, N, B, offB, ldb, B_MAT_ERRSET))) {
            return retCode;
        }
    }
    if ((retCode = checkMatrixSizes(kargs->dtype, order, clblasNoTrans, M, N, C, offC, ldc, C_MAT_ERRSET))) {
        return retCode;
    }

    /*
     * The queue count is overwritten with 1 just below, so a NULL or empty
     * queue list must be rejected here; otherwise a caller-supplied count
     * of 0 would silently turn into "use commandQueues[0]".
     */
    if ((commandQueues == NULL) || (numCommandQueues == 0)) {
        return clblasInvalidValue;
    }
    if ((numEventsInWaitList != 0) && (eventWaitList == NULL)) {
        return clblasInvalidEventWaitList;
    }

    numCommandQueues = 1;
#ifdef DEBUG_2
    printf("DoGemm being called...\n");
#endif

    kargs->pigFuncID = CLBLAS_GEMM2;
    kargs->order = order;
    kargs->transA = transA;
    kargs->transB = transB;
    kargs->M = M;
    kargs->N = N;
    kargs->K = K;

    /* Matrix A; both offset fields receive the same value. */
    kargs->A = A;
    kargs->offA = offA;
    kargs->offa = offA;
    kargs->lda.matrix = lda;

    kargs->B = B;
    kargs->offBX = offB;
    kargs->ldb.matrix = ldb;

    kargs->C = C;
    kargs->offCY = offC;
    kargs->ldc.matrix = ldc;

    kargs->offsetM = 0;
    kargs->offsetN = 0;
    kargs->scimage[0] = 0;
    kargs->scimage[1] = 0;

    err = executeGEMM(kargs, numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events);
    return err;
}
Exemplo n.º 9
0
/*
 * Validate the HER2K arguments and run the operation as two chained
 * executeGEMM calls with kargs->pigFuncID set to CLBLAS_HERK.
 *
 * Row-major input is mapped to column-major: uplo is flipped, transA is
 * toggled between clblasNoTrans and clblasConjTrans, and kargs->order is
 * set to clblasColumnMajor.  kargs->transB is always the opposite of the
 * resulting transA.
 *
 * The first call uses (A, B) and writes its completion to a local event.
 * If it succeeds, the second call uses (B, A), negates the imaginary part
 * of alpha, sets beta to 1 + 0i, waits on the local event and reports
 * through `events`.  Any dtype other than TYPE_COMPLEX_FLOAT is handled as
 * double complex.
 * NOTE(review): the local event is never released in this function -
 * confirm who owns it.
 *
 * Returns clblasNotInitialized when the library is not initialized,
 * clblasInvalidValue for a NULL or empty queue list or for
 * transA == clblasTrans, clblasInvalidEventWaitList for a non-zero wait
 * count with a NULL list, the code of the first failing argument check,
 * and otherwise the status of the last executeGEMM call made.
 */
clblasStatus
doHer2k(
    CLBlasKargs *kargs,
    clblasOrder order,
    clblasUplo uplo,
    clblasTranspose transA,
    size_t N,
    size_t K,
    const cl_mem A,
    size_t offa,
    size_t lda,
    const cl_mem B,
    size_t offb,
    size_t ldb,
    cl_mem C,
    size_t offc,
    size_t ldc,
    cl_uint numCommandQueues,
    cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList,
    const cl_event *eventWaitList,
    cl_event *events)
{
    clblasStatus err;
    clblasUplo fUplo;
    clblasTranspose fTransA;
    cl_event firstHerkCall;
    clblasStatus retCode = clblasSuccess;

    if (!clblasInitialized) {
        return clblasNotInitialized;
    }

    if (numCommandQueues == 0 || commandQueues == NULL) {
        return clblasInvalidValue;
    }
    /* Only one command queue is used. */
    numCommandQueues = 1;

    /* Checked once here; neither value changes afterwards. */
    if ((numEventsInWaitList != 0) && (eventWaitList == NULL)) {
        return clblasInvalidEventWaitList;
    }

    /* Validate arguments */
    if ((retCode = checkMemObjects(A, B, C, true, A_MAT_ERRSET, B_MAT_ERRSET, C_MAT_ERRSET))) {
        return retCode;
    }

    /* A plain transpose is rejected. */
    if (transA == clblasTrans) {
        return clblasInvalidValue;
    }

    if ((retCode = checkMatrixSizes(kargs->dtype, order, transA, N, K, A, offa, lda, A_MAT_ERRSET))) {
        return retCode;
    }

    if ((retCode = checkMatrixSizes(kargs->dtype, order, transA, N, K, B, offb, ldb, B_MAT_ERRSET))) {
        return retCode;
    }

    if ((retCode = checkMatrixSizes(kargs->dtype, order, clblasNoTrans, N, N, C, offc, ldc, C_MAT_ERRSET))) {
        return retCode;
    }

    /* Map row-major input onto an equivalent column-major problem. */
    fUplo = (order == clblasRowMajor) ? ((uplo == clblasLower) ? clblasUpper : clblasLower) : uplo;
    fTransA = (order == clblasRowMajor) ? ((transA == clblasNoTrans) ? clblasConjTrans : clblasNoTrans) : transA;
    kargs->order = (order == clblasRowMajor) ? clblasColumnMajor : order;

    kargs->transA = fTransA;
    kargs->transB = (fTransA == clblasNoTrans) ? clblasConjTrans : clblasNoTrans;

    kargs->uplo = fUplo;
    kargs->M = N;
    kargs->N = N;
    kargs->K = K;
    kargs->A = A;
    kargs->offA = offa;
    kargs->offa = offa;
    kargs->lda.matrix = lda;
    kargs->B = B;
    kargs->offBX = offb;
    kargs->ldb.matrix = ldb;
    kargs->C = C;
    kargs->offCY = offc;
    kargs->ldc.matrix = ldc;
    kargs->pigFuncID = CLBLAS_HERK;

    /* First call: operands (A, B); completion goes to firstHerkCall. */
    err = executeGEMM(kargs, numCommandQueues, commandQueues,
                      numEventsInWaitList, eventWaitList, &firstHerkCall);

    if (err == CL_SUCCESS) {
        /* Second call: the A and B operands exchange roles. */
        kargs->A = B;
        kargs->offA = offb;
        kargs->offa = offb;
        kargs->lda.matrix = ldb;
        kargs->B = A;
        kargs->offBX = offa;
        kargs->ldb.matrix = lda;

        /* Negate the imaginary part of alpha and set beta to 1 + 0i. */
        if (kargs->dtype == TYPE_COMPLEX_FLOAT) {
            CIMAG( kargs->alpha.argFloatComplex ) *= -1.0;
            CREAL( kargs->beta.argFloatComplex ) = 1.0;
            CIMAG( kargs->beta.argFloatComplex ) = 0.0;
        }
        else {
            CIMAG( kargs->alpha.argDoubleComplex ) *= -1.0;
            CREAL( kargs->beta.argDoubleComplex ) = 1.0;
            CIMAG( kargs->beta.argDoubleComplex ) = 0.0;
        }

        /* Wait on the first call's event; report through the caller's events. */
        err = executeGEMM(kargs, numCommandQueues, commandQueues, 1, &firstHerkCall, events);
    }

    return err;
}