/**
 * Decide whether a Python object can be converted by this converter.
 *
 * Returns 0 (without raising) when obj_ptr is null, is not a numpy array,
 * or holds an element type that TypeToNumPy<scalar_t>::canConvert() rejects.
 * An array whose dimension count is neither 1 nor 2 is reported through
 * THROW_TYPE_ERROR. One-dimensional arrays are then passed to
 * checkVectorSizes(), everything else to checkMatrixSizes(); both helpers are
 * defined elsewhere (presumably they report size mismatches -- confirm).
 *
 * @param obj_ptr candidate Python object
 * @return obj_ptr itself when all checks pass, 0 otherwise
 */
static void* convertible(PyObject *obj_ptr)
{
    // A null pointer or a non-numpy object is simply "not convertible".
    if (obj_ptr == 0 || !PyArray_Check(obj_ptr))
    {
        return 0;
    }

    // The element type must be one this scalar type can be built from.
    const int elementType = PyArray_ObjectType(obj_ptr, 0);
    if (!TypeToNumPy<scalar_t>::canConvert(elementType))
    {
        return 0;
    }

    // Only vectors (1-D) and matrices (2-D) are supported.
    const int rank = PyArray_NDIM(obj_ptr);
    const bool isVector = (rank == 1);
    if (!isVector && rank != 2)
    {
        THROW_TYPE_ERROR("Conversion is only valid for arrays with 1 or 2 dimensions. Argument has " << rank << " dimensions");
    }

    if (isVector)
    {
        checkVectorSizes(obj_ptr);
    }
    else
    {
        // Two-dimensional matrix type.
        checkMatrixSizes(obj_ptr);
    }

    return obj_ptr;
}
static clblasStatus doHemv( CLBlasKargs *kargs, clblasOrder order, clblasUplo uplo, size_t N, const cl_mem A, size_t offA, size_t lda, const cl_mem x, size_t offx, int incx, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue *commandQueues, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *events) { cl_int err; ListHead seq1, seq2; cl_event first_event; clblasStatus retCode = clblasSuccess; if (!clblasInitialized) { return clblasNotInitialized; } /* Validate arguments */ if ((retCode = checkMemObjects(A, x, y, true, A_MAT_ERRSET, X_VEC_ERRSET, Y_VEC_ERRSET))) { return retCode; } if ((retCode = checkMatrixSizes(kargs->dtype, order, clblasNoTrans, N, N, A, offA, lda, A_MAT_ERRSET))) { return retCode; } if ((retCode = checkVectorSizes(kargs->dtype, N, x, offx, incx, X_VEC_ERRSET))) { return retCode; } if ((retCode = checkVectorSizes(kargs->dtype, N, y, offy, incy, Y_VEC_ERRSET))) { return retCode; } if ((commandQueues == NULL) || (numCommandQueues == 0)) { return clblasInvalidValue; } if ((numEventsInWaitList !=0) && (eventWaitList == NULL)) { return clblasInvalidEventWaitList; } numCommandQueues = 1; kargs->order = order; kargs->uplo = uplo; kargs->N = N; kargs->A = A; kargs->offA = offA; kargs->offa = offA; kargs->lda.matrix = lda; kargs->B = x; kargs->offBX = offx; kargs->ldb.Vector = incx; kargs->C = y; kargs->offCY = offy; kargs->ldc.Vector = incy; kargs->transA = clblasNoTrans; kargs->diag = clblasNonUnit; listInitHead(&seq1); err = makeSolutionSeq(CLBLAS_HEMV, kargs, numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, &first_event, &seq1); if (err == CL_SUCCESS) { err = executeSolutionSeq(&seq1); if (err == CL_SUCCESS) { listInitHead(&seq2); kargs->transA = clblasConjTrans; kargs->diag = clblasUnit; err = makeSolutionSeq(CLBLAS_HEMV, kargs, numCommandQueues, commandQueues, 1, &first_event, events, &seq2); if (err == CL_SUCCESS) { err = executeSolutionSeq(&seq2); } freeSolutionSeq(&seq2); } } 
freeSolutionSeq(&seq1); return (clblasStatus)err; //printf("doHemv called\n"); //return 0; }
clblasStatus doHer2( CLBlasKargs *kargs, clblasOrder order, clblasUplo uplo, size_t N, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueue, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events) { cl_int err; ListHead seq; clblasStatus retCode = clblasSuccess; if (!clblasInitialized) { return clblasNotInitialized; } #ifdef DEBUG_HER2 printf("doHer2 called\n"); #endif /* Validate arguments */ if ((retCode = checkMemObjects(A, X, Y, true, A_MAT_ERRSET, X_VEC_ERRSET, Y_VEC_ERRSET))) { #ifdef DEBUG_HER2 printf("Invalid mem object..\n"); #endif return retCode; } if ((retCode = checkMatrixSizes(kargs->dtype, order, clblasNoTrans, N, N, A, offa, lda, A_MAT_ERRSET))) { #ifdef DEBUG_HER2 printf("Invalid Size for A\n"); #endif return retCode; } if ((retCode = checkVectorSizes(kargs->dtype, N, X, offx, incx, X_VEC_ERRSET))) { #ifdef DEBUG_HER2 printf("Invalid Size for X\n"); #endif return retCode; } if ((retCode = checkVectorSizes(kargs->dtype, N, Y, offy, incy, Y_VEC_ERRSET))) { #ifdef DEBUG_HER2 printf("Invalid Size for Y\n"); #endif return retCode; } if ((commandQueue == NULL) || (numCommandQueues == 0)) { return clblasInvalidValue; } if ((numEventsInWaitList !=0) && (eventWaitList == NULL)) { return clblasInvalidEventWaitList; } kargs->order = order; if(order == clblasRowMajor) // Handling row-major. Invert X, Y and uplo { kargs->uplo = (uplo == clblasUpper) ? 
clblasLower : clblasUpper; kargs->B = Y; kargs->ldb.vector = incy; kargs->offBX = offy; kargs->C = X; kargs->ldc.vector = incx; kargs->offCY = offx; } else { kargs->uplo = uplo; kargs->B = X; kargs->ldb.vector = incx; kargs->offBX = offx; kargs->C = Y; kargs->ldc.vector = incy; kargs->offCY = offy; } kargs->N = N; kargs->A = A; kargs->lda.matrix = lda; kargs->offa = offa; kargs->offA = offa; #ifdef DEBUG_HER2 printf("Calling makeSolutionSeq : HER2\n"); #endif /* * Always use CommandQueue (0) * PENDING: * 1. No Multi-GPU / Multi-command queue support * 2. This can be optimized to use the commandQ with the higher * memmory bandwidth that supports the data-type and the LDA */ numCommandQueues = 1; listInitHead(&seq); err = makeSolutionSeq(CLBLAS_HER2, kargs, numCommandQueues, commandQueue, numEventsInWaitList, eventWaitList, events, &seq); if (err == CL_SUCCESS) { err = executeSolutionSeq(&seq); } freeSolutionSeq(&seq); return (clblasStatus)err; }
clblasStatus doGer( CLBlasKargs *kargs, clblasOrder order, size_t M, size_t N, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, int doConj, cl_uint numCommandQueues, cl_command_queue *commandQueues, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *events) { cl_int err; ListHead seq; clblasStatus retCode = clblasSuccess; if (!clblasInitialized) { return clblasNotInitialized; } /* Validate arguments */ if ((retCode = checkMemObjects(A, X, Y, true, A_MAT_ERRSET, X_VEC_ERRSET, Y_VEC_ERRSET))) { #ifdef DEBUG_GER printf("Invalid mem object..\n"); #endif return retCode; } // Check wheather enough memory was allocated if ((retCode = checkMatrixSizes(kargs->dtype, order, clblasNoTrans, M, N, A, offa, lda, A_MAT_ERRSET))) { #ifdef DEBUG_GER printf("Invalid Size for A %d\n",retCode ); #endif return retCode; } if ((retCode = checkVectorSizes(kargs->dtype, M, X, offx, incx, X_VEC_ERRSET))) { #ifdef DEBUG_GER printf("Invalid Size for X\n"); #endif return retCode; } if ((retCode = checkVectorSizes(kargs->dtype, N, Y, offy, incy, Y_VEC_ERRSET))) { #ifdef DEBUG_GER printf("Invalid Size for Y\n"); #endif return retCode; } /////////////////////////////////////////////////////////////// if ((commandQueues == NULL) || (numCommandQueues == 0)) { return clblasInvalidValue; } /* numCommandQueues will be hardcoded to 1 as of now. No multi-gpu support */ numCommandQueues = 1; if (commandQueues[0] == NULL) { return clblasInvalidCommandQueue; } if ((numEventsInWaitList !=0) && (eventWaitList == NULL)) { return clblasInvalidEventWaitList; } /* * ASSUMPTION: * doTRMV assumes "commandQueue" of 0. The same is reflected in * "makeSolutionSeq" as well. If either of them changes in future, * this code needs to be revisited. 
*/ kargs->order = order; kargs->M = M; kargs->N = N; kargs->A = A; kargs->offa = offa; kargs->offA = offa; kargs->lda.matrix = lda; kargs->B = X; kargs->offBX = offx; kargs->ldb.vector = incx; // Will be using this as incx kargs->C = Y; kargs->offCY = offy; kargs->ldc.vector = incy; // Will be using this as incy kargs->offsetM = 0; kargs->offsetN = 0; kargs->scimage[0] = 0; kargs->scimage[1] = 0; kargs->K = (size_t)doConj; // Will be using K as doConj parameter #ifdef DEBUG_GER printf("Calling makeSolutionSeq from DoGer: GER\n"); #endif listInitHead(&seq); err = makeSolutionSeq(CLBLAS_GER, kargs, numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events, &seq); if (err == CL_SUCCESS) { err = executeSolutionSeq(&seq); } freeSolutionSeq(&seq); return (clblasStatus)err; }
static clblasStatus doSymv( CLBlasKargs *kargs, clblasOrder order, clblasUplo uplo, size_t N, const cl_mem A, size_t offA, size_t lda, const cl_mem x, size_t offx, int incx, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue *commandQueues, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *events) { cl_int err; ListHead seq; clblasStatus retCode = clblasSuccess; #ifdef USE_SYMV ListHead seq2; ListNode *listNodePtr; cl_event first_event; #endif if (!clblasInitialized) { return clblasNotInitialized; } /* Validate arguments */ if ((retCode = checkMemObjects(A, x, y, true, A_MAT_ERRSET, X_VEC_ERRSET, Y_VEC_ERRSET))) { return retCode; } if ((retCode = checkMatrixSizes(kargs->dtype, order, clblasNoTrans, N, N, A, offA, lda, A_MAT_ERRSET ))) { return retCode; } if ((retCode = checkVectorSizes(kargs->dtype, N, x, offx, incx, X_VEC_ERRSET ))) { return retCode; } if ((retCode = checkVectorSizes(kargs->dtype, N, y, offy, incy, Y_VEC_ERRSET ))) { return retCode; } kargs->order = order; kargs->uplo = uplo; kargs->N = N; kargs->K = N; //store original N kargs->A = A; kargs->offA = offA; kargs->offa = offA; kargs->lda.matrix = lda; kargs->B = x; kargs->offBX = offx; kargs->ldb.vector = incx; kargs->C = y; kargs->offCY = offy; kargs->ldc.vector = incy; #ifndef USE_SYMV listInitHead(&seq); err = makeSolutionSeq(CLBLAS_SYMV, kargs, numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events, &seq); if (err == CL_SUCCESS) { err = executeSolutionSeq(&seq); } #else // version of SYMV using kprintf numCommandQueues = 1; listInitHead(&seq); kargs->transA = clblasNoTrans; kargs->diag = clblasNonUnit; err = makeSolutionSeq(CLBLAS_HEMV, kargs, numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, &first_event, &seq); if (err == CL_SUCCESS) { listInitHead(&seq2); kargs->transA = clblasTrans; kargs->diag = clblasUnit; err = makeSolutionSeq(CLBLAS_HEMV, kargs, numCommandQueues, commandQueues, 1, &first_event, events, 
&seq2); if (err == CL_SUCCESS) { // Adding node from seq2 to main seq listNodePtr = listNodeFirst(&seq2); listAddToTail(&seq, listNodePtr); err = executeSolutionSeq(&seq); // Executes both kernels in the seq one after other } } #endif freeSolutionSeq(&seq); return (clblasStatus)err; }
static clblasStatus doHpmv( CLBlasKargs *kargs, clblasOrder order, clblasUplo uplo, size_t N, const cl_mem AP, size_t offa, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue *commandQueues, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *events) { cl_int err; ListHead seq1, seq2; cl_event first_event; clblasStatus retCode = clblasSuccess; if (!clblasInitialized) { return clblasNotInitialized; } /* Validate arguments */ if ((retCode = checkMemObjects(AP, X, Y, true, A_MAT_ERRSET, X_VEC_ERRSET, Y_VEC_ERRSET))) { return retCode; } if ((retCode = checkMatrixSizes(kargs->dtype, order, clblasNoTrans, N, N, AP, offa, 0, A_MAT_ERRSET))) { return retCode; } if ((retCode = checkVectorSizes(kargs->dtype, N, X, offx, incx, X_VEC_ERRSET))) { return retCode; } if ((retCode = checkVectorSizes(kargs->dtype, N, Y, offy, incy, Y_VEC_ERRSET))) { return retCode; } if ((commandQueues == NULL) || (numCommandQueues == 0)) { return clblasInvalidValue; } if ((numEventsInWaitList !=0) && (eventWaitList == NULL)) { return clblasInvalidEventWaitList; } numCommandQueues = 1; kargs->order = order; kargs->uplo = uplo; kargs->N = N; kargs->A = AP; kargs->offA = offa; kargs->offa = offa; kargs->lda.matrix = 0; // Set lda as zero for packed matrices kargs->B = X; kargs->offBX = offx; kargs->ldb.vector = incx; kargs->C = Y; kargs->offCY = offy; kargs->ldc.vector = incy; kargs->transA = clblasNoTrans; kargs->diag = clblasNonUnit; kargs->pigFuncID = CLBLAS_HPMV; listInitHead(&seq1); err = makeSolutionSeq(CLBLAS_TRMV, kargs, numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, &first_event, &seq1); if (err == CL_SUCCESS) { err = executeSolutionSeq(&seq1); if (err == CL_SUCCESS) { listInitHead(&seq2); kargs->transA = clblasConjTrans; kargs->diag = clblasUnit; err = makeSolutionSeq(CLBLAS_TRMV, kargs, numCommandQueues, commandQueues, 1, &first_event, events, &seq2); if (err == CL_SUCCESS) { err = 
executeSolutionSeq(&seq2); } freeSolutionSeq(&seq2); } } freeSolutionSeq(&seq1); return (clblasStatus)err; }
static clblasStatus doGemv( CLBlasKargs *kargs, clblasOrder order, clblasTranspose transA, size_t M, size_t N, const cl_mem A, size_t offA, size_t lda, const cl_mem x, size_t offx, int incx, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue *commandQueues, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *events) { cl_int err; ListHead seq; size_t sizev; clblasStatus retCode = clblasSuccess; if (!clblasInitialized) { return clblasNotInitialized; } /* Validate arguments */ if ((retCode = checkMemObjects( A, x, y, true, A_MAT_ERRSET, X_VEC_ERRSET, Y_VEC_ERRSET ))) { return retCode; } if ((retCode = checkMatrixSizes(kargs->dtype, order, clblasNoTrans, M, N, A, offA, lda, A_MAT_ERRSET ))) { return retCode; } sizev = (transA == clblasNoTrans) ? N : M; if ((retCode = checkVectorSizes(kargs->dtype, sizev, x, offx, incx, X_VEC_ERRSET ))) { return retCode; } sizev = (transA == clblasNoTrans) ? M : N; if ((retCode = checkVectorSizes(kargs->dtype, sizev, y, offy, incy, Y_VEC_ERRSET))) { return retCode; } kargs->order = order; kargs->transA = transA; kargs->M = M; kargs->N = N; /* * store original height of the matrix A * FIXME: store it to a dedicated field */ kargs->K = (transA == clblasNoTrans) ? M : N; kargs->A = A; kargs->offA = offA; kargs->lda.matrix = lda; kargs->B = x; kargs->offBX = offx; kargs->ldb.vector = incx; kargs->C = y; kargs->offCY = offy; kargs->ldc.vector = incy; listInitHead(&seq); err = makeSolutionSeq(CLBLAS_GEMV, kargs, numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events, &seq); if (err == CL_SUCCESS) { err = executeSolutionSeq(&seq); } freeSolutionSeq(&seq); return (clblasStatus)err; }
static clblasStatus doGemm( CLBlasKargs *kargs, clblasOrder order, clblasTranspose transA, clblasTranspose transB, size_t M, size_t N, size_t K, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue *commandQueues, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *events) { clblasStatus err; clblasStatus retCode = clblasSuccess; if (!clblasInitialized) { return clblasNotInitialized; } /* Validate arguments */ if ((retCode = checkMemObjects(A, B, C, true, A_MAT_ERRSET, B_MAT_ERRSET, C_MAT_ERRSET))) { return retCode; } if (K != 0) { if ((retCode = checkMatrixSizes(kargs->dtype, order, transA, M, K, A, offA, lda, A_MAT_ERRSET))) { return retCode; } if ((retCode = checkMatrixSizes(kargs->dtype, order, transB, K, N, B, offB, ldb, B_MAT_ERRSET))) { return retCode; } } if ((retCode = checkMatrixSizes(kargs->dtype, order, clblasNoTrans, M, N, C, offC, ldc, C_MAT_ERRSET))) { return retCode; } numCommandQueues = 1; #ifdef DEBUG_2 printf("DoGemm being called...\n"); #endif kargs->pigFuncID = CLBLAS_GEMM2; kargs->order = order; kargs->transA = transA; kargs->transB = transB; kargs->M = M; kargs->N = N; kargs->K = K; kargs->A = A; kargs->offA = offA; kargs->offa = offA; kargs->lda.matrix = lda; kargs->B = B; kargs->offBX = offB; kargs->ldb.matrix = ldb; kargs->C = C; kargs->offCY = offC; kargs->ldc.matrix = ldc; kargs->offsetM = 0; kargs->offsetN = 0; kargs->scimage[0] = 0; kargs->scimage[1] = 0; err = executeGEMM(kargs, numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events); return err; }
clblasStatus doHer2k( CLBlasKargs *kargs, clblasOrder order, clblasUplo uplo, clblasTranspose transA, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue *commandQueues, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *events) { clblasStatus err; clblasUplo fUplo; clblasTranspose fTransA; cl_event firstHerkCall; clblasStatus retCode = clblasSuccess; if (!clblasInitialized) { return clblasNotInitialized; } if (numCommandQueues == 0 || commandQueues == NULL) { return clblasInvalidValue; } numCommandQueues = 1; if ((numEventsInWaitList !=0) && (eventWaitList == NULL)) { return clblasInvalidEventWaitList; } // Validate arguments if (retCode = checkMemObjects(A, B, C, true, A_MAT_ERRSET, B_MAT_ERRSET, C_MAT_ERRSET )) { return retCode; } if (transA == clblasTrans) { return clblasInvalidValue; } if (retCode = checkMatrixSizes(kargs->dtype, order, transA, N, K, A, offa, lda, A_MAT_ERRSET )) { return retCode; } if (retCode = checkMatrixSizes(kargs->dtype, order, transA, N, K, B, offb, ldb, B_MAT_ERRSET )) { return retCode; } if (retCode = checkMatrixSizes(kargs->dtype, order, clblasNoTrans, N, N, C, offc, ldc, C_MAT_ERRSET )) { return retCode; } if ((numEventsInWaitList !=0) && (eventWaitList == NULL)) { return clblasInvalidEventWaitList; } fUplo = (order == clblasRowMajor) ? ((uplo == clblasLower) ? clblasUpper : clblasLower) : uplo; fTransA = (order == clblasRowMajor) ? ((transA == clblasNoTrans) ? clblasConjTrans : clblasNoTrans) : transA; kargs->order = (order == clblasRowMajor) ? clblasColumnMajor : order; kargs->transA = fTransA; kargs->transB = (fTransA == clblasNoTrans) ? 
clblasConjTrans : clblasNoTrans; kargs->uplo = fUplo; kargs->M = N; kargs->N = N; kargs->K = K; kargs->A = A; kargs->offA = offa; kargs->offa = offa; kargs->lda.matrix = lda; kargs->B = B; kargs->offBX = offb; kargs->ldb.matrix = ldb; kargs->C = C; kargs->offCY = offc; kargs->ldc.matrix = ldc; kargs->pigFuncID = CLBLAS_HERK; err = executeGEMM(kargs, numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, &firstHerkCall); if( err == CL_SUCCESS ) { kargs->A = B; kargs->offA = offb; kargs->offa = offb; kargs->lda.matrix = ldb; kargs->B = A; kargs->offBX = offa; kargs->ldb.matrix = lda; if( kargs->dtype == TYPE_COMPLEX_FLOAT ) { CIMAG( kargs->alpha.argFloatComplex ) *= -1.0; CREAL( kargs->beta.argFloatComplex ) = 1.0; CIMAG( kargs->beta.argFloatComplex ) = 0.0; } else { CIMAG( kargs->alpha.argDoubleComplex ) *= -1.0; CREAL( kargs->beta.argDoubleComplex ) = 1.0; CIMAG( kargs->beta.argDoubleComplex ) = 0.0; } err = executeGEMM(kargs, numCommandQueues, commandQueues, 1, &firstHerkCall, events); } return (clblasStatus)err; }