// Select the luggage ("carried item") cursor overlay and restart the
// cursor animation from its first frame.  The `rate` parameter is
// accepted for interface symmetry with setPointer() but is not used.
void Mouse::setLuggage(uint32 resId, uint32 rate) {
	_activeFrame = -1;          // force the next animate() to redraw
	_frame = 0;                 // restart the animation cycle
	_currentLuggageId = resId;
	// Rebuild the combined cursor: current pointer + new luggage.
	createPointer(_currentPtrId, resId);
}
/// Create an abstract value for a constant GetElementPtr expression.
///
/// @param value    The GEP constant expression being evaluated.
/// @param operands Abstract values of the expression's operands; the
///                 first one is the base, the rest are indices.
/// @param place    Program position used to tag the created target.
/// @returns        A newly allocated Domain; caller takes ownership.
Domain *
Constructors::createGetElementPtr(const llvm::ConstantExpr &value,
                                  const std::vector<const Domain*> &operands,
                                  const llvm::Value &place) const
{
    // Normalize every index operand to a 64-bit integer domain.
    std::vector<Domain*> offsets;
    std::vector<const Domain*>::const_iterator it = operands.begin() + 1,
        itend = operands.end();
    for (; it != itend; ++it)
    {
        unsigned bitWidth = Integer::Utils::getBitWidth(**it);
        CANAL_ASSERT_MSG(bitWidth <= 64,
                         "Cannot handle GetElementPtr offset"
                         " with more than 64 bits.");

        if (bitWidth < 64)
        {
            // Widen narrow indices by zero-extension into a fresh
            // 64-bit integer domain.
            Domain *offset = createInteger(64);
            offset->zext(**it);
            offsets.push_back(offset);
        }
        else
            // Already 64 bits wide; just copy the abstract value.
            offsets.push_back((*it)->clone());
    }

    // The result type of a GEP constant expression is always a pointer.
    const llvm::PointerType &pointerType =
        checkedCast<const llvm::PointerType>(*value.getType());

    // GetElementPtr on a Pointer
    const Pointer::Pointer *pointer =
        dynCast<Pointer::Pointer>(*operands.begin());

    if (pointer)
    {
        // Delegate to the pointer domain, which adjusts its targets by
        // the computed offsets.  Ownership of `offsets` passes along
        // with the call (see Pointer::getElementPtr).
        return pointer->getElementPtr(offsets, pointerType, *this);
    }
    else
    {
        // GetElementPtr on anything except a pointer.  For example, it is
        // called on arrays and structures.
        Domain *result = createPointer(pointerType);
        Pointer::Utils::addTarget(*result,
                                  Pointer::Target::Block,
                                  &place,
                                  *value.op_begin(),
                                  offsets,
                                  NULL);

        return result;
    }
}
// Select the active mouse-pointer resource and restart its animation.
// The cursor is shown only when a pointer is set AND the mouse is
// enabled (MOUSE_STATUS bit 0) or an override is in effect.
// `rate` is accepted for interface compatibility but is not used.
void Mouse::setPointer(uint32 resId, uint32 rate) {
	_frame = 0;
	_currentPtrId = resId;
	createPointer(resId, _currentLuggageId);

	// Equivalent (De Morgan) to: hide when resId == 0, or when the
	// mouse is switched off and no override is active.
	const bool mouseEnabled =
	    (Logic::_scriptVars[MOUSE_STATUS] & 1) || _mouseOverride;
	if ((resId != 0) && mouseEnabled) {
		animate();
		CursorMan.showMouse(true);
	} else {
		CursorMan.showMouse(false);
	}
}
/// Build a fresh (uninitialized) abstract value matching an LLVM type.
///
/// Dispatches on the type kind: integer, floating point, pointer,
/// array/vector, or structure.  Void is rejected by assertion and any
/// other type kind aborts.  The caller owns the returned Domain.
Domain *
Constructors::create(const llvm::Type &type) const
{
    CANAL_ASSERT_MSG(!type.isVoidTy(),
                     "Cannot create value of type Void.");

    if (type.isIntegerTy())
    {
        llvm::IntegerType &intTy = checkedCast<llvm::IntegerType>(type);
        return createInteger(intTy.getBitWidth());
    }

    if (type.isFloatingPointTy())
    {
        // Pick the float semantics matching the LLVM floating type.
        const llvm::fltSemantics &sem = Float::Utils::getSemantics(type);
        return createFloat(sem);
    }

    if (type.isPointerTy())
    {
        const llvm::PointerType &ptrTy =
            checkedCast<llvm::PointerType>(type);
        return createPointer(ptrTy);
    }

    if (type.isArrayTy() || type.isVectorTy())
    {
        // Arrays and vectors share the sequential-type representation.
        const llvm::SequentialType &seqTy =
            checkedCast<llvm::SequentialType>(type);
        return createArray(seqTy);
    }

    if (type.isStructTy())
    {
        const llvm::StructType &structTy =
            checkedCast<llvm::StructType>(type);

        // Recursively create one abstract value per struct member.
        std::vector<Domain*> members;
        for (unsigned idx = 0; idx < structTy.getNumElements(); ++idx)
            members.push_back(create(*structTy.getElementType(idx)));

        return createStructure(structTy, members);
    }

    CANAL_DIE_MSG("Unsupported llvm::Type::TypeID: " << type.getTypeID());
}
void Mouse::initialize() { _numObjs = 0; Logic::_scriptVars[MOUSE_STATUS] = 0; // mouse off and unlocked _getOff = 0; _inTopMenu = false; _lastState = 0; _mouseOverride = false; _currentPtrId = _currentLuggageId = 0; for (uint8 cnt = 0; cnt < 17; cnt++) // force res manager to keep mouse _resMan->resOpen(MSE_POINTER + cnt); // cursors in memory all the time CursorMan.showMouse(false); createPointer(0, 0); }
/// Create an abstract value for a constant BitCast expression.
///
/// @param value    The bitcast constant expression.
/// @param operands Abstract values of the operands (exactly one here).
/// @param place    Program position used to tag the created target.
/// @returns        A newly allocated Domain; caller takes ownership.
Domain *
Constructors::createBitCast(const llvm::ConstantExpr &value,
                            const std::vector<const Domain*> &operands,
                            const llvm::Value &place) const
{
    // BitCast from Pointer.  It is always a bitcast to some other
    // pointer.
    const Pointer::Pointer *pointer =
        dynCast<Pointer::Pointer>(*operands.begin());

    // NOTE(review): pointerType is tested for null below, so
    // checkedCast on a pointer argument presumably returns NULL for
    // non-pointer destination types rather than asserting — confirm
    // against the checkedCast definition.
    const llvm::PointerType *pointerType =
        checkedCast<llvm::PointerType>(value.getType());

    if (pointer)
    {
        CANAL_ASSERT(pointerType);
        // Pointer-to-pointer bitcast: retype the existing targets.
        return pointer->bitcast(*pointerType);
    }

    // BitCast from anything to a pointer.
    if (pointerType)
    {
        Domain *result;
        result = createPointer(*pointerType);
        Pointer::Utils::addTarget(*result,
                                  Pointer::Target::Block,
                                  &place,
                                  *value.op_begin(),
                                  std::vector<Domain*>(),
                                  NULL);

        return result;
    }

    // BitCast from non-pointer to another non-pointer.
    CANAL_NOT_IMPLEMENTED();
}
int sci_umf_lufact(char* fname, void* pvApiCtx) { SciErr sciErr; int stat = 0; SciSparse AA; CcsSparse A; int mA = 0; // rows int nA = 0; // cols int iNbItem = 0; int* piNbItemRow = NULL; int* piColPos = NULL; double* pdblSpReal = NULL; double* pdblSpImg = NULL; /* umfpack stuff */ double* Control = NULL; double* Info = NULL; void* Symbolic = NULL; void* Numeric = NULL; int* piAddr1 = NULL; int iComplex = 0; int iType1 = 0; /* Check numbers of input/output arguments */ CheckInputArgument(pvApiCtx, 1, 1); CheckOutputArgument(pvApiCtx, 1, 1); /* get A the sparse matrix to factorize */ sciErr = getVarAddressFromPosition(pvApiCtx, 1, &piAddr1); if (sciErr.iErr) { printError(&sciErr, 0); return 1; } /* check if the first argument is a sparse matrix */ sciErr = getVarType(pvApiCtx, piAddr1, &iType1); if (sciErr.iErr || iType1 != sci_sparse) { printError(&sciErr, 0); Scierror(999, _("%s: Wrong type for input argument #%d: A sparse matrix expected.\n"), fname, 1); return 1; } if (isVarComplex(pvApiCtx, piAddr1)) { iComplex = 1; sciErr = getComplexSparseMatrix(pvApiCtx, piAddr1, &mA, &nA, &iNbItem, &piNbItemRow, &piColPos, &pdblSpReal, &pdblSpImg); } else { sciErr = getSparseMatrix(pvApiCtx, piAddr1, &mA, &nA, &iNbItem, &piNbItemRow, &piColPos, &pdblSpReal); } if (sciErr.iErr) { FREE(piNbItemRow); FREE(piColPos); FREE(pdblSpReal); if (pdblSpImg) { FREE(pdblSpImg); } printError(&sciErr, 0); return 1; } // fill struct sparse AA.m = mA; AA.n = nA; AA.it = iComplex; AA.nel = iNbItem; AA.mnel = piNbItemRow; AA.icol = piColPos; AA.R = pdblSpReal; AA.I = pdblSpImg; if (nA <= 0 || mA <= 0) { FREE(piNbItemRow); FREE(piColPos); FREE(pdblSpReal); if (pdblSpImg) { FREE(pdblSpImg); } Scierror(999, _("%s: Wrong size for input argument #%d.\n"), fname, 1); return 1; } SciSparseToCcsSparse(&AA, &A); FREE(piNbItemRow); FREE(piColPos); FREE(pdblSpReal); if (pdblSpImg) { FREE(pdblSpImg); } /* symbolic factorization */ if (A.it == 1) { stat = umfpack_zi_symbolic(nA, mA, A.p, A.irow, A.R, A.I, 
&Symbolic, Control, Info); } else { stat = umfpack_di_symbolic(nA, mA, A.p, A.irow, A.R, &Symbolic, Control, Info); } if (stat != UMFPACK_OK) { freeCcsSparse(A); Scierror(999, _("%s: An error occurred: %s: %s\n"), fname, _("symbolic factorization"), UmfErrorMes(stat)); return 1; } /* numeric factorization */ if (A.it == 1) { stat = umfpack_zi_numeric(A.p, A.irow, A.R, A.I, Symbolic, &Numeric, Control, Info); } else { stat = umfpack_di_numeric(A.p, A.irow, A.R, Symbolic, &Numeric, Control, Info); } if (A.it == 1) { umfpack_zi_free_symbolic(&Symbolic); } else { umfpack_di_free_symbolic(&Symbolic); } if ( stat != UMFPACK_OK && stat != UMFPACK_WARNING_singular_matrix ) { freeCcsSparse(A); Scierror(999, _("%s: An error occurred: %s: %s\n"), fname, _("symbolic factorization"), UmfErrorMes(stat)); return 1; } if ( stat == UMFPACK_WARNING_singular_matrix && mA == nA ) { if (getWarningMode()) { Sciwarning("\n%s:%s\n", _("Warning"), _("The (square) matrix appears to be singular.")); } } /* add the pointer in the list ListNumeric */ if (! AddAdrToList(Numeric, A.it, &ListNumeric)) { /* AddAdrToList return 0 if malloc have failed : as it is just for storing 2 pointers this is unlikely to occurs but ... */ if (A.it == 1) { umfpack_zi_free_numeric(&Numeric); } else { umfpack_di_free_numeric(&Numeric); } freeCcsSparse(A); Scierror(999, _("%s: An error occurred: %s\n"), fname, _("no place to store the LU pointer in ListNumeric.")); return 1; } freeCcsSparse(A); /* create the scilab object to store the pointer onto the LU factors */ sciErr = createPointer(pvApiCtx, 2, Numeric); if (sciErr.iErr) { printError(&sciErr, 0); return 1; } /* return the pointer */ AssignOutputVariable(pvApiCtx, 1) = 2; ReturnArguments(pvApiCtx); return 0; }
int sci_gpuDotMult(char *fname) { CheckRhs(2, 2); CheckLhs(1, 1); SciErr sciErr; int* piAddr_A = NULL; int* piAddr_B = NULL; GpuPointer* gpuPtrA = NULL; GpuPointer* gpuPtrB = NULL; GpuPointer* gpuPtrC = NULL; double* h = NULL; double* hi = NULL; int rows = 0; int cols = 0; void* pvPtrA = NULL; void* pvPtrB = NULL; int inputType_A; int inputType_B; try { if (!isGpuInit()) { throw "gpu is not initialised. Please launch gpuInit() before use this function."; } sciErr = getVarAddressFromPosition(pvApiCtx, 1, &piAddr_A); if (sciErr.iErr) { throw sciErr; } sciErr = getVarAddressFromPosition(pvApiCtx, 2, &piAddr_B); if (sciErr.iErr) { throw sciErr; } /* ---- Check type of arguments and get data ---- */ /* */ /* Pointer to host / Pointer to device */ /* Matrix real / Matrix complex */ /* */ /* ---------------------------------------------- */ sciErr = getVarType(pvApiCtx, piAddr_A, &inputType_A); if (sciErr.iErr) { throw sciErr; } sciErr = getVarType(pvApiCtx, piAddr_B, &inputType_B); if (sciErr.iErr) { throw sciErr; } if (inputType_A == sci_pointer) { sciErr = getPointer(pvApiCtx, piAddr_A, (void**)&pvPtrA); if (sciErr.iErr) { throw sciErr; } gpuPtrA = (GpuPointer*)pvPtrA; if (!PointerManager::getInstance()->findGpuPointerInManager(gpuPtrA)) { throw "gpuDotMult : Bad type for input argument #1: Variables created with GPU functions expected."; } if (useCuda() && gpuPtrA->getGpuType() != GpuPointer::CudaType) { throw "gpuDotMult : Bad type for input argument #1: A Cuda pointer expected."; } if (useCuda() == false && gpuPtrA->getGpuType() != GpuPointer::OpenCLType) { throw "gpuDotMult : Bad type for input argument #1: A OpenCL pointer expected."; } } else if (inputType_A == sci_matrix) { if (isVarComplex(pvApiCtx, piAddr_A)) { sciErr = getComplexMatrixOfDouble(pvApiCtx, piAddr_A, &rows, &cols, &h, &hi); if (sciErr.iErr) { throw sciErr; } #ifdef WITH_CUDA if (useCuda()) { gpuPtrA = new PointerCuda(h, hi, rows, cols); } #endif #ifdef WITH_OPENCL if (!useCuda()) { throw 
"gpuDotMult: not implemented with OpenCL."; } #endif } else { sciErr = getMatrixOfDouble(pvApiCtx, piAddr_A, &rows, &cols, &h); if (sciErr.iErr) { throw sciErr; } #ifdef WITH_CUDA if (useCuda()) { gpuPtrA = new PointerCuda(h, rows, cols); } #endif #ifdef WITH_OPENCL if (!useCuda()) { throw "gpuDotMult: not implemented with OpenCL."; } #endif } } else { throw "gpuDotMult : Bad type for input argument #1: A GPU or CPU matrix expected."; } if (inputType_B == sci_pointer) { sciErr = getPointer(pvApiCtx, piAddr_B, (void**)&pvPtrB); if (sciErr.iErr) { throw sciErr; } gpuPtrB = (GpuPointer*)pvPtrB; if (!PointerManager::getInstance()->findGpuPointerInManager(gpuPtrB)) { throw "gpuDotMult : Bad type for input argument #2: Variables created with GPU functions expected."; } if (useCuda() && gpuPtrB->getGpuType() != GpuPointer::CudaType) { throw "gpuDotMult : Bad type for input argument #2: A Cuda pointer expected."; } if (useCuda() == false && gpuPtrB->getGpuType() != GpuPointer::OpenCLType) { throw "gpuDotMult : Bad type for input argument #2: A OpenCL pointer expected."; } } else if (inputType_B == sci_matrix) { if (isVarComplex(pvApiCtx, piAddr_B)) { sciErr = getComplexMatrixOfDouble(pvApiCtx, piAddr_B, &rows, &cols, &h, &hi); if (sciErr.iErr) { throw sciErr; } #ifdef WITH_CUDA if (useCuda()) { gpuPtrB = new PointerCuda(h, hi, rows, cols); } #endif #ifdef WITH_OPENCL if (!useCuda()) { throw "gpuDotMult: not implemented with OpenCL."; } #endif } else { sciErr = getMatrixOfDouble(pvApiCtx, piAddr_B, &rows, &cols, &h); if (sciErr.iErr) { throw sciErr; } #ifdef WITH_CUDA if (useCuda()) { gpuPtrB = new PointerCuda(h, rows, cols); } #endif #ifdef WITH_OPENCL if (!useCuda()) { throw "gpuDotMult: not implemented with OpenCL."; } #endif } } else { throw "gpuDotMult : Bad type for input argument #2: A GPU or CPU matrix expected."; } //performe operation. 
if (gpuPtrA->getSize() == 1 || gpuPtrB->getSize() == 1) { gpuPtrC = *gpuPtrA * *gpuPtrB; } else if (gpuPtrA->getRows() == gpuPtrB->getRows() && gpuPtrA->getCols() == gpuPtrB->getCols()) { #ifdef WITH_CUDA if (useCuda()) { gpuPtrC = cudaDotMult(dynamic_cast<PointerCuda*>(gpuPtrA), dynamic_cast<PointerCuda*>(gpuPtrB)); } #endif #ifdef WITH_OPENCL if (!useCuda()) { throw "gpuDotMult: not implemented with OpenCL."; } #endif } else { throw "gpuDotMult : Bad size for inputs arguments: Same sizes expected."; } // Keep the result on the Device. PointerManager::getInstance()->addGpuPointerInManager(gpuPtrC); sciErr = createPointer(pvApiCtx, Rhs + 1, (void*)gpuPtrC); if (sciErr.iErr) { throw sciErr; } LhsVar(1) = Rhs + 1; if (inputType_A == sci_matrix && gpuPtrA != NULL) { delete gpuPtrA; } if (inputType_B == sci_matrix && gpuPtrB != NULL) { delete gpuPtrB; } PutLhsVar(); return 0; } catch (const char* str) { Scierror(999, "%s\n", str); } catch (SciErr E) { printError(&E, 0); } if (inputType_A == sci_matrix && gpuPtrA != NULL) { delete gpuPtrA; } if (inputType_B == sci_matrix && gpuPtrB != NULL) { delete gpuPtrB; } if (gpuPtrC != NULL) { delete gpuPtrC; } return EXIT_FAILURE; }
int sci_taucs_chfact(char* fname, void* pvApiCtx) { SciErr sciErr; int stat = 0; int* perm = NULL; int* invperm = NULL; taucs_ccs_matrix *PAPT; taucs_ccs_matrix B; void *C = NULL; taucs_handle_factors *pC; SciSparse A; int mA = 0; // rows int nA = 0; // cols int iNbItem = 0; int* piNbItemRow = NULL; int* piColPos = NULL; double* pdblSpReal = NULL; double* pdblSpImg = NULL; int iComplex = 0; int* piAddr1 = NULL; /* Check numbers of input/output arguments */ CheckInputArgument(pvApiCtx, 1, 1); CheckOutputArgument(pvApiCtx, 1, 1); /* get A the sparse matrix to factorize */ sciErr = getVarAddressFromPosition(pvApiCtx, 1, &piAddr1); if (sciErr.iErr) { printError(&sciErr, 0); return 1; } if (isVarComplex(pvApiCtx, piAddr1)) { iComplex = 1; sciErr = getComplexSparseMatrix(pvApiCtx, piAddr1, &mA, &nA, &iNbItem, &piNbItemRow, &piColPos, &pdblSpReal, &pdblSpImg); } else { sciErr = getSparseMatrix(pvApiCtx, piAddr1, &mA, &nA, &iNbItem, &piNbItemRow, &piColPos, &pdblSpReal); } if (sciErr.iErr) { printError(&sciErr, 0); return 1; } // fill struct sparse A.m = mA; A.n = nA; A.it = iComplex; A.nel = iNbItem; A.mnel = piNbItemRow; A.icol = piColPos; A.R = pdblSpReal; A.I = pdblSpImg; stat = spd_sci_sparse_to_taucs_sparse(&A, &B); if ( stat != A_PRIORI_OK ) { if ( stat == MAT_IS_NOT_SPD ) { freeTaucsSparse(B); Scierror(999, _("%s: Wrong value for input argument #%d: Must be symmetric positive definite matrix."), fname, 1); } /* the message for the other problem (not enough memory in stk) is treated automaticaly */ return 1; } /* find the permutation */ taucs_ccs_genmmd(&B, &perm, &invperm); if ( !perm ) { freeTaucsSparse(B); Scierror(999, _("%s: No more memory.\n") , fname); return 1; } /* apply permutation */ PAPT = taucs_ccs_permute_symmetrically(&B, perm, invperm); FREE(invperm); freeTaucsSparse(B); /* factor */ C = taucs_ccs_factor_llt_mf(PAPT); taucs_ccs_free(PAPT); if (C == NULL) { /* Note : an error indicator is given in the main scilab window * (out of memory, no positive 
definite matrix , etc ...) */ Scierror(999, _("%s: An error occurred: %s\n"), fname, _("factorization")); return 1; } /* put in an handle (Chol fact + perm + size) */ pC = (taucs_handle_factors*)MALLOC( sizeof(taucs_handle_factors) ); pC->p = perm; pC->C = C; pC->n = A.n; /* add in the list of Chol Factors */ AddAdrToList((Adr) pC, 0, &ListCholFactors); /* FIXME add a test here .. */ /* create the scilab object to store the pointer onto the Chol handle */ sciErr = createPointer(pvApiCtx, 2, (void *)pC); if (sciErr.iErr) { printError(&sciErr, 0); return 1; } /* return the pointer */ AssignOutputVariable(pvApiCtx, 1) = 2; ReturnArguments(pvApiCtx); return 0; }
/// Create an abstract value for an LLVM constant.
///
/// Dispatches on the concrete constant kind (undef, integer, null
/// pointer, constant expression, float, struct, vector, array,
/// data-sequential, aggregate-zero, function).  Unhandled kinds abort.
///
/// @param value  The constant to abstract.
/// @param place  Program position used when tagging pointer targets
///               and zero-initialized values.
/// @param state  Current abstract state, forwarded to constant-expr
///               evaluation; may be NULL.
/// @returns      A newly allocated Domain; caller takes ownership.
Domain *
Constructors::create(const llvm::Constant &value,
                     const llvm::Value &place,
                     const State *state) const
{
    // Undef: any value of the type is possible; use the type default.
    if (llvm::isa<llvm::UndefValue>(value))
        return create(*value.getType());

    if (llvm::isa<llvm::ConstantInt>(value))
    {
        const llvm::ConstantInt &intValue = checkedCast<llvm::ConstantInt>(value);
        const llvm::APInt &i = intValue.getValue();
        return createInteger(i);
    }

    if (llvm::isa<llvm::ConstantPointerNull>(value))
    {
        const llvm::ConstantPointerNull &nullValue =
            checkedCast<llvm::ConstantPointerNull>(value);

        const llvm::PointerType &pointerType = *nullValue.getType();
        Domain *constPointer = createPointer(pointerType);
        // A null pointer is modelled as the zero value at this place.
        constPointer->setZero(&place);
        return constPointer;
    }

    if (llvm::isa<llvm::ConstantExpr>(value))
    {
        const llvm::ConstantExpr &exprValue = checkedCast<llvm::ConstantExpr>(value);
        return createConstantExpr(exprValue, place, state);
    }

    if (llvm::isa<llvm::ConstantFP>(value))
    {
        const llvm::ConstantFP &fp = checkedCast<llvm::ConstantFP>(value);
        const llvm::APFloat &number = fp.getValueAPF();
        return createFloat(number);
    }

    if (llvm::isa<llvm::ConstantStruct>(value))
    {
        const llvm::ConstantStruct &structValue = checkedCast<llvm::ConstantStruct>(value);
        uint64_t elementCount = structValue.getType()->getNumElements();
        std::vector<Domain*> members;
        // Recursively abstract each member; the member constant itself
        // serves as the "place" for nested pointer targets.
        for (uint64_t i = 0; i < elementCount; ++i)
        {
            members.push_back(create(*structValue.getOperand(i),
                                     *structValue.getOperand(i),
                                     state));
        }

        return createStructure(*structValue.getType(), members);
    }

    if (llvm::isa<llvm::ConstantVector>(value))
    {
        const llvm::ConstantVector &vectorValue = checkedCast<llvm::ConstantVector>(value);
        // VectorType::getNumElements returns unsigned int.
        unsigned elementCount = vectorValue.getType()->getNumElements();
        std::vector<Domain*> values;
        for (unsigned i = 0; i < elementCount; ++i)
        {
            values.push_back(create(*vectorValue.getOperand(i),
                                    *vectorValue.getOperand(i),
                                    state));
        }

        return createArray(*vectorValue.getType(), values);
    }

    if (llvm::isa<llvm::ConstantArray>(value))
    {
        const llvm::ConstantArray &arrayValue = checkedCast<llvm::ConstantArray>(value);
        // ArrayType::getNumElements returns uint64_t.
        uint64_t elementCount = arrayValue.getType()->getNumElements();
        std::vector<Domain*> values;
        for (uint64_t i = 0; i < elementCount; ++i)
        {
            values.push_back(create(*arrayValue.getOperand(i),
                                    *arrayValue.getOperand(i),
                                    state));
        }

        return createArray(*arrayValue.getType(), values);
    }

#if (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 1) || LLVM_VERSION_MAJOR > 3
    // llvm::isa<llvm::ConstantDataSequential> returns false for an
    // llvm::ConstantDataArray/Vector instance at least on on LLVM
    // 3.1.
    if (llvm::isa<llvm::ConstantDataVector>(value) ||
        llvm::isa<llvm::ConstantDataArray>(value))
    {
        const llvm::ConstantDataSequential &sequentialValue =
            checkedCast<llvm::ConstantDataSequential>(value);

        unsigned elementCount = sequentialValue.getNumElements();
        std::vector<Domain*> values;
        for (unsigned i = 0; i < elementCount; ++i)
        {
            values.push_back(create(*sequentialValue.getElementAsConstant(i),
                                    place,
                                    state));
        }

        return createArray(*sequentialValue.getType(), values);
    }
#endif

    if (llvm::isa<llvm::ConstantAggregateZero>(value))
    {
        // Zero-initialized aggregate: build the type default, then set
        // it to zero at this place.
        const llvm::Type *type = value.getType();
        Domain *result = Constructors::create(*type);
        result->setZero(&place);
        return result;
    }

    if (llvm::isa<llvm::Function>(value))
    {
        const llvm::Function &functionValue = checkedCast<llvm::Function>(value);
        // A function constant becomes a pointer targeting the function.
        Domain *constPointer;
        constPointer = createPointer(*llvm::PointerType::getUnqual(
            functionValue.getFunctionType()));

        Pointer::Utils::addTarget(*constPointer,
                                  Pointer::Target::Function,
                                  &place,
                                  &value,
                                  std::vector<Domain*>(),
                                  NULL);

        return constPointer;
    }

    CANAL_NOT_IMPLEMENTED();
}
/* ========================================================================== */ int sci_gpuLU(char *fname) { CheckRhs(1,2); CheckLhs(2,2); #ifdef WITH_CUDA cublasStatus status; #endif SciErr sciErr; int* piAddr_A = NULL; double* h_A = NULL; double* hi_A = NULL; int rows_A; int cols_A; int* piAddr_Opt = NULL; double* option = NULL; int rows_Opt; int cols_Opt; void* d_A = NULL; int na; void* pvPtr = NULL; int size_A = sizeof(double); bool bComplex_A = FALSE; int inputType_A; int inputType_Opt; double res; int posOutput = 1; try { sciErr = getVarAddressFromPosition(pvApiCtx, 1, &piAddr_A); if(sciErr.iErr) throw sciErr; if(Rhs == 2) { sciErr = getVarAddressFromPosition(pvApiCtx, 2, &piAddr_Opt); if(sciErr.iErr) throw sciErr; sciErr = getVarType(pvApiCtx, piAddr_Opt, &inputType_Opt); if(sciErr.iErr) throw sciErr; if(inputType_Opt == sci_matrix) { sciErr = getMatrixOfDouble(pvApiCtx, piAddr_Opt, &rows_Opt, &cols_Opt, &option); if(sciErr.iErr) throw sciErr; } else throw "Option syntax is [number,number]."; } else { rows_Opt=1; cols_Opt=2; option = (double*)malloc(2*sizeof(double)); option[0]=0; option[1]=0; } if(rows_Opt != 1 || cols_Opt != 2) throw "Option syntax is [number,number]."; if((int)option[1] == 1 && !isGpuInit()) throw "gpu is not initialised. 
Please launch gpuInit() before use this function."; sciErr = getVarType(pvApiCtx, piAddr_A, &inputType_A); if(sciErr.iErr) throw sciErr; #ifdef WITH_CUDA if (useCuda()) { if(inputType_A == sci_pointer) { sciErr = getPointer(pvApiCtx, piAddr_A, (void**)&pvPtr); if(sciErr.iErr) throw sciErr; gpuMat_CUDA* gmat; gmat = static_cast<gpuMat_CUDA*>(pvPtr); if(!gmat->useCuda) throw "Please switch to OpenCL mode before use this data."; rows_A=gmat->rows; cols_A=gmat->columns; if(gmat->complex) { bComplex_A = TRUE; size_A = sizeof(cuDoubleComplex); d_A=(cuDoubleComplex*)gmat->ptr->get_ptr(); } else d_A=(double*)gmat->ptr->get_ptr(); // Initialize CUBLAS status = cublasInit(); if (status != CUBLAS_STATUS_SUCCESS) throw status; na = rows_A * cols_A; } else if(inputType_A == 1) { // Get size and data if(isVarComplex(pvApiCtx, piAddr_A)) { sciErr = getComplexMatrixOfDouble(pvApiCtx, piAddr_A, &rows_A, &cols_A, &h_A, &hi_A); if(sciErr.iErr) throw sciErr; size_A = sizeof(cuDoubleComplex); bComplex_A = TRUE; } else { sciErr = getMatrixOfDouble(pvApiCtx, piAddr_A, &rows_A, &cols_A, &h_A); if(sciErr.iErr) throw sciErr; } na = rows_A * cols_A; // Initialize CUBLAS status = cublasInit(); if (status != CUBLAS_STATUS_SUCCESS) throw status; // Allocate device memory status = cublasAlloc(na, size_A, (void**)&d_A); if (status != CUBLAS_STATUS_SUCCESS) throw status; // Initialize the device matrices with the host matrices if(!bComplex_A) { status = cublasSetMatrix(rows_A,cols_A, sizeof(double), h_A, rows_A, (double*)d_A, rows_A); if (status != CUBLAS_STATUS_SUCCESS) throw status; } else writecucomplex(h_A, hi_A, rows_A, cols_A, (cuDoubleComplex *)d_A); } else throw "Bad argument type."; cuDoubleComplex resComplex; // Performs operation if(!bComplex_A) status = decomposeBlockedLU(rows_A, cols_A, rows_A, (double*)d_A, 1); // else // resComplex = cublasZtrsm(na,(cuDoubleComplex*)d_A); if (status != CUBLAS_STATUS_SUCCESS) throw status; // Put the result in scilab switch((int)option[0]) { case 2 : 
case 1 : sciprint("The first option must be 0 for this function. Considered as 0.\n"); case 0 : // Keep the result on the Host. { // Put the result in scilab if(!bComplex_A) { double* h_res = NULL; sciErr=allocMatrixOfDouble(pvApiCtx, Rhs + posOutput, rows_A, cols_A, &h_res); if(sciErr.iErr) throw sciErr; status = cublasGetMatrix(rows_A,cols_A, sizeof(double), (double*)d_A, rows_A, h_res, rows_A); if (status != CUBLAS_STATUS_SUCCESS) throw status; } else { sciErr = createComplexMatrixOfDouble(pvApiCtx, Rhs + posOutput, 1, 1, &resComplex.x,&resComplex.y); if(sciErr.iErr) throw sciErr; } LhsVar(posOutput)=Rhs+posOutput; posOutput++; break; } default : throw "First option argument must be 0 or 1 or 2."; } switch((int)option[1]) { case 0 : // Don't keep the data input on Device. { if(inputType_A == sci_matrix) { status = cublasFree(d_A); if (status != CUBLAS_STATUS_SUCCESS) throw status; d_A = NULL; } break; } case 1 : // Keep data of the fisrt argument on Device and return the Device pointer. 
{ if(inputType_A == sci_matrix) { gpuMat_CUDA* dptr; gpuMat_CUDA tmp={getCudaContext()->genMatrix<double>(getCudaQueue(),rows_A*cols_A),rows_A,cols_A}; dptr=new gpuMat_CUDA(tmp); dptr->useCuda = true; dptr->ptr->set_ptr((double*)d_A); if(bComplex_A) dptr->complex=TRUE; else dptr->complex=FALSE; sciErr = createPointer(pvApiCtx,Rhs+posOutput, (void*)dptr); if(sciErr.iErr) throw sciErr; LhsVar(posOutput)=Rhs+posOutput; } else throw "The first input argument is already a GPU variable."; posOutput++; break; } default : throw "Second option argument must be 0 or 1."; } // Shutdown status = cublasShutdown(); if (status != CUBLAS_STATUS_SUCCESS) throw status; } #endif #ifdef WITH_OPENCL if (!useCuda()) { throw "not implemented with OpenCL."; } #endif if(Rhs == 1) { free(option); option = NULL; } if(posOutput < Lhs+1) throw "Too many output arguments."; if(posOutput > Lhs+1) throw "Too few output arguments."; PutLhsVar(); return 0; } catch(const char* str) { Scierror(999,"%s\n",str); } catch(SciErr E) { printError(&E, 0); } #ifdef WITH_CUDA catch(cudaError_t cudaE) { GpuError::treat_error<CUDAmode>((CUDAmode::Status)cudaE); } catch(cublasStatus CublasE) { GpuError::treat_error<CUDAmode>((CUDAmode::Status)CublasE,1); } if (useCuda()) { if(inputType_A == 1 && d_A != NULL) cudaFree(d_A); } #endif #ifdef WITH_OPENCL if (!useCuda()) { Scierror(999,"not implemented with OpenCL.\n"); } #endif if(Rhs == 1 && option != NULL) free(option); return EXIT_FAILURE; }
int sci_gpuOnes(char *fname) { CheckLhs(1, 1); void* pvPtr = NULL; int* piAddr = NULL; SciErr sciErr; int inputType; int iRows = 0; int iCols = 0; GpuPointer* gpOut = NULL; try { if (!isGpuInit()) { throw "gpu is not initialised. Please launch gpuInit() before use this function."; } if (Rhs == 1) { sciErr = getVarAddressFromPosition(pvApiCtx, 1, &piAddr); if (sciErr.iErr) { throw sciErr; } sciErr = getVarType(pvApiCtx, piAddr, &inputType); if (inputType == sci_pointer) { sciErr = getPointer(pvApiCtx, piAddr, (void**)&pvPtr); if (sciErr.iErr) { throw sciErr; } GpuPointer* gmat = (GpuPointer*)(pvPtr); if (!PointerManager::getInstance()->findGpuPointerInManager(gmat)) { throw "gpuOnes : Bad type for input argument #1. Only variables created with GPU functions allowed."; } if (useCuda() && gmat->getGpuType() != GpuPointer::CudaType) { throw "gpuOnes : Bad type for input argument #1: A Cuda pointer expected."; } if (useCuda() == false && gmat->getGpuType() != GpuPointer::OpenCLType) { throw "gpuOnes : Bad type for input argument #1: A OpenCL pointer expected."; } if (gmat->getDims() > 2) { throw "gpuOnes : Hypermatrix not yet implemented."; } iRows = gmat->getRows(); iCols = gmat->getCols(); } else if (inputType == sci_matrix) { // Get size and data double* h; sciErr = getMatrixOfDouble(pvApiCtx, piAddr, &iRows, &iCols, &h); } else { throw "gpuOnes : Bad type for input argument #1 : A Matrix or GPU pointer expected."; } } else { if (Rhs > 2) { throw "gpuOnes : Hypermatrix not yet implemented."; } int* piDimsArray = new int[Rhs]; for (int i = 0; i < Rhs; i++) { sciErr = getVarAddressFromPosition(pvApiCtx, i + 1, &piAddr); if (sciErr.iErr) { throw sciErr; } sciErr = getVarType(pvApiCtx, piAddr, &inputType); if (inputType != sci_matrix) { throw "gpuOnes : Bad type for input argument #%d : A Matrix expected."; } double* h; sciErr = getMatrixOfDouble(pvApiCtx, piAddr, &iRows, &iCols, &h); if (iRows * iCols != 1) { char str[100]; sprintf(str, "gpuOnes : Wrong size for input 
argument #%d : A scalar expected.", i + 1); throw str; } piDimsArray[i] = (int)h[0]; } iRows = piDimsArray[0]; iCols = piDimsArray[1]; delete piDimsArray; } #ifdef WITH_CUDA if (useCuda()) { gpOut = new PointerCuda(iRows, iCols, false); gpOut->initMatrix(1); } #endif #ifdef WITH_OPENCL if (!useCuda()) { Scierror(999, "gpuOnes: not implemented with OpenCL.\n"); } #endif PointerManager::getInstance()->addGpuPointerInManager(gpOut); sciErr = createPointer(pvApiCtx, Rhs + 1, (void*)gpOut); if (sciErr.iErr) { throw sciErr; } LhsVar(1) = Rhs + 1; PutLhsVar(); return 0; } #ifdef WITH_CUDA catch (cudaError_t cudaE) { GpuError::treat_error<CUDAmode>((CUDAmode::Status)cudaE); } #endif catch (const char* str) { Scierror(999, "%s\n", str); } catch (SciErr E) { printError(&E, 0); } return EXIT_FAILURE; }
int sci_gpuMatrix(char *fname) { CheckRhs(2, 3); CheckLhs(1, 1); SciErr sciErr; int* piAddr_A = NULL; int inputType_A = 0; int* piAddr_R = NULL; int inputType_R = 0; int* piAddr_C = NULL; int inputType_C = 0; int rows = 0; int cols = 0; int newRows = 0; int newCols = 0; void* pvPtr = NULL; GpuPointer* gpuPtrA = NULL; try { if (!isGpuInit()) { throw "gpu is not initialised. Please launch gpuInit() before use this function."; } //--- Get input matrix --- sciErr = getVarAddressFromPosition(pvApiCtx, 1, &piAddr_A); if (sciErr.iErr) { throw sciErr; } // Get size and data sciErr = getVarType(pvApiCtx, piAddr_A, &inputType_A); if (sciErr.iErr) { throw sciErr; } //--- Get new Rows size or vector of sizes--- sciErr = getVarAddressFromPosition(pvApiCtx, 2, &piAddr_R); if (sciErr.iErr) { throw sciErr; } // Get size and data sciErr = getVarType(pvApiCtx, piAddr_R, &inputType_R); if (sciErr.iErr) { throw sciErr; } if (inputType_R != sci_matrix) { throw "gpuMatrix : Bad type for input argument #2: A real scalar or row vector expected."; } if (isVarComplex(pvApiCtx, piAddr_A)) { throw "gpuMatrix : Bad type for input argument #2: A real scalar or row vector expected."; } else { double* dRows = NULL; sciErr = getMatrixOfDouble(pvApiCtx, piAddr_R, &rows, &cols, &dRows); if (sciErr.iErr) { throw sciErr; } if (nbInputArgument(pvApiCtx) == 2) { if (rows != 1 || cols != 2) { throw "gpuMatrix : Bad size for input argument #2: A row vector of size two expected."; } newRows = (int)dRows[0]; newCols = (int)dRows[1]; if (newCols < -1 || newCols == 0) { throw "gpuMatrix : Wrong value for input argument #3: -1 or positive value expected."; } } else { newRows = (int)(*dRows); } if (newRows < -1 || newRows == 0) { throw "gpuMatrix : Wrong value for input argument #2: -1 or positive value expected."; } } if (nbInputArgument(pvApiCtx) == 3) { //--- Get new Cols size--- sciErr = getVarAddressFromPosition(pvApiCtx, 3, &piAddr_C); if (sciErr.iErr) { throw sciErr; } // Get size and data sciErr = 
getVarType(pvApiCtx, piAddr_C, &inputType_C); if (sciErr.iErr) { throw sciErr; } if (inputType_C != sci_matrix) { throw "gpuMatrix : Bad type for input argument #3: A real scalar expected."; } if (isVarComplex(pvApiCtx, piAddr_A)) { throw "gpuMatrix : Bad type for input argument #3: A real scalar expected."; } else { double* dCols = NULL; sciErr = getMatrixOfDouble(pvApiCtx, piAddr_C, &rows, &cols, &dCols); if (sciErr.iErr) { throw sciErr; } newCols = (int)(*dCols); if (newCols < -1 || newCols == 0) { throw "gpuMatrix : Wrong value for input argument #3: -1 or positive value expected."; } } } if (inputType_A == sci_pointer) { sciErr = getPointer(pvApiCtx, piAddr_A, (void**)&pvPtr); if (sciErr.iErr) { throw sciErr; } gpuPtrA = (GpuPointer*)pvPtr; if (!PointerManager::getInstance()->findGpuPointerInManager(gpuPtrA)) { throw "gpuMatrix : Bad type for input argument #1: Variables created with GPU functions expected."; } if (useCuda() && gpuPtrA->getGpuType() != GpuPointer::CudaType) { throw "gpuMatrix : Bad type for input argument #1: A Cuda pointer expected."; } if (useCuda() == false && gpuPtrA->getGpuType() != GpuPointer::OpenCLType) { throw "gpuMatrix : Bad type for input argument #1: A OpenCL pointer expected."; } rows = gpuPtrA->getRows(); cols = gpuPtrA->getCols(); } else if (inputType_A == sci_matrix) { double* h = NULL; if (isVarComplex(pvApiCtx, piAddr_A)) { double* hi = NULL; sciErr = getComplexMatrixOfDouble(pvApiCtx, piAddr_A, &rows, &cols, &h, &hi); #ifdef WITH_CUDA if (useCuda()) { gpuPtrA = new PointerCuda(h, hi, rows, cols); } #endif #ifdef WITH_OPENCL if (!useCuda()) { Scierror(999, "gpuMatrix: not implemented with OpenCL.\n"); } #endif } else { sciErr = getMatrixOfDouble(pvApiCtx, piAddr_A, &rows, &cols, &h); #ifdef WITH_CUDA if (useCuda()) { gpuPtrA = new PointerCuda(h, rows, cols); } #endif #ifdef WITH_OPENCL if (!useCuda()) { Scierror(999, "gpuMatrix: not implemented with OpenCL.\n"); } #endif } if (sciErr.iErr) { throw sciErr; } } else { throw 
"gpuMatrix : Bad type for input argument #1: A GPU or CPU matrix expected."; } if (newRows == -1 && newCols != -1) { newRows = rows * cols / newCols; } else if (newRows != -1 && newCols == -1) { newCols = rows * cols / newRows; } if (rows * cols != newRows * newCols) { throw "gpuMatrix : Wrong value for input arguments #2 and 3: Correct size expected."; } #ifdef WITH_OPENCL if (!useCuda()) { Scierror(999, "gpuMatrix: not implemented with OpenCL.\n"); } #endif GpuPointer* gpuOut = gpuPtrA->clone(); gpuOut->setRows(newRows); gpuOut->setCols(newCols); // Put the result in scilab PointerManager::getInstance()->addGpuPointerInManager(gpuOut); sciErr = createPointer(pvApiCtx, Rhs + 1, (void*)gpuOut); LhsVar(1) = Rhs + 1; if (inputType_A == 1 && gpuPtrA != NULL) { delete gpuPtrA; } PutLhsVar(); return 0; } catch (const char* str) { Scierror(999, "%s\n", str); } catch (SciErr E) { printError(&E, 0); } if (inputType_A == 1 && gpuPtrA != NULL) { delete gpuPtrA; } return EXIT_FAILURE; }