// Look up the display name of property `prop`.
// The enum->name table yields an offset into a name group; `choice`
// selects which name (short/long/...) within that group is returned.
inline const char*
PropertyAliases::getPropertyName(EnumValue prop, UPropertyNameChoice choice) const
{
    NonContiguousEnumToOffset* nameTable =
        (NonContiguousEnumToOffset*) getPointer(enumToName_offset);
    Offset groupOffset = nameTable->getOffset(prop);
    return chooseNameInGroup(groupOffset, choice);
}
// Non-owning accessor for the ant's current location.
Point* Ant::getLocation() const
{
    return getPointer();
}
// Return the value map for property `prop`, or NULL when the property
// has no associated values (its offset in the enum->value table is 0).
const ValueMap* PropertyAliases::getValueMap(EnumValue prop) const
{
    NonContiguousEnumToOffset* valueTable =
        (NonContiguousEnumToOffset*) getPointer(enumToValue_offset);
    Offset valueOffset = valueTable->getOffset(prop);
    if (!valueOffset)
    {
        return NULL;
    }
    return (const ValueMap*) getPointerNull(valueOffset);
}
/* ========================================================================== */ int sci_gpuLU(char *fname) { CheckRhs(1,2); CheckLhs(2,2); #ifdef WITH_CUDA cublasStatus status; #endif SciErr sciErr; int* piAddr_A = NULL; double* h_A = NULL; double* hi_A = NULL; int rows_A; int cols_A; int* piAddr_Opt = NULL; double* option = NULL; int rows_Opt; int cols_Opt; void* d_A = NULL; int na; void* pvPtr = NULL; int size_A = sizeof(double); bool bComplex_A = FALSE; int inputType_A; int inputType_Opt; double res; int posOutput = 1; try { sciErr = getVarAddressFromPosition(pvApiCtx, 1, &piAddr_A); if(sciErr.iErr) throw sciErr; if(Rhs == 2) { sciErr = getVarAddressFromPosition(pvApiCtx, 2, &piAddr_Opt); if(sciErr.iErr) throw sciErr; sciErr = getVarType(pvApiCtx, piAddr_Opt, &inputType_Opt); if(sciErr.iErr) throw sciErr; if(inputType_Opt == sci_matrix) { sciErr = getMatrixOfDouble(pvApiCtx, piAddr_Opt, &rows_Opt, &cols_Opt, &option); if(sciErr.iErr) throw sciErr; } else throw "Option syntax is [number,number]."; } else { rows_Opt=1; cols_Opt=2; option = (double*)malloc(2*sizeof(double)); option[0]=0; option[1]=0; } if(rows_Opt != 1 || cols_Opt != 2) throw "Option syntax is [number,number]."; if((int)option[1] == 1 && !isGpuInit()) throw "gpu is not initialised. 
Please launch gpuInit() before use this function."; sciErr = getVarType(pvApiCtx, piAddr_A, &inputType_A); if(sciErr.iErr) throw sciErr; #ifdef WITH_CUDA if (useCuda()) { if(inputType_A == sci_pointer) { sciErr = getPointer(pvApiCtx, piAddr_A, (void**)&pvPtr); if(sciErr.iErr) throw sciErr; gpuMat_CUDA* gmat; gmat = static_cast<gpuMat_CUDA*>(pvPtr); if(!gmat->useCuda) throw "Please switch to OpenCL mode before use this data."; rows_A=gmat->rows; cols_A=gmat->columns; if(gmat->complex) { bComplex_A = TRUE; size_A = sizeof(cuDoubleComplex); d_A=(cuDoubleComplex*)gmat->ptr->get_ptr(); } else d_A=(double*)gmat->ptr->get_ptr(); // Initialize CUBLAS status = cublasInit(); if (status != CUBLAS_STATUS_SUCCESS) throw status; na = rows_A * cols_A; } else if(inputType_A == 1) { // Get size and data if(isVarComplex(pvApiCtx, piAddr_A)) { sciErr = getComplexMatrixOfDouble(pvApiCtx, piAddr_A, &rows_A, &cols_A, &h_A, &hi_A); if(sciErr.iErr) throw sciErr; size_A = sizeof(cuDoubleComplex); bComplex_A = TRUE; } else { sciErr = getMatrixOfDouble(pvApiCtx, piAddr_A, &rows_A, &cols_A, &h_A); if(sciErr.iErr) throw sciErr; } na = rows_A * cols_A; // Initialize CUBLAS status = cublasInit(); if (status != CUBLAS_STATUS_SUCCESS) throw status; // Allocate device memory status = cublasAlloc(na, size_A, (void**)&d_A); if (status != CUBLAS_STATUS_SUCCESS) throw status; // Initialize the device matrices with the host matrices if(!bComplex_A) { status = cublasSetMatrix(rows_A,cols_A, sizeof(double), h_A, rows_A, (double*)d_A, rows_A); if (status != CUBLAS_STATUS_SUCCESS) throw status; } else writecucomplex(h_A, hi_A, rows_A, cols_A, (cuDoubleComplex *)d_A); } else throw "Bad argument type."; cuDoubleComplex resComplex; // Performs operation if(!bComplex_A) status = decomposeBlockedLU(rows_A, cols_A, rows_A, (double*)d_A, 1); // else // resComplex = cublasZtrsm(na,(cuDoubleComplex*)d_A); if (status != CUBLAS_STATUS_SUCCESS) throw status; // Put the result in scilab switch((int)option[0]) { case 2 : 
case 1 : sciprint("The first option must be 0 for this function. Considered as 0.\n"); case 0 : // Keep the result on the Host. { // Put the result in scilab if(!bComplex_A) { double* h_res = NULL; sciErr=allocMatrixOfDouble(pvApiCtx, Rhs + posOutput, rows_A, cols_A, &h_res); if(sciErr.iErr) throw sciErr; status = cublasGetMatrix(rows_A,cols_A, sizeof(double), (double*)d_A, rows_A, h_res, rows_A); if (status != CUBLAS_STATUS_SUCCESS) throw status; } else { sciErr = createComplexMatrixOfDouble(pvApiCtx, Rhs + posOutput, 1, 1, &resComplex.x,&resComplex.y); if(sciErr.iErr) throw sciErr; } LhsVar(posOutput)=Rhs+posOutput; posOutput++; break; } default : throw "First option argument must be 0 or 1 or 2."; } switch((int)option[1]) { case 0 : // Don't keep the data input on Device. { if(inputType_A == sci_matrix) { status = cublasFree(d_A); if (status != CUBLAS_STATUS_SUCCESS) throw status; d_A = NULL; } break; } case 1 : // Keep data of the fisrt argument on Device and return the Device pointer. 
{ if(inputType_A == sci_matrix) { gpuMat_CUDA* dptr; gpuMat_CUDA tmp={getCudaContext()->genMatrix<double>(getCudaQueue(),rows_A*cols_A),rows_A,cols_A}; dptr=new gpuMat_CUDA(tmp); dptr->useCuda = true; dptr->ptr->set_ptr((double*)d_A); if(bComplex_A) dptr->complex=TRUE; else dptr->complex=FALSE; sciErr = createPointer(pvApiCtx,Rhs+posOutput, (void*)dptr); if(sciErr.iErr) throw sciErr; LhsVar(posOutput)=Rhs+posOutput; } else throw "The first input argument is already a GPU variable."; posOutput++; break; } default : throw "Second option argument must be 0 or 1."; } // Shutdown status = cublasShutdown(); if (status != CUBLAS_STATUS_SUCCESS) throw status; } #endif #ifdef WITH_OPENCL if (!useCuda()) { throw "not implemented with OpenCL."; } #endif if(Rhs == 1) { free(option); option = NULL; } if(posOutput < Lhs+1) throw "Too many output arguments."; if(posOutput > Lhs+1) throw "Too few output arguments."; PutLhsVar(); return 0; } catch(const char* str) { Scierror(999,"%s\n",str); } catch(SciErr E) { printError(&E, 0); } #ifdef WITH_CUDA catch(cudaError_t cudaE) { GpuError::treat_error<CUDAmode>((CUDAmode::Status)cudaE); } catch(cublasStatus CublasE) { GpuError::treat_error<CUDAmode>((CUDAmode::Status)CublasE,1); } if (useCuda()) { if(inputType_A == 1 && d_A != NULL) cudaFree(d_A); } #endif #ifdef WITH_OPENCL if (!useCuda()) { Scierror(999,"not implemented with OpenCL.\n"); } #endif if(Rhs == 1 && option != NULL) free(option); return EXIT_FAILURE; }
float AnimationCurve::getValue(float time) const { const AnimatedKeys* pKeys = getPointer(); if (!pKeys) return m_constantValue; const AnimatedKeys& ak = *pKeys; size_t keyCount = ak.keys.size(); if (keyCount == 1) // it's constant, so return that return ak.keys.begin()->second; float value = 0.0f; // see if it exists std::map<float, float>::const_iterator itFind = ak.keys.find(time); if (itFind != ak.keys.end()) { value = itFind->second; return value; } std::map<float, float>::const_iterator findLower = ak.keys.lower_bound(time); std::map<float, float>::const_iterator findUpper = ak.keys.upper_bound(time); if (findLower == ak.keys.end()) { // after last frame std::map<float, float>::const_iterator itLastKey = --ak.keys.rbegin().base(); value = itLastKey->second; return value; } if (findUpper == ak.keys.begin()) { // before first frame value = ak.keys.begin()->second; return value; } if (findUpper != ak.keys.end()) { std::map<float, float>::const_iterator itPrevKey = findUpper; --itPrevKey; float lowerTime = (*itPrevKey).first; float lowerValue = (*itPrevKey).second; float upperTime = (*findUpper).first; float upperValue = (*findUpper).second; float timeRatio = (time - lowerTime) / (upperTime - lowerTime); switch (ak.interpolationType) { default: case eLinearInterpolation: value = linearTween(timeRatio, lowerValue, upperValue); break; case eCubicInterpolation: value = cubicTween(timeRatio, lowerValue, upperValue); break; case eQuadraticInterpolation: value = quadraticTween(timeRatio, lowerValue, upperValue); break; } return value; } else { assert(false); } return 0.0f; }
// Render a human-readable "#[type value]" description of this atom,
// dispatching on the atom's runtime type tag.
string Atom::getString () const
{
    stringstream out;
    switch (getType ())
    {
    case T_BROKENHEART:
        out << "#[broken-heart " << getValue ().mPointer << "]";
        break;
    case T_EOF:
        out << "#[eof " << getValue ().mPointer << "]";
        break;
    case T_NULL:
        out << "#[null " << getValue ().mPointer << "]";
        break;
    case T_CHAR:
        out << "#[char " << getChar () << "]";
        break;
    case T_INTEGER:
        out << "#[integer " << getInteger () << "]";
        break;
    case T_BOOLEAN:
        out << "#[boolean " << getBoolean () << "]";
        break;
    case T_PRIMITIVE_PROCEDURE:
        out << "#[primitive-procedure " << getValue ().mInteger << "]";
        break;
    case T_PORT:
        out << "#[port " << getValue ().mInteger << "]";
        break;
    case T_POINTER:
        out << "#[pointer " << getPointer () << "]";
        break;
    case T_PAIR:
        out << "#[pair " << getPointer () << "]";
        break;
    case T_VECTOR:
        out << "#[vector " << getPointer () << "]";
        break;
    case T_STRING:
        out << "#[string " << getPointer () << "]";
        break;
    case T_SYMBOL:
        out << "#[symbol " << getPointer () << "]";
        break;
    case T_PROCEDURE:
        out << "#[procedure " << getPointer () << "]";
        break;
    default:
        out << "#[unknown]";
        break;
    }
    return out.str ();
}
int sci_gpuOnes(char *fname) { CheckLhs(1, 1); void* pvPtr = NULL; int* piAddr = NULL; SciErr sciErr; int inputType; int iRows = 0; int iCols = 0; GpuPointer* gpOut = NULL; try { if (!isGpuInit()) { throw "gpu is not initialised. Please launch gpuInit() before use this function."; } if (Rhs == 1) { sciErr = getVarAddressFromPosition(pvApiCtx, 1, &piAddr); if (sciErr.iErr) { throw sciErr; } sciErr = getVarType(pvApiCtx, piAddr, &inputType); if (inputType == sci_pointer) { sciErr = getPointer(pvApiCtx, piAddr, (void**)&pvPtr); if (sciErr.iErr) { throw sciErr; } GpuPointer* gmat = (GpuPointer*)(pvPtr); if (!PointerManager::getInstance()->findGpuPointerInManager(gmat)) { throw "gpuOnes : Bad type for input argument #1. Only variables created with GPU functions allowed."; } if (useCuda() && gmat->getGpuType() != GpuPointer::CudaType) { throw "gpuOnes : Bad type for input argument #1: A Cuda pointer expected."; } if (useCuda() == false && gmat->getGpuType() != GpuPointer::OpenCLType) { throw "gpuOnes : Bad type for input argument #1: A OpenCL pointer expected."; } if (gmat->getDims() > 2) { throw "gpuOnes : Hypermatrix not yet implemented."; } iRows = gmat->getRows(); iCols = gmat->getCols(); } else if (inputType == sci_matrix) { // Get size and data double* h; sciErr = getMatrixOfDouble(pvApiCtx, piAddr, &iRows, &iCols, &h); } else { throw "gpuOnes : Bad type for input argument #1 : A Matrix or GPU pointer expected."; } } else { if (Rhs > 2) { throw "gpuOnes : Hypermatrix not yet implemented."; } int* piDimsArray = new int[Rhs]; for (int i = 0; i < Rhs; i++) { sciErr = getVarAddressFromPosition(pvApiCtx, i + 1, &piAddr); if (sciErr.iErr) { throw sciErr; } sciErr = getVarType(pvApiCtx, piAddr, &inputType); if (inputType != sci_matrix) { throw "gpuOnes : Bad type for input argument #%d : A Matrix expected."; } double* h; sciErr = getMatrixOfDouble(pvApiCtx, piAddr, &iRows, &iCols, &h); if (iRows * iCols != 1) { char str[100]; sprintf(str, "gpuOnes : Wrong size for input 
argument #%d : A scalar expected.", i + 1); throw str; } piDimsArray[i] = (int)h[0]; } iRows = piDimsArray[0]; iCols = piDimsArray[1]; delete piDimsArray; } #ifdef WITH_CUDA if (useCuda()) { gpOut = new PointerCuda(iRows, iCols, false); gpOut->initMatrix(1); } #endif #ifdef WITH_OPENCL if (!useCuda()) { Scierror(999, "gpuOnes: not implemented with OpenCL.\n"); } #endif PointerManager::getInstance()->addGpuPointerInManager(gpOut); sciErr = createPointer(pvApiCtx, Rhs + 1, (void*)gpOut); if (sciErr.iErr) { throw sciErr; } LhsVar(1) = Rhs + 1; PutLhsVar(); return 0; } #ifdef WITH_CUDA catch (cudaError_t cudaE) { GpuError::treat_error<CUDAmode>((CUDAmode::Status)cudaE); } #endif catch (const char* str) { Scierror(999, "%s\n", str); } catch (SciErr E) { printError(&E, 0); } return EXIT_FAILURE; }
int sci_gpuMatrix(char *fname) { CheckRhs(2, 3); CheckLhs(1, 1); SciErr sciErr; int* piAddr_A = NULL; int inputType_A = 0; int* piAddr_R = NULL; int inputType_R = 0; int* piAddr_C = NULL; int inputType_C = 0; int rows = 0; int cols = 0; int newRows = 0; int newCols = 0; void* pvPtr = NULL; GpuPointer* gpuPtrA = NULL; try { if (!isGpuInit()) { throw "gpu is not initialised. Please launch gpuInit() before use this function."; } //--- Get input matrix --- sciErr = getVarAddressFromPosition(pvApiCtx, 1, &piAddr_A); if (sciErr.iErr) { throw sciErr; } // Get size and data sciErr = getVarType(pvApiCtx, piAddr_A, &inputType_A); if (sciErr.iErr) { throw sciErr; } //--- Get new Rows size or vector of sizes--- sciErr = getVarAddressFromPosition(pvApiCtx, 2, &piAddr_R); if (sciErr.iErr) { throw sciErr; } // Get size and data sciErr = getVarType(pvApiCtx, piAddr_R, &inputType_R); if (sciErr.iErr) { throw sciErr; } if (inputType_R != sci_matrix) { throw "gpuMatrix : Bad type for input argument #2: A real scalar or row vector expected."; } if (isVarComplex(pvApiCtx, piAddr_A)) { throw "gpuMatrix : Bad type for input argument #2: A real scalar or row vector expected."; } else { double* dRows = NULL; sciErr = getMatrixOfDouble(pvApiCtx, piAddr_R, &rows, &cols, &dRows); if (sciErr.iErr) { throw sciErr; } if (nbInputArgument(pvApiCtx) == 2) { if (rows != 1 || cols != 2) { throw "gpuMatrix : Bad size for input argument #2: A row vector of size two expected."; } newRows = (int)dRows[0]; newCols = (int)dRows[1]; if (newCols < -1 || newCols == 0) { throw "gpuMatrix : Wrong value for input argument #3: -1 or positive value expected."; } } else { newRows = (int)(*dRows); } if (newRows < -1 || newRows == 0) { throw "gpuMatrix : Wrong value for input argument #2: -1 or positive value expected."; } } if (nbInputArgument(pvApiCtx) == 3) { //--- Get new Cols size--- sciErr = getVarAddressFromPosition(pvApiCtx, 3, &piAddr_C); if (sciErr.iErr) { throw sciErr; } // Get size and data sciErr = 
getVarType(pvApiCtx, piAddr_C, &inputType_C); if (sciErr.iErr) { throw sciErr; } if (inputType_C != sci_matrix) { throw "gpuMatrix : Bad type for input argument #3: A real scalar expected."; } if (isVarComplex(pvApiCtx, piAddr_A)) { throw "gpuMatrix : Bad type for input argument #3: A real scalar expected."; } else { double* dCols = NULL; sciErr = getMatrixOfDouble(pvApiCtx, piAddr_C, &rows, &cols, &dCols); if (sciErr.iErr) { throw sciErr; } newCols = (int)(*dCols); if (newCols < -1 || newCols == 0) { throw "gpuMatrix : Wrong value for input argument #3: -1 or positive value expected."; } } } if (inputType_A == sci_pointer) { sciErr = getPointer(pvApiCtx, piAddr_A, (void**)&pvPtr); if (sciErr.iErr) { throw sciErr; } gpuPtrA = (GpuPointer*)pvPtr; if (!PointerManager::getInstance()->findGpuPointerInManager(gpuPtrA)) { throw "gpuMatrix : Bad type for input argument #1: Variables created with GPU functions expected."; } if (useCuda() && gpuPtrA->getGpuType() != GpuPointer::CudaType) { throw "gpuMatrix : Bad type for input argument #1: A Cuda pointer expected."; } if (useCuda() == false && gpuPtrA->getGpuType() != GpuPointer::OpenCLType) { throw "gpuMatrix : Bad type for input argument #1: A OpenCL pointer expected."; } rows = gpuPtrA->getRows(); cols = gpuPtrA->getCols(); } else if (inputType_A == sci_matrix) { double* h = NULL; if (isVarComplex(pvApiCtx, piAddr_A)) { double* hi = NULL; sciErr = getComplexMatrixOfDouble(pvApiCtx, piAddr_A, &rows, &cols, &h, &hi); #ifdef WITH_CUDA if (useCuda()) { gpuPtrA = new PointerCuda(h, hi, rows, cols); } #endif #ifdef WITH_OPENCL if (!useCuda()) { Scierror(999, "gpuMatrix: not implemented with OpenCL.\n"); } #endif } else { sciErr = getMatrixOfDouble(pvApiCtx, piAddr_A, &rows, &cols, &h); #ifdef WITH_CUDA if (useCuda()) { gpuPtrA = new PointerCuda(h, rows, cols); } #endif #ifdef WITH_OPENCL if (!useCuda()) { Scierror(999, "gpuMatrix: not implemented with OpenCL.\n"); } #endif } if (sciErr.iErr) { throw sciErr; } } else { throw 
"gpuMatrix : Bad type for input argument #1: A GPU or CPU matrix expected."; } if (newRows == -1 && newCols != -1) { newRows = rows * cols / newCols; } else if (newRows != -1 && newCols == -1) { newCols = rows * cols / newRows; } if (rows * cols != newRows * newCols) { throw "gpuMatrix : Wrong value for input arguments #2 and 3: Correct size expected."; } #ifdef WITH_OPENCL if (!useCuda()) { Scierror(999, "gpuMatrix: not implemented with OpenCL.\n"); } #endif GpuPointer* gpuOut = gpuPtrA->clone(); gpuOut->setRows(newRows); gpuOut->setCols(newCols); // Put the result in scilab PointerManager::getInstance()->addGpuPointerInManager(gpuOut); sciErr = createPointer(pvApiCtx, Rhs + 1, (void*)gpuOut); LhsVar(1) = Rhs + 1; if (inputType_A == 1 && gpuPtrA != NULL) { delete gpuPtrA; } PutLhsVar(); return 0; } catch (const char* str) { Scierror(999, "%s\n", str); } catch (SciErr E) { printError(&E, 0); } if (inputType_A == 1 && gpuPtrA != NULL) { delete gpuPtrA; } return EXIT_FAILURE; }
/*
 * Subtract From Pointer Immediate
 * Pn <- Pn - XX, X: {0 .. 9}.
 *
 * 02 Pn XX
 *
 * Pointer decremented by integer XX
 */
void o2(Vm* vm){
    int ptrIndex = charToInt(vm->IR[3]);
    int amount = opToInt(getOp(4, vm->IR));
    setPointer(vm, ptrIndex, getPointer(vm, ptrIndex) - amount);
}
// Generate the shadow ("shandow" — existing project spelling) maps for one
// light: clears the shadow / transparency-shadow render targets, and, when
// the light carries shadow info, re-renders the scene depth from the light's
// viewpoint and updates lightMatrix for later shadow lookups.
// NOTE(review): presumably called once per light per frame — confirm at caller.
void RenderingEngine::generateShandowMaps(std::shared_ptr<BaseLight> light, GameObject* root){
    auto shandowInfo = light->getShandowInfo();
    int shandowMapIndex = 0;
    lightMatrix.identity();
    // Map resolution (as a power of two) selects which pre-allocated map to use.
    if (shandowInfo)
        shandowMapIndex = shandowInfo->getShandowMapSizeAsPowerOf2() - 1;
    // Clear the transparency shadow map...
    setTexture("transparencyShandowMap", transparencyShandowMaps.at(shandowMapIndex));
    transparencyShandowMaps.at(shandowMapIndex)->bindAsRenderTarget();
    glClearColor(1, 1, 0, 0);
    glClear(GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT);
    // ...the opaque shadow map...
    setTexture("shandowMap", shandowMaps.at(shandowMapIndex));
    shandowMaps.at(shandowMapIndex)->bindAsRenderTarget();
    glClearColor(1, 1, 0, 0);
    glClear(GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT);
    // ...and the transparency color buffer.
    getTexture("transparencyShandowColorBuffer")->bindAsRenderTarget();
    glClearColor(0, 0, 0, 0);
    glClear(GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT);
    if (shandowInfo){
        // Point the auxiliary camera at the scene from the light's transform.
        altCamera->setProjection(shandowInfo->getProjection());
        altCamera->getTransform()->setPosition(light->getTransform()->getWorldPosition());
        altCamera->getTransform()->setRotation(light->getTransform()->getWorldRotation());
        // Bias matrix maps clip space to [0,1] texture space for shadow lookups.
        lightMatrix = RenderingEngine::biasMatrix * altCamera->getViewProjection();
        setFloat("shandowVarianceMin", shandowInfo->getMinVariance());
        setFloat("shandowLightBleedReduction", shandowInfo->getLightBleedReduction());
        bool flip = shandowInfo->getFlipfaces();
        // Temporarily swap in the light camera for the depth passes.
        Camera* temp = mainCamera;
        mainCamera = altCamera.get();
        glEnable(GL_CULL_FACE);
        // Front-face culling (when requested) reduces shadow acne.
        if (flip)
            glCullFace(GL_FRONT);
        // Depth pass: opaque geometry, then transparent geometry,
        // then transparent color into the color buffer.
        shandowMaps.at(shandowMapIndex)->bindAsRenderTarget();
        renderAllAlpha(false, scast(getPointer("depthMapGenerator")), root);
        transparencyShandowMaps.at(shandowMapIndex)->bindAsRenderTarget();
        renderAllAlpha(true, scast(getPointer("depthMapGenerator")), root);
        getTexture("transparencyShandowColorBuffer")->bindAsRenderTarget();
        renderAllAlpha(true, scast(getPointer("defaultShader")), root);
        if (flip)
            glCullFace(GL_BACK);
        glDisable(GL_CULL_FACE);
        // Restore the scene camera.
        mainCamera = temp;
        if (shandowInfo->getShandowSoftness() != 0){
            // Softening pass is currently disabled.
            // blurShandowMap(shandowMapIndex, shandowInfo->getShandowSoftness(), pcast(getPointer("gausBlurFilter")));
        }
    }
    else{
        // No shadow info: neutral defaults so the shaders still sample sanely.
        setFloat("shandowVarianceMin", 0.002f);
        setFloat("shandowLightBleedReduction", 0.2f);
    }
}
// Pack this queue pointer into `space`: deep-pack the pointee first (when
// present), then pack the pointer itself via packShallow.
Long QueuePtr::pack(void *space, short isSpacePtr)
{
    if (getPointer())
    {
        getPointer()->pack(space);
    }
    return packShallow(space, isSpacePtr);
}
int sci_umf_lusolve(char* fname, unsigned long l) { SciErr sciErr; int mb = 0; int nb = 0; int it_flag = 0; int i = 0; int j = 0; int NoTranspose = 0; int NoRaffinement = 0; SciSparse AA; CcsSparse A; /* umfpack stuff */ double Info[UMFPACK_INFO]; // double *Info = (double *) NULL; double Control[UMFPACK_CONTROL]; void* Numeric = NULL; int lnz = 0, unz = 0, n = 0, n_col = 0, nz_udiag = 0, umf_flag = 0; int* Wi = NULL; int mW = 0; double *W = NULL; int iComplex = 0; int* piAddr1 = NULL; int* piAddr2 = NULL; int* piAddr3 = NULL; int* piAddr4 = NULL; double* pdblBR = NULL; double* pdblBI = NULL; double* pdblXR = NULL; double* pdblXI = NULL; int mA = 0; // rows int nA = 0; // cols int iNbItem = 0; int* piNbItemRow = NULL; int* piColPos = NULL; double* pdblSpReal = NULL; double* pdblSpImg = NULL; /* Check numbers of input/output arguments */ CheckInputArgument(pvApiCtx, 2, 4); CheckOutputArgument(pvApiCtx, 1, 1); /* First get arg #1 : the pointer to the LU factors */ sciErr = getVarAddressFromPosition(pvApiCtx, 1, &piAddr1); if (sciErr.iErr) { printError(&sciErr, 0); return 1; } sciErr = getPointer(pvApiCtx, piAddr1, &Numeric); if (sciErr.iErr) { printError(&sciErr, 0); return 1; } /* Check if this pointer is a valid ref to a umfpack LU numeric object */ if ( ! 
IsAdrInList(Numeric, ListNumeric, &it_flag) ) { Scierror(999, _("%s: Wrong value for input argument #%d: Must be a valid reference to (umf) LU factors.\n"), fname, 1); return 1; } /* get some parameters of the factorization (for some checking) */ if ( it_flag == 0 ) { umfpack_di_get_lunz(&lnz, &unz, &n, &n_col, &nz_udiag, Numeric); } else { iComplex = 1; umfpack_zi_get_lunz(&lnz, &unz, &n, &n_col, &nz_udiag, Numeric); } if ( n != n_col ) { Scierror(999, _("%s: An error occurred: %s.\n"), fname, _("This is not a factorization of a square matrix")); return 1; } if ( nz_udiag < n ) { Scierror(999, _("%s: An error occurred: %s.\n"), fname, _("This is a factorization of a singular matrix")); return 1; } /* Get now arg #2 : the vector b */ sciErr = getVarAddressFromPosition(pvApiCtx, 2, &piAddr2); if (sciErr.iErr) { printError(&sciErr, 0); return 1; } if (isVarComplex(pvApiCtx, piAddr2)) { iComplex = 1; sciErr = getComplexMatrixOfDouble(pvApiCtx, piAddr2, &mb, &nb, &pdblBR, &pdblBI); } else { sciErr = getMatrixOfDouble(pvApiCtx, piAddr2, &mb, &nb, &pdblBR); } if (sciErr.iErr) { printError(&sciErr, 0); return 1; } if (mb != n || nb < 1) /* test if the right hand side is compatible */ { Scierror(999, _("%s: Wrong size for input argument #%d.\n"), fname, 2); return 1; } /* allocate memory for the solution x */ if (iComplex) { sciErr = allocComplexMatrixOfDouble(pvApiCtx, nbInputArgument(pvApiCtx) + 1, mb, nb, &pdblXR, &pdblXI); } else { sciErr = allocMatrixOfDouble(pvApiCtx, nbInputArgument(pvApiCtx) + 1, mb, nb, &pdblXR); } if (sciErr.iErr) { printError(&sciErr, 0); return 1; } /* selection between the different options : * -- solving Ax=b or A'x=b (Note: we could add A.'x=b) * -- with or without raffinement */ if (nbInputArgument(pvApiCtx) == 2) { NoTranspose = 1; NoRaffinement = 1; } else /* 3 or 4 input arguments but the third must be a string */ { char* pStr = NULL; sciErr = getVarAddressFromPosition(pvApiCtx, 3, &piAddr3); if (sciErr.iErr) { printError(&sciErr, 0); 
return 1; } getAllocatedSingleString(pvApiCtx, piAddr3, &pStr); if (strcmp(pStr, "Ax=b") == 0) { NoTranspose = 1; } else if ( strcmp(pStr, "A'x=b") == 0 ) { NoTranspose = 0; } else { Scierror(999, _("%s: Wrong input argument #%d: '%s' or '%s' expected.\n"), fname, 3, "Ax=b", "A'x=b"); return 1; } if (nbInputArgument(pvApiCtx) == 4) { sciErr = getVarAddressFromPosition(pvApiCtx, 4, &piAddr4); if (sciErr.iErr) { printError(&sciErr, 0); return 1; } if (isVarComplex(pvApiCtx, piAddr4)) { AA.it = 1; sciErr = getComplexSparseMatrix(pvApiCtx, piAddr4, &mA, &nA, &iNbItem, &piNbItemRow, &piColPos, &pdblSpReal, &pdblSpImg); } else { AA.it = 0; sciErr = getSparseMatrix(pvApiCtx, piAddr4, &mA, &nA, &iNbItem, &piNbItemRow, &piColPos, &pdblSpReal); } if (sciErr.iErr) { printError(&sciErr, 0); return 1; } // fill struct sparse AA.m = mA; AA.n = nA; AA.nel = iNbItem; AA.mnel = piNbItemRow; AA.icol = piColPos; AA.R = pdblSpReal; AA.I = pdblSpImg; /* some check... but we can't be sure that the matrix corresponds to the LU factors */ if ( mA != nA || mA != n || AA.it != it_flag ) { Scierror(999, _("%s: Wrong size for input argument #%d: %s.\n"), fname, 4, _("Matrix is not compatible with the given LU factors")); return 1; } NoRaffinement = 0; } else { NoRaffinement = 1; /* only 3 input var => no raffinement */ } } /* allocate memory for umfpack_di_wsolve usage or umfpack_zi_wsolve usage*/ Wi = (int*)MALLOC(n * sizeof(int)); if (it_flag == 1) { if (NoRaffinement) { mW = 4 * n; } else { mW = 10 * n; } } else { if (NoRaffinement) { mW = n; } else { mW = 5 * n; } } W = (double*)MALLOC(mW * sizeof(double)); if (NoRaffinement == 0) { SciSparseToCcsSparse(&AA, &A); } else { A.p = NULL; A.irow = NULL; A.R = NULL; A.I = NULL; } /* get the pointer for b */ if (it_flag == 1 && pdblBI == NULL) { int iSize = mb * nb * sizeof(double); pdblBI = (double*)MALLOC(iSize); memset(pdblBI, 0x00, iSize); } /* init Control */ if (it_flag == 0) { umfpack_di_defaults(Control); } else { 
umfpack_zi_defaults(Control); } if (NoRaffinement) { Control[UMFPACK_IRSTEP] = 0; } if (NoTranspose) { umf_flag = UMFPACK_A; } else { umf_flag = UMFPACK_At; } if (it_flag == 0) { for (j = 0; j < nb ; j++) { umfpack_di_wsolve(umf_flag, A.p, A.irow, A.R, &pdblXR[j * mb], &pdblBR[j * mb], Numeric, Control, Info, Wi, W); } if (iComplex == 1) { for (j = 0; j < nb ; j++) { umfpack_di_wsolve(umf_flag, A.p, A.irow, A.R, &pdblXI[j * mb], &pdblBI[j * mb], Numeric, Control, Info, Wi, W); } } } else { for (j = 0; j < nb ; j++) { umfpack_zi_wsolve(umf_flag, A.p, A.irow, A.R, A.I, &pdblXR[j * mb], &pdblXI[j * mb], &pdblBR[j * mb], &pdblBI[j * mb], Numeric, Control, Info, Wi, W); } } if (isVarComplex(pvApiCtx, piAddr2) == 0) { FREE(pdblBI); } freeCcsSparse(A); FREE(W); FREE(Wi); AssignOutputVariable(pvApiCtx, 1) = nbInputArgument(pvApiCtx) + 1; ReturnArguments(pvApiCtx); return 0; }
// Emit IR for an aggregate assignment: evaluate the RHS aggregate, then copy
// its value into the lvalue produced by the LHS, preserving volatility.
void CodeGenFunction::EmitAggregateAssignment(const Expr *LHS, const Expr *RHS) {
  auto RHSVal = EmitAggregateExpr(RHS);
  auto DestLV = EmitLValue(LHS);
  auto Loaded = Builder.CreateLoad(RHSVal.getAggregateAddr(),
                                   RHSVal.isVolatileQualifier());
  Builder.CreateStore(Loaded, DestLV.getPointer(), DestLV.isVolatileQualifier());
}
// A curve is animated when it owns key data; otherwise it is constant.
bool AnimationCurve::isAnimated() const
{
    return getPointer() != NULL;
}
// Resolve a property alias string to its enum value via the name->enum table.
inline EnumValue PropertyAliases::getPropertyEnum(const char* alias) const
{
    NameToEnum* nameTable = (NameToEnum*) getPointer(nameToEnum_offset);
    return nameTable->getEnum(alias, *this);
}
/*
 * Store Accumulator Register Addresing
 * M(Pn) <- AC.
 *
 * 06 Pn --
 *
 * Store contents of accumulator to the memory location held in Pn
 */
void o6(Vm* vm){
    int ptrIndex = charToInt(vm->IR[3]);
    intToCharArray(vm->ACC, vm->memory[getPointer(vm, ptrIndex)]);
}
/*
 * JNI entry point for edu.berkeley.bid.CUMACH.LSTMbwd: backward pass of an
 * LSTM cell over n elements per buffer. Each jobject argument is unwrapped
 * by getPointer to the raw float* handed to lstm_bwd — presumably device
 * (GPU) buffers given the CUMACH context; verify against lstm_bwd.
 * Returns lstm_bwd's status code.
 */
JNIEXPORT jint JNICALL Java_edu_berkeley_bid_CUMACH_LSTMbwd
(JNIEnv *env, jobject obj, jobject jinC, jobject jLIN1, jobject jLIN2, jobject jLIN3, jobject jLIN4,
 jobject jdoutC, jobject jdoutH, jobject jdinC, jobject jdLIN1, jobject jdLIN2, jobject jdLIN3, jobject jdLIN4, jint n)
{
    /* forward-pass inputs (names suggest cell state + four gate linear terms) */
    float *inC = (float*)getPointer(env, jinC);
    float *LIN1 = (float*)getPointer(env, jLIN1);
    float *LIN2 = (float*)getPointer(env, jLIN2);
    float *LIN3 = (float*)getPointer(env, jLIN3);
    float *LIN4 = (float*)getPointer(env, jLIN4);
    /* incoming gradients w.r.t. the cell state and hidden output */
    float *doutC = (float*)getPointer(env, jdoutC);
    float *doutH = (float*)getPointer(env, jdoutH);
    /* outgoing gradients written by lstm_bwd */
    float *dinC = (float*)getPointer(env, jdinC);
    float *dLIN1 = (float*)getPointer(env, jdLIN1);
    float *dLIN2 = (float*)getPointer(env, jdLIN2);
    float *dLIN3 = (float*)getPointer(env, jdLIN3);
    float *dLIN4 = (float*)getPointer(env, jdLIN4);
    return lstm_bwd(inC, LIN1, LIN2, LIN3, LIN4, doutC, doutH, dinC, dLIN1, dLIN2, dLIN3, dLIN4, n);
}
/*
 * Store Register to memory: Register Addressing
 * M(Pn) <- Rn.
 *
 * 08 Rn Pn
 *
 * Store contents of Register Rn into memory address pointed to by Pn, n{0..3}
 */
void o8(Vm* vm){
    int srcReg = charToInt(vm->IR[3]);
    int ptrIndex = charToInt(vm->IR[5]);
    intToCharArray(getRegister(vm, srcReg), vm->memory[getPointer(vm, ptrIndex)]);
}
/*
 * Marshal the Ruby argument vector into native call parameters.
 *
 * Converts each of the `argc` Ruby VALUEs in argv according to paramTypes,
 * writing the native representation into paramStorage and filling ffiValues
 * with the per-parameter pointers libffi reads. Callback parameters consume
 * entries of callbackParameters; a trailing missing argument is accepted when
 * it is the single callback and a Ruby block was given. `enums` (when not
 * Qnil) maps Symbols to integers for NATIVE_INT32 parameters.
 * Raises ArgumentError/TypeError on arity or type mismatches.
 */
void rbffi_SetupCallParams(int argc, VALUE* argv, int paramCount, Type** paramTypes,
        FFIStorage* paramStorage, void** ffiValues,
        VALUE* callbackParameters, int callbackCount, VALUE enums)
{
    VALUE callbackProc = Qnil;
    /* Fixed: "&paramStorage[0]" had been mangled by an encoding error
     * ("&para" rendered as the pilcrow character). */
    FFIStorage* param = &paramStorage[0];
    int i, argidx, cbidx, argCount;

    if (unlikely(paramCount != -1 && paramCount != argc)) {
        /* One missing trailing argument is tolerated when it is the single
         * callback and a block was supplied instead. */
        if (argc == (paramCount - 1) && callbackCount == 1 && rb_block_given_p()) {
            callbackProc = rb_block_proc();
        } else {
            rb_raise(rb_eArgError, "wrong number of arguments (%d for %d)", argc, paramCount);
        }
    }

    argCount = paramCount != -1 ? paramCount : argc;

    for (i = 0, argidx = 0, cbidx = 0; i < argCount; ++i) {
        Type* paramType = paramTypes[i];
        int type;

        /* Mapped types convert the Ruby value first, then marshal as the
         * underlying native type. */
        if (unlikely(paramType->nativeType == NATIVE_MAPPED)) {
            VALUE values[] = { argv[argidx], Qnil };
            argv[argidx] = rb_funcall2(((MappedType *) paramType)->rbConverter, id_to_native, 2, values);
            paramType = ((MappedType *) paramType)->type;
        }

        type = argidx < argc ? TYPE(argv[argidx]) : T_NONE;
        ffiValues[i] = param;

        switch (paramType->nativeType) {
            case NATIVE_INT8:
                param->s8 = NUM2INT(argv[argidx]);
                ++argidx;
                ADJ(param, INT8);
                break;

            case NATIVE_INT16:
                param->s16 = NUM2INT(argv[argidx]);
                ++argidx;
                ADJ(param, INT16);
                break;

            case NATIVE_INT32:
                /* Symbols are resolved through the enums mapping. */
                if (unlikely(type == T_SYMBOL && enums != Qnil)) {
                    VALUE value = rb_funcall(enums, id_map_symbol, 1, argv[argidx]);
                    param->s32 = NUM2INT(value);
                } else {
                    param->s32 = NUM2INT(argv[argidx]);
                }
                ++argidx;
                ADJ(param, INT32);
                break;

            case NATIVE_BOOL:
                if (type != T_TRUE && type != T_FALSE) {
                    rb_raise(rb_eTypeError, "wrong argument type (expected a boolean parameter)");
                }
                param->s8 = argv[argidx++] == Qtrue;
                ADJ(param, INT8);
                break;

            case NATIVE_UINT8:
                param->u8 = NUM2UINT(argv[argidx]);
                ADJ(param, INT8);
                ++argidx;
                break;

            case NATIVE_UINT16:
                param->u16 = NUM2UINT(argv[argidx]);
                ADJ(param, INT16);
                ++argidx;
                break;

            case NATIVE_UINT32:
                param->u32 = NUM2UINT(argv[argidx]);
                ADJ(param, INT32);
                ++argidx;
                break;

            case NATIVE_INT64:
                param->i64 = NUM2LL(argv[argidx]);
                ADJ(param, INT64);
                ++argidx;
                break;

            case NATIVE_UINT64:
                param->u64 = NUM2ULL(argv[argidx]);
                ADJ(param, INT64);
                ++argidx;
                break;

            case NATIVE_LONG:
                *(ffi_sarg *) param = NUM2LONG(argv[argidx]);
                ADJ(param, LONG);
                ++argidx;
                break;

            case NATIVE_ULONG:
                *(ffi_arg *) param = NUM2ULONG(argv[argidx]);
                ADJ(param, LONG);
                ++argidx;
                break;

            case NATIVE_FLOAT32:
                param->f32 = (float) NUM2DBL(argv[argidx]);
                ADJ(param, FLOAT32);
                ++argidx;
                break;

            case NATIVE_FLOAT64:
                param->f64 = NUM2DBL(argv[argidx]);
                ADJ(param, FLOAT64);
                ++argidx;
                break;

            case NATIVE_LONGDOUBLE:
                param->ld = rbffi_num2longdouble(argv[argidx]);
                ADJ(param, LONGDOUBLE);
                ++argidx;
                break;

            case NATIVE_STRING:
                if (type == T_NIL) {
                    param->ptr = NULL;
                } else {
                    if (rb_safe_level() >= 1 && OBJ_TAINTED(argv[argidx])) {
                        rb_raise(rb_eSecurityError, "Unsafe string parameter");
                    }
                    param->ptr = StringValueCStr(argv[argidx]);
                }
                ADJ(param, ADDRESS);
                ++argidx;
                break;

            case NATIVE_POINTER:
            case NATIVE_BUFFER_IN:
            case NATIVE_BUFFER_OUT:
            case NATIVE_BUFFER_INOUT:
                param->ptr = getPointer(argv[argidx++], type);
                ADJ(param, ADDRESS);
                break;

            case NATIVE_FUNCTION:
            case NATIVE_CALLBACK:
                /* A block-supplied callback consumes no positional argument. */
                if (callbackProc != Qnil) {
                    param->ptr = callback_param(callbackProc, callbackParameters[cbidx++]);
                } else {
                    param->ptr = callback_param(argv[argidx], callbackParameters[cbidx++]);
                    ++argidx;
                }
                ADJ(param, ADDRESS);
                break;

            case NATIVE_STRUCT:
                /* Structs are passed by value: point libffi at the struct
                 * memory itself, not at a slot in paramStorage. */
                ffiValues[i] = getPointer(argv[argidx++], type);
                break;

            default:
                rb_raise(rb_eArgError, "Invalid parameter type: %d", paramType->nativeType);
        }
    }
}
/*
 * Load Register from memory: Register Addressing
 * Rn <- M(Pn).
 *
 * 10 Rn Pn
 *
 * Load Register Rn with the contents of memory location pointed to by Pn, n{0..3}
 */
void o10(Vm* vm){
    int destReg = charToInt(vm->IR[3]);
    int ptrIndex = charToInt(vm->IR[5]);
    setRegister(vm, destReg, charArrayToInt(0, 6, vm->memory[getPointer(vm, ptrIndex)]));
}
int sci_taucs_chsolve(char* fname, void* pvApiCtx) { SciErr sciErr; int mb = 0, nb = 0; int i = 0, j = 0, n = 0, it_flag = 0, Refinement = 0; double norm_res = 0., norm_res_bis = 0.; long double *wk = NULL; int A_is_upper_triangular = 0; taucs_handle_factors * pC = NULL; SciSparse A; int mA = 0; // rows int nA = 0; // cols int iNbItem = 0; int* piNbItemRow = NULL; int* piColPos = NULL; double* pdblSpReal = NULL; double* pdblSpImg = NULL; int iComplex = 0; int* piAddr1 = NULL; int* piAddr2 = NULL; int* piAddr3 = NULL; void* pvPtr = NULL; double* pdblB = NULL; double* pdblX = NULL; double* pdblV = NULL; double* pdblRes = NULL; /* Check numbers of input/output arguments */ CheckInputArgument(pvApiCtx, 2, 3); CheckOutputArgument(pvApiCtx, 1, 1); /* First get arg #1 : the pointer to the Cholesky factors */ sciErr = getVarAddressFromPosition(pvApiCtx, 1, &piAddr1); if (sciErr.iErr) { printError(&sciErr, 0); return 1; } sciErr = getPointer(pvApiCtx, piAddr1, &pvPtr); if (sciErr.iErr) { printError(&sciErr, 0); return 1; } pC = (taucs_handle_factors *)pvPtr; /* Check if this pointer is a valid ref to a Cholesky factor object */ if ( ! 
IsAdrInList( (Adr)pC, ListCholFactors, &it_flag) ) { Scierror(999, _("%s: Wrong value for input argument #%d: not a valid reference to Cholesky factors"), fname, 1); return 1; } /* the number of rows/lines of the matrix */ n = pC->n; /* Get now arg #2 : the vector b */ sciErr = getVarAddressFromPosition(pvApiCtx, 2, &piAddr2); if (sciErr.iErr) { printError(&sciErr, 0); return 1; } sciErr = getMatrixOfDouble(pvApiCtx, piAddr2, &mb, &nb, &pdblB); if (sciErr.iErr) { printError(&sciErr, 0); return 1; } /* test if the right hand side is compatible */ if (mb != n || nb < 1) { Scierror(999, _("%s: Wrong size for input argument #%d.\n"), fname, 2); return 1; } if (Rhs == 3) { sciErr = getVarAddressFromPosition(pvApiCtx, 3, &piAddr3); if (sciErr.iErr) { printError(&sciErr, 0); return 1; } if (isVarComplex(pvApiCtx, piAddr3)) { Scierror(999, _("%s: Wrong type for input argument #%d: not compatible with the Cholesky factorization.\n"), fname, 3); return 1; } sciErr = getSparseMatrix(pvApiCtx, piAddr3, &mA, &nA, &iNbItem, &piNbItemRow, &piColPos, &pdblSpReal); if (sciErr.iErr) { printError(&sciErr, 0); return 1; } // fill struct sparse A.m = mA; A.n = nA; A.it = iComplex; A.nel = iNbItem; A.mnel = piNbItemRow; A.icol = piColPos; A.R = pdblSpReal; A.I = pdblSpImg; if (mA != nA || mA != n) { Scierror(999, _("%s: Wrong size for input argument #%d: not compatible with the Cholesky factorization.\n"), fname, 3); return 1; } Refinement = 1; A_is_upper_triangular = is_sparse_upper_triangular(&A); } else { Refinement = 0; } /* allocate memory for the solution x */ sciErr = allocMatrixOfDouble(pvApiCtx, nbInputArgument(pvApiCtx) + 1, mb, nb, &pdblX); if (sciErr.iErr) { printError(&sciErr, 0); return 1; } if (Refinement) { pdblRes = (double*)MALLOC(mb * sizeof(double)); if ( A_is_upper_triangular ) { if ( (wk = (long double*)MALLOC( n * sizeof(long double))) == NULL ) { if (pdblRes) { FREE(pdblRes); } Scierror(999, _("%s: not enough memory.\n"), fname); return 1; } } } /* allocate 
memory for a temporary vector v */ pdblV = (double*)MALLOC(mb * sizeof(double)); for (j = 0; j < nb ; j++) { taucs_vec_permute(n, &pdblB[j * mb], &pdblX[j * mb], pC->p); taucs_supernodal_solve_llt(pC->C, pdblV, &pdblX[j * mb]); /* FIXME : add a test here */ taucs_vec_ipermute(n, pdblV, &pdblX[j * mb], pC->p); if (Refinement) { /* do one iterative refinement */ residu_with_prec_for_chol(&A, &pdblX[j * mb], &pdblV[j * mb], pdblRes, &norm_res, A_is_upper_triangular, wk); /* FIXME: do a test if the norm_res has an anormal value and send a warning * (the user has certainly not give the good matrix A */ taucs_vec_permute(n, pdblRes, pdblV, pC->p); taucs_supernodal_solve_llt(pC->C, pdblRes, pdblV); /* FIXME : add a test here */ taucs_vec_ipermute(n, pdblRes, pdblV, pC->p); for ( i = 0 ; i < n ; i++ ) { pdblV[i] = pdblX[j * mb + i] - pdblV[i]; /* v is the refined solution */ } residu_with_prec_for_chol(&A, pdblV, &pdblB[j * mb], pdblRes, &norm_res_bis, A_is_upper_triangular, wk); /* accept it if the 2 norm of the residual is improved */ if ( norm_res_bis < norm_res ) { for ( i = 0 ; i < n ; i++ ) { pdblX[j * mb + i] = pdblV[i]; } } } } FREE(wk); FREE(pdblV); FREE(pdblRes); AssignOutputVariable(pvApiCtx, 1) = nbInputArgument(pvApiCtx) + 1; ReturnArguments(pvApiCtx); return 0; }
/*
 * Subtract from Accumulator: Register Addressing
 * AC <- AC - M(Pn).
 *
 *  22 Pn --
 *
 * Subtract from the accumulator the contents of the memory location
 * pointed to by Pn.
 */
void o22(Vm* vm){
    /* decode the pointer-register operand from the instruction word */
    int pn = charToInt(vm->IR[3]);

    /* AC <- AC - M(Pn) */
    vm->ACC -= charArrayToInt(0, 6, vm->memory[getPointer(vm, pn)]);
}
int sci_gpuKronecker(char *fname) { CheckRhs(2, 2); CheckLhs(1, 1); SciErr sciErr; int* piAddr_A = NULL; int* piAddr_B = NULL; GpuPointer* gpuPtrA = NULL; GpuPointer* gpuPtrB = NULL; GpuPointer* gpuPtrC = NULL; double* h = NULL; double* hi = NULL; int rows = 0; int cols = 0; void* pvPtrA = NULL; void* pvPtrB = NULL; int inputType_A; int inputType_B; try { if (!isGpuInit()) { throw "gpu is not initialised. Please launch gpuInit() before use this function."; } sciErr = getVarAddressFromPosition(pvApiCtx, 1, &piAddr_A); if (sciErr.iErr) { throw sciErr; } sciErr = getVarAddressFromPosition(pvApiCtx, 2, &piAddr_B); if (sciErr.iErr) { throw sciErr; } /* ---- Check type of arguments and get data ---- */ /* */ /* Pointer to host / Pointer to device */ /* Matrix real / Matrix complex */ /* */ /* ---------------------------------------------- */ sciErr = getVarType(pvApiCtx, piAddr_A, &inputType_A); if (sciErr.iErr) { throw sciErr; } sciErr = getVarType(pvApiCtx, piAddr_B, &inputType_B); if (sciErr.iErr) { throw sciErr; } if (inputType_A == sci_pointer) { sciErr = getPointer(pvApiCtx, piAddr_A, (void**)&pvPtrA); if (sciErr.iErr) { throw sciErr; } gpuPtrA = (GpuPointer*)pvPtrA; if (!PointerManager::getInstance()->findGpuPointerInManager(gpuPtrA)) { throw "gpuKronecker : Bad type for input argument #1: Variables created with GPU functions expected."; } if (useCuda() && gpuPtrA->getGpuType() != GpuPointer::CudaType) { throw "gpuKronecker : Bad type for input argument #1: A Cuda pointer expected."; } if (useCuda() == false && gpuPtrA->getGpuType() != GpuPointer::OpenCLType) { throw "gpuKronecker : Bad type for input argument #1: A OpenCL pointer expected."; } } else if (inputType_A == sci_matrix) { if (isVarComplex(pvApiCtx, piAddr_A)) { sciErr = getComplexMatrixOfDouble(pvApiCtx, piAddr_A, &rows, &cols, &h, &hi); if (sciErr.iErr) { throw sciErr; } #ifdef WITH_CUDA if (useCuda()) { gpuPtrA = new PointerCuda(h, hi, rows, cols); } #endif #ifdef WITH_OPENCL if (!useCuda()) { throw 
"gpuKronecker: not implemented with OpenCL."; } #endif } else { sciErr = getMatrixOfDouble(pvApiCtx, piAddr_A, &rows, &cols, &h); if (sciErr.iErr) { throw sciErr; } #ifdef WITH_CUDA if (useCuda()) { gpuPtrA = new PointerCuda(h, rows, cols); } #endif #ifdef WITH_OPENCL if (!useCuda()) { throw "gpuKronecker: not implemented with OpenCL."; } #endif } } else { throw "gpuKronecker : Bad type for input argument #1: A GPU or CPU matrix expected."; } if (inputType_B == sci_pointer) { sciErr = getPointer(pvApiCtx, piAddr_B, (void**)&pvPtrB); if (sciErr.iErr) { throw sciErr; } gpuPtrB = (GpuPointer*)pvPtrB; if (!PointerManager::getInstance()->findGpuPointerInManager(gpuPtrB)) { throw "gpuKronecker : Bad type for input argument #2: Variables created with GPU functions expected."; } if (useCuda() && gpuPtrB->getGpuType() != GpuPointer::CudaType) { throw "gpuKronecker : Bad type for input argument #2: A Cuda pointer expected."; } if (useCuda() == false && gpuPtrB->getGpuType() != GpuPointer::OpenCLType) { throw "gpuKronecker : Bad type for input argument #2: A OpenCL pointer expected."; } } else if (inputType_B == sci_matrix) { if (isVarComplex(pvApiCtx, piAddr_B)) { sciErr = getComplexMatrixOfDouble(pvApiCtx, piAddr_B, &rows, &cols, &h, &hi); if (sciErr.iErr) { throw sciErr; } #ifdef WITH_CUDA if (useCuda()) { gpuPtrB = new PointerCuda(h, hi, rows, cols); } #endif #ifdef WITH_OPENCL if (!useCuda()) { Scierror(999, "gpuKronecker: not implemented with OpenCL.\n"); } #endif } else { sciErr = getMatrixOfDouble(pvApiCtx, piAddr_B, &rows, &cols, &h); if (sciErr.iErr) { throw sciErr; } #ifdef WITH_CUDA if (useCuda()) { gpuPtrB = new PointerCuda(h, rows, cols); } #endif #ifdef WITH_OPENCL if (!useCuda()) { Scierror(999, "gpuKronecker: not implemented with OpenCL.\n"); } #endif } } else { throw "gpuKronecker : Bad type for input argument #2: A GPU or CPU matrix expected."; } #ifdef WITH_OPENCL if (!useCuda()) { throw "gpuKronecker: not implemented with OpenCL."; } #endif //performe 
operation. gpuPtrC = gpuKronecker(gpuPtrA, gpuPtrB); // Keep the result on the Device. PointerManager::getInstance()->addGpuPointerInManager(gpuPtrC); sciErr = createPointer(pvApiCtx, Rhs + 1, (void*)gpuPtrC); if (sciErr.iErr) { throw sciErr; } LhsVar(1) = Rhs + 1; if (inputType_A == sci_matrix && gpuPtrA != NULL) { delete gpuPtrA; } if (inputType_B == sci_matrix && gpuPtrB != NULL) { delete gpuPtrB; } PutLhsVar(); return 0; } catch (const char* str) { Scierror(999, "%s\n", str); } catch (SciErr E) { printError(&E, 0); } if (useCuda()) { if (inputType_A == sci_matrix && gpuPtrA != NULL) { delete gpuPtrA; } if (inputType_B == sci_matrix && gpuPtrB != NULL) { delete gpuPtrB; } if (gpuPtrC != NULL) { delete gpuPtrC; } } return EXIT_FAILURE; }
/*
 * JIT-compile `network` into a RunNetwork and register it under `name`.
 *
 * For every layer after the input layer, emits one LLVM function that
 * computes a single neuron:
 *     void f(vecN double* input, vecN double* weights, double* out, i32 index)
 * i.e. out[index] = activation(dot(input, weights)).  One RunNeuron work
 * item per neuron (carrying its own weight vector) is queued on the layer's
 * thread Queue, to be executed `threads`-wide at run time.
 *
 * Always returns true; the RunNetwork is owned by networkMap_ afterwards.
 */
bool compile(const nstr& name, NNet& network, size_t threads){
  RunNetwork* runNetwork = new RunNetwork;

  NNet::Layer* inputLayer = network.layer(0);
  size_t numLayers = network.numLayers();
  RunLayer* lastRunLayer = 0;

  // Layer 0 is the input layer; build one RunLayer per subsequent layer.
  for(size_t l = 1; l < numLayers; ++l){
    RunLayer* runLayer = new RunLayer;
    runLayer->queue = new Queue(threads);

    size_t inputLayerSize = inputLayer->size();
    NNet::Layer* layer = network.layer(l);
    size_t layerSize = layer->size();

    // Chain layers: from the second hidden layer on, this layer reads the
    // previous RunLayer's output vector.
    if(l > 1){
      runLayer->inputVecStart = lastRunLayer->outputVecStart;
      runLayer->inputVec = lastRunLayer->outputVec;
    }

    // Allocate an output vector for every layer except the last
    // (the final layer's output storage is presumably supplied by the
    // caller at run time -- not visible here; confirm).
    if(l < numLayers - 1){
      double* outputVecPtrStart;
      double* outputVecPtr;
      allocVector(layerSize, &outputVecPtrStart, &outputVecPtr);
      runLayer->outputVecStart = outputVecPtrStart;
      runLayer->outputVec = outputVecPtr;
    }

    // Signature of the per-neuron function:
    //   (input vec ptr, weight vec ptr, output scalar ptr, output index)
    TypeVec args;
    args.push_back(getPointer(doubleVecType(inputLayerSize)));
    args.push_back(getPointer(doubleVecType(inputLayerSize)));
    args.push_back(getPointer(doubleType()));
    args.push_back(int32Type());

    FunctionType* ft = FunctionType::get(voidType(), args, false);
    // NOTE(review): every layer's function is created under the same `name`;
    // LLVM will uniquify the symbol on collision -- confirm this is intended.
    Function* f = Function::Create(ft, Function::ExternalLinkage,
                                   name.c_str(), &module_);

    BasicBlock* entry = BasicBlock::Create(context_, "entry", f);
    builder_.SetInsertPoint(entry);

    // Name the four formal arguments for readable IR.
    auto aitr = f->arg_begin();
    Value* inputVecPtr = aitr;
    inputVecPtr->setName("input_vec_ptr");
    ++aitr;
    Value* weightVecPtr = aitr;
    weightVecPtr->setName("weight_vec_ptr");
    ++aitr;
    Value* outputVecPtr = aitr;
    outputVecPtr->setName("output_vec_ptr");
    ++aitr;
    Value* outputIndex = aitr;
    outputIndex->setName("output_index");

    // sum = dot(input, weights): vector multiply, then a horizontal add
    // unrolled over the vector lanes.
    Value* inputVec = builder_.CreateLoad(inputVecPtr, "input_vec");
    Value* weightVec = builder_.CreateLoad(weightVecPtr, "weight_vec");
    Value* mulVec = builder_.CreateFMul(inputVec, weightVec, "mul_vec");

    Value* sumActivation =
      builder_.CreateExtractElement(mulVec, getInt32(0), "sum_elem");
    for(size_t i = 1; i < inputLayerSize; ++i){
      Value* elem =
        builder_.CreateExtractElement(mulVec, getInt32(i), "sum_elem");
      sumActivation = builder_.CreateFAdd(sumActivation, elem, "sum_activation");
    }

    // Apply the activation (taken from neuron 0 -- assumes all neurons in
    // a layer share one activation; confirm) and store at out[index].
    Value* output = getActivationOutput(layer->neuron(0), sumActivation);
    Value* outputElement = builder_.CreateGEP(outputVecPtr, outputIndex, "out_elem");
    builder_.CreateStore(output, outputElement);
    builder_.CreateRetVoid();

    runLayer->f = f;
    // JIT the function now and keep the raw entry point for the workers.
    runLayer->fp = (void (*)(void*, void*, void*, int))
      engine_->getPointerToFunction(f);

    // One RunNeuron work item per neuron, each with its own weight vector
    // copied out of the NNet model.
    for(size_t j = 0; j < layerSize; ++j){
      NNet::Neuron* nj = layer->neuron(j);

      RunNeuron* runNeuron = new RunNeuron;
      runNeuron->layer = runLayer;
      runNeuron->outputIndex = j;

      double* weightVecPtrStart;
      double* weightVecPtr;
      allocVector(inputLayerSize, &weightVecPtrStart, &weightVecPtr);
      runNeuron->weightVecStart = weightVecPtrStart;
      runNeuron->weightVec = weightVecPtr;

      // weightVec[i] = weight of the edge from input neuron i to neuron j.
      for(size_t i = 0; i < inputLayerSize; ++i){
        NNet::Neuron* ni = inputLayer->neuron(i);
        weightVecPtr[i] = nj->weight(ni);
      }

      runLayer->queue->add(runNeuron);
    }

    runNetwork->layerVec.push_back(runLayer);
    inputLayer = layer;
    lastRunLayer = runLayer;
  }

  networkMap_.insert(make_pair(name, runNetwork));
  return true;
}