/**
 * Returns one of the names of a property.
 *
 * @param prop   the property enum value to look up
 * @param choice which name of the property's name group to return
 * @return the result of chooseNameInGroup() for the group offset that the
 *         enum-to-name table associates with prop
 */
inline const char*
PropertyAliases::getPropertyName(EnumValue prop, UPropertyNameChoice choice) const {
    // The enum-to-name table is located through enumToName_offset.
    NonContiguousEnumToOffset* nameTable =
        (NonContiguousEnumToOffset*) getPointer(enumToName_offset);

    return chooseNameInGroup(nameTable->getOffset(prop), choice);
}
Пример #2
0
// Returns the ant's location, which is whatever getPointer() yields.
Point* Ant::getLocation() const
{
    Point* location = getPointer();
    return location;
}
/**
 * Returns the value map of a property.
 *
 * @param prop the property enum value to look up
 * @return the ValueMap found at the offset that the enum-to-value table
 *         stores for prop, or NULL when that offset is zero
 */
const ValueMap*
PropertyAliases::getValueMap(EnumValue prop) const {
    NonContiguousEnumToOffset* valueTable =
        (NonContiguousEnumToOffset*) getPointer(enumToValue_offset);
    Offset mapOffset = valueTable->getOffset(prop);

    // A zero offset means there is nothing to resolve for this property.
    if (!mapOffset) {
        return NULL;
    }
    return (const ValueMap*) getPointerNull(mapOffset);
}
Пример #4
0
/* ========================================================================== */
/**
 * Scilab gateway for gpuLU: blocked LU decomposition of a matrix on the GPU.
 *
 * Input #1 is either a host matrix (real or complex) or a pointer to a
 * gpuMat_CUDA created by another GPU function. Optional input #2 is a
 * 1x2 real row vector [opt0, opt1]; it defaults to [0, 0]:
 *   - opt0: only 0 is honoured (1 and 2 print a notice and behave as 0);
 *           the result is copied back to a host matrix.
 *   - opt1: 0 releases the device copy of a host input, 1 keeps it on the
 *           device and returns a pointer to it as a further output.
 *
 * Only the real case calls decomposeBlockedLU(); see the NOTE below for
 * complex input.
 *
 * @param fname name of the Scilab function (unused here)
 * @return 0 on success, EXIT_FAILURE after an error has been reported
 */
int sci_gpuLU(char *fname)
{
    CheckRhs(1,2);
    CheckLhs(2,2);
    #ifdef WITH_CUDA
        cublasStatus status = CUBLAS_STATUS_SUCCESS;
        // Tracks whether cublasInit() succeeded so that the error path can
        // release CUBLAS too.
        bool    cublasStarted = false;
    #endif
    SciErr sciErr;
    int*    piAddr_A    = NULL;
    double* h_A         = NULL;
    double* hi_A        = NULL;
    int     rows_A      = 0;
    int     cols_A      = 0;

    int*    piAddr_Opt  = NULL;
    double* option      = NULL;
    int     rows_Opt    = 0;
    int     cols_Opt    = 0;

    void*   d_A         = NULL;
    int     na          = 0;
    void*   pvPtr       = NULL;

    int     size_A      = sizeof(double);
    bool    bComplex_A  = FALSE;
    // Initialised because the cleanup code after the handlers reads
    // inputType_A even when an exception was thrown before it was filled in.
    int     inputType_A   = 0;
    int     inputType_Opt = 0;
    int     posOutput   = 1;

    try
    {
        sciErr = getVarAddressFromPosition(pvApiCtx, 1, &piAddr_A);
        if(sciErr.iErr) throw sciErr;
        if(Rhs == 2)
        {
            sciErr = getVarAddressFromPosition(pvApiCtx, 2, &piAddr_Opt);
            if(sciErr.iErr) throw sciErr;
            sciErr = getVarType(pvApiCtx, piAddr_Opt, &inputType_Opt);
            if(sciErr.iErr) throw sciErr;
            if(inputType_Opt == sci_matrix)
            {
                sciErr = getMatrixOfDouble(pvApiCtx, piAddr_Opt, &rows_Opt, &cols_Opt, &option);
                if(sciErr.iErr) throw sciErr;
            }
            else
                throw "Option syntax is [number,number].";
        }
        else
        {
            // No option argument: build the default [0, 0] locally. This
            // buffer is owned by this function and released before returning.
            rows_Opt=1;
            cols_Opt=2;
            option = (double*)malloc(2*sizeof(double));
            if(option == NULL)
                throw "No more memory.";
            option[0]=0;
            option[1]=0;
        }

        if(rows_Opt != 1 || cols_Opt != 2)
            throw "Option syntax is [number,number].";

        if((int)option[1] == 1 && !isGpuInit())
            throw "gpu is not initialised. Please launch gpuInit() before use this function.";

        sciErr = getVarType(pvApiCtx, piAddr_A, &inputType_A);
        if(sciErr.iErr) throw sciErr;

        #ifdef WITH_CUDA
        if (useCuda())
        {
            if(inputType_A == sci_pointer)
            {
                // Input already lives on the device.
                sciErr = getPointer(pvApiCtx, piAddr_A, (void**)&pvPtr);
                if(sciErr.iErr) throw sciErr;

                gpuMat_CUDA* gmat;
                gmat = static_cast<gpuMat_CUDA*>(pvPtr);
                if(!gmat->useCuda)
                    throw "Please switch to OpenCL mode before use this data.";
                rows_A=gmat->rows;
                cols_A=gmat->columns;
                if(gmat->complex)
                {
                    bComplex_A = TRUE;
                    size_A = sizeof(cuDoubleComplex);
                    d_A=(cuDoubleComplex*)gmat->ptr->get_ptr();
                }
                else
                    d_A=(double*)gmat->ptr->get_ptr();

                // Initialize CUBLAS
                status = cublasInit();
                if (status != CUBLAS_STATUS_SUCCESS) throw status;
                cublasStarted = true;

                na = rows_A * cols_A;
            }
            else if(inputType_A == 1)
            {
                // Get size and data
                if(isVarComplex(pvApiCtx, piAddr_A))
                {
                    sciErr = getComplexMatrixOfDouble(pvApiCtx, piAddr_A, &rows_A, &cols_A, &h_A, &hi_A);
                    if(sciErr.iErr) throw sciErr;
                    size_A = sizeof(cuDoubleComplex);
                    bComplex_A = TRUE;
                }
                else
                {
                    sciErr = getMatrixOfDouble(pvApiCtx, piAddr_A, &rows_A, &cols_A, &h_A);
                    if(sciErr.iErr) throw sciErr;
                }

                na = rows_A * cols_A;

                // Initialize CUBLAS
                status = cublasInit();
                if (status != CUBLAS_STATUS_SUCCESS) throw status;
                cublasStarted = true;

                // Allocate device memory
                status = cublasAlloc(na, size_A, (void**)&d_A);
                if (status != CUBLAS_STATUS_SUCCESS) throw status;

                // Initialize the device matrices with the host matrices
                if(!bComplex_A)
                {
                    status = cublasSetMatrix(rows_A,cols_A, sizeof(double), h_A, rows_A, (double*)d_A, rows_A);
                    if (status != CUBLAS_STATUS_SUCCESS) throw status;
                }
                else
                    writecucomplex(h_A, hi_A, rows_A, cols_A, (cuDoubleComplex *)d_A);

            }
            else
                throw "Bad argument type.";

            // NOTE(review): no complex decomposition is performed below, so
            // for complex input this value is returned as-is. It is zeroed
            // so that the output is at least deterministic.
            cuDoubleComplex resComplex = {0.0, 0.0};
            // Performs operation
            if(!bComplex_A)
                status = decomposeBlockedLU(rows_A, cols_A, rows_A, (double*)d_A, 1);
       //     else
       //         resComplex = cublasZtrsm(na,(cuDoubleComplex*)d_A);

            if (status != CUBLAS_STATUS_SUCCESS) throw status;

            // Put the result in scilab
            switch((int)option[0])
            {
                case 2 :
                case 1 :    sciprint("The first option must be 0 for this function. Considered as 0.\n");
                            // intentional fall through: handled as option 0

                case 0 :    // Keep the result on the Host.
                {           // Put the result in scilab
                    if(!bComplex_A)
                    {
                        double* h_res = NULL;
                        sciErr=allocMatrixOfDouble(pvApiCtx, Rhs + posOutput, rows_A, cols_A, &h_res);
                        if(sciErr.iErr) throw sciErr;
                        status = cublasGetMatrix(rows_A,cols_A, sizeof(double), (double*)d_A, rows_A, h_res, rows_A);
                        if (status != CUBLAS_STATUS_SUCCESS) throw status;
                    }
                    else
                    {
                        sciErr = createComplexMatrixOfDouble(pvApiCtx, Rhs + posOutput, 1, 1, &resComplex.x,&resComplex.y);
                        if(sciErr.iErr) throw sciErr;
                    }

                    LhsVar(posOutput)=Rhs+posOutput;
                    posOutput++;
                    break;
                }

                default : throw "First option argument must be 0 or 1 or 2.";
            }

            switch((int)option[1])
            {
                case 0 :    // Don't keep the data input on Device.
                {
                    if(inputType_A == sci_matrix)
                    {
                        status = cublasFree(d_A);
                        if (status != CUBLAS_STATUS_SUCCESS) throw status;
                        d_A = NULL;
                    }
                    break;
                }
                case 1 :    // Keep data of the first argument on Device and return the Device pointer.
                {
                    if(inputType_A == sci_matrix)
                    {
                        gpuMat_CUDA* dptr;
                        gpuMat_CUDA tmp={getCudaContext()->genMatrix<double>(getCudaQueue(),rows_A*cols_A),rows_A,cols_A};
                        dptr=new gpuMat_CUDA(tmp);
                        dptr->useCuda = true;
                        dptr->ptr->set_ptr((double*)d_A);
                        if(bComplex_A)
                            dptr->complex=TRUE;
                        else
                            dptr->complex=FALSE;

                        sciErr = createPointer(pvApiCtx,Rhs+posOutput, (void*)dptr);
                        if(sciErr.iErr) throw sciErr;
                        LhsVar(posOutput)=Rhs+posOutput;
                    }
                    else
                        throw "The first input argument is already a GPU variable.";

                    posOutput++;
                    break;
                }

                default : throw "Second option argument must be 0 or 1.";
            }
            // Shutdown. The flag is cleared first so that a failing shutdown
            // is not attempted a second time by the error path.
            cublasStarted = false;
            status = cublasShutdown();
            if (status != CUBLAS_STATUS_SUCCESS) throw status;
        }
        #endif

        #ifdef WITH_OPENCL
        if (!useCuda())
        {
            throw "not implemented with OpenCL.";
        }
        #endif
        if(Rhs == 1)
        {
            free(option);
            option = NULL;
        }

        // posOutput - 1 outputs were produced; it must match what was asked.
        if(posOutput < Lhs+1)
            throw "Too many output arguments.";

        if(posOutput > Lhs+1)
            throw "Too few output arguments.";

        PutLhsVar();
        return 0;
    }
    catch(const char* str)
    {
        Scierror(999,"%s\n",str);
    }
    catch(SciErr& E)
    {
        printError(&E, 0);
    }
    #ifdef WITH_CUDA
    catch(cudaError_t cudaE)
    {
        GpuError::treat_error<CUDAmode>((CUDAmode::Status)cudaE);
    }
    catch(cublasStatus CublasE)
    {
        GpuError::treat_error<CUDAmode>((CUDAmode::Status)CublasE,1);
    }
    // Error path only: release what this call allocated on the device.
    if (useCuda())
    {
        if(inputType_A == 1 && d_A != NULL) cudaFree(d_A);
        if(cublasStarted) cublasShutdown();
    }
    #endif
    #ifdef WITH_OPENCL
    if (!useCuda())
    {
        Scierror(999,"not implemented with OpenCL.\n");
    }
    #endif
    // The default option vector is only owned here when it was malloc'ed.
    if(Rhs == 1 && option != NULL) free(option);
    return EXIT_FAILURE;
}
Пример #5
0
/**
 * Evaluates the curve at the given time.
 *
 * - Without key data (getPointer() returns null) the constant value is
 *   returned.
 * - With a single key, that key's value is returned.
 * - A time that matches a key exactly returns that key's value.
 * - A time before the first key / after the last key clamps to the first /
 *   last key's value.
 * - Otherwise the value is interpolated between the two surrounding keys
 *   using the curve's interpolation type (linear for unknown types).
 *
 * @param time the time to sample the curve at
 * @return the curve value at that time
 */
float AnimationCurve::getValue(float time) const
{
	const AnimatedKeys* pKeys = getPointer();

	if (!pKeys)
		return m_constantValue;

	const AnimatedKeys& ak = *pKeys;

	// An empty key map previously ended up decrementing end() of an empty
	// map, which is undefined behaviour.
	// NOTE(review): falling back to the constant value is an assumption about
	// what "no keys" should mean - confirm against how keys get populated.
	if (ak.keys.empty())
		return m_constantValue;

	if (ak.keys.size() == 1) // it's constant, so return that
		return ak.keys.begin()->second;

	typedef std::map<float, float>::const_iterator KeyIterator;

	// First key whose time is not less than the requested time. One lookup
	// is enough to tell apart the exact hit, both clamping cases and the
	// interpolation case.
	KeyIterator itUpper = ak.keys.lower_bound(time);

	if (itUpper == ak.keys.end())
	{
		// after last frame
		KeyIterator itLastKey = ak.keys.end();
		--itLastKey;
		return itLastKey->second;
	}

	// before first frame, or exactly on a key
	if (itUpper == ak.keys.begin() || !(time < itUpper->first))
		return itUpper->second;

	// Strictly between two keys: interpolate.
	KeyIterator itPrevKey = itUpper;
	--itPrevKey;

	float lowerTime = itPrevKey->first;
	float lowerValue = itPrevKey->second;

	float upperTime = itUpper->first;
	float upperValue = itUpper->second;

	// Keys of a map are distinct, so the denominator is never zero here.
	float timeRatio = (time - lowerTime) / (upperTime - lowerTime);

	float value = 0.0f;

	switch (ak.interpolationType)
	{
		default:
		case eLinearInterpolation:
			value = linearTween(timeRatio, lowerValue, upperValue);
			break;
		case eCubicInterpolation:
			value = cubicTween(timeRatio, lowerValue, upperValue);
			break;
		case eQuadraticInterpolation:
			value = quadraticTween(timeRatio, lowerValue, upperValue);
			break;
	}

	return value;
}
Пример #6
0
	/**
	 * Builds a printable description of this atom.
	 *
	 * The text has the form "#[<type> <payload>]", where the payload printed
	 * depends on the atom's type; an unrecognised type gives "#[unknown]".
	 */
	string Atom::getString () const {
		stringstream out;

		switch (getType ()) {
		// Types printed with the raw pointer member of the value.
		case T_BROKENHEART:
			out << "#[broken-heart " << getValue ().mPointer << "]";
			break;
		case T_EOF:
			out << "#[eof " << getValue ().mPointer << "]";
			break;
		case T_NULL:
			out << "#[null  " << getValue ().mPointer << "]";
			break;

		// Types printed through their typed accessor.
		case T_CHAR:
			out << "#[char " << getChar () << "]";
			break;
		case T_INTEGER:
			out << "#[integer " << getInteger () << "]";
			break;
		case T_BOOLEAN:
			out << "#[boolean " << getBoolean () << "]";
			break;

		// Types printed with the raw integer member of the value.
		case T_PRIMITIVE_PROCEDURE:
			out << "#[primitive-procedure " << getValue ().mInteger << "]";
			break;
		case T_PORT:
			out << "#[port " << getValue ().mInteger << "]";
			break;

		// Types printed with getPointer ().
		case T_POINTER:
			out << "#[pointer " << getPointer () << "]";
			break;
		case T_PAIR:
			out << "#[pair " << getPointer () << "]";
			break;
		case T_VECTOR:
			out << "#[vector " << getPointer () << "]";
			break;
		case T_STRING:
			out << "#[string " << getPointer () << "]";
			break;
		case T_SYMBOL:
			out << "#[symbol " << getPointer () << "]";
			break;
		case T_PROCEDURE:
			out << "#[procedure " << getPointer () << "]";
			break;

		default:
			out << "#[unknown]";
			break;
		}

		return out.str ();
	}
Пример #7
0
/**
 * Scilab gateway for gpuOnes: creates a GPU matrix initialised with ones.
 *
 * Accepted calls:
 *   - one argument: a host matrix or a GPU pointer; the result has the same
 *     number of rows and columns as that argument;
 *   - two arguments: two real scalars giving the rows and columns.
 * More than two arguments are rejected (hypermatrices are not implemented).
 *
 * @param fname name of the Scilab function (unused here)
 * @return 0 on success, EXIT_FAILURE after an error has been reported
 */
int sci_gpuOnes(char *fname)
{
    CheckLhs(1, 1);

    void* pvPtr = NULL;
    int* piAddr = NULL;
    SciErr sciErr;
    int inputType = 0;

    int iRows = 0;
    int iCols = 0;

    // Formatted messages are thrown as pointers, so their storage has to
    // outlive the throwing scope; a local array would already be destroyed
    // when the handler reads it.
    // NOTE(review): static storage assumes gateways are not run concurrently.
    static char formattedError[128];

    GpuPointer* gpOut = NULL;

    try
    {
        if (!isGpuInit())
        {
            throw "gpu is not initialised. Please launch gpuInit() before use this function.";
        }

        if (Rhs == 1)
        {
            sciErr = getVarAddressFromPosition(pvApiCtx, 1, &piAddr);
            if (sciErr.iErr)
            {
                throw sciErr;
            }

            sciErr = getVarType(pvApiCtx, piAddr, &inputType);
            if (sciErr.iErr)
            {
                throw sciErr;
            }

            if (inputType == sci_pointer)
            {
                sciErr = getPointer(pvApiCtx, piAddr, (void**)&pvPtr);
                if (sciErr.iErr)
                {
                    throw sciErr;
                }

                GpuPointer* gmat = (GpuPointer*)(pvPtr);
                if (!PointerManager::getInstance()->findGpuPointerInManager(gmat))
                {
                    throw "gpuOnes : Bad type for input argument #1. Only variables created with GPU functions allowed.";
                }

                if (useCuda() && gmat->getGpuType() != GpuPointer::CudaType)
                {
                    throw "gpuOnes : Bad type for input argument #1: A Cuda pointer expected.";
                }

                if (useCuda() == false && gmat->getGpuType() != GpuPointer::OpenCLType)
                {
                    throw "gpuOnes : Bad type for input argument #1: A OpenCL pointer expected.";
                }

                if (gmat->getDims() > 2)
                {
                    throw "gpuOnes : Hypermatrix not yet implemented.";
                }

                iRows = gmat->getRows();
                iCols = gmat->getCols();
            }
            else if (inputType == sci_matrix)
            {
                // Only the dimensions are needed, the data itself is ignored.
                double* h = NULL;
                sciErr = getMatrixOfDouble(pvApiCtx, piAddr, &iRows, &iCols, &h);
                if (sciErr.iErr)
                {
                    throw sciErr;
                }
            }
            else
            {
                throw "gpuOnes : Bad type for input argument #1 : A Matrix or GPU pointer expected.";
            }
        }
        else
        {
            if (Rhs > 2)
            {
                throw "gpuOnes : Hypermatrix not yet implemented.";
            }

            // Without this guard a call with no argument would read
            // dimensions that were never written.
            if (Rhs < 1)
            {
                throw "gpuOnes : Wrong number of input arguments: 1 or 2 expected.";
            }

            // Exactly two scalar dimensions at this point; a fixed array
            // needs no cleanup when an error is thrown inside the loop.
            int dims[2] = {0, 0};
            for (int i = 0; i < Rhs; i++)
            {
                sciErr = getVarAddressFromPosition(pvApiCtx, i + 1, &piAddr);
                if (sciErr.iErr)
                {
                    throw sciErr;
                }

                sciErr = getVarType(pvApiCtx, piAddr, &inputType);
                if (sciErr.iErr)
                {
                    throw sciErr;
                }

                if (inputType != sci_matrix)
                {
                    // The message is short enough for formattedError.
                    sprintf(formattedError, "gpuOnes : Bad type for input argument #%d : A Matrix expected.", i + 1);
                    throw static_cast<const char*>(formattedError);
                }

                double* h = NULL;
                sciErr = getMatrixOfDouble(pvApiCtx, piAddr, &iRows, &iCols, &h);
                if (sciErr.iErr)
                {
                    throw sciErr;
                }

                if (iRows * iCols != 1)
                {
                    sprintf(formattedError, "gpuOnes : Wrong size for input argument #%d : A scalar expected.", i + 1);
                    throw static_cast<const char*>(formattedError);
                }

                dims[i] = (int)h[0];
            }

            iRows = dims[0];
            iCols = dims[1];
        }

#ifdef WITH_CUDA
        if (useCuda())
        {
            gpOut = new PointerCuda(iRows, iCols, false);
            gpOut->initMatrix(1);
        }
#endif
#ifdef WITH_OPENCL
        if (!useCuda())
        {
            // Thrown rather than only reported, so that no NULL pointer is
            // registered and returned below.
            throw "gpuOnes: not implemented with OpenCL.";
        }
#endif

        // No backend produced a matrix (e.g. built without the active one).
        if (gpOut == NULL)
        {
            throw "gpuOnes : Unable to create the GPU matrix.";
        }

        PointerManager::getInstance()->addGpuPointerInManager(gpOut);
        sciErr = createPointer(pvApiCtx, Rhs + 1, (void*)gpOut);
        if (sciErr.iErr)
        {
            throw sciErr;
        }

        LhsVar(1) = Rhs + 1;
        PutLhsVar();

        return 0;
    }
#ifdef WITH_CUDA
    catch (cudaError_t cudaE)
    {
        GpuError::treat_error<CUDAmode>((CUDAmode::Status)cudaE);
    }
#endif
    catch (const char* str)
    {
        Scierror(999, "%s\n", str);
    }
    catch (SciErr& E)
    {
        printError(&E, 0);
    }

    return EXIT_FAILURE;
}
Пример #8
0
/**
 * Scilab gateway for gpuMatrix: reshapes a matrix into a new GPU matrix.
 *
 * Accepted calls:
 *   - gpuMatrix(A, [rows, cols])
 *   - gpuMatrix(A, rows, cols)
 * A is a host matrix (real or complex) or a GPU pointer. One of the two
 * sizes may be -1, in which case it is deduced from the element count of A.
 * The result is a clone of the GPU data with the new dimensions.
 *
 * @param fname name of the Scilab function (unused here)
 * @return 0 on success, EXIT_FAILURE after an error has been reported
 */
int sci_gpuMatrix(char *fname)
{
    CheckRhs(2, 3);
    CheckLhs(1, 1);

    SciErr sciErr;

    int*    piAddr_A    = NULL;
    int     inputType_A = 0;
    int*    piAddr_R    = NULL;
    int     inputType_R = 0;
    int*    piAddr_C    = NULL;
    int     inputType_C = 0;

    int     rows        = 0;
    int     cols        = 0;
    int     newRows     = 0;
    int     newCols     = 0;

    void*   pvPtr       = NULL;
    GpuPointer* gpuPtrA = NULL;

    try
    {
        if (!isGpuInit())
        {
            throw "gpu is not initialised. Please launch gpuInit() before use this function.";
        }

        //--- Get input matrix ---
        sciErr = getVarAddressFromPosition(pvApiCtx, 1, &piAddr_A);
        if (sciErr.iErr)
        {
            throw sciErr;
        }

        // Get size and data
        sciErr = getVarType(pvApiCtx, piAddr_A, &inputType_A);
        if (sciErr.iErr)
        {
            throw sciErr;
        }

        //--- Get new Rows size or vector of sizes---
        sciErr = getVarAddressFromPosition(pvApiCtx, 2, &piAddr_R);
        if (sciErr.iErr)
        {
            throw sciErr;
        }

        // Get size and data
        sciErr = getVarType(pvApiCtx, piAddr_R, &inputType_R);
        if (sciErr.iErr)
        {
            throw sciErr;
        }

        if (inputType_R != sci_matrix)
        {
            throw "gpuMatrix : Bad type for input argument #2: A real scalar or row vector expected.";
        }

        // The size argument itself must be real. This used to test
        // argument #1, which rejected every complex input matrix.
        if (isVarComplex(pvApiCtx, piAddr_R))
        {
            throw "gpuMatrix : Bad type for input argument #2: A real scalar or row vector expected.";
        }
        else
        {
            double* dRows = NULL;
            sciErr = getMatrixOfDouble(pvApiCtx, piAddr_R, &rows, &cols, &dRows);
            if (sciErr.iErr)
            {
                throw sciErr;
            }
            if (nbInputArgument(pvApiCtx) == 2)
            {
                if (rows != 1 || cols != 2)
                {
                    throw "gpuMatrix : Bad size for input argument #2: A row vector of size two expected.";
                }

                newRows = (int)dRows[0];
                newCols = (int)dRows[1];

                // Both sizes come from argument #2 in this form.
                if (newCols < -1 || newCols == 0)
                {
                    throw "gpuMatrix : Wrong value for input argument #2: -1 or positive value expected.";
                }
            }
            else
            {
                newRows = (int)(*dRows);
            }

            if (newRows < -1 || newRows == 0)
            {
                throw "gpuMatrix : Wrong value for input argument #2: -1 or positive value expected.";
            }
        }

        if (nbInputArgument(pvApiCtx) == 3)
        {
            //--- Get new Cols size---
            sciErr = getVarAddressFromPosition(pvApiCtx, 3, &piAddr_C);
            if (sciErr.iErr)
            {
                throw sciErr;
            }

            // Get size and data
            sciErr = getVarType(pvApiCtx, piAddr_C, &inputType_C);
            if (sciErr.iErr)
            {
                throw sciErr;
            }

            if (inputType_C != sci_matrix)
            {
                throw "gpuMatrix : Bad type for input argument #3: A real scalar expected.";
            }

            // Same fix as above: validate argument #3, not argument #1.
            if (isVarComplex(pvApiCtx, piAddr_C))
            {
                throw "gpuMatrix : Bad type for input argument #3: A real scalar expected.";
            }
            else
            {
                double* dCols = NULL;
                sciErr = getMatrixOfDouble(pvApiCtx, piAddr_C, &rows, &cols, &dCols);
                if (sciErr.iErr)
                {
                    throw sciErr;
                }

                newCols = (int)(*dCols);

                if (newCols < -1 || newCols == 0)
                {
                    throw "gpuMatrix : Wrong value for input argument #3: -1 or positive value expected.";
                }
            }
        }

        // From here on rows/cols describe the input matrix A.
        if (inputType_A == sci_pointer)
        {
            sciErr = getPointer(pvApiCtx, piAddr_A, (void**)&pvPtr);
            if (sciErr.iErr)
            {
                throw sciErr;
            }

            gpuPtrA = (GpuPointer*)pvPtr;
            if (!PointerManager::getInstance()->findGpuPointerInManager(gpuPtrA))
            {
                throw "gpuMatrix : Bad type for input argument #1: Variables created with GPU functions expected.";
            }

            if (useCuda() && gpuPtrA->getGpuType() != GpuPointer::CudaType)
            {
                throw "gpuMatrix : Bad type for input argument #1: A Cuda pointer expected.";
            }

            if (useCuda() == false && gpuPtrA->getGpuType() != GpuPointer::OpenCLType)
            {
                throw "gpuMatrix : Bad type for input argument #1: A OpenCL pointer expected.";
            }

            rows = gpuPtrA->getRows();
            cols = gpuPtrA->getCols();
        }
        else if (inputType_A == sci_matrix)
        {
            double* h = NULL;
            double* hi = NULL;
            const bool bComplexA = isVarComplex(pvApiCtx, piAddr_A) != 0;

            if (bComplexA)
            {
                sciErr = getComplexMatrixOfDouble(pvApiCtx, piAddr_A, &rows, &cols, &h, &hi);
            }
            else
            {
                sciErr = getMatrixOfDouble(pvApiCtx, piAddr_A, &rows, &cols, &h);
            }

            // Check the read before the data is uploaded to the device.
            if (sciErr.iErr)
            {
                throw sciErr;
            }

#ifdef WITH_CUDA
            if (useCuda())
            {
                // This temporary device copy is owned here and deleted
                // before returning.
                if (bComplexA)
                {
                    gpuPtrA = new PointerCuda(h, hi, rows, cols);
                }
                else
                {
                    gpuPtrA = new PointerCuda(h, rows, cols);
                }
            }
#endif
        }
        else
        {
            throw "gpuMatrix : Bad type for input argument #1: A GPU or CPU matrix expected.";
        }

#ifdef WITH_OPENCL
        if (!useCuda())
        {
            // Thrown rather than only reported, so that execution does not
            // go on to clone a pointer that may be NULL.
            throw "gpuMatrix: not implemented with OpenCL.";
        }
#endif

        // No backend produced a device matrix for a host input.
        if (gpuPtrA == NULL)
        {
            throw "gpuMatrix : Unable to create the GPU matrix for input argument #1.";
        }

        // Deduce the size given as -1 from the element count.
        if (newRows == -1 && newCols != -1)
        {
            newRows = rows * cols / newCols;
        }
        else if (newRows != -1 && newCols == -1)
        {
            newCols = rows * cols / newRows;
        }

        if (rows * cols != newRows * newCols)
        {
            throw "gpuMatrix : Wrong value for input arguments #2 and 3: Correct size expected.";
        }

        GpuPointer* gpuOut = gpuPtrA->clone();
        gpuOut->setRows(newRows);
        gpuOut->setCols(newCols);

        // Put the result in scilab
        PointerManager::getInstance()->addGpuPointerInManager(gpuOut);
        sciErr = createPointer(pvApiCtx, Rhs + 1, (void*)gpuOut);
        if (sciErr.iErr)
        {
            throw sciErr;
        }
        LhsVar(1) = Rhs + 1;

        if (inputType_A == 1 && gpuPtrA != NULL)
        {
            delete gpuPtrA;
            gpuPtrA = NULL;
        }

        PutLhsVar();
        return 0;
    }
    catch (const char* str)
    {
        Scierror(999, "%s\n", str);
    }
    catch (SciErr& E)
    {
        printError(&E, 0);
    }

    // Error path: release the temporary device copy of a host input.
    if (inputType_A == 1 && gpuPtrA != NULL)
    {
        delete gpuPtrA;
    }

    return EXIT_FAILURE;
}
/*
 * Subtract From Pointer Immediate
 * Pn <- Pn - XX, where each X is a digit in {0 .. 9}.
 *
 * Instruction format: 02 Pn XX
 *
 * Decrements pointer register Pn by the immediate integer XX.
 *
 * The pointer register index is decoded from vm->IR[3] with charToInt();
 * the immediate is decoded with opToInt(getOp(4, vm->IR)). The current
 * pointer value is read with getPointer() and the difference is written
 * back with setPointer().
 */
void o2(Vm* vm){
    setPointer(vm, charToInt(vm->IR[3]), getPointer(vm, charToInt(vm->IR[3])) - opToInt(getOp(4,vm->IR)));
}
Пример #10
0
void RenderingEngine::generateShandowMaps(std::shared_ptr<BaseLight> light, GameObject* root){


	auto shandowInfo = light->getShandowInfo();


	int shandowMapIndex = 0;

	lightMatrix.identity();

	if (shandowInfo) shandowMapIndex = shandowInfo->getShandowMapSizeAsPowerOf2() - 1;

	setTexture("transparencyShandowMap", transparencyShandowMaps.at(shandowMapIndex));
	transparencyShandowMaps.at(shandowMapIndex)->bindAsRenderTarget();
	glClearColor(1, 1, 0, 0);
	glClear(GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT);


	setTexture("shandowMap", shandowMaps.at(shandowMapIndex));
	shandowMaps.at(shandowMapIndex)->bindAsRenderTarget();
	glClearColor(1, 1, 0, 0);
	glClear(GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT);

	getTexture("transparencyShandowColorBuffer")->bindAsRenderTarget();
	glClearColor(0, 0, 0, 0);
	glClear(GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT);


	if (shandowInfo){

		altCamera->setProjection(shandowInfo->getProjection());
		altCamera->getTransform()->setPosition(light->getTransform()->getWorldPosition());
		altCamera->getTransform()->setRotation(light->getTransform()->getWorldRotation());


		lightMatrix = RenderingEngine::biasMatrix * altCamera->getViewProjection();

		setFloat("shandowVarianceMin", shandowInfo->getMinVariance());
		setFloat("shandowLightBleedReduction", shandowInfo->getLightBleedReduction());
		bool flip = shandowInfo->getFlipfaces();


		


		Camera* temp = mainCamera;
		mainCamera = altCamera.get();
		glEnable(GL_CULL_FACE);

		if (flip) glCullFace(GL_FRONT);

		shandowMaps.at(shandowMapIndex)->bindAsRenderTarget();

		renderAllAlpha(false, scast(getPointer("depthMapGenerator")), root);



		transparencyShandowMaps.at(shandowMapIndex)->bindAsRenderTarget();

		renderAllAlpha(true, scast(getPointer("depthMapGenerator")), root);

		getTexture("transparencyShandowColorBuffer")->bindAsRenderTarget();

		renderAllAlpha(true, scast(getPointer("defaultShader")), root);



		if (flip) glCullFace(GL_BACK);

		glDisable(GL_CULL_FACE);
		mainCamera = temp;

		if (shandowInfo->getShandowSoftness() != 0){
		//	blurShandowMap(shandowMapIndex, shandowInfo->getShandowSoftness(), pcast(getPointer("gausBlurFilter")));
		}

	}
	else{
		setFloat("shandowVarianceMin", 0.002f);
		setFloat("shandowLightBleedReduction", 0.2f);


	}






}
Пример #11
0
// Packs the pointed-to object, when there is one, and then packs this
// pointer itself with packShallow(), whose result is returned.
Long QueuePtr::pack(void *space, short isSpacePtr)
{
  // Deep part: only when a target is attached.
  if (getPointer())
  {
    getPointer()->pack(space);
  }

  // Shallow part: its result is what the caller receives.
  const Long packedSize = packShallow(space, isSpacePtr);
  return packedSize;
}
Пример #12
0
int sci_umf_lusolve(char* fname, unsigned long l)
{
    SciErr sciErr;

    int mb      = 0;
    int nb      = 0;
    int it_flag = 0;
    int i       = 0;
    int j       = 0;

    int NoTranspose = 0;
    int NoRaffinement = 0;
    SciSparse AA;
    CcsSparse A;

    /* umfpack stuff */
    double Info[UMFPACK_INFO]; // double *Info = (double *) NULL;
    double Control[UMFPACK_CONTROL];
    void* Numeric = NULL;
    int lnz = 0, unz = 0, n = 0, n_col = 0, nz_udiag = 0, umf_flag = 0;
    int* Wi = NULL;
    int mW = 0;
    double *W = NULL;

    int iComplex = 0;

    int* piAddr1 = NULL;
    int* piAddr2 = NULL;
    int* piAddr3 = NULL;
    int* piAddr4 = NULL;

    double* pdblBR = NULL;
    double* pdblBI = NULL;
    double* pdblXR = NULL;
    double* pdblXI = NULL;

    int mA              = 0; // rows
    int nA              = 0; // cols
    int iNbItem         = 0;
    int* piNbItemRow    = NULL;
    int* piColPos       = NULL;
    double* pdblSpReal  = NULL;
    double* pdblSpImg   = NULL;

    /* Check numbers of input/output arguments */
    CheckInputArgument(pvApiCtx, 2, 4);
    CheckOutputArgument(pvApiCtx, 1, 1);

    /* First get arg #1 : the pointer to the LU factors */
    sciErr = getVarAddressFromPosition(pvApiCtx, 1, &piAddr1);
    if (sciErr.iErr)
    {
        printError(&sciErr, 0);
        return 1;
    }

    sciErr = getPointer(pvApiCtx, piAddr1, &Numeric);
    if (sciErr.iErr)
    {
        printError(&sciErr, 0);
        return 1;
    }

    /* Check if this pointer is a valid ref to a umfpack LU numeric object */
    if ( ! IsAdrInList(Numeric, ListNumeric, &it_flag) )
    {
        Scierror(999, _("%s: Wrong value for input argument #%d: Must be a valid reference to (umf) LU factors.\n"), fname, 1);
        return 1;
    }

    /*  get some parameters of the factorization (for some checking) */
    if ( it_flag == 0 )
    {
        umfpack_di_get_lunz(&lnz, &unz, &n, &n_col, &nz_udiag, Numeric);
    }
    else
    {
        iComplex = 1;
        umfpack_zi_get_lunz(&lnz, &unz, &n, &n_col, &nz_udiag, Numeric);
    }

    if ( n != n_col )
    {
        Scierror(999, _("%s: An error occurred: %s.\n"), fname, _("This is not a factorization of a square matrix"));
        return 1;
    }

    if ( nz_udiag < n )
    {
        Scierror(999, _("%s: An error occurred: %s.\n"), fname, _("This is a factorization of a singular matrix"));
        return 1;
    }

    /* Get now arg #2 : the vector b */
    sciErr = getVarAddressFromPosition(pvApiCtx, 2, &piAddr2);
    if (sciErr.iErr)
    {
        printError(&sciErr, 0);
        return 1;
    }

    if (isVarComplex(pvApiCtx, piAddr2))
    {
        iComplex = 1;
        sciErr = getComplexMatrixOfDouble(pvApiCtx, piAddr2, &mb, &nb, &pdblBR, &pdblBI);
    }
    else
    {
        sciErr = getMatrixOfDouble(pvApiCtx, piAddr2, &mb, &nb, &pdblBR);
    }

    if (sciErr.iErr)
    {
        printError(&sciErr, 0);
        return 1;
    }

    if (mb != n || nb < 1)    /* test if the right hand side is compatible */
    {
        Scierror(999, _("%s: Wrong size for input argument #%d.\n"), fname, 2);
        return 1;
    }

    /* allocate memory for the solution x */
    if (iComplex)
    {
        sciErr = allocComplexMatrixOfDouble(pvApiCtx, nbInputArgument(pvApiCtx) + 1, mb, nb, &pdblXR, &pdblXI);
    }
    else
    {
        sciErr = allocMatrixOfDouble(pvApiCtx, nbInputArgument(pvApiCtx) + 1, mb, nb, &pdblXR);
    }

    if (sciErr.iErr)
    {
        printError(&sciErr, 0);
        return 1;
    }

    /*  selection between the different options :
     *   -- solving Ax=b or A'x=b (Note: we could add  A.'x=b)
     *   -- with or without raffinement
     */

    if (nbInputArgument(pvApiCtx) == 2)
    {
        NoTranspose = 1;
        NoRaffinement = 1;
    }
    else  /* 3 or 4 input arguments but the third must be a string */
    {
        char* pStr = NULL;
        sciErr = getVarAddressFromPosition(pvApiCtx, 3, &piAddr3);
        if (sciErr.iErr)
        {
            printError(&sciErr, 0);
            return 1;
        }

        getAllocatedSingleString(pvApiCtx, piAddr3, &pStr);
        if (strcmp(pStr, "Ax=b") == 0)
        {
            NoTranspose = 1;
        }
        else if ( strcmp(pStr, "A'x=b") == 0 )
        {
            NoTranspose = 0;
        }
        else
        {
            Scierror(999, _("%s: Wrong input argument #%d: '%s' or '%s' expected.\n"), fname, 3, "Ax=b", "A'x=b");
            return 1;
        }

        if (nbInputArgument(pvApiCtx) == 4)
        {
            sciErr = getVarAddressFromPosition(pvApiCtx, 4, &piAddr4);
            if (sciErr.iErr)
            {
                printError(&sciErr, 0);
                return 1;
            }

            if (isVarComplex(pvApiCtx, piAddr4))
            {
                AA.it = 1;
                sciErr = getComplexSparseMatrix(pvApiCtx, piAddr4, &mA, &nA, &iNbItem, &piNbItemRow, &piColPos, &pdblSpReal, &pdblSpImg);
            }
            else
            {
                AA.it = 0;
                sciErr = getSparseMatrix(pvApiCtx, piAddr4, &mA, &nA, &iNbItem, &piNbItemRow, &piColPos, &pdblSpReal);
            }

            if (sciErr.iErr)
            {
                printError(&sciErr, 0);
                return 1;
            }

            // fill struct sparse
            AA.m     = mA;
            AA.n     = nA;
            AA.nel   = iNbItem;
            AA.mnel  = piNbItemRow;
            AA.icol  = piColPos;
            AA.R     = pdblSpReal;
            AA.I     = pdblSpImg;

            /*  some check... but we can't be sure that the matrix corresponds to the LU factors */
            if ( mA != nA || mA != n || AA.it != it_flag )
            {
                Scierror(999, _("%s: Wrong size for input argument #%d: %s.\n"), fname, 4, _("Matrix is not compatible with the given LU factors"));
                return 1;
            }

            NoRaffinement = 0;
        }
        else
        {
            NoRaffinement = 1;   /* only 3 input var => no raffinement */
        }
    }

    /* allocate memory for umfpack_di_wsolve usage or umfpack_zi_wsolve usage*/
    Wi = (int*)MALLOC(n * sizeof(int));

    if (it_flag == 1)
    {
        if (NoRaffinement)
        {
            mW = 4 * n;
        }
        else
        {
            mW = 10 * n;
        }
    }
    else
    {
        if (NoRaffinement)
        {
            mW = n;
        }
        else
        {
            mW = 5 * n;
        }
    }

    W = (double*)MALLOC(mW * sizeof(double));

    if (NoRaffinement == 0)
    {
        SciSparseToCcsSparse(&AA, &A);
    }
    else
    {
        A.p = NULL;
        A.irow = NULL;
        A.R = NULL;
        A.I = NULL;
    }

    /* get the pointer for b */
    if (it_flag == 1  &&  pdblBI == NULL)
    {
        int iSize = mb * nb * sizeof(double);
        pdblBI = (double*)MALLOC(iSize);
        memset(pdblBI, 0x00, iSize);
    }

    /* init Control */
    if (it_flag == 0)
    {
        umfpack_di_defaults(Control);
    }
    else
    {
        umfpack_zi_defaults(Control);
    }

    if (NoRaffinement)
    {
        Control[UMFPACK_IRSTEP] = 0;
    }

    if (NoTranspose)
    {
        umf_flag = UMFPACK_A;
    }
    else
    {
        umf_flag = UMFPACK_At;
    }

    if (it_flag == 0)
    {
        for (j = 0; j < nb ; j++)
        {
            umfpack_di_wsolve(umf_flag, A.p, A.irow, A.R, &pdblXR[j * mb], &pdblBR[j * mb], Numeric, Control, Info, Wi, W);
        }

        if (iComplex == 1)
        {
            for (j = 0; j < nb ; j++)
            {
                umfpack_di_wsolve(umf_flag, A.p, A.irow, A.R, &pdblXI[j * mb], &pdblBI[j * mb], Numeric, Control, Info, Wi, W);
            }
        }
    }
    else
    {
        for (j = 0; j < nb ; j++)
        {
            umfpack_zi_wsolve(umf_flag, A.p, A.irow, A.R, A.I, &pdblXR[j * mb], &pdblXI[j * mb], &pdblBR[j * mb], &pdblBI[j * mb], Numeric, Control, Info, Wi, W);
        }
    }

    if (isVarComplex(pvApiCtx, piAddr2) == 0)
    {
        FREE(pdblBI);
    }

    freeCcsSparse(A);

    FREE(W);
    FREE(Wi);

    AssignOutputVariable(pvApiCtx, 1) = nbInputArgument(pvApiCtx) + 1;
    ReturnArguments(pvApiCtx);
    return 0;
}
Пример #13
0
/// Emit code for the aggregate assignment `LHS = RHS`.
///
/// The right-hand side is emitted as an aggregate expression first, then the
/// left-hand side is emitted as an l-value. The aggregate is loaded from its
/// address and stored through the destination pointer, each access carrying
/// the volatility reported by its own operand.
void CodeGenFunction::EmitAggregateAssignment(const Expr *LHS, const Expr *RHS) {
  auto Source = EmitAggregateExpr(RHS);
  auto Target = EmitLValue(LHS);

  auto Loaded = Builder.CreateLoad(Source.getAggregateAddr(),
                                   Source.isVolatileQualifier());
  Builder.CreateStore(Loaded, Target.getPointer(), Target.isVolatileQualifier());
}
Пример #14
0
// Reports whether this curve is backed by data: true exactly when
// getPointer() yields something other than NULL.
bool AnimationCurve::isAnimated() const
{
	if (getPointer() != NULL)
	{
		return true;
	}
	return false;
}
// Looks up the enum value for a property alias by delegating to the
// NameToEnum table located at nameToEnum_offset.
inline EnumValue
PropertyAliases::getPropertyEnum(const char* alias) const {
    return ((NameToEnum*) getPointer(nameToEnum_offset))->getEnum(alias, *this);
}
/*
 * Opcode 06 - Store Accumulator, Register Addressing
 * M(Pn) <- AC
 *
 * Encoding: 06 Pn --
 *
 * Writes the accumulator into the memory cell addressed by pointer Pn.
 * The pointer number is decoded from IR[3].
 */
void o6(Vm* vm){
    const int pointerId = charToInt(vm->IR[3]);

    intToCharArray(vm->ACC, vm->memory[getPointer(vm, pointerId)]);
}
Пример #17
0
  // JNI entry point for edu.berkeley.bid.CUMACH.LSTMbwd.
  // Each Java-side handle is resolved to a native float pointer through
  // getPointer() (in argument order) and the whole set is forwarded to
  // lstm_bwd(), whose return value is handed back to the caller unchanged.
  JNIEXPORT jint JNICALL Java_edu_berkeley_bid_CUMACH_LSTMbwd
  (JNIEnv *env, jobject obj,
   jobject jinC, jobject jLIN1, jobject jLIN2, jobject jLIN3, jobject jLIN4,
   jobject jdoutC, jobject jdoutH,
   jobject jdinC, jobject jdLIN1, jobject jdLIN2, jobject jdLIN3, jobject jdLIN4,
   jint n)
  {
    // Forward-pass inputs.
    float *pInC  = (float*)getPointer(env, jinC);
    float *pLin1 = (float*)getPointer(env, jLIN1);
    float *pLin2 = (float*)getPointer(env, jLIN2);
    float *pLin3 = (float*)getPointer(env, jLIN3);
    float *pLin4 = (float*)getPointer(env, jLIN4);

    // Incoming derivatives.
    float *pDoutC = (float*)getPointer(env, jdoutC);
    float *pDoutH = (float*)getPointer(env, jdoutH);

    // Derivative buffers paired with the inputs above.
    float *pDinC  = (float*)getPointer(env, jdinC);
    float *pDlin1 = (float*)getPointer(env, jdLIN1);
    float *pDlin2 = (float*)getPointer(env, jdLIN2);
    float *pDlin3 = (float*)getPointer(env, jdLIN3);
    float *pDlin4 = (float*)getPointer(env, jdLIN4);

    return lstm_bwd(pInC, pLin1, pLin2, pLin3, pLin4,
                    pDoutC, pDoutH,
                    pDinC, pDlin1, pDlin2, pDlin3, pDlin4,
                    n);
  }
/*
 * Opcode 08 - Store Register to memory, Register Addressing
 * M(Pn) <- Rn
 *
 * Encoding: 08 Rn Pn
 *
 * Writes the contents of register Rn into the memory cell addressed by
 * pointer Pn, n in {0..3}. Rn is decoded from IR[3], Pn from IR[5].
 */
void o8(Vm* vm){
    const int registerId = charToInt(vm->IR[3]);
    const int pointerId  = charToInt(vm->IR[5]);

    intToCharArray(getRegister(vm, registerId),
                   vm->memory[getPointer(vm, pointerId)]);
}
Пример #19
0
/*
 * Convert the Ruby arguments in argv into native values for a foreign call.
 *
 * For every expected parameter the Ruby value is converted according to
 * paramTypes[i]->nativeType, written through the `param` cursor (which starts
 * at paramStorage[0]), and ffiValues[i] is set to point at the converted value.
 *
 *   argc, argv          Ruby arguments supplied by the caller. Entries of argv
 *                       are overwritten in place for NATIVE_MAPPED parameters.
 *   paramCount          expected number of parameters, or -1 to take as many
 *                       as were passed (argc).
 *   paramTypes          one Type* per parameter.
 *   paramStorage        scratch storage receiving the converted values.
 *   ffiValues           out: one pointer per parameter.
 *   callbackParameters  consumed in order (via cbidx) by NATIVE_FUNCTION /
 *                       NATIVE_CALLBACK parameters.
 *   callbackCount       only consulted here to allow a Ruby block to stand in
 *                       for the single callback argument when it equals 1.
 *   enums               receiver used to map Symbol arguments to integers for
 *                       NATIVE_INT32 parameters; Qnil disables the mapping.
 *
 * Raises ArgumentError on an argument-count mismatch or an unhandled native
 * type, TypeError when a NATIVE_BOOL argument is neither true nor false, and
 * SecurityError for a tainted string when the safe level is >= 1.
 */
void
rbffi_SetupCallParams(int argc, VALUE* argv, int paramCount, Type** paramTypes,
        FFIStorage* paramStorage, void** ffiValues,
        VALUE* callbackParameters, int callbackCount, VALUE enums)
{
    VALUE callbackProc = Qnil;
    FFIStorage* param = &paramStorage[0];
    int i, argidx, cbidx, argCount;

    /*
     * A count mismatch is tolerated in exactly one case: one argument is
     * missing, the function takes a single callback, and a block was given.
     * The block is then captured as a proc and used as the callback.
     */
    if (unlikely(paramCount != -1 && paramCount != argc)) {
        if (argc == (paramCount - 1) && callbackCount == 1 && rb_block_given_p()) {
            callbackProc = rb_block_proc();
        } else {
            rb_raise(rb_eArgError, "wrong number of arguments (%d for %d)", argc, paramCount);
        }
    }

    /* paramCount == -1 means "use however many arguments were passed" */
    argCount = paramCount != -1 ? paramCount : argc;

    /* i walks the parameters, argidx the Ruby arguments, cbidx the callbacks */
    for (i = 0, argidx = 0, cbidx = 0; i < argCount; ++i) {
        Type* paramType = paramTypes[i];
        int type;

        
        /*
         * Mapped types: call the converter (id_to_native) on the argument,
         * store the result back into argv, and continue with the underlying type.
         */
        if (unlikely(paramType->nativeType == NATIVE_MAPPED)) {
            VALUE values[] = { argv[argidx], Qnil };
            argv[argidx] = rb_funcall2(((MappedType *) paramType)->rbConverter, id_to_native, 2, values);
            paramType = ((MappedType *) paramType)->type;
        }

        /* T_NONE when the arguments have run out (the block-as-callback case) */
        type = argidx < argc ? TYPE(argv[argidx]) : T_NONE;
        /* default: the value lives at the storage cursor; NATIVE_STRUCT overrides this */
        ffiValues[i] = param;

        /*
         * NOTE(review): ADJ is defined outside this function; presumably it
         * moves `param` past the value just written -- confirm at its definition.
         */
        switch (paramType->nativeType) {

            case NATIVE_INT8:
                param->s8 = NUM2INT(argv[argidx]);
                ++argidx;
                ADJ(param, INT8);
                break;


            case NATIVE_INT16:
                param->s16 = NUM2INT(argv[argidx]);
                ++argidx;
                ADJ(param, INT16);
                break;


            case NATIVE_INT32:
                /* a Symbol is translated through `enums` when one was supplied */
                if (unlikely(type == T_SYMBOL && enums != Qnil)) {
                    VALUE value = rb_funcall(enums, id_map_symbol, 1, argv[argidx]);
                    param->s32 = NUM2INT(value);

                } else {
                    param->s32 = NUM2INT(argv[argidx]);
                }

                ++argidx;
                ADJ(param, INT32);
                break;


            case NATIVE_BOOL:
                /* only true / false are accepted; stored as 1 / 0 in an 8-bit slot */
                if (type != T_TRUE && type != T_FALSE) {
                    rb_raise(rb_eTypeError, "wrong argument type  (expected a boolean parameter)");
                }
                param->s8 = argv[argidx++] == Qtrue;
                ADJ(param, INT8);
                break;


            case NATIVE_UINT8:
                param->u8 = NUM2UINT(argv[argidx]);
                ADJ(param, INT8);
                ++argidx;
                break;


            case NATIVE_UINT16:
                param->u16 = NUM2UINT(argv[argidx]);
                ADJ(param, INT16);
                ++argidx;
                break;


            case NATIVE_UINT32:
                param->u32 = NUM2UINT(argv[argidx]);
                ADJ(param, INT32);
                ++argidx;
                break;


            case NATIVE_INT64:
                param->i64 = NUM2LL(argv[argidx]);
                ADJ(param, INT64);
                ++argidx;
                break;


            case NATIVE_UINT64:
                param->u64 = NUM2ULL(argv[argidx]);
                ADJ(param, INT64);
                ++argidx;
                break;

            case NATIVE_LONG:
                /* written through an ffi_sarg-typed view of the slot */
                *(ffi_sarg *) param = NUM2LONG(argv[argidx]);
                ADJ(param, LONG);
                ++argidx;
                break;

            case NATIVE_ULONG:
                /* written through an ffi_arg-typed view of the slot */
                *(ffi_arg *) param = NUM2ULONG(argv[argidx]);
                ADJ(param, LONG);
                ++argidx;
                break;

            case NATIVE_FLOAT32:
                param->f32 = (float) NUM2DBL(argv[argidx]);
                ADJ(param, FLOAT32);
                ++argidx;
                break;

            case NATIVE_FLOAT64:
                param->f64 = NUM2DBL(argv[argidx]);
                ADJ(param, FLOAT64);
                ++argidx;
                break;

            case NATIVE_LONGDOUBLE:
                param->ld = rbffi_num2longdouble(argv[argidx]);
                ADJ(param, LONGDOUBLE);
                ++argidx;
                break;


            case NATIVE_STRING:
                /* nil becomes a NULL pointer; anything else must be a C-string-safe String */
                if (type == T_NIL) {
                    param->ptr = NULL; 
                
                } else {
                    /* refuse tainted strings once the safe level is raised */
                    if (rb_safe_level() >= 1 && OBJ_TAINTED(argv[argidx])) {
                        rb_raise(rb_eSecurityError, "Unsafe string parameter");
                    }

                    param->ptr = StringValueCStr(argv[argidx]);
                }

                ADJ(param, ADDRESS);
                ++argidx;
                break;

            case NATIVE_POINTER:
            case NATIVE_BUFFER_IN:
            case NATIVE_BUFFER_OUT:
            case NATIVE_BUFFER_INOUT:
                /* all pointer-like kinds resolve to a raw address via getPointer() */
                param->ptr = getPointer(argv[argidx++], type);
                ADJ(param, ADDRESS);
                break;


            case NATIVE_FUNCTION:
            case NATIVE_CALLBACK:
                /* the captured block, when present, replaces the explicit argument */
                if (callbackProc != Qnil) {
                    param->ptr = callback_param(callbackProc, callbackParameters[cbidx++]);
                } else {
                    param->ptr = callback_param(argv[argidx], callbackParameters[cbidx++]);
                    ++argidx;
                }
                ADJ(param, ADDRESS);
                break;

            case NATIVE_STRUCT:
                /* ffiValues[i] points at the address from getPointer(); the storage cursor is not used */
                ffiValues[i] = getPointer(argv[argidx++], type);
                break;

            default:
                rb_raise(rb_eArgError, "Invalid parameter type: %d", paramType->nativeType);
        }
    }
}
/*
 * Opcode 10 - Load Register from memory, Register Addressing
 * Rn <- M(Pn)
 *
 * Encoding: 10 Rn Pn
 *
 * Loads register Rn with the contents of the memory cell addressed by
 * pointer Pn, n in {0..3}. Rn is decoded from IR[3], Pn from IR[5].
 */
void o10(Vm* vm){
    const int registerId = charToInt(vm->IR[3]);
    const int pointerId  = charToInt(vm->IR[5]);

    setRegister(vm, registerId,
                charArrayToInt(0, 6, vm->memory[getPointer(vm, pointerId)]));
}
Пример #21
0
/*
 * Scilab gateway: solve A x = b from a previously computed Cholesky
 * factorization.
 *
 * Input arguments (2 or 3):
 *   #1  pointer to a taucs_handle_factors object; it must be registered in
 *       ListCholFactors.
 *   #2  right-hand side b, a matrix of doubles with pC->n rows and at least
 *       one column. Each column is solved independently.
 *   #3  (optional) the real sparse matrix A (n x n). When present, one step
 *       of iterative refinement is attempted per column and kept only if it
 *       lowers the residual norm.
 *
 * Output argument (1): the solution x, same size as b.
 *
 * Returns 0 on success, 1 after reporting an error.
 */
int sci_taucs_chsolve(char* fname, void* pvApiCtx)
{
    SciErr sciErr;

    int mb = 0, nb = 0;
    int i = 0, j = 0, n = 0, it_flag = 0, Refinement = 0;
    double norm_res = 0., norm_res_bis = 0.;
    long double *wk = NULL;
    int A_is_upper_triangular = 0;
    taucs_handle_factors * pC = NULL;

    SciSparse A;
    int mA              = 0; // rows
    int nA              = 0; // cols
    int iNbItem         = 0;
    int* piNbItemRow    = NULL;
    int* piColPos       = NULL;
    double* pdblSpReal  = NULL;
    double* pdblSpImg   = NULL;
    int iComplex        = 0;

    int* piAddr1 = NULL;
    int* piAddr2 = NULL;
    int* piAddr3 = NULL;

    void* pvPtr     = NULL;
    double* pdblB   = NULL;
    double* pdblX   = NULL;
    double* pdblV   = NULL;
    double* pdblRes = NULL;

    /* Check numbers of input/output arguments */
    CheckInputArgument(pvApiCtx, 2, 3);
    CheckOutputArgument(pvApiCtx, 1, 1);

    /* First get arg #1 : the pointer to the Cholesky factors */
    sciErr = getVarAddressFromPosition(pvApiCtx, 1, &piAddr1);
    if (sciErr.iErr)
    {
        printError(&sciErr, 0);
        return 1;
    }

    sciErr = getPointer(pvApiCtx, piAddr1, &pvPtr);
    if (sciErr.iErr)
    {
        printError(&sciErr, 0);
        return 1;
    }

    pC = (taucs_handle_factors *)pvPtr;

    /* Check if this pointer is a valid ref to a Cholesky factor object */
    if ( ! IsAdrInList( (Adr)pC, ListCholFactors, &it_flag) )
    {
        Scierror(999, _("%s: Wrong value for input argument #%d: not a valid reference to Cholesky factors"), fname, 1);
        return 1;
    }

    /*  the number of rows/lines of the matrix  */
    n = pC->n;

    /* Get now arg #2 : the vector b */
    sciErr = getVarAddressFromPosition(pvApiCtx, 2, &piAddr2);
    if (sciErr.iErr)
    {
        printError(&sciErr, 0);
        return 1;
    }

    sciErr = getMatrixOfDouble(pvApiCtx, piAddr2, &mb, &nb, &pdblB);
    if (sciErr.iErr)
    {
        printError(&sciErr, 0);
        return 1;
    }

    /* test if the right hand side is compatible */
    if (mb != n || nb < 1)
    {
        Scierror(999, _("%s: Wrong size for input argument #%d.\n"), fname, 2);
        return 1;
    }

    /* Optional arg #3 : the matrix A, which enables iterative refinement */
    if (nbInputArgument(pvApiCtx) == 3)
    {
        sciErr = getVarAddressFromPosition(pvApiCtx, 3, &piAddr3);
        if (sciErr.iErr)
        {
            printError(&sciErr, 0);
            return 1;
        }

        if (isVarComplex(pvApiCtx, piAddr3))
        {
            Scierror(999, _("%s: Wrong type for input argument #%d: not compatible with the Cholesky factorization.\n"), fname, 3);
            return 1;
        }

        sciErr = getSparseMatrix(pvApiCtx, piAddr3, &mA, &nA, &iNbItem, &piNbItemRow, &piColPos, &pdblSpReal);
        if (sciErr.iErr)
        {
            printError(&sciErr, 0);
            return 1;
        }

        // fill struct sparse (real matrix: it == 0, no imaginary part)
        A.m     = mA;
        A.n     = nA;
        A.it    = iComplex;
        A.nel   = iNbItem;
        A.mnel  = piNbItemRow;
        A.icol  = piColPos;
        A.R     = pdblSpReal;
        A.I     = pdblSpImg;

        if (mA != nA || mA != n)
        {
            Scierror(999, _("%s: Wrong size for input argument #%d: not compatible with the Cholesky factorization.\n"), fname, 3);
            return 1;
        }

        Refinement = 1;
        A_is_upper_triangular = is_sparse_upper_triangular(&A);
    }
    else
    {
        Refinement = 0;
    }

    /* allocate memory for the solution x */
    sciErr = allocMatrixOfDouble(pvApiCtx, nbInputArgument(pvApiCtx) + 1, mb, nb, &pdblX);
    if (sciErr.iErr)
    {
        printError(&sciErr, 0);
        return 1;
    }

    if (Refinement)
    {
        /* residual vector, one column at a time */
        pdblRes = (double*)MALLOC(mb * sizeof(double));
        if (pdblRes == NULL)
        {
            Scierror(999, _("%s: not enough memory.\n"), fname);
            return 1;
        }

        if ( A_is_upper_triangular )
        {
            /* extended-precision work vector used by the residual computation */
            wk = (long double*)MALLOC(n * sizeof(long double));
            if (wk == NULL)
            {
                FREE(pdblRes);
                Scierror(999, _("%s: not enough memory.\n"), fname);
                return 1;
            }
        }
    }

    /* allocate memory for a temporary vector v (a single column, mb entries) */
    pdblV = (double*)MALLOC(mb * sizeof(double));
    if (pdblV == NULL)
    {
        FREE(wk);
        FREE(pdblRes);
        Scierror(999, _("%s: not enough memory.\n"), fname);
        return 1;
    }

    for (j = 0; j < nb ; j++)
    {
        double* pdblBj = &pdblB[j * mb]; /* current right-hand side column */
        double* pdblXj = &pdblX[j * mb]; /* current solution column */

        /* x_j is used as scratch for the permuted b_j, then receives the solution */
        taucs_vec_permute(n, pdblBj, pdblXj, pC->p);
        taucs_supernodal_solve_llt(pC->C, pdblV, pdblXj); /* FIXME : add a test here */
        taucs_vec_ipermute(n, pdblV, pdblXj, pC->p);

        if (Refinement)
        {
            /*
             * do one iterative refinement
             *
             * The residual of the current solution is taken against the
             * right-hand side column b_j. pdblV only holds a single column,
             * so it must never be indexed with j * mb.
             */
            residu_with_prec_for_chol(&A, pdblXj, pdblBj, pdblRes, &norm_res, A_is_upper_triangular, wk);
            /*  FIXME: do a test if the norm_res has an abnormal value and send a warning
             *         (the user has certainly not given the right matrix A)
             */
            taucs_vec_permute(n, pdblRes, pdblV, pC->p);
            taucs_supernodal_solve_llt(pC->C, pdblRes, pdblV);  /* FIXME : add a test here */
            taucs_vec_ipermute(n, pdblRes, pdblV, pC->p);
            for ( i = 0 ; i < n ; i++ )
            {
                pdblV[i] = pdblXj[i] - pdblV[i]; /* v is the refined solution */
            }

            residu_with_prec_for_chol(&A, pdblV, pdblBj, pdblRes, &norm_res_bis, A_is_upper_triangular, wk);
            /* accept it if the 2 norm of the residual is improved */
            if ( norm_res_bis < norm_res )
            {
                for ( i = 0 ; i < n ; i++ )
                {
                    pdblXj[i] = pdblV[i];
                }
            }
        }
    }

    FREE(wk);
    FREE(pdblV);
    FREE(pdblRes);

    AssignOutputVariable(pvApiCtx, 1) = nbInputArgument(pvApiCtx) + 1;
    ReturnArguments(pvApiCtx);
    return 0;
}
/*
 * Opcode 22 - Subtract from Accumulator, Register Addressing
 * AC <- AC - M(Pn)
 *
 * Encoding: 22 Pn --
 *
 * Subtracts from the accumulator the contents of the memory cell addressed
 * by pointer Pn. The pointer number is decoded from IR[3].
 */
void o22(Vm* vm){
    const int pointerId = charToInt(vm->IR[3]);

    vm->ACC -= charArrayToInt(0, 6, vm->memory[getPointer(vm, pointerId)]);
}
Пример #23
0
/*
 * Scilab gateway: Kronecker product of two matrices on the GPU.
 *
 * Takes exactly two input arguments and produces one output. Each input is
 * either
 *   - a pointer variable referring to a GpuPointer known to the
 *     PointerManager and matching the active backend, or
 *   - a real or complex matrix of doubles, which is wrapped in a temporary
 *     PointerCuda for the duration of the call.
 *
 * The result stays on the device: it is returned as a Scilab pointer
 * variable and registered in the PointerManager.
 *
 * Returns 0 on success. On any error the message is reported through
 * Scierror / printError, temporaries are released and EXIT_FAILURE is
 * returned.
 */
int sci_gpuKronecker(char *fname)
{
    CheckRhs(2, 2);
    CheckLhs(1, 1);

    SciErr sciErr;

    int*    piAddr_A    = NULL;
    int*    piAddr_B    = NULL;

    GpuPointer* gpuPtrA = NULL;
    GpuPointer* gpuPtrB = NULL;
    GpuPointer* gpuPtrC = NULL;

    double* h           = NULL;
    double* hi          = NULL;
    int rows            = 0;
    int cols            = 0;

    void* pvPtrA        = NULL;
    void* pvPtrB        = NULL;

    // Initialised because the cleanup code after the catch blocks reads
    // them even when an exception is thrown before getVarType() has run.
    int inputType_A     = 0;
    int inputType_B     = 0;

    try
    {
        if (!isGpuInit())
        {
            throw "gpu is not initialised. Please launch gpuInit() before use this function.";
        }

        sciErr = getVarAddressFromPosition(pvApiCtx, 1, &piAddr_A);
        if (sciErr.iErr)
        {
            throw sciErr;
        }

        sciErr = getVarAddressFromPosition(pvApiCtx, 2, &piAddr_B);
        if (sciErr.iErr)
        {
            throw sciErr;
        }

        /* ---- Check type of arguments and get data ---- */
        /*                                                */
        /*  Pointer to host / Pointer to device           */
        /*  Matrix real / Matrix complex                  */
        /*                                                */
        /* ---------------------------------------------- */

        sciErr = getVarType(pvApiCtx, piAddr_A, &inputType_A);
        if (sciErr.iErr)
        {
            throw sciErr;
        }

        sciErr = getVarType(pvApiCtx, piAddr_B, &inputType_B);
        if (sciErr.iErr)
        {
            throw sciErr;
        }

        /* ---- Argument #1 ---- */
        if (inputType_A == sci_pointer)
        {
            sciErr = getPointer(pvApiCtx, piAddr_A, (void**)&pvPtrA);
            if (sciErr.iErr)
            {
                throw sciErr;
            }

            gpuPtrA = (GpuPointer*)pvPtrA;
            if (!PointerManager::getInstance()->findGpuPointerInManager(gpuPtrA))
            {
                throw "gpuKronecker : Bad type for input argument #1: Variables created with GPU functions expected.";
            }

            if (useCuda() && gpuPtrA->getGpuType() != GpuPointer::CudaType)
            {
                throw "gpuKronecker : Bad type for input argument #1: A Cuda pointer expected.";
            }

            if (useCuda() == false && gpuPtrA->getGpuType() != GpuPointer::OpenCLType)
            {
                throw "gpuKronecker : Bad type for input argument #1: A OpenCL pointer expected.";
            }
        }
        else if (inputType_A == sci_matrix)
        {
            if (isVarComplex(pvApiCtx, piAddr_A))
            {
                sciErr = getComplexMatrixOfDouble(pvApiCtx, piAddr_A, &rows, &cols, &h, &hi);
                if (sciErr.iErr)
                {
                    throw sciErr;
                }
#ifdef WITH_CUDA
                if (useCuda())
                {
                    gpuPtrA = new PointerCuda(h, hi, rows, cols);
                }
#endif
#ifdef WITH_OPENCL
                if (!useCuda())
                {
                    throw "gpuKronecker: not implemented with OpenCL.";
                }
#endif
            }
            else
            {
                sciErr = getMatrixOfDouble(pvApiCtx, piAddr_A, &rows, &cols, &h);
                if (sciErr.iErr)
                {
                    throw sciErr;
                }
#ifdef WITH_CUDA
                if (useCuda())
                {
                    gpuPtrA = new PointerCuda(h, rows, cols);
                }
#endif
#ifdef WITH_OPENCL
                if (!useCuda())
                {
                    throw "gpuKronecker: not implemented with OpenCL.";
                }
#endif
            }
        }
        else
        {
            throw "gpuKronecker : Bad type for input argument #1: A GPU or CPU matrix expected.";
        }

        /* ---- Argument #2 (same handling as argument #1) ---- */
        if (inputType_B == sci_pointer)
        {
            sciErr = getPointer(pvApiCtx, piAddr_B, (void**)&pvPtrB);
            if (sciErr.iErr)
            {
                throw sciErr;
            }

            gpuPtrB = (GpuPointer*)pvPtrB;
            if (!PointerManager::getInstance()->findGpuPointerInManager(gpuPtrB))
            {
                throw "gpuKronecker : Bad type for input argument #2: Variables created with GPU functions expected.";
            }

            if (useCuda() && gpuPtrB->getGpuType() != GpuPointer::CudaType)
            {
                throw "gpuKronecker : Bad type for input argument #2: A Cuda pointer expected.";
            }

            if (useCuda() == false && gpuPtrB->getGpuType() != GpuPointer::OpenCLType)
            {
                throw "gpuKronecker : Bad type for input argument #2: A OpenCL pointer expected.";
            }
        }
        else if (inputType_B == sci_matrix)
        {
            if (isVarComplex(pvApiCtx, piAddr_B))
            {
                sciErr = getComplexMatrixOfDouble(pvApiCtx, piAddr_B, &rows, &cols, &h, &hi);
                if (sciErr.iErr)
                {
                    throw sciErr;
                }
#ifdef WITH_CUDA
                if (useCuda())
                {
                    gpuPtrB = new PointerCuda(h, hi, rows, cols);
                }
#endif
#ifdef WITH_OPENCL
                if (!useCuda())
                {
                    // Throw, as for argument #1: calling Scierror() here and
                    // carrying on would report the same error a second time.
                    throw "gpuKronecker: not implemented with OpenCL.";
                }
#endif
            }
            else
            {
                sciErr = getMatrixOfDouble(pvApiCtx, piAddr_B, &rows, &cols, &h);
                if (sciErr.iErr)
                {
                    throw sciErr;
                }
#ifdef WITH_CUDA
                if (useCuda())
                {
                    gpuPtrB = new PointerCuda(h, rows, cols);
                }
#endif
#ifdef WITH_OPENCL
                if (!useCuda())
                {
                    throw "gpuKronecker: not implemented with OpenCL.";
                }
#endif
            }
        }
        else
        {
            throw "gpuKronecker : Bad type for input argument #2: A GPU or CPU matrix expected.";
        }

#ifdef WITH_OPENCL
        if (!useCuda())
        {
            throw "gpuKronecker: not implemented with OpenCL.";
        }
#endif

        // Perform the operation.
        gpuPtrC = gpuKronecker(gpuPtrA, gpuPtrB);

        // Keep the result on the Device.
        // The Scilab variable is created first so that, if this fails, the
        // cleanup below never deletes a pointer the manager still tracks.
        sciErr = createPointer(pvApiCtx, Rhs + 1, (void*)gpuPtrC);
        if (sciErr.iErr)
        {
            throw sciErr;
        }
        PointerManager::getInstance()->addGpuPointerInManager(gpuPtrC);
        LhsVar(1) = Rhs + 1;

        // Temporaries wrapping host matrices are no longer needed; pointers
        // supplied by the caller are owned elsewhere and left alone.
        if (inputType_A == sci_matrix && gpuPtrA != NULL)
        {
            delete gpuPtrA;
        }
        if (inputType_B == sci_matrix && gpuPtrB != NULL)
        {
            delete gpuPtrB;
        }

        PutLhsVar();
        return 0;
    }
    catch (const char* str)
    {
        Scierror(999, "%s\n", str);
    }
    catch (SciErr E)
    {
        printError(&E, 0);
    }

    // Error path: release whatever this call allocated.
    if (useCuda())
    {
        if (inputType_A == sci_matrix && gpuPtrA != NULL)
        {
            delete gpuPtrA;
        }
        if (inputType_B == sci_matrix && gpuPtrB != NULL)
        {
            delete gpuPtrB;
        }
        if (gpuPtrC != NULL)
        {
            delete gpuPtrC;
        }
    }

    return EXIT_FAILURE;
}
Пример #24
0
  // Builds a runnable representation of `network` and registers it in
  // networkMap_ under `name`.
  //
  // For every layer after the input layer (index 0) this:
  //   - creates a RunLayer with a Queue constructed from `threads`;
  //   - wires its input vector to the previous RunLayer's output vector
  //     (layers after the first processed one only) and allocates an output
  //     vector (all layers except the last);
  //   - emits an LLVM function taking (input vector*, weight vector*,
  //     double* output, int32 index) that multiplies input and weights
  //     element-wise, sums the products, applies the activation obtained for
  //     neuron 0 of the layer, and stores the result at output[index];
  //   - obtains a native function pointer for it from engine_;
  //   - creates one RunNeuron per neuron, holding that neuron's weights with
  //     respect to the previous layer, and adds it to the layer's queue.
  //
  // Always returns true.
  //
  // NOTE(review): inputVec/inputVecStart of the first processed layer and
  // outputVec/outputVecStart of the last one are not assigned here --
  // presumably they are set by the caller at run time; confirm.
  // NOTE(review): every layer's function is created with the same `name`;
  // verify how the module resolves the duplicate names.
  bool compile(const nstr& name,
               NNet& network,
               size_t threads){

    RunNetwork* runNetwork = new RunNetwork;

    // Layer 0 only supplies inputs; it gets no RunLayer of its own.
    NNet::Layer* inputLayer = network.layer(0);

    size_t numLayers = network.numLayers();

    RunLayer* lastRunLayer = 0;

    for(size_t l = 1; l < numLayers; ++l){
      RunLayer* runLayer = new RunLayer;
      runLayer->queue = new Queue(threads);

      size_t inputLayerSize = inputLayer->size();

      NNet::Layer* layer = network.layer(l);

      size_t layerSize  = layer->size();

      // Chain this layer's input to the previous layer's output.
      if(l > 1){
        runLayer->inputVecStart = lastRunLayer->outputVecStart;
        runLayer->inputVec = lastRunLayer->outputVec;
      }

      // Every layer but the last gets an output buffer allocated here.
      if(l < numLayers - 1){
        double* outputVecPtrStart;
        double* outputVecPtr;
        allocVector(layerSize, &outputVecPtrStart, &outputVecPtr);
        runLayer->outputVecStart = outputVecPtrStart;
        runLayer->outputVec = outputVecPtr;
      }

      // Signature: void f(<N x double>* input, <N x double>* weights,
      //                   double* output, i32 outputIndex), N = inputLayerSize.
      TypeVec args;
      args.push_back(getPointer(doubleVecType(inputLayerSize)));
      args.push_back(getPointer(doubleVecType(inputLayerSize)));
      args.push_back(getPointer(doubleType()));
      args.push_back(int32Type());

      FunctionType* ft = FunctionType::get(voidType(), args, false);
      
      Function* f = 
        Function::Create(ft, Function::ExternalLinkage,
                         name.c_str(), &module_);

      BasicBlock* entry = BasicBlock::Create(context_, "entry", f);
      
      builder_.SetInsertPoint(entry);

      // Name the four arguments in declaration order.
      auto aitr = f->arg_begin();
      
      Value* inputVecPtr = aitr;
      inputVecPtr->setName("input_vec_ptr");

      ++aitr;
      Value* weightVecPtr = aitr;
      weightVecPtr->setName("weight_vec_ptr");

      ++aitr;
      Value* outputVecPtr = aitr;
      outputVecPtr->setName("output_vec_ptr");

      ++aitr;
      Value* outputIndex = aitr;
      outputIndex->setName("output_index");

      Value* inputVec = 
        builder_.CreateLoad(inputVecPtr, "input_vec");

      Value* weightVec = 
        builder_.CreateLoad(weightVecPtr, "weight_vec");

      // Element-wise product of inputs and weights.
      Value* mulVec = 
        builder_.CreateFMul(inputVec, weightVec, "mul_vec");
      
      // Horizontal sum of the products, unrolled at code-generation time:
      // start from element 0 and add the remaining elements one by one.
      Value* sumActivation = 
        builder_.CreateExtractElement(mulVec, getInt32(0), "sum_elem");

      for(size_t i = 1; i < inputLayerSize; ++i){
        Value* elem = 
          builder_.CreateExtractElement(mulVec, getInt32(i), "sum_elem");
        
        sumActivation = 
          builder_.CreateFAdd(sumActivation, elem, "sum_activation");
      }

      // The activation is taken from neuron 0 of the layer and used for the
      // single function shared by all neurons of this layer.
      Value* output = 
        getActivationOutput(layer->neuron(0), sumActivation);

      // output[outputIndex] = activation(sum)
      Value* outputElement = 
        builder_.CreateGEP(outputVecPtr, outputIndex, "out_elem");

      builder_.CreateStore(output, outputElement);

      builder_.CreateRetVoid(); 

      runLayer->f = f;

      // Native entry point for the function just emitted.
      runLayer->fp = (void (*)(void*, void*, void*, int))
        engine_->getPointerToFunction(f);

      // One RunNeuron per neuron: its output slot and its weight vector.
      for(size_t j = 0; j < layerSize; ++j){
        NNet::Neuron* nj = layer->neuron(j);

        RunNeuron* runNeuron = new RunNeuron;
        runNeuron->layer = runLayer;
        runNeuron->outputIndex = j;

        double* weightVecPtrStart;
        double* weightVecPtr;
        allocVector(inputLayerSize, &weightVecPtrStart, &weightVecPtr);
        runNeuron->weightVecStart = weightVecPtrStart;
        runNeuron->weightVec = weightVecPtr;

        // Copy the weight of each connection from the previous layer.
        for(size_t i = 0; i < inputLayerSize; ++i){
          NNet::Neuron* ni = inputLayer->neuron(i);
          weightVecPtr[i] = nj->weight(ni);
        }

        runLayer->queue->add(runNeuron);
      }

      runNetwork->layerVec.push_back(runLayer);

      // This layer feeds the next iteration.
      inputLayer = layer;
      lastRunLayer = runLayer;
    }

    networkMap_.insert(make_pair(name, runNetwork));

    return true;
  }