Example 1
NDArray<T> Householder<T>::evalHHmatrix(const NDArray<T>& x) {

	// input validation
	if(!x.isVector() && !x.isScalar())
		throw "ops::helpers::Householder::evalHHmatrix method: input array must be vector or scalar!";

	NDArray<T> w((int)x.lengthOf(), 1,  x.ordering(), x.getWorkspace());							// column-vector
	NDArray<T> wT(1, (int)x.lengthOf(), x.ordering(), x.getWorkspace());							// row-vector (transposed w)	

	T coeff;
	T normX = x.template reduceNumber<simdOps::Norm2<T>>();	
	const T min = DataTypeUtils::min<T>();
	
	if(normX*normX - x(0)*x(0) <= min) {

		normX = x(0); 
		coeff = (T)0.;		
		w = (T)0.;
		
	} 	
	else {
		
		if(x(0) >= (T)0.)
			normX = -normX;									// choose opposite sign to lessen roundoff error
		
		T u0 = x(0) - normX;
		coeff = -u0 / normX;				
		w.assign(x / u0);		
	}
	
	w(0) = (T)1.;
	wT.assign(&w);
	
	NDArray<T> identity((int)x.lengthOf(), (int)x.lengthOf(), x.ordering(), x.getWorkspace());					 
	identity.setIdentity();																			// identity matrix	

	return identity - mmul(w, wT) * coeff;	

}
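
For reference, and not part of the original source: the matrix assembled at the end of evalHHmatrix is the standard Householder reflector, written here with the code's own variable names,

    H = I - coeff\, w\, w^{T}, \qquad w = x / u_0,\; w_0 = 1, \qquad u_0 = x_0 - normX, \qquad coeff = -\,u_0 / normX,

where normX = \pm\|x\|_2 carries the sign opposite to x_0 (the branch above) to limit roundoff, so that H\,x = normX \cdot e_1. The degenerate branch (coeff = 0, w = e_1) simply returns the identity when x is already numerically a multiple of e_1.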
Example 2
Foam::SolverPerformance<Type>
Foam::PBiCICG<Type, DType, LUType>::solve(Field<Type>& psi) const
{
    word preconditionerName(this->controlDict_.lookup("preconditioner"));

    // --- Setup class containing solver performance data
    SolverPerformance<Type> solverPerf
    (
        preconditionerName + typeName,
        this->fieldName_
    );

    register label nCells = psi.size();

    Type* __restrict__ psiPtr = psi.begin();

    Field<Type> pA(nCells);
    Type* __restrict__ pAPtr = pA.begin();

    Field<Type> pT(nCells, pTraits<Type>::zero);
    Type* __restrict__ pTPtr = pT.begin();

    Field<Type> wA(nCells);
    Type* __restrict__ wAPtr = wA.begin();

    Field<Type> wT(nCells);
    Type* __restrict__ wTPtr = wT.begin();

    Type wArT = solverPerf.great_*pTraits<Type>::one;
    Type wArTold = wArT;

    // --- Calculate A.psi and T.psi
    this->matrix_.Amul(wA, psi);
    this->matrix_.Tmul(wT, psi);

    // --- Calculate initial residual and transpose residual fields
    Field<Type> rA(this->matrix_.source() - wA);
    Field<Type> rT(this->matrix_.source() - wT);
    Type* __restrict__ rAPtr = rA.begin();
    Type* __restrict__ rTPtr = rT.begin();

    // --- Calculate normalisation factor
    Type normFactor = this->normFactor(psi, wA, pA);

    if (LduMatrix<Type, DType, LUType>::debug >= 2)
    {
        Info<< "   Normalisation factor = " << normFactor << endl;
    }

    // --- Calculate normalised residual norm
    solverPerf.initialResidual() = cmptDivide(gSumCmptMag(rA), normFactor);
    solverPerf.finalResidual() = solverPerf.initialResidual();

    // --- Check convergence, solve if not converged
    if (!solverPerf.checkConvergence(this->tolerance_, this->relTol_))
    {
        // --- Select and construct the preconditioner
        autoPtr<typename LduMatrix<Type, DType, LUType>::preconditioner>
        preconPtr = LduMatrix<Type, DType, LUType>::preconditioner::New
        (
            *this,
            this->controlDict_
        );

        // --- Solver iteration
        do
        {
            // --- Store previous wArT
            wArTold = wArT;

            // --- Precondition residuals
            preconPtr->precondition(wA, rA);
            preconPtr->preconditionT(wT, rT);

            // --- Update search directions:
            wArT = gSumCmptProd(wA, rT);

            if (solverPerf.nIterations() == 0)
            {
                for (register label cell=0; cell<nCells; cell++)
                {
                    pAPtr[cell] = wAPtr[cell];
                    pTPtr[cell] = wTPtr[cell];
                }
            }
            else
            {
                Type beta = cmptDivide
                (
                    wArT,
                    stabilise(wArTold, solverPerf.vsmall_)
                );

                for (register label cell=0; cell<nCells; cell++)
                {
                    pAPtr[cell] = wAPtr[cell] + cmptMultiply(beta, pAPtr[cell]);
                    pTPtr[cell] = wTPtr[cell] + cmptMultiply(beta, pTPtr[cell]);
                }
            }


            // --- Update preconditioned residuals
            this->matrix_.Amul(wA, pA);
            this->matrix_.Tmul(wT, pT);

            Type wApT = gSumCmptProd(wA, pT);

            // --- Test for singularity
            if
            (
                solverPerf.checkSingularity
                (
                    cmptDivide(cmptMag(wApT), normFactor)
                )
            )
            {
                break;
            }


            // --- Update solution and residual:

            Type alpha = cmptDivide
            (
                wArT,
                stabilise(wApT, solverPerf.vsmall_)
            );

            for (register label cell=0; cell<nCells; cell++)
            {
                psiPtr[cell] += cmptMultiply(alpha, pAPtr[cell]);
                rAPtr[cell] -= cmptMultiply(alpha, wAPtr[cell]);
                rTPtr[cell] -= cmptMultiply(alpha, wTPtr[cell]);
            }

            solverPerf.finalResidual() =
                cmptDivide(gSumCmptMag(rA), normFactor);

        } while
        (
            solverPerf.nIterations()++ < this->maxIter_
        && !(solverPerf.checkConvergence(this->tolerance_, this->relTol_))
        );
    }

    return solverPerf;
}
Example 3
Foam::lduSolverPerformance Foam::PBiCG::solve
(
    scalarField& x,
    const scalarField& b,
    const direction cmpt
) const
{
    // --- Setup class containing solver performance data
    lduSolverPerformance solverPerf
    (
        lduMatrix::preconditioner::getName(dict()) + typeName,
        fieldName()
    );


    register label nCells = x.size();

    scalar* __restrict__ xPtr = x.begin();

    scalarField pA(nCells);
    scalar* __restrict__ pAPtr = pA.begin();

    scalarField pT(nCells, 0.0);
    scalar* __restrict__ pTPtr = pT.begin();

    scalarField wA(nCells);
    scalar* __restrict__ wAPtr = wA.begin();

    scalarField wT(nCells);
    scalar* __restrict__ wTPtr = wT.begin();

    scalar wArT = matrix_.great_;
    scalar wArTold = wArT;

    // Calculate A.x and T.x
    matrix_.Amul(wA, x, coupleBouCoeffs_, interfaces_, cmpt);
    matrix_.Tmul(wT, x, coupleIntCoeffs_, interfaces_, cmpt);

    // Calculate initial residual and transpose residual fields
    scalarField rA(b - wA);
    scalarField rT(b - wT);
    scalar* __restrict__ rAPtr = rA.begin();
    scalar* __restrict__ rTPtr = rT.begin();

    // Calculate normalisation factor
    scalar normFactor = this->normFactor(x, b, wA, pA, cmpt);

    if (lduMatrix::debug >= 2)
    {
        Info<< "   Normalisation factor = " << normFactor << endl;
    }

    // Calculate normalised residual norm
    solverPerf.initialResidual() = gSumMag(rA)/normFactor;
    solverPerf.finalResidual() = solverPerf.initialResidual();

    // Check convergence, solve if not converged
    if (!stop(solverPerf))
    {
        // Select and construct the preconditioner
        autoPtr<lduPreconditioner> preconPtr;

        preconPtr =
            lduPreconditioner::New
            (
                matrix_,
                coupleBouCoeffs_,
                coupleIntCoeffs_,
                interfaces_,
                dict()
            );

        // Solver iteration
        do
        {
            // Store previous wArT
            wArTold = wArT;

            // Precondition residuals
            preconPtr->precondition(wA, rA, cmpt);
            preconPtr->preconditionT(wT, rT, cmpt);

            // Update search directions:
            wArT = gSumProd(wA, rT);

            if (solverPerf.nIterations() == 0)
            {
                for (register label cell=0; cell<nCells; cell++)
                {
                    pAPtr[cell] = wAPtr[cell];
                    pTPtr[cell] = wTPtr[cell];
                }
            }
            else
            {
                scalar beta = wArT/wArTold;

                for (register label cell=0; cell<nCells; cell++)
                {
                    pAPtr[cell] = wAPtr[cell] + beta*pAPtr[cell];
                    pTPtr[cell] = wTPtr[cell] + beta*pTPtr[cell];
                }
            }


            // Update preconditioned residuals
            matrix_.Amul(wA, pA, coupleBouCoeffs_, interfaces_, cmpt);
            matrix_.Tmul(wT, pT, coupleIntCoeffs_, interfaces_, cmpt);

            scalar wApT = gSumProd(wA, pT);


            // Test for singularity
            if (solverPerf.checkSingularity(mag(wApT)/normFactor)) break;


            // Update solution and residual:

            scalar alpha = wArT/wApT;

            for (register label cell=0; cell<nCells; cell++)
            {
                xPtr[cell] += alpha*pAPtr[cell];
                rAPtr[cell] -= alpha*wAPtr[cell];
                rTPtr[cell] -= alpha*wTPtr[cell];
            }

            solverPerf.finalResidual() = gSumMag(rA)/normFactor;
            solverPerf.nIterations()++;
        } while (!stop(solverPerf));
    }

    return solverPerf;
}
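
A reading aid, not part of the original sources: the do/while loop above is the classical preconditioned biconjugate-gradient (BiCG) iteration. In the code's own names, each pass computes

    wA = M^{-1} rA, \quad wT = M^{-T} rT, \qquad
    \beta = \frac{\langle wA,\, rT \rangle}{\langle wA,\, rT \rangle_{\mathrm{old}}}, \qquad
    pA \leftarrow wA + \beta\, pA, \quad pT \leftarrow wT + \beta\, pT,

    \alpha = \frac{\langle wA,\, rT \rangle}{\langle A\, pA,\; pT \rangle}, \qquad
    x \leftarrow x + \alpha\, pA, \quad rA \leftarrow rA - \alpha\, A\, pA, \quad rT \leftarrow rT - \alpha\, A^{T} pT,

where M is the selected preconditioner, the \beta update is skipped on the first iteration, and the loop aborts early if \langle A\, pA, pT \rangle is flagged as singular relative to normFactor. Example 2 above and Examples 5 and 6 below implement the same recurrences (component-wise in the templated version, and with thrust::transform kernels in the GPU variant).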
Example 4
bool BernoulliRBM::train_(MatrixFloat &data){
    
    const UINT numTrainingSamples = data.getNumRows();
    numInputDimensions = data.getNumCols();
    numOutputDimensions = numHiddenUnits;
    numVisibleUnits = numInputDimensions;
    
    trainingLog << "NumInputDimensions: " << numInputDimensions << std::endl;
    trainingLog << "NumOutputDimensions: " << numOutputDimensions << std::endl;
    
    if( randomizeWeightsForTraining ){
    
        //Init the weights matrix
        weightsMatrix.resize(numHiddenUnits, numVisibleUnits);
        
        Float a = 1.0 / numVisibleUnits;
        for(UINT i=0; i<numHiddenUnits; i++) {
            for(UINT j=0; j<numVisibleUnits; j++) {
                weightsMatrix[i][j] = rand.getRandomNumberUniform(-a, a);
            }
        }

        //Init the bias units
        visibleLayerBias.resize( numVisibleUnits );
        hiddenLayerBias.resize( numHiddenUnits );
        std::fill(visibleLayerBias.begin(),visibleLayerBias.end(),0);
        std::fill(hiddenLayerBias.begin(),hiddenLayerBias.end(),0);
        
    }else{
        if( weightsMatrix.getNumRows() != numHiddenUnits ){
            errorLog << "train_(MatrixFloat &data) - Weights matrix row size does not match the number of hidden units!" << std::endl;
            return false;
        }
        if( weightsMatrix.getNumCols() != numVisibleUnits ){
            errorLog << "train_(MatrixFloat &data) - Weights matrix row size does not match the number of visible units!" << std::endl;
            return false;
        }
        if( visibleLayerBias.size() != numVisibleUnits ){
            errorLog << "train_(MatrixFloat &data) - Visible layer bias size does not match the number of visible units!" << std::endl;
            return false;
        }
        if( hiddenLayerBias.size() != numHiddenUnits ){
            errorLog << "train_(MatrixFloat &data) - Hidden layer bias size does not match the number of hidden units!" << std::endl;
            return false;
        }
    }
    
    //Flag that the model has been trained in case the user wants to save the model during a training iteration using an observer
    trained = true;
    
    //Make sure the data is scaled between [0 1]
    ranges = data.getRanges();
    if( useScaling ){
        for(UINT i=0; i<numTrainingSamples; i++){
            for(UINT j=0; j<numInputDimensions; j++){
                data[i][j] = grt_scale(data[i][j], ranges[j].minValue, ranges[j].maxValue, 0.0, 1.0);
            }
        }
    }
    

    const UINT numBatches = static_cast<UINT>( ceil( Float(numTrainingSamples)/batchSize ) );
    
    //Set up the batch indices
    Vector< BatchIndexs > batchIndexs( numBatches );
    UINT startIndex = 0;
    for(UINT i=0; i<numBatches; i++){
        batchIndexs[i].startIndex = startIndex;
        batchIndexs[i].endIndex = startIndex + batchSize;
        
        //Make sure the last batch end index is not larger than the number of training examples
        if( batchIndexs[i].endIndex >= numTrainingSamples ){
            batchIndexs[i].endIndex = numTrainingSamples;
        }
        
        //Get the batch size
        batchIndexs[i].batchSize = batchIndexs[i].endIndex - batchIndexs[i].startIndex;
        
        //Set the start index for the next batch
        startIndex = batchIndexs[i].endIndex;
    }
    
    Timer timer;
    UINT i,j,n,epoch,noChangeCounter = 0;
    Float startTime = 0;
    Float alpha = learningRate;
    Float error = 0;
    Float err = 0;
    Float delta = 0;
    Float lastError = 0;
    Vector< UINT > indexList(numTrainingSamples);
    TrainingResult trainingResult;
    MatrixFloat wT( numVisibleUnits, numHiddenUnits );       //Stores a transposed copy of the weights matrix
    MatrixFloat vW( numHiddenUnits, numVisibleUnits );       //Stores the weight velocity updates
    MatrixFloat tmpW( numHiddenUnits, numVisibleUnits );     //Stores the weight values that will be used to update the main weights matrix at each batch update
    MatrixFloat v1( batchSize, numVisibleUnits );            //Stores the real batch data during a batch update
    MatrixFloat v2( batchSize, numVisibleUnits );            //Stores the sampled batch data during a batch update
    MatrixFloat h1( batchSize, numHiddenUnits );             //Stores the hidden states given v1 and the current weightsMatrix
    MatrixFloat h2( batchSize, numHiddenUnits );             //Stores the sampled hidden states given v2 and the current weightsMatrix
    MatrixFloat c1( numHiddenUnits, numVisibleUnits );       //Stores h1' * v1
    MatrixFloat c2( numHiddenUnits, numVisibleUnits );       //Stores h2' * v2
    MatrixFloat vDiff( batchSize, numVisibleUnits );         //Stores the difference between v1-v2
    MatrixFloat hDiff( batchSize, numHiddenUnits );          //Stores the difference between h1-h2
    MatrixFloat cDiff( numHiddenUnits, numVisibleUnits );    //Stores the difference between c1-c2
    VectorFloat vDiffSum( numVisibleUnits );                 //Stores the column sum of vDiff
    VectorFloat hDiffSum( numHiddenUnits );                  //Stores the column sum of hDiff
    VectorFloat visibleLayerBiasVelocity( numVisibleUnits ); //Stores the velocity update of the visibleLayerBias
    VectorFloat hiddenLayerBiasVelocity( numHiddenUnits );   //Stores the velocity update of the hiddenLayerBias
    
    //Set all the velocity weights to zero
    vW.setAllValues( 0 );
    std::fill(visibleLayerBiasVelocity.begin(),visibleLayerBiasVelocity.end(),0);
    std::fill(hiddenLayerBiasVelocity.begin(),hiddenLayerBiasVelocity.end(),0);
    
    //Randomize the order that the training samples will be used in
    for(UINT i=0; i<numTrainingSamples; i++) indexList[i] = i;
    if( randomiseTrainingOrder ){
        std::random_shuffle(indexList.begin(), indexList.end());
    }
    
    //Start the main training loop
    timer.start();
    for(epoch=0; epoch<maxNumEpochs; epoch++) {
        startTime = timer.getMilliSeconds();
        error = 0;
        
        //Randomize the batch order
        std::random_shuffle(batchIndexs.begin(),batchIndexs.end());
        
        //Run each of the batch updates
        for(UINT k=0; k<numBatches; k+=batchStepSize){
            
            //Resize the data matrices; they will only be reallocated if the rows/cols differ
            v1.resize( batchIndexs[k].batchSize, numVisibleUnits );
            h1.resize( batchIndexs[k].batchSize, numHiddenUnits );
            v2.resize( batchIndexs[k].batchSize, numVisibleUnits );
            h2.resize( batchIndexs[k].batchSize, numHiddenUnits );
            
            //Set up the data pointers; using raw pointers saves a few ms on large matrix updates
            Float **w_p = weightsMatrix.getDataPointer();
            Float **wT_p = wT.getDataPointer();
            Float **vW_p = vW.getDataPointer();
            Float **data_p = data.getDataPointer();
            Float **v1_p = v1.getDataPointer();
            Float **v2_p = v2.getDataPointer();
            Float **h1_p = h1.getDataPointer();
            Float **h2_p = h2.getDataPointer();
            Float *vlb_p = &visibleLayerBias[0];
            Float *hlb_p = &hiddenLayerBias[0];
            
            //Get the batch data
            UINT index = 0;
            for(i=batchIndexs[k].startIndex; i<batchIndexs[k].endIndex; i++){
                for(j=0; j<numVisibleUnits; j++){
                    v1_p[index][j] = data_p[ indexList[i] ][j];
                }
                index++;
            }
            
            //Copy a transposed version of the weights matrix; this is used to compute h1 and h2
            for(i=0; i<numHiddenUnits; i++)
                for(j=0; j<numVisibleUnits; j++)
                    wT_p[j][i] = w_p[i][j];
            
            //Compute h1
            h1.multiple(v1, wT);
            for(n=0; n<batchIndexs[k].batchSize; n++){
                for(i=0; i<numHiddenUnits; i++){
                    h1_p[n][i] = sigmoidRandom( h1_p[n][i] + hlb_p[i] );
                }
            }
            
            //Compute v2
            v2.multiple(h1, weightsMatrix);
            for(n=0; n<batchIndexs[k].batchSize; n++){
                for(i=0; i<numVisibleUnits; i++){
                    v2_p[n][i] = sigmoidRandom( v2_p[n][i] + vlb_p[i] );
                }
            }
            
            //Compute h2
            h2.multiple(v2,wT);
            for(n=0; n<batchIndexs[k].batchSize; n++){
                for(i=0; i<numHiddenUnits; i++){
                    h2_p[n][i] = grt_sigmoid( h2_p[n][i] + hlb_p[i] );
                }
            }
            
            //Compute c1, c2 and the difference between v1-v2
            c1.multiple(h1,v1,true);
            c2.multiple(h2,v2,true);
            vDiff.subtract(v1, v2);
            
            //Compute the column sums of vDiff
            for(j=0; j<numVisibleUnits; j++){
                vDiffSum[j] = 0;
                for(i=0; i<batchIndexs[k].batchSize; i++){
                    vDiffSum[j] += vDiff[i][j];
                }
            }
            
            //Compute the difference between h1 and h2
            hDiff.subtract(h1, h2);
            for(j=0; j<numHiddenUnits; j++){
                hDiffSum[j] = 0;
                for(i=0; i<batchIndexs[k].batchSize; i++){
                    hDiffSum[j] += hDiff[i][j];
                }
            }
            
            //Compute the difference between c1 and c2
            cDiff.subtract(c1,c2);
            
            //Update the weight velocities
            for(i=0; i<numHiddenUnits; i++){
                for(j=0; j<numVisibleUnits; j++){
                    vW_p[i][j] = ((momentum * vW_p[i][j]) + (alpha * cDiff[i][j])) / batchIndexs[k].batchSize;
                }
            }
            for(i=0; i<numVisibleUnits; i++){
                visibleLayerBiasVelocity[i] = ((momentum * visibleLayerBiasVelocity[i]) + (alpha * vDiffSum[i])) / batchIndexs[k].batchSize;
            }
            for(i=0; i<numHiddenUnits; i++){
                hiddenLayerBiasVelocity[i] = ((momentum * hiddenLayerBiasVelocity[i]) + (alpha * hDiffSum[i])) / batchIndexs[k].batchSize;
            }
            
            //Update the weights
            weightsMatrix.add( vW );
            
            //Update the bias for the visible layer
            for(i=0; i<numVisibleUnits; i++){
                visibleLayerBias[i] += visibleLayerBiasVelocity[i];
            }
            
            //Update the bias for the hidden layer
            for(i=0; i<numHiddenUnits; i++){
                hiddenLayerBias[i] += hiddenLayerBiasVelocity[i];
            }
            
            //Compute the reconstruction error
            err = 0;
            for(i=0; i<batchIndexs[k].batchSize; i++){
                for(j=0; j<numVisibleUnits; j++){
                    err += SQR( v1[i][j] - v2[i][j] );
                }
            }
            
            error += err / batchIndexs[k].batchSize;
        }
        error /= numBatches;
        delta = lastError - error;
        lastError = error;
        
        trainingLog << "Epoch: " << epoch+1 << "/" << maxNumEpochs;
        trainingLog << " Epoch time: " << (timer.getMilliSeconds()-startTime)/1000.0 << " seconds";
        trainingLog << " Learning rate: " << alpha;
        trainingLog << " Momentum: " << momentum;
        trainingLog << " Average reconstruction error: " << error;
        trainingLog << " Delta: " << delta << std::endl;
        
        //Update the learning rate
        alpha *= learningRateUpdate;
        
        trainingResult.setClassificationResult(epoch, error, this);
        trainingResults.push_back(trainingResult);
        trainingResultsObserverManager.notifyObservers( trainingResult );
        
        //Check for convergence
        if( fabs(delta) < minChange ){
            if( ++noChangeCounter >= minNumEpochs ){
                trainingLog << "Stopping training. MinChange limit reached!" << std::endl;
                break;
            }
        }else noChangeCounter = 0;
        
    }
    trainingLog << "Training complete after " << epoch << " epochs. Total training time: " << timer.getMilliSeconds()/1000.0 << " seconds" << std::endl;
    
    trained = true;
    
    return true;
}
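
A compact summary of the batch update implemented above, added as a reading aid and not part of the GRT source: each batch performs one contrastive-divergence (CD-1) step. With W the weights matrix, b_v and b_h the visible/hidden biases, and \sigma the logistic sigmoid,

    h_1 = \sigma(v_1 W^{T} + b_h)\ \text{(sampled)}, \qquad
    v_2 = \sigma(h_1 W + b_v)\ \text{(sampled)}, \qquad
    h_2 = \sigma(v_2 W^{T} + b_h),

    vW \leftarrow \frac{\text{momentum}\cdot vW + \alpha\,(h_1^{T} v_1 - h_2^{T} v_2)}{\text{batchSize}}, \qquad
    W \leftarrow W + vW,

with the visible and hidden biases updated analogously from the column sums of v_1 - v_2 and h_1 - h_2, and the per-batch reconstruction error accumulated as \sum (v_1 - v_2)^2.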
Example 5
Foam::solverPerformance Foam::PBiCG::solve
(
    scalarField& psi,
    const scalarField& source,
    const direction cmpt
) const
{
    // --- Setup class containing solver performance data
    solverPerformance solverPerf
    (
        lduMatrix::preconditioner::getName(controlDict_) + typeName,
        fieldName_
    );

    label nCells = psi.size();

    scalar* __restrict__ psiPtr = psi.begin();

    scalarField pA(nCells);
    scalar* __restrict__ pAPtr = pA.begin();

    scalarField pT(nCells, 0.0);
    scalar* __restrict__ pTPtr = pT.begin();

    scalarField wA(nCells);
    scalar* __restrict__ wAPtr = wA.begin();

    scalarField wT(nCells);
    scalar* __restrict__ wTPtr = wT.begin();

    scalar wArT = solverPerf.great_;
    scalar wArTold = wArT;

    // --- Calculate A.psi and T.psi
    matrix_.Amul(wA, psi, interfaceBouCoeffs_, interfaces_, cmpt);
    matrix_.Tmul(wT, psi, interfaceIntCoeffs_, interfaces_, cmpt);

    // --- Calculate initial residual and transpose residual fields
    scalarField rA(source - wA);
    scalarField rT(source - wT);
    scalar* __restrict__ rAPtr = rA.begin();
    scalar* __restrict__ rTPtr = rT.begin();

    // --- Calculate normalisation factor
    scalar normFactor = this->normFactor(psi, source, wA, pA);

    if (lduMatrix::debug >= 2)
    {
        Info<< "   Normalisation factor = " << normFactor << endl;
    }

    // --- Calculate normalised residual norm
    solverPerf.initialResidual() =
        gSumMag(rA, matrix().mesh().comm())
       /normFactor;
    solverPerf.finalResidual() = solverPerf.initialResidual();

    // --- Check convergence, solve if not converged
    if
    (
        minIter_ > 0
     || !solverPerf.checkConvergence(tolerance_, relTol_)
    )
    {
        // --- Select and construct the preconditioner
        autoPtr<lduMatrix::preconditioner> preconPtr =
        lduMatrix::preconditioner::New
        (
            *this,
            controlDict_
        );

        // --- Solver iteration
        do
        {
            // --- Store previous wArT
            wArTold = wArT;

            // --- Precondition residuals
            preconPtr->precondition(wA, rA, cmpt);
            preconPtr->preconditionT(wT, rT, cmpt);

            // --- Update search directions:
            wArT = gSumProd(wA, rT, matrix().mesh().comm());

            if (solverPerf.nIterations() == 0)
            {
                for (label cell=0; cell<nCells; cell++)
                {
                    pAPtr[cell] = wAPtr[cell];
                    pTPtr[cell] = wTPtr[cell];
                }
            }
            else
            {
                scalar beta = wArT/wArTold;

                for (label cell=0; cell<nCells; cell++)
                {
                    pAPtr[cell] = wAPtr[cell] + beta*pAPtr[cell];
                    pTPtr[cell] = wTPtr[cell] + beta*pTPtr[cell];
                }
            }


            // --- Update preconditioned residuals
            matrix_.Amul(wA, pA, interfaceBouCoeffs_, interfaces_, cmpt);
            matrix_.Tmul(wT, pT, interfaceIntCoeffs_, interfaces_, cmpt);

            scalar wApT = gSumProd(wA, pT, matrix().mesh().comm());

            // --- Test for singularity
            if (solverPerf.checkSingularity(mag(wApT)/normFactor))
            {
                break;
            }


            // --- Update solution and residual:

            scalar alpha = wArT/wApT;

            for (label cell=0; cell<nCells; cell++)
            {
                psiPtr[cell] += alpha*pAPtr[cell];
                rAPtr[cell] -= alpha*wAPtr[cell];
                rTPtr[cell] -= alpha*wTPtr[cell];
            }

            solverPerf.finalResidual() =
                gSumMag(rA, matrix().mesh().comm())
               /normFactor;
        } while
        (
            (
                solverPerf.nIterations()++ < maxIter_
            && !solverPerf.checkConvergence(tolerance_, relTol_)
            )
         || solverPerf.nIterations() < minIter_
        );
    }

    return solverPerf;
}
Example 6
Foam::SolverPerformance<Type>
Foam::PBiCCCG<Type, DType, LUType>::solve
(
    gpuField<Type>& psi
) const
{
    word preconditionerName(this->controlDict_.lookup("preconditioner"));

    // --- Setup class containing solver performance data
    SolverPerformance<Type> solverPerf
    (
        preconditionerName + typeName,
        this->fieldName_
    );

    register label nCells = psi.size();

    gpuField<Type> pA(nCells);

    gpuField<Type> pT(nCells, pTraits<Type>::zero);

    gpuField<Type> wA(nCells);

    gpuField<Type> wT(nCells);

    scalar wArT = 1e15; //this->matrix_.great_;
    scalar wArTold = wArT;

    // --- Calculate A.psi and T.psi
    this->matrix_.Amul(wA, psi);
    this->matrix_.Tmul(wT, psi);

    // --- Calculate initial residual and transpose residual fields
    gpuField<Type> rA(this->matrix_.source() - wA);
    gpuField<Type> rT(this->matrix_.source() - wT);

    // --- Calculate normalisation factor
    Type normFactor = this->normFactor(psi, wA, pA);

    if (LduMatrix<Type, DType, LUType>::debug >= 2)
    {
        Info<< "   Normalisation factor = " << normFactor << endl;
    }

    // --- Calculate normalised residual norm
    solverPerf.initialResidual() = cmptDivide(gSumCmptMag(rA), normFactor);
    solverPerf.finalResidual() = solverPerf.initialResidual();

    // --- Check convergence, solve if not converged
    if
    (
        this->minIter_ > 0
     || !solverPerf.checkConvergence(this->tolerance_, this->relTol_)
    )
    {
        // --- Select and construct the preconditioner
        autoPtr<typename LduMatrix<Type, DType, LUType>::preconditioner>
        preconPtr = LduMatrix<Type, DType, LUType>::preconditioner::New
        (
            *this,
            this->controlDict_
        );

        // --- Solver iteration
        do
        {
            // --- Store previous wArT
            wArTold = wArT;

            // --- Precondition residuals
            preconPtr->precondition(wA, rA);
            preconPtr->preconditionT(wT, rT);

            // --- Update search directions:
            wArT = gSumProd(wA, rT);

            if (solverPerf.nIterations() == 0)
            {
                thrust::copy(wA.begin(),wA.end(),pA.begin());
                thrust::copy(wT.begin(),wT.end(),pT.begin());
            }
            else
            {
                scalar beta = wArT/wArTold;

                thrust::transform
                (
                    wA.begin(),
                    wA.end(),
                    thrust::make_transform_iterator
                    (
                        pA.begin(),
                        multiplyOperatorSFFunctor<scalar,Type,Type>(beta)
                    ),
                    pA.begin(),
                    addOperatorFunctor<Type,Type,Type>()
                );

                thrust::transform
                (
                    wT.begin(),
                    wT.end(),
                    thrust::make_transform_iterator
                    (
                        pT.begin(),
                        multiplyOperatorSFFunctor<scalar,Type,Type>(beta)
                    ),
                    pT.begin(),
                    addOperatorFunctor<Type,Type,Type>()
                );
            }


            // --- Update preconditioned residuals
            this->matrix_.Amul(wA, pA);
            this->matrix_.Tmul(wT, pT);

            scalar wApT = gSumProd(wA, pT);

            // --- Test for singularity
            if
            (
                solverPerf.checkSingularity
                (
                    cmptDivide(pTraits<Type>::one*mag(wApT), normFactor)
                )
            )
            {
                break;
            }


            // --- Update solution and residual:

            scalar alpha = wArT/wApT;

            thrust::transform
            (
                psi.begin(),
                psi.end(),
                thrust::make_transform_iterator
                (
                    pA.begin(),
                    multiplyOperatorSFFunctor<scalar,Type,Type>(alpha)
                ),
                psi.begin(),
                addOperatorFunctor<Type,Type,Type>()
            );

            thrust::transform
            (
                rA.begin(),
                rA.end(),
                thrust::make_transform_iterator
                (
                    wA.begin(),
                    multiplyOperatorSFFunctor<scalar,Type,Type>(alpha)
                ),
                rA.begin(),
                subtractOperatorFunctor<Type,Type,Type>()
            );

            thrust::transform
            (
                rT.begin(),
                rT.end(),
                thrust::make_transform_iterator
                (
                    wT.begin(),
                    multiplyOperatorSFFunctor<scalar,Type,Type>(alpha)
                ),
                rT.begin(),
                subtractOperatorFunctor<Type,Type,Type>()
            );

            solverPerf.finalResidual() =
                cmptDivide(gSumCmptMag(rA), normFactor);

        } while
        (
            (
                solverPerf.nIterations()++ < this->maxIter_
            && !solverPerf.checkConvergence(this->tolerance_, this->relTol_)
            )
         || solverPerf.nIterations() < this->minIter_
        );
    }

    return solverPerf;
}
Example 7
inline void
ReformHermitianMatrix
( UpperOrLower uplo,
        DistMatrix<R,MC,MR>& A,
  const DistMatrix<R,VR,STAR>& w,
  const DistMatrix<R,MC,MR>& Z,
  const RealFunctor& f )
{
#ifndef RELEASE
    PushCallStack("hermitian_function::ReformHermitianMatrix");
#endif
    const Grid& g = A.Grid();

    DistMatrix<R,MC,MR> ZL(g), ZR(g),
                        Z0(g), Z1(g), Z2(g);
    DistMatrix<R,VR,STAR> wT(g),  w0(g),
                          wB(g),  w1(g),
                                  w2(g);

    DistMatrix<R,MC,  STAR> Z1_MC_STAR(g);
    DistMatrix<R,VR,  STAR> Z1_VR_STAR(g);
    DistMatrix<R,STAR,MR  > Z1Trans_STAR_MR(g);
    DistMatrix<R,STAR,STAR> w1_STAR_STAR(g);

    if( uplo == LOWER )
        MakeTrapezoidal( LEFT, UPPER, 1, A );
    else
        MakeTrapezoidal( LEFT, LOWER, -1, A );
    LockedPartitionRight( Z, ZL, ZR, 0 );
    LockedPartitionDown
    ( w, wT,
         wB, 0 );
    while( ZL.Width() < Z.Width() )
    {
        LockedRepartitionRight
        ( ZL, /**/ ZR,
          Z0, /**/ Z1, Z2 );
        LockedRepartitionDown
        ( wT,  w0,
         /**/ /**/
               w1,
          wB,  w2 );

        Z1_MC_STAR.AlignWith( A );
        Z1_VR_STAR.AlignWith( A );
        Z1Trans_STAR_MR.AlignWith( A );
        //--------------------------------------------------------------------//
        Z1_MC_STAR = Z1;
        Z1_VR_STAR = Z1_MC_STAR;
        w1_STAR_STAR = w1;

        // Scale Z1[VR,* ] with the modified eigenvalues
        const int width = Z1_VR_STAR.Width();
        const int localHeight = Z1_VR_STAR.LocalHeight();
        for( int j=0; j<width; ++j )
        {
            const R omega = f(w1_STAR_STAR.GetLocalEntry(j,0));
            R* buffer = Z1_VR_STAR.LocalBuffer(0,j);
            for( int iLocal=0; iLocal<localHeight; ++iLocal )
                buffer[iLocal] *= omega;
        }

        Z1Trans_STAR_MR.TransposeFrom( Z1_VR_STAR );
        internal::LocalTrrk( uplo, (R)1, Z1_MC_STAR, Z1Trans_STAR_MR, (R)1, A );
        //--------------------------------------------------------------------//
        Z1Trans_STAR_MR.FreeAlignments();
        Z1_VR_STAR.FreeAlignments();
        Z1_MC_STAR.FreeAlignments();

        SlideLockedPartitionDown
        ( wT,  w0,
               w1,
         /**/ /**/
          wB,  w2 );
        SlideLockedPartitionRight
        ( ZL,     /**/ ZR,
          Z0, Z1, /**/ Z2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
Example 8
inline void
ReformNormalMatrix
(       DistMatrix<Complex<R>,MC,MR  >& A,
  const DistMatrix<R,         VR,STAR>& w,
  const DistMatrix<Complex<R>,MC,MR  >& Z,
  const ComplexFunctor& f )
{
#ifndef RELEASE
    PushCallStack("hermitian_function::ReformNormalMatrix");
#endif
    const Grid& g = A.Grid();
    typedef Complex<R> C;

    DistMatrix<C,MC,MR> ZL(g), ZR(g),
                        Z0(g), Z1(g), Z2(g);
    DistMatrix<R,VR,STAR> wT(g),  w0(g),
                          wB(g),  w1(g),
                                  w2(g);

    DistMatrix<C,MC,  STAR> Z1_MC_STAR(g);
    DistMatrix<C,VR,  STAR> Z1_VR_STAR(g);
    DistMatrix<C,STAR,MR  > Z1Adj_STAR_MR(g);
    DistMatrix<R,STAR,STAR> w1_STAR_STAR(g);

    Zero( A );
    LockedPartitionRight( Z, ZL, ZR, 0 );
    LockedPartitionDown
    ( w, wT,
         wB, 0 );
    while( ZL.Width() < Z.Width() )
    {
        LockedRepartitionRight
        ( ZL, /**/ ZR,
          Z0, /**/ Z1, Z2 );
        LockedRepartitionDown
        ( wT,  w0,
         /**/ /**/
               w1,
          wB,  w2 );

        Z1_MC_STAR.AlignWith( A );
        Z1_VR_STAR.AlignWith( A );
        Z1Adj_STAR_MR.AlignWith( A );
        //--------------------------------------------------------------------//
        Z1_MC_STAR = Z1;
        Z1_VR_STAR = Z1_MC_STAR;
        w1_STAR_STAR = w1;

        // Scale Z1[VR,* ] with the modified eigenvalues
        const int width = Z1_VR_STAR.Width();
        const int localHeight = Z1_VR_STAR.LocalHeight();
        for( int j=0; j<width; ++j )
        {
            const C conjOmega = Conj(f(w1_STAR_STAR.GetLocalEntry(j,0)));
            C* buffer = Z1_VR_STAR.LocalBuffer(0,j);
            for( int iLocal=0; iLocal<localHeight; ++iLocal )
                buffer[iLocal] *= conjOmega;
        }

        Z1Adj_STAR_MR.AdjointFrom( Z1_VR_STAR );
        internal::LocalGemm
        ( NORMAL, NORMAL, (C)1, Z1_MC_STAR, Z1Adj_STAR_MR, (C)1, A );
        //--------------------------------------------------------------------//
        Z1Adj_STAR_MR.FreeAlignments();
        Z1_VR_STAR.FreeAlignments();
        Z1_MC_STAR.FreeAlignments();

        SlideLockedPartitionDown
        ( wT,  w0,
               w1,
         /**/ /**/
          wB,  w2 );
        SlideLockedPartitionRight
        ( ZL,     /**/ ZR,
          Z0, Z1, /**/ Z2 );
    }
#ifndef RELEASE
    PopCallStack();
#endif
}
inline void
HermitianFromEVD
( UpperOrLower uplo,
        DistMatrix<F>& A,
  const DistMatrix<BASE(F),VR,STAR>& w,
  const DistMatrix<F>& Z )
{
#ifndef RELEASE
    CallStackEntry entry("HermitianFromEVD");
#endif
    const Grid& g = A.Grid();
    typedef BASE(F) R;

    DistMatrix<F> ZL(g), ZR(g),
                  Z0(g), Z1(g), Z2(g);
    DistMatrix<R,VR,STAR> wT(g),  w0(g),
                          wB(g),  w1(g),
                                  w2(g);

    DistMatrix<F,MC,  STAR> Z1_MC_STAR(g);
    DistMatrix<F,VR,  STAR> Z1_VR_STAR(g);
    DistMatrix<F,STAR,MR  > Z1Adj_STAR_MR(g);
    DistMatrix<R,STAR,STAR> w1_STAR_STAR(g);

    A.ResizeTo( Z.Height(), Z.Height() );
    if( uplo == LOWER )
        MakeTrapezoidal( UPPER, A, 1 );
    else
        MakeTrapezoidal( LOWER, A, -1 );
    LockedPartitionRight( Z, ZL, ZR, 0 );
    LockedPartitionDown
    ( w, wT,
         wB, 0 );
    while( ZL.Width() < Z.Width() )
    {
        LockedRepartitionRight
        ( ZL, /**/ ZR,
          Z0, /**/ Z1, Z2 );
        LockedRepartitionDown
        ( wT,  w0,
         /**/ /**/
               w1,
          wB,  w2 );

        Z1_MC_STAR.AlignWith( A );
        Z1_VR_STAR.AlignWith( A );
        Z1Adj_STAR_MR.AlignWith( A );
        //--------------------------------------------------------------------//
        Z1_MC_STAR = Z1;
        Z1_VR_STAR = Z1_MC_STAR;
        w1_STAR_STAR = w1;

        DiagonalScale( RIGHT, NORMAL, w1_STAR_STAR, Z1_VR_STAR );

        Z1Adj_STAR_MR.AdjointFrom( Z1_VR_STAR );
        LocalTrrk( uplo, F(1), Z1_MC_STAR, Z1Adj_STAR_MR, F(1), A );
        //--------------------------------------------------------------------//

        SlideLockedPartitionDown
        ( wT,  w0,
               w1,
         /**/ /**/
          wB,  w2 );
        SlideLockedPartitionRight
        ( ZL,     /**/ ZR,
          Z0, Z1, /**/ Z2 );
    }
}
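
A closing note on Examples 7 and 8, added as a reading aid rather than taken from the original source: each pass of the panel loop accumulates a rank-k update

    A \leftarrow A + Z_1\, \mathrm{diag}\!\big(f(w_1)\big)\, Z_1^{H},

so that on exit A = Z\, f(\Lambda)\, Z^{H} (Z\, f(\Lambda)\, Z^{T} in the real case of Example 7). ReformNormalMatrix builds the full matrix with LocalGemm, while ReformHermitianMatrix and HermitianFromEVD only accumulate the requested triangle via MakeTrapezoidal and LocalTrrk; HermitianFromEVD is the special case f(\lambda) = \lambda.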